From 2b2d7ca7ce25fbec8389e7d85e57742caa47c97d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 10 Dec 2024 10:08:42 +0100 Subject: dma-buf: fix incorrect dma-fence documentation v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There isn't much worse than documentation giving incorrect advice. Grabbing a spinlock while interrupts are disabled usually means that you must also disable interrupts for all other uses of this spinlock. Otherwise, really hard-to-debug issues can occur. So fix that invalid documentation. v2: use Dmitry's suggestion on the documentation Signed-off-by: Christian König Reviewed-by: Simona Vetter (v1) Link: https://patchwork.freedesktop.org/patch/msgid/20250211163109.12200-2-christian.koenig@amd.com --- include/linux/dma-fence.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index e7ad819962e3..52587d390aca 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -169,8 +169,8 @@ struct dma_fence_ops { * implementation know that there is another driver waiting on the * signal (ie. hw->sw case). * - * This function can be called from atomic context, but not - * from irq context, so normal spinlocks can be used. + * This is called with irq's disabled, so only spinlocks which disable + * IRQ's can be used in the code outside of this callback. * * A return value of false indicates the fence already passed, * or some failure occurred that made it impossible to enable -- cgit v1.2.3 From 2ce07fea3cc8b866f7955a7ce1d62b0cc1f74819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 18 Sep 2024 08:16:57 +0200 Subject: dma-buf/dma-fence: remove unnecessary callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fence_value_str and timeline_value_str callbacks were just an unnecessary abstraction in the SW sync implementation. The only caller of those callbacks already knew that the fence in question is a timeline_fence. So print the values directly instead of using a redirection. In addition to that, remove the implementations from virtgpu and vgem. As far as I can see those were never used in the first place. Signed-off-by: Christian König Reviewed-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250211163109.12200-3-christian.koenig@amd.com --- include/linux/dma-fence.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 52587d390aca..b12776883d14 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -238,27 +238,6 @@ struct dma_fence_ops { */ void (*release)(struct dma_fence *fence); - /** - * @fence_value_str: - * - * Callback to fill in free-form debug info specific to this fence, like - * the sequence number. - * - * This callback is optional. - */ - void (*fence_value_str)(struct dma_fence *fence, char *str, int size); - - /** - * @timeline_value_str: - * - * Fills in the current value of the timeline as a string, like the - * sequence number. Note that the specific fence passed to this function - * should not matter, drivers should only use it to look up the - * corresponding timeline structures.
- */ - void (*timeline_value_str)(struct dma_fence *fence, - char *str, int size); - /** * @set_deadline: * -- cgit v1.2.3 From de68b17d5d0716c9a02b8a6ffa34f47c8f2f7690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 11 Feb 2025 15:26:16 +0100 Subject: dma-buf: stop mapping sg_tables on attach v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a workaround to smoothly transition from static to dynamic DMA-buf handling, we cached the sg_table on attach if dynamic handling mismatched between exporter and importer. Since Dmitry and Thomas cleaned that up and also documented the lock handling, we can drop this workaround now. V2: implement Sima's comments Signed-off-by: Christian König Reviewed-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250211163109.12200-4-christian.koenig@amd.com --- include/linux/dma-buf.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 36216d28d8bd..c54ff2dda8cb 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -583,20 +583,6 @@ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) return !!dmabuf->ops->pin; } -/** - * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic - * mappings - * @attach: the DMA-buf attachment to check - * - * Returns true if a DMA-buf importer wants to call the map/unmap functions with - * the dma_resv lock held. - */ -static inline bool -dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach) -{ - return !!attach->importer_ops; -} - struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, struct device *dev); struct dma_buf_attachment * -- cgit v1.2.3 From b72f66f22c0e39ae6684c43fead774c13db24e73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 11 Feb 2025 17:20:53 +0100 Subject: dma-buf: drop caching of sg_tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That was purely for the transition from static to dynamic dma-buf handling and can be removed again now. Signed-off-by: Christian König Reviewed-by: Simona Vetter Reviewed-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250211163109.12200-5-christian.koenig@amd.com --- include/linux/dma-buf.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index c54ff2dda8cb..544f8f8c3f44 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -34,15 +34,6 @@ struct dma_buf_attachment; * @vunmap: [optional] unmaps a vmap from the buffer */ struct dma_buf_ops { - /** - * @cache_sgt_mapping: - * - * If true the framework will cache the first mapping made for each - * attachment. This avoids creating mappings for attachments multiple - * times. - */ - bool cache_sgt_mapping; - /** * @attach: * @@ -493,8 +484,6 @@ struct dma_buf_attach_ops { * @dmabuf: buffer for this attachment. * @dev: device attached to the buffer. * @node: list of dma_buf_attachment, protected by dma_resv lock of the dmabuf. - * @sgt: cached mapping. - * @dir: direction of cached mapping. * @peer2peer: true if the importer can handle peer resources without pages. * @priv: exporter specific attachment data.
* @importer_ops: importer operations for this attachment, if provided @@ -514,8 +503,6 @@ struct dma_buf_attachment { struct dma_buf *dmabuf; struct device *dev; struct list_head node; - struct sg_table *sgt; - enum dma_data_direction dir; bool peer2peer; const struct dma_buf_attach_ops *importer_ops; void *importer_priv; -- cgit v1.2.3 From 87edca6261c1327977d86c16857e74f4fc7c3ae8 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 5 Mar 2025 14:05:50 +0100 Subject: drm/sched: Adjust outdated docu for run_job() The documentation for drm_sched_backend_ops.run_job() mentions a certain function called drm_sched_job_recovery(). This function does not exist. What's actually meant is drm_sched_resubmit_jobs(), which is by now also deprecated. Furthermore, the scheduler expects to "inherit" a reference on the fence from the run_job() callback. This, so far, is also not documented. Remove the mention of the removed function. Discourage the behavior of drm_sched_backend_ops.run_job() being called multiple times for the same job. Document the necessity of incrementing the refcount in run_job(). Acked-by: Danilo Krummrich Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250305130551.136682-3-phasta@kernel.org --- include/drm/gpu_scheduler.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 50928a7ae98e..6381baae8024 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -410,10 +410,36 @@ struct drm_sched_backend_ops { struct drm_sched_entity *s_entity); /** - * @run_job: Called to execute the job once all of the dependencies - * have been resolved. This may be called multiple times, if - * timedout_job() has happened and drm_sched_job_recovery() - * decides to try it again. + * @run_job: Called to execute the job once all of the dependencies + * have been resolved. + * + * @sched_job: the job to run + * + * The deprecated drm_sched_resubmit_jobs() (called by &struct + * drm_sched_backend_ops.timedout_job) can invoke this again with the + * same parameters. Using this is discouraged because it violates + * dma_fence rules, notably dma_fence_init() has to be called on + * already initialized fences for a second time. Moreover, this is + * dangerous because attempts to allocate memory might deadlock with + * memory management code waiting for the reset to complete. + * + * TODO: Document what drivers should do / use instead. + * + * This method is called in a workqueue context - either from the + * submit_wq the driver passed through drm_sched_init(), or, if the + * driver passed NULL, a separate, ordered workqueue the scheduler + * allocated. + * + * Note that the scheduler expects to 'inherit' its own reference to + * this fence from the callback. It does not invoke an extra + * dma_fence_get() on it. Consequently, this callback must take a + * reference for the scheduler, and additional ones for the driver's + * respective needs. + * + * Return: + * * On success: dma_fence the driver must signal once the hardware has + * completed the job ("hardware fence"). + * * On failure: NULL or an ERR_PTR. 
*/ struct dma_fence *(*run_job)(struct drm_sched_job *sched_job); -- cgit v1.2.3 From 2eeed61db4550171a32854e4d3fdf974b749c214 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 5 Mar 2025 14:05:52 +0100 Subject: drm/sched: Update timedout_job()'s documentation drm_sched_backend_ops.timedout_job()'s documentation is outdated. It mentions the deprecated function drm_sched_resubmit_jobs(). Furthermore, it does not point out the important distinction between hardware and firmware schedulers. Since firmware schedulers typically only use one entity per scheduler, timeout handling is significantly more simple because the entity the faulted job came from can just be killed without affecting innocent processes. Update the documentation with that distinction and other details. Reformat the docstring to work to a unified style with the other handles. Acked-by: Danilo Krummrich Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250305130551.136682-5-phasta@kernel.org --- include/drm/gpu_scheduler.h | 78 +++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 6381baae8024..1a7e377d4cbb 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -383,8 +383,15 @@ struct drm_sched_job { struct xarray dependencies; }; +/** + * enum drm_gpu_sched_stat - the scheduler's status + * + * @DRM_GPU_SCHED_STAT_NONE: Reserved. Do not use. + * @DRM_GPU_SCHED_STAT_NOMINAL: Operation succeeded. + * @DRM_GPU_SCHED_STAT_ENODEV: Error: Device is not available anymore. + */ enum drm_gpu_sched_stat { - DRM_GPU_SCHED_STAT_NONE, /* Reserve 0 */ + DRM_GPU_SCHED_STAT_NONE, DRM_GPU_SCHED_STAT_NOMINAL, DRM_GPU_SCHED_STAT_ENODEV, }; @@ -447,43 +454,52 @@ struct drm_sched_backend_ops { * @timedout_job: Called when a job has taken too long to execute, * to trigger GPU recovery. * - * This method is called in a workqueue context. + * @sched_job: The job that has timed out + * + * Drivers typically issue a reset to recover from GPU hangs. + * This procedure looks very different depending on whether a firmware + * or a hardware scheduler is being used. + * + * For a FIRMWARE SCHEDULER, each ring has one scheduler, and each + * scheduler has one entity. Hence, the steps taken typically look as + * follows: * - * Drivers typically issue a reset to recover from GPU hangs, and this - * procedure usually follows the following workflow: + * 1. Stop the scheduler using drm_sched_stop(). This will pause the + * scheduler workqueues and cancel the timeout work, guaranteeing + * that nothing is queued while the ring is being removed. + * 2. Remove the ring. The firmware will make sure that the + * corresponding parts of the hardware are resetted, and that other + * rings are not impacted. + * 3. Kill the entity and the associated scheduler. * - * 1. Stop the scheduler using drm_sched_stop(). This will park the - * scheduler thread and cancel the timeout work, guaranteeing that - * nothing is queued while we reset the hardware queue - * 2. Try to gracefully stop non-faulty jobs (optional) - * 3. Issue a GPU reset (driver-specific) - * 4. Re-submit jobs using drm_sched_resubmit_jobs() - * 5. Restart the scheduler using drm_sched_start(). At that point, new - * jobs can be queued, and the scheduler thread is unblocked + * + * For a HARDWARE SCHEDULER, a scheduler instance schedules jobs from + * one or more entities to one ring. 
This implies that all entities + * associated with the affected scheduler cannot be torn down, because + * this would effectively also affect innocent userspace processes which + * did not submit faulty jobs (for example). + * + * Consequently, the procedure to recover with a hardware scheduler + * should look like this: + * + * 1. Stop all schedulers impacted by the reset using drm_sched_stop(). + * 2. Kill the entity the faulty job stems from. + * 3. Issue a GPU reset on all faulty rings (driver-specific). + * 4. Re-submit jobs on all schedulers impacted by re-submitting them to + * the entities which are still alive. + * 5. Restart all schedulers that were stopped in step #1 using + * drm_sched_start(). * * Note that some GPUs have distinct hardware queues but need to reset * the GPU globally, which requires extra synchronization between the - * timeout handler of the different &drm_gpu_scheduler. One way to - * achieve this synchronization is to create an ordered workqueue - * (using alloc_ordered_workqueue()) at the driver level, and pass this - * queue to drm_sched_init(), to guarantee that timeout handlers are - * executed sequentially. The above workflow needs to be slightly - * adjusted in that case: - * - * 1. Stop all schedulers impacted by the reset using drm_sched_stop() - * 2. Try to gracefully stop non-faulty jobs on all queues impacted by - * the reset (optional) - * 3. Issue a GPU reset on all faulty queues (driver-specific) - * 4. Re-submit jobs on all schedulers impacted by the reset using - * drm_sched_resubmit_jobs() - * 5. Restart all schedulers that were stopped in step #1 using - * drm_sched_start() + * timeout handlers of different schedulers. One way to achieve this + * synchronization is to create an ordered workqueue (using + * alloc_ordered_workqueue()) at the driver level, and pass this queue + * as drm_sched_init()'s @timeout_wq parameter. This will guarantee + * that timeout handlers are executed sequentially. * - * Return DRM_GPU_SCHED_STAT_NOMINAL, when all is normal, - * and the underlying driver has started or completed recovery. + * Return: The scheduler's status, defined by &enum drm_gpu_sched_stat * - * Return DRM_GPU_SCHED_STAT_ENODEV, if the device is no longer - * available, i.e. has been unplugged. */ enum drm_gpu_sched_stat (*timedout_job)(struct drm_sched_job *sched_job); -- cgit v1.2.3 From 143ec8d3f93965110689897f0d25165dc9664009 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 7 Mar 2025 09:03:59 +0100 Subject: drm/prime: Support dedicated DMA device for dma-buf imports Importing dma-bufs via PRIME requires a DMA-capable device. Devices on peripheral busses, such as USB, often cannot perform DMA by themselves. Without DMA-capable device PRIME import fails. DRM drivers for USB devices already use a separate DMA device for dma-buf imports. Make the mechanism generally available. Besides the case of USB, there are embedded DRM devices without DMA capability. DMA is performed by a separate controller. DRM drivers should set this accordingly. Add the field dma_dev to struct drm_device to refer to the device's DMA device. For USB this should be the USB controller. Use dma_dev in the PRIME import helpers, if set. 
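As an illustration (not part of this patch), a USB display driver might wire this up during probe roughly as sketched below; the foo_ names are hypothetical and usb_intf_get_dma_device() is assumed to be available for looking up the DMA-capable host controller:

#include <drm/drm_device.h>
#include <linux/usb.h>

/* Hypothetical bind step of a USB display driver. */
static int foo_set_dma_dev(struct drm_device *drm, struct usb_interface *intf)
{
	struct device *dma_dev = usb_intf_get_dma_device(intf);

	if (!dma_dev)
		return -ENODEV;

	/*
	 * The USB device itself cannot do DMA; register the host
	 * controller so PRIME imports have a DMA-capable device.
	 */
	drm_dev_set_dma_dev(drm, dma_dev);

	/* drm_dev_set_dma_dev() keeps its own reference on the device. */
	put_device(dma_dev);

	return 0;
}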
v2: - acquire internal reference on dma_dev (Jani) - add DMA-controller usecase to docs (Maxime) Signed-off-by: Thomas Zimmermann Reviewed-by: Jani Nikula Reviewed-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20250307080836.42848-2-tzimmermann@suse.de --- include/drm/drm_device.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include') diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 6ea54a578cda..e2f894f1b90a 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -64,6 +64,28 @@ struct drm_device { /** @dev: Device structure of bus-device */ struct device *dev; + /** + * @dma_dev: + * + * Device for DMA operations. Only required if the device @dev + * cannot perform DMA by itself. Should be NULL otherwise. Call + * drm_dev_dma_dev() to get the DMA device instead of using this + * field directly. Call drm_dev_set_dma_dev() to set this field. + * + * DRM devices are sometimes bound to virtual devices that cannot + * perform DMA by themselves. Drivers should set this field to the + * respective DMA controller. + * + * Devices on USB and other peripheral busses also cannot perform + * DMA by themselves. The @dma_dev field should point the bus + * controller that does DMA on behalve of such a device. Required + * for importing buffers via dma-buf. + * + * If set, the DRM core automatically releases the reference on the + * device. + */ + struct device *dma_dev; + /** * @managed: * @@ -327,4 +349,23 @@ struct drm_device { struct dentry *debugfs_root; }; +void drm_dev_set_dma_dev(struct drm_device *dev, struct device *dma_dev); + +/** + * drm_dev_dma_dev - returns the DMA device for a DRM device + * @dev: DRM device + * + * Returns the DMA device of the given DRM device. By default, this + * the DRM device's parent. See drm_dev_set_dma_dev(). + * + * Returns: + * A DMA-capable device for the DRM device. + */ +static inline struct device *drm_dev_dma_dev(struct drm_device *dev) +{ + if (dev->dma_dev) + return dev->dma_dev; + return dev->dev; +} + #endif -- cgit v1.2.3 From 9497c5a0f7c26ff81f11df738a94c6b80f890c0a Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 26 Feb 2025 22:23:52 +0100 Subject: drm/bridge: move bridges_show logic from drm_debugfs.c In preparation to expose more info about bridges in debugfs, which will require more insight into drm_bridge data structures, move the bridges_show code to drm_bridge.c. Suggested-by: Jani Nikula Suggested-by: Dmitry Baryshkov Signed-off-by: Luca Ceresoli Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20250226-drm-debugfs-show-all-bridges-v8-1-bb511cc49d83@bootlin.com Signed-off-by: Louis Chauvet --- include/drm/drm_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index d4c75d59fa12..bacbc5dbf281 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -1108,4 +1108,6 @@ static inline struct drm_bridge *drmm_of_get_bridge(struct drm_device *drm, } #endif +void drm_bridge_debugfs_encoder_params(struct dentry *root, struct drm_encoder *encoder); + #endif -- cgit v1.2.3 From eff0347e7c228335e9ff64aaf02c66957803af6a Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 26 Feb 2025 22:23:53 +0100 Subject: drm/debugfs: add top-level 'bridges' file showing all added bridges The global bridges_list holding all the bridges between drm_bridge_add() and drm_bridge_remove() cannot be inspected via debugfs. 
Add a file showing it. To avoid code duplication, move the code printing a bridge info to a common function. Signed-off-by: Luca Ceresoli Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20250226-drm-debugfs-show-all-bridges-v8-2-bb511cc49d83@bootlin.com Signed-off-by: Louis Chauvet --- include/drm/drm_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index bacbc5dbf281..b0d86a685a41 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -1108,6 +1108,7 @@ static inline struct drm_bridge *drmm_of_get_bridge(struct drm_device *drm, } #endif +void drm_bridge_debugfs_params(struct dentry *root); void drm_bridge_debugfs_encoder_params(struct dentry *root, struct drm_encoder *encoder); #endif -- cgit v1.2.3 From c8619f5402cbcccfe58151b53421029852473e4c Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 10 Mar 2025 15:28:02 -0400 Subject: drm: add modifiers for Apple GPU layouts Apple GPUs support non-linear "GPU-tiled" image layouts. Add modifiers for these layouts. Mesa requires these modifiers to share non-linear buffers across processes, but no other userspace or kernel support is required/expected. These layouts are notably not used for interchange across hardware blocks (e.g. with the display controller). There are other layouts for that but we don't support them either in userspace or kernelspace yet (even downstream), so we don't add modifiers here. Acked-by: Faith Ekstrand Reviewed-by: Sven Peter Link: https://patchwork.freedesktop.org/patch/msgid/20250310-apple-twiddled-modifiers-v4-1-1ccac9544808@rosenzweig.io Signed-off-by: Alyssa Rosenzweig --- include/uapi/drm/drm_fourcc.h | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index e41a3cec6a9e..81202a50dc9e 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -422,6 +422,7 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09 #define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a #define DRM_FORMAT_MOD_VENDOR_MTK 0x0b +#define DRM_FORMAT_MOD_VENDOR_APPLE 0x0c /* add more to the end as needed */ @@ -1494,6 +1495,50 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) /* alias for the most common tiling format */ #define DRM_FORMAT_MOD_MTK_16L_32S_TILE DRM_FORMAT_MOD_MTK(MTK_FMT_MOD_TILE_16L32S) +/* + * Apple GPU-tiled layouts. + * + * Apple GPUs support nonlinear tilings with optional lossless compression. + * + * GPU-tiled images are divided into 16KiB tiles: + * + * Bytes per pixel Tile size + * --------------- --------- + * 1 128x128 + * 2 128x64 + * 4 64x64 + * 8 64x32 + * 16 32x32 + * + * Tiles are raster-order. Pixels within a tile are interleaved (Morton order). + * + * Compressed images pad the body to 128-bytes and are immediately followed by a + * metadata section. The metadata section rounds the image dimensions to + * powers-of-two and contains 8 bytes for each 16x16 compression subtile. + * Subtiles are interleaved (Morton order). + * + * All images are 128-byte aligned. + * + * These layouts fundamentally do not have meaningful strides. No matter how we + * specify strides for these layouts, userspace unaware of Apple image layouts + * will be unable to use correctly the specified stride for any purpose. + * Userspace aware of the image layouts do not use strides. 
The most "correct" + * convention would be setting the image stride to 0. Unfortunately, some + * software assumes the stride is at least (width * bytes per pixel). We + * therefore require that stride equals (width * bytes per pixel). Since the + * stride is arbitrary here, we pick the simplest convention. + * + * Although containing two sections, compressed image layouts are treated in + * software as a single plane. This is modelled after AFBC, a similar + * scheme. Attempting to separate the sections to be "explicit" in DRM would + * only generate more confusion, as software does not treat the image this way. + * + * For detailed information on the hardware image layouts, see + * https://docs.mesa3d.org/drivers/asahi.html#image-layouts + */ +#define DRM_FORMAT_MOD_APPLE_GPU_TILED fourcc_mod_code(APPLE, 1) +#define DRM_FORMAT_MOD_APPLE_GPU_TILED_COMPRESSED fourcc_mod_code(APPLE, 2) + /* * AMD modifiers * -- cgit v1.2.3 From 62b1fa69f31969e11d03529d3a80599ddda2d043 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 15 Nov 2024 22:52:40 +1300 Subject: KVM: Export hardware virtualization enabling/disabling functions To support TDX, KVM will need to enable TDX during KVM module loading time. Enabling TDX requires enabling hardware virtualization first so that all online CPUs (and the new CPU going online) are in post-VMXON state. KVM by default enables hardware virtualization but that is done in kvm_init(), which must be the last step after all initialization is done thus is too late for enabling TDX. Export functions to enable/disable hardware virtualization so that TDX code can use them to handle hardware virtualization enabling before kvm_init(). Signed-off-by: Kai Huang Message-ID: Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f34f4cfaa513..1e75fa114f34 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2571,4 +2571,12 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range); #endif +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +int kvm_enable_virtualization(void); +void kvm_disable_virtualization(void); +#else +static inline int kvm_enable_virtualization(void) { return 0; } +static inline void kvm_disable_virtualization(void) { } +#endif + #endif -- cgit v1.2.3 From fcdbdf63431c9faf639bffc957ea2ce9b545432e Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 15 Nov 2024 22:52:41 +1300 Subject: KVM: VMX: Initialize TDX during KVM module load Before KVM can use TDX to create and run TDX guests, TDX needs to be initialized from two perspectives: 1) TDX module must be initialized properly to a working state; 2) A per-cpu TDX initialization, a.k.a the TDH.SYS.LP.INIT SEAMCALL must be done on any logical cpu before it can run any other TDX SEAMCALLs. The TDX host core-kernel provides two functions to do the above two respectively: tdx_enable() and tdx_cpu_enable(). There are two options in terms of when to initialize TDX: initialize TDX at KVM module loading time, or when creating the first TDX guest. Choose to initialize TDX during KVM module loading time: Initializing TDX module is both memory and CPU time consuming: 1) the kernel needs to allocate a non-trivial size(~1/256) of system memory as metadata used by TDX module to track each TDX-usable memory page's status; 2) the TDX module needs to initialize this metadata, one entry for each TDX-usable memory page. 
Also, the kernel uses alloc_contig_pages() to allocate those metadata chunks, because they are large and need to be physically contiguous. alloc_contig_pages() can fail. If initializing TDX when creating the first TDX guest, then there's chance that KVM won't be able to run any TDX guests albeit KVM _declares_ to be able to support TDX. This isn't good for the user. On the other hand, initializing TDX at KVM module loading time can make sure KVM is providing a consistent view of whether KVM can support TDX to the user. Always only try to initialize TDX after VMX has been initialized. TDX is based on VMX, and if VMX fails to initialize then TDX is likely to be broken anyway. Also, in practice, supporting TDX will require part of VMX and common x86 infrastructure in working order, so TDX cannot be enabled alone w/o VMX support. There are two cases that can result in failure to initialize TDX: 1) TDX cannot be supported (e.g., because of TDX is not supported or enabled by hardware, or module is not loaded, or missing some dependency in KVM's configuration); 2) Any unexpected error during TDX bring-up. For the first case only mark TDX is disabled but still allow KVM module to be loaded. For the second case just fail to load the KVM module so that the user can be aware. Because TDX costs additional memory, don't enable TDX by default. Add a new module parameter 'enable_tdx' to allow the user to opt-in. Note, the name tdx_init() has already been taken by the early boot code. Use tdx_bringup() for initializing TDX (and tdx_cleanup() since KVM doesn't actually teardown TDX). They don't match vt_init()/vt_exit(), vmx_init()/vmx_exit() etc but it's not end of the world. Also, once initialized, the TDX module cannot be disabled and enabled again w/o the TDX module runtime update, which isn't supported by the kernel. After TDX is enabled, nothing needs to be done when KVM disables hardware virtualization, e.g., when offlining CPU, or during suspend/resume. TDX host core-kernel code internally tracks TDX status and can handle "multiple enabling" scenario. Similar to KVM_AMD_SEV, add a new KVM_INTEL_TDX Kconfig to guide KVM TDX code. Make it depend on INTEL_TDX_HOST but not replace INTEL_TDX_HOST because in the longer term there's a use case that requires making SEAMCALLs w/o KVM as mentioned by Dan [1]. Link: https://lore.kernel.org/6723fc2070a96_60c3294dc@dwillia2-mobl3.amr.corp.intel.com.notmuch/ [1] Signed-off-by: Kai Huang Message-ID: <162f9dee05c729203b9ad6688db1ca2960b4b502.1731664295.git.kai.huang@intel.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1e75fa114f34..3bfe3140f444 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2284,6 +2284,7 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +extern bool enable_virt_at_load; extern bool kvm_rebooting; #endif -- cgit v1.2.3 From 7c035bea94074b19ed560a4f23a76c5a6c8e594f Mon Sep 17 00:00:00 2001 From: Zhiming Hu Date: Wed, 19 Feb 2025 09:02:51 -0500 Subject: KVM: TDX: Register TDX host key IDs to cgroup misc controller TDX host key IDs (HKID) are limit resources in a machine, and the misc cgroup lets the machine owner track their usage and limits the possibility of abusing them outside the owner's control. The cgroup v2 miscellaneous subsystem was introduced to control the resource of AMD SEV & SEV-ES ASIDs. 
Likewise introduce HKIDs as a misc resource. Signed-off-by: Zhiming Hu Signed-off-by: Isaku Yamahata Signed-off-by: Paolo Bonzini --- include/linux/misc_cgroup.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 49eef10c8e59..8c0e4f4d71be 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -17,6 +17,10 @@ enum misc_res_type { MISC_CG_RES_SEV, /** @MISC_CG_RES_SEV_ES: AMD SEV-ES ASIDs resource */ MISC_CG_RES_SEV_ES, +#endif +#ifdef CONFIG_INTEL_TDX_HOST + /* Intel TDX HKIDs resource */ + MISC_CG_RES_TDX, #endif /** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */ MISC_CG_RES_TYPES -- cgit v1.2.3 From c4a92f12cf35b83ce81757f6e5e8eb6223b87388 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Mon, 13 Jan 2025 11:08:41 +0800 Subject: KVM: Add parameter "kvm" to kvm_cpu_dirty_log_size() and its callers Add a parameter "kvm" to kvm_cpu_dirty_log_size() and down to its callers: kvm_dirty_ring_get_rsvd_entries(), kvm_dirty_ring_alloc(). This is a preparation to make cpu_dirty_log_size a per-VM value rather than a system-wide value. No function changes expected. Signed-off-by: Yan Zhao Signed-off-by: Paolo Bonzini --- include/linux/kvm_dirty_ring.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 4862c98d80d3..da4d9b5f58f1 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -32,7 +32,7 @@ struct kvm_dirty_ring { * If CONFIG_HAVE_HVM_DIRTY_RING not defined, kvm_dirty_ring.o should * not be included as well, so define these nop functions for the arch. */ -static inline u32 kvm_dirty_ring_get_rsvd_entries(void) +static inline u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm) { return 0; } @@ -42,7 +42,7 @@ static inline bool kvm_use_dirty_bitmap(struct kvm *kvm) return true; } -static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, +static inline int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring, int index, u32 size) { return 0; @@ -71,11 +71,12 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) #else /* CONFIG_HAVE_KVM_DIRTY_RING */ -int kvm_cpu_dirty_log_size(void); +int kvm_cpu_dirty_log_size(struct kvm *kvm); bool kvm_use_dirty_bitmap(struct kvm *kvm); bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm); -u32 kvm_dirty_ring_get_rsvd_entries(void); -int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); +u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm); +int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring, + int index, u32 size); /* * called with kvm->slots_lock held, returns the number of -- cgit v1.2.3 From 44428e4936022a7a31743017849b167e64f33a32 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Sat, 22 Feb 2025 09:42:18 +0800 Subject: KVM: x86: Move pv_unhalted check out of kvm_vcpu_has_events() Move pv_unhalted check out of kvm_vcpu_has_events(), check pv_unhalted explicitly when handling PV unhalt and expose kvm_vcpu_has_events(). kvm_vcpu_has_events() returns true if pv_unhalted is set, and pv_unhalted is only cleared on transitions to KVM_MP_STATE_RUNNABLE. If the guest initiates a spurious wakeup, pv_unhalted could be left set in perpetuity. Currently, this is not problematic because kvm_vcpu_has_events() is only called when handling PV unhalt. 
However, if kvm_vcpu_has_events() is used for other purposes in the future, it could return the unexpected results. Export kvm_vcpu_has_events() for its usage in broader contexts. Suggested-by: Sean Christopherson Signed-off-by: Binbin Wu Message-ID: <20250222014225.897298-3-binbin.wu@linux.intel.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3bfe3140f444..ed1968f6f841 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1609,6 +1609,7 @@ void kvm_arch_disable_virtualization(void); int kvm_arch_enable_virtualization_cpu(void); void kvm_arch_disable_virtualization_cpu(void); #endif +bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 79462faa2b2aa89db029af5e61df11b5bb6ef4e3 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Sat, 22 Feb 2025 09:42:23 +0800 Subject: KVM: TDX: Handle TDG.VP.VMCALL Convert TDG.VP.VMCALL to KVM_EXIT_SYSTEM_EVENT with a new type KVM_SYSTEM_EVENT_TDX_FATAL and forward it to userspace for handling. TD guest can use TDG.VP.VMCALL to report the fatal error it has experienced. This hypercall is special because TD guest is requesting a termination with the error information, KVM needs to forward the hypercall to userspace anyway, KVM doesn't do parsing or conversion, it just dumps the 16 general-purpose registers to userspace and let userspace decide what to do. Signed-off-by: Binbin Wu Message-ID: <20250222014225.897298-8-binbin.wu@linux.intel.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 45e6d8fca9b9..937400350317 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -375,6 +375,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_WAKEUP 4 #define KVM_SYSTEM_EVENT_SUSPEND 5 #define KVM_SYSTEM_EVENT_SEV_TERM 6 +#define KVM_SYSTEM_EVENT_TDX_FATAL 7 __u32 type; __u32 ndata; union { -- cgit v1.2.3 From 98007a0d56b07605c626c9bdb550b5ae5ce71453 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 13 Mar 2025 12:59:55 +0100 Subject: drm/bridge: Add encoder parameter to drm_bridge_funcs.attach The drm_bridge structure contains an encoder pointer that is widely used by bridge drivers. This pattern is largely documented as deprecated in other KMS entities for atomic drivers. However, one of the main use of that pointer is done in attach to just call drm_bridge_attach on the next bridge to add it to the bridge list. While this dereferences the bridge->encoder pointer, it's effectively the same encoder the bridge was being attached to. We can make it more explicit by adding the encoder the bridge is attached to to the list of attach parameters. This also removes the need to dereference bridge->encoder in most drivers. 
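For example (a hypothetical pass-through bridge, not taken from this series), an attach implementation can now chain the next bridge through the encoder it is handed instead of going through bridge->encoder:

#include <linux/container_of.h>
#include <drm/drm_bridge.h>

struct foo_bridge {
	struct drm_bridge bridge;
	struct drm_bridge *next_bridge;
};

static int foo_bridge_attach(struct drm_bridge *bridge,
			     struct drm_encoder *encoder,
			     enum drm_bridge_attach_flags flags)
{
	struct foo_bridge *foo = container_of(bridge, struct foo_bridge, bridge);

	/* Attach the next bridge to the same encoder; no bridge->encoder needed. */
	return drm_bridge_attach(encoder, foo->next_bridge, bridge, flags);
}

static const struct drm_bridge_funcs foo_bridge_funcs = {
	.attach = foo_bridge_attach,
};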
Reviewed-by: Dmitry Baryshkov Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Tested-by: Luca Ceresoli Reviewed-by: Luca Ceresoli Link: https://patchwork.freedesktop.org/patch/msgid/20250313-bridge-connector-v6-1-511c54a604fb@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_bridge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index b0d86a685a41..884ff1faa4c8 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -73,7 +73,7 @@ struct drm_bridge_funcs { * * Zero on success, error code on failure. */ - int (*attach)(struct drm_bridge *bridge, + int (*attach)(struct drm_bridge *bridge, struct drm_encoder *encoder, enum drm_bridge_attach_flags flags); /** -- cgit v1.2.3 From 93b244866cf641f83130bb08946f84a6fdeeca4c Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 13 Mar 2025 12:59:56 +0100 Subject: drm/bridge: Provide a helper to retrieve current bridge state The current bridge state is accessible from the drm_bridge structure, but since it's fairly indirect it's not easy to figure out. Provide a helper to retrieve it. Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20250313-bridge-connector-v6-2-511c54a604fb@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_bridge.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 884ff1faa4c8..cdad3b78a195 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -957,6 +957,38 @@ static inline struct drm_bridge *of_drm_find_bridge(struct device_node *np) } #endif +/** + * drm_bridge_get_current_state() - Get the current bridge state + * @bridge: bridge object + * + * This function must be called with the modeset lock held. + * + * RETURNS: + * + * The current bridge state, or NULL if there is none. + */ +static inline struct drm_bridge_state * +drm_bridge_get_current_state(struct drm_bridge *bridge) +{ + if (!bridge) + return NULL; + + /* + * Only atomic bridges will have bridge->base initialized by + * drm_atomic_private_obj_init(), so we need to make sure we're + * working with one before we try to use the lock. + */ + if (!bridge->funcs || !bridge->funcs->atomic_reset) + return NULL; + + drm_modeset_lock_assert_held(&bridge->base.lock); + + if (!bridge->base.state) + return NULL; + + return drm_priv_to_bridge_state(bridge->base.state); +} + /** * drm_bridge_get_next_bridge() - Get the next bridge in the chain * @bridge: bridge object -- cgit v1.2.3 From a7e4886e06f723045ce52de69491196a08cf14e9 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 13 Mar 2025 12:59:58 +0100 Subject: drm/atomic: Introduce helper to lookup connector by encoder With the bridges switching over to drm_bridge_connector, the direct association between a bridge driver and its connector was lost. This is mitigated for atomic bridge drivers by the fact you can access the encoder, and then call drm_atomic_get_old_connector_for_encoder() or drm_atomic_get_new_connector_for_encoder() with drm_atomic_state. This was also made easier by providing drm_atomic_state directly to all atomic hooks bridges can implement. However, bridge drivers don't have a way to access drm_atomic_state outside of the modeset path, like from the hotplug interrupt path or any interrupt handler. 
Let's introduce a function to retrieve the connector currently assigned to an encoder, without using drm_atomic_state, to make these drivers' life easier. Reviewed-by: Dmitry Baryshkov Reviewed-by: Simona Vetter Tested-by: Herve Codina Co-developed-by: Simona Vetter Signed-off-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250313-bridge-connector-v6-4-511c54a604fb@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_atomic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 4c673f0698fe..38636a593c9d 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -625,6 +625,9 @@ drm_atomic_get_old_connector_for_encoder(const struct drm_atomic_state *state, struct drm_connector * drm_atomic_get_new_connector_for_encoder(const struct drm_atomic_state *state, struct drm_encoder *encoder); +struct drm_connector * +drm_atomic_get_connector_for_encoder(const struct drm_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx); struct drm_crtc * drm_atomic_get_old_crtc_for_encoder(struct drm_atomic_state *state, -- cgit v1.2.3 From e4e3de631d148d075cb0b3dc3c4c62f1e70fc46c Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 13 Mar 2025 12:59:59 +0100 Subject: drm/tests: helpers: Create new helper to enable output We'll need the HDMI state tests light_up_connector() function in more tests, so let's promote it to a helper. Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20250313-bridge-connector-v6-5-511c54a604fb@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_kunit_helpers.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/drm/drm_kunit_helpers.h b/include/drm/drm_kunit_helpers.h index 11d59ce0bac0..1cda7281f300 100644 --- a/include/drm/drm_kunit_helpers.h +++ b/include/drm/drm_kunit_helpers.h @@ -9,6 +9,7 @@ #include +struct drm_connector; struct drm_crtc_funcs; struct drm_crtc_helper_funcs; struct drm_device; @@ -118,6 +119,13 @@ drm_kunit_helper_create_crtc(struct kunit *test, const struct drm_crtc_funcs *funcs, const struct drm_crtc_helper_funcs *helper_funcs); +int drm_kunit_helper_enable_crtc_connector(struct kunit *test, + struct drm_device *drm, + struct drm_crtc *crtc, + struct drm_connector *connector, + const struct drm_display_mode *mode, + struct drm_modeset_acquire_ctx *ctx); + struct drm_display_mode * drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev, u8 video_code); -- cgit v1.2.3 From 56ae6212417702f7e456007b2afa834810ed10a6 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 13 Mar 2025 13:00:02 +0100 Subject: drm/bridge: Add helper to reset bridge pipeline Let's provide an helper to make it easier for bridge drivers to power-cycle their bridge. In order to avoid a circular dependency between that new helper and drm_atomic_helper_reset_crtc(), this new helper will be in a drm_bridge_helper.c file to follow the pattern we have for other objects. 
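A sketch of how a bridge driver might use the new helper from a recovery path (hypothetical driver, assuming the usual modeset-lock retry loop):

#include <drm/drm_bridge.h>
#include <drm/drm_bridge_helper.h>
#include <drm/drm_modeset_lock.h>
#include <drm/drm_print.h>

/* Hypothetical error-recovery path, e.g. from a threaded interrupt handler. */
static void foo_bridge_recover(struct drm_bridge *bridge)
{
	struct drm_device *dev = bridge->dev;
	struct drm_modeset_acquire_ctx ctx;
	int ret;

	DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);

	/* Power-cycle the pipeline feeding this bridge. */
	ret = drm_bridge_helper_reset_crtc(bridge, &ctx);

	DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);

	if (ret)
		drm_warn(dev, "bridge reset failed: %d\n", ret);
}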
Reviewed-by: Herve Codina Reviewed-by: Simona Vetter Tested-by: Herve Codina Co-developed-by: Simona Vetter Signed-off-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250313-bridge-connector-v6-8-511c54a604fb@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_bridge_helper.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/drm/drm_bridge_helper.h (limited to 'include') diff --git a/include/drm/drm_bridge_helper.h b/include/drm/drm_bridge_helper.h new file mode 100644 index 000000000000..6c35b479ec2a --- /dev/null +++ b/include/drm/drm_bridge_helper.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#ifndef __DRM_BRIDGE_HELPER_H_ +#define __DRM_BRIDGE_HELPER_H_ + +struct drm_bridge; +struct drm_modeset_acquire_ctx; + +int drm_bridge_helper_reset_crtc(struct drm_bridge *bridge, + struct drm_modeset_acquire_ctx *ctx); + +#endif // __DRM_BRIDGE_HELPER_H_ -- cgit v1.2.3 From 0b28b7080ef5a323c3afa3860c3d45d627629830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 4 Feb 2025 14:14:12 +0100 Subject: platform: cznic: Add keyctl helpers for Turris platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Turris devices support signing messages with a per-device unique asymmetric key that was created on the device at manufacture time. Add helper module that helps to expose this ability via the keyctl() syscall. A device-specific driver can register a signing key by calling devm_turris_signing_key_create(). Both the `.turris-signing-keys` keyring and the signing key are created with only the VIEW, READ and SEARCH permissions for userspace - it is impossible to link / unlink / move them, set their attributes, or unlink the keyring from userspace. Signed-off-by: Marek Behún Signed-off-by: Arnd Bergmann --- include/linux/turris-signing-key.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 include/linux/turris-signing-key.h (limited to 'include') diff --git a/include/linux/turris-signing-key.h b/include/linux/turris-signing-key.h new file mode 100644 index 000000000000..032ca8cbf636 --- /dev/null +++ b/include/linux/turris-signing-key.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * 2025 by Marek Behún + */ + +#ifndef __TURRIS_SIGNING_KEY_H +#define __TURRIS_SIGNING_KEY_H + +#include +#include + +struct device; + +struct turris_signing_key_subtype { + u16 key_size; + u8 data_size; + u8 sig_size; + u8 public_key_size; + const char *hash_algo; + const void *(*get_public_key)(const struct key *key); + int (*sign)(const struct key *key, const void *msg, void *signature); +}; + +static inline struct device *turris_signing_key_get_dev(const struct key *key) +{ + return key->payload.data[1]; +} + +int +devm_turris_signing_key_create(struct device *dev, const struct turris_signing_key_subtype *subtype, + const char *desc); + +#endif /* __TURRIS_SIGNING_KEY_H */ -- cgit v1.2.3 From f878af62c06c3e0f2b94f6bafd040400d8cfa4d9 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 3 Mar 2025 15:52:58 +0100 Subject: drm/probe-helper: Do not fail from drmm_kms_helper_poll_init() Failing to set up connector polling is not significant enough to fail device probing. Print a warning and return nothing from the init helper. This only affects the managed init function. The unmanaged init already never fails with an error. 
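Callers therefore shrink to something like the following (illustrative only, hypothetical driver name):

#include <drm/drm_probe_helper.h>

/* Hypothetical device-init path of a KMS driver. */
static int foo_modeset_init(struct drm_device *dev)
{
	/*
	 * No error handling needed any more; the managed helper only
	 * warns if connector polling could not be set up.
	 */
	drmm_kms_helper_poll_init(dev);

	return 0;
}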
Signed-off-by: Thomas Zimmermann Reviewed-by: Patrik Jakobsson Link: https://patchwork.freedesktop.org/patch/msgid/20250303145604.62962-4-tzimmermann@suse.de --- include/drm/drm_probe_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h index d6ce7b218b77..840ae5f798c2 100644 --- a/include/drm/drm_probe_helper.h +++ b/include/drm/drm_probe_helper.h @@ -17,7 +17,7 @@ int drm_helper_probe_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force); -int drmm_kms_helper_poll_init(struct drm_device *dev); +void drmm_kms_helper_poll_init(struct drm_device *dev); void drm_kms_helper_poll_init(struct drm_device *dev); void drm_kms_helper_poll_fini(struct drm_device *dev); bool drm_helper_hpd_irq_event(struct drm_device *dev); -- cgit v1.2.3 From d8343e115658fb35115e0720f4761ffa0147329a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 24 Mar 2025 13:51:19 +0200 Subject: drm/display: dp: implement new access helpers Existing DPCD access functions return an error code or the number of bytes being read / write in case of partial access. However a lot of drivers either (incorrectly) ignore partial access or mishandle error codes. In other cases this results in a boilerplate code which compares returned value with the size. Implement new set of DPCD access helpers, which ignore partial access, always return 0 or an error code. Suggested-by: Jani Nikula Acked-by: Jani Nikula Reviewed-by: Lyude Paul Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250324-drm-rework-dpcd-access-v4-1-e80ff89593df@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/drm/display/drm_dp_helper.h | 92 ++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 5ae4241959f2..21e22289d1ca 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -527,6 +527,64 @@ ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset, ssize_t drm_dp_dpcd_write(struct drm_dp_aux *aux, unsigned int offset, void *buffer, size_t size); +/** + * drm_dp_dpcd_read_data() - read a series of bytes from the DPCD + * @aux: DisplayPort AUX channel (SST or MST) + * @offset: address of the (first) register to read + * @buffer: buffer to store the register values + * @size: number of bytes in @buffer + * + * Returns zero (0) on success, or a negative error + * code on failure. -EIO is returned if the request was NAKed by the sink or + * if the retry count was exceeded. If not all bytes were transferred, this + * function returns -EPROTO. Errors from the underlying AUX channel transfer + * function, with the exception of -EBUSY (which causes the transaction to + * be retried), are propagated to the caller. 
+ */ +static inline int drm_dp_dpcd_read_data(struct drm_dp_aux *aux, + unsigned int offset, + void *buffer, size_t size) +{ + int ret; + + ret = drm_dp_dpcd_read(aux, offset, buffer, size); + if (ret < 0) + return ret; + if (ret < size) + return -EPROTO; + + return 0; +} + +/** + * drm_dp_dpcd_write_data() - write a series of bytes to the DPCD + * @aux: DisplayPort AUX channel (SST or MST) + * @offset: address of the (first) register to write + * @buffer: buffer containing the values to write + * @size: number of bytes in @buffer + * + * Returns zero (0) on success, or a negative error + * code on failure. -EIO is returned if the request was NAKed by the sink or + * if the retry count was exceeded. If not all bytes were transferred, this + * function returns -EPROTO. Errors from the underlying AUX channel transfer + * function, with the exception of -EBUSY (which causes the transaction to + * be retried), are propagated to the caller. + */ +static inline int drm_dp_dpcd_write_data(struct drm_dp_aux *aux, + unsigned int offset, + void *buffer, size_t size) +{ + int ret; + + ret = drm_dp_dpcd_write(aux, offset, buffer, size); + if (ret < 0) + return ret; + if (ret < size) + return -EPROTO; + + return 0; +} + /** * drm_dp_dpcd_readb() - read a single byte from the DPCD * @aux: DisplayPort AUX channel @@ -534,7 +592,8 @@ ssize_t drm_dp_dpcd_write(struct drm_dp_aux *aux, unsigned int offset, * @valuep: location where the value of the register will be stored * * Returns the number of bytes transferred (1) on success, or a negative - * error code on failure. + * error code on failure. In most of the cases you should be using + * drm_dp_dpcd_read_byte() instead. */ static inline ssize_t drm_dp_dpcd_readb(struct drm_dp_aux *aux, unsigned int offset, u8 *valuep) @@ -549,7 +608,8 @@ static inline ssize_t drm_dp_dpcd_readb(struct drm_dp_aux *aux, * @value: value to write to the register * * Returns the number of bytes transferred (1) on success, or a negative - * error code on failure. + * error code on failure. In most of the cases you should be using + * drm_dp_dpcd_write_byte() instead. */ static inline ssize_t drm_dp_dpcd_writeb(struct drm_dp_aux *aux, unsigned int offset, u8 value) @@ -557,6 +617,34 @@ static inline ssize_t drm_dp_dpcd_writeb(struct drm_dp_aux *aux, return drm_dp_dpcd_write(aux, offset, &value, 1); } +/** + * drm_dp_dpcd_read_byte() - read a single byte from the DPCD + * @aux: DisplayPort AUX channel + * @offset: address of the register to read + * @valuep: location where the value of the register will be stored + * + * Returns zero (0) on success, or a negative error code on failure. + */ +static inline int drm_dp_dpcd_read_byte(struct drm_dp_aux *aux, + unsigned int offset, u8 *valuep) +{ + return drm_dp_dpcd_read_data(aux, offset, valuep, 1); +} + +/** + * drm_dp_dpcd_write_byte() - write a single byte to the DPCD + * @aux: DisplayPort AUX channel + * @offset: address of the register to write + * @value: value to write to the register + * + * Returns zero (0) on success, or a negative error code on failure. 
+ */ +static inline int drm_dp_dpcd_write_byte(struct drm_dp_aux *aux, + unsigned int offset, u8 value) +{ + return drm_dp_dpcd_write_data(aux, offset, &value, 1); +} + int drm_dp_read_dpcd_caps(struct drm_dp_aux *aux, u8 dpcd[DP_RECEIVER_CAP_SIZE]); -- cgit v1.2.3 From 8f5c4871a014cf31133476ffd2f117bffeaf6b60 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Mar 2025 00:25:59 +0300 Subject: drm/gem: Change locked/unlocked postfix of drm_gem_v/unmap() function names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make drm/gem API function names consistent by having locked function use the _locked postfix in the name, while the unlocked variants don't use the _unlocked postfix. Rename drm_gem_v/unmap() function names to make them consistent with the rest of the API functions. Acked-by: Maxime Ripard Reviewed-by: Boris Brezillon Suggested-by: Boris Brezillon Reviewed-by: Christian König Acked-by: Thomas Zimmermann Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250322212608.40511-2-dmitry.osipenko@collabora.com --- include/drm/drm_gem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 2bf893eabb4b..13c312ca07ae 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -537,8 +537,8 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, void drm_gem_lock(struct drm_gem_object *obj); void drm_gem_unlock(struct drm_gem_object *obj); -int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map); -void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map); +int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map); +void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map); int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles, int count, struct drm_gem_object ***objs_out); -- cgit v1.2.3 From 9a0fd089f08d059c1d9cc3f8ac6fc268e93ff6d7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Mar 2025 00:26:00 +0300 Subject: drm/gem: Add _locked postfix to functions that have unlocked counterpart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add _locked postfix to drm_gem functions that have unlocked counterpart functions to make GEM functions naming more consistent and intuitive in regards to the locking requirements. 
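With the naming convention used by this series, the unsuffixed calls handle the dma-resv locking themselves, while the _locked variants expect the caller to already hold the lock. A hypothetical caller of the renamed drm_gem_vmap()/drm_gem_vunmap() from the previous patch:

#include <drm/drm_gem.h>
#include <linux/iosys-map.h>

/* Hypothetical helper: CPU access to a BO without holding dma-resv. */
static int foo_cpu_touch(struct drm_gem_object *obj)
{
	struct iosys_map map;
	int ret;

	/* Unsuffixed variant: takes and drops the reservation lock itself. */
	ret = drm_gem_vmap(obj, &map);
	if (ret)
		return ret;

	/* ... access the mapping here ... */

	drm_gem_vunmap(obj, &map);

	return 0;
}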
Acked-by: Maxime Ripard Reviewed-by: Boris Brezillon Suggested-by: Boris Brezillon Reviewed-by: Christian König Acked-by: Thomas Zimmermann Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250322212608.40511-3-dmitry.osipenko@collabora.com --- include/drm/drm_gem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 13c312ca07ae..43cf3c2c7ca0 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -561,7 +561,7 @@ unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj)); -int drm_gem_evict(struct drm_gem_object *obj); +int drm_gem_evict_locked(struct drm_gem_object *obj); /** * drm_gem_object_is_shared_for_memory_stats - helper for shared memory stats -- cgit v1.2.3 From 5462dc8371486fe72129c7b1ad9386cf00679737 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Mar 2025 00:26:01 +0300 Subject: drm/gem: Document locking rule of vmap and evict callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vmap/vunmap/evict GEM callbacks are always invoked with a held GEM's reservation lock. Document this locking rule for clarity. Reviewed-by: Boris Brezillon Reviewed-by: Christian König Acked-by: Thomas Zimmermann Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250322212608.40511-4-dmitry.osipenko@collabora.com --- include/drm/drm_gem.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 43cf3c2c7ca0..9b71f7a9f3f8 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -159,7 +159,8 @@ struct drm_gem_object_funcs { * @vmap: * * Returns a virtual address for the buffer. Used by the - * drm_gem_dmabuf_vmap() helper. + * drm_gem_dmabuf_vmap() helper. Called with a held GEM reservation + * lock. * * This callback is optional. */ @@ -169,7 +170,8 @@ struct drm_gem_object_funcs { * @vunmap: * * Releases the address previously returned by @vmap. Used by the - * drm_gem_dmabuf_vunmap() helper. + * drm_gem_dmabuf_vunmap() helper. Called with a held GEM reservation + * lock. * * This callback is optional. */ @@ -192,7 +194,8 @@ struct drm_gem_object_funcs { * @evict: * * Evicts gem object out from memory. Used by the drm_gem_object_evict() - * helper. Returns 0 on success, -errno otherwise. + * helper. Returns 0 on success, -errno otherwise. Called with a held + * GEM reservation lock. * * This callback is optional. */ -- cgit v1.2.3 From 954907f7147dc43e0d1cd4d430c21d143d5fdf55 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Mar 2025 00:26:03 +0300 Subject: drm/shmem-helper: Refactor locked/unlocked functions Add locked and remove unlocked postfixes from drm-shmem function names, making names consistent with the drm/gem core code. 
Reviewed-by: Boris Brezillon Suggested-by: Boris Brezillon Acked-by: Thomas Zimmermann Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250322212608.40511-6-dmitry.osipenko@collabora.com --- include/drm/drm_gem_shmem_helper.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index cef5a6b5a4d6..0609e336479d 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -102,19 +102,19 @@ struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *de struct vfsmount *gemfs); void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem); -void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); +void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem); int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem); -int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, - struct iosys_map *map); -void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, - struct iosys_map *map); +int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem, + struct iosys_map *map); +void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem, + struct iosys_map *map); int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma); int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_unpin_locked(struct drm_gem_shmem_object *shmem); -int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv); +int drm_gem_shmem_madvise_locked(struct drm_gem_shmem_object *shmem, int madv); static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) { @@ -123,7 +123,7 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem !shmem->base.dma_buf && !drm_gem_is_imported(&shmem->base); } -void drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem); +void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem); struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem); struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem); @@ -214,12 +214,12 @@ static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_ } /* - * drm_gem_shmem_object_vmap - GEM object function for drm_gem_shmem_vmap() + * drm_gem_shmem_object_vmap - GEM object function for drm_gem_shmem_vmap_locked() * @obj: GEM object * @map: Returns the kernel virtual address of the SHMEM GEM object's backing store. * - * This function wraps drm_gem_shmem_vmap(). Drivers that employ the shmem helpers should - * use it as their &drm_gem_object_funcs.vmap handler. + * This function wraps drm_gem_shmem_vmap_locked(). Drivers that employ the shmem + * helpers should use it as their &drm_gem_object_funcs.vmap handler. * * Returns: * 0 on success or a negative error code on failure. @@ -229,7 +229,7 @@ static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, { struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - return drm_gem_shmem_vmap(shmem, map); + return drm_gem_shmem_vmap_locked(shmem, map); } /* @@ -237,15 +237,15 @@ static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, * @obj: GEM object * @map: Kernel virtual address where the SHMEM GEM object was mapped * - * This function wraps drm_gem_shmem_vunmap(). 
Drivers that employ the shmem helpers should - * use it as their &drm_gem_object_funcs.vunmap handler. + * This function wraps drm_gem_shmem_vunmap_locked(). Drivers that employ the shmem + * helpers should use it as their &drm_gem_object_funcs.vunmap handler. */ static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - drm_gem_shmem_vunmap(shmem, map); + drm_gem_shmem_vunmap_locked(shmem, map); } /** -- cgit v1.2.3 From d586b535f1448a6969e2b664e4e061c40bec4679 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Mar 2025 00:26:05 +0300 Subject: drm/shmem-helper: Add and use pages_pin_count Add separate pages_pin_count for tracking of whether drm-shmem pages are moveable or not. With the addition of memory shrinker support to drm-shmem, the pages_use_count will no longer determine whether pages are hard-pinned in memory, but whether pages exist and are soft-pinned (and could be swapped out). The pages_pin_count > 1 will hard-pin pages in memory. Acked-by: Maxime Ripard Reviewed-by: Boris Brezillon Suggested-by: Boris Brezillon Acked-by: Thomas Zimmermann Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250322212608.40511-8-dmitry.osipenko@collabora.com --- include/drm/drm_gem_shmem_helper.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 0609e336479d..d411215fe494 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -39,6 +39,17 @@ struct drm_gem_shmem_object { */ unsigned int pages_use_count; + /** + * @pages_pin_count: + * + * Reference count on the pinned pages table. + * + * Pages are hard-pinned and reside in memory if count + * greater than zero. Otherwise, when count is zero, the pages are + * allowed to be evicted and purged by memory shrinker. + */ + refcount_t pages_pin_count; + /** * @madv: State for madvise * -- cgit v1.2.3 From 051b6646d36dc974c3884d75021bf282925b171c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Mar 2025 00:26:06 +0300 Subject: drm/shmem-helper: Use refcount_t for pages_use_count Use atomic refcount_t helper for pages_use_count to optimize pin/unpin functions by skipping reservation locking while GEM's pin refcount > 1. Acked-by: Maxime Ripard Reviewed-by: Boris Brezillon Suggested-by: Boris Brezillon Acked-by: Thomas Zimmermann Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250322212608.40511-9-dmitry.osipenko@collabora.com --- include/drm/drm_gem_shmem_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index d411215fe494..3a4be433d5f0 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -37,7 +37,7 @@ struct drm_gem_shmem_object { * Reference count on the pages table. * The pages are put when the count reaches zero. */ - unsigned int pages_use_count; + refcount_t pages_use_count; /** * @pages_pin_count: -- cgit v1.2.3 From 0271cc484f3f73d5c6f88d3e43a5d98e07c76381 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Mar 2025 00:26:07 +0300 Subject: drm/shmem-helper: Switch drm_gem_shmem_vmap/vunmap to use pin/unpin The vmapped pages shall be pinned in memory and previously get/put_pages() were implicitly hard-pinning/unpinning the pages. 
This will no longer be the case with addition of memory shrinker because pages_use_count > 0 won't determine anymore whether pages are hard-pinned (they will be soft-pinned), while the new pages_pin_count will do the hard-pinning. Switch the vmap/vunmap() to use pin/unpin() functions in a preparation of addition of the memory shrinker support to drm-shmem. Acked-by: Maxime Ripard Reviewed-by: Boris Brezillon Acked-by: Thomas Zimmermann Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250322212608.40511-10-dmitry.osipenko@collabora.com --- include/drm/drm_gem_shmem_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 3a4be433d5f0..8b9bba87ae63 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -130,7 +130,7 @@ int drm_gem_shmem_madvise_locked(struct drm_gem_shmem_object *shmem, int madv); static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) { return (shmem->madv > 0) && - !shmem->vmap_use_count && shmem->sgt && + !refcount_read(&shmem->pages_pin_count) && shmem->sgt && !shmem->base.dma_buf && !drm_gem_is_imported(&shmem->base); } -- cgit v1.2.3 From e1fc39a923329c34c61dfed0665620d21826e8f4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Mar 2025 00:26:08 +0300 Subject: drm/shmem-helper: Use refcount_t for vmap_use_count Use refcount_t helper for vmap_use_count to make refcounting consistent with pages_use_count and pages_pin_count that use refcount_t. This also makes vmapping to benefit from the refcount_t's overflow checks. Acked-by: Maxime Ripard Reviewed-by: Boris Brezillon Suggested-by: Boris Brezillon Acked-by: Thomas Zimmermann Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250322212608.40511-11-dmitry.osipenko@collabora.com --- include/drm/drm_gem_shmem_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 8b9bba87ae63..b4f993da3cae 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -82,7 +82,7 @@ struct drm_gem_shmem_object { * Reference count on the virtual address. * The address are un-mapped when the count reaches zero. */ - unsigned int vmap_use_count; + refcount_t vmap_use_count; /** * @pages_mark_dirty_on_put: -- cgit v1.2.3 From cca9734ebe55f6af11ce8d57ca1afdc4d158c808 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 25 Mar 2025 15:47:10 -0700 Subject: drm/xe/bmg: Add one additional PCI ID One additional BMG PCI ID has been added to the spec; make sure our driver recognizes devices with this ID properly. 
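
For context, the ID lists in pciids.h are consumed by expanding them into a driver's PCI device table; the element macro and driver-data symbol below are illustrative only, not part of this patch.

	#define MY_BMG_DEVICE(id, info) \
		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, (id)), \
		  .driver_data = (kernel_ulong_t)(info) }

	static const struct pci_device_id my_ids[] = {
		INTEL_BMG_IDS(MY_BMG_DEVICE, &my_bmg_desc),
		{ }
	};
	MODULE_DEVICE_TABLE(pci, my_ids);
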
Bspec: 68090 Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Clint Taylor Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250325224709.4073080-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- include/drm/intel/pciids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 4736ea525048..d212848d07f3 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -850,6 +850,7 @@ MACRO__(0xE20C, ## __VA_ARGS__), \ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE210, ## __VA_ARGS__), \ + MACRO__(0xE211, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__), \ MACRO__(0xE215, ## __VA_ARGS__), \ MACRO__(0xE216, ## __VA_ARGS__) -- cgit v1.2.3 From a05cea36dc9ef76b4c6651a25214b9e2b742d212 Mon Sep 17 00:00:00 2001 From: Saket Dumbre Date: Wed, 26 Mar 2025 21:07:06 +0100 Subject: ACPICA: New release 20240927 ACPICA commit 4632caf451c28da5355ab7131df8bef77818e0f4 Link: https://github.com/acpica/acpica/commit/4632caf4 Signed-off-by: Saket Dumbre Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/1918458.tdWV9SEqCh@rjwysocki.net --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 78b24b090488..25a071d305bd 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20240827 +#define ACPI_CA_VERSION 0x20240927 #include #include -- cgit v1.2.3 From 5d2f7e76b70121fe06fd12315a6ea439e3bf0414 Mon Sep 17 00:00:00 2001 From: Zaid Alali Date: Wed, 26 Mar 2025 21:07:57 +0100 Subject: ACPICA: actbl1: Update values to hex to follow ACPI specs ACPICA commit 0b44ed75fb551bf3fbbbd39ca72bd932872fff20 ACPI specs(1) define Error Injection Actions in hex values. This commit intends to update values from decimal to hex to be consistent with ACPI specs. Link: https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html (1) Link: https://github.com/acpica/acpica/commit/0b44ed75 Signed-off-by: Zaid Alali Signed-off-by: Rafael J. 
Wysocki Link: https://patch.msgid.link/13729719.uLZWGnKmhe@rjwysocki.net --- include/acpi/actbl1.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 387fc821703a..1dc29933e2a3 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1024,17 +1024,17 @@ struct acpi_einj_entry { /* Values for Action field above */ enum acpi_einj_actions { - ACPI_EINJ_BEGIN_OPERATION = 0, - ACPI_EINJ_GET_TRIGGER_TABLE = 1, - ACPI_EINJ_SET_ERROR_TYPE = 2, - ACPI_EINJ_GET_ERROR_TYPE = 3, - ACPI_EINJ_END_OPERATION = 4, - ACPI_EINJ_EXECUTE_OPERATION = 5, - ACPI_EINJ_CHECK_BUSY_STATUS = 6, - ACPI_EINJ_GET_COMMAND_STATUS = 7, - ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8, - ACPI_EINJ_GET_EXECUTE_TIMINGS = 9, - ACPI_EINJ_ACTION_RESERVED = 10, /* 10 and greater are reserved */ + ACPI_EINJ_BEGIN_OPERATION = 0x0, + ACPI_EINJ_GET_TRIGGER_TABLE = 0x1, + ACPI_EINJ_SET_ERROR_TYPE = 0x2, + ACPI_EINJ_GET_ERROR_TYPE = 0x3, + ACPI_EINJ_END_OPERATION = 0x4, + ACPI_EINJ_EXECUTE_OPERATION = 0x5, + ACPI_EINJ_CHECK_BUSY_STATUS = 0x6, + ACPI_EINJ_GET_COMMAND_STATUS = 0x7, + ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 0x8, + ACPI_EINJ_GET_EXECUTE_TIMINGS = 0x9, + ACPI_EINJ_ACTION_RESERVED = 0xA, /* 10 and greater are reserved */ ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */ }; -- cgit v1.2.3 From 56b594fdb6dd68d085067a69f5f78171aa6f1657 Mon Sep 17 00:00:00 2001 From: Zaid Alali Date: Wed, 26 Mar 2025 21:08:46 +0100 Subject: ACPICA: actbl1: Add EINJv2 get error type action ACPICA commit 6975cd07e20ba955556e1eafe8a326834c354ae6 Add EINJV2_GET_ERROR_TYPE as defined in the new specs(1)(2). Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4615 (1) Link: https://bugzilla.tianocore.org/attachment.cgi?id=1446 (2) Link: https://github.com/acpica/acpica/commit/6975cd07 Signed-off-by: Zaid Alali Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3344273.aeNJFYEL58@rjwysocki.net --- include/acpi/actbl1.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 1dc29933e2a3..329454c303b9 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1034,7 +1034,8 @@ enum acpi_einj_actions { ACPI_EINJ_GET_COMMAND_STATUS = 0x7, ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 0x8, ACPI_EINJ_GET_EXECUTE_TIMINGS = 0x9, - ACPI_EINJ_ACTION_RESERVED = 0xA, /* 10 and greater are reserved */ + ACPI_EINJV2_GET_ERROR_TYPE = 0x11, + ACPI_EINJ_ACTION_RESERVED = 0x12, /* 0x12 and greater are reserved */ ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */ }; -- cgit v1.2.3 From 6ff5c32881bf7681523d9f6cd619baec8cc82927 Mon Sep 17 00:00:00 2001 From: Adam Lackorzynski Date: Wed, 26 Mar 2025 21:09:53 +0100 Subject: ACPICA: Fix typo in comments for SRAT structures ACPICA commit 218b5b3654b355e7481cbee8209f5212201b1196 Link: https://github.com/acpica/acpica/commit/218b5b36 Signed-off-by: Adam Lackorzynski Signed-off-by: Rafael J. 
Wysocki Link: https://patch.msgid.link/8507690.T7Z3S40VBb@rjwysocki.net --- include/acpi/actbl3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h index 5cd755143b7d..14082dd445f2 100644 --- a/include/acpi/actbl3.h +++ b/include/acpi/actbl3.h @@ -269,7 +269,7 @@ struct acpi_srat_gicc_affinity { #define ACPI_SRAT_GICC_ENABLED (1) /* 00: Use affinity structure */ -/* 4: GCC ITS Affinity (ACPI 6.2) */ +/* 4: GIC ITS Affinity (ACPI 6.2) */ struct acpi_srat_gic_its_affinity { struct acpi_subtable_header header; -- cgit v1.2.3 From 003802c3a732301ff9edd99fe410ee08efe5f157 Mon Sep 17 00:00:00 2001 From: Saket Dumbre Date: Wed, 26 Mar 2025 21:11:33 +0100 Subject: ACPICA: Logfile: Changes for version 20241212 ACPICA commit 7dae72155bf06b0edda9f3aea713da1d48c1c418 Link: https://github.com/acpica/acpica/commit/7dae7215 Signed-off-by: Saket Dumbre Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/22641776.EfDdHjke4D@rjwysocki.net --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 25a071d305bd..adf315e25d8d 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20240927 +#define ACPI_CA_VERSION 0x20241212 #include #include -- cgit v1.2.3 From d499effe1d55552ac701e90fbd17ca418b2189df Mon Sep 17 00:00:00 2001 From: Aaron Ruby Date: Thu, 27 Mar 2025 15:25:49 +0000 Subject: drm/virtio: Add capset definitions to UAPI Since the context-type additions to the virtio-gpu spec, these have been defined locally in guest user-space, and virtio-gpu backend library code. Now, these capsets have been stabilized, and should be defined in a common space, in both the virtio_gpu header, and alongside the virtgpu_drm interface that they apply to. 
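
A short userspace sketch of how these now-public capset IDs are typically consumed through the existing DRM_IOCTL_VIRTGPU_GET_CAPS ioctl; buffer and error handling are simplified, and drmIoctl() is assumed from libdrm.

	struct drm_virtgpu_get_caps args = {
		.cap_set_id  = VIRTGPU_DRM_CAPSET_VENUS,
		.cap_set_ver = 0,
		.addr        = (uintptr_t)caps_buf,
		.size        = sizeof(caps_buf),
	};

	if (drmIoctl(fd, DRM_IOCTL_VIRTGPU_GET_CAPS, &args))
		err(1, "VIRTGPU_GET_CAPS");
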
Reviewed-by: Gurchetan Singh Signed-off-by: Aaron Ruby Signed-off-by: Dmitry Osipenko [dmitry.osipenko@collabora.com: edit commit title] Link: https://patchwork.freedesktop.org/patch/msgid/YT3PR01MB5857E808EDF6949F2DF517FDAFA12@YT3PR01MB5857.CANPRD01.PROD.OUTLOOK.COM --- include/uapi/drm/virtgpu_drm.h | 6 ++++++ include/uapi/linux/virtio_gpu.h | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index c2ce71987e9b..9debb320c34b 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -163,6 +163,12 @@ struct drm_virtgpu_3d_wait { __u32 flags; }; +#define VIRTGPU_DRM_CAPSET_VIRGL 1 +#define VIRTGPU_DRM_CAPSET_VIRGL2 2 +#define VIRTGPU_DRM_CAPSET_GFXSTREAM_VULKAN 3 +#define VIRTGPU_DRM_CAPSET_VENUS 4 +#define VIRTGPU_DRM_CAPSET_CROSS_DOMAIN 5 +#define VIRTGPU_DRM_CAPSET_DRM 6 struct drm_virtgpu_get_caps { __u32 cap_set_id; __u32 cap_set_ver; diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index bf2c9cabd207..be109777d10d 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -309,8 +309,9 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL 1 #define VIRTIO_GPU_CAPSET_VIRGL2 2 -/* 3 is reserved for gfxstream */ +#define VIRTIO_GPU_CAPSET_GFXSTREAM_VULKAN 3 #define VIRTIO_GPU_CAPSET_VENUS 4 +#define VIRTIO_GPU_CAPSET_CROSS_DOMAIN 5 #define VIRTIO_GPU_CAPSET_DRM 6 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ -- cgit v1.2.3 From e83967c355d6194c906e7bb3f1c72cb002e14c9d Mon Sep 17 00:00:00 2001 From: Aradhya Bhatia Date: Sat, 29 Mar 2025 17:09:18 +0530 Subject: drm/mipi-dsi: Add helper to find input format Add a helper API that can be used by the DSI hosts to find the required input bus format for the given output dsi pixel format. Reviewed-by: Dmitry Baryshkov Reviewed-by: Tomi Valkeinen Tested-by: Tomi Valkeinen Signed-off-by: Aradhya Bhatia Signed-off-by: Aradhya Bhatia Link: https://lore.kernel.org/r/20250329113925.68204-8-aradhya.bhatia@linux.dev Signed-off-by: Dmitry Baryshkov --- include/drm/drm_mipi_dsi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index bd40a443385c..d6796ce9f5ff 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -293,6 +293,7 @@ void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, const void *payload, size_t size); ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, size_t num_params, void *data, size_t size); +u32 drm_mipi_dsi_get_input_bus_fmt(enum mipi_dsi_pixel_format dsi_format); #define mipi_dsi_msleep(ctx, delay) \ do { \ -- cgit v1.2.3 From ed9c594d495deb676825489032cee0a03ce29bf9 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Mon, 31 Mar 2025 11:15:25 -0400 Subject: drm/panel: Add new helpers for refcounted panel allocatons Introduce reference counted allocations for panels to avoid use-after-free. The patch adds the macro devm_drm_bridge_alloc() to allocate a new refcounted panel. Followed the documentation for drmm_encoder_alloc() and devm_drm_dev_alloc and other similar implementations for this purpose. 
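
A minimal sketch of how a panel driver would pick up the new allocator (the diff below adds devm_drm_panel_alloc()); the driver struct, funcs table and DSI probe context here are made up for illustration.

	struct my_panel {
		struct drm_panel panel;
		struct gpio_desc *reset_gpio;
	};

	static int my_panel_probe(struct mipi_dsi_device *dsi)
	{
		struct my_panel *ctx;

		ctx = devm_drm_panel_alloc(&dsi->dev, struct my_panel, panel,
					   &my_panel_funcs,
					   DRM_MODE_CONNECTOR_DSI);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		/* ... hardware setup ... */
		drm_panel_add(&ctx->panel);
		return 0;
	}
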
Reviewed-by: Maxime Ripard Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250331-b4-panel-refcounting-v4-1-dad50c60c6c9@redhat.com Signed-off-by: Maxime Ripard --- include/drm/drm_panel.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index a9c042c8dea1..415e85e8b76a 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -268,6 +268,28 @@ struct drm_panel { bool enabled; }; +void *__devm_drm_panel_alloc(struct device *dev, size_t size, size_t offset, + const struct drm_panel_funcs *funcs, + int connector_type); + +/** + * devm_drm_panel_alloc - Allocate and initialize a refcounted panel. + * + * @dev: struct device of the panel device + * @type: the type of the struct which contains struct &drm_panel + * @member: the name of the &drm_panel within @type + * @funcs: callbacks for this panel + * @connector_type: the connector type (DRM_MODE_CONNECTOR_*) corresponding to + * the panel interface + * + * Returns: + * Pointer to container structure embedding the panel, ERR_PTR on failure. + */ +#define devm_drm_panel_alloc(dev, type, member, funcs, connector_type) \ + ((type *)__devm_drm_panel_alloc(dev, sizeof(type), \ + offsetof(type, member), funcs, \ + connector_type)) + void drm_panel_init(struct drm_panel *panel, struct device *dev, const struct drm_panel_funcs *funcs, int connector_type); -- cgit v1.2.3 From dcba396f69073d4ea8c04374fdd207e67e5b5a94 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Mon, 31 Mar 2025 11:15:26 -0400 Subject: drm/panel: Add refcount support Allocate panel via reference counting. Add _get() and _put() helper functions to ensure panel allocations are refcounted. Avoid use after free by ensuring panel pointer is valid and can be usable till the last reference is put. Reviewed-by: Luca Ceresoli Reviewed-by: Maxime Ripard Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250331-b4-panel-refcounting-v4-2-dad50c60c6c9@redhat.com Signed-off-by: Maxime Ripard --- include/drm/drm_panel.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 415e85e8b76a..31d84f901c51 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -28,6 +28,7 @@ #include #include #include +#include struct backlight_device; struct dentry; @@ -266,6 +267,17 @@ struct drm_panel { * If true then the panel has been enabled. */ bool enabled; + + /** + * @container: Pointer to the private driver struct embedding this + * @struct drm_panel. + */ + void *container; + + /** + * @refcount: reference count of users referencing this panel. + */ + struct kref refcount; }; void *__devm_drm_panel_alloc(struct device *dev, size_t size, size_t offset, @@ -282,6 +294,10 @@ void *__devm_drm_panel_alloc(struct device *dev, size_t size, size_t offset, * @connector_type: the connector type (DRM_MODE_CONNECTOR_*) corresponding to * the panel interface * + * The reference count of the returned panel is initialized to 1. This + * reference will be automatically dropped via devm (by calling + * drm_panel_put()) when @dev is removed. + * * Returns: * Pointer to container structure embedding the panel, ERR_PTR on failure. 
*/ @@ -294,6 +310,9 @@ void drm_panel_init(struct drm_panel *panel, struct device *dev, const struct drm_panel_funcs *funcs, int connector_type); +struct drm_panel *drm_panel_get(struct drm_panel *panel); +void drm_panel_put(struct drm_panel *panel); + void drm_panel_add(struct drm_panel *panel); void drm_panel_remove(struct drm_panel *panel); -- cgit v1.2.3 From c2d3a730069545f2cc2d644bfa8b1482e6388826 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 1 Apr 2025 08:57:58 -0700 Subject: drm/syncobj: Extend EXPORT_SYNC_FILE for timeline syncobjs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for exporting a dma_fence fd for a specific point on a timeline. This is needed for vtest/vpipe[1][2] to implement timeline syncobj support, as it needs a way to turn a point on a timeline back into a dma_fence fd. It also closes an odd omission from the syncobj UAPI. [1] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33433 [2] https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/805 v2: Add DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE v3: Add unstaged uabi header hunk v4: Also handle IMPORT_SYNC_FILE case v5: Address comments from Dmitry v6: checkpatch.pl nits Signed-off-by: Rob Clark Reviewed-by: Christian König Reviewed-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20250401155758.48855-1-robdclark@gmail.com Signed-off-by: Christian König --- include/uapi/drm/drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 7fba37b94401..e63a71d3c607 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -905,13 +905,17 @@ struct drm_syncobj_destroy { }; #define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) #define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) struct drm_syncobj_handle { __u32 handle; __u32 flags; __s32 fd; __u32 pad; + + __u64 point; }; struct drm_syncobj_transfer { -- cgit v1.2.3 From 78a0323506f01e8017a5826cd7e91951c13184fa Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Thu, 27 Mar 2025 23:46:22 +0000 Subject: x86/nmi: Consolidate NMI panic variables Commit: c305a4e98378 ("x86: Move sysctls into arch/x86") recently moved the sysctl handling of panic_on_unrecovered_nmi and panic_on_io_nmi to x86-specific code. These variables no longer need to be declared in the generic header file. Relocate the variable definitions and declarations closer to where they are used. This makes all the NMI panic options consistent and easier to track. [ mingo: Fixed up the SHA1 of the commit reference. 
] Signed-off-by: Sohil Mehta Signed-off-by: Ingo Molnar Reviewed-by: Kai Huang Acked-by: Peter Zijlstra (Intel) Reviewed-by: Nikolay Borisov Cc: Joel Granados Link: https://lore.kernel.org/r/20250327234629.3953536-3-sohil.mehta@intel.com --- include/linux/panic.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/panic.h b/include/linux/panic.h index 2494d51707ef..4adc65766935 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -20,8 +20,6 @@ extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; -extern int panic_on_unrecovered_nmi; -extern int panic_on_io_nmi; extern int panic_on_warn; extern unsigned long panic_on_taint; -- cgit v1.2.3 From 8f0d3618c6b31b1f8ff0c6c08cf6b101e56d6f8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 13 Mar 2025 16:08:31 +0200 Subject: agp/intel-gtt: Add intel_gmch_gtt_read_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915 wants to read out the PTE(s) populated by the BIOS/GOP to verify that the framebuffer is in the correct location. Introduce intel_gmch_gtt_read_entry() that reads out the PTE and decodes it to a somewhat abstract form. For now we just return the dma_addr, present bit, and local memory bit. I didn't bother with the snoop bit/etc. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250313140838.29742-4-ville.syrjala@linux.intel.com Reviewed-by: Jouni Högander --- include/drm/intel/intel-gtt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/intel/intel-gtt.h b/include/drm/intel/intel-gtt.h index cb0d5b7200c7..f53bcff01f22 100644 --- a/include/drm/intel/intel-gtt.h +++ b/include/drm/intel/intel-gtt.h @@ -28,6 +28,8 @@ void intel_gmch_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags); void intel_gmch_gtt_clear_range(unsigned int first_entry, unsigned int num_entries); +dma_addr_t intel_gmch_gtt_read_entry(unsigned int pg, + bool *is_present, bool *is_local); /* Special gtt memory types */ #define AGP_DCACHE_MEMORY 1 -- cgit v1.2.3 From 09cdda7a60f45784cebddf1fa2109d6279f9890b Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Tue, 18 Mar 2025 14:34:35 +0800 Subject: drm/dp: Pull drm_dp_link_power_up/down from Tegra to common drm_dp_helper The helper functions drm_dp_link_power_up/down were moved to Tegra DRM in commit 9a42c7c647a9 ("drm/tegra: Move drm_dp_link helpers to Tegra DRM")". Now since more and more users are duplicating the same code in their own drivers, it's time to make them as DRM DP common helpers again. 
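
Typical use of the two helpers, sketched for a hypothetical encoder/bridge enable path and assuming the receiver's DPCD has already been read into dpcd[]:

	int ret;

	ret = drm_dp_link_power_up(aux, dpcd[DP_DPCD_REV]);
	if (ret < 0)
		return ret;

	/* ... link training and modeset ... */

	drm_dp_link_power_down(aux, dpcd[DP_DPCD_REV]);
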
Signed-off-by: Andy Yan Acked-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250318063452.4983-1-andyshrk@163.com Signed-off-by: Dmitry Baryshkov --- include/drm/display/drm_dp_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 21e22289d1ca..d9614e2c8939 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -654,6 +654,8 @@ int drm_dp_dpcd_read_link_status(struct drm_dp_aux *aux, int drm_dp_dpcd_read_phy_link_status(struct drm_dp_aux *aux, enum drm_dp_phy dp_phy, u8 link_status[DP_LINK_STATUS_SIZE]); +int drm_dp_link_power_up(struct drm_dp_aux *aux, unsigned char revision); +int drm_dp_link_power_down(struct drm_dp_aux *aux, unsigned char revision); int drm_dp_dpcd_write_payload(struct drm_dp_aux *aux, int vcpid, u8 start_time_slot, u8 time_slot_count); -- cgit v1.2.3 From 4190aa3a76a6f840ace7d5e30719f85af0ed6302 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 31 Mar 2025 18:23:54 -0400 Subject: drm/edid: Use unsigned int in drm_add_modes_noedid() A negative resolution doesn't really make any sense, so let's make these parameters unsigned. In C this doesn't make much of a difference, but Rust is stricter about signed/unsigned casts and additionally can check for arithmetic over/underflows if CONFIG_RUST_OVERFLOW_CHECKS is enabled. Signed-off-by: Lyude Paul Reviewed-by: Thomas Zimmermann Cc: Greg Kroah-Hartman Cc: Maxime Ripard Cc: Thomas Zimmermann Link: https://lore.kernel.org/r/20250331222556.454334-2-lyude@redhat.com --- include/drm/drm_edid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index eaac5e665892..b38409670868 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -437,7 +437,7 @@ bool drm_detect_monitor_audio(const struct edid *edid); enum hdmi_quantization_range drm_default_rgb_quant_range(const struct drm_display_mode *mode); int drm_add_modes_noedid(struct drm_connector *connector, - int hdisplay, int vdisplay); + unsigned int hdisplay, unsigned int vdisplay); int drm_edid_header_is_valid(const void *edid); bool drm_edid_is_valid(struct edid *edid); -- cgit v1.2.3 From 8abaa80b403e1fc40877eb9b339ae0ad07e8968f Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 31 Mar 2025 18:23:55 -0400 Subject: drm/mode_config: Make drm_mode_config.(max|min)_(width|height) unsigned It doesn't make much sense to allow devices to specify their min/max resolution as signed integers, and in Rust with CONFIG_RUST_OVERFLOW_CHECKS enabled this provides us actual over/underflow checks. Similarly, it doesn't really make much sense for us to allow devices to specify their minimum/maximum resolution as signed. 
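
A small sketch of the driver-side effect: the mode_config limits (like the drm_add_modes_noedid() arguments) are plain resolutions, now expressed as unsigned values. The funcs table name below is hypothetical.

	int ret;

	ret = drmm_mode_config_init(dev);
	if (ret)
		return ret;

	dev->mode_config.min_width  = 0;
	dev->mode_config.min_height = 0;
	dev->mode_config.max_width  = 8192;
	dev->mode_config.max_height = 8192;
	dev->mode_config.funcs = &my_mode_config_funcs;
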
Signed-off-by: Lyude Paul Reviewed-by: Maxime Ripard Reviewed-by: Thomas Zimmermann Cc: Greg Kroah-Hartman Cc: Thomas Zimmermann Link: https://lore.kernel.org/r/20250331222556.454334-3-lyude@redhat.com --- include/drm/drm_mode_config.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 271765e2e9f2..4b8f0370b79b 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -532,8 +532,8 @@ struct drm_mode_config { */ struct list_head privobj_list; - int min_width, min_height; - int max_width, max_height; + unsigned int min_width, min_height; + unsigned int max_width, max_height; const struct drm_mode_config_funcs *funcs; /* output poll support */ -- cgit v1.2.3 From 3e816361e94a0e79b1aabf44abec552e9698b196 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Apr 2025 11:09:25 -0700 Subject: sched/tracepoints: Move and extend the sched_process_exit() tracepoint It is useful to be able to access current->mm at task exit to, say, record a bunch of VMA information right before the task exits (e.g., for stack symbolization reasons when dealing with short-lived processes that exit in the middle of profiling session). Currently, trace_sched_process_exit() is triggered after exit_mm() which resets current->mm to NULL making this tracepoint unsuitable for inspecting and recording task's mm_struct-related data when tracing process lifetimes. There is a particularly suitable place, though, right after taskstats_exit() is called, but before we do exit_mm() and other exit_*() resource teardowns. taskstats performs a similar kind of accounting that some applications do with BPF, and so co-locating them seems like a good fit. So that's where trace_sched_process_exit() is moved with this patch. Also, existing trace_sched_process_exit() tracepoint is notoriously missing `group_dead` flag that is certainly useful in practice and some of our production applications have to work around this. So plumb `group_dead` through while at it, to have a richer and more complete tracepoint. Note that we can't use sched_process_template anymore, and so we use TRACE_EVENT()-based tracepoint definition. But all the field names and order, as well as assign and output logic remain intact. We just add one extra field at the end in backwards-compatible way. Document the dependency to sched_process_template anyway. Signed-off-by: Andrii Nakryiko Signed-off-by: Ingo Molnar Acked-by: Steven Rostedt (Google) Acked-by: Oleg Nesterov Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250402180925.90914-1-andrii@kernel.org --- include/trace/events/sched.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8994e97d86c1..3bec9fb73a36 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -326,11 +326,37 @@ DEFINE_EVENT(sched_process_template, sched_process_free, TP_ARGS(p)); /* - * Tracepoint for a task exiting: + * Tracepoint for a task exiting. + * Note, it's a superset of sched_process_template and should be kept + * compatible as much as possible. sched_process_exits has an extra + * `group_dead` argument, so sched_process_template can't be used, + * unfortunately, just like sched_migrate_task above. 
*/ -DEFINE_EVENT(sched_process_template, sched_process_exit, - TP_PROTO(struct task_struct *p), - TP_ARGS(p)); +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p, bool group_dead), + + TP_ARGS(p, group_dead), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( bool, group_dead ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ + __entry->group_dead = group_dead; + ), + + TP_printk("comm=%s pid=%d prio=%d group_dead=%s", + __entry->comm, __entry->pid, __entry->prio, + __entry->group_dead ? "true" : "false" + ) +); /* * Tracepoint for waiting on task to unschedule: -- cgit v1.2.3 From 642989287350fa4964c37df0f3769094072421c3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 17:59:59 +0100 Subject: firmware: turris-mox-rwtm: fix building without CONFIG_KEYS "struct key" is defined conditionally, so the code referencing it must be made conditional as well: In file included from drivers/firmware/turris-mox-rwtm.c:29: include/linux/turris-signing-key.h: In function 'turris_signing_key_get_dev': include/linux/turris-signing-key.h:26:19: error: invalid use of undefined type 'const struct key' 26 | return key->payload.data[1]; | ^~ Signed-off-by: Arnd Bergmann --- include/linux/turris-signing-key.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/turris-signing-key.h b/include/linux/turris-signing-key.h index 032ca8cbf636..8a435b73c3a9 100644 --- a/include/linux/turris-signing-key.h +++ b/include/linux/turris-signing-key.h @@ -11,6 +11,7 @@ struct device; +#ifdef CONFIG_KEYS struct turris_signing_key_subtype { u16 key_size; u8 data_size; @@ -29,5 +30,6 @@ static inline struct device *turris_signing_key_get_dev(const struct key *key) int devm_turris_signing_key_create(struct device *dev, const struct turris_signing_key_subtype *subtype, const char *desc); +#endif #endif /* __TURRIS_SIGNING_KEY_H */ -- cgit v1.2.3 From 62aa5790cec89108a23052cc2f00fdd31f9adbac Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Mon, 31 Mar 2025 20:36:17 +0000 Subject: bpf: Fix a comment describing bpf_attr The map_fd field of the bpf_attr union is used in the BPF_MAP_FREEZE syscall. Explicitly mention this in the comments. Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250331203618.1973691-2-a.s.protopopov@gmail.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28705ae67784..07ee73cdf97b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1506,7 +1506,7 @@ union bpf_attr { __s32 map_token_fd; }; - struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ __u32 map_fd; __aligned_u64 key; union { -- cgit v1.2.3 From f6e9a26e2d488c743757d66898ae91c53ffbe528 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Thu, 3 Apr 2025 18:10:46 -0700 Subject: cgroup: move rstat base stat objects into their own struct This non-functional change serves as preparation for moving to subsystem-based rstat trees. The base stats are not an actual subsystem, but in future commits they will have exclusive rstat trees just as other subsystems will. 
Moving the base stat objects into a new struct allows the cgroup_rstat_cpu struct to become more compact since it now only contains the minimum amount of pointers needed for rstat participation. Subsystems will (in future commits) make use of the compact cgroup_rstat_cpu struct while avoiding the memory overhead of the base stat objects which they will not use. An instance of the new struct cgroup_rstat_base_cpu was placed on the cgroup struct so it can retain ownership of these base stats common to all cgroups. A helper function was added for looking up the cpu-specific base stats of a given cgroup. Finally, initialization and variable names were adjusted where applicable. Signed-off-by: JP Kobryn Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 5bc8f55c8cca..1bf2e8db6dac 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -344,10 +344,29 @@ struct cgroup_base_stat { * frequency decreases the cost of each read. * * This struct hosts both the fields which implement the above - - * updated_children and updated_next - and the fields which track basic - * resource statistics on top of it - bsync, bstat and last_bstat. + * updated_children and updated_next. */ struct cgroup_rstat_cpu { + /* + * Child cgroups with stat updates on this cpu since the last read + * are linked on the parent's ->updated_children through + * ->updated_next. + * + * In addition to being more compact, singly-linked list pointing + * to the cgroup makes it unnecessary for each per-cpu struct to + * point back to the associated cgroup. + * + * Protected by per-cpu cgroup_rstat_cpu_lock. + */ + struct cgroup *updated_children; /* terminated by self cgroup */ + struct cgroup *updated_next; /* NULL iff not on the list */ +}; + +/* + * This struct hosts the fields which track basic resource statistics on + * top of it - bsync, bstat and last_bstat. + */ +struct cgroup_rstat_base_cpu { /* * ->bsync protects ->bstat. These are the only fields which get * updated in the hot path. @@ -374,20 +393,6 @@ struct cgroup_rstat_cpu { * deltas to propagate to the per-cpu subtree_bstat. */ struct cgroup_base_stat last_subtree_bstat; - - /* - * Child cgroups with stat updates on this cpu since the last read - * are linked on the parent's ->updated_children through - * ->updated_next. - * - * In addition to being more compact, singly-linked list pointing - * to the cgroup makes it unnecessary for each per-cpu struct to - * point back to the associated cgroup. - * - * Protected by per-cpu cgroup_rstat_cpu_lock. - */ - struct cgroup *updated_children; /* terminated by self cgroup */ - struct cgroup *updated_next; /* NULL iff not on the list */ }; struct cgroup_freezer_state { @@ -518,6 +523,7 @@ struct cgroup { /* per-cpu recursive resource statistics */ struct cgroup_rstat_cpu __percpu *rstat_cpu; + struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu; struct list_head rstat_css_list; /* -- cgit v1.2.3 From 845a7245801142bfff411bc84afa8cdbc789562f Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Thu, 3 Apr 2025 18:10:47 -0700 Subject: cgroup: add helper for checking when css is cgroup::self The cgroup struct has a css field called "self". The main difference between this css and the others found in the cgroup::subsys array is that cgroup::self has a NULL subsystem pointer. 
There are several places where checks are performed to determine whether the css in question is cgroup::self or not. Instead of accessing css->ss directly, introduce a helper function that shows the intent and use where applicable. Signed-off-by: JP Kobryn Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e7da3c3b098b..65d95bb2199f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -347,6 +347,11 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css) return css->flags & CSS_DYING; } +static inline bool css_is_cgroup(struct cgroup_subsys_state *css) +{ + return css->ss == NULL; +} + static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); -- cgit v1.2.3 From a97915559f5c5ff1972d678b94fd460c72a3b5f2 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Thu, 3 Apr 2025 18:10:48 -0700 Subject: cgroup: change rstat function signatures from cgroup-based to css-based This non-functional change serves as preparation for moving to subsystem-based rstat trees. To simplify future commits, change the signatures of existing cgroup-based rstat functions to become css-based and rename them to reflect that. Though the signatures have changed, the implementations have not. Within these functions use the css->cgroup pointer to obtain the associated cgroup and allow code to function the same just as it did before this patch. At applicable call sites, pass the subsystem-specific css pointer as an argument or pass a pointer to cgroup::self if not in subsystem context. Note that cgroup_rstat_updated_list() and cgroup_rstat_push_children() are not altered yet since there would be a larger amount of css to cgroup conversions which may overcomplicate the code at this intermediate phase. Signed-off-by: JP Kobryn Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- include/linux/cgroup.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1bf2e8db6dac..e58bfb880111 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -536,7 +536,7 @@ struct cgroup { /* * A singly-linked list of cgroup structures to be rstat flushed. * This is a scratch field to be used exclusively by - * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock. + * css_rstat_flush_locked() and protected by cgroup_rstat_lock. */ struct cgroup *rstat_flush_next; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 65d95bb2199f..1f5b0a4a3356 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -693,8 +693,8 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) /* * cgroup scalable recursive statistics. */ -void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); -void cgroup_rstat_flush(struct cgroup *cgrp); +void css_rstat_updated(struct cgroup_subsys_state *css, int cpu); +void css_rstat_flush(struct cgroup_subsys_state *css); /* * Basic resource stats. -- cgit v1.2.3 From a3375522bb5e28285cb1845ad5601bf4a581da04 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 4 Apr 2025 12:16:21 +0200 Subject: ASoC: core: Complete support for card rebinding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit e894efef9ac7 ("ASoC: core: add support to card rebind") there is a support for card rebind. 
The support is only partial though. Let's consider the following scenarios both of which aim to enumerate a sound card: 1) snd_soc_add_component(comp1); (...) snd_soc_register_card(card1); 2) snd_soc_register_card(card1); (...) snd_soc_add_component(comp1); For the sake of simplicity, let comp1 be the last dependency needed for the card1 to enumerate. Case 1) will end up succeeding whereas 2) is a certain fail - snd_soc_bind_card() does not honor unbind_card_list so even a non-fatal return code of EPROBE_DEFER will cause the card to collapse. Given the typical usecase of platform_device serving as a card->dev and its probe() ending with: int carddev_probe(struct platform_device *pdev) { (...) return devm_snd_soc_register_card(dev, card); } failure to register card triggers device_unbind_cleanup() - really_probe() in dd.c. To allow for card registration to be deferred while being friendly towards existing users of devm_snd_soc_register_card(), add new card->devres_dev field, and devm_xxx() variants for card registration: devm_snd_soc_register_deferrable_card() (external) devm_snd_soc_bind_card() (internal) In essence, if requested, devm_snd_soc_bind_card() replaces snd_soc_bind_card(). The rebind procedure takes care of destroying old devres before attempting the new bind. This makes sure nothing is left hanging if binding fails and card becomes unbound but is still registered to the ASoC framework. To allow snd_soc_bind_card() to be reused by the deferrable friends, move 'client_mutex' locking to the function's callers and select between devm_xxx and non-devm_xxx variants of snd_soc_bind_card() based on card->devres_dev. On top of the feature, the refactoring brings two benefits: a) single lock/unlock of 'client_mutex' in snd_soc_add_component() instead of ambiguous unlock and immediate lock in snd_soc_try_rebind_card() b) all unbind_card_list manipulations done under 'client_mutex' Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250404101622.3673850-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 952ed77b8c87..484d8b3a34f3 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -423,6 +423,7 @@ enum snd_soc_pcm_subclass { int snd_soc_register_card(struct snd_soc_card *card); void snd_soc_unregister_card(struct snd_soc_card *card); int devm_snd_soc_register_card(struct device *dev, struct snd_soc_card *card); +int devm_snd_soc_register_deferrable_card(struct device *dev, struct snd_soc_card *card); #ifdef CONFIG_PM_SLEEP int snd_soc_suspend(struct device *dev); int snd_soc_resume(struct device *dev); @@ -1087,6 +1088,7 @@ struct snd_soc_card { unsigned int fully_routed:1; unsigned int probed:1; unsigned int component_chaining:1; + struct device *devres_dev; void *drvdata; }; -- cgit v1.2.3 From dd8a9807fa03666bff52cb28472fb227eaac36c9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 31 Mar 2025 13:35:59 +0300 Subject: spi: Group CS related fields in struct spi_device The CS related fields are sparse in the struct spi_device. Group them. While at it, fix the comment style of cs_index_mask. 
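
For the deferrable card registration introduced above, a machine driver probe can stay as small as before while tolerating late-arriving components; the card and function names below are illustrative.

	static int my_machine_probe(struct platform_device *pdev)
	{
		struct snd_soc_card *card = &my_card;

		card->dev = &pdev->dev;

		/* Missing components no longer fail the probe; the card is
		 * bound once the last dependency registers. */
		return devm_snd_soc_register_deferrable_card(&pdev->dev, card);
	}
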
Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20250331103609.4160281-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 0ba5e49bace4..e10f0ebb8250 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -136,13 +136,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. - * @chip_select: Array of physical chipselect, spi->chipselect[i] gives - * the corresponding physical CS for logical CS i. - * @mode: The spi mode defines how data is clocked out and in. - * This may be changed by the device's driver. - * The "active low" default for chipselect mode can be overridden - * (by specifying SPI_CS_HIGH) as can the "MSB first" default for - * each word in a transfer (by specifying SPI_LSB_FIRST). * @bits_per_word: Data transfers involve one or more words; word sizes * like eight or 12 bits are common. In-memory wordsizes are * powers of two bytes (e.g. 20 bit samples use 32 bits). @@ -150,6 +143,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * default (0) indicating protocol words are eight bit bytes. * The spi_transfer.bits_per_word can override this for each transfer. * @rt: Make the pump thread real time priority. + * @mode: The spi mode defines how data is clocked out and in. + * This may be changed by the device's driver. + * The "active low" default for chipselect mode can be overridden + * (by specifying SPI_CS_HIGH) as can the "MSB first" default for + * each word in a transfer (by specifying SPI_LSB_FIRST). * @irq: Negative, or the number passed to request_irq() to receive * interrupts from this device. * @controller_state: Controller's runtime state @@ -162,8 +160,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * the device will bind to the named driver and only the named driver. * Do not set directly, because core frees it; use driver_set_override() to * set or clear it. - * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines - * (optional, NULL when not using a GPIO line) + * @pcpu_statistics: statistics for the spi_device * @word_delay: delay to be inserted between consecutive * words of a transfer * @cs_setup: delay to be introduced by the controller after CS is asserted @@ -171,8 +168,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @cs_inactive: delay to be introduced by the controller after CS is * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. - * @pcpu_statistics: statistics for the spi_device + * @chip_select: Array of physical chipselect, spi->chipselect[i] gives + * the corresponding physical CS for logical CS i. * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array + * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines + * (optional, NULL when not using a GPIO line) * * A @spi_device is used to interchange data between an SPI target device * (usually a discrete chip) and CPU memory. 
@@ -187,7 +187,6 @@ struct spi_device { struct device dev; struct spi_controller *controller; u32 max_speed_hz; - u8 chip_select[SPI_CS_CNT_MAX]; u8 bits_per_word; bool rt; #define SPI_NO_TX BIT(31) /* No transmit wire */ @@ -218,23 +217,29 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ + + /* The statistics */ + struct spi_statistics __percpu *pcpu_statistics; + struct spi_delay word_delay; /* Inter-word delay */ + /* CS delays */ struct spi_delay cs_setup; struct spi_delay cs_hold; struct spi_delay cs_inactive; - /* The statistics */ - struct spi_statistics __percpu *pcpu_statistics; + u8 chip_select[SPI_CS_CNT_MAX]; - /* Bit mask of the chipselect(s) that the driver need to use from - * the chipselect array.When the controller is capable to handle + /* + * Bit mask of the chipselect(s) that the driver need to use from + * the chipselect array. When the controller is capable to handle * multiple chip selects & memories are connected in parallel * then more than one bit need to be set in cs_index_mask. */ u32 cs_index_mask : SPI_CS_CNT_MAX; + struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ + /* * Likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: -- cgit v1.2.3 From 3860642e0a87c8aef3c4285f3bb4ad473cfd514f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 17 Mar 2025 16:33:54 +0800 Subject: crypto: api - Move alg destroy work from instance to template Commit 9ae4577bc077 ("crypto: api - Use work queue in crypto_destroy_instance") introduced a work struct to free an instance after the last user goes away. Move the delayed work from the instance into its template so that when the template is unregistered it can ensure that all its instances have been freed before returning. Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 6e07bbc04089..03b7eca8af9a 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -68,16 +68,17 @@ struct crypto_instance { struct crypto_spawn *spawns; }; - struct work_struct free_work; - void *__ctx[] CRYPTO_MINALIGN_ATTR; }; struct crypto_template { struct list_head list; struct hlist_head instances; + struct hlist_head dead; struct module *module; + struct work_struct free_work; + int (*create)(struct crypto_template *tmpl, struct rtattr **tb); char name[CRYPTO_MAX_ALG_NAME]; -- cgit v1.2.3 From c47e1f4142a3823b6e963e14db295a8a733804b5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 17 Mar 2025 16:33:59 +0800 Subject: crypto: scomp - Allocate per-cpu buffer on first use of each CPU Per-cpu buffers can be wasteful when the number of CPUs is large, especially if the buffer itself is likely to never be used. Reduce such wastage by only allocating them on first use of a particular CPU. On start-up allocate a single buffer on the first possible CPU. For every other CPU a work struct will be scheduled on first use to allocate the buffer for that CPU. Until the allocation succeeds simply use the first CPU's buffer which is protected under a spin lock. 
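
The allocate-on-first-use scheme described above can be pictured roughly as follows; all names are invented and the real scomp code differs in detail (the first possible CPU's buffer is allocated up front, so the fallback always exists).

	struct my_stream {
		spinlock_t lock;
		void *ctx;	/* NULL until the worker allocates it */
	};

	static struct my_stream __percpu *streams;
	static struct work_struct stream_work;
	static cpumask_t stream_want;

	static struct my_stream *lock_stream(void)
	{
		struct my_stream *s = raw_cpu_ptr(streams);

		if (!READ_ONCE(s->ctx)) {
			/* Ask the worker to allocate this CPU's buffer ... */
			cpumask_set_cpu(raw_smp_processor_id(), &stream_want);
			schedule_work(&stream_work);
			/* ... and fall back to the first CPU's buffer for now. */
			s = per_cpu_ptr(streams, cpumask_first(cpu_possible_mask));
		}

		spin_lock_bh(&s->lock);
		return s;
	}
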
Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 6 ------ include/crypto/internal/acompress.h | 1 - include/crypto/internal/scompress.h | 13 +++++++++++++ 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index c497c73baf13..f0e01ff77d92 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -130,14 +130,8 @@ struct crypto_acomp { struct crypto_tfm base; }; -struct crypto_acomp_stream { - spinlock_t lock; - void *ctx; -}; - #define COMP_ALG_COMMON { \ struct crypto_alg base; \ - struct crypto_acomp_stream __percpu *stream; \ } struct comp_alg_common COMP_ALG_COMMON; diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index aaf59f3236fa..2af690819a83 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -37,7 +37,6 @@ * * @reqsize: Context size for (de)compression requests * @base: Common crypto API algorithm data structure - * @stream: Per-cpu memory for algorithm * @calg: Cmonn algorithm data structure shared with scomp */ struct acomp_alg { diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h index f25aa2ea3b48..fd74e656ffd2 100644 --- a/include/crypto/internal/scompress.h +++ b/include/crypto/internal/scompress.h @@ -11,6 +11,8 @@ #include #include +#include +#include struct acomp_req; @@ -18,6 +20,11 @@ struct crypto_scomp { struct crypto_tfm base; }; +struct crypto_acomp_stream { + spinlock_t lock; + void *ctx; +}; + /** * struct scomp_alg - synchronous compression algorithm * @@ -27,6 +34,8 @@ struct crypto_scomp { * @decompress: Function performs a de-compress operation * @base: Common crypto API algorithm data structure * @stream: Per-cpu memory for algorithm + * @stream_work: Work struct to allocate stream memmory + * @stream_want: CPU mask for allocating stream memory * @calg: Cmonn algorithm data structure shared with acomp */ struct scomp_alg { @@ -39,6 +48,10 @@ struct scomp_alg { unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx); + struct crypto_acomp_stream __percpu *stream; + struct work_struct stream_work; + cpumask_t stream_want; + union { struct COMP_ALG_COMMON; struct comp_alg_common calg; -- cgit v1.2.3 From 42d9f6c774790d290c175e8775ce9f1366438098 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 19 Mar 2025 14:04:52 +0800 Subject: crypto: acomp - Move scomp stream allocation code into acomp Move the dynamic stream allocation code into acomp and make it available as a helper for acomp algorithms. Signed-off-by: Herbert Xu --- include/crypto/internal/acompress.h | 33 +++++++++++++++++++++++++++++++++ include/crypto/internal/scompress.h | 28 +++++++++------------------- 2 files changed, 42 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 2af690819a83..ee5eff19eaf4 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -11,6 +11,10 @@ #include #include +#include +#include +#include +#include #define ACOMP_REQUEST_ON_STACK(name, tfm) \ char __##name##_req[sizeof(struct acomp_req) + \ @@ -53,6 +57,24 @@ struct acomp_alg { }; }; +struct crypto_acomp_stream { + spinlock_t lock; + void *ctx; +}; + +struct crypto_acomp_streams { + /* These must come first because of struct scomp_alg. 
*/ + void *(*alloc_ctx)(void); + union { + void (*free_ctx)(void *); + void (*cfree_ctx)(const void *); + }; + + struct crypto_acomp_stream __percpu *streams; + struct work_struct stream_work; + cpumask_t stream_want; +}; + /* * Transform internal helpers. */ @@ -157,4 +179,15 @@ static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm) return crypto_tfm_req_chain(&tfm->base); } +void crypto_acomp_free_streams(struct crypto_acomp_streams *s); +int crypto_acomp_alloc_streams(struct crypto_acomp_streams *s); + +struct crypto_acomp_stream *crypto_acomp_lock_stream_bh( + struct crypto_acomp_streams *s) __acquires(stream); + +static inline void crypto_acomp_unlock_stream_bh( + struct crypto_acomp_stream *stream) __releases(stream) +{ + spin_unlock_bh(&stream->lock); +} #endif diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h index fd74e656ffd2..533d6c16a491 100644 --- a/include/crypto/internal/scompress.h +++ b/include/crypto/internal/scompress.h @@ -9,22 +9,12 @@ #ifndef _CRYPTO_SCOMP_INT_H #define _CRYPTO_SCOMP_INT_H -#include -#include -#include -#include - -struct acomp_req; +#include struct crypto_scomp { struct crypto_tfm base; }; -struct crypto_acomp_stream { - spinlock_t lock; - void *ctx; -}; - /** * struct scomp_alg - synchronous compression algorithm * @@ -33,14 +23,10 @@ struct crypto_acomp_stream { * @compress: Function performs a compress operation * @decompress: Function performs a de-compress operation * @base: Common crypto API algorithm data structure - * @stream: Per-cpu memory for algorithm - * @stream_work: Work struct to allocate stream memmory - * @stream_want: CPU mask for allocating stream memory + * @streams: Per-cpu memory for algorithm * @calg: Cmonn algorithm data structure shared with acomp */ struct scomp_alg { - void *(*alloc_ctx)(void); - void (*free_ctx)(void *ctx); int (*compress)(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx); @@ -48,9 +34,13 @@ struct scomp_alg { unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx); - struct crypto_acomp_stream __percpu *stream; - struct work_struct stream_work; - cpumask_t stream_want; + union { + struct { + void *(*alloc_ctx)(void); + void (*free_ctx)(void *ctx); + }; + struct crypto_acomp_streams streams; + }; union { struct COMP_ALG_COMMON; -- cgit v1.2.3 From 9c8cf582626ef56632006212b385cfbb6c54f094 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 19 Mar 2025 14:04:54 +0800 Subject: crypto: acomp - Add acomp_walk Add acomp_walk which is similar to skcipher_walk but tailored for acomp. Signed-off-by: Herbert Xu --- include/crypto/internal/acompress.h | 44 +++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index ee5eff19eaf4..fbbff9a8a2d9 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -75,6 +76,37 @@ struct crypto_acomp_streams { cpumask_t stream_want; }; +struct acomp_walk { + union { + /* Virtual address of the source. */ + struct { + struct { + const void *const addr; + } virt; + } src; + + /* Private field for the API, do not use. */ + struct scatter_walk in; + }; + + union { + /* Virtual address of the destination. */ + struct { + struct { + void *const addr; + } virt; + } dst; + + /* Private field for the API, do not use. 
*/ + struct scatter_walk out; + }; + + unsigned int slen; + unsigned int dlen; + + int flags; +}; + /* * Transform internal helpers. */ @@ -190,4 +222,16 @@ static inline void crypto_acomp_unlock_stream_bh( { spin_unlock_bh(&stream->lock); } + +void acomp_walk_done_src(struct acomp_walk *walk, int used); +void acomp_walk_done_dst(struct acomp_walk *walk, int used); +int acomp_walk_next_src(struct acomp_walk *walk); +int acomp_walk_next_dst(struct acomp_walk *walk); +int acomp_walk_virt(struct acomp_walk *__restrict walk, + struct acomp_req *__restrict req); + +static inline bool acomp_walk_more_src(const struct acomp_walk *walk, int cur) +{ + return walk->slen != cur; +} #endif -- cgit v1.2.3 From f7b86e0e75bc234751cb7a82d888083a57ef28b2 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 24 Mar 2025 21:15:17 +0000 Subject: crypto: ccp - Add new SEV/SNP platform shutdown API Add new API interface to do SEV/SNP platform shutdown when KVM module is unloaded. Reviewed-by: Dionna Glaze Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Signed-off-by: Herbert Xu --- include/linux/psp-sev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index f3cad182d4ef..0b3a36bdaa90 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -954,6 +954,7 @@ int sev_do_cmd(int cmd, void *data, int *psp_ret); void *psp_copy_user_blob(u64 uaddr, u32 len); void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); +void sev_platform_shutdown(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ @@ -988,6 +989,8 @@ static inline void *snp_alloc_firmware_page(gfp_t mask) static inline void snp_free_firmware_page(void *addr) { } +static inline void sev_platform_shutdown(void) { } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ -- cgit v1.2.3 From f98ed0dd58d9eabe5d240743fc26d4d22e202a07 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 3 Apr 2025 10:33:32 +0800 Subject: crypto: hash - Do not use shash in hard IRQs Update the documentation to be consistent with the fact that shash may not be used in hard IRQs. Reported-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/hash.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 2aa83ee0ec98..b987904afdba 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -834,7 +834,7 @@ static inline void *shash_desc_ctx(struct shash_desc *desc) * cipher handle must point to a keyed message digest cipher in order for this * function to succeed. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, @@ -851,7 +851,7 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, * crypto_shash_update and crypto_shash_final. The parameters have the same * meaning as discussed for those separate three functions. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest creation was successful; < 0 if an error * occurred */ @@ -871,7 +871,7 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, * directly, and it allocates a hash descriptor on the stack internally. * Note that this stack allocation may be fairly large. * - * Context: Any context. + * Context: Softirq or process context. 
* Return: 0 on success; < 0 if an error occurred. */ int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, @@ -886,7 +886,7 @@ int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, * caller-allocated output buffer out which must have sufficient size (e.g. by * calling crypto_shash_descsize). * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the export creation was successful; < 0 if an error occurred */ int crypto_shash_export(struct shash_desc *desc, void *out); @@ -900,7 +900,7 @@ int crypto_shash_export(struct shash_desc *desc, void *out); * the input buffer. That buffer should have been generated with the * crypto_ahash_export function. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the import was successful; < 0 if an error occurred */ int crypto_shash_import(struct shash_desc *desc, const void *in); @@ -913,7 +913,7 @@ int crypto_shash_import(struct shash_desc *desc, const void *in); * operational state handle. Any potentially existing state created by * previous operations is discarded. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest initialization was successful; < 0 if an * error occurred */ @@ -935,7 +935,7 @@ static inline int crypto_shash_init(struct shash_desc *desc) * * Updates the message digest state of the operational state handle. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest update was successful; < 0 if an error * occurred */ @@ -952,7 +952,7 @@ int crypto_shash_update(struct shash_desc *desc, const u8 *data, * into the output buffer. The caller must ensure that the output buffer is * large enough by using crypto_shash_digestsize. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest creation was successful; < 0 if an error * occurred */ @@ -969,7 +969,7 @@ int crypto_shash_final(struct shash_desc *desc, u8 *out); * crypto_shash_update and crypto_shash_final. The parameters have the same * meaning as discussed for those separate functions. * - * Context: Any context. + * Context: Softirq or process context. * Return: 0 if the message digest creation was successful; < 0 if an error * occurred */ -- cgit v1.2.3 From e0f860a1ca32e7206b5cb43812acec9ffe211a52 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Apr 2025 09:19:57 +0200 Subject: crypto: ctr - remove unused crypto_ctr_encrypt_walk() crypto_ctr_encrypt_walk() is no longer used so remove it. Note that some existing drivers currently rely on the transitive includes of some other crypto headers so retain those for the time being. 
Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/ctr.h | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) (limited to 'include') diff --git a/include/crypto/ctr.h b/include/crypto/ctr.h index da1ee73e9ce9..c41685874f00 100644 --- a/include/crypto/ctr.h +++ b/include/crypto/ctr.h @@ -10,56 +10,9 @@ #include #include -#include -#include #define CTR_RFC3686_NONCE_SIZE 4 #define CTR_RFC3686_IV_SIZE 8 #define CTR_RFC3686_BLOCK_SIZE 16 -static inline int crypto_ctr_encrypt_walk(struct skcipher_request *req, - void (*fn)(struct crypto_skcipher *, - const u8 *, u8 *)) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - int blocksize = crypto_skcipher_chunksize(tfm); - u8 buf[MAX_CIPHER_BLOCKSIZE]; - struct skcipher_walk walk; - int err; - - /* avoid integer division due to variable blocksize parameter */ - if (WARN_ON_ONCE(!is_power_of_2(blocksize))) - return -EINVAL; - - err = skcipher_walk_virt(&walk, req, false); - - while (walk.nbytes > 0) { - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - int nbytes = walk.nbytes; - int tail = 0; - - if (nbytes < walk.total) { - tail = walk.nbytes & (blocksize - 1); - nbytes -= tail; - } - - do { - int bsize = min(nbytes, blocksize); - - fn(tfm, walk.iv, buf); - - crypto_xor_cpy(dst, src, buf, bsize); - crypto_inc(walk.iv, blocksize); - - dst += bsize; - src += bsize; - nbytes -= bsize; - } while (nbytes > 0); - - err = skcipher_walk_done(&walk, tail); - } - return err; -} - #endif /* _CRYPTO_CTR_H */ -- cgit v1.2.3 From 4aa6dc909e400b247bd3abf29f805d49a9c140bf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 5 Apr 2025 11:26:02 -0700 Subject: crypto: chacha - centralize the skcipher wrappers for arch code Following the example of the crc32 and crc32c code, make the crypto subsystem register both generic and architecture-optimized chacha20, xchacha20, and xchacha12 skcipher algorithms, all implemented on top of the appropriate library functions. This eliminates the need for every architecture to implement the same skcipher glue code. To register the architecture-optimized skciphers only when architecture-optimized code is actually being used, add a function chacha_is_arch_optimized() and make each arch implement it. Change each architecture's ChaCha module_init function to arch_initcall so that the CPU feature detection is guaranteed to run before chacha_is_arch_optimized() gets called by crypto/chacha.c. In the case of s390, remove the CPU feature based module autoloading, which is no longer needed since the module just gets pulled in via function linkage. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/chacha.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index f8cc073bba41..58129e18cc31 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -99,4 +99,13 @@ static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src, chacha_crypt(state, dst, src, bytes, 20); } +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA) +bool chacha_is_arch_optimized(void); +#else +static inline bool chacha_is_arch_optimized(void) +{ + return false; +} +#endif + #endif /* _CRYPTO_CHACHA_H */ -- cgit v1.2.3 From d23fce15abd480811098c0bca6d4edeb17824279 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 5 Apr 2025 11:26:09 -0700 Subject: crypto: chacha - remove is now included only by crypto/chacha.c, so fold it into there. 
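Both ChaCha changes above funnel the architecture-specific code behind chacha_is_arch_optimized(): the skcipher glue now lives only in crypto/chacha.c and asks the library whether an accelerated backend is usable. A rough sketch of that registration logic is shown below; the array and function names are illustrative, not the exact code, and error unwinding is omitted.

#include <linux/kernel.h>
#include <crypto/chacha.h>
#include <crypto/internal/skcipher.h>

/* Filled elsewhere with setkey/encrypt wrappers around the chacha library calls. */
static struct skcipher_alg generic_algs[3];	/* chacha20/xchacha20/xchacha12, library-backed */
static struct skcipher_alg arch_algs[3];	/* same algorithms, higher-priority arch entry points */

static int __init example_chacha_mod_init(void)
{
	int err;

	err = crypto_register_skciphers(generic_algs, ARRAY_SIZE(generic_algs));
	if (err)
		return err;

	/*
	 * Safe to consult here: the arch modules now run their CPU-feature
	 * detection at arch_initcall time, before this module_init.
	 */
	if (chacha_is_arch_optimized())
		err = crypto_register_skciphers(arch_algs, ARRAY_SIZE(arch_algs));

	return err;
}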
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/internal/chacha.h | 43 ---------------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 include/crypto/internal/chacha.h (limited to 'include') diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h deleted file mode 100644 index b085dc1ac151..000000000000 --- a/include/crypto/internal/chacha.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _CRYPTO_INTERNAL_CHACHA_H -#define _CRYPTO_INTERNAL_CHACHA_H - -#include -#include -#include - -struct chacha_ctx { - u32 key[8]; - int nrounds; -}; - -static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize, int nrounds) -{ - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - int i; - - if (keysize != CHACHA_KEY_SIZE) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(ctx->key); i++) - ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); - - ctx->nrounds = nrounds; - return 0; -} - -static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) -{ - return chacha_setkey(tfm, key, keysize, 20); -} - -static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) -{ - return chacha_setkey(tfm, key, keysize, 12); -} - -#endif /* _CRYPTO_CHACHA_H */ -- cgit v1.2.3 From ae28e34400aab0dc4876eac83766fa9731f9901c Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 3 Apr 2025 12:53:28 -0400 Subject: drm/xe: Allow scratch page under fault mode for certain platform Normally scratch page is not allowed when a vm is operate under page fault mode, i.e., in the existing codes, DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE and DRM_XE_VM_CREATE_FLAG_FAULT_MODE are mutual exclusive. The reason is fault mode relies on recoverable page to work, while scratch page can mute recoverable page fault. On xe2 and xe3, out of bound prefetch can cause page fault and further system hang because xekmd can't resolve such page fault. SYCL and OCL language runtime requires out of bound prefetch to be silently dropped without causing any functional problem, thus the existing behavior doesn't meet language runtime requirement. At the same time, HW prefetching can cause page fault interrupt. Due to page fault interrupt overhead (i.e., need Guc and KMD involved to fix the page fault), HW prefetching can be slowed by many orders of magnitude. Fix those problems by allowing scratch page under fault mode for xe2 and xe3. With scratch page in place, HW prefetching could always hit scratch page instead of causing interrupt. A side effect is, scratch page could hide application program error. Application out of bound accesses are hided by scratch page mapping, instead of get reported to user. 
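For the user-space side, a hedged sketch of what the relaxed check permits on xe2/xe3 (a faulting, long-running VM whose unbound ranges still resolve to the scratch page) could look like the following; fd is assumed to be an open xe render node, the header path follows the installed uAPI headers, and error handling is trimmed.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static uint32_t example_create_faulting_vm(int fd)
{
	struct drm_xe_vm_create create = {
		.flags = DRM_XE_VM_CREATE_FLAG_LR_MODE |
			 DRM_XE_VM_CREATE_FLAG_FAULT_MODE |
			 DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
	};

	/* Rejected as mutually exclusive before this change; accepted on xe2/xe3 now. */
	if (ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create))
		return 0;

	/* Out-of-bound prefetches hit the scratch page instead of raising faults. */
	return create.vm_id;
}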
v2: Refine commit message (Thomas) v3: Move the scratch page flag check to after scratch page wa (Thomas) v4: drop NEEDS_SCRATCH macro (matt) Add a comment to DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE Signed-off-by: Oak Zeng Reviewed-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250403165328.2438690-4-oak.zeng@intel.com Signed-off-by: Himal Prasad Ghimiray --- include/uapi/drm/xe_drm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 616916985e3f..9c08738c3b91 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -917,7 +917,11 @@ struct drm_xe_gem_mmap_offset { * struct drm_xe_vm_create - Input of &DRM_IOCTL_XE_VM_CREATE * * The @flags can be: - * - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE + * - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE - Map the whole virtual address + * space of the VM to scratch page. A vm_bind would overwrite the scratch + * page mapping. This flag is mutually exclusive with the + * %DRM_XE_VM_CREATE_FLAG_FAULT_MODE flag, with an exception of on x2 and + * xe3 platform. * - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts * exec submissions to its exec_queues that don't have an upper time * limit on the job execution time. But exec submissions to these -- cgit v1.2.3 From 1be1cd03a93339f14c8f4fe300bca321fddc6478 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 3 Apr 2025 18:59:14 +0300 Subject: gpiolib: acpi: Reduce memory footprint for struct acpi_gpio_params The line_index member in the struct acpi_gpio_params replicates what is covered in the ACPI GpioIo() or GpioInt() resource. The value there is limited to 16-bit one, so we don't really need to have a full 32-bit storage for it. Together with followed boolean the structure will be smaller. add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3 (-3) Function old new delta acpi_gpio_property_lookup 417 414 -3 Total: Before=15361, After=15358, chg -0.02% `pahole` difference before and after: - /* size: 12, cachelines: 1, members: 3 */ - /* padding: 3 */ + /* size: 8, cachelines: 1, members: 3 */ + /* padding: 1 */ Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20250403160034.2680485-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko --- include/linux/gpio/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 45b651c05b9c..899179972bec 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -587,7 +587,7 @@ struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev, struct acpi_gpio_params { unsigned int crs_entry_index; - unsigned int line_index; + unsigned short line_index; bool active_low; }; -- cgit v1.2.3 From 5741909697a31cfb08e45d56b4211959fb791487 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 19 Mar 2025 14:01:32 +1100 Subject: VFS: improve interface for lookup_one functions The family of functions: lookup_one() lookup_one_unlocked() lookup_one_positive_unlocked() appear designed to be used by external clients of the filesystem rather than by filesystems acting on themselves as the lookup_one_len family are used. They are used by: btrfs/ioctl - which is a user-space interface rather than an internal activity exportfs - i.e. 
from nfsd or the open_by_handle_at interface overlayfs - at access the underlying filesystems smb/server - for file service They should be used by nfsd (more than just the exportfs path) and cachefs but aren't. It would help if the documentation didn't claim they should "not be called by generic code". Also the path component name is passed as "name" and "len" which are (confusingly?) separate by the "base". In some cases the len in simply "strlen" and so passing a qstr using QSTR() would make the calling clearer. Other callers do pass separate name and len which are stored in a struct. Sometimes these are already stored in a qstr, other times it easily could be. So this patch changes these three functions to receive a 'struct qstr *', and improves the documentation. QSTR_LEN() is added to make it easy to pass a QSTR containing a known len. [brauner@kernel.org: take a struct qstr pointer] Signed-off-by: NeilBrown Link: https://lore.kernel.org/r/20250319031545.2999807-2-neil@brown.name Signed-off-by: Christian Brauner --- include/linux/dcache.h | 3 ++- include/linux/namei.h | 9 ++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8d1395f945bf..e974e63bcdbc 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -57,7 +57,8 @@ struct qstr { }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } -#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n)) +#define QSTR_LEN(n,l) (struct qstr)QSTR_INIT(n,l) +#define QSTR(n) QSTR_LEN(n, strlen(n)) extern const struct qstr empty_name; extern const struct qstr slash_name; diff --git a/include/linux/namei.h b/include/linux/namei.h index e3042176cdf4..ba02304a2a8a 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -73,13 +73,12 @@ extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); -struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int); +struct dentry *lookup_one(struct mnt_idmap *, struct qstr *, struct dentry *); struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, - const char *name, struct dentry *base, - int len); + struct qstr *name, struct dentry *base); struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, - const char *name, - struct dentry *base, int len); + struct qstr *name, + struct dentry *base); extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); -- cgit v1.2.3 From 0a02e1f4a54ace747304687ced3b76d159e58914 Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Wed, 26 Mar 2025 06:06:19 +0800 Subject: irqdomain: Support three-cell scheme interrupts Add new function *_twothreecell() to extend support to parse three-cell interrupts which encoded as , the translate function will retrieve irq number and flag from last two cells. This API will be used in gpio irq driver which need to work with two or three cells cases. 
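For a driver-side illustration, the new helper slots straight into an irq_domain_ops table; a minimal sketch (other callbacks omitted) is:

#include <linux/irqdomain.h>

static const struct irq_domain_ops example_gpio_irq_domain_ops = {
	/*
	 * Accepts two-cell and three-cell interrupt specifiers alike. For the
	 * three-cell form the hwirq number and trigger flags come from the
	 * last two cells; what the leading cell encodes is defined by the
	 * binding (an instance index in the GPIO use case referenced above).
	 */
	.xlate = irq_domain_xlate_twothreecell,
	/* .map/.alloc/.free etc. as usual for the driver */
};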
Signed-off-by: Yixun Lan Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250326-04-gpio-irq-threecell-v3-1-aab006ab0e00@gentoo.org --- include/linux/irqdomain.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index bb7111105296..df7e9278c8ac 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -571,16 +571,16 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); - -int irq_domain_translate_twocell(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *out_hwirq, - unsigned int *out_type); - -int irq_domain_translate_onecell(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *out_hwirq, - unsigned int *out_type); +int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); + +int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); +int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); +int irq_domain_translate_twothreecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); /* IPI functions */ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); -- cgit v1.2.3 From 7b73c12c6ebf006ad496f0e38a605d92dfe05157 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 15:59:59 +0100 Subject: shmem: Add shmem_writeout() This will be the replacement for shmem_writepage(). Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250402150005.2309458-6-willy@infradead.org Reviewed-by: Baolin Wang Signed-off-by: Christian Brauner --- include/linux/shmem_fs.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 0b273a7b9f01..5f03a39a26f7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -104,10 +104,11 @@ static inline bool shmem_mapping(struct address_space *mapping) return false; } #endif /* CONFIG_SHMEM */ -extern void shmem_unlock_mapping(struct address_space *mapping); -extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, +void shmem_unlock_mapping(struct address_space *mapping); +struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); -extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); +int shmem_writeout(struct folio *folio, struct writeback_control *wbc); +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE -- cgit v1.2.3 From 6b0dfabb35550cb7b0808585dea6c24971d685d3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 16:00:03 +0100 Subject: fs: Remove aops->writepage All callers and implementations are now removed, so remove the operation and update the documentation to match. 
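With ->writepage removed, dirty data reaches a filesystem only through ->writepages. Below is a hedged sketch of the now-canonical shape of such an implementation using the existing writeback_iter() helper; example_writeout() is a placeholder for a per-folio writeout routine such as the shmem_writeout() added above.

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>

/* Placeholder for the filesystem's per-folio writeout routine. */
static int example_writeout(struct folio *folio, struct writeback_control *wbc);

static int example_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	struct folio *folio = NULL;
	int error = 0;

	/* writeback_iter() walks the dirty folios selected by @wbc. */
	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
		error = example_writeout(folio, wbc);

	return error;
}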
Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250402150005.2309458-10-willy@infradead.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..f7beefb917a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -433,7 +433,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) } struct address_space_operations { - int (*writepage)(struct page *page, struct writeback_control *wbc); int (*read_folio)(struct file *, struct folio *); /* Write back some dirty pages from this mapping. */ -- cgit v1.2.3 From 559b3bbfa978ce3b23dc9c52d09a0eddca52c439 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 27 Mar 2025 10:06:10 -0400 Subject: locking/percpu-rwsem: add freezable alternative to down_read Percpu-rwsems are used for superblock locking. However, we know the read percpu-rwsem we take for sb_start_write() on a frozen filesystem needs not to inhibit system from suspending or hibernating. That means it needs to wait with TASK_UNINTERRUPTIBLE | TASK_FREEZABLE. Introduce a new percpu_down_read_freezable() that allows us to control whether TASK_FREEZABLE is added to the wait flags. Signed-off-by: James Bottomley Link: https://lore.kernel.org/r/20250327140613.25178-2-James.Bottomley@HansenPartnership.com Signed-off-by: Christian Brauner --- include/linux/percpu-rwsem.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index af7d75ede619..288f5235649a 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -43,9 +43,10 @@ is_static struct percpu_rw_semaphore name = { \ #define DEFINE_STATIC_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, static) -extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); +extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool, bool); -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +static inline void percpu_down_read_internal(struct percpu_rw_semaphore *sem, + bool freezable) { might_sleep(); @@ -63,7 +64,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else - __percpu_down_read(sem, false); /* Unconditional memory barrier */ + __percpu_down_read(sem, false, freezable); /* Unconditional memory barrier */ /* * The preempt_enable() prevents the compiler from * bleeding the critical section out. 
@@ -71,6 +72,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) preempt_enable(); } +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + percpu_down_read_internal(sem, false); +} + +static inline void percpu_down_read_freezable(struct percpu_rw_semaphore *sem, + bool freeze) +{ + percpu_down_read_internal(sem, freeze); +} + static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { bool ret = true; @@ -82,7 +94,7 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else - ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ + ret = __percpu_down_read(sem, true, false); /* Unconditional memory barrier */ preempt_enable(); /* * The barrier() from preempt_enable() prevents the compiler from -- cgit v1.2.3 From f73bae83675b860cae9f58dba233b6be92e750ca Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 27 Mar 2025 10:06:11 -0400 Subject: fs: allow all writers to be frozen During freeze/thaw we need to be able to freeze all writers during suspend/hibernate. Otherwise tasks such as systemd-journald that mmap a file and write to it will not be frozen after we've already frozen the filesystem. This has some risk of not being able to freeze processes in case a process has acquired SB_FREEZE_PAGEFAULT under mmap_sem or SB_FREEZE_INTERNAL under some other filesytem specific lock. If the filesystem is frozen, a task can block on the frozen filesystem with e.g., mmap_sem held. If some other task then blocks on grabbing that mmap_sem, hibernation ill fail because it is unable to hibernate a task holding mmap_sem. This could be fixed by making a range of filesystem related locks use freezable sleeping. That's impractical and not warranted just for suspend/hibernate. Assume that this is an infrequent problem and we've given userspace a way to skip filesystem freezing through a sysfs file. Link: https://lore.kernel.org/r/20250402-work-freeze-v2-2-6719a97b52ac@kernel.org Link: https://lore.kernel.org/r/20250327140613.25178-3-James.Bottomley@HansenPartnership.com [brauner: make all freeze levels set TASK_FREEZABLE and rewrite commit message] Reviewed-by: Jan Kara Signed-off-by: James Bottomley Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..35b81f497904 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1780,7 +1780,7 @@ static inline void __sb_end_write(struct super_block *sb, int level) static inline void __sb_start_write(struct super_block *sb, int level) { - percpu_down_read(sb->s_writers.rw_sem + level - 1); + percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true); } static inline bool __sb_start_write_trylock(struct super_block *sb, int level) -- cgit v1.2.3 From 2992476528aeecbaee17ba0a6396a817481205a3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 29 Mar 2025 09:42:17 +0100 Subject: super: use a common iterator (Part 1) Use a common iterator for all callbacks. 
Link: https://lore.kernel.org/r/20250329-work-freeze-v2-4-a47af37ecc3d@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 35b81f497904..5e007bffd00e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3515,7 +3515,11 @@ extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); -extern void iterate_supers(void (*)(struct super_block *, void *), void *); +void __iterate_supers(void (*f)(struct super_block *, void *), void *arg, bool excl); +static inline void iterate_supers(void (*f)(struct super_block *, void *), void *arg) +{ + __iterate_supers(f, arg, false); +} extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); -- cgit v1.2.3 From b47e42d10e8c20525febccbd6e0dc8528861aea4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 29 Mar 2025 09:42:18 +0100 Subject: super: use common iterator (Part 2) Use a common iterator for all callbacks. We could go for something even more elaborate (advance step-by-step similar to iov_iter) but I really don't think this is warranted. Link: https://lore.kernel.org/r/20250329-work-freeze-v2-5-a47af37ecc3d@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e007bffd00e..96f8c8a794ea 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3515,11 +3515,7 @@ extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); -void __iterate_supers(void (*f)(struct super_block *, void *), void *arg, bool excl); -static inline void iterate_supers(void (*f)(struct super_block *, void *), void *arg) -{ - __iterate_supers(f, arg, false); -} +extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); -- cgit v1.2.3 From 06f2f68a670aae28b825065439301831e74da880 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 15:31:15 +0100 Subject: genirq/generic-chip: Make locking unconditional The SMP conditional wrappers around raw_spin_[un]lock() have no real value. On !SMP kernels the lock operations are NOOPs except for a preempt_disable/enable() pair on PREEMPT enabled kernels, which are not really worth to optimize for. Aside of that this evades lockdep on !SMP kernels. Remove the !SMP stubs and make it unconditional. No functional change. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250313142524.011345765@linutronix.de --- include/linux/irq.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index dd5df1e2d032..500772943207 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1222,7 +1222,6 @@ static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) #define IRQ_MSK(n) (u32)((n) < 32 ? 
((1 << (n)) - 1) : UINT_MAX) -#ifdef CONFIG_SMP static inline void irq_gc_lock(struct irq_chip_generic *gc) { raw_spin_lock(&gc->lock); @@ -1232,10 +1231,6 @@ static inline void irq_gc_unlock(struct irq_chip_generic *gc) { raw_spin_unlock(&gc->lock); } -#else -static inline void irq_gc_lock(struct irq_chip_generic *gc) { } -static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } -#endif /* * The irqsave variants are for usage in non interrupt code. Do not use -- cgit v1.2.3 From 7ae844a6650c5c15ccfbf76ed767e7f2cc61ec1d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2025 15:31:28 +0100 Subject: genirq/generic-chip: Remove unused lock wrappers All users are converted to lock guards. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250313142524.388478168@linutronix.de --- include/linux/irq.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 500772943207..d896d3a471ec 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1222,26 +1222,6 @@ static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) #define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) -static inline void irq_gc_lock(struct irq_chip_generic *gc) -{ - raw_spin_lock(&gc->lock); -} - -static inline void irq_gc_unlock(struct irq_chip_generic *gc) -{ - raw_spin_unlock(&gc->lock); -} - -/* - * The irqsave variants are for usage in non interrupt code. Do not use - * them in irq_chip callbacks. Use irq_gc_lock() instead. - */ -#define irq_gc_lock_irqsave(gc, flags) \ - raw_spin_lock_irqsave(&(gc)->lock, flags) - -#define irq_gc_unlock_irqrestore(gc, flags) \ - raw_spin_unlock_irqrestore(&(gc)->lock, flags) - static inline void irq_reg_writel(struct irq_chip_generic *gc, u32 val, int reg_offset) { -- cgit v1.2.3 From 30e7573babdcd46542364a2505d34112e62d5a22 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Mon, 3 Mar 2025 16:25:10 +0100 Subject: dt-bindings: reset: Add T-HEAD TH1520 SoC Reset Controller Add a YAML schema for the T-HEAD TH1520 SoC reset controller. This controller manages resets for subsystems such as the GPU within the TH1520 SoC. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Michal Wilczynski Link: https://lore.kernel.org/r/20250303152511.494405-2-m.wilczynski@samsung.com Signed-off-by: Philipp Zabel --- include/dt-bindings/reset/thead,th1520-reset.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/dt-bindings/reset/thead,th1520-reset.h (limited to 'include') diff --git a/include/dt-bindings/reset/thead,th1520-reset.h b/include/dt-bindings/reset/thead,th1520-reset.h new file mode 100644 index 000000000000..00459f160489 --- /dev/null +++ b/include/dt-bindings/reset/thead,th1520-reset.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. 
+ * Author: Michal Wilczynski + */ + +#ifndef _DT_BINDINGS_TH1520_RESET_H +#define _DT_BINDINGS_TH1520_RESET_H + +#define TH1520_RESET_ID_GPU 0 +#define TH1520_RESET_ID_GPU_CLKGEN 1 +#define TH1520_RESET_ID_NPU 2 +#define TH1520_RESET_ID_WDT0 3 +#define TH1520_RESET_ID_WDT1 4 + +#endif /* _DT_BINDINGS_TH1520_RESET_H */ -- cgit v1.2.3 From 1ce4c3aeef333be1e6290ec6d1f7891c2bfc7a1f Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 1 Apr 2025 11:37:16 +0200 Subject: firmware: sysfb: Move bpp-depth calculation into screen_info helper Move the calculation of the bits per pixels for screen_info into a helper function. This will make it available to other callers besides the firmware code. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250401094056.32904-14-tzimmermann@suse.de --- include/linux/screen_info.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 6a4a3cec4638..ab3cffbb58b7 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -128,6 +128,8 @@ static inline unsigned int screen_info_video_type(const struct screen_info *si) ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num); +u32 __screen_info_lfb_bits_per_pixel(const struct screen_info *si); + #if defined(CONFIG_PCI) void screen_info_apply_fixups(void); struct pci_dev *screen_info_pci_dev(const struct screen_info *si); -- cgit v1.2.3 From 32ae90c66fb62e4f6ae859422f733dfb249f10cf Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 1 Apr 2025 11:37:17 +0200 Subject: drm/sysfb: Add efidrm for EFI displays Add support for screen_info setups with VIDEO_TYPE_EFI. Provide the minimum functionality of reading modes, updating and clearing the display. There is existing support for these displays provided by simpledrm with CONFIG_SYSFB_SIMPLEFB=y. Using efidrm over simpledrm will allows for the mapping of video memory with correct caching. Simpledrm always assumes WC caching, while fully cached memory is possible with efidrm. Efidrm will also allow for the use of additional functionality provided by EFI, such as EDID information. In addition to efidrm, add struct pixel_format plus initializer macros. The type and macros describe pixel formats in a generic way on order to find the DRM format from the screen_info settings. Similar existing code in SIMPLEFB_FORMATS and fbdev is not really what is needed in efidrm, but SIMPLEFB_FORMATS can later be converted to struct pixel_format. 
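As a hedged illustration of how the new struct pixel_format (added below) can drive the screen_info-to-DRM-format step, a lookup along the following lines is conceivable; the table, the function name and the memcmp()-based comparison are assumptions for this example, not the efidrm code itself.

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>
#include <drm/drm_fourcc.h>
#include <video/pixel_format.h>

static const struct {
	struct pixel_format pixel;
	u32 fourcc;
} example_format_map[] = {
	{ PIXEL_FORMAT_XRGB1555,    DRM_FORMAT_XRGB1555 },
	{ PIXEL_FORMAT_RGB565,      DRM_FORMAT_RGB565 },
	{ PIXEL_FORMAT_RGB888,      DRM_FORMAT_RGB888 },
	{ PIXEL_FORMAT_XRGB8888,    DRM_FORMAT_XRGB8888 },
	{ PIXEL_FORMAT_XRGB2101010, DRM_FORMAT_XRGB2101010 },
};

/*
 * @pixel is expected to have been zero-initialized before being filled in,
 * so padding compares equal; a field-by-field compare would be more robust.
 */
static u32 example_pixel_to_fourcc(const struct pixel_format *pixel)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(example_format_map); i++)
		if (!memcmp(&example_format_map[i].pixel, pixel, sizeof(*pixel)))
			return example_format_map[i].fourcc;

	return DRM_FORMAT_INVALID;	/* no DRM format matches this layout */
}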
v4: - depend on CONFIG_EFI - disallow module for now as efi_mem_desc_lookup() is not exported v3: - depend on !SYSFB_SIMPLEFB (Javier) Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250401094056.32904-15-tzimmermann@suse.de --- include/video/pixel_format.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/video/pixel_format.h (limited to 'include') diff --git a/include/video/pixel_format.h b/include/video/pixel_format.h new file mode 100644 index 000000000000..b5104b2a3a13 --- /dev/null +++ b/include/video/pixel_format.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef VIDEO_PIXEL_FORMAT_H +#define VIDEO_PIXEL_FORMAT_H + +struct pixel_format { + unsigned char bits_per_pixel; + bool indexed; + union { + struct { + struct { + unsigned char offset; + unsigned char length; + } alpha, red, green, blue; + }; + struct { + unsigned char offset; + unsigned char length; + } index; + }; +}; + +#define PIXEL_FORMAT_XRGB1555 \ + { 16, false, { .alpha = {0, 0}, .red = {10, 5}, .green = {5, 5}, .blue = {0, 5} } } + +#define PIXEL_FORMAT_RGB565 \ + { 16, false, { .alpha = {0, 0}, .red = {11, 5}, .green = {5, 6}, .blue = {0, 5} } } + +#define PIXEL_FORMAT_RGB888 \ + { 24, false, { .alpha = {0, 0}, .red = {16, 8}, .green = {8, 8}, .blue = {0, 8} } } + +#define PIXEL_FORMAT_XRGB8888 \ + { 32, false, { .alpha = {0, 0}, .red = {16, 8}, .green = {8, 8}, .blue = {0, 8} } } + +#define PIXEL_FORMAT_XBGR8888 \ + { 32, false, { .alpha = {0, 0}, .red = {0, 8}, .green = {8, 8}, .blue = {16, 8} } } + +#define PIXEL_FORMAT_XRGB2101010 \ + { 32, false, { .alpha = {0, 0}, .red = {20, 10}, .green = {10, 10}, .blue = {0, 10} } } + +#endif -- cgit v1.2.3 From 814d270b31d27b6ea4f722b8ae2db9802fe332ff Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 1 Apr 2025 11:37:21 +0200 Subject: drm/sysfb: vesadrm: Add gamma correction Add palette support and export GAMMA properties via sysfs. User-space compositors can use this interface for programming gamma ramps or night mode. Vesadrm supports palette updates via VGA DAC registers or VESA palette calls. Up to 256 palette entries are available. Userspace always supplies gamma ramps of 256 entries. If the native color format does not match this because pixel component have less then 8 bits, vesadrm interpolates among the palette entries. The code uses CamelCase style in a few places to match the VESA manuals. 
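To make the 256-entry-ramp handling concrete, here is a rough sketch (not the vesadrm code, and not necessarily its exact math) of deriving the palette entries for one component that is narrower than 8 bits, using simple nearest-entry sampling of the userspace-supplied ramp:

#include <linux/types.h>

/*
 * @ramp_green: 256 entries, 16 bits each, as supplied by userspace.
 * @dac:        the 2^6 = 64 values programmed for the 6-bit green
 *              component of RGB565, scaled to the 6-bit VGA DAC range.
 */
static void example_fill_green_palette(u8 *dac, const u16 *ramp_green)
{
	unsigned int entries = 1u << 6;
	unsigned int i;

	for (i = 0; i < entries; i++) {
		unsigned int idx = i * 256 / entries;	/* nearest ramp entry */

		dac[i] = ramp_green[idx] >> 10;		/* 16-bit -> 6-bit */
	}
}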
v3: - fix coding style v2: - use CONFIG_X86_32 instead of __i386__ (checkpatch) - protect struct vesadrm.pmi with CONFIG_X86_32 Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250401094056.32904-19-tzimmermann@suse.de --- include/linux/screen_info.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index ab3cffbb58b7..923d68e07679 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -126,6 +126,13 @@ static inline unsigned int screen_info_video_type(const struct screen_info *si) return VIDEO_TYPE_CGA; } +static inline u32 __screen_info_vesapm_info_base(const struct screen_info *si) +{ + if (si->vesapm_seg < 0xc000) + return 0; + return (si->vesapm_seg << 4) + si->vesapm_off; +} + ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num); u32 __screen_info_lfb_bits_per_pixel(const struct screen_info *si); -- cgit v1.2.3 From 0cc6aadd7fc1e629b715ea3d1ba537ef2da95eec Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 26 Mar 2025 18:47:35 +0100 Subject: drm/bridge: add devm_drm_bridge_alloc() Add a macro to allocate and initialize a DRM bridge embedded within a private driver struct. Compared to current practice, which is based on [devm_]kzalloc() allocation followed by open-coded initialization of fields, this allows to have a common and explicit API to allocate and initialize DRM bridges. Besides being useful to consolidate bridge driver code, this is a fundamental step in preparation for adding dynamic lifetime to bridges based on refcount. Reviewed-by: Maxime Ripard Signed-off-by: Luca Ceresoli Link: https://patchwork.freedesktop.org/patch/msgid/20250326-drm-bridge-refcount-v9-1-5e0661fe1f84@bootlin.com Signed-off-by: Louis Chauvet --- include/drm/drm_bridge.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index cdad3b78a195..a59277674d5a 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -941,6 +941,23 @@ drm_priv_to_bridge(struct drm_private_obj *priv) return container_of(priv, struct drm_bridge, base); } +void *__devm_drm_bridge_alloc(struct device *dev, size_t size, size_t offset, + const struct drm_bridge_funcs *funcs); + +/** + * devm_drm_bridge_alloc - Allocate and initialize a bridge + * @dev: struct device of the bridge device + * @type: the type of the struct which contains struct &drm_bridge + * @member: the name of the &drm_bridge within @type + * @funcs: callbacks for this bridge + * + * Returns: + * Pointer to new bridge, or ERR_PTR on failure. + */ +#define devm_drm_bridge_alloc(dev, type, member, funcs) \ + ((type *)__devm_drm_bridge_alloc(dev, sizeof(type), \ + offsetof(type, member), funcs)) + void drm_bridge_add(struct drm_bridge *bridge); int devm_drm_bridge_add(struct device *dev, struct drm_bridge *bridge); void drm_bridge_remove(struct drm_bridge *bridge); -- cgit v1.2.3 From 30d1b37d4c022b5e6c164c090a60a3be1a66876e Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 26 Mar 2025 18:47:36 +0100 Subject: drm/bridge: add support for refcounting DRM bridges are currently considered as a fixed element of a DRM card, and thus their lifetime is assumed to extend for as long as the card exists. 
New use cases, such as hot-pluggable hardware with video bridges, require DRM bridges to be added to and removed from a DRM card without tearing the card down. This is possible for connectors already (used by DP MST), it is now needed for DRM bridges as well. As a first preliminary step, make bridges reference-counted to allow a struct drm_bridge (along with the private driver structure embedding it) to stay allocated even after the driver has been removed, until the last reference is put. Signed-off-by: Luca Ceresoli Reviewed-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20250326-drm-bridge-refcount-v9-2-5e0661fe1f84@bootlin.com Signed-off-by: Louis Chauvet --- include/drm/drm_bridge.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index a59277674d5a..df9bbf6fd1fb 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -840,6 +840,18 @@ struct drm_bridge { const struct drm_bridge_timings *timings; /** @funcs: control functions */ const struct drm_bridge_funcs *funcs; + + /** + * @container: Pointer to the private driver struct embedding this + * @struct drm_bridge. + */ + void *container; + + /** + * @refcount: reference count of users referencing this bridge. + */ + struct kref refcount; + /** @driver_private: pointer to the bridge driver's internal context */ void *driver_private; /** @ops: bitmask of operations supported by the bridge */ @@ -941,6 +953,9 @@ drm_priv_to_bridge(struct drm_private_obj *priv) return container_of(priv, struct drm_bridge, base); } +struct drm_bridge *drm_bridge_get(struct drm_bridge *bridge); +void drm_bridge_put(struct drm_bridge *bridge); + void *__devm_drm_bridge_alloc(struct device *dev, size_t size, size_t offset, const struct drm_bridge_funcs *funcs); @@ -951,6 +966,10 @@ void *__devm_drm_bridge_alloc(struct device *dev, size_t size, size_t offset, * @member: the name of the &drm_bridge within @type * @funcs: callbacks for this bridge * + * The reference count of the returned bridge is initialized to 1. This + * reference will be automatically dropped via devm (by calling + * drm_bridge_put()) when @dev is removed. + * * Returns: * Pointer to new bridge, or ERR_PTR on failure. */ -- cgit v1.2.3 From ad2698efce37e910dcf3c3914263e6cb3e86f8cd Mon Sep 17 00:00:00 2001 From: Nas Chung Date: Thu, 25 Jul 2024 15:10:32 +0900 Subject: media: uapi: v4l: Change V4L2_TYPE_IS_CAPTURE condition Explicitly compare a buffer type only with valid buffer types, to avoid matching a buffer type outside of the valid buffer type set. Signed-off-by: Nas Chung Reviewed-by: Michael Tretter Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c8cb2796130f..ccd6ad53432e 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -153,10 +153,18 @@ enum v4l2_buf_type { V4L2_BUF_TYPE_SDR_OUTPUT = 12, V4L2_BUF_TYPE_META_CAPTURE = 13, V4L2_BUF_TYPE_META_OUTPUT = 14, + /* + * Note: V4L2_TYPE_IS_VALID and V4L2_TYPE_IS_OUTPUT must + * be updated if a new type is added. 
+ */ /* Deprecated, do not use */ V4L2_BUF_TYPE_PRIVATE = 0x80, }; +#define V4L2_TYPE_IS_VALID(type) \ + ((type) >= V4L2_BUF_TYPE_VIDEO_CAPTURE &&\ + (type) <= V4L2_BUF_TYPE_META_OUTPUT) + #define V4L2_TYPE_IS_MULTIPLANAR(type) \ ((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE \ || (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) @@ -171,7 +179,8 @@ enum v4l2_buf_type { || (type) == V4L2_BUF_TYPE_SDR_OUTPUT \ || (type) == V4L2_BUF_TYPE_META_OUTPUT) -#define V4L2_TYPE_IS_CAPTURE(type) (!V4L2_TYPE_IS_OUTPUT(type)) +#define V4L2_TYPE_IS_CAPTURE(type) \ + (V4L2_TYPE_IS_VALID(type) && !V4L2_TYPE_IS_OUTPUT(type)) enum v4l2_tuner_type { V4L2_TUNER_RADIO = 1, -- cgit v1.2.3 From f81f69a0e3da141bdd73a16b8676f4e542533d87 Mon Sep 17 00:00:00 2001 From: Nas Chung Date: Thu, 25 Jul 2024 15:10:34 +0900 Subject: media: uapi: v4l: Fix V4L2_TYPE_IS_OUTPUT condition V4L2_TYPE_IS_OUTPUT() returns true for V4L2_BUF_TYPE_VIDEO_OVERLAY which definitely belongs to CAPTURE. Signed-off-by: Nas Chung Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index ccd6ad53432e..af86ece741e9 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -172,7 +172,6 @@ enum v4l2_buf_type { #define V4L2_TYPE_IS_OUTPUT(type) \ ((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT \ || (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE \ - || (type) == V4L2_BUF_TYPE_VIDEO_OVERLAY \ || (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY \ || (type) == V4L2_BUF_TYPE_VBI_OUTPUT \ || (type) == V4L2_BUF_TYPE_SLICED_VBI_OUTPUT \ -- cgit v1.2.3 From 5d04b41889596adab613b0e2f27f76f6414cda66 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 7 Apr 2025 15:48:23 +0300 Subject: drm/bridge: split HDMI Audio from DRM_BRIDGE_OP_HDMI As pointed out by Laurent, OP bits are supposed to describe operations. Split DRM_BRIDGE_OP_HDMI_AUDIO from DRM_BRIDGE_OP_HDMI instead of overloading DRM_BRIDGE_OP_HDMI. Signed-off-by: Dmitry Baryshkov Reviewed-by: Maxime Ripard Link: https://lore.kernel.org/r/20250314-dp-hdmi-audio-v6-1-dbd228fa73d7@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/drm/drm_bridge.h | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index df9bbf6fd1fb..a6fa9bdf10a3 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -681,8 +681,10 @@ struct drm_bridge_funcs { /** * @hdmi_audio_startup: * - * Called when ASoC starts an audio stream setup. The - * @hdmi_audio_startup() is optional. + * Called when ASoC starts an audio stream setup. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_AUDIO flag in their &drm_bridge->ops. * * Returns: * 0 on success, a negative error code otherwise @@ -693,8 +695,10 @@ struct drm_bridge_funcs { /** * @hdmi_audio_prepare: * Configures HDMI-encoder for audio stream. Can be called multiple - * times for each setup. Mandatory if HDMI audio is enabled in the - * bridge's configuration. + * times for each setup. + * + * This callback is optional but it must be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_AUDIO flag in their &drm_bridge->ops. 
* * Returns: * 0 on success, a negative error code otherwise @@ -707,8 +711,10 @@ struct drm_bridge_funcs { /** * @hdmi_audio_shutdown: * - * Shut down the audio stream. Mandatory if HDMI audio is enabled in - * the bridge's configuration. + * Shut down the audio stream. + * + * This callback is optional but it must be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_AUDIO flag in their &drm_bridge->ops. * * Returns: * 0 on success, a negative error code otherwise @@ -719,8 +725,10 @@ struct drm_bridge_funcs { /** * @hdmi_audio_mute_stream: * - * Mute/unmute HDMI audio stream. The @hdmi_audio_mute_stream callback - * is optional. + * Mute/unmute HDMI audio stream. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_HDMI_AUDIO flag in their &drm_bridge->ops. * * Returns: * 0 on success, a negative error code otherwise @@ -814,6 +822,17 @@ enum drm_bridge_ops { * drivers. */ DRM_BRIDGE_OP_HDMI = BIT(4), + /** + * @DRM_BRIDGE_OP_HDMI_AUDIO: The bridge provides HDMI audio operations. + * Bridges that set this flag must implement the + * &drm_bridge_funcs->hdmi_audio_prepare and + * &drm_bridge_funcs->hdmi_audio_shutdown callbacks. + * + * Note: currently there can be at most one bridge in a chain that sets + * this bit. This is to simplify corresponding glue code in connector + * drivers. + */ + DRM_BRIDGE_OP_HDMI_AUDIO = BIT(5), }; /** @@ -926,23 +945,26 @@ struct drm_bridge { unsigned int max_bpc; /** - * @hdmi_audio_dev: device to be used as a parent for the HDMI Codec + * @hdmi_audio_dev: device to be used as a parent for the HDMI Codec if + * @DRM_BRIDGE_OP_HDMI_AUDIO is set. */ struct device *hdmi_audio_dev; /** * @hdmi_audio_max_i2s_playback_channels: maximum number of playback - * I2S channels for the HDMI codec + * I2S channels for the bridge that sets @DRM_BRIDGE_OP_HDMI_AUDIO. */ int hdmi_audio_max_i2s_playback_channels; /** - * @hdmi_audio_spdif_playback: set if HDMI codec has S/PDIF playback port + * @hdmi_audio_spdif_playback: set if this bridge has S/PDIF playback + * port for @DRM_BRIDGE_OP_HDMI_AUDIO */ unsigned int hdmi_audio_spdif_playback : 1; /** - * @hdmi_audio_dai_port: sound DAI port, -1 if it is not enabled + * @hdmi_audio_dai_port: sound DAI port for @DRM_BRIDGE_OP_HDMI_AUDIO, + * -1 if it is not used. */ int hdmi_audio_dai_port; }; -- cgit v1.2.3 From d87ecc232706ace682a900a45018d843f148da56 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 14 Mar 2025 11:36:49 +0200 Subject: drm/bridge: add function interface for DisplayPort audio implementation It is common for the DisplayPort bridges to implement audio support. In preparation to providing a generic framework for DP audio, add corresponding interface to struct drm_bridge. As suggested by Maxime for now this is mostly c&p of the corresponding HDMI audio API. Reviewed-by: Maxime Ripard Link: https://lore.kernel.org/r/20250314-dp-hdmi-audio-v6-2-dbd228fa73d7@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/drm/drm_bridge.h | 88 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 82 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index a6fa9bdf10a3..4e418a29a9ff 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -737,6 +737,65 @@ struct drm_bridge_funcs { struct drm_bridge *bridge, bool enable, int direction); + /** + * @dp_audio_startup: + * + * Called when ASoC starts a DisplayPort audio stream setup. 
+ * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_DP_AUDIO flag in their &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*dp_audio_startup)(struct drm_connector *connector, + struct drm_bridge *bridge); + + /** + * @dp_audio_prepare: + * Configures DisplayPort audio stream. Can be called multiple + * times for each setup. + * + * This callback is optional but it must be implemented by bridges that + * set the @DRM_BRIDGE_OP_DP_AUDIO flag in their &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*dp_audio_prepare)(struct drm_connector *connector, + struct drm_bridge *bridge, + struct hdmi_codec_daifmt *fmt, + struct hdmi_codec_params *hparms); + + /** + * @dp_audio_shutdown: + * + * Shut down the DisplayPort audio stream. + * + * This callback is optional but it must be implemented by bridges that + * set the @DRM_BRIDGE_OP_DP_AUDIO flag in their &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + void (*dp_audio_shutdown)(struct drm_connector *connector, + struct drm_bridge *bridge); + + /** + * @dp_audio_mute_stream: + * + * Mute/unmute DisplayPort audio stream. + * + * This callback is optional, it can be implemented by bridges that + * set the @DRM_BRIDGE_OP_DP_AUDIO flag in their &drm_bridge->ops. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*dp_audio_mute_stream)(struct drm_connector *connector, + struct drm_bridge *bridge, + bool enable, int direction); + /** * @debugfs_init: * @@ -830,9 +889,24 @@ enum drm_bridge_ops { * * Note: currently there can be at most one bridge in a chain that sets * this bit. This is to simplify corresponding glue code in connector - * drivers. + * drivers. Also it is not possible to have a bridge in the chain that + * sets @DRM_BRIDGE_OP_DP_AUDIO if there is a bridge that sets this + * flag. */ DRM_BRIDGE_OP_HDMI_AUDIO = BIT(5), + /** + * @DRM_BRIDGE_OP_DP_AUDIO: The bridge provides DisplayPort audio operations. + * Bridges that set this flag must implement the + * &drm_bridge_funcs->dp_audio_prepare and + * &drm_bridge_funcs->dp_audio_shutdown callbacks. + * + * Note: currently there can be at most one bridge in a chain that sets + * this bit. This is to simplify corresponding glue code in connector + * drivers. Also it is not possible to have a bridge in the chain that + * sets @DRM_BRIDGE_OP_HDMI_AUDIO if there is a bridge that sets this + * flag. + */ + DRM_BRIDGE_OP_DP_AUDIO = BIT(6), }; /** @@ -946,25 +1020,27 @@ struct drm_bridge { /** * @hdmi_audio_dev: device to be used as a parent for the HDMI Codec if - * @DRM_BRIDGE_OP_HDMI_AUDIO is set. + * either of @DRM_BRIDGE_OP_HDMI_AUDIO or @DRM_BRIDGE_OP_DP_AUDIO is set. */ struct device *hdmi_audio_dev; /** * @hdmi_audio_max_i2s_playback_channels: maximum number of playback - * I2S channels for the bridge that sets @DRM_BRIDGE_OP_HDMI_AUDIO. + * I2S channels for the @DRM_BRIDGE_OP_HDMI_AUDIO or + * @DRM_BRIDGE_OP_DP_AUDIO. */ int hdmi_audio_max_i2s_playback_channels; /** * @hdmi_audio_spdif_playback: set if this bridge has S/PDIF playback - * port for @DRM_BRIDGE_OP_HDMI_AUDIO + * port for @DRM_BRIDGE_OP_HDMI_AUDIO or @DRM_BRIDGE_OP_DP_AUDIO. */ unsigned int hdmi_audio_spdif_playback : 1; /** - * @hdmi_audio_dai_port: sound DAI port for @DRM_BRIDGE_OP_HDMI_AUDIO, - * -1 if it is not used. 
+ * @hdmi_audio_dai_port: sound DAI port for either of + * @DRM_BRIDGE_OP_HDMI_AUDIO and @DRM_BRIDGE_OP_DP_AUDIO, -1 if it is + * not used. */ int hdmi_audio_dai_port; }; -- cgit v1.2.3 From 318c9eef63dd30b59dc8d63c7205ae997aa1e524 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 7 Apr 2025 13:23:43 +0200 Subject: ALSA: hda: Allow to fetch hlink by ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with LNL platform, Intel HDAudio Links carry IDs specifying non-HDAudio transfer type they help facilitate e.g.: 0xC0 for I2S as defined by AZX_REG_ML_LEPTR_ID_INTEL_SSP. The mechanism accounts for LEPTR register as it is Reserved if LCAP.ALT for given Link equals 0. Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Acked-by: Liam Girdwood Link: https://patch.msgid.link/20250407112352.3720779-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/sound/hdaudio_ext.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 4c7a40e149a5..60ec12e3b72f 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -22,6 +22,7 @@ void snd_hdac_ext_bus_ppcap_enable(struct hdac_bus *chip, bool enable); void snd_hdac_ext_bus_ppcap_int_enable(struct hdac_bus *chip, bool enable); int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus); +struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_id(struct hdac_bus *bus, u32 id); struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_addr(struct hdac_bus *bus, int addr); struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_name(struct hdac_bus *bus, const char *codec_name); @@ -97,12 +98,16 @@ struct hdac_ext_link { void __iomem *ml_addr; /* link output stream reg pointer */ u32 lcaps; /* link capablities */ u16 lsdiid; /* link sdi identifier */ + u32 id; int ref_count; struct list_head list; }; +#define hdac_ext_link_alt(link) ((link)->lcaps & AZX_ML_HDA_LCAP_ALT) +#define hdac_ext_link_ofls(link) ((link)->lcaps & AZX_ML_HDA_LCAP_OFLS) + int snd_hdac_ext_bus_link_power_up(struct hdac_ext_link *hlink); int snd_hdac_ext_bus_link_power_down(struct hdac_ext_link *hlink); int snd_hdac_ext_bus_link_power_up_all(struct hdac_bus *bus); -- cgit v1.2.3 From b9a3ec604993074eb6f5d08b14fb7913d1fae48b Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 7 Apr 2025 13:23:45 +0200 Subject: ASoC: Intel: avs: Read HW capabilities when possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with LunarLake (LNL) and onward, some hardware capabilities are visible to the sound driver directly. At the same time, these may no longer be visible to the AudioDSP firmware. Update resource allocation function to rely on the registers when possible. 
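For illustration only (editor's sketch, not part of either patch above): with the new lookup-by-ID helper and the LCAP macros, a controller driver could fetch the Intel SSP link via its LEPTR ID and confirm it is an ALT (non-HDAudio) link before programming it. The bus variable and error handling are assumed.

	struct hdac_ext_link *link;

	link = snd_hdac_ext_bus_get_hlink_by_id(bus, AZX_REG_ML_LEPTR_ID_INTEL_SSP);
	if (!link || !hdac_ext_link_alt(link))
		return -ENODEV;	/* no ALT link carries the SSP/I2S ID */
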
Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Acked-by: Liam Girdwood Link: https://patch.msgid.link/20250407112352.3720779-4-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/sound/hdaudio_ext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 60ec12e3b72f..7de390022ac2 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -99,6 +99,7 @@ struct hdac_ext_link { u32 lcaps; /* link capablities */ u16 lsdiid; /* link sdi identifier */ u32 id; + u8 slcount; int ref_count; -- cgit v1.2.3 From af1c968d25c7c44cd2738349c479f8b610a3fc40 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 7 Apr 2025 13:23:47 +0200 Subject: ASoC: Intel: avs: PTL-based platforms support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define handlers specific to ACE platforms, that Frisco Lake (FCL), a PantherLake (PTL)-based platform, is founded upon. Most operations are still inherited from their predecessors with the major difference being AudioDSP cores management - replaced by DSP-domain power management. Software has to ensure the DSP domain is both powered on and its power-gating disabled before it can be utilized for streaming. Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Acked-by: Liam Girdwood Link: https://patch.msgid.link/20250407112352.3720779-6-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2e28182c3af0..981ed45cc45e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3070,6 +3070,7 @@ #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff +#define PCI_DEVICE_ID_INTEL_HDA_FCL 0x67a8 #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 #define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020 -- cgit v1.2.3 From 83b9ae77f06607d19f7d3dcc6008742051137b27 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 4 Apr 2025 11:03:30 +0200 Subject: lib/string_helpers: Introduce parse_int_array() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Existing parse_inte_array_user() works with __user buffers only. Separate array parsing from __user bits so the functionality can be utilized with kernel buffers too. 
Cc: Andy Shevchenko Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250404090337.3564117-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/linux/string_helpers.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index e93fbb5b0c01..3fb88a1e9898 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -31,6 +31,7 @@ enum string_size_units { int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, char *buf, int len); +int parse_int_array(const char *buf, size_t count, int **array); int parse_int_array_user(const char __user *from, size_t count, int **array); #define UNESCAPE_SPACE BIT(0) -- cgit v1.2.3 From dcbe2aeda2e09eb69f5feba7e171db2836d9999d Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Tue, 25 Feb 2025 10:40:23 +0100 Subject: media: v4l2: Add NV15 and NV20 pixel formats Add NV15 and NV20 pixel formats used by the Rockchip Video Decoder for 10-bit buffers. NV15 and NV20 is 10-bit 4:2:0/4:2:2 semi-planar YUV formats similar to NV12 and NV16, using 10-bit components with no padding between each component. Instead, a group of 4 luminance/chrominance samples are stored over 5 bytes in little endian order: YYYY = UVUV = 4 * 10 bits = 40 bits = 5 bytes The '15' and '20' suffix refers to the optimum effective bits per pixel which is achieved when the total number of luminance samples is a multiple of 8 for NV15 and 4 for NV20. Signed-off-by: Jonas Karlman Reviewed-by: Nicolas Dufresne Tested-by: Nicolas Dufresne Tested-by: Christopher Obbard Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index af86ece741e9..ca7b3e8863ca 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -651,8 +651,10 @@ struct v4l2_pix_format { /* two planes -- one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ #define V4L2_PIX_FMT_NV21 v4l2_fourcc('N', 'V', '2', '1') /* 12 Y/CrCb 4:2:0 */ +#define V4L2_PIX_FMT_NV15 v4l2_fourcc('N', 'V', '1', '5') /* 15 Y/CbCr 4:2:0 10-bit packed */ #define V4L2_PIX_FMT_NV16 v4l2_fourcc('N', 'V', '1', '6') /* 16 Y/CbCr 4:2:2 */ #define V4L2_PIX_FMT_NV61 v4l2_fourcc('N', 'V', '6', '1') /* 16 Y/CrCb 4:2:2 */ +#define V4L2_PIX_FMT_NV20 v4l2_fourcc('N', 'V', '2', '0') /* 20 Y/CbCr 4:2:2 10-bit packed */ #define V4L2_PIX_FMT_NV24 v4l2_fourcc('N', 'V', '2', '4') /* 24 Y/CbCr 4:4:4 */ #define V4L2_PIX_FMT_NV42 v4l2_fourcc('N', 'V', '4', '2') /* 24 Y/CrCb 4:4:4 */ #define V4L2_PIX_FMT_P010 v4l2_fourcc('P', '0', '1', '0') /* 24 Y/CbCr 4:2:0 10-bit per component */ -- cgit v1.2.3 From 5f3e0b4a1f59ab616a09a45fddddefa6ef756229 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 7 Apr 2025 01:58:05 +0200 Subject: fs: predict not having to do anything in fdput() This matches the annotation in fdget(). 
Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/20250406235806.1637000-2-mjguzik@gmail.com Signed-off-by: Christian Brauner --- include/linux/file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index 302f11355b10..af1768d934a0 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -59,7 +59,7 @@ static inline struct fd CLONED_FD(struct file *f) static inline void fdput(struct fd fd) { - if (fd.word & FDPUT_FPUT) + if (unlikely(fd.word & FDPUT_FPUT)) fput(fd_file(fd)); } -- cgit v1.2.3 From fa6fe07d1536361a227d655e69ca270faf28fdbe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 19 Mar 2025 14:01:35 +1100 Subject: VFS: rename lookup_one_len family to lookup_noperm and remove permission check The lookup_one_len family of functions is (now) only used internally by a filesystem on itself either - in a context where permission checking is irrelevant such as by a virtual filesystem populating itself, or xfs accessing its ORPHANAGE or dquota accessing the quota file; or - in a context where a permission check (MAY_EXEC on the parent) has just been performed such as a network filesystem finding in "silly-rename" file in the same directory. This is also the context after the _parentat() functions where currently lookup_one_qstr_excl() is used. So the permission check is pointless. The name "one_len" is unhelpful in understanding the purpose of these functions and should be changed. Most of the callers pass the len as "strlen()" so using a qstr and QSTR() can simplify the code. This patch renames these functions (include lookup_positive_unlocked() which is part of the family despite the name) to have a name based on "lookup_noperm". They are changed to receive a 'struct qstr' instead of separate name and len. In a few cases the use of QSTR() results in a new call to strlen(). try_lookup_noperm() takes a pointer to a qstr instead of the whole qstr. This is consistent with d_hash_and_lookup() (which is nearly identical) and useful for lookup_noperm_unlocked(). The new lookup_noperm_common() doesn't take a qstr yet. That will be tidied up in a subsequent patch. 
Signed-off-by: NeilBrown Link: https://lore.kernel.org/r/20250319031545.2999807-5-neil@brown.name Signed-off-by: Christian Brauner --- include/linux/namei.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index ba02304a2a8a..cb6ce97782e0 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -69,10 +69,10 @@ int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); -extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); -extern struct dentry *lookup_one_len(const char *, struct dentry *, int); -extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); -extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); +extern struct dentry *try_lookup_noperm(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm_unlocked(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm_positive_unlocked(struct qstr *, struct dentry *); struct dentry *lookup_one(struct mnt_idmap *, struct qstr *, struct dentry *); struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, struct qstr *name, struct dentry *base); -- cgit v1.2.3 From 06c567403ae5a0b56005c2d4a184c903f572c844 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 19 Mar 2025 14:01:36 +1100 Subject: Use try_lookup_noperm() instead of d_hash_and_lookup() outside of VFS try_lookup_noperm() and d_hash_and_lookup() are nearly identical. The former does some validation of the name where the latter doesn't. Outside of the VFS that validation is likely valuable, and having only one exported function for this task is certainly a good idea. So make d_hash_and_lookup() local to VFS files and change all other callers to try_lookup_noperm(). Note that the arguments are swapped. Signed-off-by: NeilBrown Link: https://lore.kernel.org/r/20250319031545.2999807-6-neil@brown.name Signed-off-by: Christian Brauner --- include/linux/dcache.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e974e63bcdbc..a324f82df562 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -288,7 +288,6 @@ extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); -extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); static inline unsigned d_count(const struct dentry *dentry) { -- cgit v1.2.3 From 6703cb3dced01f32982b9f7069ef1336d0225077 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Mon, 24 Mar 2025 16:56:48 +0900 Subject: RDMA/rxe: Enable ODP in RDMA FLUSH operation For persistent memories, add rxe_odp_flush_pmem_iova() so that ODP specific steps are executed. Otherwise, no additional consideration is required. 
Signed-off-by: Daisuke Matsuda Link: https://patch.msgid.link/20250324075649.3313968-2-matsuda-daisuke@fujitsu.com Reviewed-by: Li Zhijian Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d42eae69d9a8..41bf98ccb275 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -325,6 +325,7 @@ enum ib_odp_transport_cap_bits { IB_ODP_SUPPORT_READ = 1 << 3, IB_ODP_SUPPORT_ATOMIC = 1 << 4, IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, + IB_ODP_SUPPORT_FLUSH = 1 << 6, }; struct ib_odp_caps { -- cgit v1.2.3 From da916e96e2dedcb2d40de77a7def833d315b81a6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Oct 2024 10:21:41 +0200 Subject: perf: Make perf_pmu_unregister() useable Previously it was only safe to call perf_pmu_unregister() if there were no active events of that pmu around -- which was impossible to guarantee since it races all sorts against perf_init_event(). Rework the whole thing by: - keeping track of all events for a given pmu - 'hiding' the pmu from perf_init_event() - waiting for the appropriate (s)rcu grace periods such that all prior references to the PMU will be completed - detaching all still existing events of that pmu (see first point) and moving them to a new REVOKED state. - actually freeing the pmu data. Where notably the new REVOKED state must inhibit all event actions from reaching code that wants to use event->pmu. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ravi Bangoria Link: https://lkml.kernel.org/r/20250307193723.525402029@infradead.org --- include/linux/perf_event.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0069ba6866a4..7f49a58b271d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -325,6 +325,9 @@ struct perf_output_handle; struct pmu { struct list_head entry; + spinlock_t events_lock; + struct list_head events; + struct module *module; struct device *dev; struct device *parent; @@ -622,9 +625,10 @@ struct perf_addr_filter_range { * enum perf_event_state - the states of an event: */ enum perf_event_state { - PERF_EVENT_STATE_DEAD = -4, - PERF_EVENT_STATE_EXIT = -3, - PERF_EVENT_STATE_ERROR = -2, + PERF_EVENT_STATE_DEAD = -5, + PERF_EVENT_STATE_REVOKED = -4, /* pmu gone, must not touch */ + PERF_EVENT_STATE_EXIT = -3, /* task died, still inherit */ + PERF_EVENT_STATE_ERROR = -2, /* scheduling error, can enable */ PERF_EVENT_STATE_OFF = -1, PERF_EVENT_STATE_INACTIVE = 0, PERF_EVENT_STATE_ACTIVE = 1, @@ -865,6 +869,7 @@ struct perf_event { void *security; #endif struct list_head sb_list; + struct list_head pmu_list; /* * Certain events gets forwarded to another pmu internally by over- @@ -1155,7 +1160,7 @@ extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); extern void perf_event_itrace_started(struct perf_event *event); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); -extern void perf_pmu_unregister(struct pmu *pmu); +extern int perf_pmu_unregister(struct pmu *pmu); extern void __perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task); @@ -1760,7 +1765,7 @@ static inline bool needs_branch_stack(struct perf_event *event) static inline bool has_aux(struct perf_event *event) { - return event->pmu->setup_aux; + return event->pmu && event->pmu->setup_aux; } static inline bool 
has_aux_action(struct perf_event *event) -- cgit v1.2.3 From 4dfe3232cc04325a09e96f6c7f9546ba6c0b132b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 27 Mar 2025 12:52:13 -0700 Subject: perf/x86: Add dynamic constraint More and more features require a dynamic event constraint, e.g., branch counter logging, auto counter reload, Arch PEBS, etc. Add a generic flag, PMU_FL_DYN_CONSTRAINT, to indicate the case. It avoids keeping adding the individual flag in intel_cpuc_prepare(). Add a variable dyn_constraint in the struct hw_perf_event to track the dynamic constraint of the event. Apply it if it's updated. Apply the generic dynamic constraint for branch counter logging. Many features on and after V6 require dynamic constraint. So unconditionally set the flag for V6+. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Thomas Falcon Link: https://lkml.kernel.org/r/20250327195217.2683619-2-kan.liang@linux.intel.com --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7f49a58b271d..54dad174ed7a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -158,6 +158,7 @@ struct hw_perf_event { struct { /* hardware */ u64 config; u64 last_tag; + u64 dyn_constraint; unsigned long config_base; unsigned long event_base; int event_base_rdpmc; -- cgit v1.2.3 From c9449c8506a5df5052ef4d17867699517b10b55a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 27 Mar 2025 12:52:15 -0700 Subject: perf: Extend the bit width of the arch-specific flag The auto counter reload feature requires an event flag to indicate an auto counter reload group, which can only be scheduled on specific counters that enumerated in CPUID. However, the hw_perf_event.flags has run out on X86. Two solutions were considered to address the issue. - Currently, 20 bits are reserved for the architecture-specific flags. Only the bit 31 is used for the generic flag. There is still plenty of space left. Reserve 8 more bits for the arch-specific flags. - Add a new X86 specific hw_perf_event.flags1 to support more flags. The former is implemented. Enough room is still left in the global generic flag. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Thomas Falcon Link: https://lkml.kernel.org/r/20250327195217.2683619-4-kan.liang@linux.intel.com --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 54dad174ed7a..5c547329cf02 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -144,7 +144,7 @@ struct hw_perf_event_extra { * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific * usage. */ -#define PERF_EVENT_FLAG_ARCH 0x000fffff +#define PERF_EVENT_FLAG_ARCH 0x0fffffff #define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0); -- cgit v1.2.3 From ec980e4facef8110f6fce27e5b6344660117f01f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 27 Mar 2025 12:52:17 -0700 Subject: perf/x86/intel: Support auto counter reload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The relative rates among two or more events are useful for performance analysis, e.g., a high branch miss rate may indicate a performance issue. Usually, the samples with a relative rate that exceeds some threshold are more useful. 
However, the traditional sampling takes samples of events separately. To get the relative rates among two or more events, a high sample rate is required, which can bring high overhead. Many samples taken in the non-hotspot area are also dropped (useless) in the post-process. The auto counter reload (ACR) feature takes samples when the relative rate of two or more events exceeds some threshold, which provides the fine-grained information at a low cost. To support the feature, two sets of MSRs are introduced. For a given counter IA32_PMC_GPn_CTR/IA32_PMC_FXm_CTR, bit fields in the IA32_PMC_GPn_CFG_B/IA32_PMC_FXm_CFG_B MSR indicate which counter(s) can cause a reload of that counter. The reload value is stored in the IA32_PMC_GPn_CFG_C/IA32_PMC_FXm_CFG_C. The details can be found at Intel SDM (085), Volume 3, 21.9.11 Auto Counter Reload. In the hw_config(), an ACR event is specially configured, because the cause/reloadable counter mask has to be applied to the dyn_constraint. Besides the HW limit, e.g., not support perf metrics, PDist and etc, a SW limit is applied as well. ACR events in a group must be contiguous. It facilitates the later conversion from the event idx to the counter idx. Otherwise, the intel_pmu_acr_late_setup() has to traverse the whole event list again to find the "cause" event. Also, add a new flag PERF_X86_EVENT_ACR to indicate an ACR group, which is set to the group leader. The late setup() is also required for an ACR group. It's to convert the event idx to the counter idx, and saved it in hw.config1. The ACR configuration MSRs are only updated in the enable_event(). The disable_event() doesn't clear the ACR CFG register. Add acr_cfg_b/acr_cfg_c in the struct cpu_hw_events to cache the MSR values. It can avoid a MSR write if the value is not changed. Expose an acr_mask to the sysfs. The perf tool can utilize the new format to configure the relation of events in the group. The bit sequence of the acr_mask follows the events enabled order of the group. Example: Here is the snippet of the mispredict.c. Since the array has a random numbers, jumps are random and often mispredicted. The mispredicted rate depends on the compared value. For the Loop1, ~11% of all branches are mispredicted. For the Loop2, ~21% of all branches are mispredicted. main() { ... for (i = 0; i < N; i++) data[i] = rand() % 256; ... /* Loop 1 */ for (k = 0; k < 50; k++) for (i = 0; i < N; i++) if (data[i] >= 64) sum += data[i]; ... ... /* Loop 2 */ for (k = 0; k < 50; k++) for (i = 0; i < N; i++) if (data[i] >= 128) sum += data[i]; ... } Usually, a code with a high branch miss rate means a bad performance. To understand the branch miss rate of the codes, the traditional method usually samples both branches and branch-misses events. E.g., perf record -e "{cpu_atom/branch-misses/ppu, cpu_atom/branch-instructions/u}" -c 1000000 -- ./mispredict [ perf record: Woken up 4 times to write data ] [ perf record: Captured and wrote 0.925 MB perf.data (5106 samples) ] The 5106 samples are from both events and spread in both Loops. In the post-process stage, a user can know that the Loop 2 has a 21% branch miss rate. Then they can focus on the samples of branch-misses events for the Loop 2. With this patch, the user can generate the samples only when the branch miss rate > 20%. For example, perf record -e "{cpu_atom/branch-misses,period=200000,acr_mask=0x2/ppu, cpu_atom/branch-instructions,period=1000000,acr_mask=0x3/u}" -- ./mispredict (Two different periods are applied to branch-misses and branch-instructions. 
The ratio is set to 20%. If the branch-instructions is overflowed first, the branch-miss rate < 20%. No samples should be generated. All counters should be automatically reloaded. If the branch-misses is overflowed first, the branch-miss rate > 20%. A sample triggered by the branch-misses event should be generated. Just the counter of the branch-instructions should be automatically reloaded. The branch-misses event should only be automatically reloaded when the branch-instructions is overflowed. So the "cause" event is the branch-instructions event. The acr_mask is set to 0x2, since the event index in the group of branch-instructions is 1. The branch-instructions event is automatically reloaded no matter which events are overflowed. So the "cause" events are the branch-misses and the branch-instructions event. The acr_mask should be set to 0x3.) [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.098 MB perf.data (2498 samples) ] $perf report Percent │154: movl $0x0,-0x14(%rbp) │ ↓ jmp 1af │ for (i = j; i < N; i++) │15d: mov -0x10(%rbp),%eax │ mov %eax,-0x18(%rbp) │ ↓ jmp 1a2 │ if (data[i] >= 128) │165: mov -0x18(%rbp),%eax │ cltq │ lea 0x0(,%rax,4),%rdx │ mov -0x8(%rbp),%rax │ add %rdx,%rax │ mov (%rax),%eax │ ┌──cmp $0x7f,%eax 100.00 0.00 │ ├──jle 19e │ │sum += data[i]; The 2498 samples are all from the branch-misses events for the Loop 2. The number of samples and overhead is significantly reduced without losing any information. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Thomas Falcon Link: https://lkml.kernel.org/r/20250327195217.2683619-6-kan.liang@linux.intel.com --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5c547329cf02..947ad12dfdbe 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -157,6 +157,7 @@ struct hw_perf_event { union { struct { /* hardware */ u64 config; + u64 config1; u64 last_tag; u64 dyn_constraint; unsigned long config_base; -- cgit v1.2.3 From 0a7de4a8f898c480ffafe024c4a0a8b8819597f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Apr 2025 16:36:02 +0000 Subject: net: rps: remove kfree_rcu_mightsleep() use Add an rcu_head to sd_flow_limit and rps_sock_flow_table structs to use the more conventional and predictable k[v]free_rcu(). Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250407163602.170356-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/rps.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/rps.h b/include/net/rps.h index e358e9711f27..507f4aa5d39b 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -57,9 +57,10 @@ struct rps_dev_flow_table { * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { - u32 mask; + struct rcu_head rcu; + u32 mask; - u32 ents[] ____cacheline_aligned_in_smp; + u32 ents[] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) -- cgit v1.2.3 From 7eb61c2dffa635e4fb05f89736d8fcf39bb24d42 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 Oct 2023 12:17:41 +0200 Subject: drm/amdgpu: UAPI for user queue management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch intorduces new UAPI/IOCTL for usermode graphics queue. 
The userspace app will fill this structure and request the graphics driver to add a graphics work queue for it. The output of this UAPI is a queue id. This UAPI maps the queue into GPU, so the graphics app can start submitting work to the queue as soon as the call returns. V2: Addressed review comments from Alex and Christian - Make the doorbell offset's comment clearer - Change the output parameter name to queue_id V3: Integration with doorbell manager V4: - Updated the UAPI doc (Pierre-Eric) - Created a Union for engine specific MQDs (Alex) - Added Christian's R-B V5: - Add variables for GDS and CSA in MQD structure (Alex) - Make MQD data a ptr-size pair instead of union (Alex) V9: - renamed struct drm_amdgpu_userq_mqd_gfx_v11 to struct drm_amdgpu_userq_mqd as its being used for SDMA and compute queues as well V10: - keeping the drm_amdgpu_userq_mqd IP independent, moving the _gfx_v11 objects in a separate structure in other patch. (Alex) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 90 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 25d5c6e90a99..53081050cb3e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -54,6 +54,7 @@ extern "C" { #define DRM_AMDGPU_VM 0x13 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -71,6 +72,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) /** * DOC: memory domains @@ -319,6 +321,94 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* user queue IOCTL */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* Flag to indicate secure buffer related workload, unused for now */ +#define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0) +/* Flag to indicate AQL workload, unused for now */ +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1) + +/* + * MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. This + * structure defines the MQD for GFX-V11 IP ver 0. + */ +struct drm_amdgpu_userq_in { + /** AMDGPU_USERQ_OP_* */ + __u32 op; + /** Queue handle for USERQ_OP_FREE */ + __u32 queue_id; + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ + __u32 ip_type; + /** + * @flags: flags to indicate special function for queue like secure + * buffer (TMZ). Unused for now. + */ + __u32 flags; + /** + * @doorbell_handle: the handle of doorbell GEM object + * associated to this client. + */ + __u32 doorbell_handle; + /** + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. + * Kernel will generate absolute doorbell offset using doorbell_handle + * and doorbell_offset in the doorbell bo. 
+ */ + __u32 doorbell_offset; + + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the size + * and alignment related requirements are met. + */ + __u64 wptr_va; + /** + * @mqd: Queue descriptor for USERQ_OP_CREATE + * MQD data can be of different size for different GPU IP/engine and + * their respective versions/revisions, so this points to a __u64 * + * which holds MQD of this usermode queue. + */ + __u64 mqd; + /** + * @size: size of MQD data in bytes, it must match the MQD structure + * size of the respective engine/revision defined in UAPI for ex, for + * gfx_v11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx_v11). + */ + __u64 mqd_size; +}; + +struct drm_amdgpu_userq_out { + /** Queue handle */ + __u32 queue_id; + /** Flags */ + __u32 flags; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From a1d201e16940775f0e2f0230960d69e40879ec6d Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 2 May 2024 12:37:30 +0200 Subject: drm/amdgpu: enable GFX-V11 userqueue support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables GFX-v11 IP support in the usermode queue base code. It typically: - adds a GFX_v11 specific MQD structure - sets IP functions to create and destroy MQDs - sets MQD objects coming from userspace V10: introduced this spearate patch for GFX V11 enabling (Alex). V11: Addressed review comments: - update the comments in GFX mqd structure informing user about using the INFO IOCTL for object sizes (Alex) - rename struct drm_amdgpu_userq_mqd_gfx_v11 to drm_amdgpu_userq_mqd_gfx11 (Marek) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 53081050cb3e..4e07e15d5076 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -409,6 +409,25 @@ union drm_amdgpu_userq { struct drm_amdgpu_userq_out out; }; +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @gds_va: Virtual address of the GPU memory to hold the GDS buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 gds_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. 
+ */ + __u64 csa_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From 543b6145377458b5ec0d1440606c31db62867bf4 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 27 Aug 2024 14:52:07 +0530 Subject: drm/amdgpu: enable SDMA usermode queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch does necessary modifications to enable the SDMA usermode queues using the existing userqueue infrastructure. V9: introduced this patch in the series V10: use header file instead of extern (Alex) V11: rename drm_amdgpu_userq_mqd_sdma_gfx_v11 to drm_amdgpu_userq_mqd_sdma_gfx11 (Marek) Cc: Christian König Cc: Alex Deucher Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4e07e15d5076..6ae988574084 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -428,6 +428,16 @@ struct drm_amdgpu_userq_mqd_gfx11 { __u64 csa_va; }; +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 csa_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From 2c695d7c072067cd53fb52e52aa0b48277120314 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 9 May 2024 14:31:15 +0200 Subject: drm/amdgpu: enable compute/gfx usermode queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch does the necessary changes required to enable compute workload support using the existing usermode queues infrastructure. V9: Patch introduced V10: Add custom IP specific mqd strcuture for compute (Alex) V11: Rename drm_amdgpu_userq_mqd_compute_gfx_v11 to drm_amdgpu_userq_mqd_compute_gfx11 (Marek) Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6ae988574084..59f0818e8dcd 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -438,6 +438,16 @@ struct drm_amdgpu_userq_mqd_sdma_gfx11 { __u64 csa_va; }; +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be a from a separate GPU object, and must be at least 1 page + * sized. 
+ */ + __u64 eop_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From 6b0c7c367317a663a58f72f79e73ad787aac873d Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:09:50 +0530 Subject: drm/amdgpu: UAPI headers for userqueue Secure semaphore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add UAPI header support for userqueue Secure semaphore v2: Worked on review comments from Christian for the following modifications - Add bo handles, bo flags and padding fields. - Include value/va in a combined array. v3: Worked on review comments from Christian - Add num_fences field to obtain the number of objects required to allocate memory for userq_fence_info. - Replace obj_handle name with syncobj_handle. - Replace point name with syncobj_point. - Replace count_handles name with num_syncobj_handles. - Fix structure padding related issues. v4: Worked on review comments from Christian - Modify the bo flags description. Signed-off-by: Alex Deucher Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 115 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 59f0818e8dcd..4e9414c0f924 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -55,6 +55,8 @@ extern "C" { #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_AMDGPU_SCHED 0x15 #define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -73,6 +75,8 @@ extern "C" { #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) #define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) /** * DOC: memory domains @@ -448,6 +452,117 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 { __u64 eop_va; }; +/* dma_resv usage flag */ +#define AMDGPU_USERQ_BO_WRITE 1 + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + /** + * @flags: flags to indicate special function for userq fence creation. + * Unused for now. + */ + __u32 flags; + /** + * @syncobj_handles_array: An array of syncobj handles used by the userq fence + * creation IOCTL to install the created dma_fence object which can be + * utilized by userspace to explicitly synchronize GPU commands. + */ + __u64 syncobj_handles_array; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles_array. + */ + __u64 num_syncobj_handles; + /** + * @syncobj_point: A given point on the timeline to be signaled. + * Unused for now. 
+ */ + __u64 syncobj_point; + /** + * @bo_handles_array: An array of GEM BO handles used by the userq fence creation + * IOCTL to install the created dma_fence object which can be utilized by + * userspace to synchronize the BO usage between user processes. + */ + __u64 bo_handles_array; + /** + * @num_bo_handles: A count that represents the number of GEM BO handles in + * @bo_handles_array. + */ + __u32 num_bo_handles; + /** + * @bo_flags: flags to indicate BOs synchronize for READ or WRITE + */ + __u32 bo_flags; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A gpu address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64 bit value represents the write pointer (WPTR) of the + * queue commands which compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used to retrieve the queue information to store + * the fence driver references in the wait user queue structure. + */ + __u32 waitq_id; + /** + * @flags: flags to specify special function for userq wait information. + * Unused for now. + */ + __u32 flags; + /** + * @bo_wait_flags: flags to define the BOs for READ or WRITE to store the + * matching fence wait info pair in @userq_fence_info. + */ + __u32 bo_wait_flags; + __u32 pad; + /** + * @syncobj_handles_array: An array of syncobj handles defined to get the + * fence wait information of every syncobj handles in the array. + */ + __u64 syncobj_handles_array; + /** + * @bo_handles_array: An array of GEM BO handles defined to fetch the fence + * wait information of every BO handles in the array. + */ + __u64 bo_handles_array; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles_array. + */ + __u32 num_syncobj_handles; + /** + * @num_bo_handles: A count that represents the number of GEM BO handles in + * @bo_handles_array. + */ + __u32 num_bo_handles; + /** + * @userq_fence_info: An array of fence information (va and value) pair of each + * objects stored in @syncobj_handles_array and @bo_handles_array. + */ + __u64 userq_fence_info; + /** + * @num_fences: A count that represents the number of actual fences installed in + * each syncobj and bo handles. + */ + __u64 num_fences; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From 15e30a6e479282fef4365bd586159911c8cf140d Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:33:28 +0530 Subject: drm/amdgpu: Add wait IOCTL timeline syncobj support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add user fence wait IOCTL timeline syncobj support. v2:(Christian) - handle dma_fence_wait() return value. - shorten the variable name syncobj_timeline_points a bit. - move num_points up to avoid padding issues. v3:(Christian) - Handle timeline drm_syncobj_find_fence() call error handling - Use dma_fence_unwrap_for_each() in timeline fence as there could be more than one fence. v4:(Christian) - Drop the first num_fences since fence is always included in the dma_fence_unwrap_for_each() iteration, when fence != f then fence is most likely just a container. v5: Added Alex RB to merge the kernel UAPI changes since he has already approved the amdgpu_drm.h changes. 
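For illustration (editor's sketch of a userspace caller, using struct drm_amdgpu_userq_wait as it stands at this point in the series; the drmIoctl() wrapper, variable names and the omitted BO/fence-info fields are assumptions):

	struct drm_amdgpu_userq_wait wait = {
		.waitq_id   = queue_id,
		.num_points = 1,
		.syncobj_timeline_handles = (__u64)(uintptr_t)&timeline_handle,
		.syncobj_timeline_points  = (__u64)(uintptr_t)&timeline_point,
		/* BO handle arrays and the fence-info output buffer omitted here */
	};
	int ret = drmIoctl(drm_fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait);
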
Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4e9414c0f924..1a21259cb8c4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -530,12 +530,26 @@ struct drm_amdgpu_userq_wait { * matching fence wait info pair in @userq_fence_info. */ __u32 bo_wait_flags; - __u32 pad; + /** + * @num_points: A count that represents the number of timeline syncobj handles in + * syncobj_handles_array. + */ + __u32 num_points; /** * @syncobj_handles_array: An array of syncobj handles defined to get the * fence wait information of every syncobj handles in the array. */ __u64 syncobj_handles_array; + /** + * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the + * fence wait information of every timeline syncobj handles in the array. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: An array of timeline syncobj points defined to get the + * fence wait points of every timeline syncobj handles in the syncobj_handles_array. + */ + __u64 syncobj_timeline_points; /** * @bo_handles_array: An array of GEM BO handles defined to fetch the fence * wait information of every BO handles in the array. -- cgit v1.2.3 From 70773bef4e091ff6d2a91e3dfb4f29013eb81f1f Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 25 Sep 2024 18:09:49 +0200 Subject: drm/amdgpu: update userqueue BOs and PDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates the VM_IOCTL to allow userspace to synchronize the mapping/unmapping of a BO in the page table. The major changes are: - it adds a drm_timeline object as an input parameter to the VM IOCTL. - this object is used by the kernel to sync the update of the BO in the page table during the mapping of the object. - the kernel also synchronizes the tlb flush of the page table entry of this object during the unmapping (Added in this series: https://patchwork.freedesktop.org/series/131276/ and https://patchwork.freedesktop.org/patch/584182/) - the userspace can wait on this timeline, and then the BO is ready to be consumed by the GPU. The UAPI for the same has been approved here: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 V2: - remove the eviction fence coupling V3: - added the drm timeline support instead of input/output fence (Christian) V4: - made timeline 64-bit (Christian) - bug fix (Arvind) V5: GLCTS bug fix (Arvind) V6: Rename syncobj_handle -> timeline_syncobj_out Rename point -> timeline_point_in (Marek) V7: Addressed review comments from Christian: - do not send last_update fence in case of vm_clear_freed, instead return the fence from gen_va_update_vm - move the functions to update bo_mapping to amdgpu_gem.c - do not use amdgpu_userq_update_vm anymore in userq_create() V8: Addressed review comments from Christian: - Split amdgpu_gem_update_bo_mapping function. - amdgpu_gem_va_update_vm should return stub for error. V9: Addressed review comments from Christian: - Rename the function amdgpu_gem_update_timeline_node. - amdgpu_gem_update_timeline_node should be void function. - when timeline_point is zero don't allocate a chain and call drm_syncobj_replace_fence() instead of drm_syncobj_add_point(). 
V11: rebase V12: Fix 32-bit holes issue in sturct drm_amdgpu_gem_va. V13: Fix the review comment by renaming timeline syncobj (Marek) Cc: Alex Deucher Cc: Felix Kuehling Cc: Christian König Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1a21259cb8c4..ca82935ff93a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -857,6 +857,15 @@ struct drm_amdgpu_gem_va { __u64 offset_in_bo; /** Specify mapping size. Must be correctly aligned. */ __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given vm_timeline_syncobj_out + * at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; }; #define AMDGPU_HW_IP_GFX 0 -- cgit v1.2.3 From cb4a73f46f253b5f7a30b1e0488c8ef2832e8747 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:59:04 +0530 Subject: drm/amdgpu: Add separate array of read and write for BO handles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop AMDGPU_USERQ_BO_WRITE as this should not be a global option of the IOCTL, It should be option per buffer. Hence adding separate array for read and write BO handles. v2(Marek): - Internal kernel details shouldn't be here. This file should only document the observed behavior, not the implementation . v3: - Fix DAL CI clang issue. v4: - Added Alex RB to merge the kernel UAPI changes since he has already approved the amdgpu_drm.h changes. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Alex Deucher Acked-by: Christian König Suggested-by: Marek Olšák Suggested-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 50 +++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index ca82935ff93a..02cf03e811d5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -452,9 +452,6 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 { __u64 eop_va; }; -/* dma_resv usage flag */ -#define AMDGPU_USERQ_BO_WRITE 1 - /* userq signal/wait ioctl */ struct drm_amdgpu_userq_signal { /** @@ -484,20 +481,30 @@ struct drm_amdgpu_userq_signal { */ __u64 syncobj_point; /** - * @bo_handles_array: An array of GEM BO handles used by the userq fence creation - * IOCTL to install the created dma_fence object which can be utilized by - * userspace to synchronize the BO usage between user processes. + * @bo_read_handles: The list of BO handles that the submitted user queue job + * is using for read only. This will update BO fences in the kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user queue job + * is using for write only. This will update BO fences in the kernel. + */ + __u64 bo_write_handles; + /** + * @num_read_bo_handles: A count that represents the number of read BO handles in + * @bo_read_handles. */ - __u64 bo_handles_array; + __u32 num_read_bo_handles; /** - * @num_bo_handles: A count that represents the number of GEM BO handles in - * @bo_handles_array. 
+ * @num_write_bo_handles: A count that represents the number of write BO handles in + * @bo_write_handles. */ - __u32 num_bo_handles; + __u32 num_write_bo_handles; /** * @bo_flags: flags to indicate BOs synchronize for READ or WRITE */ __u32 bo_flags; + __u32 pad; }; struct drm_amdgpu_userq_fence_info { @@ -551,20 +558,31 @@ struct drm_amdgpu_userq_wait { */ __u64 syncobj_timeline_points; /** - * @bo_handles_array: An array of GEM BO handles defined to fetch the fence - * wait information of every BO handles in the array. + * @bo_read_handles: The list of read BO handles submitted by the user queue + * job to get the va/value pairs. */ - __u64 bo_handles_array; + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_write_handles; /** * @num_syncobj_handles: A count that represents the number of syncobj handles in * @syncobj_handles_array. */ __u32 num_syncobj_handles; /** - * @num_bo_handles: A count that represents the number of GEM BO handles in - * @bo_handles_array. + * @num_read_bo_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_read_bo_handles; + /** + * @num_write_bo_handles: A count that represents the number of write BO handles in + * @bo_write_handles. */ - __u32 num_bo_handles; + __u32 num_write_bo_handles; + __u32 pad; /** * @userq_fence_info: An array of fence information (va and value) pair of each * objects stored in @syncobj_handles_array and @bo_handles_array. -- cgit v1.2.3 From 38c67ec9aa4be7bc0ae55e7bebe95f615fa09a1e Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 25 Sep 2024 18:10:41 +0200 Subject: drm/amdgpu: Add input fence to sync bo map/unmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds input fences to VM_IOCTL for buffer object. The kernel will map/unmap the BO only when the fence is signaled. The UAPI for the same has been approved here: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 V2: Bug fix (Arvind) V3: Bug fix (Arvind) V4: Rename UAPI objects as per UAPI review (Marek) V5: Addressed review comemnts from Christian - function should return error. - Add 'TODO' comment - The input fence should be independent of the operation. V6: Addressed review comemnts from Christian - Release the memory allocated by memdup_user(). V7: Addressed review comemnts from Christian - Drop the debug print and add "return r;" for the error handling. V11: Rebase v12: Fix 32-bit holes issue in sturct drm_amdgpu_gem_va. v13: Fix deadlock issue. v14: Fix merge conflict. v15: Fix review comment by renaming syncobj handles. Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 02cf03e811d5..0910a6f8c5f2 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -884,6 +884,10 @@ struct drm_amdgpu_gem_va { * at vm_timeline_point. 
*/ __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; }; #define AMDGPU_HW_IP_GFX 0 -- cgit v1.2.3 From 2e06b175fff5ba1415b74fed441c0d066b8c8dab Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 11 Nov 2024 12:34:30 +0100 Subject: drm/amdgpu: fix userqueue UAPI comments This patch fixes some of the pending UAPI review comments from the libDRM/UAPI review process. - It updates some outdated comments in the userqueue UAPI header highlighted during the libdrm UAPI review. - It removes the GDS BO support which was found unused. - It also removes the unused flags parameter from the UAPI. - It also adds a padding variables in userqueue in/out structures. (Pierre-Eric and Marek) - clarify comments on top of drm_amdgpu_userq_in - clarify comment for queue_id (in) - clarify comment for mqd - clarify comment for compute MQD size - clarify comment for queue_id (out) - remove GDB object from BO object list - remove the unused flags parameter Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 54 +++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0910a6f8c5f2..6158496cc1d0 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -325,35 +325,28 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; -/* user queue IOCTL */ +/* user queue IOCTL operations */ #define AMDGPU_USERQ_OP_CREATE 1 #define AMDGPU_USERQ_OP_FREE 2 -/* Flag to indicate secure buffer related workload, unused for now */ -#define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0) -/* Flag to indicate AQL workload, unused for now */ -#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1) - /* - * MQD (memory queue descriptor) is a set of parameters which allow - * the GPU to uniquely define and identify a usermode queue. This - * structure defines the MQD for GFX-V11 IP ver 0. + * This structure is a container to pass input configuration + * info for all supported userqueue related operations. + * For operation AMDGPU_USERQ_OP_CREATE: user is expected + * to set all fields, excep the parameter 'queue_id'. + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected + * to be set is 'queue_id', eveything else is ignored. */ struct drm_amdgpu_userq_in { /** AMDGPU_USERQ_OP_* */ __u32 op; - /** Queue handle for USERQ_OP_FREE */ + /** Queue id passed for operation USERQ_OP_FREE */ __u32 queue_id; /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ __u32 ip_type; - /** - * @flags: flags to indicate special function for queue like secure - * buffer (TMZ). Unused for now. - */ - __u32 flags; /** * @doorbell_handle: the handle of doorbell GEM object - * associated to this client. + * associated with this userqueue client. */ __u32 doorbell_handle; /** @@ -362,7 +355,7 @@ struct drm_amdgpu_userq_in { * and doorbell_offset in the doorbell bo. */ __u32 doorbell_offset; - + __u32 _pad; /** * @queue_va: Virtual address of the GPU memory which holds the queue * object. The queue holds the workload packets. 
@@ -387,25 +380,31 @@ struct drm_amdgpu_userq_in { */ __u64 wptr_va; /** - * @mqd: Queue descriptor for USERQ_OP_CREATE + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. + * * MQD data can be of different size for different GPU IP/engine and * their respective versions/revisions, so this points to a __u64 * - * which holds MQD of this usermode queue. + * which holds IP specific MQD of this usermode queue. */ __u64 mqd; /** * @size: size of MQD data in bytes, it must match the MQD structure * size of the respective engine/revision defined in UAPI for ex, for - * gfx_v11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx_v11). + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). */ __u64 mqd_size; }; +/* The structure to carry output of userqueue ops */ struct drm_amdgpu_userq_out { - /** Queue handle */ + /** + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique + * queue ID to represent the newly created userqueue in the system, otherwise + * it should be ignored. + */ __u32 queue_id; - /** Flags */ - __u32 flags; + __u32 _pad; }; union drm_amdgpu_userq { @@ -420,11 +419,6 @@ struct drm_amdgpu_userq_mqd_gfx11 { * Use AMDGPU_INFO_IOCTL to find the exact size of the object. */ __u64 shadow_va; - /** - * @gds_va: Virtual address of the GPU memory to hold the GDS buffer. - * Use AMDGPU_INFO_IOCTL to find the exact size of the object. - */ - __u64 gds_va; /** * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. * Use AMDGPU_INFO_IOCTL to find the exact size of the object. @@ -446,8 +440,8 @@ struct drm_amdgpu_userq_mqd_sdma_gfx11 { struct drm_amdgpu_userq_mqd_compute_gfx11 { /** * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. - * This must be a from a separate GPU object, and must be at least 1 page - * sized. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. */ __u64 eop_va; }; -- cgit v1.2.3 From 2761bb9a31f1b863037547d73dc6aac1461ceab6 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 11 Nov 2024 12:43:07 +0530 Subject: drm/amdgpu: Modify userq signal/wait struct field names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify kernel UAPI userq signal/wait struct field names and description corresponding to the libdrm UAPI review comments. libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 102 +++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6158496cc1d0..72dc16dbca7f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -453,27 +453,17 @@ struct drm_amdgpu_userq_signal { * to retrieve the WPTR. */ __u32 queue_id; + __u32 pad; /** - * @flags: flags to indicate special function for userq fence creation. - * Unused for now. - */ - __u32 flags; - /** - * @syncobj_handles_array: An array of syncobj handles used by the userq fence - * creation IOCTL to install the created dma_fence object which can be - * utilized by userspace to explicitly synchronize GPU commands. + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to be signaled. 
*/ - __u64 syncobj_handles_array; + __u64 syncobj_handles; /** * @num_syncobj_handles: A count that represents the number of syncobj handles in - * @syncobj_handles_array. + * @syncobj_handles. */ __u64 num_syncobj_handles; - /** - * @syncobj_point: A given point on the timeline to be signaled. - * Unused for now. - */ - __u64 syncobj_point; /** * @bo_read_handles: The list of BO handles that the submitted user queue job * is using for read only. This will update BO fences in the kernel. @@ -485,20 +475,15 @@ struct drm_amdgpu_userq_signal { */ __u64 bo_write_handles; /** - * @num_read_bo_handles: A count that represents the number of read BO handles in + * @num_bo_read_handles: A count that represents the number of read BO handles in * @bo_read_handles. */ - __u32 num_read_bo_handles; + __u32 num_bo_read_handles; /** - * @num_write_bo_handles: A count that represents the number of write BO handles in + * @num_bo_write_handles: A count that represents the number of write BO handles in * @bo_write_handles. */ - __u32 num_write_bo_handles; - /** - * @bo_flags: flags to indicate BOs synchronize for READ or WRITE - */ - __u32 bo_flags; - __u32 pad; + __u32 num_bo_write_handles; }; struct drm_amdgpu_userq_fence_info { @@ -517,38 +502,18 @@ struct drm_amdgpu_userq_fence_info { struct drm_amdgpu_userq_wait { /** - * @waitq_id: Queue handle used to retrieve the queue information to store - * the fence driver references in the wait user queue structure. - */ - __u32 waitq_id; - /** - * @flags: flags to specify special function for userq wait information. - * Unused for now. - */ - __u32 flags; - /** - * @bo_wait_flags: flags to define the BOs for READ or WRITE to store the - * matching fence wait info pair in @userq_fence_info. - */ - __u32 bo_wait_flags; - /** - * @num_points: A count that represents the number of timeline syncobj handles in - * syncobj_handles_array. - */ - __u32 num_points; - /** - * @syncobj_handles_array: An array of syncobj handles defined to get the - * fence wait information of every syncobj handles in the array. + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to get the va/value pairs. */ - __u64 syncobj_handles_array; + __u64 syncobj_handles; /** - * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the - * fence wait information of every timeline syncobj handles in the array. + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by + * the user queue job to get the va/value pairs at given @syncobj_timeline_points. */ - __u64 syncobj_timeline_handles; + __u64 syncobj_timeline_handles; /** - * @syncobj_timeline_points: An array of timeline syncobj points defined to get the - * fence wait points of every timeline syncobj handles in the syncobj_handles_array. + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the + * user queue job for the corresponding @syncobj_timeline_handles. */ __u64 syncobj_timeline_points; /** @@ -561,32 +526,37 @@ struct drm_amdgpu_userq_wait { * job to get the va/value pairs. */ __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of timeline + * syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As input it defines + * the maximum number of fences that can be returned and as output it will specify + * how many fences were actually returned from the ioctl. 
+ */ + __u16 num_fences; /** * @num_syncobj_handles: A count that represents the number of syncobj handles in - * @syncobj_handles_array. + * @syncobj_handles. */ __u32 num_syncobj_handles; /** - * @num_read_bo_handles: A count that represents the number of read BO handles in + * @num_bo_read_handles: A count that represents the number of read BO handles in * @bo_read_handles. */ - __u32 num_read_bo_handles; + __u32 num_bo_read_handles; /** - * @num_write_bo_handles: A count that represents the number of write BO handles in + * @num_bo_write_handles: A count that represents the number of write BO handles in * @bo_write_handles. */ - __u32 num_write_bo_handles; - __u32 pad; - /** - * @userq_fence_info: An array of fence information (va and value) pair of each - * objects stored in @syncobj_handles_array and @bo_handles_array. - */ - __u64 userq_fence_info; + __u32 num_bo_write_handles; /** - * @num_fences: A count that represents the number of actual fences installed in - * each syncobj and bo handles. + * @out_fences: The field is a return value from the ioctl containing the list of + * address/value pairs to wait for. */ - __u64 num_fences; + __u64 out_fences; }; /* vm ioctl */ -- cgit v1.2.3 From 90c448fef3120d79bd8031665213981c966dbaf4 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 30 Oct 2024 15:39:42 +0100 Subject: drm/amdgpu: add new AMDGPU_INFO subquery for userq objects This patch adds a new subquery (AMDGPU_INFO_UQ_FW_AREAS) in AMDGPU_INFO_IOCTL to get the size and alignment of shadow and csa objects from the FW setup. This information is required for the userqueue consumers. V2: Added Alex's suggestions and addressed review comments: - make this query IP specific (GFX/SDMA etc) - give a better title (AMDGPU_INFO_UQ_METADATA) - restructured the code as per sample code shared by Alex V3: Split the UAPI patch from shadow_size_fn modifications V4: Addressed review comments from UAPI review (Marek/Pierre-Eric) - Change the query name to AMDGPU_INFO_UQ_FW_AREAS - remove unused inpur parameter for AMDGPU_HW_IP* UAPI link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/400/ Cc: Alex Deucher Cc: Christian Koenig Cc: Arvind Yadav Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 72dc16dbca7f..5dbd9037afe7 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1193,6 +1193,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow { #define AMDGPU_INFO_MAX_IBS 0x22 /* query last page fault info */ #define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff @@ -1469,6 +1471,27 @@ struct drm_amdgpu_info_hw_ip { __u32 ip_discovery_version; }; +/* GFX metadata BO sizes and alignment info (in bytes) */ +struct drm_amdgpu_info_uq_fw_areas_gfx { + /* shadow area size */ + __u32 shadow_size; + /* shadow area base virtual mem alignment */ + __u32 shadow_alignment; + /* context save area size */ + __u32 csa_size; + /* context save area base virtual mem alignment */ + __u32 csa_alignment; +}; + +/* IP specific fw related information used in the + * subquery AMDGPU_INFO_UQ_FW_AREAS + */ +struct drm_amdgpu_info_uq_fw_areas { + union { + struct drm_amdgpu_info_uq_fw_areas_gfx gfx; + 
}; +}; + struct drm_amdgpu_info_num_handles { /** Max handles as supported by firmware for UVD */ __u32 uvd_max_handles; @@ -1532,6 +1555,23 @@ struct drm_amdgpu_info_gpuvm_fault { __u32 vmhub; }; +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + /* * Supported GPU families */ -- cgit v1.2.3 From 1af688126361bc881b134b9d25738e22dd457f30 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 24 Mar 2025 16:26:00 -0400 Subject: drm/amdgpu: add UAPI to query if user queues are supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an INFO query to check if user queues are supported. v2: switch to a mask of IPs (Marek) v3: move to drm_amdgpu_info_device (Marek) Cc: marek.olsak@amd.com Cc: prike.liang@amd.com Cc: sunil.khatri@amd.com Cc: yogesh.mohanmarimuthu@amd.com Reviewed-by: Marek Olšák Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 5dbd9037afe7..ef97c0d78b8a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1453,6 +1453,9 @@ struct drm_amdgpu_info_device { __u32 csa_size; /* context save area base virtual alignment for gfx11 */ __u32 csa_alignment; + /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ + __u32 userq_ip_mask; + __u32 pad; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3 From 12a2bf6765c2a61eb7f20870452bb915eb28fdcc Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 8 Apr 2025 16:54:14 -0400 Subject: drm: Add UAPI for the Asahi driver This adds the UAPI for the Asahi driver targeting the GPU in the Apple M1 and M2 series systems on chip. The UAPI design is based on other modern Vulkan-capable drivers, including Xe and Panthor. Memory management is based on explicit VM management. Synchronization is exclusively explicit sync. This UAPI is validated against our open source Mesa stack, which is fully conformant to the OpenGL 4.6, OpenGL ES 3.2, OpenCL 3.0, and Vulkan 1.4 standards. The Vulkan driver supports sparse, exercising the VM_BIND mechanism. This patch adds the standalone UAPI header. It is implemented by an open source DRM driver written in Rust. We fully intend to upstream this driver when possible. However, as a production graphics driver, it depends on a significant number of Rust abstractions that will take a long time to upstream. In the mean time, our userspace is upstream in Mesa but is not allowed to probe with upstream Mesa as the UAPI is not yet reviewed and merged in the upstream kernel. Although we ship a patched Mesa in Fedora Asahi Remix, any containers shipping upstream Mesa builds are broken for our users, including upstream Flatpak and Waydroid runtimes. Additionally, it forces us to maintain forks of Mesa and virglrenderer, which complicates bisects. The intention in sending out this patch is for this UAPI to be thoroughly reviewed. 
Once we as the DRM community are satisfied with the UAPI, this header lands signifying that the UAPI is stable and must only be evolved in backwards-compatible ways; it will be the UAPI implemented in the DRM driver that eventually lands upstream. That promise lets us enable upstream Mesa, solving all these issues while the upstream Rust abstractions are developed. https://github.com/alyssarosenzweig/linux/commits/agx-uapi-v7 contains the DRM driver implementing this proposed UAPI. https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33984 contains the Mesa patches to implement this proposed UAPI. That Linux and Mesa branch together give a complete graphics/compute stack on top of this UAPI. Co-developed-by: Asahi Lina Signed-off-by: Asahi Lina Acked-by: Simona Vetter Reviewed-by: Neal Gompa Reviewed-by: Janne Grunau Reviewed-by: Faith Ekstrand Link: https://lore.kernel.org/r/20250408-agx-uapi-v7-1-ad122d4f7324@rosenzweig.io Signed-off-by: Alyssa Rosenzweig --- include/uapi/drm/asahi_drm.h | 1194 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1194 insertions(+) create mode 100644 include/uapi/drm/asahi_drm.h (limited to 'include') diff --git a/include/uapi/drm/asahi_drm.h b/include/uapi/drm/asahi_drm.h new file mode 100644 index 000000000000..de67f1c603af --- /dev/null +++ b/include/uapi/drm/asahi_drm.h @@ -0,0 +1,1194 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) The Asahi Linux Contributors + * Copyright (C) 2018-2023 Collabora Ltd. + * Copyright (C) 2014-2018 Broadcom + */ +#ifndef _ASAHI_DRM_H_ +#define _ASAHI_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: Introduction to the Asahi UAPI + * + * This documentation describes the Asahi IOCTLs. + * + * Just a few generic rules about the data passed to the Asahi IOCTLs (cribbed + * from Panthor): + * + * - Structures must be aligned on 64-bit/8-byte. If the object is not + * naturally aligned, a padding field must be added. + * - Fields must be explicitly aligned to their natural type alignment with + * pad[0..N] fields. + * - All padding fields will be checked by the driver to make sure they are + * zeroed. + * - Flags can be added, but not removed/replaced. + * - New fields can be added to the main structures (the structures + * directly passed to the ioctl). Those fields can be added at the end of + * the structure, or replace existing padding fields. Any new field being + * added must preserve the behavior that existed before those fields were + * added when a value of zero is passed. + * - New fields can be added to indirect objects (objects pointed by the + * main structure), iff those objects are passed a size to reflect the + * size known by the userspace driver (see + * drm_asahi_cmd_header::size). + * - If the kernel driver is too old to know some fields, those will be + * ignored if zero, and otherwise rejected (and so will be zero on output). + * - If userspace is too old to know some fields, those will be zeroed + * (input) before the structure is parsed by the kernel driver. + * - Each new flag/field addition must come with a driver version update so + * the userspace driver doesn't have to guess which flags are supported. + * - Structures should not contain unions, as this would defeat the + * extensibility of such structures. + * - IOCTLs can't be removed or replaced. New IOCTL IDs should be placed + * at the end of the drm_asahi_ioctl_id enum. 
+ */ + +/** + * enum drm_asahi_ioctl_id - IOCTL IDs + * + * Place new ioctls at the end, don't re-order, don't replace or remove entries. + * + * These IDs are not meant to be used directly. Use the DRM_IOCTL_ASAHI_xxx + * definitions instead. + */ +enum drm_asahi_ioctl_id { + /** @DRM_ASAHI_GET_PARAMS: Query device properties. */ + DRM_ASAHI_GET_PARAMS = 0, + + /** @DRM_ASAHI_GET_TIME: Query device time. */ + DRM_ASAHI_GET_TIME, + + /** @DRM_ASAHI_VM_CREATE: Create a GPU VM address space. */ + DRM_ASAHI_VM_CREATE, + + /** @DRM_ASAHI_VM_DESTROY: Destroy a VM. */ + DRM_ASAHI_VM_DESTROY, + + /** @DRM_ASAHI_VM_BIND: Bind/unbind memory to a VM. */ + DRM_ASAHI_VM_BIND, + + /** @DRM_ASAHI_GEM_CREATE: Create a buffer object. */ + DRM_ASAHI_GEM_CREATE, + + /** + * @DRM_ASAHI_GEM_MMAP_OFFSET: Get offset to pass to mmap() to map a + * given GEM handle. + */ + DRM_ASAHI_GEM_MMAP_OFFSET, + + /** @DRM_ASAHI_GEM_BIND_OBJECT: Bind memory as a special object */ + DRM_ASAHI_GEM_BIND_OBJECT, + + /** @DRM_ASAHI_QUEUE_CREATE: Create a scheduling queue. */ + DRM_ASAHI_QUEUE_CREATE, + + /** @DRM_ASAHI_QUEUE_DESTROY: Destroy a scheduling queue. */ + DRM_ASAHI_QUEUE_DESTROY, + + /** @DRM_ASAHI_SUBMIT: Submit commands to a queue. */ + DRM_ASAHI_SUBMIT, +}; + +#define DRM_ASAHI_MAX_CLUSTERS 64 + +/** + * struct drm_asahi_params_global - Global parameters. + * + * This struct may be queried by drm_asahi_get_params. + */ +struct drm_asahi_params_global { + /** @features: Feature bits from drm_asahi_feature */ + __u64 features; + + /** @gpu_generation: GPU generation, e.g. 13 for G13G */ + __u32 gpu_generation; + + /** @gpu_variant: GPU variant as a character, e.g. 'C' for G13C */ + __u32 gpu_variant; + + /** + * @gpu_revision: GPU revision in BCD, e.g. 0x00 for 'A0' or + * 0x21 for 'C1' + */ + __u32 gpu_revision; + + /** @chip_id: Chip ID in BCD, e.g. 0x8103 for T8103 */ + __u32 chip_id; + + /** @num_dies: Number of dies in the SoC */ + __u32 num_dies; + + /** @num_clusters_total: Number of GPU clusters (across all dies) */ + __u32 num_clusters_total; + + /** + * @num_cores_per_cluster: Number of logical cores per cluster + * (including inactive/nonexistent) + */ + __u32 num_cores_per_cluster; + + /** @max_frequency_khz: Maximum GPU core clock frequency */ + __u32 max_frequency_khz; + + /** @core_masks: Bitmask of present/enabled cores per cluster */ + __u64 core_masks[DRM_ASAHI_MAX_CLUSTERS]; + + /** + * @vm_start: VM range start VMA. Together with @vm_end, this defines + * the window of valid GPU VAs. Userspace is expected to subdivide VAs + * out of this window. + * + * This window contains all virtual addresses that userspace needs to + * know about. There may be kernel-internal GPU VAs outside this range, + * but that detail is not relevant here. + */ + __u64 vm_start; + + /** @vm_end: VM range end VMA */ + __u64 vm_end; + + /** + * @vm_kernel_min_size: Minimum kernel VMA window size. + * + * When creating a VM, userspace is required to carve out a section of + * virtual addresses (within the range given by @vm_start and + * @vm_end). The kernel will allocate various internal structures + * within the specified VA range. + * + * Allowing userspace to choose the VA range for the kernel, rather than + * the kernel reserving VAs and requiring userspace to cope, can assist + * in implementing SVM. + */ + __u64 vm_kernel_min_size; + + /** + * @max_commands_per_submission: Maximum number of supported commands + * per submission. This mirrors firmware limits. 
Userspace must split up + * larger command buffers, which may require inserting additional + * synchronization. + */ + __u32 max_commands_per_submission; + + /** + * @max_attachments: Maximum number of drm_asahi_attachment's per + * command + */ + __u32 max_attachments; + + /** + * @command_timestamp_frequency_hz: Timebase frequency for timestamps + * written during command execution, specified via drm_asahi_timestamp + * structures. As this rate is controlled by the firmware, it is a + * queryable parameter. + * + * Userspace must divide by this frequency to convert timestamps to + * seconds, rather than hardcoding a particular firmware's rate. + */ + __u64 command_timestamp_frequency_hz; +}; + +/** + * enum drm_asahi_feature - Feature bits + * + * This covers only features that userspace cannot infer from the architecture + * version. Most features don't need to be here. + */ +enum drm_asahi_feature { + /** + * @DRM_ASAHI_FEATURE_SOFT_FAULTS: GPU has "soft fault" enabled. Shader + * loads of unmapped memory will return zero. Shader stores to unmapped + * memory will be silently discarded. Note that only shader load/store + * is affected. Other hardware units are not affected, notably including + * texture sampling. + * + * Soft fault is set when initializing the GPU and cannot be runtime + * toggled. Therefore, it is exposed as a feature bit and not a + * userspace-settable flag on the VM. When soft fault is enabled, + * userspace can speculate memory accesses more aggressively. + */ + DRM_ASAHI_FEATURE_SOFT_FAULTS = (1UL) << 0, +}; + +/** + * struct drm_asahi_get_params - Arguments passed to DRM_IOCTL_ASAHI_GET_PARAMS + */ +struct drm_asahi_get_params { + /** @param_group: Parameter group to fetch (MBZ) */ + __u32 param_group; + + /** @pad: MBZ */ + __u32 pad; + + /** @pointer: User pointer to write parameter struct */ + __u64 pointer; + + /** + * @size: Size of the user buffer. In case of older userspace, this may + * be less than sizeof(struct drm_asahi_params_global). The kernel will + * not write past the length specified here, allowing extensibility. + */ + __u64 size; +}; + +/** + * struct drm_asahi_vm_create - Arguments passed to DRM_IOCTL_ASAHI_VM_CREATE + */ +struct drm_asahi_vm_create { + /** + * @kernel_start: Start of the kernel-reserved address range. See + * drm_asahi_params_global::vm_kernel_min_size. + * + * Both @kernel_start and @kernel_end must be within the range of + * valid VAs given by drm_asahi_params_global::vm_start and + * drm_asahi_params_global::vm_end. The size of the kernel range + * (@kernel_end - @kernel_start) must be at least + * drm_asahi_params_global::vm_kernel_min_size. + * + * Userspace must not bind any memory on this VM into this reserved + * range, it is for kernel use only. + */ + __u64 kernel_start; + + /** + * @kernel_end: End of the kernel-reserved address range. See + * @kernel_start. + */ + __u64 kernel_end; + + /** @vm_id: Returned VM ID */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * struct drm_asahi_vm_destroy - Arguments passed to DRM_IOCTL_ASAHI_VM_DESTROY + */ +struct drm_asahi_vm_destroy { + /** @vm_id: VM ID to be destroyed */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * enum drm_asahi_gem_flags - Flags for GEM creation + */ +enum drm_asahi_gem_flags { + /** + * @DRM_ASAHI_GEM_WRITEBACK: BO should be CPU-mapped as writeback. + * + * Map as writeback instead of write-combine. This optimizes for CPU + * reads. 
+ */ + DRM_ASAHI_GEM_WRITEBACK = (1L << 0), + + /** + * @DRM_ASAHI_GEM_VM_PRIVATE: BO is private to this GPU VM (no exports). + */ + DRM_ASAHI_GEM_VM_PRIVATE = (1L << 1), +}; + +/** + * struct drm_asahi_gem_create - Arguments passed to DRM_IOCTL_ASAHI_GEM_CREATE + */ +struct drm_asahi_gem_create { + /** @size: Size of the BO */ + __u64 size; + + /** @flags: Combination of drm_asahi_gem_flags flags. */ + __u32 flags; + + /** + * @vm_id: VM ID to assign to the BO, if DRM_ASAHI_GEM_VM_PRIVATE is set + */ + __u32 vm_id; + + /** @handle: Returned GEM handle for the BO */ + __u32 handle; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * struct drm_asahi_gem_mmap_offset - Arguments passed to + * DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET + */ +struct drm_asahi_gem_mmap_offset { + /** @handle: Handle for the object being mapped. */ + __u32 handle; + + /** @flags: Must be zero */ + __u32 flags; + + /** @offset: The fake offset to use for subsequent mmap call */ + __u64 offset; +}; + +/** + * enum drm_asahi_bind_flags - Flags for GEM binding + */ +enum drm_asahi_bind_flags { + /** + * @DRM_ASAHI_BIND_UNBIND: Instead of binding a GEM object to the range, + * simply unbind the GPU VMA range. + */ + DRM_ASAHI_BIND_UNBIND = (1L << 0), + + /** @DRM_ASAHI_BIND_READ: Map BO with GPU read permission */ + DRM_ASAHI_BIND_READ = (1L << 1), + + /** @DRM_ASAHI_BIND_WRITE: Map BO with GPU write permission */ + DRM_ASAHI_BIND_WRITE = (1L << 2), + + /** + * @DRM_ASAHI_BIND_SINGLE_PAGE: Map a single page of the BO repeatedly + * across the VA range. + * + * This is useful to fill a VA range with scratch pages or zero pages. + * It is intended as a mechanism to accelerate sparse. + */ + DRM_ASAHI_BIND_SINGLE_PAGE = (1L << 3), +}; + +/** + * struct drm_asahi_gem_bind_op - Description of a single GEM bind operation. + */ +struct drm_asahi_gem_bind_op { + /** @flags: Combination of drm_asahi_bind_flags flags. */ + __u32 flags; + + /** @handle: GEM object to bind (except for UNBIND) */ + __u32 handle; + + /** + * @offset: Offset into the object (except for UNBIND). + * + * For a regular bind, this is the beginning of the region of the GEM + * object to bind. + * + * For a single-page bind, this is the offset to the single page that + * will be repeatedly bound. + * + * Must be page-size aligned. + */ + __u64 offset; + + /** + * @range: Number of bytes to bind/unbind to @addr. + * + * Must be page-size aligned. + */ + __u64 range; + + /** + * @addr: Address to bind to. + * + * Must be page-size aligned. + */ + __u64 addr; +}; + +/** + * struct drm_asahi_vm_bind - Arguments passed to + * DRM_IOCTL_ASAHI_VM_BIND + */ +struct drm_asahi_vm_bind { + /** @vm_id: The ID of the VM to bind to */ + __u32 vm_id; + + /** @num_binds: number of binds in this IOCTL. */ + __u32 num_binds; + + /** + * @stride: Stride in bytes between consecutive binds. This allows + * extensibility of drm_asahi_gem_bind_op. + */ + __u32 stride; + + /** @pad: MBZ */ + __u32 pad; + + /** + * @userptr: User pointer to an array of @num_binds structures of type + * @drm_asahi_gem_bind_op and size @stride bytes. 
+ */ + __u64 userptr; +}; + +/** + * enum drm_asahi_bind_object_op - Special object bind operation + */ +enum drm_asahi_bind_object_op { + /** @DRM_ASAHI_BIND_OBJECT_OP_BIND: Bind a BO as a special GPU object */ + DRM_ASAHI_BIND_OBJECT_OP_BIND = 0, + + /** @DRM_ASAHI_BIND_OBJECT_OP_UNBIND: Unbind a special GPU object */ + DRM_ASAHI_BIND_OBJECT_OP_UNBIND = 1, +}; + +/** + * enum drm_asahi_bind_object_flags - Special object bind flags + */ +enum drm_asahi_bind_object_flags { + /** + * @DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS: Map a BO as a timestamp + * buffer. + */ + DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS = (1L << 0), +}; + +/** + * struct drm_asahi_gem_bind_object - Arguments passed to + * DRM_IOCTL_ASAHI_GEM_BIND_OBJECT + */ +struct drm_asahi_gem_bind_object { + /** @op: Bind operation (enum drm_asahi_bind_object_op) */ + __u32 op; + + /** @flags: Combination of drm_asahi_bind_object_flags flags. */ + __u32 flags; + + /** @handle: GEM object to bind/unbind (BIND) */ + __u32 handle; + + /** @vm_id: The ID of the VM to operate on (MBZ currently) */ + __u32 vm_id; + + /** @offset: Offset into the object (BIND only) */ + __u64 offset; + + /** @range: Number of bytes to bind/unbind (BIND only) */ + __u64 range; + + /** @object_handle: Object handle (out for BIND, in for UNBIND) */ + __u32 object_handle; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * enum drm_asahi_cmd_type - Command type + */ +enum drm_asahi_cmd_type { + /** + * @DRM_ASAHI_CMD_RENDER: Render command, executing on the render + * subqueue. Combined vertex and fragment operation. + * + * Followed by a @drm_asahi_cmd_render payload. + */ + DRM_ASAHI_CMD_RENDER = 0, + + /** + * @DRM_ASAHI_CMD_COMPUTE: Compute command on the compute subqueue. + * + * Followed by a @drm_asahi_cmd_compute payload. + */ + DRM_ASAHI_CMD_COMPUTE = 1, + + /** + * @DRM_ASAHI_SET_VERTEX_ATTACHMENTS: Software command to set + * attachments for subsequent vertex shaders in the same submit. + * + * Followed by (possibly multiple) @drm_asahi_attachment payloads. + */ + DRM_ASAHI_SET_VERTEX_ATTACHMENTS = 2, + + /** + * @DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS: Software command to set + * attachments for subsequent fragment shaders in the same submit. + * + * Followed by (possibly multiple) @drm_asahi_attachment payloads. + */ + DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS = 3, + + /** + * @DRM_ASAHI_SET_COMPUTE_ATTACHMENTS: Software command to set + * attachments for subsequent compute shaders in the same submit. + * + * Followed by (possibly multiple) @drm_asahi_attachment payloads. + */ + DRM_ASAHI_SET_COMPUTE_ATTACHMENTS = 4, +}; + +/** + * enum drm_asahi_priority - Scheduling queue priority. + * + * These priorities are forwarded to the firmware to influence firmware + * scheduling. The exact policy is ultimately decided by firmware, but + * these enums allow userspace to communicate the intentions. + */ +enum drm_asahi_priority { + /** @DRM_ASAHI_PRIORITY_LOW: Low priority queue. */ + DRM_ASAHI_PRIORITY_LOW = 0, + + /** @DRM_ASAHI_PRIORITY_MEDIUM: Medium priority queue. */ + DRM_ASAHI_PRIORITY_MEDIUM = 1, + + /** + * @DRM_ASAHI_PRIORITY_HIGH: High priority queue. + * + * Reserved for future extension. + */ + DRM_ASAHI_PRIORITY_HIGH = 2, + + /** + * @DRM_ASAHI_PRIORITY_REALTIME: Real-time priority queue. + * + * Reserved for future extension. 
+ */ + DRM_ASAHI_PRIORITY_REALTIME = 3, +}; + +/** + * struct drm_asahi_queue_create - Arguments passed to + * DRM_IOCTL_ASAHI_QUEUE_CREATE + */ +struct drm_asahi_queue_create { + /** @flags: MBZ */ + __u32 flags; + + /** @vm_id: The ID of the VM this queue is bound to */ + __u32 vm_id; + + /** @priority: One of drm_asahi_priority */ + __u32 priority; + + /** @queue_id: The returned queue ID */ + __u32 queue_id; + + /** + * @usc_exec_base: GPU base address for all USC binaries (shaders) on + * this queue. USC addresses are 32-bit relative to this 64-bit base. + * + * This sets the following registers on all queue commands: + * + * USC_EXEC_BASE_TA (vertex) + * USC_EXEC_BASE_ISP (fragment) + * USC_EXEC_BASE_CP (compute) + * + * While the hardware lets us configure these independently per command, + * we do not have a use case for this. Instead, we expect userspace to + * fix a 4GiB VA carveout for USC memory and pass its base address here. + */ + __u64 usc_exec_base; +}; + +/** + * struct drm_asahi_queue_destroy - Arguments passed to + * DRM_IOCTL_ASAHI_QUEUE_DESTROY + */ +struct drm_asahi_queue_destroy { + /** @queue_id: The queue ID to be destroyed */ + __u32 queue_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * enum drm_asahi_sync_type - Sync item type + */ +enum drm_asahi_sync_type { + /** @DRM_ASAHI_SYNC_SYNCOBJ: Binary sync object */ + DRM_ASAHI_SYNC_SYNCOBJ = 0, + + /** @DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ: Timeline sync object */ + DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ = 1, +}; + +/** + * struct drm_asahi_sync - Sync item + */ +struct drm_asahi_sync { + /** @sync_type: One of drm_asahi_sync_type */ + __u32 sync_type; + + /** @handle: The sync object handle */ + __u32 handle; + + /** @timeline_value: Timeline value for timeline sync objects */ + __u64 timeline_value; +}; + +/** + * define DRM_ASAHI_BARRIER_NONE - Command index for no barrier + * + * This special value may be passed in to drm_asahi_command::vdm_barrier or + * drm_asahi_command::cdm_barrier to indicate that the respective subqueue + * should not wait on any previous work. + */ +#define DRM_ASAHI_BARRIER_NONE (0xFFFFu) + +/** + * struct drm_asahi_cmd_header - Top level command structure + * + * This struct is core to the command buffer definition and therefore is not + * extensible. + */ +struct drm_asahi_cmd_header { + /** @cmd_type: One of drm_asahi_cmd_type */ + __u16 cmd_type; + + /** + * @size: Size of this command, not including this header. + * + * For hardware commands, this enables extensibility of commands without + * requiring extra command types. Passing a command that is shorter + * than expected is explicitly allowed for backwards-compatibility. + * Truncated fields will be zeroed. + * + * For the synthetic attachment setting commands, this implicitly + * encodes the number of attachments. These commands take multiple + * fixed-size @drm_asahi_attachment structures as their payload, so size + * equals number of attachments * sizeof(struct drm_asahi_attachment). + */ + __u16 size; + + /** + * @vdm_barrier: VDM (render) command index to wait on. + * + * Barriers are indices relative to the beginning of a given submit. A + * barrier of 0 waits on commands submitted to the respective subqueue + * in previous submit ioctls. A barrier of N waits on N previous + * commands on the subqueue within the current submit ioctl. As a + * special case, passing @DRM_ASAHI_BARRIER_NONE avoids waiting on any + * commands in the subqueue. + * + * Examples: + * + * 0: This waits on all previous work. 
+ * + * NONE: This does not wait for anything on this subqueue. + * + * 1: This waits on the first render command in the submit. + * This is valid only if there are multiple render commands in the + * same submit. + * + * Barriers are valid only for hardware commands. Synthetic software + * commands to set attachments must pass NONE here. + */ + __u16 vdm_barrier; + + /** + * @cdm_barrier: CDM (compute) command index to wait on. + * + * See @vdm_barrier, and replace VDM/render with CDM/compute. + */ + __u16 cdm_barrier; +}; + +/** + * struct drm_asahi_submit - Arguments passed to DRM_IOCTL_ASAHI_SUBMIT + */ +struct drm_asahi_submit { + /** + * @syncs: An optional pointer to an array of drm_asahi_sync. The first + * @in_sync_count elements are in-syncs, then the remaining + * @out_sync_count elements are out-syncs. Using a single array with + * explicit partitioning simplifies handling. + */ + __u64 syncs; + + /** + * @cmdbuf: Pointer to the command buffer to submit. + * + * This is a flat command buffer. By design, it contains no CPU + * pointers, which makes it suitable for a virtgpu wire protocol without + * requiring any serializing/deserializing step. + * + * It consists of a series of commands. Each command begins with a + * fixed-size @drm_asahi_cmd_header header and is followed by a + * variable-length payload according to the type and size in the header. + * + * The combined count of "real" hardware commands must be nonzero and at + * most drm_asahi_params_global::max_commands_per_submission. + */ + __u64 cmdbuf; + + /** @flags: Flags for command submission (MBZ) */ + __u32 flags; + + /** @queue_id: The queue ID to be submitted to */ + __u32 queue_id; + + /** + * @in_sync_count: Number of sync objects to wait on before starting + * this job. + */ + __u32 in_sync_count; + + /** + * @out_sync_count: Number of sync objects to signal upon completion of + * this job. + */ + __u32 out_sync_count; + + /** @cmdbuf_size: Command buffer size in bytes */ + __u32 cmdbuf_size; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * struct drm_asahi_attachment - Describe an "attachment". + * + * Attachments are any memory written by shaders, notably including render + * target attachments written by the end-of-tile program. This is purely a hint + * about the accessed memory regions. It is optional to specify, which is + * fortunate as it cannot be specified precisely with bindless access anyway. + * But where possible, it's probably a good idea for userspace to include these + * hints, forwarded to the firmware. + * + * This struct is implicitly sized and therefore is not extensible. + */ +struct drm_asahi_attachment { + /** @pointer: Base address of the attachment */ + __u64 pointer; + + /** @size: Size of the attachment in bytes */ + __u64 size; + + /** @pad: MBZ */ + __u32 pad; + + /** @flags: MBZ */ + __u32 flags; +}; + +enum drm_asahi_render_flags { + /** + * @DRM_ASAHI_RENDER_VERTEX_SCRATCH: A vertex stage shader uses scratch + * memory. + */ + DRM_ASAHI_RENDER_VERTEX_SCRATCH = (1U << 0), + + /** + * @DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES: Process even empty tiles. + * This must be set when clearing render targets. + */ + DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES = (1U << 1), + + /** + * @DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING: Run vertex stage on a single + * cluster (on multi-cluster GPUs) + * + * This harms performance but can workaround certain sync/coherency + * bugs, and therefore is useful for debugging. 
+ */ + DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING = (1U << 2), + + /** + * @DRM_ASAHI_RENDER_DBIAS_IS_INT: Use integer depth bias formula. + * + * Graphics specifications contain two alternate formulas for depth + * bias, a float formula used with floating-point depth buffers and an + * integer formula using with unorm depth buffers. This flag specifies + * that the integer formula should be used. If omitted, the float + * formula is used instead. + * + * This corresponds to bit 18 of the relevant hardware control register, + * so we match that here for efficiency. + */ + DRM_ASAHI_RENDER_DBIAS_IS_INT = (1U << 18), +}; + +/** + * struct drm_asahi_zls_buffer - Describe a depth or stencil buffer. + * + * These fields correspond to hardware registers in the ZLS (Z Load/Store) unit. + * There are three hardware registers for each field respectively for loads, + * stores, and partial renders. In practice, it makes sense to set all to the + * same values, except in exceptional cases not yet implemented in userspace, so + * we do not duplicate here for simplicity/efficiency. + * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_zls_buffer { + /** @base: Base address of the buffer */ + __u64 base; + + /** + * @comp_base: If the load buffer is compressed, address of the + * compression metadata section. + */ + __u64 comp_base; + + /** + * @stride: If layered rendering is enabled, the number of bytes + * between each layer of the buffer. + */ + __u32 stride; + + /** + * @comp_stride: If layered rendering is enabled, the number of bytes + * between each layer of the compression metadata. + */ + __u32 comp_stride; +}; + +/** + * struct drm_asahi_timestamp - Describe a timestamp write. + * + * The firmware can optionally write the GPU timestamp at render pass + * granularities, but it needs to be mapped specially via + * DRM_IOCTL_ASAHI_GEM_BIND_OBJECT. This structure therefore describes where to + * write as a handle-offset pair, rather than a GPU address like normal. + * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_timestamp { + /** + * @handle: Handle of the timestamp buffer, or 0 to skip this + * timestamp. If nonzero, this must equal the value returned in + * drm_asahi_gem_bind_object::object_handle. + */ + __u32 handle; + + /** @offset: Offset to write into the timestamp buffer */ + __u32 offset; +}; + +/** + * struct drm_asahi_timestamps - Describe timestamp writes. + * + * Each operation that can be timestamped, can be timestamped at the start and + * end. Therefore, drm_asahi_timestamp structs always come in pairs, bundled + * together into drm_asahi_timestamps. + * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_timestamps { + /** @start: Timestamp recorded at the start of the operation */ + struct drm_asahi_timestamp start; + + /** @end: Timestamp recorded at the end of the operation */ + struct drm_asahi_timestamp end; +}; + +/** + * struct drm_asahi_helper_program - Describe helper program configuration. + * + * The helper program is a compute-like kernel required for various hardware + * functionality. Its most important role is dynamically allocating + * scratch/stack memory for individual subgroups, by partitioning a static + * allocation shared for the whole device. It is supplied by userspace via + * drm_asahi_helper_program and internally dispatched by the hardware as needed. 
+ * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_helper_program { + /** + * @binary: USC address to the helper program binary. This is a tagged + * pointer with configuration in the bottom bits. + */ + __u32 binary; + + /** @cfg: Additional configuration bits for the helper program. */ + __u32 cfg; + + /** + * @data: Data passed to the helper program. This value is not + * interpreted by the kernel, firmware, or hardware in any way. It is + * simply a sideband for userspace, set with the submit ioctl and read + * via special registers inside the helper program. + * + * In practice, userspace will pass a 64-bit GPU VA here pointing to the + * actual arguments, which presumably don't fit in 64-bits. + */ + __u64 data; +}; + +/** + * struct drm_asahi_bg_eot - Describe a background or end-of-tile program. + * + * The background and end-of-tile programs are dispatched by the hardware at the + * beginning and end of rendering. As the hardware "tilebuffer" is simply local + * memory, these programs are necessary to implement API-level render targets. + * The fragment-like background program is responsible for loading either the + * clear colour or the existing render target contents, while the compute-like + * end-of-tile program stores the tilebuffer contents to memory. + * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_bg_eot { + /** + * @usc: USC address of the hardware USC words binding resources + * (including images and uniforms) and the program itself. Note this is + * an additional layer of indirection compared to the helper program, + * avoiding the need for a sideband for data. This is a tagged pointer + * with additional configuration in the bottom bits. + */ + __u32 usc; + + /** + * @rsrc_spec: Resource specifier for the program. This is a packed + * hardware data structure describing the required number of registers, + * uniforms, bound textures, and bound samplers. + */ + __u32 rsrc_spec; +}; + +/** + * struct drm_asahi_cmd_render - Command to submit 3D + * + * This command submits a single render pass. The hardware control stream may + * include many draws and subpasses, but within the command, the framebuffer + * dimensions and attachments are fixed. + * + * The hardware requires the firmware to set a large number of Control Registers + * setting up state at render pass granularity before each command rendering 3D. + * The firmware bundles this state into data structures. Unfortunately, we + * cannot expose either any of that directly to userspace, because the + * kernel-firmware ABI is not stable. Although we can guarantee the firmware + * updates in tandem with the kernel, we cannot break old userspace when + * upgrading the firmware and kernel. Therefore, we need to abstract well the + * data structures to avoid tying our hands with future firmwares. + * + * The bulk of drm_asahi_cmd_render therefore consists of values of hardware + * control registers, marshalled via the firmware interface. + * + * The framebuffer/tilebuffer dimensions are also specified here. In addition to + * being passed to the firmware/hardware, the kernel requires these dimensions + * to calculate various essential tiling-related data structures. It is + * unfortunate that our submits are heavier than on vendors with saner + * hardware-software interfaces. The upshot is all of this information is + * readily available to userspace with all current APIs. 
+ * + * It looks odd - but it's not overly burdensome and it ensures we can remain + * compatible with old userspace. + */ +struct drm_asahi_cmd_render { + /** @flags: Combination of drm_asahi_render_flags flags. */ + __u32 flags; + + /** + * @isp_zls_pixels: ISP_ZLS_PIXELS register value. This contains the + * depth/stencil width/height, which may differ from the framebuffer + * width/height. + */ + __u32 isp_zls_pixels; + + /** + * @vdm_ctrl_stream_base: VDM_CTRL_STREAM_BASE register value. GPU + * address to the beginning of the VDM control stream. + */ + __u64 vdm_ctrl_stream_base; + + /** @vertex_helper: Helper program used for the vertex shader */ + struct drm_asahi_helper_program vertex_helper; + + /** @fragment_helper: Helper program used for the fragment shader */ + struct drm_asahi_helper_program fragment_helper; + + /** + * @isp_scissor_base: ISP_SCISSOR_BASE register value. GPU address of an + * array of scissor descriptors indexed in the render pass. + */ + __u64 isp_scissor_base; + + /** + * @isp_dbias_base: ISP_DBIAS_BASE register value. GPU address of an + * array of depth bias values indexed in the render pass. + */ + __u64 isp_dbias_base; + + /** + * @isp_oclqry_base: ISP_OCLQRY_BASE register value. GPU address of an + * array of occlusion query results written by the render pass. + */ + __u64 isp_oclqry_base; + + /** @depth: Depth buffer */ + struct drm_asahi_zls_buffer depth; + + /** @stencil: Stencil buffer */ + struct drm_asahi_zls_buffer stencil; + + /** @zls_ctrl: ZLS_CTRL register value */ + __u64 zls_ctrl; + + /** @ppp_multisamplectl: PPP_MULTISAMPLECTL register value */ + __u64 ppp_multisamplectl; + + /** + * @sampler_heap: Base address of the sampler heap. This heap is used + * for both vertex shaders and fragment shaders. The registers are + * per-stage, but there is no known use case for separate heaps. + */ + __u64 sampler_heap; + + /** @ppp_ctrl: PPP_CTRL register value */ + __u32 ppp_ctrl; + + /** @width_px: Framebuffer width in pixels */ + __u16 width_px; + + /** @height_px: Framebuffer height in pixels */ + __u16 height_px; + + /** @layers: Number of layers in the framebuffer */ + __u16 layers; + + /** @sampler_count: Number of samplers in the sampler heap. */ + __u16 sampler_count; + + /** @utile_width_px: Width of a logical tilebuffer tile in pixels */ + __u8 utile_width_px; + + /** @utile_height_px: Height of a logical tilebuffer tile in pixels */ + __u8 utile_height_px; + + /** @samples: # of samples in the framebuffer. Must be 1, 2, or 4. */ + __u8 samples; + + /** @sample_size_B: # of bytes in the tilebuffer required per sample. */ + __u8 sample_size_B; + + /** + * @isp_merge_upper_x: 32-bit float used in the hardware triangle + * merging. Calculate as: tan(60 deg) * width. + * + * Making these values UAPI avoids requiring floating-point calculations + * in the kernel in the hot path. + */ + __u32 isp_merge_upper_x; + + /** + * @isp_merge_upper_y: 32-bit float. Calculate as: tan(60 deg) * height. + * See @isp_merge_upper_x. + */ + __u32 isp_merge_upper_y; + + /** @bg: Background program run for each tile at the start */ + struct drm_asahi_bg_eot bg; + + /** @eot: End-of-tile program ran for each tile at the end */ + struct drm_asahi_bg_eot eot; + + /** + * @partial_bg: Background program ran at the start of each tile when + * resuming the render pass during a partial render. + */ + struct drm_asahi_bg_eot partial_bg; + + /** + * @partial_eot: End-of-tile program ran at the end of each tile when + * pausing the render pass during a partial render. 
+ */ + struct drm_asahi_bg_eot partial_eot; + + /** + * @isp_bgobjdepth: ISP_BGOBJDEPTH register value. This is the depth + * buffer clear value, encoded in the depth buffer's format: either a + * 32-bit float or a 16-bit unorm (with upper bits zeroed). + */ + __u32 isp_bgobjdepth; + + /** + * @isp_bgobjvals: ISP_BGOBJVALS register value. The bottom 8-bits + * contain the stencil buffer clear value. + */ + __u32 isp_bgobjvals; + + /** @ts_vtx: Timestamps for the vertex portion of the render */ + struct drm_asahi_timestamps ts_vtx; + + /** @ts_frag: Timestamps for the fragment portion of the render */ + struct drm_asahi_timestamps ts_frag; +}; + +/** + * struct drm_asahi_cmd_compute - Command to submit compute + * + * This command submits a control stream consisting of compute dispatches. There + * is essentially no limit on how many compute dispatches may be included in a + * single compute command, although timestamps are at command granularity. + */ +struct drm_asahi_cmd_compute { + /** @flags: MBZ */ + __u32 flags; + + /** @sampler_count: Number of samplers in the sampler heap. */ + __u32 sampler_count; + + /** + * @cdm_ctrl_stream_base: CDM_CTRL_STREAM_BASE register value. GPU + * address to the beginning of the CDM control stream. + */ + __u64 cdm_ctrl_stream_base; + + /** + * @cdm_ctrl_stream_end: GPU base address to the end of the hardware + * control stream. Note this only considers the first contiguous segment + * of the control stream, as the stream might jump elsewhere. + */ + __u64 cdm_ctrl_stream_end; + + /** @sampler_heap: Base address of the sampler heap. */ + __u64 sampler_heap; + + /** @helper: Helper program used for this compute command */ + struct drm_asahi_helper_program helper; + + /** @ts: Timestamps for the compute command */ + struct drm_asahi_timestamps ts; +}; + +/** + * struct drm_asahi_get_time - Arguments passed to DRM_IOCTL_ASAHI_GET_TIME + */ +struct drm_asahi_get_time { + /** @flags: MBZ. */ + __u64 flags; + + /** @gpu_timestamp: On return, the GPU timestamp in nanoseconds. */ + __u64 gpu_timestamp; +}; + +/** + * DRM_IOCTL_ASAHI() - Build an Asahi IOCTL number + * @__access: Access type. Must be R, W or RW. + * @__id: One of the DRM_ASAHI_xxx id. + * @__type: Suffix of the type being passed to the IOCTL. + * + * Don't use this macro directly, use the DRM_IOCTL_ASAHI_xxx + * values instead. + * + * Return: An IOCTL number to be passed to ioctl() from userspace. + */ +#define DRM_IOCTL_ASAHI(__access, __id, __type) \ + DRM_IO ## __access(DRM_COMMAND_BASE + DRM_ASAHI_ ## __id, \ + struct drm_asahi_ ## __type) + +/* Note: this is an enum so that it can be resolved by Rust bindgen. 
*/ +enum { + DRM_IOCTL_ASAHI_GET_PARAMS = DRM_IOCTL_ASAHI(W, GET_PARAMS, get_params), + DRM_IOCTL_ASAHI_GET_TIME = DRM_IOCTL_ASAHI(WR, GET_TIME, get_time), + DRM_IOCTL_ASAHI_VM_CREATE = DRM_IOCTL_ASAHI(WR, VM_CREATE, vm_create), + DRM_IOCTL_ASAHI_VM_DESTROY = DRM_IOCTL_ASAHI(W, VM_DESTROY, vm_destroy), + DRM_IOCTL_ASAHI_VM_BIND = DRM_IOCTL_ASAHI(W, VM_BIND, vm_bind), + DRM_IOCTL_ASAHI_GEM_CREATE = DRM_IOCTL_ASAHI(WR, GEM_CREATE, gem_create), + DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET = DRM_IOCTL_ASAHI(WR, GEM_MMAP_OFFSET, gem_mmap_offset), + DRM_IOCTL_ASAHI_GEM_BIND_OBJECT = DRM_IOCTL_ASAHI(WR, GEM_BIND_OBJECT, gem_bind_object), + DRM_IOCTL_ASAHI_QUEUE_CREATE = DRM_IOCTL_ASAHI(WR, QUEUE_CREATE, queue_create), + DRM_IOCTL_ASAHI_QUEUE_DESTROY = DRM_IOCTL_ASAHI(W, QUEUE_DESTROY, queue_destroy), + DRM_IOCTL_ASAHI_SUBMIT = DRM_IOCTL_ASAHI(W, SUBMIT, submit), +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _ASAHI_DRM_H_ */ -- cgit v1.2.3 From a36283e2b683f172aa1760c77325e50b16c0f792 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 7 Apr 2025 17:45:41 +0200 Subject: udp_tunnel: create a fastpath GRO lookup. Most UDP tunnels bind a socket to a local port, with ANY address, no peer and no interface index specified. Additionally it's quite common to have a single tunnel device per namespace. Track in each namespace the UDP tunnel socket respecting the above. When only a single one is present, store a reference in the netns. When such reference is not NULL, UDP tunnel GRO lookup just need to match the incoming packet destination port vs the socket local port. The tunnel socket never sets the reuse[port] flag[s]. When bound to no address and interface, no other socket can exist in the same netns matching the specified local port. Matching packets with non-local destination addresses will be aggregated, and eventually segmented as needed - no behavior changes intended. Restrict the optimization to kernel sockets only: it covers all the relevant use-cases, and user-space owned sockets could be disconnected and rebound after setup_udp_tunnel_sock(), breaking the uniqueness assumption Note that the UDP tunnel socket reference is stored into struct netns_ipv4 for both IPv4 and IPv6 tunnels. That is intentional to keep all the fastpath-related netns fields in the same struct and allow cacheline-based optimization. Currently both the IPv4 and IPv6 socket pointer share the same cacheline as the `udp_table` field. Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/41d16bc8d1257d567f9344c445b4ae0b4a91ede4.1744040675.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/udp.h | 16 ++++++++++++++++ include/net/netns/ipv4.h | 11 +++++++++++ include/net/udp.h | 1 + include/net/udp_tunnel.h | 12 ++++++++++++ 4 files changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 0807e21cfec9..895240177f4f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -101,6 +101,13 @@ struct udp_sock { /* Cache friendly copy of sk->sk_peek_off >= 0 */ bool peeking_with_offset; + + /* + * Accounting for the tunnel GRO fastpath. + * Unprotected by compilers guard, as it uses space available in + * the last UDP socket cacheline. 
+ */ + struct hlist_node tunnel_list; }; #define udp_test_bit(nr, sk) \ @@ -219,4 +226,13 @@ static inline void udp_allow_gso(struct sock *sk) #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) +static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6) +{ +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk); +#else + return NULL; +#endif +} + #endif /* _LINUX_UDP_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 650b2dc9199f..6373e3f17da8 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -47,6 +47,11 @@ struct sysctl_fib_multipath_hash_seed { }; #endif +struct udp_tunnel_gro { + struct sock __rcu *sk; + struct hlist_head list; +}; + struct netns_ipv4 { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. @@ -85,6 +90,11 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + /* Not in a pernet subsys because need to be available at GRO stage */ + struct udp_tunnel_gro udp_tunnel_gro[2]; +#endif + #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; struct ctl_table_header *frags_hdr; @@ -277,4 +287,5 @@ struct netns_ipv4 { struct hlist_head *inet_addr_lst; struct delayed_work addr_chk_work; }; + #endif diff --git a/include/net/udp.h b/include/net/udp.h index 6e89520e100d..a772510b2aa5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -290,6 +290,7 @@ static inline void udp_lib_init_sock(struct sock *sk) struct udp_sock *up = udp_sk(sk); skb_queue_head_init(&up->reader_queue); + INIT_HLIST_NODE(&up->tunnel_list); up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index a93dc51f6323..1bb2b852e90e 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -191,6 +191,18 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) } #endif +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) +void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add); +#else +static inline void udp_tunnel_update_gro_lookup(struct net *net, + struct sock *sk, bool add) {} +#endif + +static inline void udp_tunnel_cleanup_gro(struct sock *sk) +{ + udp_tunnel_update_gro_lookup(sock_net(sk), sk, false); +} + static inline void udp_tunnel_encap_enable(struct sock *sk) { if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) -- cgit v1.2.3 From 5d7f5b2f6b935517ee5fd8058dc32342a5cba3e1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 7 Apr 2025 17:45:42 +0200 Subject: udp_tunnel: use static call for GRO hooks when possible It's quite common to have a single UDP tunnel type active in the whole system. In such a case we can replace the indirect call for the UDP tunnel GRO callback with a static call. Add the related accounting in the control path and switch to static call when possible. To keep the code simple use a static array for the registered tunnel types, and size such array based on the kernel config. Note that there are valid kernel configurations leading to UDP_MAX_TUNNEL_TYPES == 0 even with IS_ENABLED(CONFIG_NET_UDP_TUNNEL), Explicitly skip the accounting in such a case, to avoid compile warning when accessing "udp_tunnel_gro_types". 
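As a rough illustration of the idea (all names below are made up for the sketch and do not match the in-tree implementation): the control path installs the only registered GRO receive hook into a static call, and the fast path then dispatches through it without an indirect branch.

	#include <linux/static_call.h>
	#include <net/sock.h>

	/* signature of a UDP tunnel GRO receive hook (illustrative) */
	typedef struct sk_buff *hypo_gro_rcv_t(struct sock *sk,
						struct list_head *head,
						struct sk_buff *skb);

	DEFINE_STATIC_CALL_NULL(hypo_udp_tunnel_gro_rcv, hypo_gro_rcv_t);

	/* control path: install the single registered hook, or NULL again */
	static void hypo_set_gro_rcv(hypo_gro_rcv_t *fn)
	{
		static_call_update(hypo_udp_tunnel_gro_rcv, fn);
	}

	/* fast path: only reached once a hook has been installed */
	static struct sk_buff *hypo_gro_receive(struct sock *sk,
						struct list_head *head,
						struct sk_buff *skb)
	{
		return static_call(hypo_udp_tunnel_gro_rcv)(sk, head, skb);
	}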
Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/53d156cdfddcc9678449e873cc83e68fa1582653.1744040675.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/net/udp_tunnel.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 1bb2b852e90e..288f06f23a80 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -193,13 +193,16 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add); +void udp_tunnel_update_gro_rcv(struct sock *sk, bool add); #else static inline void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add) {} +static inline void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) {} #endif static inline void udp_tunnel_cleanup_gro(struct sock *sk) { + udp_tunnel_update_gro_rcv(sk, false); udp_tunnel_update_gro_lookup(sock_net(sk), sk, false); } @@ -212,6 +215,7 @@ static inline void udp_tunnel_encap_enable(struct sock *sk) if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif + udp_tunnel_update_gro_rcv(sk, true); udp_encap_enable(); } -- cgit v1.2.3 From 420aabef3ab5fa743afb4d3d391f03ef0e777ca8 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 7 Apr 2025 21:01:02 +0200 Subject: net: Drop unused @sk of __skb_try_recv_from_queue() __skb_try_recv_from_queue() deals with a queue, @sk is not used since commit e427cad6eee4 ("net: datagram: drop 'destructor' argument from several helpers"). Remove sk from function parameters, adapt callers. No functional change intended. Signed-off-by: Michal Luczaj Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250407-cleanup-drop-param-sk-v1-1-cd076979afac@rbox.co Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b974a277975a..f1381aff0f89 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4105,8 +4105,7 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, int *err, long *timeo_p, const struct sk_buff *skb); -struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, - struct sk_buff_head *queue, +struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last); -- cgit v1.2.3 From c07d3aede2b26830ee63f64d8326f6a87dee3a6d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 4 Apr 2025 15:58:59 -0700 Subject: fscrypt: add support for hardware-wrapped keys Add support for hardware-wrapped keys to fscrypt. Such keys are protected from certain attacks, such as cold boot attacks. For more information, see the "Hardware-wrapped keys" section of Documentation/block/inline-encryption.rst. To support hardware-wrapped keys in fscrypt, we allow the fscrypt master keys to be hardware-wrapped. File contents encryption is done by passing the wrapped key to the inline encryption hardware via blk-crypto. Other fscrypt operations such as filenames encryption continue to be done by the kernel, using the "software secret" which the hardware derives. For more information, see the documentation which this patch adds to Documentation/filesystems/fscrypt.rst. 
Note that this feature doesn't require any filesystem-specific changes. However it does depend on inline encryption support, and thus currently it is only applicable to ext4 and f2fs. The version of this feature introduced by this patch is mostly equivalent to the version that has existed downstream in the Android Common Kernels since 2020. However, a couple fixes are included. First, the flags field in struct fscrypt_add_key_arg is now placed in the proper location. Second, key identifiers for HW-wrapped keys are now derived using a distinct HKDF context byte; this fixes a bug where a raw key could have the same identifier as a HW-wrapped key. Note that as a result of these fixes, the version of this feature introduced by this patch is not UAPI or on-disk format compatible with the version in the Android Common Kernels, though the divergence is limited to just those specific fixes. This version should be used going forwards. This patch has been heavily rewritten from the original version by Gaurav Kashyap and Barani Muthukumaran . Tested-by: Bartosz Golaszewski # sm8650 Link: https://lore.kernel.org/r/20250404225859.172344-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/uapi/linux/fscrypt.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 7a8f4c290187..3aff99f2696a 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -119,7 +119,7 @@ struct fscrypt_key_specifier { */ struct fscrypt_provisioning_key_payload { __u32 type; - __u32 __reserved; + __u32 flags; __u8 raw[]; }; @@ -128,7 +128,9 @@ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; __u32 key_id; - __u32 __reserved[8]; +#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 flags; + __u32 __reserved[7]; __u8 raw[]; }; -- cgit v1.2.3 From 265daffe788aa1cc5925d0afcde4fe6e99c66638 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 7 Apr 2025 09:08:14 +0200 Subject: gpio: provide gpiod_is_equal() There are users in the kernel that directly compare raw GPIO descriptor pointers in order to determine whether they refer to the same physical GPIO pin. This accidentally works like this but is not guaranteed by any API contract. Let's provide a comparator function that hides the actual logic. 
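As a short usage sketch (the driver context is made up), a caller that used to compare raw descriptor pointers switches to the comparator:

	#include <linux/gpio/consumer.h>

	static bool hypo_shares_pin(struct gpio_desc *enable, struct gpio_desc *reset)
	{
		/* was: "return enable == reset;", which only worked by accident */
		return gpiod_is_equal(enable, reset);
	}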
Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250407-gpiod-is-equal-v1-1-7d85f568ae6e@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/consumer.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 45b651c05b9c..7355abadaef4 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -180,6 +180,8 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, enum gpiod_flags flags, const char *label); +bool gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other); + #else /* CONFIG_GPIOLIB */ #include @@ -547,6 +549,13 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, return ERR_PTR(-ENOSYS); } +static inline bool +gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other) +{ + WARN_ON(desc || other); + return false; +} + #endif /* CONFIG_GPIOLIB */ #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_HTE) -- cgit v1.2.3 From dd293df6395a2c9e0fc4faa8defeceaa907e7717 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Sun, 29 Dec 2024 16:11:25 +0100 Subject: tracing: Move trace sysctls into trace.c Move trace ctl tables into their own const array in kernel/trace/trace.c. The sysctl table register is called with subsys_initcall placing if after its original place in proc_root_init. This is part of a greater effort to move ctl tables into their respective subsystems which will reduce the merge conflicts in kernel/sysctl.c. Signed-off-by: Joel Granados Acked-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fbabc3d848b3..59774513ae45 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1298,16 +1298,9 @@ static inline void unpause_graph_tracing(void) { } #ifdef CONFIG_TRACING enum ftrace_dump_mode; -#define MAX_TRACER_SIZE 100 -extern char ftrace_dump_on_oops[]; extern int ftrace_dump_on_oops_enabled(void); -extern int tracepoint_printk; extern void disable_trace_on_warning(void); -extern int __disable_trace_on_warning; - -int tracepoint_printk_sysctl(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); #else /* CONFIG_TRACING */ static inline void disable_trace_on_warning(void) { } -- cgit v1.2.3 From 67049b53e06fa1758df1463789f286a7cba67c50 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Wed, 8 Jan 2025 12:55:58 +0100 Subject: stack_tracer: move sysctl registration to kernel/trace/trace_stack.c Move stack_tracer_enabled into trace_stack_sysctl_table. This is part of a greater effort to move ctl tables into their respective subsystems which will reduce the merge conflicts in kernel/sysctl.c. 
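The pattern being applied looks roughly like the sketch below; the table, variable and path names are illustrative rather than the actual trace tables:

	#include <linux/sysctl.h>
	#include <linux/init.h>

	static int hypo_tracer_enabled;

	static const struct ctl_table hypo_trace_table[] = {
		{
			.procname	= "hypo_tracer_enabled",
			.data		= &hypo_tracer_enabled,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
	};

	static int __init hypo_trace_sysctl_init(void)
	{
		register_sysctl_init("kernel", hypo_trace_table);
		return 0;
	}
	subsys_initcall(hypo_trace_sysctl_init);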
Acked-by: Steven Rostedt (Google) Signed-off-by: Joel Granados --- include/linux/ftrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 59774513ae45..95851a6fb942 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -569,8 +569,6 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, #ifdef CONFIG_STACK_TRACER -extern int stack_tracer_enabled; - int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -- cgit v1.2.3 From e1bdbbc98279164d910d2de82a745f090a8b249f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 7 Apr 2025 13:36:55 -0500 Subject: ACPI: Add missing prototype for non CONFIG_SUSPEND/CONFIG_X86 case acpi_register_lps0_dev() and acpi_unregister_lps0_dev() may be used in drivers that don't require CONFIG_SUSPEND or compile on !X86. Add prototypes for those cases. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502191627.fRgoBwcZ-lkp@intel.com/ Signed-off-by: Mario Limonciello Link: https://patch.msgid.link/20250407183656.1503446-1-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3f2e93ed9730..fc372bbaa547 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1125,13 +1125,13 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); -#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) struct acpi_s2idle_dev_ops { struct list_head list_node; void (*prepare)(void); void (*check)(void); void (*restore)(void); }; +#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); int acpi_get_lps0_constraint(struct acpi_device *adev); @@ -1140,6 +1140,13 @@ static inline int acpi_get_lps0_constraint(struct device *dev) { return ACPI_STATE_UNKNOWN; } +static inline int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg) +{ + return -ENODEV; +} +static inline void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg) +{ +} #endif /* CONFIG_SUSPEND && CONFIG_X86 */ void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else -- cgit v1.2.3 From b84001ad0ceeb34bc3fd6c383f197326d4fe8353 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Mon, 24 Mar 2025 16:56:49 +0900 Subject: RDMA/rxe: Enable ODP in ATOMIC WRITE operation Add rxe_odp_do_atomic_write() so that ODP specific steps are applied to ATOMIC WRITE requests. 
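As a hedged illustration (the helper is made up; the field layout follows the existing ib_verbs ODP caps), a consumer could test the new capability bit before relying on ODP-backed atomic writes:

	#include <rdma/ib_verbs.h>

	static bool hypo_odp_atomic_write_ok(struct ib_device *dev)
	{
		/* RC transport shown; other transports are checked the same way */
		return dev->attrs.odp_caps.per_transport_caps.rc_odp_caps &
		       IB_ODP_SUPPORT_ATOMIC_WRITE;
	}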
Signed-off-by: Daisuke Matsuda Link: https://patch.msgid.link/20250324075649.3313968-3-matsuda-daisuke@fujitsu.com Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 41bf98ccb275..0a7ccd08b365 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -326,6 +326,7 @@ enum ib_odp_transport_cap_bits { IB_ODP_SUPPORT_ATOMIC = 1 << 4, IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, IB_ODP_SUPPORT_FLUSH = 1 << 6, + IB_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7, }; struct ib_odp_caps { -- cgit v1.2.3 From 092d00ead733563f6d278295e0b5c5f97558b726 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Mar 2025 11:56:38 +0100 Subject: cleanup: Provide retain_and_null_ptr() In cases where an allocation is consumed by another function, the allocation needs to be retained on success or freed on failure. The code pattern is usually: struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL); struct bar *b; ,,, // Initialize f ... if (ret) goto free; ... bar = bar_create(f); if (!bar) { ret = -ENOMEM; goto free; } ... return 0; free: kfree(f); return ret; This prevents using __free(kfree) on @f because there is no canonical way to tell the cleanup code that the allocation should not be freed. Abusing no_free_ptr() by force ignoring the return value is not really a sensible option either. Provide an explicit macro retain_and_null_ptr(), which NULLs the cleanup pointer. That makes it easy to analyze and reason about. Signed-off-by: Thomas Gleixner Reviewed-by: Jonathan Cameron Reviewed-by: James Bottomley Link: https://lore.kernel.org/all/20250319105506.083538907@linutronix.de --- include/linux/cleanup.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 7e57047e1564..7093e1d08af0 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -216,6 +216,25 @@ const volatile void * __must_check_fn(const volatile void *val) #define return_ptr(p) return no_free_ptr(p) +/* + * Only for situations where an allocation is handed in to another function + * and consumed by that function on success. + * + * struct foo *f __free(kfree) = kzalloc(sizeof(*f), GFP_KERNEL); + * + * setup(f); + * if (some_condition) + * return -EINVAL; + * .... + * ret = bar(f); + * if (!ret) + * retain_and_null_ptr(f); + * return ret; + * + * After retain_and_null_ptr(f) the variable f is NULL and cannot be + * dereferenced anymore. + */ +#define retain_and_null_ptr(p) ((void)__get_and_null(p, NULL)) /* * DEFINE_CLASS(name, type, exit, init, init_args...): -- cgit v1.2.3 From 0dac2b09303c89ffb21d25d40bf6aefd39f214b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Mar 2025 11:56:39 +0100 Subject: genirq/msi: Use lock guards for MSI descriptor locking Provide a lock guard for MSI descriptor locking and update the core code accordingly. No functional change intended. 
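A minimal sketch of what the guard enables at call sites (the loop body is illustrative): the explicit lock/unlock pair becomes a scoped guard that drops the lock on every return path.

	#include <linux/cleanup.h>
	#include <linux/msi.h>

	static int hypo_program_msi(struct device *dev)
	{
		struct msi_desc *desc;

		guard(msi_descs_lock)(dev);

		msi_for_each_desc(desc, dev, MSI_DESC_ALL) {
			/* descriptors stay protected for the whole scope */
		}
		return 0;
	}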
Signed-off-by: Thomas Gleixner Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/all/20250319105506.144672678@linutronix.de --- include/linux/irqdomain.h | 2 ++ include/linux/msi.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index bb7111105296..2f955716cf82 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -281,6 +281,8 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) void irq_domain_free_fwnode(struct fwnode_handle *fwnode); +DEFINE_FREE(irq_domain_free_fwnode, struct fwnode_handle *, if (_T) irq_domain_free_fwnode(_T)) + struct irq_domain_chip_generic_info; /** diff --git a/include/linux/msi.h b/include/linux/msi.h index 86e42742fd0f..4e439916dbcf 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -232,6 +232,9 @@ int msi_setup_device_data(struct device *dev); void msi_lock_descs(struct device *dev); void msi_unlock_descs(struct device *dev); +DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, msi_lock_descs(_T->lock), + msi_unlock_descs(_T->lock)); + struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, enum msi_desc_filter filter); -- cgit v1.2.3 From 9357e329cdeb6f2d27b431a22d4965700bec478a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Mar 2025 11:57:01 +0100 Subject: genirq/msi: Rename msi_[un]lock_descs() Now that all abuse is gone and the legit users are converted to guard(msi_descs_lock), rename the lock functions and document them as internal. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/all/20250319105506.864699741@linutronix.de --- include/linux/msi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 4e439916dbcf..8c0ec9fc05a3 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -229,11 +229,11 @@ struct msi_dev_domain { int msi_setup_device_data(struct device *dev); -void msi_lock_descs(struct device *dev); -void msi_unlock_descs(struct device *dev); +void __msi_lock_descs(struct device *dev); +void __msi_unlock_descs(struct device *dev); -DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, msi_lock_descs(_T->lock), - msi_unlock_descs(_T->lock)); +DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, __msi_lock_descs(_T->lock), + __msi_unlock_descs(_T->lock)); struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, enum msi_desc_filter filter); -- cgit v1.2.3 From 6fec833b9d70c54ceacbf7d07665215fbd0cddef Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Mar 2025 21:42:48 +0100 Subject: cpufreq: Add and use cpufreq policy locking guards Introduce "read" and "write" locking guards for cpufreq policies and use them where applicable in the cpufreq core. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Reviewed-by: Mario Limonciello Acked-by: Sudeep Holla Tested-by: Sudeep Holla Link: https://patch.msgid.link/8518682.T7Z3S40VBb@rjwysocki.net --- include/linux/cpufreq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 400fee6427a5..cb972d2aa8df 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -170,6 +170,12 @@ struct cpufreq_policy { struct notifier_block nb_max; }; +DEFINE_GUARD(cpufreq_policy_write, struct cpufreq_policy *, + down_write(&_T->rwsem), up_write(&_T->rwsem)) + +DEFINE_GUARD(cpufreq_policy_read, struct cpufreq_policy *, + down_read(&_T->rwsem), up_read(&_T->rwsem)) + /* * Used for passing new cpufreq policy data to the cpufreq driver's ->verify() * callback for sanitization. That callback is only expected to modify the min -- cgit v1.2.3 From c7282dce257480f0f22ed3db69cfb400a18709f4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Mar 2025 21:45:38 +0100 Subject: cpufreq: Drop cpufreq_cpu_acquire() and cpufreq_cpu_release() Since cpufreq_cpu_acquire() and cpufreq_cpu_release() have no more users in the tree, remove them. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Reviewed-by: Mario Limonciello Acked-by: Sudeep Holla Tested-by: Sudeep Holla Link: https://patch.msgid.link/3880470.kQq0lBPeGt@rjwysocki.net --- include/linux/cpufreq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cb972d2aa8df..a33a094ef755 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -241,8 +241,6 @@ void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); -struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); -void cpufreq_cpu_release(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); void refresh_frequency_limits(struct cpufreq_policy *policy); void cpufreq_update_policy(unsigned int cpu); -- cgit v1.2.3 From eaff6b62d3439ca6ee00dba4f77673a8c37dac20 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Mar 2025 21:48:54 +0100 Subject: cpufreq: Pass policy pointer to ->update_limits() Since cpufreq_update_limits() obtains a cpufreq policy pointer for the given CPU and reference counts the corresponding policy object, it may as well pass the policy pointer to the cpufreq driver's ->update_limits() callback which allows that callback to avoid invoking cpufreq_cpu_get() for the same CPU. Accordingly, redefine ->update_limits() to take a policy pointer instead of a CPU number and update both drivers implementing it, intel_pstate and amd-pstate, as needed. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Reviewed-by: Mario Limonciello Acked-by: Srinivas Pandruvada Acked-by: Sudeep Holla Tested-by: Sudeep Holla Link: https://patch.msgid.link/8560367.NyiUUSuA9g@rjwysocki.net --- include/linux/cpufreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a33a094ef755..f3cf2adea18f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -399,7 +399,7 @@ struct cpufreq_driver { unsigned int (*get)(unsigned int cpu); /* Called to update policy limits on firmware notifications. 
*/ - void (*update_limits)(unsigned int cpu); + void (*update_limits)(struct cpufreq_policy *policy); /* optional */ int (*bios_limit)(int cpu, unsigned int *limit); -- cgit v1.2.3 From 855c634930f04c21434fae9c41a7a7372b6ac879 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 27 Mar 2025 12:07:08 +0100 Subject: PCI: Remove pcim_iounmap_regions() All users of the deprecated function pcim_iounmap_regions() have been ported by now. Remove it. Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Zijun Hu Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250327110707.20025-4-phasta@kernel.org --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..a60fdb344d9e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2322,7 +2322,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); -void pcim_iounmap_regions(struct pci_dev *pdev, int mask); void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar, unsigned long offset, unsigned long len); -- cgit v1.2.3 From a82dc19db13649aa4232ce37cb6f4ceff851e2fe Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Apr 2025 12:59:48 -0700 Subject: net: avoid potential race between netdev_get_by_index_lock() and netns switch netdev_get_by_index_lock() performs following steps: rcu_lock(); dev = lookup(netns, ifindex); dev_get(dev); rcu_unlock(); [... lock & validate the dev ...] return dev Validation right now only checks if the device is registered but since the lookup is netns-aware we must also protect against the device switching netns right after we dropped the RCU lock. Otherwise the caller in netns1 may get a pointer to a device which has just switched to netns2. We can't hold the lock for the entire netns change process (because of the NETDEV_UNREGISTER notifier), and there's no existing marking to indicate that the netns is unlisted because of netns move, so add one. AFAIU none of the existing netdev_get_by_index_lock() callers can suffer from this problem (NAPI code double checks the netns membership and other callers are either under rtnl_lock or not ns-sensitive), so this patch does not have to be treated as a fix. 
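A hedged sketch of the re-validation this enables (not the exact in-tree check): once the instance lock is re-taken, the lookup confirms the device is still registered, still in the caller's netns, and not in the middle of a move.

	#include <linux/netdevice.h>

	static bool hypo_netdev_still_valid(const struct net_device *dev,
					    const struct net *net)
	{
		return dev->reg_state == NETREG_REGISTERED &&
		       !dev->moving_ns &&
		       net_eq(dev_net(dev), net);
	}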
Reviewed-by: Joe Damato Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250408195956.412733-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cf3b6445817b..8e9be80bc167 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1952,6 +1952,7 @@ enum netdev_reg_state { * @priv_destructor: Called from unregister * @npinfo: XXX: need comments on this one * @nd_net: Network namespace this network device is inside + * protected by @lock * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type @@ -2359,6 +2360,9 @@ struct net_device { bool dismantle; + /** @moving_ns: device is changing netns, protected by @lock */ + bool moving_ns; + enum { RTNL_LINK_INITIALIZED, RTNL_LINK_INITIALIZING, @@ -2521,7 +2525,7 @@ struct net_device { * @net_shaper_hierarchy, @reg_state, @threaded * * Double protects: - * @up + * @up, @moving_ns, @nd_net * * Double ops protects: * @real_num_rx_queues, @real_num_tx_queues -- cgit v1.2.3 From 606048cbd8346e616cfaee01b0143d072534136d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Apr 2025 12:59:49 -0700 Subject: net: designate XSK pool pointers in queues as "ops protected" Read accesses go via xsk_get_pool_from_qid(), the call coming from the core and gve look safe (other "ops locked" drivers don't support XSK). Write accesses go via xsk_reg_pool_at_qid() and xsk_clear_pool_at_qid(). Former is already under the ops lock, latter is not (both coming from the workqueue via xp_clear_dev() and NETDEV_UNREGISTER via xsk_notifier()). Acked-by: Stanislav Fomichev Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250408195956.412733-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + include/net/netdev_rx_queue.h | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8e9be80bc167..7242fb8a22fc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -688,6 +688,7 @@ struct netdev_queue { /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; #ifdef CONFIG_XDP_SOCKETS + /* "ops protected", see comment about net_device::lock */ struct xsk_buff_pool *pool; #endif diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index b2238b551dce..8cdcd138b33f 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -20,12 +20,12 @@ struct netdev_rx_queue { struct net_device *dev; netdevice_tracker dev_tracker; + /* All fields below are "ops protected", + * see comment about net_device::lock + */ #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif - /* NAPI instance for the queue - * "ops protected", see comment about net_device::lock - */ struct napi_struct *napi; struct pp_memory_provider_params mp_params; } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From 4ec9031cbeb73a66979560bbb6d355329be762de Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Apr 2025 12:59:50 -0700 Subject: netdev: add "ops compat locking" helpers Add helpers to "lock a netdev in a backward-compatible way", which for ops-locked netdevs will mean take the instance lock. For drivers which haven't opted into the ops locking we'll take rtnl_lock. 
The scoped foreach is dropping and re-taking the lock for each device, even if prev and next are both under rtnl_lock. I hope that's fine since we expect that netdev nl to be mostly supported by modern drivers, and modern drivers should also opt into the instance locking. Note that these helpers are mostly needed for queue related state, because drivers modify queue config in their ops in a non-atomic way. Or differently put, queue changes don't have a clear-cut API like NAPI configuration. Any state that can should just use the instance lock directly, not the "compat" hacks. Reviewed-by: Joe Damato Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250408195956.412733-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_lock.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index c316b551df8d..5706835a660c 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -64,6 +64,22 @@ netdev_ops_assert_locked_or_invisible(const struct net_device *dev) netdev_ops_assert_locked(dev); } +static inline void netdev_lock_ops_compat(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_lock(dev); + else + rtnl_lock(); +} + +static inline void netdev_unlock_ops_compat(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_unlock(dev); + else + rtnl_unlock(); +} + static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) { -- cgit v1.2.3 From 03df156dd3a6d5992f17682cd5c3b11e5ffdae02 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Apr 2025 12:59:52 -0700 Subject: xdp: double protect netdev->xdp_flags with netdev->lock Protect xdp_features with netdev->lock. This way pure readers no longer have to take rtnl_lock to access the field. This includes calling NETDEV_XDP_FEAT_CHANGE under the lock. Looks like that's fine for bonding, the only "real" listener, it's the same as ethtool feature change. In terms of normal drivers - only GVE need special consideration (other drivers don't use instance lock or don't support XDP). It calls xdp_set_features_flag() helper from gve_init_priv() which in turn is called from gve_reset_recovery() (locked), or prior to netdev registration. So switch to _locked. 
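A rough driver-side sketch (feature bits and helper body are illustrative): a path that already holds the instance lock updates the flags through the _locked variant instead of the plain one.

	#include <linux/netdevice.h>
	#include <net/xdp.h>

	/* called from a context that already holds dev's instance lock */
	static void hypo_update_xdp_features(struct net_device *dev, bool have_zc)
	{
		xdp_features_t val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;

		if (have_zc)
			val |= NETDEV_XDP_ACT_XSK_ZEROCOPY;

		xdp_set_features_flag_locked(dev, val);
	}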
Reviewed-by: Joe Damato Acked-by: Stanislav Fomichev Acked-by: Harshitha Ramamurthy Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20250408195956.412733-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- include/net/xdp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7242fb8a22fc..dece2ae396a1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2526,7 +2526,7 @@ struct net_device { * @net_shaper_hierarchy, @reg_state, @threaded * * Double protects: - * @up, @moving_ns, @nd_net + * @up, @moving_ns, @nd_net, @xdp_flags * * Double ops protects: * @real_num_rx_queues, @real_num_tx_queues diff --git a/include/net/xdp.h b/include/net/xdp.h index 48efacbaa35d..20e41b5ff319 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -616,6 +616,7 @@ struct xdp_metadata_ops { u32 bpf_xdp_metadata_kfunc_id(int id); bool bpf_dev_bound_kfunc_id(u32 btf_id); void xdp_set_features_flag(struct net_device *dev, xdp_features_t val); +void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val); void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg); void xdp_features_clear_redirect_target(struct net_device *dev); #else -- cgit v1.2.3 From ce7b14947484e6190372f2c3dbfb69aafbc4c0fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Apr 2025 12:59:55 -0700 Subject: netdev: depend on netdev->lock for qstats in ops locked drivers We mostly needed rtnl_lock in qstat to make sure the queue count is stable while we work. For "ops locked" drivers the instance lock protects the queue count, so we don't have to take rtnl_lock. For currently ops-locked drivers: netdevsim and bnxt need the protection from netdev going down while we dump, which instance lock provides. gve doesn't care. Reviewed-by: Joe Damato Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250408195956.412733-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 825141d675e5..ea709b59d827 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -85,9 +85,11 @@ struct netdev_queue_stats_tx { * for some of the events is not maintained, and reliable "total" cannot * be provided). * + * Ops are called under the instance lock if netdev_need_ops_lock() + * returns true, otherwise under rtnl_lock. * Device drivers can assume that when collecting total device stats, * the @get_base_stats and subsequent per-queue calls are performed - * "atomically" (without releasing the rtnl_lock). + * "atomically" (without releasing the relevant lock). * * Device drivers are encouraged to reset the per-queue statistics when * number of queues change. This is because the primary use case for -- cgit v1.2.3 From 229671ac60e298b85c2644f52d7e487e9f487d06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Apr 2025 20:27:42 +0000 Subject: net: remove cpu stall in txq_trans_update() txq_trans_update() currently uses txq->xmit_lock_owner to conditionally update txq->trans_start. For regular devices, txq->xmit_lock_owner is updated from HARD_TX_LOCK() and HARD_TX_UNLOCK(), and this apparently causes cpu stalls. Using dev->lltx, which sits in a read-mostly cache-line, and already used in HARD_TX_LOCK() and HARD_TX_UNLOCK() helps cpu prediction. 
On an AMD EPYC 7B12 dual socket server, tcp_rr with 128 threads and 30,000 flows gets a 5 % increase in throughput. As explained in commit 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()") I am planning to no longer update txq->trans_start in the fast path in a followup patch. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250408202742.2145516-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dece2ae396a1..a28a08046615 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4693,9 +4693,10 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) /* * txq->trans_start can be read locklessly from dev_watchdog() */ -static inline void txq_trans_update(struct netdev_queue *txq) +static inline void txq_trans_update(const struct net_device *dev, + struct netdev_queue *txq) { - if (txq->xmit_lock_owner != -1) + if (!dev->lltx) WRITE_ONCE(txq->trans_start, jiffies); } @@ -5214,7 +5215,7 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) - txq_trans_update(txq); + txq_trans_update(dev, txq); return rc; } -- cgit v1.2.3 From b412fd6bcc4c1696b0674434f56b198950c0e2bf Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 8 Apr 2025 11:00:04 +0200 Subject: bpf: Clarify role of BPF_F_RECOMPUTE_CSUM BPF_F_RECOMPUTE_CSUM doesn't update the actual L3 and L4 checksums in the packet, but simply updates skb->csum (according to skb->ip_summed). This patch clarifies that to avoid confusions. Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/ff6895d42936f03dbb82334d8bcfd50e00c79086.1744102490.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 07ee73cdf97b..14ef3db844fa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1995,11 +1995,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. + * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers -- cgit v1.2.3 From 5a15a050df714959f0d5a57ac3201bd1c6594984 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 8 Apr 2025 11:00:51 +0200 Subject: bpf: Clarify the meaning of BPF_F_PSEUDO_HDR In the bpf_l4_csum_replace helper, the BPF_F_PSEUDO_HDR flag should only be set if the modified header field is part of the pseudo-header. 
If you modify for example the UDP ports and pass BPF_F_PSEUDO_HDR, inet_proto_csum_replace4 will update skb->csum even though it shouldn't (the port and the UDP checksum updates null each other). Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/5126ef84ba75425b689482cbc98bffe75e5d8ab0.1744102490.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 14ef3db844fa..71d5ac83cf5d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2055,7 +2055,7 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more -- cgit v1.2.3 From 8702048bb8313a21ec9179e9ce7ad3c5cb2ef072 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Mon, 7 Apr 2025 15:42:25 +0200 Subject: mm/kmap: Add kmap_local_page_try_from_panic() kmap_local_page() can be unsafe to call from a panic handler, if CONFIG_HIGHMEM is set, and the page is in the highmem zone. So add kmap_local_page_try_from_panic() to handle this case. Suggested-by: Simona Vetter Reviewed-by: Thomas Gleixner Signed-off-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20250407140138.162383-2-jfalempe@redhat.com --- include/linux/highmem-internal.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index dd100e849f5e..9a7683d79a4b 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -73,6 +73,14 @@ static inline void *kmap_local_page(struct page *page) return __kmap_local_page_prot(page, kmap_prot); } +static inline void *kmap_local_page_try_from_panic(struct page *page) +{ + if (!PageHighMem(page)) + return page_address(page); + /* If the page is in HighMem, it's not safe to kmap it.*/ + return NULL; +} + static inline void *kmap_local_folio(struct folio *folio, size_t offset) { struct page *page = folio_page(folio, offset / PAGE_SIZE); @@ -180,6 +188,11 @@ static inline void *kmap_local_page(struct page *page) return page_address(page); } +static inline void *kmap_local_page_try_from_panic(struct page *page) +{ + return page_address(page); +} + static inline void *kmap_local_folio(struct folio *folio, size_t offset) { return page_address(&folio->page) + offset; -- cgit v1.2.3 From c9ff2808790f0fd4ca16f30f63abe70914a5a292 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Mon, 7 Apr 2025 15:42:26 +0200 Subject: drm/panic: Add support to scanout buffer as array of pages Some drivers like virtio-gpu, don't map the scanout buffer in the kernel. Calling vmap() in a panic handler is not safe, and writing an atomic_vmap() API is more complex than expected [1]. So instead, pass the array of pages of the scanout buffer to the panic handler, and map only one page at a time to draw the pixels. This is obviously slow, but acceptable for a panic handler. 
[1] https://lore.kernel.org/dri-devel/20250305152555.318159-1-ryasuoka@redhat.com/ Acked-by: Thomas Zimmermann Acked-by: Simona Vetter Signed-off-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20250407140138.162383-3-jfalempe@redhat.com --- include/drm/drm_panic.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index ff78d00c3da5..310c88c4d336 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -39,6 +39,16 @@ struct drm_scanout_buffer { */ struct iosys_map map[DRM_FORMAT_MAX_PLANES]; + /** + * @pages: Optional, if the scanout buffer is not mapped, set this field + * to the array of pages of the scanout buffer. The panic code will use + * kmap_local_page_try_from_panic() to map one page at a time to write + * all the pixels. This array shouldn't be allocated from the + * get_scanoutbuffer() callback. + * The scanout buffer should be in linear format. + */ + struct page **pages; + /** * @width: Width of the scanout buffer, in pixels. */ @@ -57,7 +67,7 @@ struct drm_scanout_buffer { /** * @set_pixel: Optional function, to set a pixel color on the * framebuffer. It allows to handle special tiling format inside the - * driver. + * driver. It takes precedence over the @map and @pages fields. */ void (*set_pixel)(struct drm_scanout_buffer *sb, unsigned int x, unsigned int y, u32 color); -- cgit v1.2.3 From 1490cbb9dbfd0eabfe45f9b674097aea7e6760fc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 Mar 2025 16:54:51 +0200 Subject: device property: Split fwnode_get_child_node_count() The new helper is introduced to allow counting the child firmware nodes of their parent without requiring a device to be passed. This also makes the fwnode and device property API more symmetrical with the rest. Signed-off-by: Andy Shevchenko Acked-by: "Rafael J. Wysocki" Reviewed-by: Sakari Ailus Reviewed-by: Heikki Krogerus Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20250310150835.3139322-2-andriy.shevchenko@linux.intel.com Signed-off-by: Lee Jones --- include/linux/property.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index e214ecd241eb..bc5bfc98176b 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -208,7 +208,12 @@ DEFINE_FREE(fwnode_handle, struct fwnode_handle *, fwnode_handle_put(_T)) int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index); int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name); -unsigned int device_get_child_node_count(const struct device *dev); +unsigned int fwnode_get_child_node_count(const struct fwnode_handle *fwnode); + +static inline unsigned int device_get_child_node_count(const struct device *dev) +{ + return fwnode_get_child_node_count(dev_fwnode(dev)); +} static inline int device_property_read_u8(const struct device *dev, const char *propname, u8 *val) -- cgit v1.2.3 From 7e3711eb87c584ed224a7ad7000eba36e6fa3a51 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 21 Mar 2025 10:53:55 +0100 Subject: fbdev: Track display blanking state Store the display's blank status in struct fb_info.blank and track it in fb_blank(). As an extra, the status is now available from the sysfs blank attribute. Support for blanking is optional. Therefore framebuffer_alloc() initializes the state to FB_BLANK_UNBLANK (i.e., the display is on). 
If the fb_blank callback has been set, register_framebuffer() sets the state to FB_BLANK_POWERDOWN. On the first modeset, the call to fb_blank() will update it to _UNBLANK. This is important, as listeners to FB_EVENT_BLANK will now see the display being switched on. Signed-off-by: Thomas Zimmermann Acked-by: Simona Vetter Link: https://lore.kernel.org/r/20250321095517.313713-3-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/fb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index cd653862ab99..348aa2e146e2 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -472,6 +472,8 @@ struct fb_info { struct list_head modelist; /* mode list */ struct fb_videomode *mode; /* current mode */ + int blank; /* current blanking; see FB_BLANK_ constants */ + #if IS_ENABLED(CONFIG_FB_BACKLIGHT) /* assigned backlight device */ /* set before framebuffer registration, -- cgit v1.2.3 From 726491f2038ec71122d45700f3abf36fdb277aaa Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 21 Mar 2025 10:53:57 +0100 Subject: backlight: Implement fbdev tracking with blank state from event Look at the blank state provided by FB_EVENT_BLANK to determine whether to enable or disable a backlight. Remove the tracking fields from struct backlight_device. Tracking requires three variables, fb_on, prev_fb_on and the backlight's use_count. If fb_on is true, the display has been unblanked. The backlight needs to be enabled if the display was blanked before (i.e., prev_fb_on is false) or if use_count is still at 0. If fb_on is false, the display has been blanked. In this case, the backlight has to be disabled was unblanked before and the backlight's use_count is greater than 0. This change removes fbdev state tracking from blacklight. All the backlight requires it its own use counter and information about changes to the display. Removing fbdev internals makes backlight drivers easier to integrate into other display drivers, such as DRM. Signed-off-by: Thomas Zimmermann Reviewed-by: "Daniel Thompson (RISCstar)" Acked-by: Simona Vetter Link: https://lore.kernel.org/r/20250321095517.313713-5-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/backlight.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index f5652e5a9060..03723a5478f8 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -294,15 +294,7 @@ struct backlight_device { struct device dev; /** - * @fb_bl_on: The state of individual fbdev's. - * - * Multiple fbdev's may share one backlight device. The fb_bl_on - * records the state of the individual fbdev. - */ - bool fb_bl_on[FB_MAX]; - - /** - * @use_count: The number of uses of fb_bl_on. + * @use_count: The number of unblanked displays. */ int use_count; }; -- cgit v1.2.3 From b01beb2f1f6bcda17634a5b529865ffc5a9b795f Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 21 Mar 2025 10:53:59 +0100 Subject: backlight: Replace fb events with a dedicated function call Remove support for fb events from backlight subsystem. Provide the helper backlight_notify_blank_all() instead. Also export the existing helper backlight_notify_blank() to update a single backlight device. In fbdev, call either helper to inform the backlight subsystem of changes to a display's blank state. If the framebuffer device has a specific backlight, only update this one; otherwise update all. 
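A hedged sketch of the resulting call site in fbdev (simplified, with a made-up wrapper name; the real fb_blank() path carries more state than shown here):

	#include <linux/backlight.h>
	#include <linux/fb.h>

	static void hypo_fb_notify_backlight(struct fb_info *info,
					     int blank, int old_blank)
	{
		bool fb_on = blank == FB_BLANK_UNBLANK;
		bool prev_fb_on = old_blank == FB_BLANK_UNBLANK;
		struct backlight_device *bd = fb_bl_device(info);

		if (bd)
			backlight_notify_blank(bd, info->device, fb_on, prev_fb_on);
		else
			backlight_notify_blank_all(info->device, fb_on, prev_fb_on);
	}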
v4: - protect blacklight declarations with IS_REACHABLE() (kernel test robot) v3: - declare empty fb_bl_notify_blank() as static inline (kernel test robot) Signed-off-by: Thomas Zimmermann Acked-by: Simona Vetter Reviewed-by: "Daniel Thompson (RISCstar)" Link: https://lore.kernel.org/r/20250321095517.313713-7-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/backlight.h | 22 ++++++++++++++++------ include/linux/fb.h | 4 ++++ 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 03723a5478f8..10e626db7eee 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -12,7 +12,6 @@ #include #include #include -#include #include /** @@ -278,11 +277,6 @@ struct backlight_device { */ const struct backlight_ops *ops; - /** - * @fb_notif: The framebuffer notifier block - */ - struct notifier_block fb_notif; - /** * @entry: List entry of all registered backlight devices */ @@ -400,6 +394,22 @@ struct backlight_device *backlight_device_get_by_type(enum backlight_type type); int backlight_device_set_brightness(struct backlight_device *bd, unsigned long brightness); +#if IS_REACHABLE(CONFIG_BACKLIGHT_CLASS_DEVICE) +void backlight_notify_blank(struct backlight_device *bd, + struct device *display_dev, + bool fb_on, bool prev_fb_on); +void backlight_notify_blank_all(struct device *display_dev, + bool fb_on, bool prev_fb_on); +#else +static inline void backlight_notify_blank(struct backlight_device *bd, + struct device *display_dev, + bool fb_on, bool prev_fb_on) +{ } +static inline void backlight_notify_blank_all(struct device *display_dev, + bool fb_on, bool prev_fb_on) +{ } +#endif + #define to_backlight_device(obj) container_of(obj, struct backlight_device, dev) /** diff --git a/include/linux/fb.h b/include/linux/fb.h index 348aa2e146e2..835e13d6eba8 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -758,11 +758,15 @@ extern void fb_bl_default_curve(struct fb_info *fb_info, u8 off, u8 min, u8 max) #if IS_ENABLED(CONFIG_FB_BACKLIGHT) struct backlight_device *fb_bl_device(struct fb_info *info); +void fb_bl_notify_blank(struct fb_info *info, int old_blank); #else static inline struct backlight_device *fb_bl_device(struct fb_info *info) { return NULL; } + +static inline void fb_bl_notify_blank(struct fb_info *info, int old_blank) +{ } #endif static inline struct lcd_device *fb_lcd_device(struct fb_info *info) -- cgit v1.2.3 From bc70cc84f5a2ebfd7e7112e9b8837e0c7954fc65 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 21 Mar 2025 10:54:01 +0100 Subject: backlight: lcd: Replace fb events with a dedicated function call Remove support for fb events from the lcd subsystem. Provide the helper lcd_notify_blank_all() instead. In fbdev, call lcd_notify_blank_all() to inform the lcd subsystem of changes to a display's blank state. Fbdev maintains a list of all installed notifiers. Instead of fbdev notifiers, maintain an internal list of lcd devices. 
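A rough sketch of the intended call sites (simplified, with made-up wrapper names): fbdev forwards blank and mode changes to all registered lcd devices through the new helpers.

	#include <linux/fb.h>
	#include <linux/lcd.h>

	static void hypo_fb_lcd_blank(struct fb_info *info, int blank)
	{
		int power = blank == FB_BLANK_UNBLANK ? LCD_POWER_ON : LCD_POWER_OFF;

		lcd_notify_blank_all(info->device, power);
	}

	static void hypo_fb_lcd_mode_change(struct fb_info *info)
	{
		lcd_notify_mode_change_all(info->device,
					   info->var.xres, info->var.yres);
	}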
v3: - export lcd_notify_mode_change_all() (kernel test robot) v2: - maintain global list of lcd devices - avoid IS_REACHABLE() in source file - use lock guards - initialize lcd list and list mutex Signed-off-by: Thomas Zimmermann Acked-by: Simona Vetter Reviewed-by: "Daniel Thompson (RISCstar)" Link: https://lore.kernel.org/r/20250321095517.313713-9-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/lcd.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/lcd.h b/include/linux/lcd.h index c3ccdff4519a..d4fa03722b72 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -11,7 +11,6 @@ #include #include -#include #define LCD_POWER_ON (0) #define LCD_POWER_REDUCED (1) // deprecated; don't use in new code @@ -79,8 +78,11 @@ struct lcd_device { const struct lcd_ops *ops; /* Serialise access to set_power method */ struct mutex update_lock; - /* The framebuffer notifier block */ - struct notifier_block fb_notif; + + /** + * @entry: List entry of all registered lcd devices + */ + struct list_head entry; struct device dev; }; @@ -125,6 +127,19 @@ extern void lcd_device_unregister(struct lcd_device *ld); extern void devm_lcd_device_unregister(struct device *dev, struct lcd_device *ld); +#if IS_REACHABLE(CONFIG_LCD_CLASS_DEVICE) +void lcd_notify_blank_all(struct device *display_dev, int power); +void lcd_notify_mode_change_all(struct device *display_dev, + unsigned int width, unsigned int height); +#else +static inline void lcd_notify_blank_all(struct device *display_dev, int power) +{} + +static inline void lcd_notify_mode_change_all(struct device *display_dev, + unsigned int width, unsigned int height) +{} +#endif + #define to_lcd_device(obj) container_of(obj, struct lcd_device, dev) static inline void * lcd_get_data(struct lcd_device *ld_dev) -- cgit v1.2.3 From dc2139c0aa3283e5749109641af1878ed7bf7371 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 21 Mar 2025 10:54:03 +0100 Subject: leds: backlight trigger: Replace fb events with a dedicated function call Remove support for fb events from the led backlight trigger. Provide the helper ledtrig_backlight_blank() instead. Call it from fbdev to inform the trigger of changes to a display's blank state. Fbdev maintains a list of all installed notifiers. Instead of the fbdev notifiers, maintain an internal list of led backlight triggers. v3: - export ledtrig_backlight_blank() v2: - maintain global list of led backlight triggers (Lee) - avoid IS_REACHABLE() in source file (Lee) - notify on changes to blank state instead of display state - use lock guards - initialize led list and list mutex Signed-off-by: Thomas Zimmermann Acked-by: Simona Vetter Link: https://lore.kernel.org/r/20250321095517.313713-11-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/leds.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 98f9719c924c..b3f0aa081064 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -640,6 +640,12 @@ static inline void ledtrig_flash_ctrl(bool on) {} static inline void ledtrig_torch_ctrl(bool on) {} #endif +#if IS_REACHABLE(CONFIG_LEDS_TRIGGER_BACKLIGHT) +void ledtrig_backlight_blank(bool blank); +#else +static inline void ledtrig_backlight_blank(bool blank) {} +#endif + /* * Generic LED platform data for describing LED names and default triggers. 
*/ -- cgit v1.2.3 From d32a0b567a8a8b6e677d35c4f8eb8bd32b7029a0 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 21 Mar 2025 10:54:04 +0100 Subject: fbdev: Remove constants of unused events The constants FB_EVENT_MODE_CHANGE and FB_EVENT_BLANK are unused. Remove them from the header file. Signed-off-by: Thomas Zimmermann Acked-by: Simona Vetter Link: https://lore.kernel.org/r/20250321095517.313713-12-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/fb.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 835e13d6eba8..05cc251035da 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -129,18 +129,12 @@ struct fb_cursor_user { * Register/unregister for framebuffer events */ -/* The resolution of the passed in fb_info about to change */ -#define FB_EVENT_MODE_CHANGE 0x01 - #ifdef CONFIG_GUMSTIX_AM200EPD /* only used by mach-pxa/am200epd.c */ #define FB_EVENT_FB_REGISTERED 0x05 #define FB_EVENT_FB_UNREGISTERED 0x06 #endif -/* A display blank is requested */ -#define FB_EVENT_BLANK 0x09 - struct fb_event { struct fb_info *info; void *data; -- cgit v1.2.3 From c435e608cf59ffab815aa2571182dc8c50fe4112 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Apr 2025 22:28:54 +0200 Subject: x86/msr: Rename 'rdmsrl()' to 'rdmsrq()' Suggested-by: "H. Peter Anvin" Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Juergen Gross Cc: Dave Hansen Cc: Xin Li Cc: Linus Torvalds --- include/hyperv/hvgdk_mini.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index abf0bd76e370..53699750db86 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -1013,7 +1013,7 @@ enum hv_register_name { /* * To support arch-generic code calling hv_set/get_register: - * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl + * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrq/wrmsrl * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall */ #define HV_MSR_CRASH_P0 (HV_X64_MSR_CRASH_P0) -- cgit v1.2.3 From 78255eb23973323633432d9ec40b65c15e41888a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Apr 2025 22:28:55 +0200 Subject: x86/msr: Rename 'wrmsrl()' to 'wrmsrq()' Suggested-by: "H. Peter Anvin" Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Juergen Gross Cc: Dave Hansen Cc: Xin Li Cc: Linus Torvalds --- include/hyperv/hvgdk_mini.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 53699750db86..68606fa5fe73 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -1013,7 +1013,7 @@ enum hv_register_name { /* * To support arch-generic code calling hv_set/get_register: - * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrq/wrmsrl + * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrq/wrmsrq * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall */ #define HV_MSR_CRASH_P0 (HV_X64_MSR_CRASH_P0) -- cgit v1.2.3 From 589a7c406a721f5d3a818ad0003799180f027dfa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Apr 2025 12:20:43 +0200 Subject: cpufreq: Drop unused cpufreq_get_policy() A recent change has introduced a bug into cpufreq_get_policy(), but this function is not used, so it's better to drop it altogether. Reported-by: Dan Carpenter Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Acked-by: Sudeep Holla Link: https://patch.msgid.link/2802770.mvXUDI8C0e@rjwysocki.net --- include/linux/cpufreq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index f3cf2adea18f..850fe7371cb1 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -241,7 +241,6 @@ void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); -int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); void refresh_frequency_limits(struct cpufreq_policy *policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); -- cgit v1.2.3 From 588d5d20ca8defa5ba5d1b536ff3695f6ab7aa87 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Thu, 10 Apr 2025 14:01:14 +0530 Subject: phy: exynos5-usbdrd: add exynos7870 USBDRD support Implement support for Exynos7870 USB DRD on top of the existing exynos5-usbdrd driver. Exynos7870 has a single USB 2.0 DRD PHY controller and no 3.0 PHYs. Thus, it only supports the UTMI interface. Moreover, the PMU register offset for enabling the PHY controller is different for SoCs such as Exynos7870, where BIT(0) is for the 3.0 PHY and BIT(1) is for the 2.0 PHY. The phy_isol function for Exynos7870 uses the appropriate register offsets. Signed-off-by: Kaustabh Chakraborty Link: https://lore.kernel.org/r/20250410-exynos7870-usbphy-v2-3-2eb005987455@disroot.org Signed-off-by: Vinod Koul --- include/linux/soc/samsung/exynos-regs-pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index ce1a3790d6fb..cde299a85384 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -55,6 +55,8 @@ #define EXYNOS4_MIPI_PHY_SRESETN (1 << 1) #define EXYNOS4_MIPI_PHY_MRESETN (1 << 2) #define EXYNOS4_MIPI_PHY_RESET_MASK (3 << 1) +/* USB PHY enable bit, valid for Exynos7870 */ +#define EXYNOS7870_USB2PHY_ENABLE (1 << 1) #define S5P_INFORM0 0x0800 #define S5P_INFORM1 0x0804 -- cgit v1.2.3 From b2849b0723668ae3e0739b2c9c066eafe8bc0961 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 3 Apr 2025 12:09:40 +0200 Subject: svsm: Add header with SVSM_VTPM_CMD helpers Add helpers for the SVSM_VTPM_CMD calls used by the vTPM protocol defined by the AMD SVSM spec [1]. The vTPM protocol follows the Official TPM 2.0 Reference Implementation (originally by Microsoft, now part of the TCG) simulator protocol. [1] "Secure VM Service Module for SEV-SNP Guests" Publication # 58019 Revision: 1.00 Co-developed-by: James Bottomley Signed-off-by: James Bottomley Co-developed-by: Claudio Carvalho Signed-off-by: Claudio Carvalho Signed-off-by: Stefano Garzarella Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Jarkko Sakkinen Link: https://lore.kernel.org/r/20250403100943.120738-3-sgarzare@redhat.com --- include/linux/tpm_svsm.h | 149 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 include/linux/tpm_svsm.h (limited to 'include') diff --git a/include/linux/tpm_svsm.h b/include/linux/tpm_svsm.h new file mode 100644 index 000000000000..38e341f9761a --- /dev/null +++ b/include/linux/tpm_svsm.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 James.Bottomley@HansenPartnership.com + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. 
+ * + * Helpers for the SVSM_VTPM_CMD calls used by the vTPM protocol defined by the + * AMD SVSM spec [1]. + * + * The vTPM protocol follows the Official TPM 2.0 Reference Implementation + * (originally by Microsoft, now part of the TCG) simulator protocol. + * + * [1] "Secure VM Service Module for SEV-SNP Guests" + * Publication # 58019 Revision: 1.00 + */ +#ifndef _TPM_SVSM_H_ +#define _TPM_SVSM_H_ + +#include +#include +#include + +#define SVSM_VTPM_MAX_BUFFER 4096 /* max req/resp buffer size */ + +/** + * struct svsm_vtpm_request - Generic request for single word command + * @cmd: The command to send + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 15: vTPM Common Request/Response Structure + * Byte Size     In/Out    Description + * Offset    (Bytes) + * 0x000     4          In        Platform command + *                         Out       Platform command response size + */ +struct svsm_vtpm_request { + u32 cmd; +}; + +/** + * struct svsm_vtpm_response - Generic response + * @size: The response size (zero if nothing follows) + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 15: vTPM Common Request/Response Structure + * Byte Size     In/Out    Description + * Offset    (Bytes) + * 0x000     4          In        Platform command + *                         Out       Platform command response size + * + * Note: most TCG Simulator commands simply return zero here with no indication + * of success or failure. + */ +struct svsm_vtpm_response { + u32 size; +}; + +/** + * struct svsm_vtpm_cmd_request - Structure for a TPM_SEND_COMMAND request + * @cmd: The command to send (must be TPM_SEND_COMMAND) + * @locality: The locality + * @buf_size: The size of the input buffer following + * @buf: A buffer of size buf_size + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 16: TPM_SEND_COMMAND Request Structure + * Byte Size Meaning + * Offset    (Bytes) + * 0x000     4          Platform command (8) + * 0x004     1          Locality (must-be-0) + * 0x005     4          TPM Command size (in bytes) + * 0x009     Variable   TPM Command + * + * Note: the TCG Simulator expects @buf_size to be equal to the size of the + * specific TPM command, otherwise an TPM_RC_COMMAND_SIZE error is returned. + */ +struct svsm_vtpm_cmd_request { + u32 cmd; + u8 locality; + u32 buf_size; + u8 buf[]; +} __packed; + +/** + * struct svsm_vtpm_cmd_response - Structure for a TPM_SEND_COMMAND response + * @buf_size: The size of the output buffer following + * @buf: A buffer of size buf_size + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 17: TPM_SEND_COMMAND Response Structure + * Byte Size Meaning + * Offset    (Bytes) + * 0x000     4          Response size (in bytes) + * 0x004     Variable   Response + */ +struct svsm_vtpm_cmd_response { + u32 buf_size; + u8 buf[]; +}; + +/** + * svsm_vtpm_cmd_request_fill() - Fill a TPM_SEND_COMMAND request to be sent to SVSM + * @req: The struct svsm_vtpm_cmd_request to fill + * @locality: The locality + * @buf: The buffer from where to copy the payload of the command + * @len: The size of the buffer + * + * Return: 0 on success, negative error code on failure. 
+ */ +static inline int +svsm_vtpm_cmd_request_fill(struct svsm_vtpm_cmd_request *req, u8 locality, + const u8 *buf, size_t len) +{ + if (len > SVSM_VTPM_MAX_BUFFER - sizeof(*req)) + return -EINVAL; + + req->cmd = 8; /* TPM_SEND_COMMAND */ + req->locality = locality; + req->buf_size = len; + + memcpy(req->buf, buf, len); + + return 0; +} + +/** + * svsm_vtpm_cmd_response_parse() - Parse a TPM_SEND_COMMAND response received from SVSM + * @resp: The struct svsm_vtpm_cmd_response to parse + * @buf: The buffer where to copy the response + * @len: The size of the buffer + * + * Return: buffer size filled with the response on success, negative error + * code on failure. + */ +static inline int +svsm_vtpm_cmd_response_parse(const struct svsm_vtpm_cmd_response *resp, u8 *buf, + size_t len) +{ + if (len < resp->buf_size) + return -E2BIG; + + if (resp->buf_size > SVSM_VTPM_MAX_BUFFER - sizeof(*resp)) + return -EINVAL; // Invalid response from the platform TPM + + memcpy(buf, resp->buf, resp->buf_size); + + return resp->buf_size; +} + +#endif /* _TPM_SVSM_H_ */ -- cgit v1.2.3 From 74a70e80daa993445a8f0be817f8152a3b9d3b41 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 9 Apr 2025 22:43:10 +0200 Subject: PCI: Remove pci_fixup_cardbus() Since 1c7f4fe86f17 ("powerpc/pci: Remove pcibios_setup_bus_devices()") there's no architecture left setting pci_fixup_cardbus. Therefore remove support from PCI core. Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/8de7da4c-2b16-4ee1-8c42-0d04f3c821c6@gmail.com --- include/linux/pci.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..d26e6611bd00 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1139,9 +1139,6 @@ resource_size_t pcibios_align_resource(void *, const struct resource *, resource_size_t, resource_size_t); -/* Weak but can be overridden by arch */ -void pci_fixup_cardbus(struct pci_bus *); - /* Generic PCI functions used internally */ void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region, -- cgit v1.2.3 From 04271411121a58d37f47b065bc872f333274bf1f Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 9 Apr 2025 19:26:04 +0800 Subject: tcp: add TCP_RFC7323_TW_PAWS drop reason Devices in the networking path, such as firewalls, NATs, or routers, which can perform SNAT or DNAT, use addresses from their own limited address pools to masquerade the source address during forwarding, causing PAWS verification to fail more easily. Currently, packet loss statistics for PAWS can only be viewed through MIB, which is a global metric and cannot be precisely obtained through tracing to get the specific 4-tuple of the dropped packet. In the past, we had to use kprobe ret to retrieve relevant skb information from tcp_timewait_state_process(). We add a drop_reason pointer, similar to what previous commit does: commit e34100c2ecbb ("tcp: add a drop_reason pointer to tcp_check_req()") This commit addresses the PAWSESTABREJECTED case and also sets the corresponding drop reason. We use 'pwru' to test. Before this commit: '''' ./pwru 'port 9999' 2025/04/07 13:40:19 Listening for events.. TUPLE FUNC 172.31.75.115:12345->172.31.75.114:9999(tcp) sk_skb_reason_drop(SKB_DROP_REASON_NOT_SPECIFIED) ''' After this commit: ''' ./pwru 'port 9999' 2025/04/07 13:51:34 Listening for events.. 
TUPLE FUNC 172.31.75.115:12345->172.31.75.114:9999(tcp) sk_skb_reason_drop(SKB_DROP_REASON_TCP_RFC7323_TW_PAWS) ''' Suggested-by: Eric Dumazet Signed-off-by: Jiayuan Chen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250409112614.16153-2-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 6 ++++++ include/net/tcp.h | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index e4fdc6b54cef..9701d7f936f6 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -40,6 +40,7 @@ FN(TCP_OFOMERGE) \ FN(TCP_RFC7323_PAWS) \ FN(TCP_RFC7323_PAWS_ACK) \ + FN(TCP_RFC7323_TW_PAWS) \ FN(TCP_RFC7323_TSECR) \ FN(TCP_LISTEN_OVERFLOW) \ FN(TCP_OLD_SEQUENCE) \ @@ -283,6 +284,11 @@ enum skb_drop_reason { * Corresponds to LINUX_MIB_PAWS_OLD_ACK. */ SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK, + /** + * @SKB_DROP_REASON_TCP_RFC7323_TW_PAWS: PAWS check, socket is in + * TIME_WAIT state. + */ + SKB_DROP_REASON_TCP_RFC7323_TW_PAWS, /** * @SKB_DROP_REASON_TCP_RFC7323_TSECR: PAWS check, invalid TSEcr. * Corresponds to LINUX_MIB_TSECRREJECTED. diff --git a/include/net/tcp.h b/include/net/tcp.h index 4450c384ef17..5078ad868fee 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -427,7 +427,8 @@ enum tcp_tw_status { enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th, - u32 *tw_isn); + u32 *tw_isn, + enum skb_drop_reason *drop_reason); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *lost_race, enum skb_drop_reason *drop_reason); -- cgit v1.2.3 From c449d5f3a3d70b6223af8df2cadca3ca6eacb613 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 9 Apr 2025 19:26:05 +0800 Subject: tcp: add LINUX_MIB_PAWS_TW_REJECTED counter When TCP is in TIME_WAIT state, PAWS verification uses LINUX_PAWSESTABREJECTED, which is ambiguous and cannot be distinguished from other PAWS verification processes. We added a new counter, like the existing PAWS_OLD_ACK one. Also we update the doc with previously missing PAWS_OLD_ACK. usage: ''' nstat -az | grep PAWSTimewait TcpExtPAWSTimewait 1 0.0 ''' Suggested-by: Eric Dumazet Signed-off-by: Jiayuan Chen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250409112614.16153-3-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 1 + include/uapi/linux/snmp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 9701d7f936f6..bea77934a235 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -287,6 +287,7 @@ enum skb_drop_reason { /** * @SKB_DROP_REASON_TCP_RFC7323_TW_PAWS: PAWS check, socket is in * TIME_WAIT state. + * Corresponds to LINUX_MIB_PAWS_TW_REJECTED. 
*/ SKB_DROP_REASON_TCP_RFC7323_TW_PAWS, /** diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index ec47f9b68a1b..1d234d7e1892 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -188,6 +188,7 @@ enum LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */ LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */ + LINUX_MIB_PAWS_TW_REJECTED, /* PAWSTimewait */ LINUX_MIB_DELAYEDACKS, /* DelayedACKs */ LINUX_MIB_DELAYEDACKLOCKED, /* DelayedACKLocked */ LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */ -- cgit v1.2.3 From b1e904999542ad6764eafa54545f1c55776006d1 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 8 Apr 2025 11:32:01 -0700 Subject: net: pass const to msg_data_left() The msg_data_left() function doesn't modify the struct msghdr parameter, so mark it as const. This allows the function to be used with const references, improving type safety and making the API more flexible. Signed-off-by: Breno Leitao Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250408-tcpsendmsg-v3-1-208b87064c28@debian.org Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index c3322eb3d686..3b262487ec06 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } -static inline size_t msg_data_left(struct msghdr *msg) +static inline size_t msg_data_left(const struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); } -- cgit v1.2.3 From 0f08335ade71273f89d19412268b48b55f3e3726 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 8 Apr 2025 11:32:02 -0700 Subject: trace: tcp: Add tracepoint for tcp_sendmsg_locked() Add a tracepoint to monitor TCP send operations, enabling detailed visibility into TCP message transmission. Create a new tracepoint within the tcp_sendmsg_locked function, capturing traditional fields along with size_goal, which indicates the optimal data size for a single TCP segment. Additionally, a reference to the struct sock sk is passed, allowing direct access for BPF programs. The implementation is largely based on David's patch[1] and suggestions. Link: https://lore.kernel.org/all/70168c8f-bf52-4279-b4c4-be64527aa1ac@kernel.org/ [1] Signed-off-by: Breno Leitao Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250408-tcpsendmsg-v3-2-208b87064c28@debian.org Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 1a40c41ff8c3..75d3d53a3832 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -259,6 +259,30 @@ TRACE_EVENT(tcp_retransmit_synack, __entry->saddr_v6, __entry->daddr_v6) ); +TRACE_EVENT(tcp_sendmsg_locked, + TP_PROTO(const struct sock *sk, const struct msghdr *msg, + const struct sk_buff *skb, int size_goal), + + TP_ARGS(sk, msg, skb, size_goal), + + TP_STRUCT__entry( + __field(const void *, skb_addr) + __field(int, skb_len) + __field(int, msg_left) + __field(int, size_goal) + ), + + TP_fast_assign( + __entry->skb_addr = skb; + __entry->skb_len = skb ? 
skb->len : 0; + __entry->msg_left = msg_data_left(msg); + __entry->size_goal = size_goal; + ), + + TP_printk("skb_addr %p skb_len %d msg_left %d size_goal %d", + __entry->skb_addr, __entry->skb_len, __entry->msg_left, + __entry->size_goal)); + DECLARE_TRACE(tcp_cwnd_reduction_tp, TP_PROTO(const struct sock *sk, int newly_acked_sacked, int newly_lost, int flag), -- cgit v1.2.3 From de66754e9f8029f8ae955a588959b99cab56b506 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 9 Apr 2025 12:47:34 -0700 Subject: xhci: sideband: add initial api to register a secondary interrupter entity Introduce XHCI sideband, which manages the USB endpoints being requested by a client driver. This is used for when client drivers are attempting to offload USB endpoints to another entity for handling USB transfers. XHCI sec intr will allow for drivers to fetch the required information about the transfer ring, so the user can submit transfers independently. Expose the required APIs for drivers to register and request for a USB endpoint and to manage XHCI secondary interrupters. Driver renaming, multiple ring segment page linking, proper endpoint clean up, and allowing module compilation added by Wesley Cheng to complete original concept code by Mathias Nyman. Tested-by: Puma Hsu Tested-by: Daehwan Jung Signed-off-by: Mathias Nyman Co-developed-by: Wesley Cheng Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-2-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/xhci-sideband.h | 74 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 include/linux/usb/xhci-sideband.h (limited to 'include') diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h new file mode 100644 index 000000000000..4b382af892fa --- /dev/null +++ b/include/linux/usb/xhci-sideband.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * xHCI host controller sideband support + * + * Copyright (c) 2023-2025, Intel Corporation. + * + * Author: Mathias Nyman + */ +#ifndef __LINUX_XHCI_SIDEBAND_H +#define __LINUX_XHCI_SIDEBAND_H + +#include +#include + +#define EP_CTX_PER_DEV 31 /* FIXME defined twice, from xhci.h */ + +struct xhci_sideband; + +enum xhci_sideband_type { + XHCI_SIDEBAND_AUDIO, + XHCI_SIDEBAND_VENDOR, +}; + +/** + * struct xhci_sideband - representation of a sideband accessed usb device. + * @xhci: The xhci host controller the usb device is connected to + * @vdev: the usb device accessed via sideband + * @eps: array of endpoints controlled via sideband + * @ir: event handling and buffer for sideband accessed device + * @type: xHCI sideband type + * @mutex: mutex for sideband operations + * @intf: USB sideband client interface + * + * FIXME usb device accessed via sideband Keeping track of sideband accessed usb devices. 
+ */ +struct xhci_sideband { + struct xhci_hcd *xhci; + struct xhci_virt_device *vdev; + struct xhci_virt_ep *eps[EP_CTX_PER_DEV]; + struct xhci_interrupter *ir; + enum xhci_sideband_type type; + + /* Synchronizing xHCI sideband operations with client drivers operations */ + struct mutex mutex; + + struct usb_interface *intf; +}; + +struct xhci_sideband * +xhci_sideband_register(struct usb_interface *intf, enum xhci_sideband_type type); +void +xhci_sideband_unregister(struct xhci_sideband *sb); +int +xhci_sideband_add_endpoint(struct xhci_sideband *sb, + struct usb_host_endpoint *host_ep); +int +xhci_sideband_remove_endpoint(struct xhci_sideband *sb, + struct usb_host_endpoint *host_ep); +int +xhci_sideband_stop_endpoint(struct xhci_sideband *sb, + struct usb_host_endpoint *host_ep); +struct sg_table * +xhci_sideband_get_endpoint_buffer(struct xhci_sideband *sb, + struct usb_host_endpoint *host_ep); +struct sg_table * +xhci_sideband_get_event_buffer(struct xhci_sideband *sb); +int +xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, + bool ip_autoclear, u32 imod_interval); +void +xhci_sideband_remove_interrupter(struct xhci_sideband *sb); +int +xhci_sideband_interrupter_id(struct xhci_sideband *sb); +#endif /* __LINUX_XHCI_SIDEBAND_H */ -- cgit v1.2.3 From fce57295497df70711d50ed01bf6d914de0ca647 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 9 Apr 2025 12:47:36 -0700 Subject: usb: host: xhci-mem: Allow for interrupter clients to choose specific index Some clients may operate only on a specific XHCI interrupter instance. Allow for the associated class driver to request for the interrupter that it requires. Tested-by: Puma Hsu Tested-by: Daehwan Jung Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-4-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/xhci-sideband.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h index 4b382af892fa..f8722afb8a2d 100644 --- a/include/linux/usb/xhci-sideband.h +++ b/include/linux/usb/xhci-sideband.h @@ -66,7 +66,7 @@ struct sg_table * xhci_sideband_get_event_buffer(struct xhci_sideband *sb); int xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, - bool ip_autoclear, u32 imod_interval); + bool ip_autoclear, u32 imod_interval, int intr_num); void xhci_sideband_remove_interrupter(struct xhci_sideband *sb); int -- cgit v1.2.3 From b85a2ebda1034881d09e6127726dc78950669474 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 9 Apr 2025 12:47:38 -0700 Subject: usb: host: xhci: Notify xHCI sideband on transfer ring free In the case of handling a USB bus reset, the xhci_discover_or_reset_device can run without first notifying the xHCI sideband client driver to stop or prevent the use of the transfer ring. It was seen that when a bus reset situation happened, the USB offload driver was attempting to fetch the xHCI transfer ring information, which was already freed. 
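As an illustration of how a client is expected to react to this, a minimal sketch of a vendor sideband driver wiring up the new notify_client callback follows; the logging, the XHCI_SIDEBAND_VENDOR choice and the minimal error handling are assumptions for illustration, not part of the interface.

#include <linux/usb.h>
#include <linux/usb/xhci-sideband.h>

/* Hypothetical offload client: drop cached ring state when the host
 * controller frees a transfer ring (e.g. across a bus reset). */
static int example_sb_notify(struct usb_interface *intf,
			     struct xhci_sideband_event *evt)
{
	if (evt->type == XHCI_SIDEBAND_XFER_RING_FREE) {
		/* evt_data identifies the endpoint whose ring went away */
		dev_info(&intf->dev, "sideband transfer ring freed\n");
		return 0;
	}
	return -EOPNOTSUPP;
}

static struct xhci_sideband *example_sb_setup(struct usb_interface *intf)
{
	/* registration of a vendor-type sideband with the notifier above */
	return xhci_sideband_register(intf, XHCI_SIDEBAND_VENDOR,
				      example_sb_notify);
}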
Tested-by: Puma Hsu Tested-by: Daehwan Jung Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-6-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/xhci-sideband.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h index f8722afb8a2d..45288c392f6e 100644 --- a/include/linux/usb/xhci-sideband.h +++ b/include/linux/usb/xhci-sideband.h @@ -21,6 +21,20 @@ enum xhci_sideband_type { XHCI_SIDEBAND_VENDOR, }; +enum xhci_sideband_notify_type { + XHCI_SIDEBAND_XFER_RING_FREE, +}; + +/** + * struct xhci_sideband_event - sideband event + * @type: notifier type + * @evt_data: event data + */ +struct xhci_sideband_event { + enum xhci_sideband_notify_type type; + void *evt_data; +}; + /** * struct xhci_sideband - representation of a sideband accessed usb device. * @xhci: The xhci host controller the usb device is connected to @@ -30,6 +44,7 @@ enum xhci_sideband_type { * @type: xHCI sideband type * @mutex: mutex for sideband operations * @intf: USB sideband client interface + * @notify_client: callback for xHCI sideband sequences * * FIXME usb device accessed via sideband Keeping track of sideband accessed usb devices. */ @@ -44,10 +59,14 @@ struct xhci_sideband { struct mutex mutex; struct usb_interface *intf; + int (*notify_client)(struct usb_interface *intf, + struct xhci_sideband_event *evt); }; struct xhci_sideband * -xhci_sideband_register(struct usb_interface *intf, enum xhci_sideband_type type); +xhci_sideband_register(struct usb_interface *intf, enum xhci_sideband_type type, + int (*notify_client)(struct usb_interface *intf, + struct xhci_sideband_event *evt)); void xhci_sideband_unregister(struct xhci_sideband *sb); int @@ -71,4 +90,13 @@ void xhci_sideband_remove_interrupter(struct xhci_sideband *sb); int xhci_sideband_interrupter_id(struct xhci_sideband *sb); + +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +void xhci_sideband_notify_ep_ring_free(struct xhci_sideband *sb, + unsigned int ep_index); +#else +static inline void xhci_sideband_notify_ep_ring_free(struct xhci_sideband *sb, + unsigned int ep_index) +{ } +#endif /* IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) */ #endif /* __LINUX_XHCI_SIDEBAND_H */ -- cgit v1.2.3 From 67890d579402804b1d32b3280d9860073542528e Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 9 Apr 2025 12:47:40 -0700 Subject: ALSA: Add USB audio device jack type Add an USB jack type, in order to support notifying of a valid USB audio device. Since USB audio devices can have a slew of different configurations that reach beyond the basic headset and headphone use cases, classify these devices differently. 
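For reference, a minimal sketch of how a driver could report such a device through the new jack type; the jack name and the card handling are assumptions for illustration.

#include <sound/core.h>
#include <sound/jack.h>

/* Sketch: report a connected USB audio device via SND_JACK_USB. */
static int example_report_usb_jack(struct snd_card *card, bool connected)
{
	static struct snd_jack *usb_jack;
	int err;

	if (!usb_jack) {
		err = snd_jack_new(card, "USB Offload Jack", SND_JACK_USB,
				   &usb_jack, true, false);
		if (err < 0)
			return err;
	}

	snd_jack_report(usb_jack, connected ? SND_JACK_USB : 0);
	return 0;
}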
Reviewed-by: Pierre-Louis Bossart Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-8-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/mod_devicetable.h | 2 +- include/sound/jack.h | 4 +++- include/uapi/linux/input-event-codes.h | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index bd7e60c0b72f..dde836875deb 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -340,7 +340,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_LED_MAX 0x0f #define INPUT_DEVICE_ID_SND_MAX 0x07 #define INPUT_DEVICE_ID_FF_MAX 0x7f -#define INPUT_DEVICE_ID_SW_MAX 0x10 +#define INPUT_DEVICE_ID_SW_MAX 0x11 #define INPUT_DEVICE_ID_PROP_MAX 0x1f #define INPUT_DEVICE_ID_MATCH_BUS 1 diff --git a/include/sound/jack.h b/include/sound/jack.h index 1ed90e2109e9..bd3f62281c97 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -22,6 +22,7 @@ struct input_dev; * @SND_JACK_VIDEOOUT: Video out * @SND_JACK_AVOUT: AV (Audio Video) out * @SND_JACK_LINEIN: Line in + * @SND_JACK_USB: USB audio device * @SND_JACK_BTN_0: Button 0 * @SND_JACK_BTN_1: Button 1 * @SND_JACK_BTN_2: Button 2 @@ -43,6 +44,7 @@ enum snd_jack_types { SND_JACK_VIDEOOUT = 0x0010, SND_JACK_AVOUT = SND_JACK_LINEOUT | SND_JACK_VIDEOOUT, SND_JACK_LINEIN = 0x0020, + SND_JACK_USB = 0x0040, /* Kept separate from switches to facilitate implementation */ SND_JACK_BTN_0 = 0x4000, @@ -54,7 +56,7 @@ enum snd_jack_types { }; /* Keep in sync with definitions above */ -#define SND_JACK_SWITCH_TYPES 6 +#define SND_JACK_SWITCH_TYPES 7 struct snd_jack { struct list_head kctl_list; diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 5a199f3d4a26..3b2524e4b667 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -925,7 +925,8 @@ #define SW_MUTE_DEVICE 0x0e /* set = device disabled */ #define SW_PEN_INSERTED 0x0f /* set = pen inserted */ #define SW_MACHINE_COVER 0x10 /* set = cover closed */ -#define SW_MAX 0x10 +#define SW_USB_INSERT 0x11 /* set = USB audio device connected */ +#define SW_MAX 0x11 #define SW_CNT (SW_MAX+1) /* -- cgit v1.2.3 From dba7759af789b75240379e94017e655221c83225 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 9 Apr 2025 12:47:47 -0700 Subject: ASoC: Add SoC USB APIs for adding an USB backend Some platforms may have support for offloading USB audio devices to a dedicated audio DSP. Introduce a set of APIs that allow for management of USB sound card and PCM devices enumerated by the USB SND class driver. This allows for the ASoC components to be aware of what USB devices are available for offloading. Reviewed-by: Pierre-Louis Bossart Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-15-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/sound/soc-usb.h | 93 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 include/sound/soc-usb.h (limited to 'include') diff --git a/include/sound/soc-usb.h b/include/sound/soc-usb.h new file mode 100644 index 000000000000..a7be5d05218f --- /dev/null +++ b/include/sound/soc-usb.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef __LINUX_SND_SOC_USB_H +#define __LINUX_SND_SOC_USB_H + +#include + +/** + * struct snd_soc_usb_device - SoC USB representation of a USB sound device + * @card_idx: sound card index associated with USB device + * @chip_idx: USB sound chip array index + * @cpcm_idx: capture PCM index array associated with USB device + * @ppcm_idx: playback PCM index array associated with USB device + * @num_capture: number of capture streams + * @num_playback: number of playback streams + * @list: list head for SoC USB devices + **/ +struct snd_soc_usb_device { + int card_idx; + int chip_idx; + + /* PCM index arrays */ + unsigned int *cpcm_idx; /* TODO: capture path is not tested yet */ + unsigned int *ppcm_idx; + int num_capture; /* TODO: capture path is not tested yet */ + int num_playback; + + struct list_head list; +}; + +/** + * struct snd_soc_usb - representation of a SoC USB backend entity + * @list: list head for SND SOC struct list + * @component: reference to ASoC component + * @connection_status_cb: callback to notify connection events + * @priv_data: driver data + **/ +struct snd_soc_usb { + struct list_head list; + struct snd_soc_component *component; + int (*connection_status_cb)(struct snd_soc_usb *usb, + struct snd_soc_usb_device *sdev, + bool connected); + void *priv_data; +}; + +#if IS_ENABLED(CONFIG_SND_SOC_USB) +int snd_soc_usb_connect(struct device *usbdev, struct snd_soc_usb_device *sdev); +int snd_soc_usb_disconnect(struct device *usbdev, struct snd_soc_usb_device *sdev); +void *snd_soc_usb_find_priv_data(struct device *usbdev); + +struct snd_soc_usb *snd_soc_usb_allocate_port(struct snd_soc_component *component, + void *data); +void snd_soc_usb_free_port(struct snd_soc_usb *usb); +void snd_soc_usb_add_port(struct snd_soc_usb *usb); +void snd_soc_usb_remove_port(struct snd_soc_usb *usb); +#else +static inline int snd_soc_usb_connect(struct device *usbdev, + struct snd_soc_usb_device *sdev) +{ + return -ENODEV; +} + +static inline int snd_soc_usb_disconnect(struct device *usbdev, + struct snd_soc_usb_device *sdev) +{ + return -EINVAL; +} + +static inline void *snd_soc_usb_find_priv_data(struct device *usbdev) +{ + return NULL; +} + +static inline struct snd_soc_usb * +snd_soc_usb_allocate_port(struct snd_soc_component *component, void *data) +{ + return ERR_PTR(-ENOMEM); +} + +static inline void snd_soc_usb_free_port(struct snd_soc_usb *usb) +{ } + +static inline void snd_soc_usb_add_port(struct snd_soc_usb *usb) +{ } + +static inline void snd_soc_usb_remove_port(struct snd_soc_usb *usb) +{ } +#endif /* IS_ENABLED(CONFIG_SND_SOC_USB) */ +#endif /*__LINUX_SND_SOC_USB_H */ -- cgit v1.2.3 From 00f5d6bfba3ac7b28dd9bbf00eab5db35e0347b7 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 9 Apr 2025 12:47:48 -0700 Subject: ASoC: usb: Add PCM format check API for USB backend Introduce a helper to check if a particular PCM format is supported by the USB audio device connected. If the USB audio device does not have an audio profile which can support the requested format, then notify the USB backend. 
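A minimal sketch of a backend DAI hw_params handler using the new helper; the card index of 0 is an assumption for illustration.

#include <sound/pcm_params.h>
#include <sound/soc.h>
#include <sound/soc-usb.h>

/* Sketch: reject PCM profiles the connected USB device cannot handle. */
static int example_usb_be_hw_params(struct snd_pcm_substream *substream,
				    struct snd_pcm_hw_params *params,
				    struct snd_soc_dai *dai)
{
	int ret;

	ret = snd_soc_usb_find_supported_format(0, params,
						substream->stream);
	if (ret < 0)
		dev_err(dai->dev, "unsupported PCM format for offload\n");

	return ret;
}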
Reviewed-by: Pierre-Louis Bossart Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-16-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/sound/soc-usb.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/soc-usb.h b/include/sound/soc-usb.h index a7be5d05218f..188b8df52932 100644 --- a/include/sound/soc-usb.h +++ b/include/sound/soc-usb.h @@ -48,6 +48,10 @@ struct snd_soc_usb { }; #if IS_ENABLED(CONFIG_SND_SOC_USB) +int snd_soc_usb_find_supported_format(int card_idx, + struct snd_pcm_hw_params *params, + int direction); + int snd_soc_usb_connect(struct device *usbdev, struct snd_soc_usb_device *sdev); int snd_soc_usb_disconnect(struct device *usbdev, struct snd_soc_usb_device *sdev); void *snd_soc_usb_find_priv_data(struct device *usbdev); @@ -58,6 +62,13 @@ void snd_soc_usb_free_port(struct snd_soc_usb *usb); void snd_soc_usb_add_port(struct snd_soc_usb *usb); void snd_soc_usb_remove_port(struct snd_soc_usb *usb); #else +static inline int +snd_soc_usb_find_supported_format(int card_idx, struct snd_pcm_hw_params *params, + int direction) +{ + return -EINVAL; +} + static inline int snd_soc_usb_connect(struct device *usbdev, struct snd_soc_usb_device *sdev) { -- cgit v1.2.3 From 0bb5f3614b2588f758e6db21397776d7164f3b66 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 9 Apr 2025 12:47:49 -0700 Subject: ASoC: usb: Create SOC USB SND jack kcontrol Expose API for creation of a jack control for notifying of available devices that are plugged in/discovered, and that support offloading. This allows for control names to be standardized across implementations of USB audio offloading. Reviewed-by: Pierre-Louis Bossart Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-17-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/sound/soc-usb.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/sound/soc-usb.h b/include/sound/soc-usb.h index 188b8df52932..e9eb8dbb2e0e 100644 --- a/include/sound/soc-usb.h +++ b/include/sound/soc-usb.h @@ -56,6 +56,9 @@ int snd_soc_usb_connect(struct device *usbdev, struct snd_soc_usb_device *sdev); int snd_soc_usb_disconnect(struct device *usbdev, struct snd_soc_usb_device *sdev); void *snd_soc_usb_find_priv_data(struct device *usbdev); +int snd_soc_usb_setup_offload_jack(struct snd_soc_component *component, + struct snd_soc_jack *jack); + struct snd_soc_usb *snd_soc_usb_allocate_port(struct snd_soc_component *component, void *data); void snd_soc_usb_free_port(struct snd_soc_usb *usb); @@ -86,6 +89,12 @@ static inline void *snd_soc_usb_find_priv_data(struct device *usbdev) return NULL; } +static inline int snd_soc_usb_setup_offload_jack(struct snd_soc_component *component, + struct snd_soc_jack *jack) +{ + return 0; +} + static inline struct snd_soc_usb * snd_soc_usb_allocate_port(struct snd_soc_component *component, void *data) { -- cgit v1.2.3 From 234ed325920c4441090e4dd5441d4e424c1804c9 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 9 Apr 2025 12:47:50 -0700 Subject: ASoC: usb: Fetch ASoC card and pcm device information USB SND needs to know how the USB offload path is being routed. This would allow for applications to open the corresponding sound card and pcm device when it wants to take the audio offload path. This callback should return the mapped indexes based on the USB SND device information. 
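A hedged sketch of how the USB SND side could query that mapping through the new helper; the single-entry route array and the success-on-zero return convention are assumptions for illustration.

#include <sound/pcm.h>
#include <sound/soc-usb.h>

/* Sketch: ask which ASoC card index the offload path for
 * (USB card 0, pcm 0, playback) maps to. */
static void example_query_offload_card(struct device *component_dev)
{
	long route[1] = { -1 };

	if (!snd_soc_usb_update_offload_route(component_dev, 0, 0,
					      SNDRV_PCM_STREAM_PLAYBACK,
					      SND_SOC_USB_KCTL_CARD_ROUTE,
					      route))
		pr_info("offload routed to ASoC card %ld\n", route[0]);
}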
Reviewed-by: Pierre-Louis Bossart Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-18-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/sound/soc-usb.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/sound/soc-usb.h b/include/sound/soc-usb.h index e9eb8dbb2e0e..124acb1939e5 100644 --- a/include/sound/soc-usb.h +++ b/include/sound/soc-usb.h @@ -8,6 +8,11 @@ #include +enum snd_soc_usb_kctl { + SND_SOC_USB_KCTL_CARD_ROUTE, + SND_SOC_USB_KCTL_PCM_ROUTE, +}; + /** * struct snd_soc_usb_device - SoC USB representation of a USB sound device * @card_idx: sound card index associated with USB device @@ -36,6 +41,12 @@ struct snd_soc_usb_device { * @list: list head for SND SOC struct list * @component: reference to ASoC component * @connection_status_cb: callback to notify connection events + * @update_offload_route_info: callback to fetch mapped ASoC card and pcm + * device pair. This is unrelated to the concept + * of DAPM route. The "route" argument carries + * an array used for a kcontrol output for either + * the card or pcm index. "path" determines the + * which entry to look for. (ie mapped card or pcm) * @priv_data: driver data **/ struct snd_soc_usb { @@ -44,6 +55,10 @@ struct snd_soc_usb { int (*connection_status_cb)(struct snd_soc_usb *usb, struct snd_soc_usb_device *sdev, bool connected); + int (*update_offload_route_info)(struct snd_soc_component *component, + int card, int pcm, int direction, + enum snd_soc_usb_kctl path, + long *route); void *priv_data; }; @@ -58,6 +73,9 @@ void *snd_soc_usb_find_priv_data(struct device *usbdev); int snd_soc_usb_setup_offload_jack(struct snd_soc_component *component, struct snd_soc_jack *jack); +int snd_soc_usb_update_offload_route(struct device *dev, int card, int pcm, + int direction, enum snd_soc_usb_kctl path, + long *route); struct snd_soc_usb *snd_soc_usb_allocate_port(struct snd_soc_component *component, void *data); @@ -95,6 +113,13 @@ static inline int snd_soc_usb_setup_offload_jack(struct snd_soc_component *compo return 0; } +static int snd_soc_usb_update_offload_route(struct device *dev, int card, int pcm, + int direction, enum snd_soc_usb_kctl path, + long *route) +{ + return -ENODEV; +} + static inline struct snd_soc_usb * snd_soc_usb_allocate_port(struct snd_soc_component *component, void *data) { -- cgit v1.2.3 From 55b5fb369c02177b2f7ea790cec839fc5ec57173 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 9 Apr 2025 12:47:53 -0700 Subject: ASoC: dt-bindings: qcom,q6dsp-lpass-ports: Add USB_RX port Q6DSP supports handling of USB playback audio data if USB audio offloading is enabled. Add a new definition for the USB_RX AFE port, which is referenced when the AFE port is started. 
Acked-by: Krzysztof Kozlowski Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-21-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/dt-bindings/sound/qcom,q6dsp-lpass-ports.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/sound/qcom,q6dsp-lpass-ports.h b/include/dt-bindings/sound/qcom,q6dsp-lpass-ports.h index 39f203256c4f..6d1ce7f5da51 100644 --- a/include/dt-bindings/sound/qcom,q6dsp-lpass-ports.h +++ b/include/dt-bindings/sound/qcom,q6dsp-lpass-ports.h @@ -139,6 +139,7 @@ #define DISPLAY_PORT_RX_5 133 #define DISPLAY_PORT_RX_6 134 #define DISPLAY_PORT_RX_7 135 +#define USB_RX 136 #define LPASS_CLK_ID_PRI_MI2S_IBIT 1 #define LPASS_CLK_ID_PRI_MI2S_EBIT 2 -- cgit v1.2.3 From 72b0b8b2998043c50b191d9668c7a828ca0d5c70 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 9 Apr 2025 12:47:57 -0700 Subject: ASoC: qcom: qdsp6: Add USB backend ASoC driver for Q6 Create a USB BE component that will register a new USB port to the ASoC USB framework. This will handle determination on if the requested audio profile is supported by the USB device currently selected. Check for if the PCM format is supported during the hw_params callback. If the profile is not supported then the userspace ALSA entity will receive an error, and can take further action. Signed-off-by: Wesley Cheng Acked-by: Mark Brown Link: https://lore.kernel.org/r/20250409194804.3773260-25-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/sound/q6usboffload.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/sound/q6usboffload.h (limited to 'include') diff --git a/include/sound/q6usboffload.h b/include/sound/q6usboffload.h new file mode 100644 index 000000000000..f7e2449fe1b3 --- /dev/null +++ b/include/sound/q6usboffload.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * sound/q6usboffload.h -- QDSP6 USB offload + * + * Copyright (c) 2022-2025 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +/** + * struct q6usb_offload - USB backend DAI link offload parameters + * @dev: dev handle to usb be + * @domain: allocated iommu domain + * @intr_num: usb interrupter number + * @sid: streamID for iommu + **/ +struct q6usb_offload { + struct device *dev; + struct iommu_domain *domain; + u16 intr_num; + u8 sid; +}; -- cgit v1.2.3 From 10ed34d6eaaf86e301a8f2dd190d26dfbc9799bd Mon Sep 17 00:00:00 2001 From: Sandor Yu Date: Tue, 18 Mar 2025 14:35:35 +0200 Subject: phy: Add HDMI configuration options Allow HDMI PHYs to be configured through the generic functions through a custom structure added to the generic union. The parameters added here are based on HDMI PHY implementation practices. The current set of parameters should cover the potential users. 
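A minimal sketch of an HDMI encoder driver using the new configuration set; the PHY handle and TMDS character rate are supplied by the caller, and error handling is kept minimal.

#include <linux/phy/phy.h>

/* Sketch: hand the TMDS character rate to the PHY in HDMI mode. */
static int example_hdmi_phy_configure(struct phy *phy,
				      unsigned long long tmds_char_rate)
{
	union phy_configure_opts opts = {
		.hdmi.tmds_char_rate = tmds_char_rate,
	};
	int ret;

	ret = phy_set_mode(phy, PHY_MODE_HDMI);
	if (ret)
		return ret;

	return phy_configure(phy, &opts);
}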
Signed-off-by: Sandor Yu Reviewed-by: Dmitry Baryshkov Reviewed-by: Maxime Ripard Acked-by: Vinod Koul Link: https://lore.kernel.org/r/d1cff6c03ec3732d2244022029245ab2d954d997.1734340233.git.Sandor.yu@nxp.com Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20250318-phy-sam-hdptx-bpc-v6-1-8cb1678e7663@collabora.com Signed-off-by: Vinod Koul --- include/linux/phy/phy-hdmi.h | 19 +++++++++++++++++++ include/linux/phy/phy.h | 7 ++++++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 include/linux/phy/phy-hdmi.h (limited to 'include') diff --git a/include/linux/phy/phy-hdmi.h b/include/linux/phy/phy-hdmi.h new file mode 100644 index 000000000000..6a696922bc7f --- /dev/null +++ b/include/linux/phy/phy-hdmi.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2022,2024 NXP + */ + +#ifndef __PHY_HDMI_H_ +#define __PHY_HDMI_H_ + +/** + * struct phy_configure_opts_hdmi - HDMI configuration set + * @tmds_char_rate: HDMI TMDS Character Rate in Hertz. + * + * This structure is used to represent the configuration state of a HDMI phy. + */ +struct phy_configure_opts_hdmi { + unsigned long long tmds_char_rate; +}; + +#endif /* __PHY_HDMI_H_ */ diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index e63e6e70e860..437769e061b7 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -42,7 +43,8 @@ enum phy_mode { PHY_MODE_MIPI_DPHY, PHY_MODE_SATA, PHY_MODE_LVDS, - PHY_MODE_DP + PHY_MODE_DP, + PHY_MODE_HDMI, }; enum phy_media { @@ -60,11 +62,14 @@ enum phy_media { * the DisplayPort protocol. * @lvds: Configuration set applicable for phys supporting * the LVDS phy mode. + * @hdmi: Configuration set applicable for phys supporting + * the HDMI phy mode. */ union phy_configure_opts { struct phy_configure_opts_mipi_dphy mipi_dphy; struct phy_configure_opts_dp dp; struct phy_configure_opts_lvds lvds; + struct phy_configure_opts_hdmi hdmi; }; /** -- cgit v1.2.3 From 3bb9286f4ece6acbc1fbaa9f192a82645d30efbf Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 18 Mar 2025 14:35:36 +0200 Subject: phy: hdmi: Add color depth configuration Extend the HDMI configuration options to allow managing bits per color channel. This is required by some PHY drivers such as rockchip-samsung-hdptx. Reviewed-by: Dmitry Baryshkov Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20250318-phy-sam-hdptx-bpc-v6-2-8cb1678e7663@collabora.com Signed-off-by: Vinod Koul --- include/linux/phy/phy-hdmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy/phy-hdmi.h b/include/linux/phy/phy-hdmi.h index 6a696922bc7f..f0ec963c6e84 100644 --- a/include/linux/phy/phy-hdmi.h +++ b/include/linux/phy/phy-hdmi.h @@ -9,11 +9,13 @@ /** * struct phy_configure_opts_hdmi - HDMI configuration set * @tmds_char_rate: HDMI TMDS Character Rate in Hertz. + * @bpc: Bits per color channel. * * This structure is used to represent the configuration state of a HDMI phy. */ struct phy_configure_opts_hdmi { unsigned long long tmds_char_rate; + unsigned int bpc; }; #endif /* __PHY_HDMI_H_ */ -- cgit v1.2.3 From cc2308ebbbb16bbefbc89e4da2f827e28b082411 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Fri, 11 Apr 2025 11:32:35 +0530 Subject: drm/dp: Add smooth brightness register bit definition Add DP_EDP_SMOOTH_BRIGHTNESS register bit definition for EDP_GENERAL_CAPABILITY 2 register. 
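A small sketch of how a driver might probe this capability over DPCD; the AUX channel is an assumption supplied by the caller.

#include <drm/display/drm_dp.h>
#include <drm/display/drm_dp_helper.h>

/* Sketch: check whether the eDP sink advertises smooth brightness. */
static bool example_edp_smooth_brightness(struct drm_dp_aux *aux)
{
	u8 cap;

	if (drm_dp_dpcd_readb(aux, DP_EDP_GENERAL_CAP_2, &cap) != 1)
		return false;

	return cap & DP_EDP_SMOOTH_BRIGHTNESS_CAPABLE;
}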
--v2 -Add eDP 2.0 comment [Ankit] Signed-off-by: Suraj Kandpal Reviewed-by: Arun R Murthy Link: https://lore.kernel.org/r/20250411060235.2732060-2-suraj.kandpal@intel.com --- include/drm/display/drm_dp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index c413ef68f9a3..3001c0b6e7bb 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -1025,6 +1025,7 @@ #define DP_EDP_GENERAL_CAP_2 0x703 # define DP_EDP_OVERDRIVE_ENGINE_ENABLED (1 << 0) # define DP_EDP_PANEL_LUMINANCE_CONTROL_CAPABLE (1 << 4) +# define DP_EDP_SMOOTH_BRIGHTNESS_CAPABLE (1 << 6) /* eDP 2.0 */ #define DP_EDP_GENERAL_CAP_3 0x704 /* eDP 1.4 */ # define DP_EDP_X_REGION_CAP_MASK (0xf << 0) -- cgit v1.2.3 From 1692632146451184c4bcb68554098470a119fb01 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 8 Apr 2025 20:08:51 +0800 Subject: USB: core: Correct API usb_(enable|disable)_autosuspend() prototypes API usb_(enable|disable)_autosuspend() have inconsistent prototypes regarding if CONFIG_PM is defined. Correct prototypes when the macro is undefined by referring to those when the macro is defined. Signed-off-by: Zijun Hu Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20250408-fix_usb_hdr-v1-1-e785c5b49481@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index b46738701f8d..1b2545b4363b 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -815,10 +815,10 @@ static inline void usb_mark_last_busy(struct usb_device *udev) #else -static inline int usb_enable_autosuspend(struct usb_device *udev) -{ return 0; } -static inline int usb_disable_autosuspend(struct usb_device *udev) -{ return 0; } +static inline void usb_enable_autosuspend(struct usb_device *udev) +{ } +static inline void usb_disable_autosuspend(struct usb_device *udev) +{ } static inline int usb_autopm_get_interface(struct usb_interface *intf) { return 0; } -- cgit v1.2.3 From 2acaf27cd7f4f32bfe8bf7335690618e2417e744 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:54 -0400 Subject: vt: move unicode processing to a separate file This will make it easier to maintain. Also make it depend on CONFIG_CONSOLE_TRANSLATIONS. 
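A minimal sketch of how console code consumes the exported helper; zero-width handling arrives with the follow-up patches.

#include <linux/consolemap.h>

/* Sketch: decide how many character cells a code point occupies. */
static int example_ucs_cell_width(uint32_t cp)
{
	return ucs_is_double_width(cp) ? 2 : 1;
}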
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-3-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index c35db4896c37..caf079bcb8c9 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,6 +28,7 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); +bool ucs_is_double_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -57,6 +58,11 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } + +static inline bool ucs_is_double_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From e88391f730e46d208b7fb37b02611d24137af1ef Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:55 -0400 Subject: vt: properly support zero-width Unicode code points Zero-width Unicode code points are causing misalignment in vertically aligned content, disrupting the visual layout. Let's handle zero-width code points more intelligently. Double-width code points are stored in the screen grid followed by a white space code point to create the expected screen layout. When a double-width code point is followed by a zero-width code point in the console incoming bytestream (e.g., an emoji with a presentation selector) then we may replace the white space padding by that zero-width code point instead of dropping it. This maximize screen content information while preserving proper layout. If a zero-width code point is preceded by a single-width code point then the above trick is not possible and such zero-width code point must be dropped. VS16 (Variation Selector 16, U+FE0F) is special as it doubles the width of the preceding single-width code point. We handle that case by giving VS16 a width of 1 when that happens. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-4-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index caf079bcb8c9..7d778752dcef 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,6 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); +static inline bool ucs_is_zero_width(uint32_t cp) +{ + /* coming soon */ + return false; +} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -63,6 +68,11 @@ static inline bool ucs_is_double_width(uint32_t cp) { return false; } + +static inline bool ucs_is_zero_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From 3a1ab63aa05b4736a7d30ae0a769385662f13def Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:57 -0400 Subject: vt: update ucs_width.c using gen_ucs_width.py This replaces ucs_width.c with the code generated by gen_ucs_width.py providing comprehensive tables for double-width and zero-width Unicode code points. Also make ucs_is_zero_width() effective. 
Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-6-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 7d778752dcef..b3a911866662 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,11 +29,7 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -static inline bool ucs_is_zero_width(uint32_t cp) -{ - /* coming soon */ - return false; -} +bool ucs_is_zero_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) -- cgit v1.2.3 From 54af55b990eda5a6a0140a3cded8094b42c0c3b7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:59 -0400 Subject: vt: create ucs_recompose.c using gen_ucs_recompose.py This provides ucs_recompose() to recompose two Unicode characters into a single character if possible. This is needed for the VT to properly display decomposed UTF8 sequences. Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-8-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..4d3a34c288e5 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); +uint32_t ucs_recompose(uint32_t base, uint32_t combining); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } + +static inline uint32_t ucs_recompose(uint32_t base, uint32_t combining) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From 4172b556fd5bdfdf9b2d1e42da39df6ce99ee989 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 7 Apr 2025 15:32:33 -0400 Subject: drm/amdkfd: add smi events for process start and end rocm-smi will be able to show the events for KFD process start/end, it is the implementation of this feature. 
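A hedged userspace sketch parsing one such event payload as formatted by KFD_EVENT_FMT_PROCESS ("%x %s\n"); how the SMI events file descriptor is obtained is outside this patch and assumed here.

#include <stdio.h>
#include <linux/kfd_ioctl.h>

/* Sketch: parse one PROCESS_START/PROCESS_END payload line. */
static void example_parse_process_event(const char *payload)
{
	unsigned int pid;
	char task[64];

	if (sscanf(payload, "%x %63s", &pid, task) == 2)
		printf("KFD process event: pid=%u task=%s\n", pid, task);
}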
Signed-off-by: Eric Huang Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1e59344c5673..04c7d283dc7d 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -536,6 +536,8 @@ enum kfd_smi_event { KFD_SMI_EVENT_QUEUE_EVICTION = 9, KFD_SMI_EVENT_QUEUE_RESTORE = 10, KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, + KFD_SMI_EVENT_PROCESS_START = 12, + KFD_SMI_EVENT_PROCESS_END = 13, /* * max event number, as a flag bit to get events from all processes, @@ -651,6 +653,9 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ (node), (unmap_trigger) +#define KFD_EVENT_FMT_PROCESS(pid, task_name)\ + "%x %s\n", (pid), (task_name) + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- cgit v1.2.3 From 11d4c388a38ab6e95a7be82a44a55ecc32cd6a14 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 4 Apr 2025 16:15:30 -0700 Subject: scsi: soc: qcom: ice: Make qcom_ice_program_key() take struct blk_crypto_key qcom_ice_program_key() currently accepts the key as an array of bytes, algorithm ID, key size enum, and data unit size. However both callers have a struct blk_crypto_key which contains all that information. Thus they both have similar code that converts the blk_crypto_key into the form that qcom_ice_program_key() wants. Once wrapped key support is added, the key type would need to be added to the arguments too. Therefore, this patch changes qcom_ice_program_key() to take in all this information as a struct blk_crypto_key directly. The calling code is updated accordingly. This ends up being much simpler, and it makes the key type be passed down automatically once wrapped key support is added. Based on a patch by Gaurav Kashyap that replaced the byte array argument only. This patch makes the blk_crypto_key replace other arguments like the algorithm ID too, ensuring that there remains only one source of truth. Acked-by: Manivannan Sadhasivam Tested-by: Bartosz Golaszewski # sm8650 Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20250404231533.174419-2-ebiggers@kernel.org Acked-by: Konrad Dybcio Acked-by: Ulf Hansson # For MMC Signed-off-by: Martin K. 
Petersen --- include/soc/qcom/ice.h | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/soc/qcom/ice.h b/include/soc/qcom/ice.h index fdf1b5c21eb9..24573e425039 100644 --- a/include/soc/qcom/ice.h +++ b/include/soc/qcom/ice.h @@ -6,32 +6,16 @@ #ifndef __QCOM_ICE_H__ #define __QCOM_ICE_H__ +#include #include struct qcom_ice; -enum qcom_ice_crypto_key_size { - QCOM_ICE_CRYPTO_KEY_SIZE_INVALID = 0x0, - QCOM_ICE_CRYPTO_KEY_SIZE_128 = 0x1, - QCOM_ICE_CRYPTO_KEY_SIZE_192 = 0x2, - QCOM_ICE_CRYPTO_KEY_SIZE_256 = 0x3, - QCOM_ICE_CRYPTO_KEY_SIZE_512 = 0x4, -}; - -enum qcom_ice_crypto_alg { - QCOM_ICE_CRYPTO_ALG_AES_XTS = 0x0, - QCOM_ICE_CRYPTO_ALG_BITLOCKER_AES_CBC = 0x1, - QCOM_ICE_CRYPTO_ALG_AES_ECB = 0x2, - QCOM_ICE_CRYPTO_ALG_ESSIV_AES_CBC = 0x3, -}; - int qcom_ice_enable(struct qcom_ice *ice); int qcom_ice_resume(struct qcom_ice *ice); int qcom_ice_suspend(struct qcom_ice *ice); -int qcom_ice_program_key(struct qcom_ice *ice, - u8 algorithm_id, u8 key_size, - const u8 crypto_key[], u8 data_unit_size, - int slot); +int qcom_ice_program_key(struct qcom_ice *ice, unsigned int slot, + const struct blk_crypto_key *blk_key); int qcom_ice_evict_key(struct qcom_ice *ice, int slot); struct qcom_ice *devm_of_qcom_ice_get(struct device *dev); -- cgit v1.2.3 From 7cc6e0c34b21b6e981afde695132f38e174c7aa5 Mon Sep 17 00:00:00 2001 From: Gaurav Kashyap Date: Fri, 4 Apr 2025 16:15:31 -0700 Subject: scsi: soc: qcom: ice: Add HWKM support to the ICE driver Qualcomm's Inline Crypto Engine (ICE) version 3.2 and later includes a key management hardware block called the Hardware Key Manager (HWKM). Add support for HWKM to the ICE driver. HWKM provides hardware-wrapped key support where the ICE (storage) keys are not exposed to software and instead are protected in hardware. Later patches will wire up this feature to ufs-qcom and sdhci-msm using the support added in this patch. HWKM and legacy mode are currently mutually exclusive. The selection of which mode to use has to be made before the storage driver(s) registers any inline encryption capable disk(s) with the block layer (i.e., generally at boot time) so that the appropriate crypto capabilities can be advertised to upper layers. Therefore, make the ICE driver select HWKM mode when the all of the following are true: - The new module parameter qcom_ice.use_wrapped_keys=1 is specified. - HWKM is present and is at least v2, i.e. ICE is v3.2.1 or later. - The SCM calls needed to fully use HWKM are supported by TrustZone. [EB: merged related patches; fixed the module parameter to work correctly; dropped unnecessary support for HWKM v1; fixed error handling; improved log messages, comments, and commit message; fixed naming; merged enable and init functions; and other cleanups] Signed-off-by: Gaurav Kashyap Signed-off-by: Bartosz Golaszewski Co-developed-by: Eric Biggers Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20250404231533.174419-3-ebiggers@kernel.org Acked-by: Ulf Hansson # For MMC Signed-off-by: Martin K. 
Petersen --- include/soc/qcom/ice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/soc/qcom/ice.h b/include/soc/qcom/ice.h index 24573e425039..4bee553f0a59 100644 --- a/include/soc/qcom/ice.h +++ b/include/soc/qcom/ice.h @@ -17,6 +17,18 @@ int qcom_ice_suspend(struct qcom_ice *ice); int qcom_ice_program_key(struct qcom_ice *ice, unsigned int slot, const struct blk_crypto_key *blk_key); int qcom_ice_evict_key(struct qcom_ice *ice, int slot); +enum blk_crypto_key_type qcom_ice_get_supported_key_type(struct qcom_ice *ice); +int qcom_ice_derive_sw_secret(struct qcom_ice *ice, + const u8 *eph_key, size_t eph_key_size, + u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]); +int qcom_ice_generate_key(struct qcom_ice *ice, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); +int qcom_ice_prepare_key(struct qcom_ice *ice, + const u8 *lt_key, size_t lt_key_size, + u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); +int qcom_ice_import_key(struct qcom_ice *ice, + const u8 *raw_key, size_t raw_key_size, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); struct qcom_ice *devm_of_qcom_ice_get(struct device *dev); #endif /* __QCOM_ICE_H__ */ -- cgit v1.2.3 From 56f4beffc9a056f70e56f53d01451441106bd4ff Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Apr 2025 12:17:03 +0200 Subject: scsi: ufs: core: Drop last_intr_status/ts stats In order to prepare for the introduction of a threaded interrupt handler, drop last_intr_status & last_intr_ts from the ufs_stats struct, and the associated debug code. Suggested-by: Bart Van Assche Reviewed-by: Bart Van Assche Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250407-topic-ufs-use-threaded-irq-v3-1-08bee980f71e@linaro.org Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index e3909cc691b2..fffa9cc46543 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -501,8 +501,6 @@ struct ufs_event_hist { /** * struct ufs_stats - keeps usage/err statistics - * @last_intr_status: record the last interrupt status. - * @last_intr_ts: record the last interrupt timestamp. * @hibern8_exit_cnt: Counter to keep track of number of exits, * reset this after link-startup. * @last_hibern8_exit_tstamp: Set time after the hibern8 exit. @@ -510,9 +508,6 @@ struct ufs_event_hist { * @event: array with event history. */ struct ufs_stats { - u32 last_intr_status; - u64 last_intr_ts; - u32 hibern8_exit_cnt; u64 last_hibern8_exit_tstamp; struct ufs_event_hist event[UFS_EVT_CNT]; -- cgit v1.2.3 From 2d6c7bcc6cca63238fce0a95874b053500a1112e Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Apr 2025 12:17:04 +0200 Subject: scsi: ufs: core: Track when MCQ ESI is enabled In preparation for adding a threaded interrupt handler, track when the MCQ ESI interrupt handlers were installed so we can optimize the MCQ interrupt handling to avoid walking the threaded handler in the case where ESI handlers are enabled. Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250407-topic-ufs-use-threaded-irq-v3-2-08bee980f71e@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K.
Petersen --- include/ufs/ufshcd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index fffa9cc46543..ec999fa67124 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -954,6 +954,7 @@ enum ufshcd_mcq_opr { * ufshcd_resume_complete() * @mcq_sup: is mcq supported by UFSHC * @mcq_enabled: is mcq ready to accept requests + * @mcq_esi_enabled: is mcq ESI configured * @res: array of resource info of MCQ registers * @mcq_base: Multi circular queue registers base address * @uhq: array of supported hardware queues @@ -1122,6 +1123,7 @@ struct ufs_hba { bool mcq_sup; bool lsdb_sup; bool mcq_enabled; + bool mcq_esi_enabled; struct ufshcd_res_info res[RES_MAX]; void __iomem *mcq_base; struct ufs_hw_queue *uhq; -- cgit v1.2.3 From 500d4b742e0cc712160054413e129a568467a7de Mon Sep 17 00:00:00 2001 From: Huan Tang Date: Fri, 11 Apr 2025 17:29:24 +0800 Subject: scsi: ufs: core: Add WB buffer resize support Follow JESD220G, support a WB buffer resize function through sysfs. The host can obtain resize hint and resize status, and enable the resize operation. Add three sysfs nodes: 1. wb_resize_enable 2. wb_resize_hint 3. wb_resize_status The detailed definition of the three nodes can be found in the sysfs documentation. Signed-off-by: Huan Tang Signed-off-by: Lu Hongfei Link: https://lore.kernel.org/r/20250411092924.1116-1-tanghuan@vivo.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufs.h | 34 +++++++++++++++++++++++++++++++++- include/ufs/ufshcd.h | 1 + 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 8a24ed59ec46..9188f7aa99da 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -180,7 +180,10 @@ enum attr_idn { QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D, QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E, QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE = 0x1F, - QUERY_ATTR_IDN_TIMESTAMP = 0x30 + QUERY_ATTR_IDN_TIMESTAMP = 0x30, + QUERY_ATTR_IDN_WB_BUF_RESIZE_HINT = 0x3C, + QUERY_ATTR_IDN_WB_BUF_RESIZE_EN = 0x3D, + QUERY_ATTR_IDN_WB_BUF_RESIZE_STATUS = 0x3E, }; /* Descriptor idn for Query requests */ @@ -289,6 +292,7 @@ enum device_desc_param { DEVICE_DESC_PARAM_PRDCT_REV = 0x2A, DEVICE_DESC_PARAM_HPB_VER = 0x40, DEVICE_DESC_PARAM_HPB_CONTROL = 0x42, + DEVICE_DESC_PARAM_EXT_WB_SUP = 0x4D, DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP = 0x4F, DEVICE_DESC_PARAM_WB_PRESRV_USRSPC_EN = 0x53, DEVICE_DESC_PARAM_WB_TYPE = 0x54, @@ -383,6 +387,11 @@ enum { UFSHCD_AMP = 3, }; +/* Possible values for wExtendedWriteBoosterSupport */ +enum { + UFS_DEV_WB_BUF_RESIZE = BIT(0), +}; + /* Possible values for dExtendedUFSFeaturesSupport */ enum { UFS_DEV_HIGH_TEMP_NOTIF = BIT(4), @@ -454,6 +463,28 @@ enum ufs_ref_clk_freq { REF_CLK_FREQ_INVAL = -1, }; +/* bWriteBoosterBufferResizeEn attribute */ +enum wb_resize_en { + WB_RESIZE_EN_IDLE = 0, + WB_RESIZE_EN_DECREASE = 1, + WB_RESIZE_EN_INCREASE = 2, +}; + +/* bWriteBoosterBufferResizeHint attribute */ +enum wb_resize_hint { + WB_RESIZE_HINT_KEEP = 0, + WB_RESIZE_HINT_DECREASE = 1, + WB_RESIZE_HINT_INCREASE = 2, +}; + +/* bWriteBoosterBufferResizeStatus attribute */ +enum wb_resize_status { + WB_RESIZE_STATUS_IDLE = 0, + WB_RESIZE_STATUS_IN_PROGRESS = 1, + WB_RESIZE_STATUS_COMPLETE_SUCCESS = 2, + WB_RESIZE_STATUS_GENERAL_FAILURE = 3, +}; + /* Query response result code */ enum { QUERY_RESULT_SUCCESS = 0x00, @@ -578,6 +609,7 @@ struct ufs_dev_info { bool wb_buf_flush_enabled; u8 wb_dedicated_lu; u8 
wb_buffer_type; + u16 ext_wb_sup; bool b_rpm_dev_flush_capable; u8 b_presrv_uspc_en; diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index ec999fa67124..a2ffdc0b1044 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1468,6 +1468,7 @@ int ufshcd_advanced_rpmb_req_handler(struct ufs_hba *hba, struct utp_upiu_req *r struct scatterlist *sg_list, enum dma_data_direction dir); int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable); int ufshcd_wb_toggle_buf_flush(struct ufs_hba *hba, bool enable); +int ufshcd_wb_set_resize_en(struct ufs_hba *hba, enum wb_resize_en en_mode); int ufshcd_suspend_prepare(struct device *dev); int __ufshcd_suspend_prepare(struct device *dev, bool rpm_ok_for_spm); void ufshcd_resume_complete(struct device *dev); -- cgit v1.2.3 From 2a63dd0edf388802074f1d4d6b588a3b4c380688 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 9 Apr 2025 19:36:45 -0700 Subject: net: Retire DCCP socket. DCCP was orphaned in 2021 by commit 054c4610bd05 ("MAINTAINERS: dccp: move Gerrit Renker to CREDITS"), which noted that the last maintainer had been inactive for five years. In recent years, it has become a playground for syzbot, and most changes to DCCP have been odd bug fixes triggered by syzbot. Apart from that, the only changes have been driven by treewide or networking API updates or adjustments related to TCP. Thus, in 2023, we announced we would remove DCCP in 2025 via commit b144fcaf46d4 ("dccp: Print deprecation notice."). Since then, only one individual has contacted the netdev mailing list. [0] There is ongoing research for Multipath DCCP. The repository is hosted on GitHub [1], and development is not taking place through the upstream community. While the repository is published under the GPLv2 license, the scheduling part remains proprietary, with a LICENSE file [2] stating: "This is not Open Source software." The researcher mentioned a plan to address the licensing issue, upstream the patches, and step up as a maintainer, but there has been no further communication since then. Maintaining DCCP for a decade without any real users has become a burden. Therefore, it's time to remove it. Removing DCCP will also provide significant benefits to TCP. It allows us to freely reorganize the layout of struct inet_connection_sock, which is currently shared with DCCP, and optimize it to reduce the number of cachelines accessed in the TCP fast path. Note that we keep DCCP netfilter modules as requested. 
[3] Link: https://lore.kernel.org/netdev/20230710182253.81446-1-kuniyu@amazon.com/T/#u #[0] Link: https://github.com/telekom/mp-dccp #[1] Link: https://github.com/telekom/mp-dccp/blob/mpdccp_v03_k5.10/net/dccp/non_gpl_scheduler/LICENSE #[2] Link: https://lore.kernel.org/netdev/Z_VQ0KlCRkqYWXa-@calendula/ #[3] Signed-off-by: Kuniyuki Iwashima Acked-by: Paul Moore (LSM and SELinux) Acked-by: Casey Schaufler Link: https://patch.msgid.link/20250410023921.11307-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/linux/dccp.h | 289 ------------------------------------------ include/linux/tfrc.h | 51 -------- include/net/inet_hashtables.h | 6 +- include/net/rstreason.h | 2 +- include/net/secure_seq.h | 4 - include/trace/events/sock.h | 1 - include/trace/events/sunrpc.h | 2 - 7 files changed, 2 insertions(+), 353 deletions(-) delete mode 100644 include/linux/tfrc.h (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 325af611909f..0b61b8b996d4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -2,79 +2,8 @@ #ifndef _LINUX_DCCP_H #define _LINUX_DCCP_H - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include #include -enum dccp_state { - DCCP_OPEN = TCP_ESTABLISHED, - DCCP_REQUESTING = TCP_SYN_SENT, - DCCP_LISTEN = TCP_LISTEN, - DCCP_RESPOND = TCP_SYN_RECV, - /* - * States involved in closing a DCCP connection: - * 1) ACTIVE_CLOSEREQ is entered by a server sending a CloseReq. - * - * 2) CLOSING can have three different meanings (RFC 4340, 8.3): - * a. Client has performed active-close, has sent a Close to the server - * from state OPEN or PARTOPEN, and is waiting for the final Reset - * (in this case, SOCK_DONE == 1). - * b. Client is asked to perform passive-close, by receiving a CloseReq - * in (PART)OPEN state. It sends a Close and waits for final Reset - * (in this case, SOCK_DONE == 0). - * c. Server performs an active-close as in (a), keeps TIMEWAIT state. - * - * 3) The following intermediate states are employed to give passively - * closing nodes a chance to process their unread data: - * - PASSIVE_CLOSE (from OPEN => CLOSED) and - * - PASSIVE_CLOSEREQ (from (PART)OPEN to CLOSING; case (b) above). 
- */ - DCCP_ACTIVE_CLOSEREQ = TCP_FIN_WAIT1, - DCCP_PASSIVE_CLOSE = TCP_CLOSE_WAIT, /* any node receiving a Close */ - DCCP_CLOSING = TCP_CLOSING, - DCCP_TIME_WAIT = TCP_TIME_WAIT, - DCCP_CLOSED = TCP_CLOSE, - DCCP_NEW_SYN_RECV = TCP_NEW_SYN_RECV, - DCCP_PARTOPEN = TCP_MAX_STATES, - DCCP_PASSIVE_CLOSEREQ, /* clients receiving CloseReq */ - DCCP_MAX_STATES -}; - -enum { - DCCPF_OPEN = TCPF_ESTABLISHED, - DCCPF_REQUESTING = TCPF_SYN_SENT, - DCCPF_LISTEN = TCPF_LISTEN, - DCCPF_RESPOND = TCPF_SYN_RECV, - DCCPF_ACTIVE_CLOSEREQ = TCPF_FIN_WAIT1, - DCCPF_CLOSING = TCPF_CLOSING, - DCCPF_TIME_WAIT = TCPF_TIME_WAIT, - DCCPF_CLOSED = TCPF_CLOSE, - DCCPF_NEW_SYN_RECV = TCPF_NEW_SYN_RECV, - DCCPF_PARTOPEN = (1 << DCCP_PARTOPEN), -}; - -static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) -{ - return (struct dccp_hdr *)skb_transport_header(skb); -} - -static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen) -{ - skb_push(skb, headlen); - skb_reset_transport_header(skb); - return memset(skb_transport_header(skb), 0, headlen); -} - static inline struct dccp_hdr_ext *dccp_hdrx(const struct dccp_hdr *dh) { return (struct dccp_hdr_ext *)((unsigned char *)dh + sizeof(*dh)); @@ -85,12 +14,6 @@ static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); } -static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - return __dccp_basic_hdr_len(dh); -} - static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh) { __u64 seq_nr = ntohs(dh->dccph_seq); @@ -103,222 +26,10 @@ static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh) return seq_nr; } -static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) -{ - return (struct dccp_hdr_request *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - -static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) -{ - return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - -static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) -{ - const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); - return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + ntohl(dhack->dccph_ack_nr_low); -} - -static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) -{ - return (struct dccp_hdr_response *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - -static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) -{ - return (struct dccp_hdr_reset *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh) { return __dccp_basic_hdr_len(dh) + dccp_packet_hdr_len(dh->dccph_type); } -static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) -{ - return __dccp_hdr_len(dccp_hdr(skb)); -} - -/** - * struct dccp_request_sock - represent DCCP-specific connection request - * @dreq_inet_rsk: structure inherited from - * @dreq_iss: initial sequence number, sent on the first Response (RFC 4340, 7.1) - * @dreq_gss: greatest sequence number sent (for retransmitted Responses) - * @dreq_isr: initial sequence number received in the first Request - * @dreq_gsr: greatest sequence number received (for retransmitted Request(s)) - * @dreq_service: service code present on the Request (there is just one) - * @dreq_featneg: feature negotiation options for this connection - * The following two fields are 
analogous to the ones in dccp_sock: - * @dreq_timestamp_echo: last received timestamp to echo (13.1) - * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo - */ -struct dccp_request_sock { - struct inet_request_sock dreq_inet_rsk; - __u64 dreq_iss; - __u64 dreq_gss; - __u64 dreq_isr; - __u64 dreq_gsr; - __be32 dreq_service; - spinlock_t dreq_lock; - struct list_head dreq_featneg; - __u32 dreq_timestamp_echo; - __u32 dreq_timestamp_time; -}; - -static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) -{ - return (struct dccp_request_sock *)req; -} - -extern struct inet_timewait_death_row dccp_death_row; - -extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, - struct sk_buff *skb); - -struct dccp_options_received { - u64 dccpor_ndp:48; - u32 dccpor_timestamp; - u32 dccpor_timestamp_echo; - u32 dccpor_elapsed_time; -}; - -struct ccid; - -enum dccp_role { - DCCP_ROLE_UNDEFINED, - DCCP_ROLE_LISTEN, - DCCP_ROLE_CLIENT, - DCCP_ROLE_SERVER, -}; - -struct dccp_service_list { - __u32 dccpsl_nr; - __be32 dccpsl_list[]; -}; - -#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) -#define DCCP_SERVICE_CODE_IS_ABSENT 0 - -static inline bool dccp_list_has_service(const struct dccp_service_list *sl, - const __be32 service) -{ - if (likely(sl != NULL)) { - u32 i = sl->dccpsl_nr; - while (i--) - if (sl->dccpsl_list[i] == service) - return true; - } - return false; -} - -struct dccp_ackvec; - -/** - * struct dccp_sock - DCCP socket state - * - * @dccps_swl - sequence number window low - * @dccps_swh - sequence number window high - * @dccps_awl - acknowledgement number window low - * @dccps_awh - acknowledgement number window high - * @dccps_iss - initial sequence number sent - * @dccps_isr - initial sequence number received - * @dccps_osr - first OPEN sequence number received - * @dccps_gss - greatest sequence number sent - * @dccps_gsr - greatest valid sequence number received - * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss - * @dccps_service - first (passive sock) or unique (active sock) service code - * @dccps_service_list - second .. 
last service code on passive socket - * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option - * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo - * @dccps_l_ack_ratio - feature-local Ack Ratio - * @dccps_r_ack_ratio - feature-remote Ack Ratio - * @dccps_l_seq_win - local Sequence Window (influences ack number validity) - * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) - * @dccps_pcslen - sender partial checksum coverage (via sockopt) - * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) - * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) - * @dccps_ndp_count - number of Non Data Packets since last data packet - * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) - * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) - * @dccps_hc_rx_ackvec - rx half connection ack vector - * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) - * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) - * @dccps_options_received - parsed set of retrieved options - * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy - * @dccps_tx_qlen - maximum length of the TX queue - * @dccps_role - role of this sock, one of %dccp_role - * @dccps_hc_rx_insert_options - receiver wants to add options when acking - * @dccps_hc_tx_insert_options - sender wants to add options when sending - * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) - * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets - * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) - * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) - */ -struct dccp_sock { - /* inet_connection_sock has to be the first member of dccp_sock */ - struct inet_connection_sock dccps_inet_connection; -#define dccps_syn_rtt dccps_inet_connection.icsk_ack.lrcvtime - __u64 dccps_swl; - __u64 dccps_swh; - __u64 dccps_awl; - __u64 dccps_awh; - __u64 dccps_iss; - __u64 dccps_isr; - __u64 dccps_osr; - __u64 dccps_gss; - __u64 dccps_gsr; - __u64 dccps_gar; - __be32 dccps_service; - __u32 dccps_mss_cache; - struct dccp_service_list *dccps_service_list; - __u32 dccps_timestamp_echo; - __u32 dccps_timestamp_time; - __u16 dccps_l_ack_ratio; - __u16 dccps_r_ack_ratio; - __u64 dccps_l_seq_win:48; - __u64 dccps_r_seq_win:48; - __u8 dccps_pcslen:4; - __u8 dccps_pcrlen:4; - __u8 dccps_send_ndp_count:1; - __u64 dccps_ndp_count:48; - unsigned long dccps_rate_last; - struct list_head dccps_featneg; - struct dccp_ackvec *dccps_hc_rx_ackvec; - struct ccid *dccps_hc_rx_ccid; - struct ccid *dccps_hc_tx_ccid; - struct dccp_options_received dccps_options_received; - __u8 dccps_qpolicy; - __u32 dccps_tx_qlen; - enum dccp_role dccps_role:2; - __u8 dccps_hc_rx_insert_options:1; - __u8 dccps_hc_tx_insert_options:1; - __u8 dccps_server_timewait:1; - __u8 dccps_sync_scheduled:1; - struct tasklet_struct dccps_xmitlet; - struct timer_list dccps_xmit_timer; -}; - -#define dccp_sk(ptr) container_of_const(ptr, struct dccp_sock, \ - dccps_inet_connection.icsk_inet.sk) - -static inline const char *dccp_role(const struct sock *sk) -{ - switch (dccp_sk(sk)->dccps_role) { - case DCCP_ROLE_UNDEFINED: return "undefined"; - case DCCP_ROLE_LISTEN: return "listen"; 
- case DCCP_ROLE_SERVER: return "server"; - case DCCP_ROLE_CLIENT: return "client"; - } - return NULL; -} - -extern void dccp_syn_ack_timeout(const struct request_sock *req); - #endif /* _LINUX_DCCP_H */ diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h deleted file mode 100644 index a5acc768085d..000000000000 --- a/include/linux/tfrc.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _LINUX_TFRC_H_ -#define _LINUX_TFRC_H_ -/* - * TFRC - Data Structures for the TCP-Friendly Rate Control congestion - * control mechanism as specified in RFC 3448. - * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald - * Copyright (c) 2005 Arnaldo Carvalho de Melo - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - */ -#include - -/** tfrc_rx_info - TFRC Receiver Data Structure - * - * @tfrcrx_x_recv: receiver estimate of sending rate (3.2.2) - * @tfrcrx_rtt: round-trip-time (communicated by sender) - * @tfrcrx_p: current estimate of loss event rate (3.2.2) - */ -struct tfrc_rx_info { - __u32 tfrcrx_x_recv; - __u32 tfrcrx_rtt; - __u32 tfrcrx_p; -}; - -/** tfrc_tx_info - TFRC Sender Data Structure - * - * @tfrctx_x: computed transmit rate (4.3 (4)) - * @tfrctx_x_recv: receiver estimate of send rate (4.3) - * @tfrctx_x_calc: return value of throughput equation (3.1) - * @tfrctx_rtt: (moving average) estimate of RTT (4.3) - * @tfrctx_p: current loss event rate (5.4) - * @tfrctx_rto: estimate of RTO, equals 4*RTT (4.3) - * @tfrctx_ipi: inter-packet interval (4.6) - * - * Note: X and X_recv are both maintained in units of 64 * bytes/second. This - * enables a finer resolution of sending rates and avoids problems with - * integer arithmetic; u32 is not sufficient as scaling consumes 6 bits. - */ -struct tfrc_tx_info { - __u64 tfrctx_x; - __u64 tfrctx_x_recv; - __u32 tfrctx_x_calc; - __u32 tfrctx_rtt; - __u32 tfrctx_p; - __u32 tfrctx_rto; - __u32 tfrctx_ipi; -}; - -#endif /* _LINUX_TFRC_H_ */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 949641e92539..d172b64a6320 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -177,12 +177,8 @@ struct inet_hashinfo { static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk) { -#if IS_ENABLED(CONFIG_IP_DCCP) - return sk->sk_prot->h.hashinfo ? : - sock_net(sk)->ipv4.tcp_death_row.hashinfo; -#else + /* TODO: rename function */ return sock_net(sk)->ipv4.tcp_death_row.hashinfo; -#endif } static inline struct inet_listen_hashbucket * diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 69cb2e52b7da..979ac87b5d99 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -36,7 +36,7 @@ /** * enum sk_rst_reason - the reasons of socket reset * - * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols. + * The reasons of sk reset, which are used in TCP/MPTCP protocols. 
* * There are three parts in order: * 1) skb drop reasons: relying on drop reasons for such as passive reset diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 21e7fa2a1813..cddebafb9f77 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -16,9 +16,5 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport); u32 secure_tcpv6_ts_off(const struct net *net, const __be32 *saddr, const __be32 *daddr); -u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport); #endif /* _NET_SECURE_SEQ */ diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 3836de435d9d..b5310439536e 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -19,7 +19,6 @@ /* The protocol traced by inet_sock_set_state */ #define inet_protocol_names \ EM(IPPROTO_TCP) \ - EM(IPPROTO_DCCP) \ EM(IPPROTO_SCTP) \ EMe(IPPROTO_MPTCP) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5d331383047b..de214f1dea58 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -21,7 +21,6 @@ TRACE_DEFINE_ENUM(SOCK_DGRAM); TRACE_DEFINE_ENUM(SOCK_RAW); TRACE_DEFINE_ENUM(SOCK_RDM); TRACE_DEFINE_ENUM(SOCK_SEQPACKET); -TRACE_DEFINE_ENUM(SOCK_DCCP); TRACE_DEFINE_ENUM(SOCK_PACKET); #define show_socket_type(type) \ @@ -31,7 +30,6 @@ TRACE_DEFINE_ENUM(SOCK_PACKET); { SOCK_RAW, "RAW" }, \ { SOCK_RDM, "RDM" }, \ { SOCK_SEQPACKET, "SEQPACKET" }, \ - { SOCK_DCCP, "DCCP" }, \ { SOCK_PACKET, "PACKET" }) /* This list is known to be incomplete, add new enums as needed. */ -- cgit v1.2.3 From 22d6c9eebf2e68e6ab831ded37daaa83daff6bb8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 9 Apr 2025 19:36:46 -0700 Subject: net: Unexport shared functions for DCCP. DCCP was removed, so many inet functions no longer need to be exported. Let's unexport or use EXPORT_IPV6_MOD() for such functions. sk_free_unlock_clone() is inlined in sk_clone_lock() as it's the only caller. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250410023921.11307-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 694f954258d4..bb4d6189292f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1781,7 +1781,6 @@ void sk_free(struct sock *sk); void sk_net_refcnt_upgrade(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); -void sk_free_unlock_clone(struct sock *sk); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); -- cgit v1.2.3 From 235bd9d21fcdf07dd125daa3e60ab64f8aefb927 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 9 Apr 2025 19:36:47 -0700 Subject: tcp: Rename tcp_or_dccp_get_hashinfo(). DCCP was removed, so tcp_or_dccp_get_hashinfo() should be renamed. Let's rename it to tcp_get_hashinfo(). 
Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250410023921.11307-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index d172b64a6320..4564b5d348b1 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -175,9 +175,8 @@ struct inet_hashinfo { bool pernet; } ____cacheline_aligned_in_smp; -static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk) +static inline struct inet_hashinfo *tcp_get_hashinfo(const struct sock *sk) { - /* TODO: rename function */ return sock_net(sk)->ipv4.tcp_death_row.hashinfo; } -- cgit v1.2.3 From 88113e09ada52be28968aacf4af7b3d667832f00 Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Savaliya Date: Fri, 4 Apr 2025 19:24:27 +0530 Subject: spi: Add support for Double Transfer Rate (DTR) mode Introduce support for protocol drivers to specify whether a transfer should use single or dual transfer mode. Currently, the SPI controller cannot determine this information from the user, leading to potential limitations in transfer capabilities. Add a new field `dtr_mode` in the `spi_transfer` structure. The `dtr_mode` field allows protocol drivers to indicate if Double Transfer Rate (DTR) mode is supported for a given transfer. When `dtr_mode` is set to true, the SPI controller will use DTR mode; otherwise, it will default to single transfer mode. Introduce another field `dtr_caps` to indicate if the QSPI controller is capable of supporting DTR mode (SDR and DDR). By default, both `dtr_caps` and `dtr_mode` will be false. These flags manage the QSPI controller's DTR mode capabilities within the SPI framework. The QSPI controller driver uses these flags to configure single or double transfer rates using the controller register. The existing spi-mem driver helps configure the DTR mode but is limited to memory devices. There is no support available to set DTR mode for non-memory devices, e.g., touch or any generic SPI sensor. This change is backward compatible and doesn't break existing SPI or QSPI drivers. Changes include: - Addition of `dtr_mode` and `dtr_caps` fields in the `spi_transfer` structure. - Documentation updates to reflect the new `dtr_mode` and `dtr_caps` fields. Signed-off-by: Mukesh Kumar Savaliya Link: https://patch.msgid.link/20250404135427.313825-1-quic_msavaliy@quicinc.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e10f0ebb8250..834a09bd8ccc 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -508,6 +508,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * found. * @put_offload: release the offload instance acquired by @get_offload. * @mem_caps: controller capabilities for the handling of memory operations. + * @dtr_caps: true if controller has dtr(single/dual transfer rate) capability. + * QSPI based controller should fill this based on controller's capability. * @unprepare_message: undo any work done by prepare_message(). 
* @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS @@ -751,6 +753,9 @@ struct spi_controller { const struct spi_controller_mem_ops *mem_ops; const struct spi_controller_mem_caps *mem_caps; + /* SPI or QSPI controller can set to true if supports SDR/DDR transfer rate */ + bool dtr_caps; + struct spi_offload *(*get_offload)(struct spi_device *spi, const struct spi_offload_config *config); void (*put_offload)(struct spi_offload *offload); @@ -1003,6 +1008,7 @@ struct spi_res { * processed the word, i.e. the "pre" timestamp should be taken before * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. + * @dtr_mode: true if supports double transfer rate. * @timestamped: true if the transfer has been timestamped * @error: Error status logged by SPI controller driver. * @@ -1054,6 +1060,9 @@ struct spi_res { * two should both be set. User can set transfer mode with SPI_NBITS_SINGLE(1x) * SPI_NBITS_DUAL(2x) and SPI_NBITS_QUAD(4x) to support these three transfer. * + * User may also set dtr_mode to true to use dual transfer mode if desired. if + * not, default considered as single transfer mode. + * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to @@ -1088,6 +1097,7 @@ struct spi_transfer { unsigned tx_nbits:4; unsigned rx_nbits:4; unsigned timestamped:1; + bool dtr_mode; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ -- cgit v1.2.3 From 7cfe1e208b86c7fd4b35a8a502a8b15604eec1e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 3 Apr 2025 17:11:32 +0200 Subject: pwm: Make chip parameter to pwmchip_get_drvdata() a const pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dev_get_drvdata()'s parameter is a const pointer, so the chip passed to pwmchip_get_drvdata() can be const, too. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20250403151134.266388-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 9ece4e5d3815..bf0469b2201d 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -369,7 +369,7 @@ static inline struct device *pwmchip_parent(const struct pwm_chip *chip) return chip->dev.parent; } -static inline void *pwmchip_get_drvdata(struct pwm_chip *chip) +static inline void *pwmchip_get_drvdata(const struct pwm_chip *chip) { return dev_get_drvdata(&chip->dev); } -- cgit v1.2.3 From cb7ca40a3882360ce87191793449d48df0b29184 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Apr 2025 23:11:22 +0200 Subject: x86/fpu: Make task_struct::thread constant size Turn thread.fpu into a pointer. Since most FPU code internals work by passing around the FPU pointer already, the code generation impact is small. This allows us to remove the old kludge of task_struct being variable size: struct task_struct { ... /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. 
*/ randomized_struct_fields_end /* CPU-specific state of this task: */ struct thread_struct thread; /* * WARNING: on x86, 'thread_struct' contains a variable-sized * structure. It *MUST* be at the end of 'task_struct'. * * Do not put anything below here! */ }; ... which creates a number of problems, such as requiring thread_struct to be the last member of the struct - not allowing it to be struct-randomized, etc. But the primary motivation is to allow the decoupling of task_struct from hardware details ( in particular), and to eventually allow the per-task infrastructure: DECLARE_PER_TASK(type, name); ... per_task(current, name) = val; ... which requires task_struct to be a constant size struct. The fpu_thread_struct_whitelist() quirk to hardened usercopy can be removed, now that the FPU structure is not embedded in the task struct anymore, which reduces text footprint a bit. Fixed-by: Oleg Nesterov Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Chang S. Bae Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250409211127.3544993-4-mingo@kernel.org --- include/linux/sched.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f96ac1982893..4ecc0c6b1cb0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1646,22 +1646,15 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif - /* - * New fields for task_struct should be added above here, so that - * they are included in the randomized portion of task_struct. - */ - randomized_struct_fields_end - /* CPU-specific state of this task: */ struct thread_struct thread; /* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! + * New fields for task_struct should be added above here, so that + * they are included in the randomized portion of task_struct. */ -}; + randomized_struct_fields_end +} __attribute__ ((aligned (64))); #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -- cgit v1.2.3 From c04269c02273b24398590398c0b73605c72f17ac Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 7 Apr 2025 20:16:22 +0100 Subject: dt-bindings: clock: renesas: Document RZ/V2N SoC CPG Document the device tree bindings for the Renesas RZ/V2N (R9A09G056) SoC Clock Pulse Generator (CPG). Update `renesas,rzv2h-cpg.yaml` to include the compatible string for RZ/V2N SoC and adjust the title and description accordingly. Additionally, introduce `renesas,r9a09g056-cpg.h` to define core clock constants for the RZ/V2N SoC. Note the existing RZ/V2H(P) family-specific clock driver will be reused for this SoC. 
Signed-off-by: Lad Prabhakar Acked-by: Rob Herring (Arm) Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250407191628.323613-7-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g056-cpg.h | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/dt-bindings/clock/renesas,r9a09g056-cpg.h (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g056-cpg.h b/include/dt-bindings/clock/renesas,r9a09g056-cpg.h new file mode 100644 index 000000000000..f4905b27f8d9 --- /dev/null +++ b/include/dt-bindings/clock/renesas,r9a09g056-cpg.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2025 Renesas Electronics Corp. + */ +#ifndef __DT_BINDINGS_CLOCK_RENESAS_R9A09G056_CPG_H__ +#define __DT_BINDINGS_CLOCK_RENESAS_R9A09G056_CPG_H__ + +#include + +/* Core Clock list */ +#define R9A09G056_SYS_0_PCLK 0 +#define R9A09G056_CA55_0_CORE_CLK0 1 +#define R9A09G056_CA55_0_CORE_CLK1 2 +#define R9A09G056_CA55_0_CORE_CLK2 3 +#define R9A09G056_CA55_0_CORE_CLK3 4 +#define R9A09G056_CA55_0_PERIPHCLK 5 +#define R9A09G056_CM33_CLK0 6 +#define R9A09G056_CST_0_SWCLKTCK 7 +#define R9A09G056_IOTOP_0_SHCLK 8 +#define R9A09G056_USB2_0_CLK_CORE0 9 +#define R9A09G056_GBETH_0_CLK_PTP_REF_I 10 +#define R9A09G056_GBETH_1_CLK_PTP_REF_I 11 + +#endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G056_CPG_H__ */ -- cgit v1.2.3 From 34180863e000f325e5d11a2fd4af300a0ae50f71 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 8 Apr 2025 16:44:27 +0800 Subject: firmware: arm_scmi: imx: Add i.MX95 LMM protocol Add the Logical Machine Management (LMM) protocol, which is intended for boot, shutdown, and reset of other logical machines (LM). It is usually used to allow one LM to manage another that is used as an offload or accelerator engine.
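As an illustration (not part of this patch), a consumer on the agent side could drive the new protocol ops declared in the header change that follows roughly as sketched below; it assumes the protocol handle "ph" and the ops pointer were already obtained from the SCMI core at probe time, and the LM id of 1 is purely hypothetical:

static int example_boot_lm(const struct scmi_protocol_handle *ph,
			   const struct scmi_imx_lmm_proto_ops *lmm_ops)
{
	struct scmi_imx_lmm_info info;
	int ret;

	/* Boot logical machine #1 (hypothetical id) */
	ret = lmm_ops->lmm_power_boot(ph, 1, true);
	if (ret)
		return ret;

	/* Read back its state to confirm it came up */
	ret = lmm_ops->lmm_info(ph, 1, &info);
	if (ret)
		return ret;

	return info.state == LMM_STATE_LM_ON ? 0 : -EIO;
}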
Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Message-Id: <20250408-imx-lmm-cpu-v4-3-4c5f4a456e49@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/scmi_imx_protocol.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h index 53b356a26414..2a96fc29cb6f 100644 --- a/include/linux/scmi_imx_protocol.h +++ b/include/linux/scmi_imx_protocol.h @@ -11,8 +11,10 @@ #include #include #include +#include #include +#define SCMI_PROTOCOL_IMX_LMM 0x80 #define SCMI_PROTOCOL_IMX_BBM 0x81 #define SCMI_PROTOCOL_IMX_MISC 0x84 @@ -57,4 +59,34 @@ struct scmi_imx_misc_proto_ops { int (*misc_ctrl_req_notify)(const struct scmi_protocol_handle *ph, u32 ctrl_id, u32 evt_id, u32 flags); }; + +/* See LMM_ATTRIBUTES in imx95.rst */ +#define LMM_ID_DISCOVER 0xFFFFFFFFU +#define LMM_MAX_NAME 16 + +enum scmi_imx_lmm_state { + LMM_STATE_LM_OFF, + LMM_STATE_LM_ON, + LMM_STATE_LM_SUSPEND, + LMM_STATE_LM_POWERED, +}; + +struct scmi_imx_lmm_info { + u32 lmid; + enum scmi_imx_lmm_state state; + u32 errstatus; + u8 name[LMM_MAX_NAME]; +}; + +struct scmi_imx_lmm_proto_ops { + int (*lmm_power_boot)(const struct scmi_protocol_handle *ph, u32 lmid, + bool boot); + int (*lmm_info)(const struct scmi_protocol_handle *ph, u32 lmid, + struct scmi_imx_lmm_info *info); + int (*lmm_reset_vector_set)(const struct scmi_protocol_handle *ph, + u32 lmid, u32 cpuid, u32 flags, u64 vector); + int (*lmm_shutdown)(const struct scmi_protocol_handle *ph, u32 lmid, + u32 flags); +}; + #endif -- cgit v1.2.3 From e68c305bc2f3cdc0b6fa21cd9579650c1457c070 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 8 Apr 2025 16:44:28 +0800 Subject: firmware: arm_scmi: imx: Add i.MX95 CPU Protocol This protocol allows an agent to start, stop a CPU or set reset vector. It is used to manage auxiliary CPUs in an LM (e.g. additional cores in an AP cluster). Signed-off-by: Peng Fan Message-Id: <20250408-imx-lmm-cpu-v4-4-4c5f4a456e49@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/scmi_imx_protocol.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h index 2a96fc29cb6f..27bd372cbfb1 100644 --- a/include/linux/scmi_imx_protocol.h +++ b/include/linux/scmi_imx_protocol.h @@ -16,6 +16,7 @@ #define SCMI_PROTOCOL_IMX_LMM 0x80 #define SCMI_PROTOCOL_IMX_BBM 0x81 +#define SCMI_PROTOCOL_IMX_CPU 0x82 #define SCMI_PROTOCOL_IMX_MISC 0x84 #define SCMI_IMX_VENDOR "NXP" @@ -89,4 +90,13 @@ struct scmi_imx_lmm_proto_ops { u32 flags); }; +struct scmi_imx_cpu_proto_ops { + int (*cpu_reset_vector_set)(const struct scmi_protocol_handle *ph, + u32 cpuid, u64 vector, bool start, + bool boot, bool resume); + int (*cpu_start)(const struct scmi_protocol_handle *ph, u32 cpuid, + bool start); + int (*cpu_started)(const struct scmi_protocol_handle *ph, u32 cpuid, + bool *started); +}; #endif -- cgit v1.2.3 From 028363685bd0b7a19b4a820f82dd905b1dc83999 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 9 Apr 2025 15:59:57 +0200 Subject: espintcp: remove encap socket caching to avoid reference leak The current scheme for caching the encap socket can lead to reference leaks when we try to delete the netns. The reference chain is: xfrm_state -> enacp_sk -> netns Since the encap socket is a userspace socket, it holds a reference on the netns. 
If we delete the espintcp state (through flush or individual delete) before removing the netns, the reference on the socket is dropped and the netns is correctly deleted. Otherwise, the netns may not be reachable anymore (if all processes within the ns have terminated), so we cannot delete the xfrm state to drop its reference on the socket. This patch results in a small (~2% in my tests) performance regression. A GC-type mechanism could be added for the socket cache, to clear references if the state hasn't been used "recently", but it's a lot more complex than just not caching the socket. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 39365fd2ea17..06ab2a3d2ebd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -236,7 +236,6 @@ struct xfrm_state { /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; - struct sock __rcu *encap_sk; /* NAT keepalive */ u32 nat_keepalive_interval; /* seconds */ -- cgit v1.2.3 From d1e70eed0b30bd2b15fc6c93b5701be564bbe353 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 14 Apr 2025 14:32:31 +0800 Subject: ASoC: soc-acpi: add get_function_tplg_files ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always use a single topology that contains all PCM devices belonging to a machine configuration. However, with SDCA, we want to be able to load function topologies based on the supported device functions. This change is in preparation for loading those function topologies. Signed-off-by: Bard Liao Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Link: https://patch.msgid.link/20250414063239.85200-4-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 72e371a21767..b8af309c2683 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -10,6 +10,7 @@ #include #include #include +#include struct snd_soc_acpi_package_context { char *name; /* package name */ @@ -193,6 +194,15 @@ struct snd_soc_acpi_link_adr { * is not constant since this field may be updated at run-time * @sof_tplg_filename: Sound Open Firmware topology file name, if enabled * @tplg_quirk_mask: quirks to select different topology files dynamically + * @get_function_tplg_files: This is an optional callback, if specified then instead of + * the single sof_tplg_filename the callback will return the list of function topology + * files to be loaded. + * Return value: The number of the files or negative ERRNO. 0 means that the single topology + * file should be used, no function topology split can be used on the machine. + * @card: the pointer of the card + * @mach: the pointer of the machine driver + * @prefix: the prefix of the topology file name. Typically, it is the path. + * @tplg_files: the pointer of the array of the topology file names. 
 */ /* Descriptor for SST ASoC machine driver */ struct snd_soc_acpi_mach { @@ -212,6 +222,9 @@ struct snd_soc_acpi_mach { struct snd_soc_acpi_mach_params mach_params; const char *sof_tplg_filename; const u32 tplg_quirk_mask; + int (*get_function_tplg_files)(struct snd_soc_card *card, + const struct snd_soc_acpi_mach *mach, + const char *prefix, const char ***tplg_files); }; #define SND_SOC_ACPI_MAX_CODECS 3 -- cgit v1.2.3 From 7242bbf418f0521c0e3dd7034b7b3cc4413d7d5a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 8 Apr 2025 16:44:29 +0800 Subject: firmware: imx: Add i.MX95 SCMI LMM driver The i.MX95 System Manager exports the SCMI LMM protocol for Linux to manage Logical Machines. The driver uses the LMM protocol interface to boot and shut down an LM. Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Message-Id: <20250408-imx-lmm-cpu-v4-5-4c5f4a456e49@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/firmware/imx/sm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index 9b85a3f028d1..bc27b04afb2f 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -8,6 +8,7 @@ #include #include +#include #include #define SCMI_IMX_CTRL_PDM_CLK_SEL 0 /* AON PDM clock sel */ @@ -20,4 +21,17 @@ int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); int scmi_imx_misc_ctrl_set(u32 id, u32 val); +enum scmi_imx_lmm_op { + SCMI_IMX_LMM_BOOT, + SCMI_IMX_LMM_POWER_ON, + SCMI_IMX_LMM_SHUTDOWN, +}; + +/* For shutdown operation */ +#define SCMI_IMX_LMM_OP_FORCEFUL 0 +#define SCMI_IMX_LMM_OP_GRACEFUL BIT(0) + +int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags); +int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info); +int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector); #endif -- cgit v1.2.3 From 1055faa5d6606cccc706e37956f6ff8fb7c22d55 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 8 Apr 2025 16:44:30 +0800 Subject: firmware: imx: Add i.MX95 SCMI CPU driver The i.MX95 System Manager exports the SCMI CPU protocol for Linux to manage CPU cores. The driver uses the CPU protocol interface to start and stop CPU cores (e.g., the M7).
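As an illustration (not part of this patch), a client such as a remote-processor driver could combine the helpers whose prototypes are added below as in the following sketch; the cpuid and firmware entry address are hypothetical, and the chosen reset-vector flags are only a guess at a typical boot sequence:

static int example_start_aux_core(u32 cpuid, u64 entry)
{
	bool started;
	int ret;

	/* Point the core's boot reset vector at the firmware entry address */
	ret = scmi_imx_cpu_reset_vector_set(cpuid, entry, false, true, false);
	if (ret)
		return ret;

	/* Release the core from reset */
	ret = scmi_imx_cpu_start(cpuid, true);
	if (ret)
		return ret;

	/* Confirm the System Manager reports it as running */
	ret = scmi_imx_cpu_started(cpuid, &started);
	if (ret)
		return ret;

	return started ? 0 : -EIO;
}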
Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Message-Id: <20250408-imx-lmm-cpu-v4-6-4c5f4a456e49@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/firmware/imx/sm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index bc27b04afb2f..a8a17eeb7d90 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -21,6 +21,11 @@ int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); int scmi_imx_misc_ctrl_set(u32 id, u32 val); +int scmi_imx_cpu_start(u32 cpuid, bool start); +int scmi_imx_cpu_started(u32 cpuid, bool *started); +int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start, bool boot, + bool resume); + enum scmi_imx_lmm_op { SCMI_IMX_LMM_BOOT, SCMI_IMX_LMM_POWER_ON, -- cgit v1.2.3 From c26c192c3d486a2a7d83d254bae294c2f8f50abf Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Apr 2025 22:00:56 +0200 Subject: udp: properly deal with xfrm encap and ADDRFORM UDP GRO accounting assumes that the GRO receive callback is always set when the UDP tunnel is enabled, but syzkaller proved otherwise, leading tot the following splat: WARNING: CPU: 0 PID: 5837 at net/ipv4/udp_offload.c:123 udp_tunnel_update_gro_rcv+0x28d/0x4c0 net/ipv4/udp_offload.c:123 Modules linked in: CPU: 0 UID: 0 PID: 5837 Comm: syz-executor850 Not tainted 6.14.0-syzkaller-13320-g420aabef3ab5 #0 PREEMPT(full) Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 RIP: 0010:udp_tunnel_update_gro_rcv+0x28d/0x4c0 net/ipv4/udp_offload.c:123 Code: 00 00 e8 c6 5a 2f f7 48 c1 e5 04 48 8d b5 20 53 c7 9a ba 10 00 00 00 4c 89 ff e8 ce 87 99 f7 e9 ce 00 00 00 e8 a4 5a 2f f7 90 <0f> 0b 90 e9 de fd ff ff bf 01 00 00 00 89 ee e8 cf 5e 2f f7 85 ed RSP: 0018:ffffc90003effa88 EFLAGS: 00010293 RAX: ffffffff8a93fc9c RBX: 0000000000000000 RCX: ffff8880306f9e00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffffffff8a93fabe R09: 1ffffffff20bfb2e R10: dffffc0000000000 R11: fffffbfff20bfb2f R12: ffff88814ef21738 R13: dffffc0000000000 R14: ffff88814ef21778 R15: 1ffff11029de42ef FS: 0000000000000000(0000) GS:ffff888124f96000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f04eec760d0 CR3: 000000000eb38000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: udp_tunnel_cleanup_gro include/net/udp_tunnel.h:205 [inline] udpv6_destroy_sock+0x212/0x270 net/ipv6/udp.c:1829 sk_common_release+0x71/0x2e0 net/core/sock.c:3896 inet_release+0x17d/0x200 net/ipv4/af_inet.c:435 __sock_release net/socket.c:647 [inline] sock_close+0xbc/0x240 net/socket.c:1391 __fput+0x3e9/0x9f0 fs/file_table.c:465 task_work_run+0x251/0x310 kernel/task_work.c:227 exit_task_work include/linux/task_work.h:40 [inline] do_exit+0xa11/0x27f0 kernel/exit.c:953 do_group_exit+0x207/0x2c0 kernel/exit.c:1102 __do_sys_exit_group kernel/exit.c:1113 [inline] __se_sys_exit_group kernel/exit.c:1111 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1111 x64_sys_call+0x26c3/0x26d0 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f04eebfac79 Code: Unable to access opcode bytes at 0x7f04eebfac4f. 
RSP: 002b:00007fffdcaa34a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f04eebfac79 RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000 RBP: 00007f04eec75270 R08: ffffffffffffffb8 R09: 00007fffdcaa36c8 R10: 0000200000000000 R11: 0000000000000246 R12: 00007f04eec75270 R13: 0000000000000000 R14: 00007f04eec75cc0 R15: 00007f04eebcca70 Address the issue moving the accounting hook into setup_udp_tunnel_sock() and set_xfrm_gro_udp_encap_rcv(), where the GRO callback is actually set. set_xfrm_gro_udp_encap_rcv() is prone to races with IPV6_ADDRFORM, run the relevant setsockopt under the socket lock to ensure using consistent values of sk_family and up->encap_type. Refactor the GRO callback selection code, to make it clear that the function pointer is always initialized. Reported-by: syzbot+8c469a2260132cd095c1@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8c469a2260132cd095c1 Fixes: 172bf009c18d ("xfrm: Support GRO for IPv4 ESP in UDP encapsulation") Fixes: 5d7f5b2f6b935 ("udp_tunnel: use static call for GRO hooks when possible") Signed-off-by: Paolo Abeni Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/92bcdb6899145a9a387c8fa9e3ca656642a43634.1744228733.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/net/udp_tunnel.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 288f06f23a80..2df3b8344eb5 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -215,7 +215,6 @@ static inline void udp_tunnel_encap_enable(struct sock *sk) if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif - udp_tunnel_update_gro_rcv(sk, true); udp_encap_enable(); } -- cgit v1.2.3 From 097f171f98289cf737437599c40b0d1e81266e9e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 9 Apr 2025 18:42:46 -0700 Subject: net: convert dev->rtnl_link_state to a bool netdevice reg_state was split into two 16 bit enums back in 2010 in commit a2835763e130 ("rtnetlink: handle rtnl_link netlink notifications manually"). Since the split the fields have been moved apart, and last year we converted reg_state to a normal u8 in commit 4d42b37def70 ("net: convert dev->reg_state to u8"). rtnl_link_state being a 16 bitfield makes no sense. Convert it to a single bool, it seems very unlikely after 15 years that we'll need more values in it. We could drop dev->rtnl_link_ops from the conditions but feels like having it there more clearly points at the reason for this hack. Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250410014246.780885-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8544f6a680c..e6036b82ef4c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1946,9 +1946,6 @@ enum netdev_reg_state { * * @reg_state: Register/unregister state machine * @dismantle: Device is going to be freed - * @rtnl_link_state: This enum represents the phases of creating - * a new link - * * @needs_free_netdev: Should unregister perform free_netdev? 
* @priv_destructor: Called from unregister * @npinfo: XXX: need comments on this one @@ -2363,11 +2360,8 @@ struct net_device { /** @moving_ns: device is changing netns, protected by @lock */ bool moving_ns; - - enum { - RTNL_LINK_INITIALIZED, - RTNL_LINK_INITIALIZING, - } rtnl_link_state:16; + /** @rtnl_link_initializing: Device being created, suppress events */ + bool rtnl_link_initializing; bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); -- cgit v1.2.3 From e846fb5e7c5243c65ff67247cb29a9d76bbcc4e8 Mon Sep 17 00:00:00 2001 From: Joseph Huang Date: Fri, 11 Apr 2025 11:03:16 -0400 Subject: net: bridge: mcast: Add offload failed mdb flag Add MDB_FLAGS_OFFLOAD_FAILED and MDB_PG_FLAGS_OFFLOAD_FAILED to indicate that an attempt to offload the MDB entry to switchdev has failed. Signed-off-by: Joseph Huang Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250411150323.1117797-2-Joseph.Huang@garmin.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index a5b743a2f775..f2a6de424f3f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -699,10 +699,11 @@ struct br_mdb_entry { #define MDB_TEMPORARY 0 #define MDB_PERMANENT 1 __u8 state; -#define MDB_FLAGS_OFFLOAD (1 << 0) -#define MDB_FLAGS_FAST_LEAVE (1 << 1) -#define MDB_FLAGS_STAR_EXCL (1 << 2) -#define MDB_FLAGS_BLOCKED (1 << 3) +#define MDB_FLAGS_OFFLOAD (1 << 0) +#define MDB_FLAGS_FAST_LEAVE (1 << 1) +#define MDB_FLAGS_STAR_EXCL (1 << 2) +#define MDB_FLAGS_BLOCKED (1 << 3) +#define MDB_FLAGS_OFFLOAD_FAILED (1 << 4) __u8 flags; __u16 vid; struct { -- cgit v1.2.3 From 9fbe1e3e61c21508861a72324087aeeea85f796f Mon Sep 17 00:00:00 2001 From: Joseph Huang Date: Fri, 11 Apr 2025 11:03:17 -0400 Subject: net: bridge: Add offload_fail_notification bopt Add BR_BOOLOPT_MDB_OFFLOAD_FAIL_NOTIFICATION bool option. Signed-off-by: Joseph Huang Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250411150323.1117797-3-Joseph.Huang@garmin.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index f2a6de424f3f..73876c0e2bba 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -831,6 +831,7 @@ enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, BR_BOOLOPT_MCAST_VLAN_SNOOPING, BR_BOOLOPT_MST_ENABLE, + BR_BOOLOPT_MDB_OFFLOAD_FAIL_NOTIFICATION, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From cd3c93167da0e760b5819246eae7a4ea30fd014b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Apr 2025 12:41:36 +0200 Subject: page_pool: Move pp_magic check into helper functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we are about to stash some more information into the pp_magic field, let's move the magic signature checks into a pair of helper functions so it can be changed in one place. 
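As an illustration (not part of this patch), a caller would now test for a page_pool page through the helper added below instead of comparing page->pp_magic against PP_SIGNATURE directly, so the encoding can later change in one place; the reporting function here is hypothetical:

static inline void example_report_pp_page(struct page *page)
{
	/* page_pool_page_is_pp() hides the pp_magic signature details */
	if (page_pool_page_is_pp(page))
		pr_warn("unexpected page_pool page\n");
}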
Reviewed-by: Mina Almasry Tested-by: Yonglong Liu Acked-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-1-6a9ef2e0cba8@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/mm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b7f13f087954..56c47f4a38ca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4248,4 +4248,24 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); #define VM_SEALED_SYSMAP VM_NONE #endif +/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is + * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for + * the head page of compound page and bit 1 for pfmemalloc page. + * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling + * the pfmemalloc page. + */ +#define PP_MAGIC_MASK ~0x3UL + +#ifdef CONFIG_PAGE_POOL +static inline bool page_pool_page_is_pp(struct page *page) +{ + return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; +} +#else +static inline bool page_pool_page_is_pp(struct page *page) +{ + return false; +} +#endif + #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From ee62ce7a1d909ccba0399680a03c2dee83bcae95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Apr 2025 12:41:37 +0200 Subject: page_pool: Track DMA-mapped pages and unmap them when destroying the pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When enabling DMA mapping in page_pool, pages are kept DMA mapped until they are released from the pool, to avoid the overhead of re-mapping the pages every time they are used. This causes resource leaks and/or crashes when there are pages still outstanding while the device is torn down, because page_pool will attempt an unmap through a non-existent DMA device on the subsequent page return. To fix this, implement a simple tracking of outstanding DMA-mapped pages in page pool using an xarray. This was first suggested by Mina[0], and turns out to be fairly straight forward: We simply store pointers to pages directly in the xarray with xa_alloc() when they are first DMA mapped, and remove them from the array on unmap. Then, when a page pool is torn down, it can simply walk the xarray and unmap all pages still present there before returning, which also allows us to get rid of the get/put_device() calls in page_pool. Using xa_cmpxchg(), no additional synchronisation is needed, as a page will only ever be unmapped once. To avoid having to walk the entire xarray on unmap to find the page reference, we stash the ID assigned by xa_alloc() into the page structure itself, using the upper bits of the pp_magic field. This requires a couple of defines to avoid conflicting with the POINTER_POISON_DELTA define, but this is all evaluated at compile-time, so does not affect run-time performance. The bitmap calculations in this patch gives the following number of bits for different architectures: - 23 bits on 32-bit architectures - 21 bits on PPC64 (because of the definition of ILLEGAL_POINTER_VALUE) - 32 bits on other 64-bit architectures Stashing a value into the unused bits of pp_magic does have the effect that it can make the value stored there lie outside the unmappable range (as governed by the mmap_min_addr sysctl), for architectures that don't define ILLEGAL_POINTER_VALUE. 
This means that if one of the pointers that is aliased to the pp_magic field (such as page->lru.next) is dereferenced while the page is owned by page_pool, that could lead to a dereference into userspace, which is a security concern. The risk of this is mitigated by the fact that (a) we always clear pp_magic before releasing a page from page_pool, and (b) this would need a use-after-free bug for struct page, which can have many other risks since page->lru.next is used as a generic list pointer in multiple places in the kernel. As such, with this patch we take the position that this risk is negligible in practice. For more discussion, see[1]. Since all the tracking added in this patch is performed on DMA map/unmap, no additional code is needed in the fast path, meaning the performance overhead of this tracking is negligible there. A micro-benchmark shows that the total overhead of the tracking itself is about 400 ns (39 cycles(tsc) 395.218 ns; sum for both map and unmap[2]). Since this cost is only paid on DMA map and unmap, it seems like an acceptable cost to fix the late unmap issue. Further optimisation can narrow the cases where this cost is paid (for instance by eliding the tracking when DMA map/unmap is a no-op). The extra memory needed to track the pages is neatly encapsulated inside xarray, which uses the 'struct xa_node' structure to track items. This structure is 576 bytes long, with slots for 64 items, meaning that a full node occurs only 9 bytes of overhead per slot it tracks (in practice, it probably won't be this efficient, but in any case it should be an acceptable overhead). [0] https://lore.kernel.org/all/CAHS8izPg7B5DwKfSuzz-iOop_YRbk3Sd6Y4rX7KBG9DcVJcyWg@mail.gmail.com/ [1] https://lore.kernel.org/r/20250320023202.GA25514@openwall.com [2] https://lore.kernel.org/r/ae07144c-9295-4c9d-a400-153bb689fe9e@huawei.com Reported-by: Yonglong Liu Closes: https://lore.kernel.org/r/8743264a-9700-4227-a556-5f931c720211@huawei.com Fixes: ff7d6b27f894 ("page_pool: refurbish version of page_pool code") Suggested-by: Mina Almasry Reviewed-by: Mina Almasry Reviewed-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Qiuling Ren Tested-by: Yuying Ma Tested-by: Yonglong Liu Acked-by: Jesper Dangaard Brouer Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-2-6a9ef2e0cba8@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/mm.h | 46 +++++++++++++++++++++++++++++++++++++++---- include/linux/poison.h | 4 ++++ include/net/page_pool/types.h | 6 ++++++ 3 files changed, 52 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 56c47f4a38ca..130d3c9d2ee4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4248,13 +4248,51 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); #define VM_SEALED_SYSMAP VM_NONE #endif +/* + * DMA mapping IDs for page_pool + * + * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and + * stashes it in the upper bits of page->pp_magic. We always want to be able to + * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP + * pages can have arbitrary kernel pointers stored in the same field as pp_magic + * (since it overlaps with page->lru.next), so we must ensure that we cannot + * mistake a valid kernel pointer with any of the values we write into this + * field. 
+ * + * On architectures that set POISON_POINTER_DELTA, this is already ensured, + * since this value becomes part of PP_SIGNATURE; meaning we can just use the + * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the + * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is + * 0, we make sure that we leave the two topmost bits empty, as that guarantees + * we won't mistake a valid kernel pointer for a value we set, regardless of the + * VMSPLIT setting. + * + * Altogether, this means that the number of bits available is constrained by + * the size of an unsigned long (at the upper end, subtracting two bits per the + * above), and the definition of PP_SIGNATURE (with or without + * POISON_POINTER_DELTA). + */ +#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) +#if POISON_POINTER_DELTA > 0 +/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA + * index to not overlap with that if set + */ +#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) +#else +/* Always leave out the topmost two; see above. */ +#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) +#endif + +#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ + PP_DMA_INDEX_SHIFT) + /* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for - * the head page of compound page and bit 1 for pfmemalloc page. - * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling - * the pfmemalloc page. + * the head page of compound page and bit 1 for pfmemalloc page, as well as the + * bits used for the DMA index. page_is_pfmemalloc() is checked in + * __page_pool_put_page() to avoid recycling the pfmemalloc page. 
*/ -#define PP_MAGIC_MASK ~0x3UL +#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) #ifdef CONFIG_PAGE_POOL static inline bool page_pool_page_is_pp(struct page *page) diff --git a/include/linux/poison.h b/include/linux/poison.h index 331a9a996fa8..8ca2235f78d5 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -70,6 +70,10 @@ #define KEY_DESTROY 0xbd /********** net/core/page_pool.c **********/ +/* + * page_pool uses additional free bits within this value to store data, see the + * definition of PP_DMA_INDEX_MASK in mm.h + */ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) /********** net/core/skbuff.c **********/ diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 36eb57d73abc..431b593de709 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA @@ -33,6 +34,9 @@ #define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \ PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM) +/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */ +#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1) + /* * Fast allocation side cache array/stack * @@ -221,6 +225,8 @@ struct page_pool { void *mp_priv; const struct memory_provider_ops *mp_ops; + struct xarray dma_mapped; + #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ struct page_pool_recycle_stats __percpu *recycle_stats; -- cgit v1.2.3 From ceaceaf79ea0fe337344fc5c1fb10a421a362410 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 11 Apr 2025 23:04:19 +0100 Subject: net: ethtool: fix get_ts_stats() documentation Commit 0e9c127729be ("ethtool: add interface to read Tx hardware timestamping statistics") added documentation for timestamping statistics, but added the detailed explanation for this method to the get_ts_info() rather than get_ts_stats(). Move it to the correct entry. Cc: Rahul Rameshbabu Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1u3MTz-000Crx-IW@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 8210ece94fa6..013d25858642 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -926,10 +926,11 @@ struct kernel_ethtool_ts_info { * @get_ts_info: Get the time stamping and PTP hardware clock capabilities. * It may be called with RCU, or rtnl or reference on the device. * Drivers supporting transmit time stamps in software should set this to - * ethtool_op_get_ts_info(). Drivers must not zero statistics which they - * don't report. The stats structure is initialized to ETHTOOL_STAT_NOT_SET - * indicating driver does not report statistics. - * @get_ts_stats: Query the device hardware timestamping statistics. + * ethtool_op_get_ts_info(). + * @get_ts_stats: Query the device hardware timestamping statistics. Drivers + * must not zero statistics which they don't report. The stats structure + * is initialized to ETHTOOL_STAT_NOT_SET indicating driver does not + * report statistics. * @get_module_info: Get the size and type of the eeprom contained within * a plug-in module. 
* @get_module_eeprom: Get the eeprom information from the plug-in module -- cgit v1.2.3 From 7a60d91c690bf73c2c78e763efa29f294e217c3a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 11 Apr 2025 13:52:32 -0700 Subject: net: Add ->exit_rtnl() hook to struct pernet_operations. struct pernet_operations provides two batching hooks; ->exit_batch() and ->exit_batch_rtnl(). The batching variant is beneficial if ->exit() meets any of the following conditions: 1) ->exit() repeatedly acquires a global lock for each netns 2) ->exit() has a time-consuming operation that can be factored out (e.g. synchronize_rcu(), smp_mb(), etc) 3) ->exit() does not need to repeat the same iterations for each netns (e.g. inet_twsk_purge()) Currently, none of the ->exit_batch_rtnl() functions satisfy any of the above conditions because RTNL is factored out and held by the caller and all of these functions iterate over the dying netns list. Also, we want to hold per-netns RTNL there but avoid spreading __rtnl_net_lock() across multiple locations. Let's add ->exit_rtnl() hook and run it under __rtnl_net_lock(). The following patches will convert all ->exit_batch_rtnl() users to ->exit_rtnl(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250411205258.63164-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/net_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bd57d8fb54f1..b071e6eed9d5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -475,6 +475,8 @@ struct pernet_operations { void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); /* Following method is called with RTNL held. */ + void (*exit_rtnl)(struct net *net, + struct list_head *dev_kill_list); void (*exit_batch_rtnl)(struct list_head *net_exit_list, struct list_head *dev_kill_list); unsigned int * const id; -- cgit v1.2.3 From a967e01e2ad201f6ddc778ed65a5dae1c68ee8a5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 11 Apr 2025 13:52:35 -0700 Subject: ipv4: ip_tunnel: Convert ip_tunnel_delete_nets() callers to ->exit_rtnl(). ip_tunnel_delete_nets() iterates the dying netns list and performs the same operations for each. Let's export ip_tunnel_destroy() as ip_tunnel_delete_net() and call it from ->exit_rtnl(). 
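To make the conversion pattern for the new ->exit_rtnl() hook concrete, here is a hedged sketch of what a converted subsystem might look like. The .exit_rtnl signature matches the hunks in this series, while foo_net_id, foo_link_ops, foo_init_net and struct foo_net are invented placeholders, and the body only illustrates the usual "queue my devices for unregistration" pattern.

    /* Hypothetical pernet subsystem converted to ->exit_rtnl(); sketch only. */
    static void foo_exit_rtnl(struct net *net, struct list_head *dev_kill_list)
    {
            struct net_device *dev, *next;

            /* Called once per dying netns, with RTNL (and per-netns RTNL) held. */
            for_each_netdev_safe(net, dev, next)
                    if (dev->rtnl_link_ops == &foo_link_ops)
                            unregister_netdevice_queue(dev, dev_kill_list);
    }

    static struct pernet_operations foo_net_ops = {
            .init      = foo_init_net,
            .exit_rtnl = foo_exit_rtnl,
            .id        = &foo_net_id,
            .size      = sizeof(struct foo_net),
    };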
Signed-off-by: Kuniyuki Iwashima Reviewed-by: David Ahern Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250411205258.63164-7-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index a36a335cef9f..0c3d571a04a1 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -377,10 +377,9 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_get_iflink(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); - -void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, - struct rtnl_link_ops *ops, - struct list_head *dev_to_kill); +void ip_tunnel_delete_net(struct net *net, unsigned int id, + struct rtnl_link_ops *ops, + struct list_head *dev_to_kill); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); -- cgit v1.2.3 From c57a9c503543cd8829eeaaf88362199e0491c0d7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 11 Apr 2025 13:52:43 -0700 Subject: net: Remove ->exit_batch_rtnl(). There are no ->exit_batch_rtnl() users remaining. Let's remove the hook. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250411205258.63164-15-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/net_namespace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index b071e6eed9d5..025a7574b275 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -477,8 +477,6 @@ struct pernet_operations { /* Following method is called with RTNL held. */ void (*exit_rtnl)(struct net *net, struct list_head *dev_kill_list); - void (*exit_batch_rtnl)(struct list_head *net_exit_list, - struct list_head *dev_kill_list); unsigned int * const id; const size_t size; }; -- cgit v1.2.3 From 651f88cb046c5e002f7c11de2cebf207787d2346 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 12 Apr 2025 09:08:45 +0100 Subject: net: stmmac: remove eee_usecs_rate plat_dat->eee_usecs_rate is now unused, so remove this member. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1u3Vuv-000E7y-9k@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index c4ec8bb8144e..8aed09d65b4a 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -276,7 +276,6 @@ struct plat_stmmacenet_data { int mac_port_sel_speed; int has_xgmac; u8 vlan_fail_q; - unsigned long eee_usecs_rate; struct pci_dev *pdev; int int_snapshot_num; int msi_mac_vec; -- cgit v1.2.3 From 23738cc8048322cf324f330cd697380fb3455da5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Apr 2025 10:52:47 +0100 Subject: rxrpc: Pull out certain app callback funcs into an ops table A number of functions separately furnish an AF_RXRPC socket with callback function pointers into a kernel app (such as the AFS filesystem) that is using it. Replace most of these with an ops table for the entire socket. This makes it easier to add more callback functions.
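For readers unfamiliar with the old interface, the net effect is that a kernel app now hands over its notification callbacks as a single table. The sketch below is illustrative only: struct rxrpc_kernel_ops and rxrpc_kernel_set_notifications() are taken from the hunk that follows, while the kafs_* names are invented stand-ins.

    /* Hypothetical kernel-app setup (e.g. kAFS); sketch only. */
    static const struct rxrpc_kernel_ops kafs_rxrpc_ops = {
            .notify_new_call  = kafs_rx_new_call,
            .discard_new_call = kafs_rx_discard_call,
            .user_attach_call = kafs_rx_attach_call,
    };

    static void kafs_open_socket(struct socket *sock)
    {
            /* One call replaces rxrpc_kernel_new_call_notification() and friends. */
            rxrpc_kernel_set_notifications(sock, &kafs_rxrpc_ops);
    }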
Note that the call incoming data processing callback is retained as that gets set to different things, depending on the type of op. Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250411095303.2316168-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/net/af_rxrpc.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index cf793d18e5df..ebb6092c488b 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -29,18 +29,23 @@ enum rxrpc_interruptibility { */ extern atomic_t rxrpc_debug_id; +/* + * Operations table for rxrpc to call out to a kernel application (e.g. kAFS). + */ +struct rxrpc_kernel_ops { + void (*notify_new_call)(struct sock *sk, struct rxrpc_call *call, + unsigned long user_call_ID); + void (*discard_new_call)(struct rxrpc_call *call, unsigned long user_call_ID); + void (*user_attach_call)(struct rxrpc_call *call, unsigned long user_call_ID); +}; + typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, unsigned long); -typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *, - unsigned long); -typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long); -typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long); -void rxrpc_kernel_new_call_notification(struct socket *, - rxrpc_notify_new_call_t, - rxrpc_discard_new_call_t); +void rxrpc_kernel_set_notifications(struct socket *sock, + const struct rxrpc_kernel_ops *app_ops); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct rxrpc_peer *peer, struct key *key, @@ -72,9 +77,9 @@ const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer); unsigned long rxrpc_kernel_set_peer_data(struct rxrpc_peer *peer, unsigned long app_data); unsigned long rxrpc_kernel_get_peer_data(const struct rxrpc_peer *peer); unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *); -int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, - rxrpc_user_attach_call_t, unsigned long, gfp_t, - unsigned int); +int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx, + unsigned long user_call_ID, gfp_t gfp, + unsigned int debug_id); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); -- cgit v1.2.3 From 5800b1cf3fd8ccab752a101865be1e76dac33142 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Apr 2025 10:52:49 +0100 Subject: rxrpc: Allow CHALLENGEs to be passed to the app for a RESPONSE Allow the app to request that CHALLENGEs be passed to it through an out-of-band queue that allows recvmsg() to pick it up so that the app can add data to it with sendmsg(). This will allow the application (AFS or userspace) to interact with the process if it wants to and put values into user-defined fields. This will be used by AFS when talking to a fileserver to supply that fileserver with a crypto key by which callback RPCs can be encrypted (ie. notifications from the fileserver to the client).
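A heavily hedged sketch of how a kernel app's out-of-band handler might consume such a CHALLENGE; the functions used are the ones declared in the header changes below, but the control flow, error handling and skb ownership are assumptions rather than code from this series.

    /* Hypothetical ->notify_oob() handler for a kernel app; sketch only. */
    static void kafs_notify_oob(struct sock *sk, struct sk_buff *oob)
    {
            struct rxrpc_peer *peer;
            unsigned long peer_appdata;

            if (rxrpc_kernel_query_oob(oob, &peer, &peer_appdata) ==
                RXRPC_OOB_CHALLENGE)
                    /* Let the rxkad security class build and send the RESPONSE. */
                    rxkad_kernel_respond_to_challenge(oob);

            rxrpc_kernel_free_oob(oob);     /* Assumption: the handler owns the skb. */
    }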
Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250411095303.2316168-5-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/net/af_rxrpc.h | 24 +++++++++++++++++++++++ include/trace/events/rxrpc.h | 18 ++++++++++++++++- include/uapi/linux/rxrpc.h | 46 ++++++++++++++++++++++++++++++++------------ 3 files changed, 75 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index ebb6092c488b..0b209f703ffc 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -16,6 +16,7 @@ struct sock; struct socket; struct rxrpc_call; struct rxrpc_peer; +struct krb5_buffer; enum rxrpc_abort_reason; enum rxrpc_interruptibility { @@ -24,6 +25,10 @@ enum rxrpc_interruptibility { RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */ }; +enum rxrpc_oob_type { + RXRPC_OOB_CHALLENGE, /* Security challenge for a connection */ +}; + /* * Debug ID counter for tracing. */ @@ -37,6 +42,7 @@ struct rxrpc_kernel_ops { unsigned long user_call_ID); void (*discard_new_call)(struct rxrpc_call *call, unsigned long user_call_ID); void (*user_attach_call)(struct rxrpc_call *call, unsigned long user_call_ID); + void (*notify_oob)(struct sock *sk, struct sk_buff *oob); }; typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, @@ -88,5 +94,23 @@ void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *, int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val); int rxrpc_sock_set_security_keyring(struct sock *, struct key *); +int rxrpc_sock_set_manage_response(struct sock *sk, bool set); + +enum rxrpc_oob_type rxrpc_kernel_query_oob(struct sk_buff *oob, + struct rxrpc_peer **_peer, + unsigned long *_peer_appdata); +struct sk_buff *rxrpc_kernel_dequeue_oob(struct socket *sock, + enum rxrpc_oob_type *_type); +void rxrpc_kernel_free_oob(struct sk_buff *oob); +void rxrpc_kernel_query_challenge(struct sk_buff *challenge, + struct rxrpc_peer **_peer, + unsigned long *_peer_appdata, + u16 *_service_id, u8 *_security_index); +int rxrpc_kernel_reject_challenge(struct sk_buff *challenge, u32 abort_code, + int error, enum rxrpc_abort_reason why); +int rxkad_kernel_respond_to_challenge(struct sk_buff *challenge); +u32 rxgk_kernel_query_challenge(struct sk_buff *challenge); +int rxgk_kernel_respond_to_challenge(struct sk_buff *challenge, + struct krb5_buffer *appdata); #endif /* _NET_RXRPC_H */ diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index cad50d91077e..08ecebd90595 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -25,6 +25,7 @@ EM(afs_abort_probeuuid_negative, "afs-probeuuid-neg") \ EM(afs_abort_send_data_error, "afs-send-data") \ EM(afs_abort_unmarshal_error, "afs-unmarshal") \ + EM(afs_abort_unsupported_sec_class, "afs-unsup-sec-class") \ /* rxperf errors */ \ EM(rxperf_abort_general_error, "rxperf-error") \ EM(rxperf_abort_oom, "rxperf-oom") \ @@ -77,6 +78,7 @@ EM(rxrpc_abort_call_timeout, "call-timeout") \ EM(rxrpc_abort_no_service_key, "no-serv-key") \ EM(rxrpc_abort_nomem, "nomem") \ + EM(rxrpc_abort_response_sendmsg, "resp-sendmsg") \ EM(rxrpc_abort_service_not_offered, "serv-not-offered") \ EM(rxrpc_abort_shut_down, "shut-down") \ EM(rxrpc_abort_unsupported_security, "unsup-sec") \ @@ -133,24 +135,33 @@ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ + 
EM(rxrpc_skb_get_post_oob, "GET post-oob ") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ EM(rxrpc_skb_get_to_recvmsg_oos, "GET to-recv-o") \ EM(rxrpc_skb_new_encap_rcv, "NEW encap-rcv") \ EM(rxrpc_skb_new_error_report, "NEW error-rpt") \ EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \ + EM(rxrpc_skb_new_response_rxgk, "NEW resp-rxgk") \ + EM(rxrpc_skb_new_response_rxkad, "NEW resp-rxkd") \ EM(rxrpc_skb_new_unshared, "NEW unshared ") \ EM(rxrpc_skb_put_call_rx, "PUT call-rx ") \ + EM(rxrpc_skb_put_challenge, "PUT challenge") \ EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \ EM(rxrpc_skb_put_conn_work, "PUT conn-work") \ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ + EM(rxrpc_skb_put_oob, "PUT oob ") \ EM(rxrpc_skb_put_purge, "PUT purge ") \ + EM(rxrpc_skb_put_purge_oob, "PUT purge-oob") \ + EM(rxrpc_skb_put_response, "PUT response ") \ EM(rxrpc_skb_put_rotate, "PUT rotate ") \ EM(rxrpc_skb_put_unknown, "PUT unknown ") \ EM(rxrpc_skb_see_conn_work, "SEE conn-work") \ + EM(rxrpc_skb_see_oob_challenge, "SEE oob-chall") \ EM(rxrpc_skb_see_recvmsg, "SEE recvmsg ") \ + EM(rxrpc_skb_see_recvmsg_oob, "SEE recvm-oob") \ EM(rxrpc_skb_see_reject, "SEE reject ") \ EM(rxrpc_skb_see_rotate, "SEE rotate ") \ E_(rxrpc_skb_see_version, "SEE version ") @@ -216,9 +227,11 @@ EM(rxrpc_conn_free, "FREE ") \ EM(rxrpc_conn_get_activate_call, "GET act-call") \ EM(rxrpc_conn_get_call_input, "GET inp-call") \ + EM(rxrpc_conn_get_challenge_input, "GET inp-chal") \ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ + EM(rxrpc_conn_get_poke_response, "GET response") \ EM(rxrpc_conn_get_poke_secured, "GET secured ") \ EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ @@ -226,10 +239,12 @@ EM(rxrpc_conn_new_service, "NEW service ") \ EM(rxrpc_conn_put_call, "PUT call ") \ EM(rxrpc_conn_put_call_input, "PUT inp-call") \ + EM(rxrpc_conn_put_challenge_input, "PUT inp-chal") \ EM(rxrpc_conn_put_conn_input, "PUT inp-conn") \ EM(rxrpc_conn_put_discard_idle, "PUT disc-idl") \ EM(rxrpc_conn_put_local_dead, "PUT loc-dead") \ EM(rxrpc_conn_put_noreuse, "PUT noreuse ") \ + EM(rxrpc_conn_put_oob, "PUT oob ") \ EM(rxrpc_conn_put_poke, "PUT poke ") \ EM(rxrpc_conn_put_service_reaped, "PUT svc-reap") \ EM(rxrpc_conn_put_unbundle, "PUT unbundle") \ @@ -331,6 +346,7 @@ EM(rxrpc_recvmsg_full, "FULL") \ EM(rxrpc_recvmsg_hole, "HOLE") \ EM(rxrpc_recvmsg_next, "NEXT") \ + EM(rxrpc_recvmsg_oobq, "OOBQ") \ EM(rxrpc_recvmsg_requeue, "REQU") \ EM(rxrpc_recvmsg_return, "RETN") \ EM(rxrpc_recvmsg_terminal, "TERM") \ @@ -456,7 +472,7 @@ EM(rxrpc_tx_point_conn_abort, "ConnAbort") \ EM(rxrpc_tx_point_reject, "Reject") \ EM(rxrpc_tx_point_rxkad_challenge, "RxkadChall") \ - EM(rxrpc_tx_point_rxkad_response, "RxkadResp") \ + EM(rxrpc_tx_point_response, "Response") \ EM(rxrpc_tx_point_version_keepalive, "VerKeepalive") \ E_(rxrpc_tx_point_version_reply, "VerReply") diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 8f8dc7a937a4..c4e9833b0a12 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -36,26 +36,33 @@ struct sockaddr_rxrpc { #define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ #define RXRPC_UPGRADEABLE_SERVICE 5 /* Upgrade service[0] -> service[1] */ #define RXRPC_SUPPORTED_CMSG 6 /* Get highest supported 
control message type */ +#define RXRPC_MANAGE_RESPONSE 7 /* [clnt] Want to manage RESPONSE packets */ /* * RxRPC control messages * - If neither abort or accept are specified, the message is a data message. * - terminal messages mean that a user call ID tag can be recycled + * - C/S/- indicate whether these are applicable to client, server or both * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg() */ enum rxrpc_cmsg_type { - RXRPC_USER_CALL_ID = 1, /* sr: user call ID specifier */ - RXRPC_ABORT = 2, /* sr: abort request / notification [terminal] */ - RXRPC_ACK = 3, /* -r: [Service] RPC op final ACK received [terminal] */ - RXRPC_NET_ERROR = 5, /* -r: network error received [terminal] */ - RXRPC_BUSY = 6, /* -r: server busy received [terminal] */ - RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */ - RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */ - RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ - RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ - RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ - RXRPC_SET_CALL_TIMEOUT = 13, /* s-: Set one or more call timeouts */ - RXRPC_CHARGE_ACCEPT = 14, /* s-: Charge the accept pool with a user call ID */ + RXRPC_USER_CALL_ID = 1, /* -sr: User call ID specifier */ + RXRPC_ABORT = 2, /* -sr: Abort request / notification [terminal] */ + RXRPC_ACK = 3, /* S-r: RPC op final ACK received [terminal] */ + RXRPC_NET_ERROR = 5, /* --r: Network error received [terminal] */ + RXRPC_BUSY = 6, /* C-r: Server busy received [terminal] */ + RXRPC_LOCAL_ERROR = 7, /* --r: Local error generated [terminal] */ + RXRPC_NEW_CALL = 8, /* S-r: New incoming call notification */ + RXRPC_EXCLUSIVE_CALL = 10, /* Cs-: Call should be on exclusive connection */ + RXRPC_UPGRADE_SERVICE = 11, /* Cs-: Request service upgrade for client call */ + RXRPC_TX_LENGTH = 12, /* -s-: Total length of Tx data */ + RXRPC_SET_CALL_TIMEOUT = 13, /* -s-: Set one or more call timeouts */ + RXRPC_CHARGE_ACCEPT = 14, /* Ss-: Charge the accept pool with a user call ID */ + RXRPC_OOB_ID = 15, /* -sr: OOB message ID */ + RXRPC_CHALLENGED = 16, /* C-r: Info on a received CHALLENGE */ + RXRPC_RESPOND = 17, /* Cs-: Respond to a challenge */ + RXRPC_RESPONDED = 18, /* S-r: Data received in RESPONSE */ + RXRPC_RESP_RXGK_APPDATA = 19, /* Cs-: RESPONSE: RxGK app data to include */ RXRPC__SUPPORTED }; @@ -118,4 +125,19 @@ enum rxrpc_cmsg_type { #define RXKADDATALEN 19270411 /* user data too long */ #define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */ +/* + * Challenge information in the RXRPC_CHALLENGED control message. + */ +struct rxrpc_challenge { + __u16 service_id; /* The service ID of the connection (may be upgraded) */ + __u8 security_index; /* The security index of the connection */ + __u8 pad; /* Round out to a multiple of 4 bytes. */ + /* ... The security class gets to append extra information ... */ +}; + +struct rxgk_challenge { + struct rxrpc_challenge base; + __u32 enctype; /* Krb5 encoding type */ +}; + #endif /* _UAPI_LINUX_RXRPC_H */ -- cgit v1.2.3 From 01af64269751f261421a9e80a527c8e987aeda8d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Apr 2025 10:52:50 +0100 Subject: rxrpc: Add the security index for yfs-rxgk Add the security index and abort codes for the YFS variant of rxgk. 
Signed-off-by: David Howells Link: https://patch.msgid.link/20250411095303.2316168-6-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/crypto/krb5.h | 5 +++++ include/uapi/linux/rxrpc.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/crypto/krb5.h b/include/crypto/krb5.h index 62d998e62f47..71dd38f59be1 100644 --- a/include/crypto/krb5.h +++ b/include/crypto/krb5.h @@ -63,6 +63,11 @@ struct scatterlist; #define KEY_USAGE_SEED_ENCRYPTION (0xAA) #define KEY_USAGE_SEED_INTEGRITY (0x55) +/* + * Standard Kerberos error codes. + */ +#define KRB5_PROG_KEYTYPE_NOSUPP -1765328233 + /* * Mode of operation. */ diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index c4e9833b0a12..d9735abd4c79 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -80,6 +80,7 @@ enum rxrpc_cmsg_type { #define RXRPC_SECURITY_RXKAD 2 /* kaserver or kerberos 4 */ #define RXRPC_SECURITY_RXGK 4 /* gssapi-based */ #define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */ +#define RXRPC_SECURITY_YFS_RXGK 6 /* YFS gssapi-based */ /* * RxRPC-level abort codes @@ -125,6 +126,36 @@ enum rxrpc_cmsg_type { #define RXKADDATALEN 19270411 /* user data too long */ #define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */ +/* + * RxGK GSSAPI security abort codes. + */ +#if 0 /* Original standard abort codes (used by OpenAFS) */ +#define RXGK_INCONSISTENCY 1233242880 /* Security module structure inconsistent */ +#define RXGK_PACKETSHORT 1233242881 /* Packet too short for security challenge */ +#define RXGK_BADCHALLENGE 1233242882 /* Invalid security challenge */ +#define RXGK_BADETYPE 1233242883 /* Invalid or impermissible encryption type */ +#define RXGK_BADLEVEL 1233242884 /* Invalid or impermissible security level */ +#define RXGK_BADKEYNO 1233242885 /* Key version number not found */ +#define RXGK_EXPIRED 1233242886 /* Token has expired */ +#define RXGK_NOTAUTH 1233242887 /* Caller not authorized */ +#define RXGK_BAD_TOKEN 1233242888 /* Security object was passed a bad token */ +#define RXGK_SEALED_INCON 1233242889 /* Sealed data inconsistent */ +#define RXGK_DATA_LEN 1233242890 /* User data too long */ +#define RXGK_BAD_QOP 1233242891 /* Inadequate quality of protection available */ +#else /* Revised standard abort codes (used by YFS) */ +#define RXGK_INCONSISTENCY 1233242880 /* Security module structure inconsistent */ +#define RXGK_PACKETSHORT 1233242881 /* Packet too short for security challenge */ +#define RXGK_BADCHALLENGE 1233242882 /* Security challenge/response failed */ +#define RXGK_SEALEDINCON 1233242883 /* Sealed data is inconsistent */ +#define RXGK_NOTAUTH 1233242884 /* Caller not authorised */ +#define RXGK_EXPIRED 1233242885 /* Authentication expired */ +#define RXGK_BADLEVEL 1233242886 /* Unsupported or not permitted security level */ +#define RXGK_BADKEYNO 1233242887 /* Bad transport key number */ +#define RXGK_NOTRXGK 1233242888 /* Security layer is not rxgk */ +#define RXGK_UNSUPPORTED 1233242889 /* Endpoint does not support rxgk */ +#define RXGK_GSSERROR 1233242890 /* GSSAPI mechanism error */ +#endif + /* * Challenge information in the RXRPC_CHALLENGED control message. */ -- cgit v1.2.3 From 0ca100ff4df64f5d0f6c1dd5080c3e096786bea6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Apr 2025 10:52:51 +0100 Subject: rxrpc: Add YFS RxGK (GSSAPI) security class Add support for the YFS-variant RxGK security class to support GSSAPI-derived authentication. 
This also allows the use of better crypto over the rxkad security class. The key payload is XDR encoded of the form: typedef int64_t opr_time; const AFSTOKEN_RK_TIX_MAX = 12000; /* Matches entry in rxkad.h */ struct token_rxkad { afs_int32 viceid; afs_int32 kvno; afs_int64 key; afs_int32 begintime; afs_int32 endtime; afs_int32 primary_flag; opaque ticket; }; struct token_rxgk { opr_time begintime; opr_time endtime; afs_int64 level; afs_int64 lifetime; afs_int64 bytelife; afs_int64 enctype; opaque key<>; opaque ticket<>; }; const AFSTOKEN_UNION_NOAUTH = 0; const AFSTOKEN_UNION_KAD = 2; const AFSTOKEN_UNION_YFSGK = 6; union ktc_tokenUnion switch (afs_int32 type) { case AFSTOKEN_UNION_KAD: token_rxkad kad; case AFSTOKEN_UNION_YFSGK: token_rxgk gk; }; const AFSTOKEN_LENGTH_MAX = 16384; typedef opaque token_opaque; const AFSTOKEN_MAX = 8; const AFSTOKEN_CELL_MAX = 64; struct ktc_setTokenData { afs_int32 flags; string cell; token_opaque tokens; }; The parser for the basic token struct is already present, as is the rxkad token type. This adds a parser for the rxgk token type. Signed-off-by: David Howells cc: Marc Dionne cc: Herbert Xu cc: Chuck Lever cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250411095303.2316168-7-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/keys/rxrpc-type.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h index 333c0f49a9cd..0ddbe197a261 100644 --- a/include/keys/rxrpc-type.h +++ b/include/keys/rxrpc-type.h @@ -9,6 +9,7 @@ #define _KEYS_RXRPC_TYPE_H #include +#include /* * key type for AF_RXRPC keys @@ -31,6 +32,21 @@ struct rxkad_key { u8 ticket[]; /* the encrypted ticket */ }; +/* + * RxRPC key for YFS-RxGK (type-6 security) + */ +struct rxgk_key { + s64 begintime; /* Time at which the ticket starts */ + s64 endtime; /* Time at which the ticket ends */ + u64 lifetime; /* Maximum lifespan of a connection (seconds) */ + u64 bytelife; /* Maximum number of bytes on a connection */ + unsigned int enctype; /* Encoding type */ + s8 level; /* Negotiated security RXRPC_SECURITY_PLAIN/AUTH/ENCRYPT */ + struct krb5_buffer key; /* Master key, K0 */ + struct krb5_buffer ticket; /* Ticket to be passed to server */ + u8 _key[]; /* Key storage */ +}; + /* * list of tokens attached to an rxrpc key */ @@ -40,6 +56,7 @@ struct rxrpc_key_token { struct rxrpc_key_token *next; /* the next token in the list */ union { struct rxkad_key *kad; + struct rxgk_key *rxgk; }; }; -- cgit v1.2.3 From 9d1d2b59341f58126a69b51f9f5f8ccb9f12e54a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Apr 2025 10:52:53 +0100 Subject: rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI) Implement the basic parts of the yfs-rxgk security class (security index 6) to support GSSAPI-negotiated security. 
Signed-off-by: David Howells cc: Marc Dionne cc: Herbert Xu cc: Chuck Lever cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250411095303.2316168-9-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 45 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 08ecebd90595..aab81e8196ae 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -69,6 +69,38 @@ EM(rxkad_abort_resp_tkt_sname, "rxkad-resp-tk-sname") \ EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \ EM(rxkad_abort_resp_version, "rxkad-resp-version") \ + /* RxGK security errors */ \ + EM(rxgk_abort_1_verify_mic_eproto, "rxgk1-vfy-mic-eproto") \ + EM(rxgk_abort_2_decrypt_eproto, "rxgk2-dec-eproto") \ + EM(rxgk_abort_2_short_data, "rxgk2-short-data") \ + EM(rxgk_abort_2_short_encdata, "rxgk2-short-encdata") \ + EM(rxgk_abort_2_short_header, "rxgk2-short-hdr") \ + EM(rxgk_abort_bad_key_number, "rxgk-bad-key-num") \ + EM(rxgk_abort_chall_key_expired, "rxgk-chall-key-exp") \ + EM(rxgk_abort_chall_no_key, "rxgk-chall-nokey") \ + EM(rxgk_abort_chall_short, "rxgk-chall-short") \ + EM(rxgk_abort_resp_auth_dec, "rxgk-resp-auth-dec") \ + EM(rxgk_abort_resp_bad_callid, "rxgk-resp-bad-callid") \ + EM(rxgk_abort_resp_bad_nonce, "rxgk-resp-bad-nonce") \ + EM(rxgk_abort_resp_bad_param, "rxgk-resp-bad-param") \ + EM(rxgk_abort_resp_call_ctr, "rxgk-resp-call-ctr") \ + EM(rxgk_abort_resp_call_state, "rxgk-resp-call-state") \ + EM(rxgk_abort_resp_internal_error, "rxgk-resp-int-error") \ + EM(rxgk_abort_resp_nopkg, "rxgk-resp-nopkg") \ + EM(rxgk_abort_resp_short_applen, "rxgk-resp-short-applen") \ + EM(rxgk_abort_resp_short_auth, "rxgk-resp-short-auth") \ + EM(rxgk_abort_resp_short_call_list, "rxgk-resp-short-callls") \ + EM(rxgk_abort_resp_short_packet, "rxgk-resp-short-packet") \ + EM(rxgk_abort_resp_short_yfs_klen, "rxgk-resp-short-yfs-klen") \ + EM(rxgk_abort_resp_short_yfs_key, "rxgk-resp-short-yfs-key") \ + EM(rxgk_abort_resp_short_yfs_tkt, "rxgk-resp-short-yfs-tkt") \ + EM(rxgk_abort_resp_tok_dec, "rxgk-resp-tok-dec") \ + EM(rxgk_abort_resp_tok_internal_error, "rxgk-resp-tok-int-err") \ + EM(rxgk_abort_resp_tok_keyerr, "rxgk-resp-tok-keyerr") \ + EM(rxgk_abort_resp_tok_nokey, "rxgk-resp-tok-nokey") \ + EM(rxgk_abort_resp_tok_nopkg, "rxgk-resp-tok-nopkg") \ + EM(rxgk_abort_resp_tok_short, "rxgk-resp-tok-short") \ + EM(rxgk_abort_resp_xdr_align, "rxgk-resp-xdr-align") \ /* rxrpc errors */ \ EM(rxrpc_abort_call_improper_term, "call-improper-term") \ EM(rxrpc_abort_call_reset, "call-reset") \ @@ -471,6 +503,7 @@ EM(rxrpc_tx_point_call_final_resend, "CallFinalResend") \ EM(rxrpc_tx_point_conn_abort, "ConnAbort") \ EM(rxrpc_tx_point_reject, "Reject") \ + EM(rxrpc_tx_point_rxgk_challenge, "RxGKChall") \ EM(rxrpc_tx_point_rxkad_challenge, "RxkadChall") \ EM(rxrpc_tx_point_response, "Response") \ EM(rxrpc_tx_point_version_keepalive, "VerKeepalive") \ @@ -489,6 +522,7 @@ #define rxrpc_txbuf_traces \ EM(rxrpc_txbuf_alloc_data, "ALLOC DATA ") \ + EM(rxrpc_txbuf_alloc_response, "ALLOC RESP ") \ EM(rxrpc_txbuf_free, "FREE ") \ EM(rxrpc_txbuf_get_buffer, "GET BUFFER ") \ EM(rxrpc_txbuf_get_trans, "GET TRANS ") \ @@ -496,6 +530,7 @@ EM(rxrpc_txbuf_put_cleaned, "PUT CLEANED") \ EM(rxrpc_txbuf_put_nomem, "PUT NOMEM ") \ EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \ + EM(rxrpc_txbuf_put_response_tx, "PUT RESP TX") \ 
EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \ EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \ EM(rxrpc_txbuf_see_lost, "SEE LOST ") \ @@ -1178,6 +1213,7 @@ TRACE_EVENT(rxrpc_rx_challenge, __field(u32, version) __field(u32, nonce) __field(u32, min_level) + __field(u8, security_ix) ), TP_fast_assign( @@ -1186,11 +1222,13 @@ TRACE_EVENT(rxrpc_rx_challenge, __entry->version = version; __entry->nonce = nonce; __entry->min_level = min_level; + __entry->security_ix = conn->security_ix; ), - TP_printk("C=%08x CHALLENGE %08x v=%x n=%x ml=%x", + TP_printk("C=%08x CHALLENGE r=%08x sx=%u v=%x n=%x ml=%x", __entry->conn, __entry->serial, + __entry->security_ix, __entry->version, __entry->nonce, __entry->min_level) @@ -1208,6 +1246,7 @@ TRACE_EVENT(rxrpc_rx_response, __field(u32, version) __field(u32, kvno) __field(u32, ticket_len) + __field(u8, security_ix) ), TP_fast_assign( @@ -1216,11 +1255,13 @@ TRACE_EVENT(rxrpc_rx_response, __entry->version = version; __entry->kvno = kvno; __entry->ticket_len = ticket_len; + __entry->security_ix = conn->security_ix; ), - TP_printk("C=%08x RESPONSE %08x v=%x kvno=%x tl=%x", + TP_printk("C=%08x RESPONSE r=%08x sx=%u v=%x kvno=%x tl=%x", __entry->conn, __entry->serial, + __entry->security_ix, __entry->version, __entry->kvno, __entry->ticket_len) -- cgit v1.2.3 From 7a7513a3081c6a2729d8570c77bbed1978277dc9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Apr 2025 10:52:54 +0100 Subject: rxrpc: rxgk: Implement connection rekeying Implement rekeying of connections with the RxGK security class. This involves regenerating the keys with a different key number as part of the input data after a certain amount of time or a certain amount of bytes encrypted. Rekeying may be triggered by either end. The LSW of the key number is inserted into the security-specific field in the RX header, and we try and expand it to 32-bits to make it last longer. Signed-off-by: David Howells cc: Marc Dionne cc: Herbert Xu cc: Chuck Lever cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250411095303.2316168-10-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index aab81e8196ae..920439df1f6f 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -2725,6 +2725,30 @@ TRACE_EVENT(rxrpc_rack_timer, ktime_to_us(__entry->delay)) ); +TRACE_EVENT(rxrpc_rxgk_rekey, + TP_PROTO(struct rxrpc_connection *conn, + unsigned int current_key, unsigned int requested_key), + + TP_ARGS(conn, current_key, requested_key), + + TP_STRUCT__entry( + __field(unsigned int, conn) + __field(unsigned int, current_key) + __field(unsigned int, requested_key) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->current_key = current_key; + __entry->requested_key = requested_key; + ), + + TP_printk("C=%08x cur=%x req=%x", + __entry->conn, + __entry->current_key, + __entry->requested_key) + ); + #undef EM #undef E_ -- cgit v1.2.3 From d03539d5c2dec9b028297c15e57bd3c01d0d9c0d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Apr 2025 10:52:56 +0100 Subject: rxrpc: Display security params in the afs_cb_call tracepoint Make the afs_cb_call tracepoint display some security parameters to make debugging easier. 
Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250411095303.2316168-12-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/net/af_rxrpc.h | 2 ++ include/trace/events/afs.h | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 0b209f703ffc..f15341594cc8 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -112,5 +112,7 @@ int rxkad_kernel_respond_to_challenge(struct sk_buff *challenge); u32 rxgk_kernel_query_challenge(struct sk_buff *challenge); int rxgk_kernel_respond_to_challenge(struct sk_buff *challenge, struct krb5_buffer *appdata); +u8 rxrpc_kernel_query_call_security(struct rxrpc_call *call, + u16 *_service_id, u32 *_enctype); #endif /* _NET_RXRPC_H */ diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8857f5ea77d4..7f83d242c8e9 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -663,19 +663,26 @@ TRACE_EVENT(afs_cb_call, __field(unsigned int, call) __field(u32, op) __field(u16, service_id) + __field(u8, security_ix) + __field(u32, enctype) ), TP_fast_assign( __entry->call = call->debug_id; __entry->op = call->operation_ID; __entry->service_id = call->service_id; + __entry->security_ix = call->security_ix; + __entry->enctype = call->enctype; ), - TP_printk("c=%08x %s", + TP_printk("c=%08x %s sv=%u sx=%u en=%u", __entry->call, __entry->service_id == 2501 ? __print_symbolic(__entry->op, yfs_cm_operations) : - __print_symbolic(__entry->op, afs_cm_operations)) + __print_symbolic(__entry->op, afs_cm_operations), + __entry->service_id, + __entry->security_ix, + __entry->enctype) ); TRACE_EVENT(afs_call, -- cgit v1.2.3 From fba6995798c6085a0c2fc67e0cacd489a6971044 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Apr 2025 10:52:58 +0100 Subject: rxrpc: Add more CHALLENGE/RESPONSE packet tracing Add more tracing for CHALLENGE and RESPONSE packets. Currently, rxrpc only has client-relevant tracepoints (rx_challenge and tx_response), but add the server-side ones too. Further, record the service ID in the rx_challenge tracepoint as well. 
Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250411095303.2316168-14-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 78 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 920439df1f6f..378d2dfc7392 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1201,6 +1201,39 @@ TRACE_EVENT(rxrpc_rx_conn_abort, __entry->abort_code) ); +TRACE_EVENT(rxrpc_tx_challenge, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, + u32 version, u32 nonce), + + TP_ARGS(conn, serial, version, nonce), + + TP_STRUCT__entry( + __field(unsigned int, conn) + __field(rxrpc_serial_t, serial) + __field(u32, version) + __field(u32, nonce) + __field(u16, service_id) + __field(u8, security_ix) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->serial = serial; + __entry->version = version; + __entry->nonce = nonce; + __entry->service_id = conn->service_id; + __entry->security_ix = conn->security_ix; + ), + + TP_printk("C=%08x CHALLENGE r=%08x sv=%u+%u v=%x n=%x", + __entry->conn, + __entry->serial, + __entry->service_id, + __entry->security_ix, + __entry->version, + __entry->nonce) + ); + TRACE_EVENT(rxrpc_rx_challenge, TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, u32 version, u32 nonce, u32 min_level), @@ -1213,6 +1246,7 @@ TRACE_EVENT(rxrpc_rx_challenge, __field(u32, version) __field(u32, nonce) __field(u32, min_level) + __field(u16, service_id) __field(u8, security_ix) ), @@ -1222,18 +1256,60 @@ TRACE_EVENT(rxrpc_rx_challenge, __entry->version = version; __entry->nonce = nonce; __entry->min_level = min_level; + __entry->service_id = conn->service_id; __entry->security_ix = conn->security_ix; ), - TP_printk("C=%08x CHALLENGE r=%08x sx=%u v=%x n=%x ml=%x", + TP_printk("C=%08x CHALLENGE r=%08x sv=%u+%u v=%x n=%x ml=%x", __entry->conn, __entry->serial, + __entry->service_id, __entry->security_ix, __entry->version, __entry->nonce, __entry->min_level) ); +TRACE_EVENT(rxrpc_tx_response, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, + struct rxrpc_skb_priv *rsp), + + TP_ARGS(conn, serial, rsp), + + TP_STRUCT__entry( + __field(unsigned int, conn) + __field(rxrpc_serial_t, serial) + __field(rxrpc_serial_t, challenge) + __field(u32, version) + __field(u32, kvno) + __field(u16, ticket_len) + __field(u16, appdata_len) + __field(u16, service_id) + __field(u8, security_ix) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->serial = serial; + __entry->challenge = rsp->resp.challenge_serial; + __entry->version = rsp->resp.version; + __entry->kvno = rsp->resp.kvno; + __entry->ticket_len = rsp->resp.ticket_len; + __entry->service_id = conn->service_id; + __entry->security_ix = conn->security_ix; + ), + + TP_printk("C=%08x RESPONSE r=%08x cr=%08x sv=%u+%u v=%x kv=%x tl=%u", + __entry->conn, + __entry->serial, + __entry->challenge, + __entry->service_id, + __entry->security_ix, + __entry->version, + __entry->kvno, + __entry->ticket_len) + ); + TRACE_EVENT(rxrpc_rx_response, TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, u32 version, u32 kvno, u32 ticket_len), -- cgit v1.2.3 From 2396356a945bb022aff02656f59c2a45d457043f Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Wed, 26 Feb 2025 09:14:51 +0000 Subject: fuse: add more control over cache 
invalidation behaviour Currently userspace is able to notify the kernel to invalidate the cache for an inode. This means that, if all the inodes in a filesystem need to be invalidated, then userspace needs to iterate through all of them and do this kernel notification separately. This patch adds the concept of 'epoch': each fuse connection will have the current epoch initialized and every new dentry will have its d_time set to the current epoch value. A new operation will then allow userspace to increment the epoch value. Every time a dentry is d_revalidate()'ed, its epoch is compared with the current connection epoch and the dentry is invalidated if the values differ. Signed-off-by: Luis Henriques Tested-by: Laura Promberger Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5ec43ecbceb7..122d6586e8d4 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -232,6 +232,9 @@ * * 7.43 * - add FUSE_REQUEST_TIMEOUT + * + * 7.44 + * - add FUSE_NOTIFY_INC_EPOCH */ #ifndef _LINUX_FUSE_H @@ -267,7 +270,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 43 +#define FUSE_KERNEL_MINOR_VERSION 44 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -671,6 +674,7 @@ enum fuse_notify_code { FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, FUSE_NOTIFY_RESEND = 7, + FUSE_NOTIFY_INC_EPOCH = 8, FUSE_NOTIFY_CODE_MAX, }; -- cgit v1.2.3 From fec450ca15af63649e219060f37a8ec673333726 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 8 Apr 2025 16:54:25 +0300 Subject: drm/display: hdmi: provide central data authority for ACR params The HDMI standard defines recommended N and CTS values for Audio Clock Regeneration. Currently each driver implements those, frequently in a somewhat unique way. Provide a generic helper for getting those values to be used by the HDMI drivers. The helper is added to drm_hdmi_helper.c rather than drm_hdmi_audio.c since HDMI drivers can use this helper function even without switching to DRM HDMI Audio helpers. Note: currently this only handles the values per HDMI 1.4b Section 7.2 and HDMI 2.0 Section 9.2.1. Later the table can be expanded to accommodate Deep Color TMDS char rates per HDMI 1.4 Appendix D and/or HDMI 2.0 / 2.1 Appendix C.
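For context: the sink regenerates the audio clock from the N/CTS pair via the HDMI relation 128 * f_audio = f_TMDS * N / CTS, and the new helper simply hands back the recommended pair for a given TMDS character rate and sample rate. A usage sketch, assuming only the helper's signature from the hunk below and a tmds_char_rate value taken from the driver's current mode:

    /* Hypothetical call site in an HDMI encoder driver; sketch only. */
    unsigned int n, cts;

    drm_hdmi_acr_get_n_cts(tmds_char_rate, 48000, &n, &cts);
    /*
     * E.g. at a 148.5 MHz TMDS character rate and 48 kHz audio the
     * recommended values are N = 6144 and CTS = 148500, which satisfy
     * 128 * 48000 = 148500000 * 6144 / 148500.
     */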
Reviewed-by: Maxime Ripard Link: https://lore.kernel.org/r/20250408-drm-hdmi-acr-v2-1-dee7298ab1af@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/drm/display/drm_hdmi_helper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_hdmi_helper.h b/include/drm/display/drm_hdmi_helper.h index 57e3b18c15ec..09145c9ee9fc 100644 --- a/include/drm/display/drm_hdmi_helper.h +++ b/include/drm/display/drm_hdmi_helper.h @@ -28,4 +28,10 @@ unsigned long long drm_hdmi_compute_mode_clock(const struct drm_display_mode *mode, unsigned int bpc, enum hdmi_colorspace fmt); +void +drm_hdmi_acr_get_n_cts(unsigned long long tmds_char_rate, + unsigned int sample_rate, + unsigned int *out_n, + unsigned int *out_cts); + #endif -- cgit v1.2.3 From d30f83d278a921485b3e877d03fe937bccfcbcdd Mon Sep 17 00:00:00 2001 From: Raviteja Laggyshetty Date: Tue, 15 Apr 2025 09:53:38 +0000 Subject: interconnect: core: Add dynamic id allocation support The current interconnect framework relies on static IDs for node creation and registration, which limits topologies with multiple instances of the same interconnect provider. To address this, introduce icc_node_create_dyn() and icc_link_nodes() APIs to dynamically allocate IDs for interconnect nodes during creation and link. This change removes the dependency on static IDs, allowing multiple instances of the same hardware, such as EPSS L3. Signed-off-by: Raviteja Laggyshetty Link: https://lore.kernel.org/r/20250415095343.32125-3-quic_rlaggysh@quicinc.com Signed-off-by: Georgi Djakov --- include/linux/interconnect-provider.h | 12 ++++++++++++ include/linux/interconnect.h | 3 +++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index f5aef8784692..55cfebc658e6 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -116,8 +116,10 @@ struct icc_node { int icc_std_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, u32 peak_bw, u32 *agg_avg, u32 *agg_peak); +struct icc_node *icc_node_create_dyn(void); struct icc_node *icc_node_create(int id); void icc_node_destroy(int id); +int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node); int icc_link_create(struct icc_node *node, const int dst_id); void icc_node_add(struct icc_node *node, struct icc_provider *provider); void icc_node_del(struct icc_node *node); @@ -136,6 +138,11 @@ static inline int icc_std_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, return -ENOTSUPP; } +static inline struct icc_node *icc_node_create_dyn(void) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct icc_node *icc_node_create(int id) { return ERR_PTR(-ENOTSUPP); @@ -145,6 +152,11 @@ static inline void icc_node_destroy(int id) { } +static inline int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node) +{ + return -EOPNOTSUPP; +} + static inline int icc_link_create(struct icc_node *node, const int dst_id) { return -ENOTSUPP; diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index 97ac253df62c..e4b8808823ad 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -20,6 +20,9 @@ #define Mbps_to_icc(x) ((x) * 1000 / 8) #define Gbps_to_icc(x) ((x) * 1000 * 1000 / 8) +/* macro to indicate dynamic id allocation */ +#define ICC_ALLOC_DYN_ID -1 + struct icc_path; struct device; -- cgit v1.2.3 From fcdf212fd9b36c299d90229e9546c077db2215ce Mon Sep 17 00:00:00 2001 From: Thorsten 
Blum Date: Tue, 15 Apr 2025 11:03:55 +0200 Subject: ASoC: cs-amp-lib: Annotate struct cirrus_amp_efi_data with __counted_by() Add the __counted_by() compiler attribute to the flexible array member 'data' to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. No functional changes intended. Signed-off-by: Thorsten Blum Reviewed-by: Richard Fitzgerald Link: https://patch.msgid.link/20250415090354.92211-2-thorsten.blum@linux.dev Signed-off-by: Mark Brown --- include/sound/cs-amp-lib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/cs-amp-lib.h b/include/sound/cs-amp-lib.h index f481148735e1..5459c221badf 100644 --- a/include/sound/cs-amp-lib.h +++ b/include/sound/cs-amp-lib.h @@ -23,7 +23,7 @@ struct cirrus_amp_cal_data { struct cirrus_amp_efi_data { u32 size; u32 count; - struct cirrus_amp_cal_data data[]; + struct cirrus_amp_cal_data data[] __counted_by(count); } __packed; /** -- cgit v1.2.3 From 0c4f8ed498cea1e2beebf7aa3d198fa381264f88 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 14 Apr 2025 15:22:09 -0700 Subject: mm: skip folio reclaim in legacy memcg contexts for deadlockable mappings Currently in shrink_folio_list(), reclaim for folios under writeback falls into 3 different cases: 1) Reclaim is encountering an excessive number of folios under writeback and this folio has both the writeback and reclaim flags set 2) Dirty throttling is enabled (this happens if reclaim through cgroup is not enabled, if reclaim through cgroupv2 memcg is enabled, or if reclaim is on the root cgroup), or if the folio is not marked for immediate reclaim, or if the caller does not have __GFP_FS (or __GFP_IO if it's going to swap) set 3) Legacy cgroupv1 encounters a folio that already has the reclaim flag set and the caller did not have __GFP_FS (or __GFP_IO if swap) set In cases 1) and 2), we activate the folio and skip reclaiming it while in case 3), we wait for writeback to finish on the folio and then try to reclaim the folio again. In case 3, we wait on writeback because cgroupv1 does not have dirty folio throttling, as such this is a mitigation against the case where there are too many folios in writeback with nothing else to reclaim. If a filesystem (eg fuse) may deadlock due to reclaim waiting on writeback, then the filesystem needs to add inefficient messy workarounds to prevent this. To improve the performance of these filesystems, this commit adds two things: a) a AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM mapping flag that filesystems may set to indicate that reclaim should not wait on writeback b) if legacy memcg encounters a folio with this AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM flag set (eg case 3), the folio will be activated and skip reclaim (eg default to behavior in case 2) instead. 
Signed-off-by: Joanne Koong Reviewed-by: Shakeel Butt Reviewed-by: Jeff Layton Acked-by: David Hildenbrand Reviewed-by: Jingbo Xu Signed-off-by: Miklos Szeredi --- include/linux/pagemap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 26baa78f1ca7..e19ff3183bbf 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -210,6 +210,7 @@ enum mapping_flags { AS_STABLE_WRITES = 7, /* must wait for writeback before modifying folio contents */ AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ + AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, @@ -335,6 +336,16 @@ static inline bool mapping_inaccessible(struct address_space *mapping) return test_bit(AS_INACCESSIBLE, &mapping->flags); } +static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_space *mapping) +{ + set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); +} + +static inline bool mapping_writeback_may_deadlock_on_reclaim(struct address_space *mapping) +{ + return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; -- cgit v1.2.3 From ac03495d7359285a007ec4fdc08d3843bb5d6b7e Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 15 Apr 2025 12:50:15 +0100 Subject: ASoC: dt-bindings: Add Cirrus Logic CS48L32 audio DSP The CS48L32 is an Audio DSP with microphone inputs and SPI control interface. It has a programmable DSP and a variety of power-efficient fixed-function audio processors, with configurable digital mixing and routing. Most properties are core properties: supply regulators, gpios, clocks, interrupt parent and SPI interface. The custom properties define the configuration of the microphone inputs to match what is physically attached to them. Signed-off-by: Richard Fitzgerald Reviewed-by: "Rob Herring (Arm)" Link: https://patch.msgid.link/20250415115016.505777-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/dt-bindings/sound/cs48l32.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/dt-bindings/sound/cs48l32.h (limited to 'include') diff --git a/include/dt-bindings/sound/cs48l32.h b/include/dt-bindings/sound/cs48l32.h new file mode 100644 index 000000000000..4e82260fff67 --- /dev/null +++ b/include/dt-bindings/sound/cs48l32.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Device Tree defines for CS48L32 DSP. + * + * Copyright (C) 2016-2018, 2022, 2025 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#ifndef DT_BINDINGS_SOUND_CS48L32_H +#define DT_BINDINGS_SOUND_CS48L32_H + +/* Values for cirrus,in-type */ +#define CS48L32_IN_TYPE_DIFF 0 +#define CS48L32_IN_TYPE_SE 1 + +/* Values for cirrus,pdm-sup */ +#define CS48L32_PDM_SUP_VOUT_MIC 0 +#define CS48L32_PDM_SUP_MICBIAS1 1 + +#endif -- cgit v1.2.3 From e2bcbf99d045f6ae3826e39d1ed25978de17cbfe Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 15 Apr 2025 12:50:16 +0100 Subject: ASoC: cs48l32: Add driver for Cirrus Logic CS48L32 audio DSP Add a codec driver for the Cirrus Logic CS48L32 audio DSP. The CS48L32 is a low-power audio DSP with microphone inputs for "Always on Voice" (i.e. voice trigger) and voice command processing. 
It has a programmable Halo Core DSP and a variety of power-efficient fixed-function audio processors, with configurable digital mixing and routing. There are two I2S/TDM audio serial ports. Four analogue inputs are available through IN1. These feed into a 2-channel ADC through an analogue mux. There is an ALSA control for each IN1 ADC channel to select which analogue input to use. A dedicated digital mic (DMIC) PDM input is available on IN2. Two PDM outputs can feed DMIC inputs on another codec or a host DMIC/PDM input. An on-board regulator provides a power supply or bias voltage to attached microphones. Three switchable MICBIAS outputs are fed from this allowing only the microphone in use to be powered-up. There are DAPM widgets for these outputs: MICBIAS1A, MICBIAS1B and MICBIAS1C. The machine driver must create a DAPM route from the required MICBIAS1x widget to the INn input widgets to make the MICBIAS switch on when the audio input is powered-up. For example if the microphone feeding CS48L32 pin IN1LN_1 is powered from MICBIAS1A, the machine driver must create the path: (sink) IN1LN_1 <----- (source) MICBIAS1A Co-developed-by: Stuart Henderson Signed-off-by: Stuart Henderson Co-developed-by: Qi Zhou Signed-off-by: Qi Zhou Co-developed-by: Piotr Stankiewicz Signed-off-by: Piotr Stankiewicz Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250415115016.505777-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs48l32.h | 47 ++++ include/sound/cs48l32_registers.h | 530 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 577 insertions(+) create mode 100644 include/sound/cs48l32.h create mode 100644 include/sound/cs48l32_registers.h (limited to 'include') diff --git a/include/sound/cs48l32.h b/include/sound/cs48l32.h new file mode 100644 index 000000000000..27b3e7cf999a --- /dev/null +++ b/include/sound/cs48l32.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Register definitions for Cirrus Logic CS48L32 + * + * Copyright (C) 2017-2018, 2020, 2022, 2025 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. 
+ */ + +#ifndef CS48L32_H +#define CS48L32_H + +/* pll_id for snd_soc_component_set_pll() */ +#define CS48L32_FLL1_REFCLK 1 + +/* source for snd_soc_component_set_pll() */ +#define CS48L32_FLL_SRC_NONE -1 +#define CS48L32_FLL_SRC_MCLK1 0 +#define CS48L32_FLL_SRC_PDMCLK 5 +#define CS48L32_FLL_SRC_ASP1_BCLK 8 +#define CS48L32_FLL_SRC_ASP2_BCLK 9 +#define CS48L32_FLL_SRC_ASP1_FSYNC 12 +#define CS48L32_FLL_SRC_ASP2_FSYNC 13 + +/* clk_id for snd_soc_component_set_sysclk() and snd_soc_dai_set_sysclk() */ +#define CS48L32_CLK_SYSCLK_1 1 +#define CS48L32_CLK_SYSCLK_2 2 +#define CS48L32_CLK_SYSCLK_3 3 +#define CS48L32_CLK_SYSCLK_4 4 +#define CS48L32_CLK_DSPCLK 7 +#define CS48L32_CLK_PDM_FLLCLK 13 + +/* source for snd_soc_component_set_sysclk() */ +#define CS48L32_CLK_SRC_MCLK1 0x0 +#define CS48L32_CLK_SRC_FLL1 0x4 +#define CS48L32_CLK_SRC_ASP1_BCLK 0x8 +#define CS48L32_CLK_SRC_ASP2_BCLK 0x9 + +struct cs48l32 { + struct regmap *regmap; + struct device *dev; + struct gpio_desc *reset_gpio; + struct clk *mclk1; + struct regulator_bulk_data core_supplies[2]; + struct regulator *vdd_d; + int irq; +}; +#endif diff --git a/include/sound/cs48l32_registers.h b/include/sound/cs48l32_registers.h new file mode 100644 index 000000000000..f29410fdf76f --- /dev/null +++ b/include/sound/cs48l32_registers.h @@ -0,0 +1,530 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Register definitions for Cirrus Logic CS48L32 + * + * Copyright (C) 2017-2018, 2020, 2022, 2025 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#ifndef CS48L32_REGISTERS_H +#define CS48L32_REGISTERS_H + +/* Register Addresses. */ +#define CS48L32_DEVID 0x0 +#define CS48L32_REVID 0x4 +#define CS48L32_OTPID 0x10 +#define CS48L32_SFT_RESET 0x20 +#define CS48L32_CTRL_IF_DEBUG3 0xA8 +#define CS48L32_MCU_CTRL1 0x804 +#define CS48L32_GPIO1_CTRL1 0xc08 +#define CS48L32_GPIO3_CTRL1 0xc10 +#define CS48L32_GPIO7_CTRL1 0xc20 +#define CS48L32_GPIO16_CTRL1 0xc44 +#define CS48L32_OUTPUT_SYS_CLK 0x1020 +#define CS48L32_AUXPDM_CTRL 0x1044 +#define CS48L32_AUXPDM_CTRL2 0x105c +#define CS48L32_CLOCK32K 0x1400 +#define CS48L32_SYSTEM_CLOCK1 0x1404 +#define CS48L32_SYSTEM_CLOCK2 0x1408 +#define CS48L32_SAMPLE_RATE1 0x1420 +#define CS48L32_SAMPLE_RATE2 0x1424 +#define CS48L32_SAMPLE_RATE3 0x1428 +#define CS48L32_SAMPLE_RATE4 0x142c +#define CS48L32_DSP_CLOCK1 0x1510 +#define CS48L32_FLL1_CONTROL1 0x1c00 +#define CS48L32_FLL1_CONTROL5 0x1c10 +#define CS48L32_FLL1_CONTROL6 0x1c14 +#define CS48L32_FLL1_GPIO_CLOCK 0x1ca0 +#define CS48L32_CHARGE_PUMP1 0x2000 +#define CS48L32_LDO2_CTRL1 0x2408 +#define CS48L32_MICBIAS_CTRL1 0x2410 +#define CS48L32_MICBIAS_CTRL5 0x2418 +#define CS48L32_IRQ1_CTRL_AOD 0x2710 +#define CS48L32_AOD_PAD_CTRL 0x2718 +#define CS48L32_INPUT_CONTROL 0x4000 +#define CS48L32_INPUT_STATUS 0x4004 +#define CS48L32_INPUT_RATE_CONTROL 0x4008 +#define CS48L32_INPUT_CONTROL2 0x400c +#define CS48L32_INPUT_CONTROL3 0x4014 +#define CS48L32_INPUT1_CONTROL1 0x4020 +#define CS48L32_IN1L_CONTROL1 0x4024 +#define CS48L32_IN1L_CONTROL2 0x4028 +#define CS48L32_IN1R_CONTROL1 0x4044 +#define CS48L32_IN1R_CONTROL2 0x4048 +#define CS48L32_INPUT2_CONTROL1 0x4060 +#define CS48L32_IN2L_CONTROL1 0x4064 +#define CS48L32_IN2L_CONTROL2 0x4068 +#define CS48L32_IN2R_CONTROL1 0x4084 +#define CS48L32_IN2R_CONTROL2 0x4088 +#define CS48L32_INPUT_HPF_CONTROL 0x4244 +#define CS48L32_INPUT_VOL_CONTROL 0x4248 +#define CS48L32_AUXPDM_CONTROL1 0x4300 +#define CS48L32_AUXPDM_CONTROL2 0x4304 +#define CS48L32_AUXPDM1_CONTROL1 0x4308 +#define 
CS48L32_AUXPDM2_CONTROL1 0x4310 +#define CS48L32_ADC1L_ANA_CONTROL1 0x4688 +#define CS48L32_ADC1R_ANA_CONTROL1 0x468c +#define CS48L32_ASP1_ENABLES1 0x6000 +#define CS48L32_ASP1_CONTROL3 0x600C +#define CS48L32_ASP1_DATA_CONTROL5 0x6040 +#define CS48L32_ASP2_ENABLES1 0x6080 +#define CS48L32_ASP2_CONTROL3 0x608C +#define CS48L32_ASP2_DATA_CONTROL5 0x60c0 +#define CS48L32_ASP1TX1_INPUT1 0x8200 +#define CS48L32_ASP1TX2_INPUT1 0x8210 +#define CS48L32_ASP1TX3_INPUT1 0x8220 +#define CS48L32_ASP1TX4_INPUT1 0x8230 +#define CS48L32_ASP1TX5_INPUT1 0x8240 +#define CS48L32_ASP1TX6_INPUT1 0x8250 +#define CS48L32_ASP1TX7_INPUT1 0x8260 +#define CS48L32_ASP1TX8_INPUT1 0x8270 +#define CS48L32_ASP1TX8_INPUT4 0x827c +#define CS48L32_ASP2TX1_INPUT1 0x8300 +#define CS48L32_ASP2TX2_INPUT1 0x8310 +#define CS48L32_ASP2TX3_INPUT1 0x8320 +#define CS48L32_ASP2TX4_INPUT1 0x8330 +#define CS48L32_ASP2TX4_INPUT4 0x833c +#define CS48L32_ISRC1INT1_INPUT1 0x8980 +#define CS48L32_ISRC1INT2_INPUT1 0x8990 +#define CS48L32_ISRC1INT3_INPUT1 0x89a0 +#define CS48L32_ISRC1INT4_INPUT1 0x89b0 +#define CS48L32_ISRC1DEC1_INPUT1 0x89c0 +#define CS48L32_ISRC1DEC2_INPUT1 0x89d0 +#define CS48L32_ISRC1DEC3_INPUT1 0x89e0 +#define CS48L32_ISRC1DEC4_INPUT1 0x89f0 +#define CS48L32_ISRC2INT1_INPUT1 0x8a00 +#define CS48L32_ISRC2INT2_INPUT1 0x8a10 +#define CS48L32_ISRC2DEC1_INPUT1 0x8a40 +#define CS48L32_ISRC2DEC2_INPUT1 0x8a50 +#define CS48L32_ISRC3INT1_INPUT1 0x8a80 +#define CS48L32_ISRC3INT2_INPUT1 0x8a90 +#define CS48L32_ISRC3DEC1_INPUT1 0x8ac0 +#define CS48L32_ISRC3DEC2_INPUT1 0x8ad0 +#define CS48L32_EQ1_INPUT1 0x8b80 +#define CS48L32_EQ2_INPUT1 0x8b90 +#define CS48L32_EQ3_INPUT1 0x8ba0 +#define CS48L32_EQ4_INPUT1 0x8bb0 +#define CS48L32_EQ4_INPUT4 0x8bbc +#define CS48L32_DRC1L_INPUT1 0x8c00 +#define CS48L32_DRC1R_INPUT1 0x8c10 +#define CS48L32_DRC1R_INPUT4 0x8c1c +#define CS48L32_DRC2L_INPUT1 0x8c20 +#define CS48L32_DRC2R_INPUT1 0x8c30 +#define CS48L32_DRC2R_INPUT4 0x8c3c +#define CS48L32_LHPF1_INPUT1 0x8c80 +#define CS48L32_LHPF1_INPUT4 0x8c8c +#define CS48L32_LHPF2_INPUT1 0x8c90 +#define CS48L32_LHPF2_INPUT4 0x8c9c +#define CS48L32_LHPF3_INPUT1 0x8ca0 +#define CS48L32_LHPF3_INPUT4 0x8cac +#define CS48L32_LHPF4_INPUT1 0x8cb0 +#define CS48L32_LHPF4_INPUT4 0x8cbc +#define CS48L32_DSP1RX1_INPUT1 0x9000 +#define CS48L32_DSP1RX2_INPUT1 0x9010 +#define CS48L32_DSP1RX3_INPUT1 0x9020 +#define CS48L32_DSP1RX4_INPUT1 0x9030 +#define CS48L32_DSP1RX5_INPUT1 0x9040 +#define CS48L32_DSP1RX6_INPUT1 0x9050 +#define CS48L32_DSP1RX7_INPUT1 0x9060 +#define CS48L32_DSP1RX8_INPUT1 0x9070 +#define CS48L32_DSP1RX8_INPUT4 0x907c +#define CS48L32_ISRC1_CONTROL1 0xa400 +#define CS48L32_ISRC1_CONTROL2 0xa404 +#define CS48L32_ISRC2_CONTROL1 0xa510 +#define CS48L32_ISRC2_CONTROL2 0xa514 +#define CS48L32_ISRC3_CONTROL1 0xa620 +#define CS48L32_ISRC3_CONTROL2 0xa624 +#define CS48L32_FX_SAMPLE_RATE 0xa800 +#define CS48L32_EQ_CONTROL1 0xa808 +#define CS48L32_EQ_CONTROL2 0xa80c +#define CS48L32_EQ1_GAIN1 0xa810 +#define CS48L32_EQ1_GAIN2 0xa814 +#define CS48L32_EQ1_BAND1_COEFF1 0xa818 +#define CS48L32_EQ1_BAND1_COEFF2 0xa81c +#define CS48L32_EQ1_BAND1_PG 0xa820 +#define CS48L32_EQ1_BAND2_COEFF1 0xa824 +#define CS48L32_EQ1_BAND2_COEFF2 0xa828 +#define CS48L32_EQ1_BAND2_PG 0xa82c +#define CS48L32_EQ1_BAND3_COEFF1 0xa830 +#define CS48L32_EQ1_BAND3_COEFF2 0xa834 +#define CS48L32_EQ1_BAND3_PG 0xa838 +#define CS48L32_EQ1_BAND4_COEFF1 0xa83c +#define CS48L32_EQ1_BAND4_COEFF2 0xa840 +#define CS48L32_EQ1_BAND4_PG 0xa844 +#define CS48L32_EQ1_BAND5_COEFF1 0xa848 +#define 
CS48L32_EQ1_BAND5_PG 0xa850 +#define CS48L32_EQ2_GAIN1 0xa854 +#define CS48L32_EQ2_GAIN2 0xa858 +#define CS48L32_EQ2_BAND1_COEFF1 0xa85c +#define CS48L32_EQ2_BAND1_COEFF2 0xa860 +#define CS48L32_EQ2_BAND1_PG 0xa864 +#define CS48L32_EQ2_BAND2_COEFF1 0xa868 +#define CS48L32_EQ2_BAND2_COEFF2 0xa86c +#define CS48L32_EQ2_BAND2_PG 0xa870 +#define CS48L32_EQ2_BAND3_COEFF1 0xa874 +#define CS48L32_EQ2_BAND3_COEFF2 0xa878 +#define CS48L32_EQ2_BAND3_PG 0xa87c +#define CS48L32_EQ2_BAND4_COEFF1 0xa880 +#define CS48L32_EQ2_BAND4_COEFF2 0xa884 +#define CS48L32_EQ2_BAND4_PG 0xa888 +#define CS48L32_EQ2_BAND5_COEFF1 0xa88c +#define CS48L32_EQ2_BAND5_PG 0xa894 +#define CS48L32_EQ3_GAIN1 0xa898 +#define CS48L32_EQ3_GAIN2 0xa89c +#define CS48L32_EQ3_BAND1_COEFF1 0xa8a0 +#define CS48L32_EQ3_BAND1_COEFF2 0xa8a4 +#define CS48L32_EQ3_BAND1_PG 0xa8a8 +#define CS48L32_EQ3_BAND2_COEFF1 0xa8ac +#define CS48L32_EQ3_BAND2_COEFF2 0xa8b0 +#define CS48L32_EQ3_BAND2_PG 0xa8b4 +#define CS48L32_EQ3_BAND3_COEFF1 0xa8b8 +#define CS48L32_EQ3_BAND3_COEFF2 0xa8bc +#define CS48L32_EQ3_BAND3_PG 0xa8c0 +#define CS48L32_EQ3_BAND4_COEFF1 0xa8c4 +#define CS48L32_EQ3_BAND4_COEFF2 0xa8c8 +#define CS48L32_EQ3_BAND4_PG 0xa8cc +#define CS48L32_EQ3_BAND5_COEFF1 0xa8d0 +#define CS48L32_EQ3_BAND5_PG 0xa8d8 +#define CS48L32_EQ4_GAIN1 0xa8dc +#define CS48L32_EQ4_GAIN2 0xa8e0 +#define CS48L32_EQ4_BAND1_COEFF1 0xa8e4 +#define CS48L32_EQ4_BAND1_COEFF2 0xa8e8 +#define CS48L32_EQ4_BAND1_PG 0xa8ec +#define CS48L32_EQ4_BAND2_COEFF1 0xa8f0 +#define CS48L32_EQ4_BAND2_COEFF2 0xa8f4 +#define CS48L32_EQ4_BAND2_PG 0xa8f8 +#define CS48L32_EQ4_BAND3_COEFF1 0xa8fc +#define CS48L32_EQ4_BAND3_COEFF2 0xa900 +#define CS48L32_EQ4_BAND3_PG 0xa904 +#define CS48L32_EQ4_BAND4_COEFF1 0xa908 +#define CS48L32_EQ4_BAND4_COEFF2 0xa90c +#define CS48L32_EQ4_BAND4_PG 0xa910 +#define CS48L32_EQ4_BAND5_COEFF1 0xa914 +#define CS48L32_EQ4_BAND5_PG 0xa91c +#define CS48L32_LHPF_CONTROL1 0xaa30 +#define CS48L32_LHPF_CONTROL2 0xaa34 +#define CS48L32_LHPF1_COEFF 0xaa38 +#define CS48L32_LHPF2_COEFF 0xaa3c +#define CS48L32_LHPF3_COEFF 0xaa40 +#define CS48L32_LHPF4_COEFF 0xaa44 +#define CS48L32_DRC1_CONTROL1 0xab00 +#define CS48L32_DRC1_CONTROL4 0xab0c +#define CS48L32_DRC2_CONTROL1 0xab14 +#define CS48L32_DRC2_CONTROL4 0xab20 +#define CS48L32_TONE_GENERATOR1 0xb000 +#define CS48L32_TONE_GENERATOR2 0xb004 +#define CS48L32_COMFORT_NOISE_GENERATOR 0xb400 +#define CS48L32_US_CONTROL 0xb800 +#define CS48L32_US1_CONTROL 0xb804 +#define CS48L32_US1_DET_CONTROL 0xb808 +#define CS48L32_US2_CONTROL 0xb814 +#define CS48L32_US2_DET_CONTROL 0xb818 +#define CS48L32_DSP1_XM_SRAM_IBUS_SETUP_0 0x1700c +#define CS48L32_DSP1_XM_SRAM_IBUS_SETUP_1 0x17010 +#define CS48L32_DSP1_XM_SRAM_IBUS_SETUP_24 0x1706c +#define CS48L32_DSP1_YM_SRAM_IBUS_SETUP_0 0x17070 +#define CS48L32_DSP1_YM_SRAM_IBUS_SETUP_1 0x17074 +#define CS48L32_DSP1_YM_SRAM_IBUS_SETUP_8 0x17090 +#define CS48L32_DSP1_PM_SRAM_IBUS_SETUP_0 0x17094 +#define CS48L32_DSP1_PM_SRAM_IBUS_SETUP_1 0x17098 +#define CS48L32_DSP1_PM_SRAM_IBUS_SETUP_7 0x170b0 +#define CS48L32_IRQ1_STATUS 0x18004 +#define CS48L32_IRQ1_EINT_1 0x18010 +#define CS48L32_IRQ1_EINT_2 0x18014 +#define CS48L32_IRQ1_EINT_7 0x18028 +#define CS48L32_IRQ1_EINT_9 0x18030 +#define CS48L32_IRQ1_EINT_11 0x18038 +#define CS48L32_IRQ1_STS_1 0x18090 +#define CS48L32_IRQ1_STS_6 0x180a4 +#define CS48L32_IRQ1_STS_11 0x180b8 +#define CS48L32_IRQ1_MASK_1 0x18110 +#define CS48L32_IRQ1_MASK_2 0x18114 +#define CS48L32_IRQ1_MASK_7 0x18128 +#define CS48L32_IRQ1_MASK_9 0x18130 +#define CS48L32_IRQ1_MASK_11 
0x18138 +#define CS48L32_DSP1_XMEM_PACKED_0 0x2000000 +#define CS48L32_DSP1_XMEM_PACKED_LAST 0x208fff0 +#define CS48L32_DSP1_SYS_INFO_ID 0x25e0000 +#define CS48L32_DSP1_AHBM_WINDOW_DEBUG_1 0x25e2044 +#define CS48L32_DSP1_XMEM_UNPACKED24_0 0x2800000 +#define CS48L32_DSP1_XMEM_UNPACKED24_LAST 0x28bfff4 +#define CS48L32_DSP1_CLOCK_FREQ 0x2b80000 +#define CS48L32_DSP1_SAMPLE_RATE_TX8 0x2b802b8 +#define CS48L32_DSP1_SCRATCH1 0x2b805c0 +#define CS48L32_DSP1_SCRATCH4 0x2b805d8 +#define CS48L32_DSP1_CCM_CORE_CONTROL 0x2bc1000 +#define CS48L32_DSP1_STREAM_ARB_RESYNC_MSK1 0x2bc5a00 +#define CS48L32_DSP1_YMEM_PACKED_0 0x2c00000 +#define CS48L32_DSP1_YMEM_PACKED_LAST 0x2c2fff0 +#define CS48L32_DSP1_YMEM_UNPACKED24_0 0x3400000 +#define CS48L32_DSP1_YMEM_UNPACKED24_LAST 0x343fff4 +#define CS48L32_DSP1_PMEM_0 0x3800000 +#define CS48L32_DSP1_PMEM_LAST 0x3845fe8 + +/* (0x0) DEVID */ +#define CS48L32_DEVID_MASK 0x00ffffff +#define CS48L32_DEVID_SHIFT 0 + +/* (0x4) REVID */ +#define CS48L32_AREVID_MASK 0x000000f0 +#define CS48L32_AREVID_SHIFT 4 +#define CS48L32_MTLREVID_MASK 0x0000000f +#define CS48L32_MTLREVID_SHIFT 0 + +/* (0x10) OTPID */ +#define CS48L32_OTPID_MASK 0x0000000f + +/* (0x0804) MCU_CTRL1 */ +#define CS48L32_MCU_STS_MASK 0x0000ff00 +#define CS48L32_MCU_STS_SHIFT 8 + +/* (0xc08) GPIO1_CTRL1 */ +#define CS48L32_GPIOX_CTRL1_FN_MASK 0x000003ff + +/* (0x1020) OUTPUT_SYS_CLK */ +#define CS48L32_OPCLK_EN_SHIFT 15 +#define CS48L32_OPCLK_DIV_MASK 0x000000f8 +#define CS48L32_OPCLK_DIV_SHIFT 3 +#define CS48L32_OPCLK_SEL_MASK 0x00000007 + +/* (0x105c) AUXPDM_CTRL2 */ +#define CS48L32_AUXPDMDAT2_SRC_SHIFT 4 +#define CS48L32_AUXPDMDAT1_SRC_SHIFT 0 + +/* (0x1400) CLOCK32K */ +#define CS48L32_CLK_32K_EN_MASK 0x00000040 +#define CS48L32_CLK_32K_SRC_MASK 0x00000003 + +/* (0x1404) SYSTEM_CLOCK1 */ +#define CS48L32_SYSCLK_FRAC_MASK 0x00008000 +#define CS48L32_SYSCLK_FREQ_MASK 0x00000700 +#define CS48L32_SYSCLK_FREQ_SHIFT 8 +#define CS48L32_SYSCLK_EN_SHIFT 6 +#define CS48L32_SYSCLK_SRC_MASK 0x0000001f +#define CS48L32_SYSCLK_SRC_SHIFT 0 + +/* (0x1408) SYSTEM_CLOCK2 */ +#define CS48L32_SYSCLK_FREQ_STS_MASK 0x00000700 +#define CS48L32_SYSCLK_FREQ_STS_SHIFT 8 + +/* (0x1420) SAMPLE_RATE1 */ +#define CS48L32_SAMPLE_RATE_1_MASK 0x0000001f +#define CS48L32_SAMPLE_RATE_1_SHIFT 0 + +/* (0x1510) DSP_CLOCK1 */ +#define CS48L32_DSP_CLK_FREQ_MASK 0xffff0000 +#define CS48L32_DSP_CLK_FREQ_SHIFT 16 + +/* (0x1c00) FLL_CONTROL1 */ +#define CS48L32_FLL_CTRL_UPD_MASK 0x00000004 +#define CS48L32_FLL_HOLD_MASK 0x00000002 +#define CS48L32_FLL_EN_MASK 0x00000001 + +/* (0x1c04) FLL_CONTROL2 */ +#define CS48L32_FLL_LOCKDET_THR_MASK 0xf0000000 +#define CS48L32_FLL_LOCKDET_THR_SHIFT 28 +#define CS48L32_FLL_LOCKDET_MASK 0x08000000 +#define CS48L32_FLL_PHASEDET_MASK 0x00400000 +#define CS48L32_FLL_PHASEDET_SHIFT 22 +#define CS48L32_FLL_REFCLK_DIV_MASK 0x00030000 +#define CS48L32_FLL_REFCLK_DIV_SHIFT 16 +#define CS48L32_FLL_REFCLK_SRC_MASK 0x0000f000 +#define CS48L32_FLL_REFCLK_SRC_SHIFT 12 +#define CS48L32_FLL_N_MASK 0x000003ff +#define CS48L32_FLL_N_SHIFT 0 + +/* (0x1c08) FLL_CONTROL3 */ +#define CS48L32_FLL_LAMBDA_MASK 0xffff0000 +#define CS48L32_FLL_LAMBDA_SHIFT 16 +#define CS48L32_FLL_THETA_MASK 0x0000ffff +#define CS48L32_FLL_THETA_SHIFT 0 + +/* (0x1c0c) FLL_CONTROL4 */ +#define CS48L32_FLL_FD_GAIN_COARSE_SHIFT 16 +#define CS48L32_FLL_HP_MASK 0x00003000 +#define CS48L32_FLL_HP_SHIFT 12 +#define CS48L32_FLL_FB_DIV_MASK 0x000003ff +#define CS48L32_FLL_FB_DIV_SHIFT 0 + +/* (0x1c10) FLL_CONTROL5 */ +#define CS48L32_FLL_FRC_INTEG_UPD_MASK 
0x00008000 + +/* (0x2000) CHARGE_PUMP1 */ +#define CS48L32_CP2_BYPASS_SHIFT 1 +#define CS48L32_CP2_EN_SHIFT 0 + +/* (0x2408) LDO2_CTRL1 */ +#define CS48L32_LDO2_VSEL_MASK 0x000007e0 +#define CS48L32_LDO2_VSEL_SHIFT 5 + +/* (0x2410) MICBIAS_CTRL1 */ +#define CS48L32_MICB1_LVL_MASK 0x000001e0 +#define CS48L32_MICB1_LVL_SHIFT 5 +#define CS48L32_MICB1_EN_SHIFT 0 + +/* (0x2418) MICBIAS_CTRL5 */ +#define CS48L32_MICB1C_EN_SHIFT 8 +#define CS48L32_MICB1B_EN_SHIFT 4 +#define CS48L32_MICB1A_EN_SHIFT 0 + +/* (0x2710) IRQ1_CTRL_AOD */ +#define CS48L32_IRQ_POL_MASK 0x00000400 + +/* (0x4000) INPUT_CONTROL */ +#define CS48L32_IN2L_EN_SHIFT 3 +#define CS48L32_IN2R_EN_SHIFT 2 +#define CS48L32_IN1L_EN_SHIFT 1 +#define CS48L32_IN1R_EN_SHIFT 0 + +/* (0x400c) INPUT_CONTROL2 */ +#define CS48L32_PDM_FLLCLK_SRC_MASK 0x0000000f +#define CS48L32_PDM_FLLCLK_SRC_SHIFT 0 + +/* (0x4014) INPUT_CONTROL3 */ +#define CS48L32_IN_VU 0x20000000 +#define CS48L32_IN_VU_MASK 0x20000000 +#define CS48L32_IN_VU_SHIFT 29 +#define CS48L32_IN_VU_WIDTH 1 + +/* (0x4020) INPUT1_CONTROL1 */ +#define CS48L32_IN1_OSR_SHIFT 16 +#define CS48L32_IN1_PDM_SUP_MASK 0x00000300 +#define CS48L32_IN1_PDM_SUP_SHIFT 8 +#define CS48L32_IN1_MODE_SHIFT 0 + +/* + * (0x4024) IN1L_CONTROL1 + * (0x4044) IN1R_CONTROL1 + */ +#define CS48L32_INx_SRC_MASK 0x30000000 +#define CS48L32_INx_SRC_SHIFT 28 +#define CS48L32_INx_RATE_MASK 0x0000f800 +#define CS48L32_INx_RATE_SHIFT 11 +#define CS48L32_INx_HPF_SHIFT 2 +#define CS48L32_INx_LP_MODE_SHIFT 0 + +/* + * (0x4028) IN1L_CONTROL2 + * (0x4048) IN1R_CONTROL2 + */ +#define CS48L32_INx_MUTE_MASK 0x10000000 +#define CS48L32_INx_VOL_SHIFT 16 +#define CS48L32_INx_PGA_VOL_SHIFT 1 + +/* (0x4244) INPUT_HPF_CONTROL */ +#define CS48L32_IN_HPF_CUT_SHIFT 0 + +/* (0x4248) INPUT_VOL_CONTROL */ +#define CS48L32_IN_VD_RAMP_SHIFT 4 +#define CS48L32_IN_VI_RAMP_SHIFT 0 + +/* (0x4308) AUXPDM1_CONTROL1 */ +#define CS48L32_AUXPDM1_FREQ_SHIFT 16 +#define CS48L32_AUXPDM1_SRC_MASK 0x00000f00 +#define CS48L32_AUXPDM1_SRC_SHIFT 8 + +/* (0x4688) ADC1L_ANA_CONTROL1 */ +/* (0x468c) ADC1R_ANA_CONTROL1 */ +#define CS48L32_ADC1x_INT_ENA_FRC_MASK 0x00000002 + +/* (0x6004) ASPn_CONTROL1 */ +#define CS48L32_ASP_RATE_MASK 0x00001f00 +#define CS48L32_ASP_RATE_SHIFT 8 +#define CS48L32_ASP_BCLK_FREQ_MASK 0x0000003f + +/* (0x6008) ASPn_CONTROL2 */ +#define CS48L32_ASP_RX_WIDTH_MASK 0xff000000 +#define CS48L32_ASP_RX_WIDTH_SHIFT 24 +#define CS48L32_ASP_TX_WIDTH_MASK 0x00ff0000 +#define CS48L32_ASP_TX_WIDTH_SHIFT 16 +#define CS48L32_ASP_FMT_MASK 0x00000700 +#define CS48L32_ASP_FMT_SHIFT 8 +#define CS48L32_ASP_BCLK_INV_MASK 0x00000040 +#define CS48L32_ASP_BCLK_MSTR_MASK 0x00000010 +#define CS48L32_ASP_FSYNC_INV_MASK 0x00000004 +#define CS48L32_ASP_FSYNC_MSTR_MASK 0x00000001 + +/* (0x6010) ASPn_CONTROL3 */ +#define CS48L32_ASP_DOUT_HIZ_MASK 0x00000003 + +/* (0x6030) ASPn_DATA_CONTROL1 */ +#define CS48L32_ASP_TX_WL_MASK 0x0000003f + +/* (0x6040) ASPn_DATA_CONTROL5 */ +#define CS48L32_ASP_RX_WL_MASK 0x0000003f + +/* (0x82xx - 0x90xx) *_INPUT[1-4] */ +#define CS48L32_MIXER_VOL_MASK 0x00FE0000 +#define CS48L32_MIXER_VOL_SHIFT 17 +#define CS48L32_MIXER_VOL_WIDTH 7 +#define CS48L32_MIXER_SRC_MASK 0x000001ff +#define CS48L32_MIXER_SRC_SHIFT 0 +#define CS48L32_MIXER_SRC_WIDTH 9 + +/* (0xa400) ISRC1_CONTROL1 */ +#define CS48L32_ISRC1_FSL_MASK 0xf8000000 +#define CS48L32_ISRC1_FSL_SHIFT 27 +#define CS48L32_ISRC1_FSH_MASK 0x0000f800 +#define CS48L32_ISRC1_FSH_SHIFT 11 + +/* (0xa404) ISRC1_CONTROL2 */ +#define CS48L32_ISRC1_INT4_EN_SHIFT 11 +#define 
CS48L32_ISRC1_INT3_EN_SHIFT 10 +#define CS48L32_ISRC1_INT2_EN_SHIFT 9 +#define CS48L32_ISRC1_INT1_EN_SHIFT 8 +#define CS48L32_ISRC1_DEC4_EN_SHIFT 3 +#define CS48L32_ISRC1_DEC3_EN_SHIFT 2 +#define CS48L32_ISRC1_DEC2_EN_SHIFT 1 +#define CS48L32_ISRC1_DEC1_EN_SHIFT 0 + +/* (0xa800) FX_SAMPLE_RATE */ +#define CS48L32_FX_RATE_MASK 0x0000f800 +#define CS48L32_FX_RATE_SHIFT 11 + +/* (0xab00) DRC1_CONTROL1 */ +#define CS48L32_DRC1L_EN_SHIFT 1 +#define CS48L32_DRC1R_EN_SHIFT 0 + +/* (0xb400) Comfort_Noise_Generator */ +#define CS48L32_NOISE_GEN_RATE_MASK 0x0000f800 +#define CS48L32_NOISE_GEN_RATE_SHIFT 11 +#define CS48L32_NOISE_GEN_EN_SHIFT 5 +#define CS48L32_NOISE_GEN_GAIN_SHIFT 0 + +/* (0xb800) US_CONTROL */ +#define CS48L32_US1_DET_EN_SHIFT 8 + +/* (0xb804) US1_CONTROL */ +#define CS48L32_US1_RATE_MASK 0xf8000000 +#define CS48L32_US1_RATE_SHIFT 27 +#define CS48L32_US1_GAIN_SHIFT 12 +#define CS48L32_US1_SRC_MASK 0x00000f00 +#define CS48L32_US1_SRC_SHIFT 8 +#define CS48L32_US1_FREQ_MASK 0x00000070 +#define CS48L32_US1_FREQ_SHIFT 4 + +/* (0xb808) US1_DET_CONTROL */ +#define CS48L32_US1_DET_DCY_SHIFT 28 +#define CS48L32_US1_DET_HOLD_SHIFT 24 +#define CS48L32_US1_DET_NUM_SHIFT 20 +#define CS48L32_US1_DET_THR_SHIFT 16 +#define CS48L32_US1_DET_LPF_CUT_SHIFT 5 +#define CS48L32_US1_DET_LPF_SHIFT 4 + +/* (0x18004) IRQ1_STATUS */ +#define CS48L32_IRQ1_STS_MASK 0x00000001 + +/* (0x18014) IRQ1_EINT_2 */ +#define CS48L32_BOOT_DONE_EINT1_MASK 0x00000008 + +/* (0x18028) IRQ1_EINT_7 */ +#define CS48L32_DSP1_MPU_ERR_EINT1_MASK 0x00200000 +#define CS48L32_DSP1_WDT_EXPIRE_EINT1_MASK 0x00100000 + +/* (0x18030) IRQ1_EINT_9 */ +#define CS48L32_DSP1_IRQ0_EINT1_MASK 0x00000001 + +/* (0x180a4) IRQ1_STS_6 */ +#define CS48L32_FLL1_LOCK_STS1_MASK 0x00000001 + +#endif -- cgit v1.2.3 From eaa0d30216c1d54b157ea0ad7f35ba76f6e9a825 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 18 Feb 2025 20:29:46 +0100 Subject: driver core: auxiliary bus: add device creation helpers Add helper functions to create a device on the auxiliary bus. This is meant for fairly simple usage of the auxiliary bus, to avoid having the same code repeated in the different drivers. 
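As a minimal usage sketch (the "pll" child device name and the probe function are invented for illustration, and the error handling assumes the helper returns NULL on failure), a parent driver can now create and automatically tear down a child on the auxiliary bus with a single call:

    #include <linux/auxiliary_bus.h>
    #include <linux/device.h>

    static int example_parent_probe(struct device *dev)
    {
            struct auxiliary_device *adev;

            /* Registers "<KBUILD_MODNAME>.pll.0"; the devm_ wrapper
             * destroys the device again when the parent unbinds. */
            adev = devm_auxiliary_device_create(dev, "pll", NULL);
            if (!adev)
                    return -ENODEV;

            return 0;
    }
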
Suggested-by: Stephen Boyd Cc: Arnd Bergmann Signed-off-by: Jerome Brunet Reviewed-by: Greg Kroah-Hartman Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20250218-aux-device-create-helper-v4-1-c3d7dfdea2e6@baylibre.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 65dd7f154374..4086afd0cc6b 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -254,6 +254,23 @@ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module * void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); +struct auxiliary_device *auxiliary_device_create(struct device *dev, + const char *modname, + const char *devname, + void *platform_data, + int id); +void auxiliary_device_destroy(void *auxdev); + +struct auxiliary_device *__devm_auxiliary_device_create(struct device *dev, + const char *modname, + const char *devname, + void *platform_data, + int id); + +#define devm_auxiliary_device_create(dev, devname, platform_data) \ + __devm_auxiliary_device_create(dev, KBUILD_MODNAME, devname, \ + platform_data, 0) + /** * module_auxiliary_driver() - Helper macro for registering an auxiliary driver * @__auxiliary_driver: auxiliary driver struct -- cgit v1.2.3 From 981527828c301644bc4014faa9c523e8a5e32a32 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Sat, 12 Apr 2025 12:18:37 +0300 Subject: platform/mellanox: Rename field to improve code readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename field 'counter' in 'mlxreg_core_hotplug_platform_data' to count. Signed-off-by: Vadim Pasternak Link: https://lore.kernel.org/r/20250412091843.33943-2-vadimp@nvidia.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/mlxreg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index 0b9f81a6f753..f6cca7a035c7 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -209,7 +209,7 @@ struct mlxreg_core_platform_data { * @items: same type components with the hotplug capability; * @irq: platform interrupt number; * @regmap: register map of parent device; - * @counter: number of the components with the hotplug capability; + * @count: number of the components with the hotplug capability; * @cell: location of top aggregation interrupt register; * @mask: top aggregation interrupt common mask; * @cell_low: location of low aggregation interrupt register; @@ -224,7 +224,7 @@ struct mlxreg_core_hotplug_platform_data { struct mlxreg_core_item *items; int irq; void *regmap; - int counter; + int count; u32 cell; u32 mask; u32 cell_low; -- cgit v1.2.3 From 6e83166dd8003e8611f253426b85e0c3d933e1c2 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sun, 13 Apr 2025 11:34:32 +0200 Subject: mptcp: sched: remove mptcp_sched_data This is a follow-up of commit b68b106b0f15 ("mptcp: sched: reduce size for unused data"), now removing the mptcp_sched_data structure. Now is a good time to do that, because the previously mentioned WIP work has been updated, no longer depending on this structure. 
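For illustration, a scheduler written against the slimmed-down ops now only deals with the msk itself; the callbacks below are placeholders, not code from this series:

    #include <linux/module.h>
    #include <net/mptcp.h>

    static int example_get_send(struct mptcp_sock *msk)
    {
            /* placeholder: select and mark a subflow for sending */
            return 0;
    }

    static int example_get_retrans(struct mptcp_sock *msk)
    {
            /* placeholder: select a subflow for retransmission */
            return 0;
    }

    static struct mptcp_sched_ops example_sched = {
            .get_send    = example_get_send,
            .get_retrans = example_get_retrans,
            .name        = "example",
            .owner       = THIS_MODULE,
    };
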
Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250413-net-next-mptcp-sched-mib-sft-misc-v2-1-0f83a4350150@kernel.org Signed-off-by: Jakub Kicinski --- include/net/mptcp.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index bfbad695951c..f7263fe2a2e4 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -101,18 +101,9 @@ struct mptcp_out_options { #define MPTCP_SCHED_MAX 128 #define MPTCP_SCHED_BUF_MAX (MPTCP_SCHED_NAME_MAX * MPTCP_SCHED_MAX) -#define MPTCP_SUBFLOWS_MAX 8 - -struct mptcp_sched_data { - u8 subflows; - struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; -}; - struct mptcp_sched_ops { - int (*get_send)(struct mptcp_sock *msk, - struct mptcp_sched_data *data); - int (*get_retrans)(struct mptcp_sock *msk, - struct mptcp_sched_data *data); + int (*get_send)(struct mptcp_sock *msk); + int (*get_retrans)(struct mptcp_sock *msk); char name[MPTCP_SCHED_NAME_MAX]; struct module *owner; -- cgit v1.2.3 From f99564688f38458d86b64f099ebf03f19517cf77 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 13 Apr 2025 16:09:40 +0200 Subject: net: phy: remove device_phy_find_device AFAICS this function has never had a user. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/ab7b8094-2eea-4e82-a047-fd60117f220b@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index a2bfae80c449..fb755358d965 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1757,7 +1757,6 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id); struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode); struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode); -struct phy_device *device_phy_find_device(struct device *dev); struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); @@ -1779,11 +1778,6 @@ struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode) return NULL; } -static inline struct phy_device *device_phy_find_device(struct device *dev) -{ - return NULL; -} - static inline struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode) { -- cgit v1.2.3 From 95d06e92a401928fe46fda7616e460f39cb7211b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 14 Apr 2025 06:24:07 -0700 Subject: netlink: Introduce nlmsg_payload helper Create a new helper function, nlmsg_payload(), to simplify checking and retrieving Netlink message payloads. This reduces boilerplate code for users who need to verify the message length before accessing its data. 
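As a usage sketch (the example handler and the choice of struct ifinfomsg are illustrative only), the helper collapses the usual explicit length check plus nlmsg_data() pair into one call:

    #include <net/netlink.h>
    #include <linux/rtnetlink.h>

    static int example_handle(const struct nlmsghdr *nlh)
    {
            struct ifinfomsg *ifm;

            /* NULL means the message is too short to carry the header. */
            ifm = nlmsg_payload(nlh, sizeof(*ifm));
            if (!ifm)
                    return -EINVAL;

            return ifm->ifi_index;
    }
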
Suggested-by: Jakub Kicinski Signed-off-by: Breno Leitao Reviewed-by: Jakub Kicinski Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250414-nlmsg-v2-1-3d90cb42c6af@debian.org Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 29e0db940382..82e07e272290 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -611,6 +611,22 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh) return nlh->nlmsg_len - NLMSG_HDRLEN; } +/** + * nlmsg_payload - message payload if the data fits in the len + * @nlh: netlink message header + * @len: struct length + * + * Returns: The netlink message payload/data if the length is sufficient, + * otherwise NULL. + */ +static inline void *nlmsg_payload(const struct nlmsghdr *nlh, size_t len) +{ + if (nlh->nlmsg_len < nlmsg_msg_size(len)) + return NULL; + + return nlmsg_data(nlh); +} + /** * nlmsg_attrdata - head of attributes data * @nlh: netlink message header -- cgit v1.2.3 From 97d06802d10a2827ef46fd31789a26117ce7f3d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 13 Mar 2025 16:57:45 +0100 Subject: sysfs: constify bin_attribute argument of bin_attribute::read/write() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All callback implementers have been moved to the const variant of the callbacks. The signature of the original callbacks can now be changed. Also remove the now unnecessary transition machinery inside __BIN_ATTR(). Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250313-sysfs-const-bin_attr-final-v2-1-96284e1e88ce@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 18f7e1fd093c..576b8b3c60af 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -306,11 +306,11 @@ struct bin_attribute { size_t size; void *private; struct address_space *(*f_mapping)(void); - ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *, + ssize_t (*read)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); ssize_t (*read_new)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); - ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, + ssize_t (*write)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); ssize_t (*write_new)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); @@ -332,28 +332,11 @@ struct bin_attribute { */ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) -typedef ssize_t __sysfs_bin_rw_handler_new(struct file *, struct kobject *, - const struct bin_attribute *, char *, loff_t, size_t); - /* macros to create static binary attributes easier */ #define __BIN_ATTR(_name, _mode, _read, _write, _size) { \ .attr = { .name = __stringify(_name), .mode = _mode }, \ - .read = _Generic(_read, \ - __sysfs_bin_rw_handler_new * : NULL, \ - default : _read \ - ), \ - .read_new = _Generic(_read, \ - __sysfs_bin_rw_handler_new * : _read, \ - default : NULL \ - ), \ - .write = _Generic(_write, \ - __sysfs_bin_rw_handler_new * : NULL, \ - default : _write \ - ), \ - .write_new = _Generic(_write, \ - 
__sysfs_bin_rw_handler_new * : _write, \ - default : NULL \ - ), \ + .read = _read, \ + .write = _write, \ .size = _size, \ } -- cgit v1.2.3 From 9bec944506faf10d6274c75e3a55bc107b75cea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 13 Mar 2025 16:57:46 +0100 Subject: sysfs: constify attribute_group::bin_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All users of this field have been migrated to bin_attrs_new. It can now be constified. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250313-sysfs-const-bin_attr-final-v2-2-96284e1e88ce@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 576b8b3c60af..f418aae4f113 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -107,7 +107,7 @@ struct attribute_group { int); struct attribute **attrs; union { - struct bin_attribute **bin_attrs; + const struct bin_attribute *const *bin_attrs; const struct bin_attribute *const *bin_attrs_new; }; }; -- cgit v1.2.3 From 2af781a9edc4ef5f6684c0710cc3542d9be48b31 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 10 Apr 2025 17:27:12 +0200 Subject: PCI: pciehp: Ignore Link Down/Up caused by Secondary Bus Reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a Secondary Bus Reset is issued at a hotplug port, it causes a Data Link Layer State Changed event as a side effect. On hotplug ports using in-band presence detect, it additionally causes a Presence Detect Changed event. These spurious events should not result in teardown and re-enumeration of the device in the slot. Hence commit 2e35afaefe64 ("PCI: pciehp: Add reset_slot() method") masked the Presence Detect Changed Enable bit in the Slot Control register during a Secondary Bus Reset. Commit 06a8d89af551 ("PCI: pciehp: Disable link notification across slot reset") additionally masked the Data Link Layer State Changed Enable bit. However masking those bits only disables interrupt generation (PCIe r6.2 sec 6.7.3.1). The events are still visible in the Slot Status register and picked up by the IRQ handler if it runs during a Secondary Bus Reset. This can happen if the interrupt is shared or if an unmasked hotplug event occurs, e.g. Attention Button Pressed or Power Fault Detected. The likelihood of this happening used to be small, so it wasn't much of a problem in practice. That has changed with the recent introduction of bandwidth control in v6.13-rc1 with commit 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller"): Bandwidth control shares the interrupt with PCIe hotplug. A Secondary Bus Reset causes a Link Bandwidth Notification, so the hotplug IRQ handler runs, picks up the masked events and tears down the device in the slot. As a result, Joel reports VFIO passthrough failure of a GPU, which Ilpo root-caused to the incorrect handling of masked hotplug events. Clearly, a more reliable way is needed to ignore spurious hotplug events. For Downstream Port Containment, a new ignore mechanism was introduced by commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC"). It has been working reliably for the past four years. Adapt it for Secondary Bus Resets. 
Introduce two helpers to annotate code sections which cause spurious link changes: pci_hp_ignore_link_change() and pci_hp_unignore_link_change() Use those helpers in lieu of masking interrupts in the Slot Control register. Introduce a helper to check whether such a code section is executing concurrently and if so, await it: pci_hp_spurious_link_change() Invoke the helper in the hotplug IRQ thread pciehp_ist(). Re-use the IRQ thread's existing code which ignores DPC-induced link changes unless the link is unexpectedly down after reset recovery or the device was replaced during the bus reset. That code block in pciehp_ist() was previously only executed if a Data Link Layer State Changed event has occurred. Additionally execute it for Presence Detect Changed events. That's necessary for compatibility with PCIe r1.0 hotplug ports because Data Link Layer State Changed didn't exist before PCIe r1.1. DPC was added with PCIe r3.1 and thus DPC-capable hotplug ports always support Data Link Layer State Changed events. But the same cannot be assumed for Secondary Bus Reset, which already existed in PCIe r1.0. Secondary Bus Reset is only one of many causes of spurious link changes. Others include runtime suspend to D3cold, firmware updates or FPGA reconfiguration. The new pci_hp_{,un}ignore_link_change() helpers may be used by all kinds of drivers to annotate such code sections, hence their declarations are publicly visible in . A case in point is the Mellanox Ethernet driver which disables a firmware reset feature if the Ethernet card is attached to a hotplug port, see commit 3d7a3f2612d7 ("net/mlx5: Nack sync reset request when HotPlug is enabled"). Going forward, PCIe hotplug will be able to cope gracefully with all such use cases once the code sections are properly annotated. The new helpers internally use two bits in struct pci_dev's priv_flags as well as a wait_queue. This mirrors what was done for DPC by commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC"). That may be insufficient if spurious link changes are caused by multiple sources simultaneously. An example might be a Secondary Bus Reset issued by AER during FPGA reconfiguration. If this turns out to happen in real life, support for it can easily be added by replacing the PCI_LINK_CHANGING flag with an atomic_t counter incremented by pci_hp_ignore_link_change() and decremented by pci_hp_unignore_link_change(). Instead of awaiting a zero PCI_LINK_CHANGING flag, the pci_hp_spurious_link_change() helper would then simply await a zero counter. 
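A hypothetical annotation of such a code section, as a minimal sketch only (the function and the device passed in are illustrative; pciehp itself applies the helpers around its slot reset):

    #include <linux/pci.h>

    /* Hypothetical sketch: bracket an operation that is known to take the
     * link down and up again (firmware update, FPGA reconfiguration,
     * Secondary Bus Reset), so pciehp treats the Link Down/Up as expected
     * instead of tearing down and re-enumerating the slot. */
    static void example_link_disturbing_operation(struct pci_dev *pdev)
    {
            pci_hp_ignore_link_change(pdev);

            /* ... trigger the reset or reconfiguration here ... */

            pci_hp_unignore_link_change(pdev);
    }
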
Fixes: 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller") Reported-by: Joel Mathew Thomas Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219765 Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Tested-by: Joel Mathew Thomas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/d04deaf49d634a2edf42bf3c06ed81b4ca54d17b.1744298239.git.lukas@wunner.de --- include/linux/pci.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..833b54f3ce6d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1848,6 +1848,14 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } #endif +#ifdef CONFIG_HOTPLUG_PCI +void pci_hp_ignore_link_change(struct pci_dev *pdev); +void pci_hp_unignore_link_change(struct pci_dev *pdev); +#else +static inline void pci_hp_ignore_link_change(struct pci_dev *pdev) { } +static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } +#endif + #ifdef CONFIG_PCIEAER bool pci_aer_available(void); #else -- cgit v1.2.3 From 7c571ac57d9d97190dcba18212fabf99888b0c48 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 14 Apr 2025 14:26:30 -0700 Subject: net: ptp: introduce .supported_extts_flags to ptp_clock_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PTP_EXTTS_REQUEST(2) ioctl has a flags field which specifies how the external timestamp request should behave. This includes which edge of the signal to timestamp, as well as a specialized "offset" mode. It is expected that more flags will be added in the future. Driver authors routinely do not check the flags, often accepting requests with flags which they do not support. Even drivers which do check flags may not be future-proofed to reject flags not yet defined. Thus, any future flag additions often require manually updating drivers to reject these flags. This approach of hoping we catch flag checks during review, or playing whack-a-mole after the fact is the wrong approach. Introduce the "supported_extts_flags" field to the ptp_clock_info structure. This field defines the set of flags the device actually supports. Update the core character device logic to check this field and reject unsupported requests. Getting this right is somewhat tricky. First, to avoid unnecessary repetition and make basic functionality work when .supported_extts_flags is 0, the core always accepts the PTP_ENABLE_FEATURE flag. This flag is used to set the 'on' parameter to the .enable function and is thus always 'supported' by all drivers. For backwards compatibility, the PTP_RISING_EDGE and PTP_FALLING_EDGE flags are merely "hints" when using the old PTP_EXTTS_REQUEST ioctl, and are not expected to be enforced. If the user issues PTP_EXTTS_REQUEST2, the PTP_STRICT_FLAGS flag is added which is supposed to inform the driver to strictly validate the flags and reject unsupported requests. To handle this, first check if the driver reports PTP_STRICT_FLAGS support. If it does not, then always allow the PTP_RISING_EDGE and PTP_FALLING_EDGE flags. This keeps backwards compatibility with the original PTP_EXTTS_REQUEST ioctl where these flags are not guaranteed to be honored. This way, drivers which do not set the supported_extts_flags will continue to accept requests for the original PTP_EXTTS_REQUEST ioctl. 
The core will automatically reject requests with new flags, and correctly reject requests with PTP_STRICT_FLAGS, where the driver is supposed to strictly validate the flags. Update the various drivers, refactoring their validation logic into the .supported_extts_flags field. For consistency and readability, PTP_ENABLE_FEATURE is not set in the supported flags list, and PTP_EXTTS_EDGES is expanded to PTP_RISING_EDGE | PTP_FALLING_EDGE in all cases. Note the following driver files set n_ext_ts to a non-zero value but did not check flags at all: • drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c • drivers/net/ethernet/freescale/enetc/enetc_ptp.c • drivers/net/ethernet/intel/i40e/i40e_ptp.c • drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c • drivers/net/ethernet/renesas/ravb_ptp.c • drivers/net/ethernet/renesas/rtsn.c • drivers/net/ethernet/renesas/rtsn.h • drivers/net/ethernet/ti/am65-cpts.c • drivers/net/ethernet/ti/cpts.h • drivers/net/ethernet/ti/icssg/icss_iep.c • drivers/net/ethernet/xscale/ptp_ixp46x.c • drivers/net/phy/bcm-phy-ptp.c • drivers/ptp/ptp_ocp.c • drivers/ptp/ptp_pch.c • drivers/ptp/ptp_qoriq.c These drivers behavior does change slightly: they will now reject the PTP_EXTTS_REQUEST2 ioctl, because they do not strictly validate their flags. This also makes them no longer incorrectly accept PTP_EXT_OFFSET. Also note that the renesas ravb driver does not support PTP_STRICT_FLAGS. We could leave the .supported_extts_flags as 0, but I added the PTP_RISING_EDGE | PTP_FALLING_EDGE since the driver previously manually validated these flags. This is equivalent to 0 because the core will allow these flags regardless unless PTP_STRICT_FLAGS is also set. Signed-off-by: Jacob Keller Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20250414-jk-supported-perout-flags-v2-1-f6b17d15475c@intel.com Signed-off-by: Jakub Kicinski --- include/linux/ptp_clock_kernel.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 0d68d09bedd1..25cba2e5ee69 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -68,6 +68,17 @@ struct ptp_system_timestamp { * @n_per_out: The number of programmable periodic signals. * @n_pins: The number of programmable pins. * @pps: Indicates whether the clock supports a PPS callback. + * + * @supported_extts_flags: The set of flags the driver supports for the + * PTP_EXTTS_REQUEST ioctl. The PTP core will use + * this list to reject unsupported requests. + * PTP_ENABLE_FEATURE is assumed and does not need to + * be included. If PTP_STRICT_FLAGS is *not* set, + * then both PTP_RISING_EDGE and PTP_FALLING_EDGE + * will be assumed. Note that PTP_STRICT_FLAGS must + * be set if the drivers wants to honor + * PTP_EXTTS_REQUEST2 and any future flags. + * * @pin_config: Array of length 'n_pins'. If the number of * programmable pins is nonzero, then drivers must * allocate and initialize this array. 
@@ -174,6 +185,7 @@ struct ptp_clock_info { int n_per_out; int n_pins; int pps; + unsigned int supported_extts_flags; struct ptp_pin_desc *pin_config; int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); int (*adjphase)(struct ptp_clock_info *ptp, s32 phase); -- cgit v1.2.3 From d9f3e9ecc4562ae07aaf614cf0a6690ef7ca0e10 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 14 Apr 2025 14:26:31 -0700 Subject: net: ptp: introduce .supported_perout_flags to ptp_clock_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PTP_PEROUT_REQUEST2 ioctl has gained support for flags specifying specific output behavior including PTP_PEROUT_ONE_SHOT, PTP_PEROUT_DUTY_CYCLE, PTP_PEROUT_PHASE. Driver authors are notorious for not checking the flags of the request. This results in misinterpreting the request, generating an output signal that does not match the requested value. It is anticipated that even more flags will be added in the future, resulting in even more broken requests. Expecting these issues to be caught during review or playing whack-a-mole after the fact is not a great solution. Instead, introduce the supported_perout_flags field in the ptp_clock_info structure. Update the core character device logic to explicitly reject any request which has a flag not on this list. This ensures that drivers must 'opt in' to the flags they support. Drivers which don't set the .supported_perout_flags field will not need to check that unsupported flags aren't passed, as the core takes care of this. Update the drivers which do support flags to set this new field. Note the following driver files set n_per_out to a non-zero value but did not check the flags at all: • drivers/ptp/ptp_clockmatrix.c • drivers/ptp/ptp_idt82p33.c • drivers/ptp/ptp_fc3.c • drivers/net/ethernet/ti/am65-cpts.c • drivers/net/ethernet/aquantia/atlantic/aq_ptp.c • drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c • drivers/net/dsa/sja1105/sja1105_ptp.c • drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c • drivers/net/ethernet/mscc/ocelot_vsc7514.c • drivers/net/ethernet/intel/i40e/i40e_ptp.c Reviewed-by: Vadim Fedorenko Signed-off-by: Jacob Keller Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20250414-jk-supported-perout-flags-v2-2-f6b17d15475c@intel.com Signed-off-by: Jakub Kicinski --- include/linux/ptp_clock_kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 25cba2e5ee69..eced7e9bf69a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -69,6 +69,11 @@ struct ptp_system_timestamp { * @n_pins: The number of programmable pins. * @pps: Indicates whether the clock supports a PPS callback. * + * @supported_perout_flags: The set of flags the driver supports for the + * PTP_PEROUT_REQUEST ioctl. The PTP core will + * reject a request with any flag not specified + * here. + * * @supported_extts_flags: The set of flags the driver supports for the * PTP_EXTTS_REQUEST ioctl. The PTP core will use * this list to reject unsupported requests. 
@@ -185,6 +190,7 @@ struct ptp_clock_info { int n_per_out; int n_pins; int pps; + unsigned int supported_perout_flags; unsigned int supported_extts_flags; struct ptp_pin_desc *pin_config; int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); -- cgit v1.2.3 From 64929fe8c0a43508eee952cf57903a61c52601e7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 13:36:55 +0800 Subject: crypto: acomp - Remove request chaining Request chaining requires the user to do too much book keeping. Remove it from acomp. Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 14 -------------- include/crypto/internal/acompress.h | 5 ----- 2 files changed, 19 deletions(-) (limited to 'include') diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index f0e01ff77d92..080e134df35c 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -52,10 +52,6 @@ struct acomp_req; struct folio; struct acomp_req_chain { - struct list_head head; - struct acomp_req *req0; - struct acomp_req *cur; - int (*op)(struct acomp_req *req); crypto_completion_t compl; void *data; struct scatterlist ssg; @@ -68,8 +64,6 @@ struct acomp_req_chain { u8 *dst; struct folio *dfolio; }; - size_t soff; - size_t doff; u32 flags; }; @@ -343,8 +337,6 @@ static inline void acomp_request_set_callback(struct acomp_req *req, req->base.data = data; req->base.flags &= keep; req->base.flags |= flgs & ~keep; - - crypto_reqchain_init(&req->base); } /** @@ -552,12 +544,6 @@ static inline void acomp_request_set_dst_folio(struct acomp_req *req, req->base.flags |= CRYPTO_ACOMP_REQ_DST_FOLIO; } -static inline void acomp_request_chain(struct acomp_req *req, - struct acomp_req *head) -{ - crypto_request_chain(&req->base, &head->base); -} - /** * crypto_acomp_compress() -- Invoke asynchronous compress operation * diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index fbbff9a8a2d9..960cdd1f3a57 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -151,11 +151,6 @@ void crypto_unregister_acomp(struct acomp_alg *alg); int crypto_register_acomps(struct acomp_alg *algs, int count); void crypto_unregister_acomps(struct acomp_alg *algs, int count); -static inline bool acomp_request_chained(struct acomp_req *req) -{ - return crypto_request_chained(&req->base); -} - static inline bool acomp_request_issg(struct acomp_req *req) { return !(req->base.flags & (CRYPTO_ACOMP_REQ_SRC_VIRT | -- cgit v1.2.3 From 5bb61dc76d11a661c323dee1505b408d18c31565 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 13:37:00 +0800 Subject: crypto: ahash - Remove request chaining Request chaining requires the user to do too much book keeping. Remove it from ahash. 
Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 5 ----- include/crypto/hash.h | 12 ------------ include/crypto/internal/hash.h | 5 ----- include/linux/crypto.h | 15 --------------- 4 files changed, 37 deletions(-) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 03b7eca8af9a..ede622ecefa8 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -267,11 +267,6 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK; } -static inline bool crypto_request_chained(struct crypto_async_request *req) -{ - return !list_empty(&req->list); -} - static inline bool crypto_tfm_req_chain(struct crypto_tfm *tfm) { return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_CHAIN; diff --git a/include/crypto/hash.h b/include/crypto/hash.h index a13e2de3c9b4..113a5835e586 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -630,7 +630,6 @@ static inline void ahash_request_set_callback(struct ahash_request *req, flags &= ~keep; req->base.flags &= keep; req->base.flags |= flags; - crypto_reqchain_init(&req->base); } /** @@ -679,12 +678,6 @@ static inline void ahash_request_set_virt(struct ahash_request *req, req->base.flags |= CRYPTO_AHASH_REQ_VIRT; } -static inline void ahash_request_chain(struct ahash_request *req, - struct ahash_request *head) -{ - crypto_request_chain(&req->base, &head->base); -} - /** * DOC: Synchronous Message Digest API * @@ -986,11 +979,6 @@ static inline void shash_desc_zero(struct shash_desc *desc) sizeof(*desc) + crypto_shash_descsize(desc->tfm)); } -static inline int ahash_request_err(struct ahash_request *req) -{ - return req->base.err; -} - static inline bool ahash_is_async(struct crypto_ahash *tfm) { return crypto_tfm_is_async(&tfm->base); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 052ac7924af3..e2a1fac38610 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -247,11 +247,6 @@ static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) return container_of(tfm, struct crypto_shash, base); } -static inline bool ahash_request_chained(struct ahash_request *req) -{ - return false; -} - static inline bool ahash_request_isvirt(struct ahash_request *req) { return req->base.flags & CRYPTO_AHASH_REQ_VIRT; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 1e3809d28abd..dd817f56ff0c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -179,7 +178,6 @@ struct crypto_async_request { struct crypto_tfm *tfm; u32 flags; - int err; }; /** @@ -473,19 +471,6 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) return __alignof__(tfm->__crt_ctx); } -static inline void crypto_reqchain_init(struct crypto_async_request *req) -{ - req->err = -EINPROGRESS; - INIT_LIST_HEAD(&req->list); -} - -static inline void crypto_request_chain(struct crypto_async_request *req, - struct crypto_async_request *head) -{ - req->err = -EINPROGRESS; - list_add_tail(&req->list, &head->list); -} - static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm) { return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; -- cgit v1.2.3 From 1451e3e561be9ff4e86b911b9367a2223275d16f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:02:51 +0800 Subject: crypto: api - Add helpers to manage request flags Add helpers so that the ON_STACK request flag management is not duplicated all over the 
place. Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 5 +++++ include/linux/crypto.h | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index ede622ecefa8..6999e10ea09e 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -272,4 +272,9 @@ static inline bool crypto_tfm_req_chain(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_CHAIN; } +static inline u32 crypto_request_flags(struct crypto_async_request *req) +{ + return req->flags & ~CRYPTO_TFM_REQ_ON_STACK; +} + #endif /* _CRYPTO_ALGAPI_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index dd817f56ff0c..a387f1547ea0 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -476,5 +476,29 @@ static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; } +static inline bool crypto_req_on_stack(struct crypto_async_request *req) +{ + return req->flags & CRYPTO_TFM_REQ_ON_STACK; +} + +static inline void crypto_request_set_callback( + struct crypto_async_request *req, u32 flags, + crypto_completion_t compl, void *data) +{ + u32 keep = CRYPTO_TFM_REQ_ON_STACK; + + req->complete = compl; + req->data = data; + req->flags &= keep; + req->flags |= flags & ~keep; +} + +static inline void crypto_request_set_tfm(struct crypto_async_request *req, + struct crypto_tfm *tfm) +{ + req->tfm = tfm; + req->flags &= ~CRYPTO_TFM_REQ_ON_STACK; +} + #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.3 From b04b395f7a29ed28d3cb27a7b39ac67dfb959fa0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:02:53 +0800 Subject: crypto: acomp - Use request flag helpers and add acomp_request_flags Use the newly added request flag helpers to manage the request flags. Also add acomp_request_flags which lets bottom-level users to access the request flags without the bits private to the acomp API. Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 27 ++++++++++++++++----------- include/crypto/internal/acompress.h | 6 ++++++ 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 080e134df35c..f383a4008854 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -38,6 +38,12 @@ /* Set this bit if destination is a folio. */ #define CRYPTO_ACOMP_REQ_DST_FOLIO 0x00000040 +/* Private flags that should not be touched by the user. 
*/ +#define CRYPTO_ACOMP_REQ_PRIVATE \ + (CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | \ + CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA | \ + CRYPTO_ACOMP_REQ_SRC_FOLIO | CRYPTO_ACOMP_REQ_DST_FOLIO) + #define CRYPTO_ACOMP_DST_MAX 131072 #define MAX_SYNC_COMP_REQSIZE 0 @@ -201,7 +207,7 @@ static inline unsigned int crypto_acomp_reqsize(struct crypto_acomp *tfm) static inline void acomp_request_set_tfm(struct acomp_req *req, struct crypto_acomp *tfm) { - req->base.tfm = crypto_acomp_tfm(tfm); + crypto_request_set_tfm(&req->base, crypto_acomp_tfm(tfm)); } static inline bool acomp_is_async(struct crypto_acomp *tfm) @@ -298,6 +304,11 @@ static inline void *acomp_request_extra(struct acomp_req *req) return (void *)((char *)req + len); } +static inline bool acomp_req_on_stack(struct acomp_req *req) +{ + return crypto_req_on_stack(&req->base); +} + /** * acomp_request_free() -- zeroize and free asynchronous (de)compression * request as well as the output buffer if allocated @@ -307,7 +318,7 @@ static inline void *acomp_request_extra(struct acomp_req *req) */ static inline void acomp_request_free(struct acomp_req *req) { - if (!req || (req->base.flags & CRYPTO_TFM_REQ_ON_STACK)) + if (!req || acomp_req_on_stack(req)) return; kfree_sensitive(req); } @@ -328,15 +339,9 @@ static inline void acomp_request_set_callback(struct acomp_req *req, crypto_completion_t cmpl, void *data) { - u32 keep = CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | - CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA | - CRYPTO_ACOMP_REQ_SRC_FOLIO | CRYPTO_ACOMP_REQ_DST_FOLIO | - CRYPTO_TFM_REQ_ON_STACK; - - req->base.complete = cmpl; - req->base.data = data; - req->base.flags &= keep; - req->base.flags |= flgs & ~keep; + flgs &= ~CRYPTO_ACOMP_REQ_PRIVATE; + flgs |= req->base.flags & CRYPTO_ACOMP_REQ_PRIVATE; + crypto_request_set_callback(&req->base, flgs, cmpl, data); } /** diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 960cdd1f3a57..5483ca5b46ad 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -229,4 +229,10 @@ static inline bool acomp_walk_more_src(const struct acomp_walk *walk, int cur) { return walk->slen != cur; } + +static inline u32 acomp_request_flags(struct acomp_req *req) +{ + return crypto_request_flags(&req->base) & ~CRYPTO_ACOMP_REQ_PRIVATE; +} + #endif -- cgit v1.2.3 From 05fa2c6e87da31eab150cdaca6697cd1de122ec7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:02:55 +0800 Subject: crypto: acomp - Add ACOMP_FBREQ_ON_STACK Add a helper to create an on-stack fallback request from a given request. Use this helper in acomp_do_nondma. 
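A minimal caller-side sketch of the new helper, assuming only the ACOMP_FBREQ_ON_STACK macro added by this patch; acomp_do_nondma() itself is not shown here, so the function below is purely illustrative:

#include <crypto/internal/acompress.h>

static int nondma_fallback_sketch(struct acomp_req *req, bool comp)
{
	ACOMP_FBREQ_ON_STACK(fbreq, req);
	int err;

	err = comp ? crypto_acomp_compress(fbreq) :
		     crypto_acomp_decompress(fbreq);
	/* Propagate the produced length back to the original request
	 * (an assumption about what the surrounding code needs). */
	req->dlen = fbreq->dlen;
	return err;
}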
Signed-off-by: Herbert Xu --- include/crypto/internal/acompress.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 5483ca5b46ad..8840fd2c1db5 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -23,6 +23,12 @@ struct acomp_req *name = acomp_request_on_stack_init( \ __##name##_req, (tfm), 0, true) +#define ACOMP_FBREQ_ON_STACK(name, req) \ + char __##name##_req[sizeof(struct acomp_req) + \ + MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \ + struct acomp_req *name = acomp_fbreq_on_stack_init( \ + __##name##_req, (req)) + /** * struct acomp_alg - asynchronous compression algorithm * @@ -235,4 +241,24 @@ static inline u32 acomp_request_flags(struct acomp_req *req) return crypto_request_flags(&req->base) & ~CRYPTO_ACOMP_REQ_PRIVATE; } +static inline struct acomp_req *acomp_fbreq_on_stack_init( + char *buf, struct acomp_req *old) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(old); + struct acomp_req *req; + + req = acomp_request_on_stack_init(buf, tfm, 0, true); + acomp_request_set_callback(req, acomp_request_flags(old), NULL, NULL); + req->base.flags &= ~CRYPTO_ACOMP_REQ_PRIVATE; + req->base.flags |= old->base.flags & CRYPTO_ACOMP_REQ_PRIVATE; + req->src = old->src; + req->dst = old->dst; + req->slen = old->slen; + req->dlen = old->dlen; + req->soff = old->soff; + req->doff = old->doff; + + return req; +} + #endif -- cgit v1.2.3 From 097c432caaa6d91f87732fe991cb08139e31101a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:03:00 +0800 Subject: crypto: acomp - Add ACOMP_REQUEST_CLONE Add a new helper ACOMP_REQUEST_CLONE that will transform a stack request into a dynamically allocated one if possible, and otherwise switch it over to the sycnrhonous fallback transform. The intended usage is: ACOMP_STACK_ON_REQUEST(req, tfm); ... err = crypto_acomp_compress(req); /* The request cannot complete synchronously. */ if (err == -EAGAIN) { /* This will not fail. */ req = ACOMP_REQUEST_CLONE(req, gfp); /* Redo operation. 
*/ err = crypto_acomp_compress(req); } Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 30 ++++++++++++++++++++++++++---- include/crypto/internal/acompress.h | 11 +++-------- 2 files changed, 29 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index f383a4008854..93cee67c27c0 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -49,10 +49,19 @@ #define MAX_SYNC_COMP_REQSIZE 0 #define ACOMP_REQUEST_ALLOC(name, tfm, gfp) \ + char __##name##_req[sizeof(struct acomp_req) + \ + MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \ + struct acomp_req *name = acomp_request_alloc_init( \ + __##name##_req, (tfm), (gfp)) + +#define ACOMP_REQUEST_ON_STACK(name, tfm) \ char __##name##_req[sizeof(struct acomp_req) + \ MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \ struct acomp_req *name = acomp_request_on_stack_init( \ - __##name##_req, (tfm), (gfp), false) + __##name##_req, (tfm)) + +#define ACOMP_REQUEST_CLONE(name, gfp) \ + acomp_request_clone(name, sizeof(__##name##_req), gfp) struct acomp_req; struct folio; @@ -571,12 +580,12 @@ int crypto_acomp_compress(struct acomp_req *req); */ int crypto_acomp_decompress(struct acomp_req *req); -static inline struct acomp_req *acomp_request_on_stack_init( - char *buf, struct crypto_acomp *tfm, gfp_t gfp, bool stackonly) +static inline struct acomp_req *acomp_request_alloc_init( + char *buf, struct crypto_acomp *tfm, gfp_t gfp) { struct acomp_req *req; - if (!stackonly && (req = acomp_request_alloc(tfm, gfp))) + if ((req = acomp_request_alloc(tfm, gfp))) return req; req = (void *)buf; @@ -586,4 +595,17 @@ static inline struct acomp_req *acomp_request_on_stack_init( return req; } +static inline struct acomp_req *acomp_request_on_stack_init( + char *buf, struct crypto_acomp *tfm) +{ + struct acomp_req *req = (void *)buf; + + acomp_request_set_tfm(req, tfm); + req->base.flags = CRYPTO_TFM_REQ_ON_STACK; + return req; +} + +struct acomp_req *acomp_request_clone(struct acomp_req *req, + size_t total, gfp_t gfp); + #endif diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 8840fd2c1db5..b51d66633935 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -17,12 +17,6 @@ #include #include -#define ACOMP_REQUEST_ON_STACK(name, tfm) \ - char __##name##_req[sizeof(struct acomp_req) + \ - MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \ - struct acomp_req *name = acomp_request_on_stack_init( \ - __##name##_req, (tfm), 0, true) - #define ACOMP_FBREQ_ON_STACK(name, req) \ char __##name##_req[sizeof(struct acomp_req) + \ MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \ @@ -245,9 +239,10 @@ static inline struct acomp_req *acomp_fbreq_on_stack_init( char *buf, struct acomp_req *old) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(old); - struct acomp_req *req; + struct acomp_req *req = (void *)buf; - req = acomp_request_on_stack_init(buf, tfm, 0, true); + acomp_request_set_tfm(req, tfm->fb); + req->base.flags = CRYPTO_TFM_REQ_ON_STACK; acomp_request_set_callback(req, acomp_request_flags(old), NULL, NULL); req->base.flags &= ~CRYPTO_ACOMP_REQ_PRIVATE; req->base.flags |= old->base.flags & CRYPTO_ACOMP_REQ_PRIVATE; -- cgit v1.2.3 From 018cba2ecc3bb97d3cb24470d2e1245cd90d98c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:03:05 +0800 Subject: crypto: acomp - Remove ACOMP_REQUEST_ALLOC Remove ACOMP_REQUEST_ALLOC in favour of ACOMP_REQUEST_ON_STACK with ACOMP_REQUEST_CLONE. 
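As a concrete, illustrative example of the replacement pattern, a former ACOMP_REQUEST_ALLOC user now starts on the stack and only clones the request when the operation cannot complete synchronously (following the intended usage shown for ACOMP_REQUEST_CLONE above; the function name is made up):

#include <crypto/acompress.h>

static int compress_sketch(struct crypto_acomp *tfm, gfp_t gfp)
{
	ACOMP_REQUEST_ON_STACK(req, tfm);
	int err;

	/* acomp_request_set_callback()/acomp_request_set_params() as usual. */
	err = crypto_acomp_compress(req);
	if (err == -EAGAIN) {
		/* Cannot complete synchronously; this clone will not fail. */
		req = ACOMP_REQUEST_CLONE(req, gfp);
		err = crypto_acomp_compress(req);
	}
	return err;
}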
Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 93cee67c27c0..96ec0090a855 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -48,12 +48,6 @@ #define MAX_SYNC_COMP_REQSIZE 0 -#define ACOMP_REQUEST_ALLOC(name, tfm, gfp) \ - char __##name##_req[sizeof(struct acomp_req) + \ - MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \ - struct acomp_req *name = acomp_request_alloc_init( \ - __##name##_req, (tfm), (gfp)) - #define ACOMP_REQUEST_ON_STACK(name, tfm) \ char __##name##_req[sizeof(struct acomp_req) + \ MAX_SYNC_COMP_REQSIZE] CRYPTO_MINALIGN_ATTR; \ @@ -580,21 +574,6 @@ int crypto_acomp_compress(struct acomp_req *req); */ int crypto_acomp_decompress(struct acomp_req *req); -static inline struct acomp_req *acomp_request_alloc_init( - char *buf, struct crypto_acomp *tfm, gfp_t gfp) -{ - struct acomp_req *req; - - if ((req = acomp_request_alloc(tfm, gfp))) - return req; - - req = (void *)buf; - acomp_request_set_tfm(req, tfm->fb); - req->base.flags = CRYPTO_TFM_REQ_ON_STACK; - - return req; -} - static inline struct acomp_req *acomp_request_on_stack_init( char *buf, struct crypto_acomp *tfm) { -- cgit v1.2.3 From 5f3437e9c89eec7bbf0ee5f582894d41f57528bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:05:27 +0800 Subject: crypto: acomp - Simplify folio handling Rather than storing the folio as is and handling it later, convert it to a scatterlist right away. Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 45 ++++++------------------------------- include/crypto/internal/acompress.h | 16 +------------ 2 files changed, 8 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 96ec0090a855..1b30290d6380 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -32,17 +32,10 @@ /* Set this bit for if virtual address destination cannot be used for DMA. */ #define CRYPTO_ACOMP_REQ_DST_NONDMA 0x00000010 -/* Set this bit if source is a folio. */ -#define CRYPTO_ACOMP_REQ_SRC_FOLIO 0x00000020 - -/* Set this bit if destination is a folio. */ -#define CRYPTO_ACOMP_REQ_DST_FOLIO 0x00000040 - /* Private flags that should not be touched by the user. 
*/ #define CRYPTO_ACOMP_REQ_PRIVATE \ (CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | \ - CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA | \ - CRYPTO_ACOMP_REQ_SRC_FOLIO | CRYPTO_ACOMP_REQ_DST_FOLIO) + CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA) #define CRYPTO_ACOMP_DST_MAX 131072 @@ -84,10 +77,6 @@ struct acomp_req_chain { * @dst: Destination scatterlist * @svirt: Source virtual address * @dvirt: Destination virtual address - * @sfolio: Source folio - * @soff: Source folio offset - * @dfolio: Destination folio - * @doff: Destination folio offset * @slen: Size of the input buffer * @dlen: Size of the output buffer and number of bytes produced * @chain: Private API code data, do not use @@ -98,15 +87,11 @@ struct acomp_req { union { struct scatterlist *src; const u8 *svirt; - struct folio *sfolio; }; union { struct scatterlist *dst; u8 *dvirt; - struct folio *dfolio; }; - size_t soff; - size_t doff; unsigned int slen; unsigned int dlen; @@ -373,8 +358,6 @@ static inline void acomp_request_set_params(struct acomp_req *req, req->base.flags &= ~(CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | - CRYPTO_ACOMP_REQ_SRC_FOLIO | - CRYPTO_ACOMP_REQ_DST_FOLIO | CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA); } @@ -397,7 +380,6 @@ static inline void acomp_request_set_src_sg(struct acomp_req *req, req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_NONDMA; req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_VIRT; - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_FOLIO; } /** @@ -417,7 +399,6 @@ static inline void acomp_request_set_src_dma(struct acomp_req *req, req->slen = slen; req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_NONDMA; - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_FOLIO; req->base.flags |= CRYPTO_ACOMP_REQ_SRC_VIRT; } @@ -438,7 +419,6 @@ static inline void acomp_request_set_src_nondma(struct acomp_req *req, req->svirt = src; req->slen = slen; - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_FOLIO; req->base.flags |= CRYPTO_ACOMP_REQ_SRC_NONDMA; req->base.flags |= CRYPTO_ACOMP_REQ_SRC_VIRT; } @@ -457,13 +437,9 @@ static inline void acomp_request_set_src_folio(struct acomp_req *req, struct folio *folio, size_t off, unsigned int len) { - req->sfolio = folio; - req->soff = off; - req->slen = len; - - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_NONDMA; - req->base.flags &= ~CRYPTO_ACOMP_REQ_SRC_VIRT; - req->base.flags |= CRYPTO_ACOMP_REQ_SRC_FOLIO; + sg_init_table(&req->chain.ssg, 1); + sg_set_folio(&req->chain.ssg, folio, len, off); + acomp_request_set_src_sg(req, &req->chain.ssg, len); } /** @@ -484,7 +460,6 @@ static inline void acomp_request_set_dst_sg(struct acomp_req *req, req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_NONDMA; req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_VIRT; - req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_FOLIO; } /** @@ -504,7 +479,6 @@ static inline void acomp_request_set_dst_dma(struct acomp_req *req, req->dlen = dlen; req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_NONDMA; - req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_FOLIO; req->base.flags |= CRYPTO_ACOMP_REQ_DST_VIRT; } @@ -524,7 +498,6 @@ static inline void acomp_request_set_dst_nondma(struct acomp_req *req, req->dvirt = dst; req->dlen = dlen; - req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_FOLIO; req->base.flags |= CRYPTO_ACOMP_REQ_DST_NONDMA; req->base.flags |= CRYPTO_ACOMP_REQ_DST_VIRT; } @@ -543,13 +516,9 @@ static inline void acomp_request_set_dst_folio(struct acomp_req *req, struct folio *folio, size_t off, unsigned int len) { - req->dfolio = folio; - req->doff = off; - req->dlen = len; - - req->base.flags &= 
~CRYPTO_ACOMP_REQ_DST_NONDMA; - req->base.flags &= ~CRYPTO_ACOMP_REQ_DST_VIRT; - req->base.flags |= CRYPTO_ACOMP_REQ_DST_FOLIO; + sg_init_table(&req->chain.dsg, 1); + sg_set_folio(&req->chain.dsg, folio, len, off); + acomp_request_set_dst_sg(req, &req->chain.dsg, len); } /** diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index b51d66633935..d6d53c7696fd 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -154,9 +154,7 @@ void crypto_unregister_acomps(struct acomp_alg *algs, int count); static inline bool acomp_request_issg(struct acomp_req *req) { return !(req->base.flags & (CRYPTO_ACOMP_REQ_SRC_VIRT | - CRYPTO_ACOMP_REQ_DST_VIRT | - CRYPTO_ACOMP_REQ_SRC_FOLIO | - CRYPTO_ACOMP_REQ_DST_FOLIO)); + CRYPTO_ACOMP_REQ_DST_VIRT)); } static inline bool acomp_request_src_isvirt(struct acomp_req *req) @@ -191,16 +189,6 @@ static inline bool acomp_request_isnondma(struct acomp_req *req) CRYPTO_ACOMP_REQ_DST_NONDMA); } -static inline bool acomp_request_src_isfolio(struct acomp_req *req) -{ - return req->base.flags & CRYPTO_ACOMP_REQ_SRC_FOLIO; -} - -static inline bool acomp_request_dst_isfolio(struct acomp_req *req) -{ - return req->base.flags & CRYPTO_ACOMP_REQ_DST_FOLIO; -} - static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm) { return crypto_tfm_req_chain(&tfm->base); @@ -250,8 +238,6 @@ static inline struct acomp_req *acomp_fbreq_on_stack_init( req->dst = old->dst; req->slen = old->slen; req->dlen = old->dlen; - req->soff = old->soff; - req->doff = old->doff; return req; } -- cgit v1.2.3 From fcfbdddc6f0260174f76e185c9768a950f0872be Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:11:33 +0800 Subject: crypto: ctr - Remove unnecessary header inclusions Now that the broken drivers have been fixed, remove the unnecessary inclusions from crypto/ctr.h. Signed-off-by: Herbert Xu --- include/crypto/ctr.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/crypto/ctr.h b/include/crypto/ctr.h index c41685874f00..06984a26c8cf 100644 --- a/include/crypto/ctr.h +++ b/include/crypto/ctr.h @@ -8,9 +8,6 @@ #ifndef _CRYPTO_CTR_H #define _CRYPTO_CTR_H -#include -#include - #define CTR_RFC3686_NONCE_SIZE 4 #define CTR_RFC3686_IV_SIZE 8 #define CTR_RFC3686_BLOCK_SIZE 16 -- cgit v1.2.3 From 6eed1e3552fc076d2617f6793cb148d485696ab6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:20:57 +0800 Subject: crypto: api - Mark cra_init/cra_exit as deprecated These functions have been obsoleted by the type-specific init/exit functions. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index a387f1547ea0..56cf229e2530 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -300,17 +300,8 @@ struct cipher_alg { * by @cra_type and @cra_flags above, the associated structure must be * filled with callbacks. This field might be empty. This is the case * for ahash, shash. - * @cra_init: Initialize the cryptographic transformation object. This function - * is used to initialize the cryptographic transformation object. - * This function is called only once at the instantiation time, right - * after the transformation context was allocated. 
In case the - * cryptographic hardware has some special requirements which need to - * be handled by software, this function shall check for the precise - * requirement of the transformation and put any software fallbacks - * in place. - * @cra_exit: Deinitialize the cryptographic transformation object. This is a - * counterpart to @cra_init, used to remove various changes set in - * @cra_init. + * @cra_init: Deprecated, do not use. + * @cra_exit: Deprecated, do not use. * @cra_u.cipher: Union member which contains a single-block symmetric cipher * definition. See @struct @cipher_alg. * @cra_module: Owner of this transformation implementation. Set to THIS_MODULE -- cgit v1.2.3 From afddce13ce81d52a13898fa0700917835c71acd6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:20:59 +0800 Subject: crypto: api - Add reqsize to crypto_alg Add a reqsize field to crypto_alg with the intention of replacing the type-specific reqsize field currently used by ahash and acomp. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 56cf229e2530..15476b085ce3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -276,6 +276,7 @@ struct cipher_alg { * to the alignmask of the algorithm being used, in order to * avoid the API having to realign them. Note: the alignmask is * not supported for hash algorithms and is always 0 for them. + * @cra_reqsize: Size of the request context for this algorithm. * @cra_priority: Priority of this transformation implementation. In case * multiple transformations with same @cra_name are available to * the Crypto API, the kernel will use the one with highest @@ -322,6 +323,7 @@ struct crypto_alg { unsigned int cra_blocksize; unsigned int cra_ctxsize; unsigned int cra_alignmask; + unsigned int cra_reqsize; int cra_priority; refcount_t cra_refcnt; @@ -441,6 +443,11 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_alignmask; } +static inline unsigned int crypto_tfm_alg_reqsize(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_reqsize; +} + static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm) { return tfm->crt_flags; -- cgit v1.2.3 From 300e6d6e9ebf2dcc0ca3eab68ed65207e947926c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:21:09 +0800 Subject: crypto: acomp - Remove reqsize field Remove the type-specific reqsize field in favour of the common one. Signed-off-by: Herbert Xu --- include/crypto/internal/acompress.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index d6d53c7696fd..0f3ad65be2d9 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -40,7 +40,6 @@ * counterpart to @init, used to remove various changes set in * @init. 
* - * @reqsize: Context size for (de)compression requests * @base: Common crypto API algorithm data structure * @calg: Cmonn algorithm data structure shared with scomp */ @@ -50,8 +49,6 @@ struct acomp_alg { int (*init)(struct crypto_acomp *tfm); void (*exit)(struct crypto_acomp *tfm); - unsigned int reqsize; - union { struct COMP_ALG_COMMON; struct comp_alg_common calg; -- cgit v1.2.3 From c360df01c6c19959d2218cbad56d5b7c2ac7a476 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:21:11 +0800 Subject: crypto: ahash - Use cra_reqsize Use the common reqsize field and remove reqsize from ahash_alg. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 113a5835e586..eceb2ed04f26 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -136,7 +136,6 @@ struct ahash_request { * This is a counterpart to @init_tfm, used to remove * various changes set in @init_tfm. * @clone_tfm: Copy transform into new object, may allocate memory. - * @reqsize: Size of the request context. * @halg: see struct hash_alg_common */ struct ahash_alg { @@ -153,8 +152,6 @@ struct ahash_alg { void (*exit_tfm)(struct crypto_ahash *tfm); int (*clone_tfm)(struct crypto_ahash *dst, struct crypto_ahash *src); - unsigned int reqsize; - struct hash_alg_common halg; }; -- cgit v1.2.3 From 6b7f9397c98c72902f9364056413c73fe6dee1d8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 7 Apr 2025 21:32:42 +0200 Subject: crypto: ecdsa - Fix NIST P521 key size reported by KEYCTL_PKEY_QUERY When user space issues a KEYCTL_PKEY_QUERY system call for a NIST P521 key, the key_size is incorrectly reported as 528 bits instead of 521. That's because the key size obtained through crypto_sig_keysize() is in bytes and software_key_query() multiplies by 8 to yield the size in bits. The underlying assumption is that the key size is always a multiple of 8. With the recent addition of NIST P521, that's no longer the case. Fix by returning the key_size in bits from crypto_sig_keysize() and adjusting the calculations in software_key_query(). The ->key_size() callbacks of sig_alg algorithms now return the size in bits, whereas the ->digest_size() and ->max_size() callbacks return the size in bytes. This matches with the units in struct keyctl_pkey_query. Fixes: a7d45ba77d3d ("crypto: ecdsa - Register NIST P521 and extend test suite") Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Reviewed-by: Ignat Korchagin Signed-off-by: Herbert Xu --- include/crypto/sig.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/sig.h b/include/crypto/sig.h index 11024708c069..fa6dafafab3f 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -128,7 +128,7 @@ static inline void crypto_free_sig(struct crypto_sig *tfm) /** * crypto_sig_keysize() - Get key size * - * Function returns the key size in bytes. + * Function returns the key size in bits. * Function assumes that the key is already set in the transformation. If this * function is called without a setkey or with a failed setkey, you may end up * in a NULL dereference. -- cgit v1.2.3 From 7ba8df47810f0731da1c9edd9aa8c1836279a93d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 11 Apr 2025 15:38:43 +0800 Subject: asm-generic: Make simd.h more resilient Add missing header inclusions and protect against double inclusion. 
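For context, a hedged sketch of the kind of consumer that relies on this header; only may_use_simd() comes from asm-generic/simd.h, the transform_simd()/transform_generic() helpers are made up for illustration:

#include <asm/simd.h>
#include <linux/types.h>

static void transform_simd(u8 *dst, const u8 *src, unsigned int len);		/* hypothetical */
static void transform_generic(u8 *dst, const u8 *src, unsigned int len);	/* hypothetical */

static void transform(u8 *dst, const u8 *src, unsigned int len)
{
	/* SIMD may be unusable in this context (e.g. hard IRQ); fall back. */
	if (may_use_simd())
		transform_simd(dst, src, len);
	else
		transform_generic(dst, src, len);
}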
Signed-off-by: Herbert Xu --- include/asm-generic/simd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h index d0343d58a74a..ac29a22eb7cf 100644 --- a/include/asm-generic/simd.h +++ b/include/asm-generic/simd.h @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_SIMD_H +#define _ASM_GENERIC_SIMD_H -#include +#include +#include +#include /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -13,3 +17,5 @@ static __must_check inline bool may_use_simd(void) { return !in_interrupt(); } + +#endif /* _ASM_GENERIC_SIMD_H */ -- cgit v1.2.3 From 34f170a590710737f9611231cd77b37ea41f707a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 11 Apr 2025 15:38:50 +0800 Subject: crypto: simd - Include asm/simd.h in internal/simd.h Now that the asm/simd.h files have been made safe against double inclusion, include it directly in internal/simd.h. Signed-off-by: Herbert Xu --- include/crypto/internal/simd.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index be97b97a75dd..f56049bd1660 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -6,6 +6,7 @@ #ifndef _CRYPTO_INTERNAL_SIMD_H #define _CRYPTO_INTERNAL_SIMD_H +#include #include #include @@ -46,9 +47,6 @@ void simd_unregister_aeads(struct aead_alg *algs, int count, * self-tests, in order to test the no-SIMD fallback code. This override is * currently limited to configurations where the extra self-tests are enabled, * because it might be a bit too invasive to be part of the regular self-tests. - * - * This is a macro so that , which some architectures don't have, - * doesn't have to be included directly here. */ #ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test); -- cgit v1.2.3 From 66fecd9d94034b6193636fe3bc01f089439f5413 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 11 Apr 2025 21:20:51 +0200 Subject: crypto: skcipher - Realign struct skcipher_walk to save 8 bytes Reduce skcipher_walk's struct size by 8 bytes by realigning its members. pahole output before: /* size: 120, cachelines: 2, members: 13 */ /* sum members: 108, holes: 2, sum holes: 8 */ /* padding: 4 */ /* last cacheline: 56 bytes */ and after: /* size: 112, cachelines: 2, members: 13 */ /* padding: 4 */ /* last cacheline: 48 bytes */ No functional changes intended. Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu --- include/crypto/internal/skcipher.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index a958ab0636ad..0cad8e7364c8 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -67,8 +67,6 @@ struct skcipher_walk { struct scatter_walk in; }; - unsigned int nbytes; - union { /* Virtual address of the destination. */ struct { @@ -81,6 +79,7 @@ struct skcipher_walk { struct scatter_walk out; }; + unsigned int nbytes; unsigned int total; u8 *page; -- cgit v1.2.3 From f1440a90465bea1993f937ac7add592ce1e4ff44 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 13:16:43 +0800 Subject: crypto: api - Add support for duplicating algorithms before registration If the bit CRYPTO_ALG_DUP_FIRST is set, an algorithm will be duplicated by kmemdup before registration. 
This is inteded for hardware-based algorithms that may be unplugged at will. Do not use this if the algorithm data structure is embedded in a bigger data structure. Perform the duplication in the driver instead. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 15476b085ce3..b89b1b348095 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -49,6 +49,15 @@ */ #define CRYPTO_ALG_NEED_FALLBACK 0x00000100 +/* + * Set if the algorithm data structure should be duplicated into + * kmalloc memory before registration. This is useful for hardware + * that can be disconnected at will. Do not use this if the data + * structure is embedded into a bigger one. Duplicate the overall + * data structure in the driver in that case. + */ +#define CRYPTO_ALG_DUP_FIRST 0x00000200 + /* * Set if the algorithm has passed automated run-time testing. Note that * if there is no run-time testing for a given algorithm it is considered -- cgit v1.2.3 From 90916934fd093edf62bc0c5c9a940a8efa7db2f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 18:47:47 +0800 Subject: crypto: shash - Remove dynamic descsize As all users of the dynamic descsize have been converted to use a static one instead, remove support for dynamic descsize. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index eceb2ed04f26..87518cf3b2d8 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -235,7 +235,6 @@ struct crypto_ahash { }; struct crypto_shash { - unsigned int descsize; struct crypto_tfm base; }; @@ -810,7 +809,7 @@ static inline void crypto_shash_clear_flags(struct crypto_shash *tfm, u32 flags) */ static inline unsigned int crypto_shash_descsize(struct crypto_shash *tfm) { - return tfm->descsize; + return crypto_shash_alg(tfm)->descsize; } static inline void *shash_desc_ctx(struct shash_desc *desc) -- cgit v1.2.3 From 04bfa4c7d5119ca38f8133bfdae7957a60c8b221 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 18:57:19 +0800 Subject: crypto: hash - Add HASH_REQUEST_ON_STACK Allow any ahash to be used with a stack request, with optional dynamic allocation when async is needed. The intended usage is: HASH_REQUEST_ON_STACK(req, tfm); ... err = crypto_ahash_digest(req); /* The request cannot complete synchronously. */ if (err == -EAGAIN) { /* This will not fail. */ req = HASH_REQUEST_CLONE(req, gfp); /* Redo operation. */ err = crypto_ahash_digest(req); } Signed-off-by: Herbert Xu --- include/crypto/hash.h | 54 ++++++++++++++++++++++++++++++++++-------- include/crypto/internal/hash.h | 26 +++++++++++++++++++- 2 files changed, 69 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 87518cf3b2d8..b00ea8407f9c 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -16,6 +16,9 @@ /* Set this bit for virtual address instead of SG list. */ #define CRYPTO_AHASH_REQ_VIRT 0x00000001 +#define CRYPTO_AHASH_REQ_PRIVATE \ + CRYPTO_AHASH_REQ_VIRT + struct crypto_ahash; /** @@ -167,12 +170,22 @@ struct shash_desc { * containing a 'struct sha3_state'. 
*/ #define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360) +#define MAX_SYNC_HASH_REQSIZE HASH_MAX_DESCSIZE #define SHASH_DESC_ON_STACK(shash, ctx) \ char __##shash##_desc[sizeof(struct shash_desc) + HASH_MAX_DESCSIZE] \ __aligned(__alignof__(struct shash_desc)); \ struct shash_desc *shash = (struct shash_desc *)__##shash##_desc +#define HASH_REQUEST_ON_STACK(name, _tfm) \ + char __##name##_req[sizeof(struct ahash_request) + \ + MAX_SYNC_HASH_REQSIZE] CRYPTO_MINALIGN_ATTR; \ + struct ahash_request *name = \ + ahash_request_on_stack_init(__##name##_req, (_tfm)) + +#define HASH_REQUEST_CLONE(name, gfp) \ + hash_request_clone(name, sizeof(__##name##_req), gfp) + /** * struct shash_alg - synchronous message digest definition * @init: see struct ahash_alg @@ -231,6 +244,7 @@ struct crypto_ahash { bool using_shash; /* Underlying algorithm is shash, not ahash */ unsigned int statesize; unsigned int reqsize; + struct crypto_ahash *fb; struct crypto_tfm base; }; @@ -248,6 +262,11 @@ struct crypto_shash { * CRYPTO_ALG_TYPE_SKCIPHER API applies here as well. */ +static inline bool ahash_req_on_stack(struct ahash_request *req) +{ + return crypto_req_on_stack(&req->base); +} + static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_ahash, base); @@ -544,7 +563,7 @@ int crypto_ahash_update(struct ahash_request *req); static inline void ahash_request_set_tfm(struct ahash_request *req, struct crypto_ahash *tfm) { - req->base.tfm = crypto_ahash_tfm(tfm); + crypto_request_set_tfm(&req->base, crypto_ahash_tfm(tfm)); } /** @@ -578,9 +597,12 @@ static inline struct ahash_request *ahash_request_alloc_noprof( * ahash_request_free() - zeroize and free the request data structure * @req: request data structure cipher handle to be freed */ -static inline void ahash_request_free(struct ahash_request *req) +void ahash_request_free(struct ahash_request *req); + +static inline void ahash_request_zero(struct ahash_request *req) { - kfree_sensitive(req); + memzero_explicit(req, sizeof(*req) + + crypto_ahash_reqsize(crypto_ahash_reqtfm(req))); } static inline struct ahash_request *ahash_request_cast( @@ -619,13 +641,9 @@ static inline void ahash_request_set_callback(struct ahash_request *req, crypto_completion_t compl, void *data) { - u32 keep = CRYPTO_AHASH_REQ_VIRT; - - req->base.complete = compl; - req->base.data = data; - flags &= ~keep; - req->base.flags &= keep; - req->base.flags |= flags; + flags &= ~CRYPTO_AHASH_REQ_PRIVATE; + flags |= req->base.flags & CRYPTO_AHASH_REQ_PRIVATE; + crypto_request_set_callback(&req->base, flags, compl, data); } /** @@ -870,6 +888,9 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, unsigned int len, u8 *out); +int crypto_hash_digest(struct crypto_ahash *tfm, const u8 *data, + unsigned int len, u8 *out); + /** * crypto_shash_export() - extract operational state for message digest * @desc: reference to the operational state handle whose state is exported @@ -980,4 +1001,17 @@ static inline bool ahash_is_async(struct crypto_ahash *tfm) return crypto_tfm_is_async(&tfm->base); } +static inline struct ahash_request *ahash_request_on_stack_init( + char *buf, struct crypto_ahash *tfm) +{ + struct ahash_request *req = (void *)buf; + + ahash_request_set_tfm(req, tfm); + req->base.flags = CRYPTO_TFM_REQ_ON_STACK; + return req; +} + +struct ahash_request *ahash_request_clone(struct ahash_request *req, + size_t total, gfp_t gfp); + #endif /* 
_CRYPTO_HASH_H */ diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index e2a1fac38610..45c728ac2621 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -11,6 +11,12 @@ #include #include +#define HASH_FBREQ_ON_STACK(name, req) \ + char __##name##_req[sizeof(struct ahash_request) + \ + MAX_SYNC_HASH_REQSIZE] CRYPTO_MINALIGN_ATTR; \ + struct ahash_request *name = ahash_fbreq_on_stack_init( \ + __##name##_req, (req)) + struct ahash_request; struct ahash_instance { @@ -187,7 +193,7 @@ static inline void ahash_request_complete(struct ahash_request *req, int err) static inline u32 ahash_request_flags(struct ahash_request *req) { - return req->base.flags; + return crypto_request_flags(&req->base) & ~CRYPTO_AHASH_REQ_PRIVATE; } static inline struct crypto_ahash *crypto_spawn_ahash( @@ -257,5 +263,23 @@ static inline bool crypto_ahash_req_chain(struct crypto_ahash *tfm) return crypto_tfm_req_chain(&tfm->base); } +static inline struct ahash_request *ahash_fbreq_on_stack_init( + char *buf, struct ahash_request *old) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(old); + struct ahash_request *req = (void *)buf; + + ahash_request_set_tfm(req, tfm->fb); + req->base.flags = CRYPTO_TFM_REQ_ON_STACK; + ahash_request_set_callback(req, ahash_request_flags(old), NULL, NULL); + req->base.flags &= ~CRYPTO_AHASH_REQ_PRIVATE; + req->base.flags |= old->base.flags & CRYPTO_AHASH_REQ_PRIVATE; + req->src = old->src; + req->result = old->result; + req->nbytes = old->nbytes; + + return req; +} + #endif /* _CRYPTO_INTERNAL_HASH_H */ -- cgit v1.2.3 From a58d1c0be43b9f21da16471f9786d5d1fc3a2aa7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 18:57:26 +0800 Subject: crypto: hash - Update HASH_MAX_DESCSIZE comment The biggest context is not sha3_generic (356), but sha-s390 (360). Signed-off-by: Herbert Xu --- include/crypto/hash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index b00ea8407f9c..58ac1423dc38 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -166,8 +166,8 @@ struct shash_desc { #define HASH_MAX_DIGESTSIZE 64 /* - * Worst case is hmac(sha3-224-generic). Its context is a nested 'shash_desc' - * containing a 'struct sha3_state'. + * Worst case is hmac(sha-224-s390). Its context is a nested 'shash_desc' + * containing a 'struct s390_sha_ctx'. */ #define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360) #define MAX_SYNC_HASH_REQSIZE HASH_MAX_DESCSIZE -- cgit v1.2.3 From cb16ba46958e41f8d805cbdb027a601b3a0c65b4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 18:57:31 +0800 Subject: crypto: lib/sm3 - Export generic block function Export the generic block function so that it can be used by the Crypto API. 
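A minimal sketch of a Crypto API glue user of the newly exported function (illustrative only; real glue code also buffers partial blocks and handles finalization):

#include <crypto/sm3.h>

static void sm3_hash_full_blocks(struct sm3_state *sctx,
				 const u8 *data, unsigned int nblocks)
{
	sm3_init(sctx);
	sm3_block_generic(sctx, data, nblocks);
}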
Signed-off-by: Herbert Xu --- include/crypto/sm3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h index 1f021ad0533f..d49211ba9a20 100644 --- a/include/crypto/sm3.h +++ b/include/crypto/sm3.h @@ -58,6 +58,7 @@ static inline void sm3_init(struct sm3_state *sctx) sctx->count = 0; } +void sm3_block_generic(struct sm3_state *sctx, u8 const *data, int blocks); void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len); void sm3_final(struct sm3_state *sctx, u8 *out); -- cgit v1.2.3 From e13b67e9d02829620c1c57d1b008e9c7ef954d19 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 18:57:33 +0800 Subject: crypto: sm3-base - Use sm3_init Remove the duplicate init code and simply call sm3_init. Signed-off-by: Herbert Xu --- include/crypto/sm3_base.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'include') diff --git a/include/crypto/sm3_base.h b/include/crypto/sm3_base.h index b33ed39c2bce..835896228e8e 100644 --- a/include/crypto/sm3_base.h +++ b/include/crypto/sm3_base.h @@ -20,18 +20,7 @@ typedef void (sm3_block_fn)(struct sm3_state *sst, u8 const *src, int blocks); static inline int sm3_base_init(struct shash_desc *desc) { - struct sm3_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SM3_IVA; - sctx->state[1] = SM3_IVB; - sctx->state[2] = SM3_IVC; - sctx->state[3] = SM3_IVD; - sctx->state[4] = SM3_IVE; - sctx->state[5] = SM3_IVF; - sctx->state[6] = SM3_IVG; - sctx->state[7] = SM3_IVH; - sctx->count = 0; - + sm3_init(shash_desc_ctx(desc)); return 0; } -- cgit v1.2.3 From ecaa4be1280a4faf72dc4b6b4f6d867332d5762e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 12 Apr 2025 21:54:14 -0700 Subject: crypto: poly1305 - centralize the shash wrappers for arch code Following the example of the crc32, crc32c, and chacha code, make the crypto subsystem register both generic and architecture-optimized poly1305 shash algorithms, both implemented on top of the appropriate library functions. This eliminates the need for every architecture to implement the same shash glue code. Note that the poly1305 shash requires that the key be prepended to the data, which differs from the library functions where the key is simply a parameter to poly1305_init(). Previously this was handled at a fairly low level, polluting the library code with shash-specific code. Reorganize things so that the shash code handles this quirk itself. Also, to register the architecture-optimized shashes only when architecture-optimized code is actually being used, add a function poly1305_is_arch_optimized() and make each arch implement it. Change each architecture's Poly1305 module_init function to arch_initcall so that the CPU feature detection is guaranteed to run before poly1305_is_arch_optimized() gets called by crypto/poly1305.c. (In cases where poly1305_is_arch_optimized() just returns true unconditionally, using arch_initcall is not strictly needed, but it's still good to be consistent across architectures.) 
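A hedged sketch of the registration flow described above; the shash_alg instances are placeholders (the real definitions live in crypto/poly1305.c) and error unwinding is omitted:

#include <crypto/internal/hash.h>
#include <crypto/poly1305.h>
#include <linux/module.h>

static struct shash_alg poly1305_generic_alg;	/* placeholder */
static struct shash_alg poly1305_arch_alg;	/* placeholder */

static int __init poly1305_mod_init_sketch(void)
{
	int err;

	err = crypto_register_shash(&poly1305_generic_alg);
	if (err)
		return err;

	/* Only expose the arch shash when optimized code is actually in use. */
	if (poly1305_is_arch_optimized())
		err = crypto_register_shash(&poly1305_arch_alg);

	return err;
}
module_init(poly1305_mod_init_sketch);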
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/poly1305.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 090692ec3bc7..91444965772a 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -96,4 +96,13 @@ static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest) poly1305_final_generic(desc, digest); } +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305) +bool poly1305_is_arch_optimized(void); +#else +static inline bool poly1305_is_arch_optimized(void) +{ + return false; +} +#endif + #endif -- cgit v1.2.3 From 5f7325fbb3d416ba66f163a9272e17d70f1d9bf4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 12 Apr 2025 21:54:21 -0700 Subject: crypto: poly1305 - remove rset and sset fields of poly1305_desc_ctx These fields are no longer needed, so remove them. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/poly1305.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 91444965772a..6e21ec2d1dc2 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -43,10 +43,6 @@ struct poly1305_desc_ctx { u8 buf[POLY1305_BLOCK_SIZE]; /* bytes used in partial buffer */ unsigned int buflen; - /* how many keys have been set in r[] */ - unsigned short rset; - /* whether s[] has been set */ - bool sset; /* finalize key */ u32 s[4]; /* accumulator */ -- cgit v1.2.3 From 43eca05b6a3b917c600e10cc6b06bfa57fa57401 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 11 Apr 2025 10:49:56 +0300 Subject: xfrm: Add explicit dev to .xdo_dev_state_{add,delete,free} Previously, device driver IPSec offload implementations would fall into two categories: 1. Those that used xso.dev to determine the offload device. 2. Those that used xso.real_dev to determine the offload device. The first category didn't work with bonding while the second did. In a non-bonding setup the two pointers are the same. This commit adds explicit pointers for the offload netdevice to .xdo_dev_state_add() / .xdo_dev_state_delete() / .xdo_dev_state_free() which eliminates the confusion and allows drivers from the first category to work with bonding. xso.real_dev now becomes a private pointer managed by the bonding driver. 
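A hedged driver-side sketch of the new callback signature; the driver name and checks are illustrative, the point being that the offload device comes from the explicit dev argument rather than from xso.real_dev:

#include <linux/netdevice.h>
#include <net/xfrm.h>

static int foo_xdo_dev_state_add(struct net_device *dev, struct xfrm_state *x,
				 struct netlink_ext_ack *extack)
{
	/* Program the SA on @dev; never look at x->xso.real_dev here. */
	if (x->props.mode != XFRM_MODE_TRANSPORT &&
	    x->props.mode != XFRM_MODE_TUNNEL) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported mode for offload");
		return -EOPNOTSUPP;
	}

	netdev_dbg(dev, "offloading xfrm state\n");
	return 0;
}

static const struct xfrmdev_ops foo_xfrmdev_ops = {
	.xdo_dev_state_add	= foo_xdo_dev_state_add,
	/* .xdo_dev_state_delete / .xdo_dev_state_free take dev the same way. */
};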
Signed-off-by: Cosmin Ratiu Reviewed-by: Leon Romanovsky Reviewed-by: Nikolay Aleksandrov Signed-off-by: Steffen Klassert --- include/linux/netdevice.h | 10 +++++++--- include/net/xfrm.h | 8 ++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8544f6a680c..88dfb8aeed3c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1013,9 +1013,13 @@ struct netdev_bpf { #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { - int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack); - void (*xdo_dev_state_delete) (struct xfrm_state *x); - void (*xdo_dev_state_free) (struct xfrm_state *x); + int (*xdo_dev_state_add)(struct net_device *dev, + struct xfrm_state *x, + struct netlink_ext_ack *extack); + void (*xdo_dev_state_delete)(struct net_device *dev, + struct xfrm_state *x); + void (*xdo_dev_state_free)(struct net_device *dev, + struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 39365fd2ea17..3d2f6c879311 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -147,8 +147,16 @@ enum { }; struct xfrm_dev_offload { + /* The device for this offload. + * Device drivers should not use this directly, as that will prevent + * them from working with bonding device. Instead, the device passed + * to the add/delete callbacks should be used. + */ struct net_device *dev; netdevice_tracker dev_tracker; + /* This is a private pointer used by the bonding driver. + * Device drivers should not use it. + */ struct net_device *real_dev; unsigned long offload_handle; u8 dir : 2; -- cgit v1.2.3 From d2fddbd3479928e52061e1c8dd302006b6283ce8 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 11 Apr 2025 10:49:58 +0300 Subject: bonding: Fix multiple long standing offload races Refactor the bonding ipsec offload operations to fix a number of long-standing control plane races between state migration and user deletion and a few other issues. xfrm state deletion can happen concurrently with bond_change_active_slave() operation. This manifests itself as a bond_ipsec_del_sa() call with x->lock held, followed by a bond_ipsec_free_sa() a bit later from a wq. The alternate path of these calls coming from xfrm_dev_state_flush() can't happen, as that needs the RTNL lock and bond_change_active_slave() already holds it. 1. bond_ipsec_del_sa_all() might call xdo_dev_state_delete() a second time on an xfrm state that was concurrently killed. This is bad. 2. bond_ipsec_add_sa_all() can add a state on the new device, but pending bond_ipsec_free_sa() calls from the old device will then hit the WARN_ON() and then, worse, call xdo_dev_state_free() on the new device without a corresponding xdo_dev_state_delete(). 3. Resolve a sleeping in atomic context introduced by the mentioned "Fixes" commit. bond_ipsec_del_sa_all() and bond_ipsec_add_sa_all() now acquire x->lock and check for x->km.state to help with problems 1 and 2. And since xso.real_dev is now a private pointer managed by the bonding driver in xfrm state, make better use of it to fully fix problems 1 and 2. In bond_ipsec_del_sa_all(), set xso.real_dev to NULL while holding both the mutex and x->lock, which makes sure that neither bond_ipsec_del_sa() nor bond_ipsec_free_sa() could run concurrently. 
Fix problem 3 by moving the list cleanup (which requires the mutex) from bond_ipsec_del_sa() (called from atomic context) to bond_ipsec_free_sa() Finally, simplify bond_ipsec_del_sa() and bond_ipsec_free_sa() by using xso->real_dev directly, since it's now protected by locks and can be trusted to always reflect the offload device. Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex") Signed-off-by: Cosmin Ratiu Reviewed-by: Leon Romanovsky Reviewed-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Tested-by: Hangbin Liu Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 3d2f6c879311..b7e8f3f49627 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -154,8 +154,11 @@ struct xfrm_dev_offload { */ struct net_device *dev; netdevice_tracker dev_tracker; - /* This is a private pointer used by the bonding driver. - * Device drivers should not use it. + /* This is a private pointer used by the bonding driver (and eventually + * should be moved there). Device drivers should not use it. + * Protected by xfrm_state.lock AND bond.ipsec_lock in most cases, + * except in the .xdo_dev_state_del() flow, where only xfrm_state.lock + * is held. */ struct net_device *real_dev; unsigned long offload_handle; -- cgit v1.2.3 From adc8d1200dbbe6726abf89070edb58bca95f1485 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 16 Apr 2025 10:01:36 +0300 Subject: i2c: core: Deprecate of_node in struct i2c_board_info Two members of the same or quite similar semantics is quite confusing to begin with. Moreover, fwnode covers all possible firmware descriptions that Linux kernel supports. Deprecate of_node in struct i2c_board_info, so users will be warned and in the future there is a plan to convert the users and remove it completely. Tested-by: Tomi Valkeinen Reviewed-by: Sakari Ailus Acked-by: Sakari Ailus Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 2e4903b7f7bc..cc1437f29823 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -405,7 +405,7 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } * @addr: stored in i2c_client.addr * @dev_name: Overrides the default - dev_name if set * @platform_data: stored in i2c_client.dev.platform_data - * @of_node: pointer to OpenFirmware device node + * @of_node: **DEPRECATED** - use @fwnode for this * @fwnode: device node supplied by the platform firmware * @swnode: software node for the device * @resources: resources associated with the device -- cgit v1.2.3 From 0e3f6c3696424fa90d6f512779d617a05a1cf031 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Wed, 9 Apr 2025 05:34:44 +0000 Subject: sched/topology: Introduce sched_update_asym_prefer_cpu() A subset of AMD Processors supporting Preferred Core Rankings also feature the ability to dynamically switch these rankings at runtime to bias load balancing towards or away from the LLC domain with larger cache. To support dynamically updating "sg->asym_prefer_cpu" without needing to rebuild the sched domain, introduce sched_update_asym_prefer_cpu() which recomutes the "asym_prefer_cpu" when the core-ranking of a CPU changes. 
sched_update_asym_prefer_cpu() swaps the "sg->asym_prefer_cpu" with the CPU whose ranking has changed if the new ranking is greater than that of the "asym_prefer_cpu". If CPU whose ranking has changed is the current "asym_prefer_cpu", it scans the CPUs of the sched groups to find the new "asym_prefer_cpu" and sets it accordingly. get_group() for non-overlapping sched domains returns the sched group for the first CPU in the sched_group_span() which ensures all CPUs in the group see the updated value of "asym_prefer_cpu". Overlapping groups are allocated differently and will require moving the "asym_prefer_cpu" to "sg->sgc" but since the current implementations do not set "SD_ASYM_PACKING" at NUMA domains, skip additional indirection and place a SCHED_WARN_ON() to alert any future users. Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250409053446.23367-3-kprateek.nayak@amd.com --- include/linux/sched/topology.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 7b4301b7235f..198bb5cc1774 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -195,6 +195,8 @@ struct sched_domain_topology_level { }; extern void __init set_sched_topology(struct sched_domain_topology_level *tl); +extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio); + # define SD_INIT_NAME(type) .name = #type @@ -223,6 +225,10 @@ static inline bool cpus_share_resources(int this_cpu, int that_cpu) return true; } +static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) +{ +} + #endif /* !CONFIG_SMP */ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -- cgit v1.2.3 From 61e312a001a394a93998c353af859841ddf50d5d Mon Sep 17 00:00:00 2001 From: Haylen Chu Date: Wed, 16 Apr 2025 13:54:01 +0000 Subject: dt-bindings: soc: spacemit: Add spacemit,k1-syscon Document APMU, MPMU and APBC syscons found on SpacemiT K1 SoC, which are capable of generating clock and reset signals. Additionally, APMU and MPMU manage power domains. 
Signed-off-by: Haylen Chu Reviewed-by: Krzysztof Kozlowski Reviewed-by: Alex Elder Reviewed-by: Yixun Lan Link: https://lore.kernel.org/r/20250416135406.16284-2-heylenay@4d2.org Signed-off-by: Yixun Lan --- include/dt-bindings/clock/spacemit,k1-syscon.h | 210 +++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 include/dt-bindings/clock/spacemit,k1-syscon.h (limited to 'include') diff --git a/include/dt-bindings/clock/spacemit,k1-syscon.h b/include/dt-bindings/clock/spacemit,k1-syscon.h new file mode 100644 index 000000000000..0c5b9b5f6073 --- /dev/null +++ b/include/dt-bindings/clock/spacemit,k1-syscon.h @@ -0,0 +1,210 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2024-2025 Haylen Chu + */ + +#ifndef _DT_BINDINGS_SPACEMIT_CCU_H_ +#define _DT_BINDINGS_SPACEMIT_CCU_H_ + +/* MPMU clocks */ +#define CLK_PLL1_307P2 0 +#define CLK_PLL1_76P8 1 +#define CLK_PLL1_61P44 2 +#define CLK_PLL1_153P6 3 +#define CLK_PLL1_102P4 4 +#define CLK_PLL1_51P2 5 +#define CLK_PLL1_51P2_AP 6 +#define CLK_PLL1_57P6 7 +#define CLK_PLL1_25P6 8 +#define CLK_PLL1_12P8 9 +#define CLK_PLL1_12P8_WDT 10 +#define CLK_PLL1_6P4 11 +#define CLK_PLL1_3P2 12 +#define CLK_PLL1_1P6 13 +#define CLK_PLL1_0P8 14 +#define CLK_PLL1_409P6 15 +#define CLK_PLL1_204P8 16 +#define CLK_PLL1_491 17 +#define CLK_PLL1_245P76 18 +#define CLK_PLL1_614 19 +#define CLK_PLL1_47P26 20 +#define CLK_PLL1_31P5 21 +#define CLK_PLL1_819 22 +#define CLK_PLL1_1228 23 +#define CLK_SLOW_UART 24 +#define CLK_SLOW_UART1 25 +#define CLK_SLOW_UART2 26 +#define CLK_WDT 27 +#define CLK_RIPC 28 +#define CLK_I2S_SYSCLK 29 +#define CLK_I2S_BCLK 30 +#define CLK_APB 31 +#define CLK_WDT_BUS 32 + +/* APBC clocks */ +#define CLK_UART0 0 +#define CLK_UART2 1 +#define CLK_UART3 2 +#define CLK_UART4 3 +#define CLK_UART5 4 +#define CLK_UART6 5 +#define CLK_UART7 6 +#define CLK_UART8 7 +#define CLK_UART9 8 +#define CLK_GPIO 9 +#define CLK_PWM0 10 +#define CLK_PWM1 11 +#define CLK_PWM2 12 +#define CLK_PWM3 13 +#define CLK_PWM4 14 +#define CLK_PWM5 15 +#define CLK_PWM6 16 +#define CLK_PWM7 17 +#define CLK_PWM8 18 +#define CLK_PWM9 19 +#define CLK_PWM10 20 +#define CLK_PWM11 21 +#define CLK_PWM12 22 +#define CLK_PWM13 23 +#define CLK_PWM14 24 +#define CLK_PWM15 25 +#define CLK_PWM16 26 +#define CLK_PWM17 27 +#define CLK_PWM18 28 +#define CLK_PWM19 29 +#define CLK_SSP3 30 +#define CLK_RTC 31 +#define CLK_TWSI0 32 +#define CLK_TWSI1 33 +#define CLK_TWSI2 34 +#define CLK_TWSI4 35 +#define CLK_TWSI5 36 +#define CLK_TWSI6 37 +#define CLK_TWSI7 38 +#define CLK_TWSI8 39 +#define CLK_TIMERS1 40 +#define CLK_TIMERS2 41 +#define CLK_AIB 42 +#define CLK_ONEWIRE 43 +#define CLK_SSPA0 44 +#define CLK_SSPA1 45 +#define CLK_DRO 46 +#define CLK_IR 47 +#define CLK_TSEN 48 +#define CLK_IPC_AP2AUD 49 +#define CLK_CAN0 50 +#define CLK_CAN0_BUS 51 +#define CLK_UART0_BUS 52 +#define CLK_UART2_BUS 53 +#define CLK_UART3_BUS 54 +#define CLK_UART4_BUS 55 +#define CLK_UART5_BUS 56 +#define CLK_UART6_BUS 57 +#define CLK_UART7_BUS 58 +#define CLK_UART8_BUS 59 +#define CLK_UART9_BUS 60 +#define CLK_GPIO_BUS 61 +#define CLK_PWM0_BUS 62 +#define CLK_PWM1_BUS 63 +#define CLK_PWM2_BUS 64 +#define CLK_PWM3_BUS 65 +#define CLK_PWM4_BUS 66 +#define CLK_PWM5_BUS 67 +#define CLK_PWM6_BUS 68 +#define CLK_PWM7_BUS 69 +#define CLK_PWM8_BUS 70 +#define CLK_PWM9_BUS 71 +#define CLK_PWM10_BUS 72 +#define CLK_PWM11_BUS 73 +#define CLK_PWM12_BUS 74 +#define CLK_PWM13_BUS 75 +#define CLK_PWM14_BUS 76 +#define CLK_PWM15_BUS 77 +#define CLK_PWM16_BUS 78 +#define 
CLK_PWM17_BUS 79 +#define CLK_PWM18_BUS 80 +#define CLK_PWM19_BUS 81 +#define CLK_SSP3_BUS 82 +#define CLK_RTC_BUS 83 +#define CLK_TWSI0_BUS 84 +#define CLK_TWSI1_BUS 85 +#define CLK_TWSI2_BUS 86 +#define CLK_TWSI4_BUS 87 +#define CLK_TWSI5_BUS 88 +#define CLK_TWSI6_BUS 89 +#define CLK_TWSI7_BUS 90 +#define CLK_TWSI8_BUS 91 +#define CLK_TIMERS1_BUS 92 +#define CLK_TIMERS2_BUS 93 +#define CLK_AIB_BUS 94 +#define CLK_ONEWIRE_BUS 95 +#define CLK_SSPA0_BUS 96 +#define CLK_SSPA1_BUS 97 +#define CLK_TSEN_BUS 98 +#define CLK_IPC_AP2AUD_BUS 99 + +/* APMU clocks */ +#define CLK_CCI550 0 +#define CLK_CPU_C0_HI 1 +#define CLK_CPU_C0_CORE 2 +#define CLK_CPU_C0_ACE 3 +#define CLK_CPU_C0_TCM 4 +#define CLK_CPU_C1_HI 5 +#define CLK_CPU_C1_CORE 6 +#define CLK_CPU_C1_ACE 7 +#define CLK_CCIC_4X 8 +#define CLK_CCIC1PHY 9 +#define CLK_SDH_AXI 10 +#define CLK_SDH0 11 +#define CLK_SDH1 12 +#define CLK_SDH2 13 +#define CLK_USB_P1 14 +#define CLK_USB_AXI 15 +#define CLK_USB30 16 +#define CLK_QSPI 17 +#define CLK_QSPI_BUS 18 +#define CLK_DMA 19 +#define CLK_AES 20 +#define CLK_VPU 21 +#define CLK_GPU 22 +#define CLK_EMMC 23 +#define CLK_EMMC_X 24 +#define CLK_AUDIO 25 +#define CLK_HDMI 26 +#define CLK_PMUA_ACLK 27 +#define CLK_PCIE0_MASTER 28 +#define CLK_PCIE0_SLAVE 29 +#define CLK_PCIE0_DBI 30 +#define CLK_PCIE1_MASTER 31 +#define CLK_PCIE1_SLAVE 32 +#define CLK_PCIE1_DBI 33 +#define CLK_PCIE2_MASTER 34 +#define CLK_PCIE2_SLAVE 35 +#define CLK_PCIE2_DBI 36 +#define CLK_EMAC0_BUS 37 +#define CLK_EMAC0_PTP 38 +#define CLK_EMAC1_BUS 39 +#define CLK_EMAC1_PTP 40 +#define CLK_JPG 41 +#define CLK_CCIC2PHY 42 +#define CLK_CCIC3PHY 43 +#define CLK_CSI 44 +#define CLK_CAMM0 45 +#define CLK_CAMM1 46 +#define CLK_CAMM2 47 +#define CLK_ISP_CPP 48 +#define CLK_ISP_BUS 49 +#define CLK_ISP 50 +#define CLK_DPU_MCLK 51 +#define CLK_DPU_ESC 52 +#define CLK_DPU_BIT 53 +#define CLK_DPU_PXCLK 54 +#define CLK_DPU_HCLK 55 +#define CLK_DPU_SPI 56 +#define CLK_DPU_SPI_HBUS 57 +#define CLK_DPU_SPIBUS 58 +#define CLK_DPU_SPI_ACLK 59 +#define CLK_V2D 60 +#define CLK_EMMC_BUS 61 + +#endif /* _DT_BINDINGS_SPACEMIT_CCU_H_ */ -- cgit v1.2.3 From 8090804045066ab8cd92737c8e2adfb46f166c0f Mon Sep 17 00:00:00 2001 From: Haylen Chu Date: Wed, 16 Apr 2025 13:54:02 +0000 Subject: dt-bindings: clock: spacemit: Add spacemit,k1-pll Add definition for the PLL found on SpacemiT K1 SoC, which takes the external 24MHz oscillator as input and generates clocks in various frequencies for the system. 
Signed-off-by: Haylen Chu Reviewed-by: Krzysztof Kozlowski Reviewed-by: Alex Elder Reviewed-by: Yixun Lan Link: https://lore.kernel.org/r/20250416135406.16284-3-heylenay@4d2.org Signed-off-by: Yixun Lan --- include/dt-bindings/clock/spacemit,k1-syscon.h | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/spacemit,k1-syscon.h b/include/dt-bindings/clock/spacemit,k1-syscon.h index 0c5b9b5f6073..35968ae98246 100644 --- a/include/dt-bindings/clock/spacemit,k1-syscon.h +++ b/include/dt-bindings/clock/spacemit,k1-syscon.h @@ -6,6 +6,43 @@ #ifndef _DT_BINDINGS_SPACEMIT_CCU_H_ #define _DT_BINDINGS_SPACEMIT_CCU_H_ +/* APBS (PLL) clocks */ +#define CLK_PLL1 0 +#define CLK_PLL2 1 +#define CLK_PLL3 2 +#define CLK_PLL1_D2 3 +#define CLK_PLL1_D3 4 +#define CLK_PLL1_D4 5 +#define CLK_PLL1_D5 6 +#define CLK_PLL1_D6 7 +#define CLK_PLL1_D7 8 +#define CLK_PLL1_D8 9 +#define CLK_PLL1_D11 10 +#define CLK_PLL1_D13 11 +#define CLK_PLL1_D23 12 +#define CLK_PLL1_D64 13 +#define CLK_PLL1_D10_AUD 14 +#define CLK_PLL1_D100_AUD 15 +#define CLK_PLL2_D1 16 +#define CLK_PLL2_D2 17 +#define CLK_PLL2_D3 18 +#define CLK_PLL2_D4 19 +#define CLK_PLL2_D5 20 +#define CLK_PLL2_D6 21 +#define CLK_PLL2_D7 22 +#define CLK_PLL2_D8 23 +#define CLK_PLL3_D1 24 +#define CLK_PLL3_D2 25 +#define CLK_PLL3_D3 26 +#define CLK_PLL3_D4 27 +#define CLK_PLL3_D5 28 +#define CLK_PLL3_D6 29 +#define CLK_PLL3_D7 30 +#define CLK_PLL3_D8 31 +#define CLK_PLL3_80 32 +#define CLK_PLL3_40 33 +#define CLK_PLL3_20 34 + /* MPMU clocks */ #define CLK_PLL1_307P2 0 #define CLK_PLL1_76P8 1 -- cgit v1.2.3 From cd1fafe7da1f6f2aa25723e317f6e8e9d0c050a1 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 15 Apr 2025 05:24:58 +0000 Subject: eth: bnxt: add support rx side device memory TCP Currently, the bnxt_en driver satisfies the requirement of Device memory TCP, which is HDS (header-data split). So, implement rx-side Device memory TCP for the bnxt_en driver. It requires only converting the page API to the netmem API. The `struct page` of the agg rings is changed to `netmem_ref netmem`, and the corresponding functions are changed to their netmem API variants. It also passes the PP_FLAG_ALLOW_UNREADABLE_NETMEM flag in the page_pool parameters. The netmem will be activated only when a user requests devmem TCP. When netmem is activated, received data is unreadable; when netmem is disabled, received data is readable. But drivers don't need to handle both cases separately because the netmem core API will handle it properly. So, using the proper netmem API is enough for drivers. Device memory TCP can be tested with tools/testing/selftests/drivers/net/hw/ncdevmem. This is tested with BCM57504-N425G and firmware version 232.0.155.8/pkg 232.1.132.8.
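As a rough illustration of the conversion described above (a sketch only; bnxt_reuse_rx_netmem() is a made-up name and netmem_address() is assumed from the generic netmem helpers, while the two page_pool helpers are the ones added below):

	static void bnxt_reuse_rx_netmem(struct page_pool *pool, netmem_ref netmem)
	{
		/* Payloads may live in device memory; only touch them when the
		 * pool hands out readable buffers.
		 */
		if (!page_pool_is_unreadable(pool))
			prefetch(netmem_address(netmem));

		/* Return the buffer through the netmem variant of the recycle API. */
		page_pool_recycle_direct_netmem(pool, netmem);
	}
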
Reviewed-by: Mina Almasry Tested-by: David Wei Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250415052458.1260575-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 582a3d00cbe2..93f2c31baf9b 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -395,6 +395,12 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } +static inline void page_pool_recycle_direct_netmem(struct page_pool *pool, + netmem_ref netmem) +{ + page_pool_put_full_netmem(pool, netmem, true); +} + #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \ (sizeof(dma_addr_t) > sizeof(unsigned long)) @@ -492,4 +498,9 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) page_pool_update_nid(pool, new_nid); } +static inline bool page_pool_is_unreadable(struct page_pool *pool) +{ + return !!pool->mp_ops; +} + #endif /* _NET_PAGE_POOL_HELPERS_H */ -- cgit v1.2.3 From ddd0855fa3c3f1de020ab3aeddce15fe38e116f7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 15 Apr 2025 17:23:19 +0800 Subject: crypto: deflate - Make the acomp walk atomic Add an atomic flag to the acomp walk and use that in deflate. Due to the use of a per-cpu context, it is impossible to sleep during the walk in deflate. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202504151654.4c3b6393-lkp@intel.com Fixes: 08cabc7d3c86 ("crypto: deflate - Convert to acomp") Signed-off-by: Herbert Xu --- include/crypto/internal/acompress.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 0f3ad65be2d9..7eda32619024 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -208,7 +208,7 @@ void acomp_walk_done_dst(struct acomp_walk *walk, int used); int acomp_walk_next_src(struct acomp_walk *walk); int acomp_walk_next_dst(struct acomp_walk *walk); int acomp_walk_virt(struct acomp_walk *__restrict walk, - struct acomp_req *__restrict req); + struct acomp_req *__restrict req, bool atomic); static inline bool acomp_walk_more_src(const struct acomp_walk *walk, int cur) { -- cgit v1.2.3 From dd09eb0e2cc4ba91cd64dba2b733d3f8e1248fd8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 7 Apr 2025 10:29:35 -0700 Subject: EISA: Increase length of device names GCC 15's -Wunterminated-string-initialization warned about truncated name strings. Instead of marking them with the "nonstring" attribute[1], increase their length to correctly include enough space for the terminating NUL character, as they are used with %s format specifiers when showing resource allocations in /proc/ioports: seq_printf(m, "%*s%0*llx-%0*llx : %s\n", ..., r->name); The strings in eisa.ids have a max length of 73, and the 50 limit was an arbitrary limit that was removed back in 2008 with commit ca52a49846f1 ("driver core: remove DEVICE_NAME_SIZE define"). Change the limit to 74 so nothing is truncated any more. Additionally fix the Makefile to use "if_changed" instead of "cmd" to detect changes to the command line used to generate the target, otherwise devlist.h won't be rebuilt. 
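For illustration only (the helper below is hypothetical; the constant is the one added by this patch), the sizing rule is simply the longest eisa.ids string (73 characters) plus the terminating NUL:

	#define EISA_DEVICE_INFO_NAME_SIZE 74	/* 73-char name + '\0' */

	static void eisa_set_pretty_name(struct eisa_device *edev, const char *id)
	{
		/* The old 50-byte buffer silently truncated longer names. */
		snprintf(edev->pretty_name, sizeof(edev->pretty_name), "%s", id);
	}
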
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117178 [1] Signed-off-by: Kees Cook Acked-by: Alejandro Colomar Link: https://lore.kernel.org/r/20250407172926.it.281-kees@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/eisa.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/eisa.h b/include/linux/eisa.h index f98200cae637..21a2ecc1e538 100644 --- a/include/linux/eisa.h +++ b/include/linux/eisa.h @@ -28,6 +28,9 @@ #define EISA_CONFIG_ENABLED 1 #define EISA_CONFIG_FORCED 2 +/* Chosen to hold the longest string in eisa.ids. */ +#define EISA_DEVICE_INFO_NAME_SIZE 74 + /* There is not much we can say about an EISA device, apart from * signature, slot number, and base address. dma_mask is set by * default to parent device mask..*/ @@ -41,7 +44,7 @@ struct eisa_device { u64 dma_mask; struct device dev; /* generic device */ #ifdef CONFIG_EISA_NAMES - char pretty_name[50]; + char pretty_name[EISA_DEVICE_INFO_NAME_SIZE]; #endif }; -- cgit v1.2.3 From ab244a394c7f13f6573744b9ca72bb22151a3ec4 Mon Sep 17 00:00:00 2001 From: Chiachang Wang Date: Thu, 13 Mar 2025 02:36:40 +0000 Subject: xfrm: Migrate offload configuration Add hardware offload configuration to XFRM_MSG_MIGRATE using an optional netlink attribute XFRMA_OFFLOAD_DEV. In the existing xfrm_state_migrate(), xfrm_init_state() is called assuming no hardware offload by default. Even if the original xfrm_state is configured with offload, the setting will be reset. If the device is configured with hardware offload, it's reasonable to allow the device to maintain its hardware offload mode. But the device will end up with offload disabled after receiving a migration event when it migrates the connection from one netdev to another. The devices that support migration may work with different underlying networks, such as mobile devices. The hardware setting should be forwarded to the different netdev based on the migration configuration. This change provides the capability for user space to migrate from one netdev to another. Test: Tested with the kernel tests in the Android tree located at https://android.googlesource.com/kernel/tests/ (the xfrm_tunnel_test.py under the tests folder in particular).
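As a rough sketch of what the new attribute carries (struct xfrm_user_offload is the existing uAPI offload descriptor; the netlink message packing around it is assumed and not shown), an XFRM_MSG_MIGRATE request can now include XFRMA_OFFLOAD_DEV naming the netdev that should own the offload after migration:

	struct xfrm_user_offload xuo = {
		.ifindex = new_ifindex,			/* netdev to carry the offload */
		.flags	 = XFRM_OFFLOAD_INBOUND,	/* direction flag for an RX state */
	};
	/* appended to the migrate request as the optional XFRMA_OFFLOAD_DEV attribute */
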
Signed-off-by: Chiachang Wang Reviewed-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b7e8f3f49627..466423a1a70a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1904,12 +1904,16 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n u32 if_id); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m, - struct xfrm_encap_tmpl *encap); + struct xfrm_encap_tmpl *encap, + struct net *net, + struct xfrm_user_offload *xuo, + struct netlink_ext_ack *extack); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k, struct net *net, struct xfrm_encap_tmpl *encap, u32 if_id, - struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack, + struct xfrm_user_offload *xuo); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); -- cgit v1.2.3 From e80c235994fd1d7004a4e5d64123f8f07ec80ade Mon Sep 17 00:00:00 2001 From: Alan Borzeszkowski Date: Mon, 14 Apr 2025 19:55:53 +0200 Subject: thunderbolt: Add Thunderbolt/USB4 <-> USB3 match function This function checks whether a given USB4 port device matches a USB3.x port device, using the ACPI _DSD property. It is designed to be used by the component framework to match USB4 ports with the Type-C ports they are connected to. Also, add a stub for the case where CONFIG_USB4 is not reachable. Signed-off-by: Alan Borzeszkowski Reviewed-by: Heikki Krogerus Signed-off-by: Mika Westerberg --- include/linux/thunderbolt.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 7d902d8c054b..75247486616b 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -11,6 +11,13 @@ #ifndef THUNDERBOLT_H_ #define THUNDERBOLT_H_ +#include + +struct fwnode_handle; +struct device; + +#if IS_REACHABLE(CONFIG_USB4) + #include #include #include @@ -674,4 +681,15 @@ static inline struct device *tb_ring_dma_device(struct tb_ring *ring) return &ring->nhi->pdev->dev; } +bool usb4_usb3_port_match(struct device *usb4_port_dev, + const struct fwnode_handle *usb3_port_fwnode); + +#else /* CONFIG_USB4 */ +static inline bool usb4_usb3_port_match(struct device *usb4_port_dev, + const struct fwnode_handle *usb3_port_fwnode) +{ + return false; +} +#endif /* CONFIG_USB4 */ + #endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From b7a63391aa982295bbb3125e7d4470f51f31ff0f Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 15 Apr 2025 13:17:19 +0200 Subject: ovpn: add basic netlink support This commit introduces basic netlink support with family registration/unregistration functionalities and stub pre/post-doit.
More importantly it introduces the YAML uAPI description along with its auto-generated files: - include/uapi/linux/ovpn.h - drivers/net/ovpn/netlink-gen.c - drivers/net/ovpn/netlink-gen.h Reviewed-by: Donald Hunter Signed-off-by: Antonio Quartulli Link: https://patch.msgid.link/20250415-b4-ovpn-v26-2-577f6097b964@openvpn.net Reviewed-by: Sabrina Dubroca Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Abeni --- include/uapi/linux/ovpn.h | 109 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 include/uapi/linux/ovpn.h (limited to 'include') diff --git a/include/uapi/linux/ovpn.h b/include/uapi/linux/ovpn.h new file mode 100644 index 000000000000..680d1522dc87 --- /dev/null +++ b/include/uapi/linux/ovpn.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/ovpn.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_OVPN_H +#define _UAPI_LINUX_OVPN_H + +#define OVPN_FAMILY_NAME "ovpn" +#define OVPN_FAMILY_VERSION 1 + +#define OVPN_NONCE_TAIL_SIZE 8 + +enum ovpn_cipher_alg { + OVPN_CIPHER_ALG_NONE, + OVPN_CIPHER_ALG_AES_GCM, + OVPN_CIPHER_ALG_CHACHA20_POLY1305, +}; + +enum ovpn_del_peer_reason { + OVPN_DEL_PEER_REASON_TEARDOWN, + OVPN_DEL_PEER_REASON_USERSPACE, + OVPN_DEL_PEER_REASON_EXPIRED, + OVPN_DEL_PEER_REASON_TRANSPORT_ERROR, + OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT, +}; + +enum ovpn_key_slot { + OVPN_KEY_SLOT_PRIMARY, + OVPN_KEY_SLOT_SECONDARY, +}; + +enum { + OVPN_A_PEER_ID = 1, + OVPN_A_PEER_REMOTE_IPV4, + OVPN_A_PEER_REMOTE_IPV6, + OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID, + OVPN_A_PEER_REMOTE_PORT, + OVPN_A_PEER_SOCKET, + OVPN_A_PEER_SOCKET_NETNSID, + OVPN_A_PEER_VPN_IPV4, + OVPN_A_PEER_VPN_IPV6, + OVPN_A_PEER_LOCAL_IPV4, + OVPN_A_PEER_LOCAL_IPV6, + OVPN_A_PEER_LOCAL_PORT, + OVPN_A_PEER_KEEPALIVE_INTERVAL, + OVPN_A_PEER_KEEPALIVE_TIMEOUT, + OVPN_A_PEER_DEL_REASON, + OVPN_A_PEER_VPN_RX_BYTES, + OVPN_A_PEER_VPN_TX_BYTES, + OVPN_A_PEER_VPN_RX_PACKETS, + OVPN_A_PEER_VPN_TX_PACKETS, + OVPN_A_PEER_LINK_RX_BYTES, + OVPN_A_PEER_LINK_TX_BYTES, + OVPN_A_PEER_LINK_RX_PACKETS, + OVPN_A_PEER_LINK_TX_PACKETS, + + __OVPN_A_PEER_MAX, + OVPN_A_PEER_MAX = (__OVPN_A_PEER_MAX - 1) +}; + +enum { + OVPN_A_KEYCONF_PEER_ID = 1, + OVPN_A_KEYCONF_SLOT, + OVPN_A_KEYCONF_KEY_ID, + OVPN_A_KEYCONF_CIPHER_ALG, + OVPN_A_KEYCONF_ENCRYPT_DIR, + OVPN_A_KEYCONF_DECRYPT_DIR, + + __OVPN_A_KEYCONF_MAX, + OVPN_A_KEYCONF_MAX = (__OVPN_A_KEYCONF_MAX - 1) +}; + +enum { + OVPN_A_KEYDIR_CIPHER_KEY = 1, + OVPN_A_KEYDIR_NONCE_TAIL, + + __OVPN_A_KEYDIR_MAX, + OVPN_A_KEYDIR_MAX = (__OVPN_A_KEYDIR_MAX - 1) +}; + +enum { + OVPN_A_IFINDEX = 1, + OVPN_A_PEER, + OVPN_A_KEYCONF, + + __OVPN_A_MAX, + OVPN_A_MAX = (__OVPN_A_MAX - 1) +}; + +enum { + OVPN_CMD_PEER_NEW = 1, + OVPN_CMD_PEER_SET, + OVPN_CMD_PEER_GET, + OVPN_CMD_PEER_DEL, + OVPN_CMD_PEER_DEL_NTF, + OVPN_CMD_KEY_NEW, + OVPN_CMD_KEY_GET, + OVPN_CMD_KEY_SWAP, + OVPN_CMD_KEY_SWAP_NTF, + OVPN_CMD_KEY_DEL, + + __OVPN_CMD_MAX, + OVPN_CMD_MAX = (__OVPN_CMD_MAX - 1) +}; + +#define OVPN_MCGRP_PEERS "peers" + +#endif /* _UAPI_LINUX_OVPN_H */ -- cgit v1.2.3 From c2d950c4672a012ea9765c15a389cdcdf919f652 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 15 Apr 2025 13:17:20 +0200 Subject: ovpn: add basic interface creation/destruction/management routines Add basic infrastructure for handling ovpn interfaces. 
Tested-by: Donald Hunter Signed-off-by: Antonio Quartulli Link: https://patch.msgid.link/20250415-b4-ovpn-v26-3-577f6097b964@openvpn.net Reviewed-by: Sabrina Dubroca Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 318386cc5b0d..3ad2d5d98034 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1986,4 +1986,19 @@ enum { #define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1) +/* OVPN section */ + +enum ovpn_mode { + OVPN_MODE_P2P, + OVPN_MODE_MP, +}; + +enum { + IFLA_OVPN_UNSPEC, + IFLA_OVPN_MODE, + __IFLA_OVPN_MAX, +}; + +#define IFLA_OVPN_MAX (__IFLA_OVPN_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ -- cgit v1.2.3 From f6226ae7a0cd47aaa9175aca6a1e19600f884cbf Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 15 Apr 2025 13:17:23 +0200 Subject: ovpn: introduce the ovpn_socket object This specific structure is used in the ovpn kernel module to wrap and carry around a standard kernel socket. ovpn takes ownership of passed sockets and therefore an ovpn-specific object is attached to them for status tracking purposes. Initially only UDP support is introduced. TCP will come in a later patch. Cc: willemdebruijn.kernel@gmail.com Signed-off-by: Antonio Quartulli Link: https://patch.msgid.link/20250415-b4-ovpn-v26-6-577f6097b964@openvpn.net Reviewed-by: Sabrina Dubroca Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Abeni --- include/uapi/linux/udp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index d85d671deed3..edca3e430305 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -43,5 +43,6 @@ struct udphdr { #define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */ #define UDP_ENCAP_RXRPC 6 #define TCP_ENCAP_ESPINTCP 7 /* Yikes, this is really xfrm encap types. */ +#define UDP_ENCAP_OVPNINUDP 8 /* OpenVPN traffic */ #endif /* _UAPI_LINUX_UDP_H */ -- cgit v1.2.3 From 17240749f26e07cafa676688d8a3326086498447 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 15 Apr 2025 13:17:29 +0200 Subject: skb: implement skb_send_sock_locked_with_flags() When sending an skb over a socket using skb_send_sock_locked(), it is currently not possible to specify any flag to be set in msghdr->msg_flags. However, we may want to pass flags the user may have specified, like MSG_NOSIGNAL. Extend __skb_send_sock() with a new argument 'flags' and add a new interface named skb_send_sock_locked_with_flags().
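As a usage sketch (the caller is hypothetical; the prototype is the one added in the diff below), a sender that already holds the socket lock can now suppress SIGPIPE on a dead peer:

	/* Caller holds lock_sock(sk). */
	ret = skb_send_sock_locked_with_flags(sk, skb, 0, skb->len,
					      MSG_NOSIGNAL | MSG_DONTWAIT);
	if (ret < 0)
		pr_debug("send failed: %d\n", ret);	/* e.g. -EPIPE or -EAGAIN */
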
Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Simon Horman Signed-off-by: Antonio Quartulli Link: https://patch.msgid.link/20250415-b4-ovpn-v26-12-577f6097b964@openvpn.net Reviewed-by: Sabrina Dubroca Tested-by: Oleksandr Natalenko Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f1381aff0f89..beb084ee4f4d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4145,6 +4145,8 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, unsigned int flags); int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len); +int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb, + int offset, int len, int flags); int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); -- cgit v1.2.3 From 13f43d7cf3e0570004a0d960bc1be23db827c2ff Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 13:53:56 -0300 Subject: iommu/pages: Formalize the freelist API We want to get rid of struct page references outside the internal allocator implementation. The free list has the driver open code something like: list_add_tail(&virt_to_page(ptr)->lru, freelist); Move the above into a small inline and make the freelist into a wrapper type 'struct iommu_pages_list' so that the compiler can help check all the conversion. This struct has also proven helpful in some future ideas to convert to a singly linked list to get an extra pointer in the struct page, and to signal that the pages should be freed with RCU. Use a temporary _Generic so we don't need to rename the free function as the patches progress. Tested-by: Nicolin Chen Tested-by: Alejandro Jimenez Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/8-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ccce8a751e2a..6dd3a221c5bc 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -341,6 +341,18 @@ typedef unsigned int ioasid_t; /* Read but do not clear any dirty bits */ #define IOMMU_DIRTY_NO_CLEAR (1 << 0) +/* + * Pages allocated through iommu_alloc_pages_node() can be placed on this list + * using iommu_pages_list_add(). Note: ONLY pages from iommu_alloc_pages_node() + * can be used this way! + */ +struct iommu_pages_list { + struct list_head pages; +}; + +#define IOMMU_PAGES_LIST_INIT(name) \ + ((struct iommu_pages_list){ .pages = LIST_HEAD_INIT(name.pages) }) + #ifdef CONFIG_IOMMU_API /** -- cgit v1.2.3 From 868240c34eb113e4121d4ad8ad7458d8caad87d3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 13:53:59 -0300 Subject: iommu: Change iommu_iotlb_gather to use iommu_page_list This converts the remaining places using list of pages to the new API. The Intel free path was shared with its gather path, so it is converted at the same time. 
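To illustrate the conversion (a sketch; iommu_pages_list_add() is the small inline mentioned above and iommu_put_pages_list() the existing free routine, both signatures assumed here), a driver-side free path changes roughly like this:

	struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);

	/* Previously the driver open-coded the struct page linkage:
	 *	list_add_tail(&virt_to_page(ptr)->lru, &old_freelist);
	 * Now, only for memory obtained from the iommu page allocator:
	 */
	iommu_pages_list_add(&freelist, ptr);

	/* ... and after the IOTLB flush ... */
	iommu_put_pages_list(&freelist);
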
Reviewed-by: Lu Baolu Tested-by: Nicolin Chen Tested-by: Alejandro Jimenez Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/11-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6dd3a221c5bc..3fb62165db19 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -375,7 +375,7 @@ struct iommu_iotlb_gather { unsigned long start; unsigned long end; size_t pgsize; - struct list_head freelist; + struct iommu_pages_list freelist; bool queued; }; @@ -864,7 +864,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { .start = ULONG_MAX, - .freelist = LIST_HEAD_INIT(gather->freelist), + .freelist = IOMMU_PAGES_LIST_INIT(gather->freelist), }; } -- cgit v1.2.3 From b3efacc451e19a85fb5acb56d95f40532c4e31d2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 13:54:03 -0300 Subject: iommu/pages: Allow sub page sizes to be passed into the allocator Generally drivers have a specific idea what their HW structure size should be. In a lot of cases this is related to PAGE_SIZE, but not always. ARM64, for example, allows a 4K IO page table size on a 64K CPU page table system. Currently we don't have any good support for sub page allocations, but make the API accommodate this by accepting a sub page size from the caller and rounding up internally. This is done by moving away from order as the size input and using size: size == 1 << (order + PAGE_SHIFT) Following patches convert drivers away from using order and try to specify allocation sizes independent of PAGE_SIZE. Reviewed-by: Lu Baolu Tested-by: Alejandro Jimenez Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/15-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3fb62165db19..062818b51822 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -342,9 +342,9 @@ typedef unsigned int ioasid_t; #define IOMMU_DIRTY_NO_CLEAR (1 << 0) /* - * Pages allocated through iommu_alloc_pages_node() can be placed on this list - * using iommu_pages_list_add(). Note: ONLY pages from iommu_alloc_pages_node() - * can be used this way! + * Pages allocated through iommu_alloc_pages_node_sz() can be placed on this + * list using iommu_pages_list_add(). Note: ONLY pages from + * iommu_alloc_pages_node_sz() can be used this way! */ struct iommu_pages_list { struct list_head pages; -- cgit v1.2.3 From 52edd094750a0a08cb54321df07d9cddbdd2b17c Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 10 Apr 2025 16:40:07 +0200 Subject: dt-bindings: iommu: mediatek: Add binding for MT6893 MM IOMMU Add binding for the MediaTek Dimensity 1200 (MT6893) SoC's MultiMedia (MM) IOMMU. 
Signed-off-by: AngeloGioacchino Del Regno Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250410144008.475888-2-angelogioacchino.delregno@collabora.com Signed-off-by: Joerg Roedel --- .../memory/mediatek,mt6893-memory-port.h | 288 +++++++++++++++++++++ 1 file changed, 288 insertions(+) create mode 100644 include/dt-bindings/memory/mediatek,mt6893-memory-port.h (limited to 'include') diff --git a/include/dt-bindings/memory/mediatek,mt6893-memory-port.h b/include/dt-bindings/memory/mediatek,mt6893-memory-port.h new file mode 100644 index 000000000000..26e8b400db0d --- /dev/null +++ b/include/dt-bindings/memory/mediatek,mt6893-memory-port.h @@ -0,0 +1,288 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2020 MediaTek Inc. + * Copyright (c) 2025 Collabora Ltd + * AngeloGioacchino Del Regno + */ +#ifndef _DT_BINDINGS_MEMORY_MT6893_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MT6893_LARB_PORT_H_ + +#include + +/* + * MM IOMMU supports 16GB dma address. + * + * The address will preassign like this: + * + * modules dma-address-region larbs-ports + * disp 0 ~ 4G larb0/2 + * vcodec 4G ~ 8G larb4/5/7 + * cam/mdp 8G ~ 12G larb9/11/13/14/16/17/18/19/20 + * CCU0 0x4000_0000 ~ 0x43ff_ffff larb13: port 9/10 + * CCU1 0x4400_0000 ~ 0x47ff_ffff larb14: port 4/5 + * + * larb3/6/8/10/12/15 are null. + */ + +/* larb0 */ +#define M4U_PORT_L0_DISP_POSTMASK0 MTK_M4U_DOM_ID(0, 0) +#define M4U_PORT_L0_MDP_RDMA4 MTK_M4U_DOM_ID(0, 1) +#define M4U_PORT_L0_OVL_RDMA0_HDR MTK_M4U_DOM_ID(0, 2) +#define M4U_PORT_L0_OVL_2L_RDMA1_HDR MTK_M4U_DOM_ID(0, 3) +#define M4U_PORT_L0_OVL_2L_RDMA3_HDR MTK_M4U_DOM_ID(0, 4) +#define M4U_PORT_L0_OVL_RDMA0 MTK_M4U_DOM_ID(0, 5) +#define M4U_PORT_L0_OVL_2L_RDMA1 MTK_M4U_DOM_ID(0, 6) +#define M4U_PORT_L0_OVL_2L_RDMA3 MTK_M4U_DOM_ID(0, 7) +#define M4U_PORT_L0_OVL_RDMA1_SYSRAM MTK_M4U_DOM_ID(0, 8) +#define M4U_PORT_L0_OVL_2L_RDMA0_SYSRAM MTK_M4U_DOM_ID(0, 9) +#define M4U_PORT_L0_OVL_2L_RDMA2_SYSRAM MTK_M4U_DOM_ID(0, 10) +#define M4U_PORT_L0_DISP_WDMA0 MTK_M4U_DOM_ID(0, 11) +#define M4U_PORT_L0_DISP_RDMA0 MTK_M4U_DOM_ID(0, 12) +#define M4U_PORT_L0_DISP_UFBC_WDMA0 MTK_M4U_DOM_ID(0, 13) +#define M4U_PORT_L0_DISP_FAKE0 MTK_M4U_DOM_ID(0, 14) + +/* larb1 */ +#define M4U_PORT_L1_DISP_POSTMASK1 MTK_M4U_DOM_ID(1, 0) +#define M4U_PORT_L1_MDP_RDMA5 MTK_M4U_DOM_ID(1, 1) +#define M4U_PORT_L1_OVL_RDMA1_HDR MTK_M4U_DOM_ID(1, 2) +#define M4U_PORT_L1_OVL_2L_RDMA0_HDR MTK_M4U_DOM_ID(1, 3) +#define M4U_PORT_L1_OVL_2L_RDMA2_HDR MTK_M4U_DOM_ID(1, 4) +#define M4U_PORT_L1_OVL_RDMA1 MTK_M4U_DOM_ID(1, 5) +#define M4U_PORT_L1_OVL_2L_RDMA0 MTK_M4U_DOM_ID(1, 6) +#define M4U_PORT_L1_OVL_2L_RDMA2 MTK_M4U_DOM_ID(1, 7) +#define M4U_PORT_L1_OVL_RDMA0_SYSRAM MTK_M4U_DOM_ID(1, 8) +#define M4U_PORT_L1_OVL_2L_RDMA1_SYSRAM MTK_M4U_DOM_ID(1, 9) +#define M4U_PORT_L1_OVL_2L_RDMA3_SYSRAM MTK_M4U_DOM_ID(1, 10) +#define M4U_PORT_L1_DISP_WDMA1 MTK_M4U_DOM_ID(1, 11) +#define M4U_PORT_L1_DISP_RDMA1 MTK_M4U_DOM_ID(1, 12) +#define M4U_PORT_L1_DISP_UFBC_WDMA1 MTK_M4U_DOM_ID(1, 13) +#define M4U_PORT_L1_DISP_FAKE1 MTK_M4U_DOM_ID(1, 14) + +/* larb2 */ +#define M4U_PORT_L2_MDP_RDMA0 MTK_M4U_DOM_ID(2, 0) +#define M4U_PORT_L2_MDP_RDMA2 MTK_M4U_DOM_ID(2, 1) +#define M4U_PORT_L2_MDP_WROT0 MTK_M4U_DOM_ID(2, 2) +#define M4U_PORT_L2_MDP_WROT2 MTK_M4U_DOM_ID(2, 3) +#define M4U_PORT_L2_MDP_FILMGRAIN0 MTK_M4U_DOM_ID(2, 4) +#define M4U_PORT_L2_MDP_FAKE0 MTK_M4U_DOM_ID(2, 5) + +/* larb3: null */ + +/* larb4 */ +#define M4U_PORT_L4_VDEC_MC_EXT_MDP MTK_M4U_DOM_ID(4, 0) +#define M4U_PORT_L4_VDEC_UFO_EXT_MDP 
MTK_M4U_DOM_ID(4, 1) +#define M4U_PORT_L4_VDEC_PP_EXT_MDP MTK_M4U_DOM_ID(4, 2) +#define M4U_PORT_L4_VDEC_PRED_RD_EXT_MDP MTK_M4U_DOM_ID(4, 3) +#define M4U_PORT_L4_VDEC_PRED_WR_EXT_MDP MTK_M4U_DOM_ID(4, 4) +#define M4U_PORT_L4_VDEC_PPWRAP_EXT_MDP MTK_M4U_DOM_ID(4, 5) +#define M4U_PORT_L4_VDEC_TILE_EXT_MDP MTK_M4U_DOM_ID(4, 6) +#define M4U_PORT_L4_VDEC_VLD_EXT_MDP MTK_M4U_DOM_ID(4, 7) +#define M4U_PORT_L4_VDEC_VLD2_EXT_MDP MTK_M4U_DOM_ID(4, 8) +#define M4U_PORT_L4_VDEC_AVC_MV_EXT_MDP MTK_M4U_DOM_ID(4, 9) +#define M4U_PORT_L4_VDEC_RG_CTRL_DMA_EXT_MDP MTK_M4U_DOM_ID(4, 10) + +/* larb5 */ +#define M4U_PORT_L5_VDEC_LAT0_VLD_EXT_DISP MTK_M4U_DOM_ID(5, 0) +#define M4U_PORT_L5_VDEC_LAT0_VLD2_EXT_DISP MTK_M4U_DOM_ID(5, 1) +#define M4U_PORT_L5_VDEC_LAT0_AVC_MV_EXT_DISP MTK_M4U_DOM_ID(5, 2) +#define M4U_PORT_L5_VDEC_LAT0_PRED_RD_EXT_DISP MTK_M4U_DOM_ID(5, 3) +#define M4U_PORT_L5_VDEC_LAT0_TILE_EXT_DISP MTK_M4U_DOM_ID(5, 4) +#define M4U_PORT_L5_VDEC_LAT0_WDMA_EXT_DISP MTK_M4U_DOM_ID(5, 5) +#define M4U_PORT_L5_VDEC_LAT0_RG_CTRL_DMA_EXT_DISP MTK_M4U_DOM_ID(5, 6) +#define M4U_PORT_L5_VDEC_UFO_ENC_EXT_DISP MTK_M4U_DOM_ID(5, 7) + +/* larb6: null */ + +/* larb7 */ +#define M4U_PORT_L7_VENC_RCPU_DISP MTK_M4U_DOM_ID(7, 0) +#define M4U_PORT_L7_VENC_REC_DISP MTK_M4U_DOM_ID(7, 1) +#define M4U_PORT_L7_VENC_BSDMA_DISP MTK_M4U_DOM_ID(7, 2) +#define M4U_PORT_L7_VENC_SV_COMV_DISP MTK_M4U_DOM_ID(7, 3) +#define M4U_PORT_L7_VENC_RD_COMV_DISP MTK_M4U_DOM_ID(7, 4) +#define M4U_PORT_L7_VENC_NBM_RDMA_DISP MTK_M4U_DOM_ID(7, 5) +#define M4U_PORT_L7_VENC_NBM_RDMA_LITE_DISP MTK_M4U_DOM_ID(7, 6) +#define M4U_PORT_L7_JPGENC_Y_RDMA_DISP MTK_M4U_DOM_ID(7, 7) +#define M4U_PORT_L7_JPGENC_C_RDMA_DISP MTK_M4U_DOM_ID(7, 8) +#define M4U_PORT_L7_JPGENC_Q_TABLE_DISP MTK_M4U_DOM_ID(7, 9) +#define M4U_PORT_L7_JPGENC_BSDMA_DISP MTK_M4U_DOM_ID(7, 10) +#define M4U_PORT_L7_JPGENC_WDMA0_DISP MTK_M4U_DOM_ID(7, 11) +#define M4U_PORT_L7_JPGENC_BSDMA0_DISP MTK_M4U_DOM_ID(7, 12) +#define M4U_PORT_L7_VENC_NBM_WDMA_DISP MTK_M4U_DOM_ID(7, 13) +#define M4U_PORT_L7_VENC_NBM_WDMA_LITE_DISP MTK_M4U_DOM_ID(7, 14) +#define M4U_PORT_L7_VENC_CUR_LUMA_DISP MTK_M4U_DOM_ID(7, 15) +#define M4U_PORT_L7_VENC_CUR_CHROMA_DISP MTK_M4U_DOM_ID(7, 16) +#define M4U_PORT_L7_VENC_REF_LUMA_DISP MTK_M4U_DOM_ID(7, 17) +#define M4U_PORT_L7_VENC_REF_CHROMA_DISP MTK_M4U_DOM_ID(7, 18) +#define M4U_PORT_L7_VENC_SUB_R_LUMA_DISP MTK_M4U_DOM_ID(7, 19) +#define M4U_PORT_L7_VENC_SUB_W_LUMA_DISP MTK_M4U_DOM_ID(7, 20) +#define M4U_PORT_L7_VENC_FCS_NBM_RDMA_DISP MTK_M4U_DOM_ID(7, 21) +#define M4U_PORT_L7_VENC_FCS_NBM_WDMA_DISP MTK_M4U_DOM_ID(7, 22) +#define M4U_PORT_L7_JPGENC_WDMA1_DISP MTK_M4U_DOM_ID(7, 23) +#define M4U_PORT_L7_JPGENC_BSDMA1_DISP MTK_M4U_DOM_ID(7, 24) +#define M4U_PORT_L7_JPGENC_HUFF_OFFSET1_DISP MTK_M4U_DOM_ID(7, 25) +#define M4U_PORT_L7_JPGENC_HUFF_OFFSET0_DISP MTK_M4U_DOM_ID(7, 26) + +/* larb8: null */ + +/* larb9 */ +#define M4U_PORT_L9_IMG_IMGI_D1_MDP MTK_M4U_DOM_ID(9, 0) +#define M4U_PORT_L9_IMG_IMGBI_D1_MDP MTK_M4U_DOM_ID(9, 1) +#define M4U_PORT_L9_IMG_DMGI_D1_MDP MTK_M4U_DOM_ID(9, 2) +#define M4U_PORT_L9_IMG_DEPI_D1_MDP MTK_M4U_DOM_ID(9, 3) +#define M4U_PORT_L9_IMG_ICE_D1_MDP MTK_M4U_DOM_ID(9, 4) +#define M4U_PORT_L9_IMG_SMTI_D1_MDP MTK_M4U_DOM_ID(9, 5) +#define M4U_PORT_L9_IMG_SMTO_D2_MDP MTK_M4U_DOM_ID(9, 6) +#define M4U_PORT_L9_IMG_SMTO_D1_MDP MTK_M4U_DOM_ID(9, 7) +#define M4U_PORT_L9_IMG_CRZO_D1_MDP MTK_M4U_DOM_ID(9, 8) +#define M4U_PORT_L9_IMG_IMG3O_D1_MDP MTK_M4U_DOM_ID(9, 9) +#define M4U_PORT_L9_IMG_VIPI_D1_MDP MTK_M4U_DOM_ID(9, 10) +#define 
M4U_PORT_L9_IMG_SMTI_D5_MDP MTK_M4U_DOM_ID(9, 11) +#define M4U_PORT_L9_IMG_TIMGO_D1_MDP MTK_M4U_DOM_ID(9, 12) +#define M4U_PORT_L9_IMG_UFBC_W0_MDP MTK_M4U_DOM_ID(9, 13) +#define M4U_PORT_L9_IMG_UFBC_R0_MDP MTK_M4U_DOM_ID(9, 14) +#define M4U_PORT_L9_IMG_WPE_RDMA1_MDP MTK_M4U_DOM_ID(9, 15) +#define M4U_PORT_L9_IMG_WPE_RDMA0_MDP MTK_M4U_DOM_ID(9, 16) +#define M4U_PORT_L9_IMG_WPE_WDMA_MDP MTK_M4U_DOM_ID(9, 17) +#define M4U_PORT_L9_IMG_MFB_RDMA0_MDP MTK_M4U_DOM_ID(9, 18) +#define M4U_PORT_L9_IMG_MFB_RDMA1_MDP MTK_M4U_DOM_ID(9, 19) +#define M4U_PORT_L9_IMG_MFB_RDMA2_MDP MTK_M4U_DOM_ID(9, 20) +#define M4U_PORT_L9_IMG_MFB_RDMA3_MDP MTK_M4U_DOM_ID(9, 21) +#define M4U_PORT_L9_IMG_MFB_RDMA4_MDP MTK_M4U_DOM_ID(9, 22) +#define M4U_PORT_L9_IMG_MFB_RDMA5_MDP MTK_M4U_DOM_ID(9, 23) +#define M4U_PORT_L9_IMG_MFB_WDMA0_MDP MTK_M4U_DOM_ID(9, 24) +#define M4U_PORT_L9_IMG_MFB_WDMA1_MDP MTK_M4U_DOM_ID(9, 25) +#define M4U_PORT_L9_IMG_RESERVE6_MDP MTK_M4U_DOM_ID(9, 26) +#define M4U_PORT_L9_IMG_RESERVE7_MDP MTK_M4U_DOM_ID(9, 27) +#define M4U_PORT_L9_IMG_RESERVE8_MDP MTK_M4U_DOM_ID(9, 28) + +/* larb10: null */ + +/* larb11 */ +#define M4U_PORT_L11_IMG_IMGI_D1_DISP MTK_M4U_DOM_ID(11, 0) +#define M4U_PORT_L11_IMG_IMGBI_D1_DISP MTK_M4U_DOM_ID(11, 1) +#define M4U_PORT_L11_IMG_DMGI_D1_DISP MTK_M4U_DOM_ID(11, 2) +#define M4U_PORT_L11_IMG_DEPI_D1_DISP MTK_M4U_DOM_ID(11, 3) +#define M4U_PORT_L11_IMG_ICE_D1_DISP MTK_M4U_DOM_ID(11, 4) +#define M4U_PORT_L11_IMG_SMTI_D1_DISP MTK_M4U_DOM_ID(11, 5) +#define M4U_PORT_L11_IMG_SMTO_D2_DISP MTK_M4U_DOM_ID(11, 6) +#define M4U_PORT_L11_IMG_SMTO_D1_DISP MTK_M4U_DOM_ID(11, 7) +#define M4U_PORT_L11_IMG_CRZO_D1_DISP MTK_M4U_DOM_ID(11, 8) +#define M4U_PORT_L11_IMG_IMG3O_D1_DISP MTK_M4U_DOM_ID(11, 9) +#define M4U_PORT_L11_IMG_VIPI_D1_DISP MTK_M4U_DOM_ID(11, 10) +#define M4U_PORT_L11_IMG_SMTI_D5_DISP MTK_M4U_DOM_ID(11, 11) +#define M4U_PORT_L11_IMG_TIMGO_D1_DISP MTK_M4U_DOM_ID(11, 12) +#define M4U_PORT_L11_IMG_UFBC_W0_DISP MTK_M4U_DOM_ID(11, 13) +#define M4U_PORT_L11_IMG_UFBC_R0_DISP MTK_M4U_DOM_ID(11, 14) +#define M4U_PORT_L11_IMG_WPE_RDMA1_DISP MTK_M4U_DOM_ID(11, 15) +#define M4U_PORT_L11_IMG_WPE_RDMA0_DISP MTK_M4U_DOM_ID(11, 16) +#define M4U_PORT_L11_IMG_WPE_WDMA_DISP MTK_M4U_DOM_ID(11, 17) +#define M4U_PORT_L11_IMG_MFB_RDMA0_DISP MTK_M4U_DOM_ID(11, 18) +#define M4U_PORT_L11_IMG_MFB_RDMA1_DISP MTK_M4U_DOM_ID(11, 19) +#define M4U_PORT_L11_IMG_MFB_RDMA2_DISP MTK_M4U_DOM_ID(11, 20) +#define M4U_PORT_L11_IMG_MFB_RDMA3_DISP MTK_M4U_DOM_ID(11, 21) +#define M4U_PORT_L11_IMG_MFB_RDMA4_DISP MTK_M4U_DOM_ID(11, 22) +#define M4U_PORT_L11_IMG_MFB_RDMA5_DISP MTK_M4U_DOM_ID(11, 23) +#define M4U_PORT_L11_IMG_MFB_WDMA0_DISP MTK_M4U_DOM_ID(11, 24) +#define M4U_PORT_L11_IMG_MFB_WDMA1_DISP MTK_M4U_DOM_ID(11, 25) +#define M4U_PORT_L11_IMG_RESERVE6_DISP MTK_M4U_DOM_ID(11, 26) +#define M4U_PORT_L11_IMG_RESERVE7_DISP MTK_M4U_DOM_ID(11, 27) +#define M4U_PORT_L11_IMG_RESERVE8_DISP MTK_M4U_DOM_ID(11, 28) + +/* larb12: null */ + +/* larb13 */ +#define M4U_PORT_L13_CAM_MRAWI_MDP MTK_M4U_DOM_ID(13, 0) +#define M4U_PORT_L13_CAM_MRAWO0_MDP MTK_M4U_DOM_ID(13, 1) +#define M4U_PORT_L13_CAM_MRAWO1_MDP MTK_M4U_DOM_ID(13, 2) +#define M4U_PORT_L13_CAM_CAMSV1_MDP MTK_M4U_DOM_ID(13, 3) +#define M4U_PORT_L13_CAM_CAMSV2_MDP MTK_M4U_DOM_ID(13, 4) +#define M4U_PORT_L13_CAM_CAMSV3_MDP MTK_M4U_DOM_ID(13, 5) +#define M4U_PORT_L13_CAM_CAMSV4_MDP MTK_M4U_DOM_ID(13, 6) +#define M4U_PORT_L13_CAM_CAMSV5_MDP MTK_M4U_DOM_ID(13, 7) +#define M4U_PORT_L13_CAM_CAMSV6_MDP MTK_M4U_DOM_ID(13, 8) +#define M4U_PORT_L13_CAM_CCUI_MDP MTK_M4U_DOM_ID(13, 9) 
+#define M4U_PORT_L13_CAM_CCUO_MDP MTK_M4U_DOM_ID(13, 10) +#define M4U_PORT_L13_CAM_FAKE_MDP MTK_M4U_DOM_ID(13, 11) + +/* larb14 */ +#define M4U_PORT_L14_CAM_MRAWI_DISP MTK_M4U_DOM_ID(14, 0) +#define M4U_PORT_L14_CAM_MRAWO0_DISP MTK_M4U_DOM_ID(14, 1) +#define M4U_PORT_L14_CAM_MRAWO1_DISP MTK_M4U_DOM_ID(14, 2) +#define M4U_PORT_L14_CAM_CAMSV0_DISP MTK_M4U_DOM_ID(14, 3) +#define M4U_PORT_L14_CAM_CCUI_DISP MTK_M4U_DOM_ID(14, 4) +#define M4U_PORT_L14_CAM_CCUO_DISP MTK_M4U_DOM_ID(14, 5) + +/* larb15: null */ + +/* larb16 */ +#define M4U_PORT_L16_CAM_IMGO_R1_A_MDP MTK_M4U_DOM_ID(16, 0) +#define M4U_PORT_L16_CAM_RRZO_R1_A_MDP MTK_M4U_DOM_ID(16, 1) +#define M4U_PORT_L16_CAM_CQI_R1_A_MDP MTK_M4U_DOM_ID(16, 2) +#define M4U_PORT_L16_CAM_BPCI_R1_A_MDP MTK_M4U_DOM_ID(16, 3) +#define M4U_PORT_L16_CAM_YUVO_R1_A_MDP MTK_M4U_DOM_ID(16, 4) +#define M4U_PORT_L16_CAM_UFDI_R2_A_MDP MTK_M4U_DOM_ID(16, 5) +#define M4U_PORT_L16_CAM_RAWI_R2_A_MDP MTK_M4U_DOM_ID(16, 6) +#define M4U_PORT_L16_CAM_RAWI_R3_A_MDP MTK_M4U_DOM_ID(16, 7) +#define M4U_PORT_L16_CAM_AAO_R1_A_MDP MTK_M4U_DOM_ID(16, 8) +#define M4U_PORT_L16_CAM_AFO_R1_A_MDP MTK_M4U_DOM_ID(16, 9) +#define M4U_PORT_L16_CAM_FLKO_R1_A_MDP MTK_M4U_DOM_ID(16, 10) +#define M4U_PORT_L16_CAM_LCESO_R1_A_MDP MTK_M4U_DOM_ID(16, 11) +#define M4U_PORT_L16_CAM_CRZO_R1_A_MDP MTK_M4U_DOM_ID(16, 12) +#define M4U_PORT_L16_CAM_LTMSO_R1_A_MDP MTK_M4U_DOM_ID(16, 13) +#define M4U_PORT_L16_CAM_RSSO_R1_A_MDP MTK_M4U_DOM_ID(16, 14) +#define M4U_PORT_L16_CAM_AAHO_R1_A_MDP MTK_M4U_DOM_ID(16, 15) +#define M4U_PORT_L16_CAM_LSCI_R1_A_MDP MTK_M4U_DOM_ID(16, 16) + +/* larb17 */ +#define M4U_PORT_L17_CAM_IMGO_R1_B_DISP MTK_M4U_DOM_ID(17, 0) +#define M4U_PORT_L17_CAM_RRZO_R1_B_DISP MTK_M4U_DOM_ID(17, 1) +#define M4U_PORT_L17_CAM_CQI_R1_B_DISP MTK_M4U_DOM_ID(17, 2) +#define M4U_PORT_L17_CAM_BPCI_R1_B_DISP MTK_M4U_DOM_ID(17, 3) +#define M4U_PORT_L17_CAM_YUVO_R1_B_DISP MTK_M4U_DOM_ID(17, 4) +#define M4U_PORT_L17_CAM_UFDI_R2_B_DISP MTK_M4U_DOM_ID(17, 5) +#define M4U_PORT_L17_CAM_RAWI_R2_B_DISP MTK_M4U_DOM_ID(17, 6) +#define M4U_PORT_L17_CAM_RAWI_R3_B_DISP MTK_M4U_DOM_ID(17, 7) +#define M4U_PORT_L17_CAM_AAO_R1_B_DISP MTK_M4U_DOM_ID(17, 8) +#define M4U_PORT_L17_CAM_AFO_R1_B_DISP MTK_M4U_DOM_ID(17, 9) +#define M4U_PORT_L17_CAM_FLKO_R1_B_DISP MTK_M4U_DOM_ID(17, 10) +#define M4U_PORT_L17_CAM_LCESO_R1_B_DISP MTK_M4U_DOM_ID(17, 11) +#define M4U_PORT_L17_CAM_CRZO_R1_B_DISP MTK_M4U_DOM_ID(17, 12) +#define M4U_PORT_L17_CAM_LTMSO_R1_B_DISP MTK_M4U_DOM_ID(17, 13) +#define M4U_PORT_L17_CAM_RSSO_R1_B_DISP MTK_M4U_DOM_ID(17, 14) +#define M4U_PORT_L17_CAM_AAHO_R1_B_DISP MTK_M4U_DOM_ID(17, 15) +#define M4U_PORT_L17_CAM_LSCI_R1_B_DISP MTK_M4U_DOM_ID(17, 16) + +/* larb18 */ +#define M4U_PORT_L18_CAM_IMGO_R1_C_MDP MTK_M4U_DOM_ID(18, 0) +#define M4U_PORT_L18_CAM_RRZO_R1_C_MDP MTK_M4U_DOM_ID(18, 1) +#define M4U_PORT_L18_CAM_CQI_R1_C_MDP MTK_M4U_DOM_ID(18, 2) +#define M4U_PORT_L18_CAM_BPCI_R1_C_MDP MTK_M4U_DOM_ID(18, 3) +#define M4U_PORT_L18_CAM_YUVO_R1_C_MDP MTK_M4U_DOM_ID(18, 4) +#define M4U_PORT_L18_CAM_UFDI_R2_C_MDP MTK_M4U_DOM_ID(18, 5) +#define M4U_PORT_L18_CAM_RAWI_R2_C_MDP MTK_M4U_DOM_ID(18, 6) +#define M4U_PORT_L18_CAM_RAWI_R3_C_MDP MTK_M4U_DOM_ID(18, 7) +#define M4U_PORT_L18_CAM_AAO_R1_C_MDP MTK_M4U_DOM_ID(18, 8) +#define M4U_PORT_L18_CAM_AFO_R1_C_MDP MTK_M4U_DOM_ID(18, 9) +#define M4U_PORT_L18_CAM_FLKO_R1_C_MDP MTK_M4U_DOM_ID(18, 10) +#define M4U_PORT_L18_CAM_LCESO_R1_C_MDP MTK_M4U_DOM_ID(18, 11) +#define M4U_PORT_L18_CAM_CRZO_R1_C_MDP MTK_M4U_DOM_ID(18, 12) +#define M4U_PORT_L18_CAM_LTMSO_R1_C_MDP 
MTK_M4U_DOM_ID(18, 13) +#define M4U_PORT_L18_CAM_RSSO_R1_C_MDP MTK_M4U_DOM_ID(18, 14) +#define M4U_PORT_L18_CAM_AAHO_R1_C_MDP MTK_M4U_DOM_ID(18, 15) +#define M4U_PORT_L18_CAM_LSCI_R1_C_MDP MTK_M4U_DOM_ID(18, 16) + +/* larb19 */ +#define M4U_PORT_L19_IPE_DVS_RDMA_DISP MTK_M4U_DOM_ID(19, 0) +#define M4U_PORT_L19_IPE_DVS_WDMA_DISP MTK_M4U_DOM_ID(19, 1) +#define M4U_PORT_L19_IPE_DVP_RDMA_DISP MTK_M4U_DOM_ID(19, 2) +#define M4U_PORT_L19_IPE_DVP_WDMA_DISP MTK_M4U_DOM_ID(19, 3) + +/* larb20 */ +#define M4U_PORT_L20_IPE_FDVT_RDA_DISP MTK_M4U_DOM_ID(20, 0) +#define M4U_PORT_L20_IPE_FDVT_RDB_DISP MTK_M4U_DOM_ID(20, 1) +#define M4U_PORT_L20_IPE_FDVT_WRA_DISP MTK_M4U_DOM_ID(20, 2) +#define M4U_PORT_L20_IPE_FDVT_WRB_DISP MTK_M4U_DOM_ID(20, 3) +#define M4U_PORT_L20_IPE_RSC_RDMA0_DISP MTK_M4U_DOM_ID(20, 4) +#define M4U_PORT_L20_IPE_RSC_WDMA_DISP MTK_M4U_DOM_ID(20, 5) + +#endif -- cgit v1.2.3 From 163ddf1fea590229c30a8dc4c29ff4febfb895c3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Apr 2025 18:24:46 +0300 Subject: spi: Add spi_bpw_to_bytes() helper and use it This helper converts the given bits per word to bytes. The result will always be power-of-two, e.g., =============== ================= Input (in bits) Output (in bytes) =============== ================= 5 1 9 2 21 4 37 8 =============== ================= It will return 0 for the 0 input. There are a couple of cases in SPI that are using the same approach and at least one more (in IIO) would benefit of it. Add a helper for everyone. Signed-off-by: Andy Shevchenko Reviewed-by: David Lechner Link: https://patch.msgid.link/20250417152529.490582-2-andriy.shevchenko@linux.intel.com Acked-by: Mukesh Kumar Savaliya Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 0ba5e49bace4..5b872fe3b1a2 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1325,6 +1325,32 @@ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) return false; } +/** + * spi_bpw_to_bytes - Covert bits per word to bytes + * @bpw: Bits per word + * + * This function converts the given @bpw to bytes. The result is always + * power-of-two, e.g., + * + * =============== ================= + * Input (in bits) Output (in bytes) + * =============== ================= + * 5 1 + * 9 2 + * 21 4 + * 37 8 + * =============== ================= + * + * It will return 0 for the 0 input. + * + * Returns: + * Bytes for the given @bpw. + */ +static inline u32 spi_bpw_to_bytes(u32 bpw) +{ + return roundup_pow_of_two(BITS_TO_BYTES(bpw)); +} + /** * spi_controller_xfer_timeout - Compute a suitable timeout value * @ctlr: SPI device -- cgit v1.2.3 From a1b669ea16c4d7c1a1a8fc7e25aaf651ea0078c3 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 9 Apr 2025 14:45:57 -0700 Subject: bpf: Prepare to reuse get_ctx_arg_idx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename get_ctx_arg_idx to bpf_ctx_arg_idx, and allow others to call it. No functional change. 
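A small sketch of the intended reuse (hypothetical caller; btf_type_by_id() is the usual BTF lookup helper, and the prototype for btf_ctx_arg_idx() is the one exported below): given the byte offset of a ctx access in a tracing program, recover which function argument it refers to:

	const struct btf_type *proto = btf_type_by_id(btf, func_proto_id);
	u32 arg = btf_ctx_arg_idx(btf, proto, off);	/* off: ctx access offset in bytes */
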
Signed-off-by: Amery Hung Signed-off-by: Martin KaFai Lau Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250409214606.2000194-2-ameryhung@gmail.com --- include/linux/btf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index ebc0c0c9b944..b2983706292f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -522,6 +522,7 @@ bool btf_param_match_suffix(const struct btf *btf, const char *suffix); int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, u32 arg_no); +u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, int off); struct bpf_verifier_log; -- cgit v1.2.3 From 151e13ece86d234213b7f224f0e26a957c0eeb3e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Apr 2025 18:02:15 -0700 Subject: net: ethtool: Adjust exactly ETH_GSTRING_LEN-long stats to use memcpy Many drivers populate the stats buffer using C-String based APIs (e.g. ethtool_sprintf() and ethtool_puts()), usually when building up the list of stats individually (i.e. with a for() loop). This, however, requires that the source strings be populated in such a way as to have a terminating NUL byte in the source. Other drivers populate the stats buffer directly using one big memcpy() of an entire array of strings. No NUL termination is needed here, as the bytes are being directly passed through. Yet others will build up the stats buffer individually, but also use memcpy(). This, too, does not need NUL termination of the source strings. However, there are cases where the strings that populate the source stats strings are exactly ETH_GSTRING_LEN long, and GCC 15's -Wunterminated-string-initialization option complains that the trailing NUL byte has been truncated. This situation is fine only if the driver is using the memcpy() approach. If the C-String APIs are used, the destination string name will have its final byte truncated by the required trailing NUL byte applied by the C-string API. For drivers that are already using memcpy() but have initializers that truncate the NUL terminator, mark their source strings as __nonstring to silence the GCC warnings. For drivers that have initializers that truncate the NUL terminator and are using the C-String APIs, switch to memcpy() to avoid destination string truncation and mark their source strings as __nonstring to silence the GCC warnings. (Also introduce ethtool_cpy() as a helper to make this an easy replacement). 
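For drivers whose stat names legitimately fill all ETH_GSTRING_LEN bytes, the replacement pattern looks roughly like this (a sketch; the stats table, its size, and the handler are hypothetical):

	static const char my_stat_names[MY_N_STATS][ETH_GSTRING_LEN];

	static void my_get_strings(struct net_device *dev, u32 stringset, u8 *data)
	{
		int i;

		for (i = 0; i < MY_N_STATS; i++)
			ethtool_cpy(&data, my_stat_names[i]);	/* was ethtool_puts() */
	}
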
Specifically the following warnings were investigated and addressed: ../drivers/net/ethernet/chelsio/cxgb/cxgb2.c:364:9: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization] 364 | "TxFramesAbortedDueToXSCollisions", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/net/ethernet/freescale/enetc/enetc_ethtool.c:165:33: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization] 165 | { ENETC_PM_R1523X(0), "MAC rx 1523 to max-octet packets" }, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/net/ethernet/freescale/enetc/enetc_ethtool.c:190:33: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization] 190 | { ENETC_PM_T1523X(0), "MAC tx 1523 to max-octet packets" }, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/net/ethernet/google/gve/gve_ethtool.c:76:9: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization] 76 | "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c:117:53: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization] 117 | STMMAC_STAT(ptp_rx_msg_type_pdelay_follow_up), | ^ ../drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c:46:12: note: in definition of macro 'STMMAC_STAT' 46 | { #m, sizeof_field(struct stmmac_extra_stats, m), \ | ^ ../drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c:328:24: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization] 328 | .str = "a_mac_control_frames_transmitted", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c:340:24: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization] 340 | .str = "a_pause_mac_ctrl_frames_received", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Kees Cook Reviewed-by: Petr Machata # for mlxsw Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20250416010210.work.904-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 013d25858642..7edb5f5e7134 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1330,6 +1330,17 @@ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); */ extern void ethtool_puts(u8 **data, const char *str); +/** + * ethtool_cpy - Write possibly-not-NUL-terminated string to ethtool string data + * @data: Pointer to a pointer to the start of string to write into + * @str: NUL-byte padded char array of size ETH_GSTRING_LEN to copy from + */ +#define ethtool_cpy(data, str) do { \ + BUILD_BUG_ON(sizeof(str) != ETH_GSTRING_LEN); \ + memcpy(*(data), str, 
ETH_GSTRING_LEN); \ + *(data) += ETH_GSTRING_LEN; \ +} while (0) + /* Link mode to forced speed capabilities maps */ struct ethtool_forced_speed_map { u32 speed; -- cgit v1.2.3 From 22cbc1ee268b7ec0000848708944daa61c6e4909 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Apr 2025 20:04:47 -0700 Subject: netdev: fix the locking for netdev notifications Kuniyuki reports that the assert for netdev lock fires when there are netdev event listeners (otherwise we skip the netlink event generation). Correct the locking when coming from the notifier. The NETDEV_XDP_FEAT_CHANGE notifier is already fully locked, it's the documentation that's incorrect. Fixes: 99e44f39a8f7 ("netdev: depend on netdev->lock for xdp features") Reported-by: syzkaller Reported-by: Kuniyuki Iwashima Link: https://lore.kernel.org/20250410171019.62128-1-kuniyu@amazon.com Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250416030447.1077551-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- include/net/netdev_lock.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e6036b82ef4c..0321fd952f70 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2520,7 +2520,7 @@ struct net_device { * @net_shaper_hierarchy, @reg_state, @threaded * * Double protects: - * @up, @moving_ns, @nd_net, @xdp_flags + * @up, @moving_ns, @nd_net, @xdp_features * * Double ops protects: * @real_num_rx_queues, @real_num_tx_queues diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index 5706835a660c..2a753813f849 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -48,6 +48,22 @@ static inline void netdev_unlock_ops(struct net_device *dev) netdev_unlock(dev); } +static inline void netdev_lock_ops_to_full(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_assert_locked(dev); + else + netdev_lock(dev); +} + +static inline void netdev_unlock_full_to_ops(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_assert_locked(dev); + else + netdev_unlock(dev); +} + static inline void netdev_ops_assert_locked(const struct net_device *dev) { if (netdev_need_ops_lock(dev)) -- cgit v1.2.3 From 2b905deb43ea0b67fa8448fc9c15dacb068f45b6 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Wed, 16 Apr 2025 19:56:23 +0800 Subject: net: Delete the outer () duplicated of macro SOCK_SKB_CB_OFFSET definition In the SOCK_SKB_CB_OFFSET macro definition, delete the duplicated outer parentheses. Signed-off-by: Zijun Hu Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250416-fix_net-v1-1-d544c9f3f169@quicinc.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index bb4d6189292f..e223102337c7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2604,8 +2604,8 @@ struct sock_skb_cb { * using skb->cb[] would keep using it directly and utilize its * alignment guarantee.
*/ -#define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \ - sizeof(struct sock_skb_cb))) +#define SOCK_SKB_CB_OFFSET (sizeof_field(struct sk_buff, cb) - \ + sizeof(struct sock_skb_cb)) #define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \ SOCK_SKB_CB_OFFSET)) -- cgit v1.2.3 From 1df4a945444f071a9c5e09580a485919c42d4de5 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 16 Apr 2025 10:06:12 -0700 Subject: trace: tcp: Add const qualifier to skb parameter in tcp_probe event Change the tcp_probe tracepoint to accept a const struct sk_buff parameter instead of a non-const one. This improves type safety and better reflects that the skb is not modified within the tracepoint implementation. Signed-off-by: Breno Leitao Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250416-tcp_probe-v1-1-1edc3c5a1cb8@debian.org Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 75d3d53a3832..53e878fa14d1 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -293,7 +293,7 @@ DECLARE_TRACE(tcp_cwnd_reduction_tp, TRACE_EVENT(tcp_probe, - TP_PROTO(struct sock *sk, struct sk_buff *skb), + TP_PROTO(struct sock *sk, const struct sk_buff *skb), TP_ARGS(sk, skb), -- cgit v1.2.3 From 8066e388be48f1ad62b0449dc1d31a25489fa12a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Apr 2025 13:08:40 -0700 Subject: net: add UAPI to the header guard in various network headers fib_rule, ip6_tunnel, and a whole lot of if_* headers lack the customary _UAPI in the header guard. Without it YNL build can't protect from in tree and system headers both getting included. YNL doesn't need most of these but it's annoying to have to fix them one by one. Note that header installation strips this _UAPI prefix so this should result in no change to the end user. 
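For reference, the convention being applied (the strip-on-install step is assumed to be scripts/headers_install.sh, matching the note above): the in-tree guard carries the _UAPI prefix and the installed copy loses it:

	/* include/uapi/linux/fib_rules.h in the kernel tree */
	#ifndef _UAPI__LINUX_FIB_RULES_H
	#define _UAPI__LINUX_FIB_RULES_H
	/* ... header body ... */
	#endif

	/* the same header after `make headers_install` */
	#ifndef __LINUX_FIB_RULES_H
	#define __LINUX_FIB_RULES_H
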
Acked-by: Jamal Hadi Salim Reviewed-by: Jason Xing Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250416200840.1338195-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/fib_rules.h | 4 ++-- include/uapi/linux/if_addr.h | 4 ++-- include/uapi/linux/if_addrlabel.h | 4 ++-- include/uapi/linux/if_alg.h | 6 +++--- include/uapi/linux/if_arcnet.h | 6 +++--- include/uapi/linux/if_bonding.h | 6 +++--- include/uapi/linux/if_fc.h | 6 +++--- include/uapi/linux/if_hippi.h | 6 +++--- include/uapi/linux/if_packet.h | 4 ++-- include/uapi/linux/if_plip.h | 4 ++-- include/uapi/linux/if_slip.h | 4 ++-- include/uapi/linux/if_x25.h | 6 +++--- include/uapi/linux/if_xdp.h | 6 +++--- include/uapi/linux/ip6_tunnel.h | 4 ++-- include/uapi/linux/net_dropmon.h | 4 ++-- include/uapi/linux/net_tstamp.h | 6 +++--- include/uapi/linux/netlink_diag.h | 4 ++-- include/uapi/linux/pkt_cls.h | 4 ++-- include/uapi/linux/pkt_sched.h | 4 ++-- 19 files changed, 46 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2df6e4035d50..418c4be697ad 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_FIB_RULES_H -#define __LINUX_FIB_RULES_H +#ifndef _UAPI__LINUX_FIB_RULES_H +#define _UAPI__LINUX_FIB_RULES_H #include #include diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index 1c392dd95a5e..aa7958b4e41d 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_IF_ADDR_H -#define __LINUX_IF_ADDR_H +#ifndef _UAPI__LINUX_IF_ADDR_H +#define _UAPI__LINUX_IF_ADDR_H #include #include diff --git a/include/uapi/linux/if_addrlabel.h b/include/uapi/linux/if_addrlabel.h index d1f5974c76e1..e69db764fbba 100644 --- a/include/uapi/linux/if_addrlabel.h +++ b/include/uapi/linux/if_addrlabel.h @@ -8,8 +8,8 @@ * YOSHIFUJI Hideaki @ USAGI/WIDE */ -#ifndef __LINUX_IF_ADDRLABEL_H -#define __LINUX_IF_ADDRLABEL_H +#ifndef _UAPI__LINUX_IF_ADDRLABEL_H +#define _UAPI__LINUX_IF_ADDRLABEL_H #include diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index 0824fbc026a1..b35871cbeed7 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -11,8 +11,8 @@ * */ -#ifndef _LINUX_IF_ALG_H -#define _LINUX_IF_ALG_H +#ifndef _UAPI_LINUX_IF_ALG_H +#define _UAPI_LINUX_IF_ALG_H #include @@ -58,4 +58,4 @@ struct af_alg_iv { #define ALG_OP_DECRYPT 0 #define ALG_OP_ENCRYPT 1 -#endif /* _LINUX_IF_ALG_H */ +#endif /* _UAPI_LINUX_IF_ALG_H */ diff --git a/include/uapi/linux/if_arcnet.h b/include/uapi/linux/if_arcnet.h index b122cfac7128..473569eaf692 100644 --- a/include/uapi/linux/if_arcnet.h +++ b/include/uapi/linux/if_arcnet.h @@ -14,8 +14,8 @@ * 2 of the License, or (at your option) any later version. 
*/ -#ifndef _LINUX_IF_ARCNET_H -#define _LINUX_IF_ARCNET_H +#ifndef _UAPI_LINUX_IF_ARCNET_H +#define _UAPI_LINUX_IF_ARCNET_H #include #include @@ -127,4 +127,4 @@ struct archdr { } soft; }; -#endif /* _LINUX_IF_ARCNET_H */ +#endif /* _UAPI_LINUX_IF_ARCNET_H */ diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index d174914a837d..3bcc03f3aa4f 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -41,8 +41,8 @@ * - added definitions for various XOR hashing policies */ -#ifndef _LINUX_IF_BONDING_H -#define _LINUX_IF_BONDING_H +#ifndef _UAPI_LINUX_IF_BONDING_H +#define _UAPI_LINUX_IF_BONDING_H #include #include @@ -152,4 +152,4 @@ enum { }; #define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1) -#endif /* _LINUX_IF_BONDING_H */ +#endif /* _UAPI_LINUX_IF_BONDING_H */ diff --git a/include/uapi/linux/if_fc.h b/include/uapi/linux/if_fc.h index 3e3173282cc3..ff5ab92d16c2 100644 --- a/include/uapi/linux/if_fc.h +++ b/include/uapi/linux/if_fc.h @@ -18,8 +18,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#ifndef _LINUX_IF_FC_H -#define _LINUX_IF_FC_H +#ifndef _UAPI_LINUX_IF_FC_H +#define _UAPI_LINUX_IF_FC_H #include @@ -49,4 +49,4 @@ struct fcllc { __be16 ethertype; /* ether type field */ }; -#endif /* _LINUX_IF_FC_H */ +#endif /* _UAPI_LINUX_IF_FC_H */ diff --git a/include/uapi/linux/if_hippi.h b/include/uapi/linux/if_hippi.h index 785a1452a66c..42c4ffd11dae 100644 --- a/include/uapi/linux/if_hippi.h +++ b/include/uapi/linux/if_hippi.h @@ -20,8 +20,8 @@ * 2 of the License, or (at your option) any later version. */ -#ifndef _LINUX_IF_HIPPI_H -#define _LINUX_IF_HIPPI_H +#ifndef _UAPI_LINUX_IF_HIPPI_H +#define _UAPI_LINUX_IF_HIPPI_H #include #include @@ -151,4 +151,4 @@ struct hippi_hdr { struct hippi_snap_hdr snap; } __attribute__((packed)); -#endif /* _LINUX_IF_HIPPI_H */ +#endif /* _UAPI_LINUX_IF_HIPPI_H */ diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 1d2718dd9647..6cd1d7a41dfb 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_IF_PACKET_H -#define __LINUX_IF_PACKET_H +#ifndef _UAPI__LINUX_IF_PACKET_H +#define _UAPI__LINUX_IF_PACKET_H #include #include diff --git a/include/uapi/linux/if_plip.h b/include/uapi/linux/if_plip.h index 495a366112f2..054d86a9c6e6 100644 --- a/include/uapi/linux/if_plip.h +++ b/include/uapi/linux/if_plip.h @@ -9,8 +9,8 @@ * */ -#ifndef _LINUX_IF_PLIP_H -#define _LINUX_IF_PLIP_H +#ifndef _UAPI_LINUX_IF_PLIP_H +#define _UAPI_LINUX_IF_PLIP_H #include diff --git a/include/uapi/linux/if_slip.h b/include/uapi/linux/if_slip.h index 65937be53103..299bf7adc862 100644 --- a/include/uapi/linux/if_slip.h +++ b/include/uapi/linux/if_slip.h @@ -6,8 +6,8 @@ * KISS TNC driver. */ -#ifndef __LINUX_SLIP_H -#define __LINUX_SLIP_H +#ifndef _UAPI__LINUX_SLIP_H +#define _UAPI__LINUX_SLIP_H #define SL_MODE_SLIP 0 #define SL_MODE_CSLIP 1 diff --git a/include/uapi/linux/if_x25.h b/include/uapi/linux/if_x25.h index 3a5938e38370..861cfa983db4 100644 --- a/include/uapi/linux/if_x25.h +++ b/include/uapi/linux/if_x25.h @@ -13,8 +13,8 @@ * GNU General Public License for more details. 
*/ -#ifndef _IF_X25_H -#define _IF_X25_H +#ifndef _UAPI_IF_X25_H +#define _UAPI_IF_X25_H #include @@ -24,4 +24,4 @@ #define X25_IFACE_DISCONNECT 0x02 #define X25_IFACE_PARAMS 0x03 -#endif /* _IF_X25_H */ +#endif /* _UAPI_IF_X25_H */ diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 42869770776e..44f2bb93e7e6 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -7,8 +7,8 @@ * Magnus Karlsson */ -#ifndef _LINUX_IF_XDP_H -#define _LINUX_IF_XDP_H +#ifndef _UAPI_LINUX_IF_XDP_H +#define _UAPI_LINUX_IF_XDP_H #include @@ -180,4 +180,4 @@ struct xdp_desc { /* TX packet carries valid metadata. */ #define XDP_TX_METADATA (1 << 1) -#endif /* _LINUX_IF_XDP_H */ +#endif /* _UAPI_LINUX_IF_XDP_H */ diff --git a/include/uapi/linux/ip6_tunnel.h b/include/uapi/linux/ip6_tunnel.h index 0245269b037c..85182a839d42 100644 --- a/include/uapi/linux/ip6_tunnel.h +++ b/include/uapi/linux/ip6_tunnel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _IP6_TUNNEL_H -#define _IP6_TUNNEL_H +#ifndef _UAPI_IP6_TUNNEL_H +#define _UAPI_IP6_TUNNEL_H #include #include /* For IFNAMSIZ. */ diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 84f622a66a7a..9dd41c2f58a6 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __NET_DROPMON_H -#define __NET_DROPMON_H +#ifndef _UAPI__NET_DROPMON_H +#define _UAPI__NET_DROPMON_H #include #include diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 383213de612a..a93e6ea37fb3 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -7,8 +7,8 @@ * */ -#ifndef _NET_TIMESTAMPING_H -#define _NET_TIMESTAMPING_H +#ifndef _UAPI_NET_TIMESTAMPING_H +#define _UAPI_NET_TIMESTAMPING_H #include #include /* for SO_TIMESTAMPING */ @@ -216,4 +216,4 @@ struct sock_txtime { __u32 flags; /* as defined by enum txtime_flags */ }; -#endif /* _NET_TIMESTAMPING_H */ +#endif /* _UAPI_NET_TIMESTAMPING_H */ diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index dfa61be43d2f..ff28200204bb 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __NETLINK_DIAG_H__ -#define __NETLINK_DIAG_H__ +#ifndef _UAPI__NETLINK_DIAG_H__ +#define _UAPI__NETLINK_DIAG_H__ #include diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 2c32080416b5..490821364165 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_PKT_CLS_H -#define __LINUX_PKT_CLS_H +#ifndef _UAPI__LINUX_PKT_CLS_H +#define _UAPI__LINUX_PKT_CLS_H #include #include diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 25a9a47001cd..9ea874395717 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_PKT_SCHED_H -#define __LINUX_PKT_SCHED_H +#ifndef _UAPI__LINUX_PKT_SCHED_H +#define _UAPI__LINUX_PKT_SCHED_H #include #include -- cgit v1.2.3 From 9ff2aa4206eff40a202e425f232036bc84ad4c0e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 17 Mar 2025 23:07:30 -0400 Subject: net: ethtool: mm: extract stmmac verification logic into 
common library It appears that stmmac is not the only hardware which requires a software-driven verification state machine for the MAC Merge layer. While on the one hand it's good to encourage hardware implementations, on the other hand it's quite difficult to tolerate multiple drivers implementing independently fairly non-trivial logic. Extract the hardware-independent logic from stmmac into library code and put it in ethtool. Name the state structure "mmsv" for MAC Merge Software Verification. Let this expose an operations structure for executing the hardware stuff: sync hardware with the tx_active boolean (result of verification process), enable/disable the pMAC, send mPackets, notify library of external events (reception of mPackets), as well as link state changes. Note that it is assumed that the external events are received in hardirq context. If they are not, it is probably a good idea to disable hardirqs when calling ethtool_mmsv_event_handle(), because the library does not do so. Also, the MM software verification process has no business with the tx_min_frag_size, that is all the driver's to handle. Signed-off-by: Vladimir Oltean Co-developed-by: Choong Yong Liang Signed-off-by: Choong Yong Liang Tested-by: Choong Yong Liang Tested-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Vladimir Oltean Signed-off-by: Faizal Rahim Signed-off-by: Tony Nguyen --- include/linux/ethtool.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 7edb5f5e7134..117718c24814 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -17,9 +17,13 @@ #include #include #include +#include #include #include +#define ETHTOOL_MM_MAX_VERIFY_TIME_MS 128 +#define ETHTOOL_MM_MAX_VERIFY_RETRIES 3 + struct compat_ethtool_rx_flow_spec { u32 flow_type; union ethtool_flow_union h_u; @@ -718,6 +722,75 @@ struct ethtool_mm_stats { u64 MACMergeHoldCount; }; +enum ethtool_mmsv_event { + ETHTOOL_MMSV_LP_SENT_VERIFY_MPACKET, + ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET, + ETHTOOL_MMSV_LP_SENT_RESPONSE_MPACKET, +}; + +/* MAC Merge verification mPacket type */ +enum ethtool_mpacket { + ETHTOOL_MPACKET_VERIFY, + ETHTOOL_MPACKET_RESPONSE, +}; + +struct ethtool_mmsv; + +/** + * struct ethtool_mmsv_ops - Operations for MAC Merge Software Verification + * @configure_tx: Driver callback for the event where the preemptible TX + * becomes active or inactive. Preemptible traffic + * classes must be committed to hardware only while + * preemptible TX is active. + * @configure_pmac: Driver callback for the event where the pMAC state + * changes as result of an administrative setting + * (ethtool) or a call to ethtool_mmsv_link_state_handle(). + * @send_mpacket: Driver-provided method for sending a Verify or a Response + * mPacket. + */ +struct ethtool_mmsv_ops { + void (*configure_tx)(struct ethtool_mmsv *mmsv, bool tx_active); + void (*configure_pmac)(struct ethtool_mmsv *mmsv, bool pmac_enabled); + void (*send_mpacket)(struct ethtool_mmsv *mmsv, enum ethtool_mpacket mpacket); +}; + +/** + * struct ethtool_mmsv - MAC Merge Software Verification + * @ops: operations for MAC Merge Software Verification + * @dev: pointer to net_device structure + * @lock: serialize access to MAC Merge state between + * ethtool requests and link state updates. 
+ * @status: current verification FSM state + * @verify_timer: timer for verification in local TX direction + * @verify_enabled: indicates if verification is enabled + * @verify_retries: number of retries for verification + * @pmac_enabled: indicates if the preemptible MAC is enabled + * @verify_time: time for verification in milliseconds + * @tx_enabled: indicates if transmission is enabled + */ +struct ethtool_mmsv { + const struct ethtool_mmsv_ops *ops; + struct net_device *dev; + spinlock_t lock; + enum ethtool_mm_verify_status status; + struct timer_list verify_timer; + bool verify_enabled; + int verify_retries; + bool pmac_enabled; + u32 verify_time; + bool tx_enabled; +}; + +void ethtool_mmsv_stop(struct ethtool_mmsv *mmsv); +void ethtool_mmsv_link_state_handle(struct ethtool_mmsv *mmsv, bool up); +void ethtool_mmsv_event_handle(struct ethtool_mmsv *mmsv, + enum ethtool_mmsv_event event); +void ethtool_mmsv_get_mm(struct ethtool_mmsv *mmsv, + struct ethtool_mm_state *state); +void ethtool_mmsv_set_mm(struct ethtool_mmsv *mmsv, struct ethtool_mm_cfg *cfg); +void ethtool_mmsv_init(struct ethtool_mmsv *mmsv, struct net_device *dev, + const struct ethtool_mmsv_ops *ops); + /** * struct ethtool_rxfh_param - RXFH (RSS) parameters * @hfunc: Defines the current RSS hash function used by HW (or to be set to). -- cgit v1.2.3 From 818bd489f137e9257d701c0d4bf11efdfd02ca5f Mon Sep 17 00:00:00 2001 From: Romain Gantois Date: Thu, 6 Mar 2025 17:23:25 +0100 Subject: i2c: use client addresses directly in ATR interface The I2C Address Translator (ATR) module defines mappings from i2c_client structs to aliases. However, only the physical address of each i2c_client struct is actually relevant to the workings of the ATR module. Moreover, some drivers require address translation functionality but do not allocate i2c_client structs, accessing the adapter directly instead. The SFP subsystem is an example of this. Replace the "i2c_client" field of the i2c_atr_alias_pair struct with a u16 "addr" field. Rewrite helper functions and callbacks as needed. Reviewed-by: Tomi Valkeinen Tested-by: Tomi Valkeinen Signed-off-by: Romain Gantois Acked-by: Andi Shyti Signed-off-by: Wolfram Sang --- include/linux/i2c-atr.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h index 4d5da161c225..14c1f9175c0d 100644 --- a/include/linux/i2c-atr.h +++ b/include/linux/i2c-atr.h @@ -20,20 +20,20 @@ struct i2c_atr; /** * struct i2c_atr_ops - Callbacks from ATR to the device driver. - * @attach_client: Notify the driver of a new device connected on a child - * bus, with the alias assigned to it. The driver must - * configure the hardware to use the alias. - * @detach_client: Notify the driver of a device getting disconnected. The - * driver must configure the hardware to stop using the - * alias. + * @attach_addr: Notify the driver of a new device connected on a child + * bus, with the alias assigned to it. The driver must + * configure the hardware to use the alias. + * @detach_addr: Notify the driver of a device getting disconnected. The + * driver must configure the hardware to stop using the + * alias. * * All these functions return 0 on success, a negative error code otherwise. 
*/ struct i2c_atr_ops { - int (*attach_client)(struct i2c_atr *atr, u32 chan_id, - const struct i2c_client *client, u16 alias); - void (*detach_client)(struct i2c_atr *atr, u32 chan_id, - const struct i2c_client *client); + int (*attach_addr)(struct i2c_atr *atr, u32 chan_id, + u16 addr, u16 alias); + void (*detach_addr)(struct i2c_atr *atr, u32 chan_id, + u16 addr); }; /** -- cgit v1.2.3 From 328a106ce0e8d0596d2b020b6f83efd0c859b084 Mon Sep 17 00:00:00 2001 From: Romain Gantois Date: Thu, 6 Mar 2025 17:23:28 +0100 Subject: i2c: support per-channel ATR alias pools Some I2C address translators (ATRs) assign each of their remote peripheral aliases to a specific channel. To properly handle these devices, add support for having separate alias pools for each ATR channel. This is achieved by allowing callers of i2c_atr_add_adapter to pass an optional alias list. If present, this list will be used to populate the channel's alias pool. Otherwise, the common alias pool will be used. Tested-by: Tomi Valkeinen Signed-off-by: Romain Gantois Acked-by: Andi Shyti Signed-off-by: Wolfram Sang --- include/linux/i2c-atr.h | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h index 14c1f9175c0d..1c3a5bcd939f 100644 --- a/include/linux/i2c-atr.h +++ b/include/linux/i2c-atr.h @@ -36,6 +36,29 @@ struct i2c_atr_ops { u16 addr); }; +/** + * struct i2c_atr_adap_desc - An ATR downstream bus descriptor + * @chan_id: Index of the new adapter (0 .. max_adapters-1). This value is + * passed to the callbacks in `struct i2c_atr_ops`. + * @parent: The device used as the parent of the new i2c adapter, or NULL + * to use the i2c-atr device as the parent. + * @bus_handle: The fwnode handle that points to the adapter's i2c + * peripherals, or NULL. + * @num_aliases: The number of aliases in this adapter's private alias pool. Set + * to zero if this adapter uses the ATR's global alias pool. + * @aliases: An optional array of private aliases used by the adapter + * instead of the ATR's global pool of aliases. Must contain + * exactly num_aliases entries if num_aliases > 0, is ignored + * otherwise. + */ +struct i2c_atr_adap_desc { + u32 chan_id; + struct device *parent; + struct fwnode_handle *bus_handle; + size_t num_aliases; + u16 *aliases; +}; + /** * i2c_atr_new() - Allocate and initialize an I2C ATR helper. * @parent: The parent (upstream) adapter @@ -65,12 +88,7 @@ void i2c_atr_delete(struct i2c_atr *atr); /** * i2c_atr_add_adapter - Create a child ("downstream") I2C bus. * @atr: The I2C ATR - * @chan_id: Index of the new adapter (0 .. max_adapters-1). This value is - * passed to the callbacks in `struct i2c_atr_ops`. - * @adapter_parent: The device used as the parent of the new i2c adapter, or NULL - * to use the i2c-atr device as the parent. - * @bus_handle: The fwnode handle that points to the adapter's i2c - * peripherals, or NULL. + * @desc: An ATR adapter descriptor * * After calling this function a new i2c bus will appear. Adding and removing * devices on the downstream bus will result in calls to the @@ -85,9 +103,7 @@ void i2c_atr_delete(struct i2c_atr *atr); * * Return: 0 on success, a negative error code otherwise. 
*/ -int i2c_atr_add_adapter(struct i2c_atr *atr, u32 chan_id, - struct device *adapter_parent, - struct fwnode_handle *bus_handle); +int i2c_atr_add_adapter(struct i2c_atr *atr, struct i2c_atr_adap_desc *desc); /** * i2c_atr_del_adapter - Remove a child ("downstream") I2C bus added by -- cgit v1.2.3 From 178af54a678d08735233e070a9329651e1589587 Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Fri, 28 Mar 2025 15:58:32 +0530 Subject: PCI: Add lane equalization register offsets As per PCIe spec 6.0.1, add PCIe lane equalization register offset for data rates 8.0 GT/s, 32.0 GT/s and 64.0 GT/s. Also add a macro for defining data rate 64.0 GT/s physical layer capability ID. Signed-off-by: Krishna Chaitanya Chundru Signed-off-by: Manivannan Sadhasivam Reviewed-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250328-preset_v6-v9-4-22cfa0490518@oss.qualcomm.com --- include/uapi/linux/pci_regs.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index ba326710f9c8..a3a3e942dedf 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -750,7 +750,8 @@ #define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ #define PCI_EXT_CAP_ID_PL_32GT 0x2A /* Physical Layer 32.0 GT/s */ #define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */ -#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE +#define PCI_EXT_CAP_ID_PL_64GT 0x31 /* Physical Layer 64.0 GT/s */ +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_64GT #define PCI_EXT_CAP_DSN_SIZEOF 12 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 @@ -1144,12 +1145,21 @@ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ #define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ +/* Secondary PCIe Capability 8.0 GT/s */ +#define PCI_SECPCI_LE_CTRL 0x0c /* Lane Equalization Control Register */ + /* Physical Layer 16.0 GT/s */ #define PCI_PL_16GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ #define PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK 0x0000000F #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0 #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4 +/* Physical Layer 32.0 GT/s */ +#define PCI_PL_32GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ + +/* Physical Layer 64.0 GT/s */ +#define PCI_PL_64GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ + /* Native PCIe Enclosure Management */ #define PCI_NPEM_CAP 0x04 /* NPEM capability register */ #define PCI_NPEM_CAP_CAPABLE 0x00000001 /* NPEM Capable */ -- cgit v1.2.3 From 8f49682d94f3a12a6a3e636a07bbe57c80329d1d Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Mon, 14 Apr 2025 02:00:33 -0700 Subject: RDMA/mana_ib: support of the zero based MRs Add IB_ZERO_BASED to the valid flags and use the corresponding MR creation request for the zero based memory. 
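For context, a hedged userspace-side sketch (libibverbs, not part of this patch): a consumer requests a zero-based MR by adding IBV_ACCESS_ZERO_BASED to the access flags, after which the region is addressed by offsets starting at 0 rather than by virtual address. Here pd, buf and len are assumed to be set up by the caller.

  struct ibv_mr *mr;

  mr = ibv_reg_mr(pd, buf, len,
                  IBV_ACCESS_LOCAL_WRITE |
                  IBV_ACCESS_REMOTE_WRITE |
                  IBV_ACCESS_ZERO_BASED);
  if (!mr)
          err(1, "ibv_reg_mr");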
Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1744621234-26114-3-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Long Li Signed-off-by: Leon Romanovsky --- include/net/mana/gdma.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 228603bf03f2..239a70032550 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -815,6 +815,8 @@ enum gdma_mr_type { * address that is set up in the MST */ GDMA_MR_TYPE_GVA = 2, + /* Guest zero-based address MRs */ + GDMA_MR_TYPE_ZBVA = 4, }; struct gdma_create_mr_params { @@ -826,6 +828,10 @@ struct gdma_create_mr_params { u64 virtual_address; enum gdma_mr_access_flags access_flags; } gva; + struct { + u64 dma_region_handle; + enum gdma_mr_access_flags access_flags; + } zbva; }; }; @@ -841,7 +847,10 @@ struct gdma_create_mr_request { u64 virtual_address; enum gdma_mr_access_flags access_flags; } gva; - + struct { + u64 dma_region_handle; + enum gdma_mr_access_flags access_flags; + } zbva; }; u32 reserved_2; };/* HW DATA */ -- cgit v1.2.3 From f1652d76f4c51b5aefd14706eecbd70f05ca987a Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Mon, 14 Apr 2025 02:00:34 -0700 Subject: RDMA/mana_ib: Add support of 4M, 1G, and 2G pages Check PF capability flag whether the 4M, 1G, and 2G pages are supported. Add these pages sizes to mana_ib, if supported. Define possible page sizes in enum gdma_page_type and remove unused enum atb_page_size. Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1744621234-26114-4-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Long Li Signed-off-by: Leon Romanovsky --- include/net/mana/gdma.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 239a70032550..ffa9820f14ba 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -407,6 +407,8 @@ struct gdma_context { /* Azure RDMA adapter */ struct gdma_dev mana_ib; + + u64 pf_cap_flags1; }; static inline bool mana_gd_is_mana(struct gdma_dev *gd) @@ -553,6 +555,7 @@ enum { */ #define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) +#define GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB BIT(4) #define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5) /* Driver can handle holes (zeros) in the device list */ @@ -707,20 +710,6 @@ struct gdma_query_hwc_timeout_resp { u32 reserved; }; -enum atb_page_size { - ATB_PAGE_SIZE_4K, - ATB_PAGE_SIZE_8K, - ATB_PAGE_SIZE_16K, - ATB_PAGE_SIZE_32K, - ATB_PAGE_SIZE_64K, - ATB_PAGE_SIZE_128K, - ATB_PAGE_SIZE_256K, - ATB_PAGE_SIZE_512K, - ATB_PAGE_SIZE_1M, - ATB_PAGE_SIZE_2M, - ATB_PAGE_SIZE_MAX, -}; - enum gdma_mr_access_flags { GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0), GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1), -- cgit v1.2.3 From 04039390cc3cb3d1e6ce6bb1680cbdfe117d6473 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 18 Apr 2025 17:58:48 +0100 Subject: RDMA/cma: Remove unused rdma_res_to_id The last use of rdma_res_to_id() was removed in 2020 by commi t211cd9459fda ("RDMA: Add dedicated CM_ID resource tracker function") Remove it. Signed-off-by: Dr. 
David Alan Gilbert Link: https://patch.msgid.link/20250418165848.241305-1-linux@treblig.org Signed-off-by: Leon Romanovsky --- include/rdma/rdma_cm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 8a8ab2f793ab..d1593ad47e28 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -388,6 +388,5 @@ void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, union ib_gid *dgid); struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *cm_id); -struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res); #endif /* RDMA_CM_H */ -- cgit v1.2.3 From fd073dffef041d6a2d11f00cd6cbd8ff46083396 Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Mon, 10 Mar 2025 18:41:06 +0800 Subject: drm/bridge: analogix_dp: Support to get &analogix_dp_device.plat_data and &analogix_dp_device.aux Add two new functions: one to find &analogix_dp_device.plat_data via &drm_dp_aux, and the other to get &analogix_dp_device.aux. Both of them serve for the function of getting panel from DP AUX bus, which is why they are included in a single commit. Reviewed-by: Dmitry Baryshkov Signed-off-by: Damon Ding Link: https://lore.kernel.org/r/20250310104114.2608063-6-damon.ding@rock-chips.com Signed-off-by: Dmitry Baryshkov --- include/drm/bridge/analogix_dp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h index 6002c5666031..e19b0bbc57d6 100644 --- a/include/drm/bridge/analogix_dp.h +++ b/include/drm/bridge/analogix_dp.h @@ -10,6 +10,7 @@ #include struct analogix_dp_device; +struct drm_dp_aux; enum analogix_dp_devtype { EXYNOS_DP, @@ -48,4 +49,7 @@ void analogix_dp_unbind(struct analogix_dp_device *dp); int analogix_dp_start_crc(struct drm_connector *connector); int analogix_dp_stop_crc(struct drm_connector *connector); +struct analogix_dp_plat_data *analogix_dp_aux_to_plat_data(struct drm_dp_aux *aux); +struct drm_dp_aux *analogix_dp_get_aux(struct analogix_dp_device *dp); + #endif /* _ANALOGIX_DP_H_ */ -- cgit v1.2.3 From 0e8b86b6df143662c631dee8bb3b1fff368aa18a Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Mon, 10 Mar 2025 18:41:11 +0800 Subject: drm/bridge: analogix_dp: Add support for RK3588 Expand enum analogix_dp_devtype with RK3588_EDP, and add max_link_rate and max_lane_count configs for it. Reviewed-by: Dmitry Baryshkov Signed-off-by: Damon Ding Link: https://lore.kernel.org/r/20250310104114.2608063-11-damon.ding@rock-chips.com Signed-off-by: Dmitry Baryshkov --- include/drm/bridge/analogix_dp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h index e19b0bbc57d6..cf17646c1310 100644 --- a/include/drm/bridge/analogix_dp.h +++ b/include/drm/bridge/analogix_dp.h @@ -16,11 +16,12 @@ enum analogix_dp_devtype { EXYNOS_DP, RK3288_DP, RK3399_EDP, + RK3588_EDP, }; static inline bool is_rockchip(enum analogix_dp_devtype type) { - return type == RK3288_DP || type == RK3399_EDP; + return type == RK3288_DP || type == RK3399_EDP || type == RK3588_EDP; } struct analogix_dp_plat_data { -- cgit v1.2.3 From e8866e26f5e8ec551eec73dca1c30d458f9dca86 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 11 Apr 2025 07:59:09 +0200 Subject: ata: libata-core: Simplify ata_print_version_once Use dev_dbg_once() instead of open-coding the once functionality. 
Signed-off-by: Heiner Kallweit Signed-off-by: Damien Le Moal --- include/linux/libata.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index e5695998acb0..98affa1d9136 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -41,17 +41,6 @@ */ #undef ATA_IRQ_TRAP /* define to ack screaming irqs */ - -#define ata_print_version_once(dev, version) \ -({ \ - static bool __print_once; \ - \ - if (!__print_once) { \ - __print_once = true; \ - ata_print_version(dev, version); \ - } \ -}) - /* defines only for the constants which don't work well as enums */ #define ATA_TAG_POISON 0xfafbfcfdU @@ -1593,7 +1582,11 @@ do { \ #define ata_dev_dbg(dev, fmt, ...) \ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__) -void ata_print_version(const struct device *dev, const char *version); +static inline void ata_print_version_once(const struct device *dev, + const char *version) +{ + dev_dbg_once(dev, "version %s\n", version); +} /* * ata_eh_info helpers -- cgit v1.2.3 From 296b67059e3026125a1ca942f5506e6ca051749e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 11 Apr 2025 23:31:40 +0800 Subject: fs/fs_parse: Delete macro fsparam_u32hex() Delete macro fsparam_u32hex() since: - it has no caller. - it uses as type @fs_param_is_u32_hex which is never defined, so will cause compile error when caller uses it. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/20250411-fix_fs-v2-1-5d3395c102e4@quicinc.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs_parser.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 53e566efd5fd..5057faf4f091 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -125,8 +125,6 @@ static inline bool fs_validate_description(const char *name, #define fsparam_u32(NAME, OPT) __fsparam(fs_param_is_u32, NAME, OPT, 0, NULL) #define fsparam_u32oct(NAME, OPT) \ __fsparam(fs_param_is_u32, NAME, OPT, 0, (void *)8) -#define fsparam_u32hex(NAME, OPT) \ - __fsparam(fs_param_is_u32_hex, NAME, OPT, 0, (void *)16) #define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0, NULL) #define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0, NULL) #define fsparam_enum(NAME, OPT, array) __fsparam(fs_param_is_enum, NAME, OPT, 0, array) -- cgit v1.2.3 From d1f482108a2cff2b9c6ebebc40b157aaeb8213b3 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 15 Apr 2025 20:25:00 +0800 Subject: fs/fs_parse: Remove unused and problematic validate_constant_table() Remove validate_constant_table() since: - It has no caller. - It has below 3 bugs for good constant table array array[] which must end with a empty entry, and take below invocation for explaination: validate_constant_table(array, ARRAY_SIZE(array), ...) - Always return wrong value due to the last empty entry. - Imprecise error message for missorted case. - Potential NULL pointer dereference since the last pr_err() may use @tbl[i].name NULL pointer to print the last empty entry's name. 
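For reference, a minimal sketch of the convention the removed checks were meant to police: a constant_table handed to the fs parser is terminated by an empty entry (the names and values below are placeholders).

  static const struct constant_table example_modes[] = {
          { "always",    EXAMPLE_MODE_ALWAYS },
          { "never",     EXAMPLE_MODE_NEVER },
          {}    /* terminator - the entry the removed pr_err() tripped over */
  };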
Signed-off-by: Zijun Hu Link: https://lore.kernel.org/20250415-fix_fs-v4-1-5d575124a3ff@quicinc.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs_parser.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 5057faf4f091..5a0e897cae80 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -87,14 +87,9 @@ extern int lookup_constant(const struct constant_table tbl[], const char *name, extern const struct constant_table bool_names[]; #ifdef CONFIG_VALIDATE_FS_PARSER -extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special); extern bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc); #else -static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special) -{ return true; } static inline bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc) { return true; } -- cgit v1.2.3 From 79beea2db0431536d79fc5d321225fb42f955466 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 15 Apr 2025 10:27:50 +0200 Subject: fs: remove uselib() system call This system call has been deprecated for quite a while now. Let's try and remove it from the kernel completely. Link: https://lore.kernel.org/20250415-kanufahren-besten-02ac00e6becd@brauner Acked-by: Kees Cook Signed-off-by: Christian Brauner --- include/linux/binfmts.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 1625c8529e70..65abd5ab8836 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -90,7 +90,6 @@ struct linux_binfmt { struct list_head lh; struct module *module; int (*load_binary)(struct linux_binprm *); - int (*load_shlib)(struct file *); #ifdef CONFIG_COREDUMP int (*core_dump)(struct coredump_params *cprm); unsigned long min_coredump; /* minimal dump size */ -- cgit v1.2.3 From 4ef4ac360101f8bb11b6486ce60cd60ca015be8c Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Thu, 17 Apr 2025 00:16:26 +0200 Subject: device_cgroup: avoid access to ->i_rdev in the common case in devcgroup_inode_permission() The routine gets called for every path component during lookup. ->i_mode is going to be cached on account of permission checks, while ->i_rdev is an area which is most likely cache-cold. gcc 14.2 is kind enough to emit one branch: movzwl (%rbx),%eax mov %eax,%edx and $0xb000,%dx cmp $0x2000,%dx je 11bc This patch is lazy in that I don't know if the ->i_rdev branch makes any sense with the newly added mode check upfront. I am not changing any semantics here though. 
Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/20250416221626.2710239-3-mjguzik@gmail.com Signed-off-by: Christian Brauner --- include/linux/device_cgroup.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index d02f32b7514e..0864773a57e8 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -18,15 +18,16 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask) { short type, access = 0; + if (likely(!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))) + return 0; + if (likely(!inode->i_rdev)) return 0; if (S_ISBLK(inode->i_mode)) type = DEVCG_DEV_BLOCK; - else if (S_ISCHR(inode->i_mode)) + else /* S_ISCHR by the test above */ type = DEVCG_DEV_CHAR; - else - return 0; if (mask & MAY_WRITE) access |= DEVCG_ACC_WRITE; -- cgit v1.2.3 From bd32923e5f02fa7b04d487ec265dc8080d27a257 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 17:18:02 +0100 Subject: io_uring: don't store bgid in req->buf_index Pass buffer group id into the rest of helpers via struct buf_sel_arg and remove all reassignments of req->buf_index back to bgid. Now, it only stores buffer indexes, and the group is provided by callers. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/3ea9fa08113ecb4d9224b943e7806e80a324bdf9.1743437358.git.asml.silence@gmail.com Link: https://lore.kernel.org/io-uring/0c01d76ff12986c2f48614db8610caff8f78c869.1743500909.git.asml.silence@gmail.com/ [axboe: fold in patch from second link] Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index b44d201520d8..3b467879bca8 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -653,8 +653,7 @@ struct io_kiocb { u8 iopoll_completed; /* * Can be either a fixed buffer index, or used with provided buffers. - * For the latter, before issue it points to the buffer group ID, - * and after selection it points to the buffer ID itself. + * For the latter, it points to the selected buffer ID. */ u16 buf_index; -- cgit v1.2.3 From 53db8a71ecb42c2ec5e9c6925269a750255f9af5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 4 Apr 2025 14:50:59 -0600 Subject: io_uring: add support for IORING_OP_PIPE This works just like pipe2(2), except it also supports fixed file descriptors. Used in a similar fashion as for other fd instantiating opcodes (like accept, socket, open, etc), where sqe->file_slot is set appropriately if two direct descriptors are desired rather than a set of normal file descriptors. sqe->addr must be set to a pointer to an array of 2 integers, which is where the fixed/normal file descriptors are copied to. sqe->pipe_flags contains flags, same as what is allowed for pipe2(2). Future expansion of per-op private flags can go in sqe->ioprio, like we do for other opcodes that take both a "syscall" flag set and an io_uring opcode specific flag set. 
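A hedged sketch of how an SQE for the new opcode might be prepared (field names follow the uapi diff below; ring setup, submission and completion handling are assumed):

  int fds[2];
  struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

  memset(sqe, 0, sizeof(*sqe));
  sqe->opcode     = IORING_OP_PIPE;
  sqe->addr       = (unsigned long)fds;  /* the two descriptors are copied here */
  sqe->pipe_flags = O_CLOEXEC;           /* same flag space as pipe2(2) */
  sqe->user_data  = 0x1234;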
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8f1fc12bac46..130f3bc71a69 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -73,6 +73,7 @@ struct io_uring_sqe { __u32 futex_flags; __u32 install_fd_flags; __u32 nop_flags; + __u32 pipe_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -283,6 +284,7 @@ enum io_uring_op { IORING_OP_EPOLL_WAIT, IORING_OP_READV_FIXED, IORING_OP_WRITEV_FIXED, + IORING_OP_PIPE, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 632b3186726984319e2337987de86a442407f30e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 20 Apr 2025 10:31:19 +0100 Subject: io_uring/zcrx: move zcrx region to struct io_zcrx_ifq Refill queue region is a part of zcrx and should stay in struct io_zcrx_ifq. We can't have multiple queues without it, so move it there. As a result there is no context global zcrx region anymore, and the region is looked up together with its ifq. To protect a concurrent mmap from seeing an inconsistent region we were protecting changes to ->zcrx_region with mmap_lock, but now it protect the publishing of the ifq. Reviewed-by: David Wei Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/24f1a728fc03d0166f16d099575457e10d9d90f2.1745141261.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3b467879bca8..06d722289fc5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -448,8 +448,6 @@ struct io_ring_ctx { struct io_mapped_region ring_region; /* used for optimised request parameter and wait argument passing */ struct io_mapped_region param_region; - /* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */ - struct io_mapped_region zcrx_region; }; /* -- cgit v1.2.3 From 19bbfe7b5fcc04d8711e8e1352acc77c1a5c3955 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 21 Apr 2025 10:27:40 +0200 Subject: fs: add S_ANON_INODE This makes it easy to detect proper anonymous inodes and to ensure that we can detect them in codepaths such as readahead(). Readahead on anonymous inodes didn't work because they didn't have a proper mode. Now that they have we need to retain EINVAL being returned otherwise LTP will fail. We also need to ensure that ioctls aren't simply fired like they are for regular files so things like inotify inodes continue to correctly call their own ioctl handlers as in [1]. Reported-by: Xilin Wu Link: https://lore.kernel.org/3A9139D5CD543962+89831381-31b9-4392-87ec-a84a5b3507d8@radxa.com [1] Link: https://lore.kernel.org/7a1a7076-ff6b-4cb0-94e7-7218a0a44028@sirena.org.uk Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..cf7208500cee 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2344,6 +2344,7 @@ struct super_operations { #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. 
fs/cachefiles) */ +#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -2400,6 +2401,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) +#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) -- cgit v1.2.3 From 685f9537a72877693a1ab116d155acc89562c29b Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Fri, 18 Apr 2025 14:13:45 +0900 Subject: RDMA/core: Move ODP capability definitions to uapi The bits are used from both kernel space and userland, so they should be placed in UAPI. Signed-off-by: Daisuke Matsuda Link: https://patch.msgid.link/20250418051345.1022339-2-matsuda-daisuke@fujitsu.com Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 20 ++++++++++---------- include/uapi/rdma/ib_user_verbs.h | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0a7ccd08b365..b06a0ed81bdd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -314,19 +314,19 @@ enum ib_atomic_cap { }; enum ib_odp_general_cap_bits { - IB_ODP_SUPPORT = 1 << 0, - IB_ODP_SUPPORT_IMPLICIT = 1 << 1, + IB_ODP_SUPPORT = IB_UVERBS_ODP_SUPPORT, + IB_ODP_SUPPORT_IMPLICIT = IB_UVERBS_ODP_SUPPORT_IMPLICIT, }; enum ib_odp_transport_cap_bits { - IB_ODP_SUPPORT_SEND = 1 << 0, - IB_ODP_SUPPORT_RECV = 1 << 1, - IB_ODP_SUPPORT_WRITE = 1 << 2, - IB_ODP_SUPPORT_READ = 1 << 3, - IB_ODP_SUPPORT_ATOMIC = 1 << 4, - IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, - IB_ODP_SUPPORT_FLUSH = 1 << 6, - IB_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7, + IB_ODP_SUPPORT_SEND = IB_UVERBS_ODP_SUPPORT_SEND, + IB_ODP_SUPPORT_RECV = IB_UVERBS_ODP_SUPPORT_RECV, + IB_ODP_SUPPORT_WRITE = IB_UVERBS_ODP_SUPPORT_WRITE, + IB_ODP_SUPPORT_READ = IB_UVERBS_ODP_SUPPORT_READ, + IB_ODP_SUPPORT_ATOMIC = IB_UVERBS_ODP_SUPPORT_ATOMIC, + IB_ODP_SUPPORT_SRQ_RECV = IB_UVERBS_ODP_SUPPORT_SRQ_RECV, + IB_ODP_SUPPORT_FLUSH = IB_UVERBS_ODP_SUPPORT_FLUSH, + IB_ODP_SUPPORT_ATOMIC_WRITE = IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE, }; struct ib_odp_caps { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index e16650f0c85d..3b7bd99813e9 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -233,6 +233,22 @@ struct ib_uverbs_ex_query_device { __u32 reserved; }; +enum ib_uverbs_odp_general_cap_bits { + IB_UVERBS_ODP_SUPPORT = 1 << 0, + IB_UVERBS_ODP_SUPPORT_IMPLICIT = 1 << 1, +}; + +enum ib_uverbs_odp_transport_cap_bits { + IB_UVERBS_ODP_SUPPORT_SEND = 1 << 0, + IB_UVERBS_ODP_SUPPORT_RECV = 1 << 1, + IB_UVERBS_ODP_SUPPORT_WRITE = 1 << 2, + IB_UVERBS_ODP_SUPPORT_READ = 1 << 3, + IB_UVERBS_ODP_SUPPORT_ATOMIC = 1 << 4, + IB_UVERBS_ODP_SUPPORT_SRQ_RECV = 1 << 5, + IB_UVERBS_ODP_SUPPORT_FLUSH = 1 << 6, + IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7, +}; + struct ib_uverbs_odp_caps { __aligned_u64 general_caps; struct { -- cgit v1.2.3 From fced8e7d2ddeba7f41b19e065f8c02a9abf9ac00 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 16:54:27 -0500 Subject: drm/amdgpu: convert userq UAPI _pad to flags Reuse the _pad field for flags. 
Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index ef97c0d78b8a..1a451907184c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -355,7 +355,10 @@ struct drm_amdgpu_userq_in { * and doorbell_offset in the doorbell bo. */ __u32 doorbell_offset; - __u32 _pad; + /** + * @flags: flags used for queue parameters + */ + __u32 flags; /** * @queue_va: Virtual address of the GPU memory which holds the queue * object. The queue holds the workload packets. -- cgit v1.2.3 From 024cc8a71aac8194fc2883782d36cbad6a9fe36b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 16:55:36 -0500 Subject: drm/amdgpu/userq: add UAPI for setting queue priority Allow the user to set a queue priority levels: 0 - normal low - most apps (maps to MES AMD_PRIORITY_LEVEL_NORMAL) 1 - low - background jobs (maps to MES AMD_PRIORITY_LEVEL_LOW) 2 - normal high - apps that need relative high (maps to MES AMD_PRIORITY_LEVEL_MEDIUM) 3 - high (admin only - for compositors) (maps to MES AMD_PRIORITY_LEVEL_HIGH) Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1a451907184c..267ed4adcfb9 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -329,6 +329,15 @@ union drm_amdgpu_ctx { #define AMDGPU_USERQ_OP_CREATE 1 #define AMDGPU_USERQ_OP_FREE 2 +/* queue priority levels */ +/* low < normal low < normal high < high */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK 0x3 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ + /* * This structure is a container to pass input configuration * info for all supported userqueue related operations. -- cgit v1.2.3 From 94a62b0f573f868f6f706d96c8c577c2e9b309e0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 17:12:58 -0500 Subject: drm/amdgpu/userq: add UAPI for setting up secure queues If the queues needs to access TMZ surfaces, it must be set up as secure. 
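Combining this with the priority bits above, a hedged sketch of how userspace might compose the new flags word for a protected, higher-priority queue (the secure flag is defined in the hunk below, and the remaining members of drm_amdgpu_userq_in are elided):

  struct drm_amdgpu_userq_in req = { 0 };

  req.flags = (AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH <<
               AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT) |
              AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;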
Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 267ed4adcfb9..284ac25ab5c4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -337,6 +337,8 @@ union drm_amdgpu_ctx { #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ +/* for queues that need access to protected content */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2) /* * This structure is a container to pass input configuration -- cgit v1.2.3 From 98b995660bff011d8e00af03abd74ac7d1ac1390 Mon Sep 17 00:00:00 2001 From: Omri Mann Date: Mon, 21 Apr 2025 13:59:50 +0300 Subject: ublk: Add UBLK_U_CMD_UPDATE_SIZE Currently ublk only allows the size of the ublkb block device to be set via UBLK_CMD_SET_PARAMS before UBLK_CMD_START_DEV is triggered. This does not provide support for extendable user-space block devices without having to stop and restart the underlying ublkb block device causing IO interruption. This patch adds a new ublk command UBLK_U_CMD_UPDATE_SIZE to allow the ublk block device to be resized on-the-fly. Feature flag UBLK_F_UPDATE_SIZE is also added to indicate support. Signed-off-by: Omri Mann Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/2a370ab1-d85b-409d-b762-f9f3f6bdf705@nvidia.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 583b86681c93..be5c6c6b16e0 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -51,6 +51,8 @@ _IOR('u', 0x13, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_DEL_DEV_ASYNC \ _IOR('u', 0x14, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_UPDATE_SIZE \ + _IOWR('u', 0x15, struct ublksrv_ctrl_cmd) /* * 64bits are enough now, and it should be easy to extend in case of @@ -211,6 +213,12 @@ */ #define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) +/* + * Resizing a block device is possible with UBLK_U_CMD_UPDATE_SIZE + * New size is passed in cmd->data[0] and is in units of sectors + */ +#define UBLK_F_UPDATE_SIZE (1ULL << 10) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 4e2c719782a84702db7fc2dc07ced796f308fec7 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 22 Apr 2025 08:32:47 +0200 Subject: x86/cpu: Help users notice when running old Intel microcode Old microcode is bad for users and for kernel developers. For users, it exposes them to known fixed security and/or functional issues. These obviously rarely result in instant dumpster fires in every environment. But it is as important to keep your microcode up to date as it is to keep your kernel up to date. Old microcode also makes kernels harder to debug. A developer looking at an oops need to consider kernel bugs, known CPU issues and unknown CPU issues as possible causes. If they know the microcode is up to date, they can mostly eliminate known CPU issues as the cause. Make it easier to tell if CPU microcode is out of date. Add a list of released microcode. 
If the loaded microcode is older than the release, tell users in a place that folks can find it: /sys/devices/system/cpu/vulnerabilities/old_microcode Tell kernel kernel developers about it with the existing taint flag: TAINT_CPU_OUT_OF_SPEC == Discussion == When a user reports a potential kernel issue, it is very common to ask them to reproduce the issue on mainline. Running mainline, they will (independently from the distro) acquire a more up-to-date microcode version list. If their microcode is old, they will get a warning about the taint and kernel developers can take that into consideration when debugging. Just like any other entry in "vulnerabilities/", users are free to make their own assessment of their exposure. == Microcode Revision Discussion == The microcode versions in the table were generated from the Intel microcode git repo: 8ac9378a8487 ("microcode-20241112 Release") which as of this writing lags behind the latest microcode-20250211. It can be argued that the versions that the kernel picks to call "old" should be a revision or two old. Which specific version is picked is less important to me than picking *a* version and enforcing it. This repository contains only microcode versions that Intel has deemed to be OS-loadable. It is quite possible that the BIOS has loaded a newer microcode than the latest in this repo. If this happens, the system is considered to have new microcode, not old. Specifically, the sysfs file and taint flag answer the question: Is the CPU running on the latest OS-loadable microcode, or something even later that the BIOS loaded? In other words, Intel never publishes an authoritative list of CPUs and latest microcode revisions. Until it does, this is the best that Linux can do. Also note that the "intel-ucode-defs.h" file is simple, ugly and has lots of magic numbers. That's on purpose and should allow a single file to be shared across lots of stable kernel regardless of if they have the new "VFM" infrastructure or not. It was generated with a dumb script. == FAQ == Q: Does this tell me if my system is secure or insecure? A: No. It only tells you if your microcode was old when the system booted. Q: Should the kernel warn if the microcode list itself is too old? A: No. New kernels will get new microcode lists, both mainline and stable. The only way to have an old list is to be running an old kernel in which case you have bigger problems. Q: Is this for security or functional issues? A: Both. Q: If a given microcode update only has functional problems but no security issues, will it be considered old? A: Yes. All microcode image versions within a microcode release are treated identically. Intel appears to make security updates without disclosing them in the release notes. Thus, all updates are considered to be security-relevant. Q: Who runs old microcode? A: Anybody with an old distro. This happens all the time inside of Intel where there are lots of weird systems in labs that might not be getting regular distro updates and might also be running rather exotic microcode images. Q: If I update my microcode after booting will it stop saying "Vulnerable"? A: No. Just like all the other vulnerabilies, you need to reboot before the kernel will reassess your vulnerability. Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Cc: "Ahmed S. Darwish" Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Brian Gerst Cc: John Ogness Cc: Josh Poimboeuf Cc: Juergen Gross Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Link: https://lore.kernel.org/all/20250421195659.CF426C07%40davehans-spike.ostc.intel.com (cherry picked from commit 9127865b15eb0a1bd05ad7efe29489c44394bdc1) --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e3049543008b..1f5cfc4cc04f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -78,6 +78,8 @@ extern ssize_t cpu_show_gds(struct device *dev, extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_old_microcode(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From 092071e0f63c2d2c54810a427d4d9a0df6aad52b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 18 Apr 2025 16:12:55 +0200 Subject: vmlinux.lds: Include .data.rel[.local] into .data section When running in -fPIC mode, the compiler may decide to emit statically initialized data objects into .data.rel or .data.rel.local if they contain absolute references to global or local objects, respectively, which require fixing up at load time. This distinction is irrelevant for the kernel, so fold .data.rel and .data.rel.local into .data. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Brian Gerst Cc: David Woodhouse Cc: Dionna Amalie Glaze Cc: H. Peter Anvin Cc: Juergen Gross Cc: Kees Cook Cc: Kevin Loughlin Cc: Len Brown Cc: Linus Torvalds Cc: Rafael J. Wysocki Cc: Tom Lendacky Link: https://lore.kernel.org/r/20250418141253.2601348-9-ardb+git@google.com --- include/asm-generic/vmlinux.lds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 58a635a6d5bd..66409bc3a4e0 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -108,13 +108,13 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define TEXT_MAIN .text #endif #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data.rel.* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else -#define DATA_MAIN .data +#define DATA_MAIN .data .data.rel .data.rel.local #define SDATA_MAIN .sdata #define RODATA_MAIN .rodata #define BSS_MAIN .bss -- cgit v1.2.3 From 61a0fc33b8538169eb30365b1598eaf33895c34f Mon Sep 17 00:00:00 2001 From: Tejas Vipin Date: Sat, 19 Apr 2025 09:42:10 +0530 Subject: drm/mipi-dsi: Remove mipi_dsi_dcs_write_seq There are no remaining users of mipi_dsi_dcs_write_seq and it can be removed in favor of mipi_dsi_dcs_write_seq_multi. 
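For panel drivers being converted, the replacement pattern looks roughly like the sketch below (command payloads are placeholders; errors accumulate in the context instead of returning early from the caller):

  struct mipi_dsi_multi_context ctx = { .dsi = dsi };

  mipi_dsi_dcs_write_seq_multi(&ctx, MIPI_DCS_EXIT_SLEEP_MODE);
  mipi_dsi_dcs_write_seq_multi(&ctx, 0xb0, 0x04, 0x12);  /* placeholder vendor command */

  return ctx.accum_err;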
Signed-off-by: Tejas Vipin Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250419041210.515517-3-tejasvipin76@gmail.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250419041210.515517-3-tejasvipin76@gmail.com --- include/drm/drm_mipi_dsi.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index d6796ce9f5ff..b37860f4a895 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -417,28 +417,6 @@ void mipi_dsi_dcs_set_tear_off_multi(struct mipi_dsi_multi_context *ctx); mipi_dsi_generic_write_multi(ctx, d, ARRAY_SIZE(d)); \ } while (0) -/** - * mipi_dsi_dcs_write_seq - transmit a DCS command with payload - * - * This macro will print errors for you and will RETURN FROM THE CALLING - * FUNCTION (yes this is non-intuitive) upon error. - * - * Because of the non-intuitive return behavior, THIS MACRO IS DEPRECATED. - * Please replace calls of it with mipi_dsi_dcs_write_seq_multi(). - * - * @dsi: DSI peripheral device - * @cmd: Command - * @seq: buffer containing data to be transmitted - */ -#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...) \ - do { \ - static const u8 d[] = { cmd, seq }; \ - int ret; \ - ret = mipi_dsi_dcs_write_buffer_chatty(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) \ - return ret; \ - } while (0) - /** * mipi_dsi_dcs_write_seq_multi - transmit a DCS command with payload * -- cgit v1.2.3 From a8dc26a0ec43fd416ca781a8030807e65f71cfc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Thu, 27 Mar 2025 12:54:28 +0000 Subject: firmware: exynos-acpm: introduce devm_acpm_get_by_node() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To allow ACPM clients to simply be children of the ACPM node in DT, they need to be able to get the ACPM handle based on that ACPM node directly. Add an API to allow them to do so, devm_acpm_get_by_node(). At the same time, the previous approach of acquiring the ACPM handle via a DT phandle is now obsolete and we can remove devm_acpm_get_by_phandle(), which was there to facilitate that. There are no existing or anticipated upcoming users of that API, because all clients should be children of the ACPM node going forward. Note that no DTs have been merged that use the old approach, so doing this API change in this driver now will not affect any existing DTs or client drivers. 
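A hedged sketch of a client driver, now a child of the ACPM node, acquiring the handle with the new API (error reporting via ERR_PTR is assumed here):

  struct device_node *acpm_np = of_get_parent(dev->of_node);
  const struct acpm_handle *acpm;

  acpm = devm_acpm_get_by_node(dev, acpm_np);
  of_node_put(acpm_np);
  if (IS_ERR(acpm))
          return PTR_ERR(acpm);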
Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250327-acpm-children-v1-2-0afe15ee2ff7@linaro.org Signed-off-by: Krzysztof Kozlowski --- include/linux/firmware/samsung/exynos-acpm-protocol.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/samsung/exynos-acpm-protocol.h b/include/linux/firmware/samsung/exynos-acpm-protocol.h index 76255b5d06b1..f628bf1862c2 100644 --- a/include/linux/firmware/samsung/exynos-acpm-protocol.h +++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h @@ -11,6 +11,7 @@ #include struct acpm_handle; +struct device_node; struct acpm_pmic_ops { int (*read_reg)(const struct acpm_handle *handle, @@ -44,6 +45,7 @@ struct acpm_handle { struct device; -const struct acpm_handle *devm_acpm_get_by_phandle(struct device *dev, - const char *property); +const struct acpm_handle *devm_acpm_get_by_node(struct device *dev, + struct device_node *np); + #endif /* __EXYNOS_ACPM_PROTOCOL_H */ -- cgit v1.2.3 From 094adad91310d9f8f8485251129482f4f3e2c5b3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Apr 2025 15:11:34 +0300 Subject: vxlan: Use a single lock to protect the FDB table Currently, the VXLAN driver stores FDB entries in a hash table with a fixed number of buckets (256). Subsequent patches are going to convert this table to rhashtable with a linked list for entry traversal, as rhashtable is more scalable. In preparation for this conversion, move from a per-bucket spin lock to a single spin lock that protects the entire FDB table. The per-bucket spin locks were introduced by commit fe1e0713bbe8 ("vxlan: Use FDB_HASH_SIZE hash_locks to reduce contention") citing "huge contention when inserting/deleting vxlan_fdbs into the fdb_head". It is not clear from the commit message which code path was holding the spin lock for long periods of time, but the obvious suspect is the FDB cleanup routine (vxlan_cleanup()) that periodically traverses the entire table in order to delete aged-out entries. This will be solved by subsequent patches that will convert the FDB cleanup routine to traverse the linked list of FDB entries using RCU, only acquiring the spin lock when deleting an aged-out entry. The change reduces the size of the VXLAN device structure from 3600 bytes to 2576 bytes. Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250415121143.345227-7-idosch@nvidia.com Reviewed-by: Nikolay Aleksandrov Signed-off-by: Paolo Abeni --- include/net/vxlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 2dd23ee2bacd..272e11708a33 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -296,7 +296,7 @@ struct vxlan_dev { struct vxlan_rdst default_dst; /* default destination */ struct timer_list age_timer; - spinlock_t hash_lock[FDB_HASH_SIZE]; + spinlock_t hash_lock; unsigned int addrcnt; struct gro_cells gro_cells; -- cgit v1.2.3 From 8d45673d2d2e59d03e108c569a3e8c031aa534c8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Apr 2025 15:11:35 +0300 Subject: vxlan: Add a linked list of FDB entries Currently, FDB entries are stored in a hash table with a fixed number of buckets. The table is used for both lookups and entry traversal. Subsequent patches will convert the table to rhashtable which is not suitable for entry traversal. In preparation for this conversion, add FDB entries to a linked list. 
Subsequent patches will convert the driver to use this list when traversing entries during dump, flush, etc. Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250415121143.345227-8-idosch@nvidia.com Reviewed-by: Nikolay Aleksandrov Signed-off-by: Paolo Abeni --- include/net/vxlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 272e11708a33..96a6c6f45c2e 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -307,6 +307,7 @@ struct vxlan_dev { struct hlist_head fdb_head[FDB_HASH_SIZE]; struct rhashtable mdb_tbl; + struct hlist_head fdb_list; struct hlist_head mdb_list; unsigned int mdb_seq; }; -- cgit v1.2.3 From 1f763fa808e92a67feea8364ef80ca3065d74702 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Apr 2025 15:11:43 +0300 Subject: vxlan: Convert FDB table to rhashtable FDB entries are currently stored in a hash table with a fixed number of buckets (256), resulting in performance degradation as the number of entries grows. Solve this by converting the driver to use rhashtable which maintains more or less constant performance regardless of the number of entries. Measured transmitted packets per second using a single pktgen thread with varying number of entries when the transmitted packet always hits the default entry (worst case): Number of entries | Improvement ------------------|------------ 1k | +1.12% 4k | +9.22% 16k | +55% 64k | +585% 256k | +2460% In addition, the change reduces the size of the VXLAN device structure from 2584 bytes to 672 bytes. Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250415121143.345227-16-idosch@nvidia.com Reviewed-by: Nikolay Aleksandrov Signed-off-by: Paolo Abeni --- include/net/vxlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 96a6c6f45c2e..e2f7ca045d3e 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -304,7 +304,7 @@ struct vxlan_dev { struct vxlan_vni_group __rcu *vnigrp; - struct hlist_head fdb_head[FDB_HASH_SIZE]; + struct rhashtable fdb_hash_tbl; struct rhashtable mdb_tbl; struct hlist_head fdb_list; -- cgit v1.2.3 From ad227dab306543208a0658357a82712e678a06f4 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 7 Apr 2025 17:52:00 +0100 Subject: dt-bindings: clock: renesas,r9a09g057-cpg: Add USB2 PHY and GBETH PTP core clocks Add definitions for USB2 PHY core clocks and Gigabit Ethernet PTP reference core clocks in the R9A09G057 CPG DT bindings header file. 
Signed-off-by: Lad Prabhakar Acked-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250407165202.197570-8-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g057-cpg.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g057-cpg.h b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h index 541e6d719bd6..884dbeb1e139 100644 --- a/include/dt-bindings/clock/renesas,r9a09g057-cpg.h +++ b/include/dt-bindings/clock/renesas,r9a09g057-cpg.h @@ -17,5 +17,9 @@ #define R9A09G057_CM33_CLK0 6 #define R9A09G057_CST_0_SWCLKTCK 7 #define R9A09G057_IOTOP_0_SHCLK 8 +#define R9A09G057_USB2_0_CLK_CORE0 9 +#define R9A09G057_USB2_0_CLK_CORE1 10 +#define R9A09G057_GBETH_0_CLK_PTP_REF_I 11 +#define R9A09G057_GBETH_1_CLK_PTP_REF_I 12 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G057_CPG_H__ */ -- cgit v1.2.3 From 4b27406380b0b9ada6b4893bc8f6766dd34fff36 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 11 Apr 2025 15:08:30 +0530 Subject: drm/amdgpu: Add queue id support to the user queue wait IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add queue id support to the user queue wait IOCTL drm_amdgpu_userq_wait structure. This is required to retrieve the wait user queue and maintain the fence driver references in it so that the user queue in the same context releases their reference to the fence drivers at some point before queue destruction. Otherwise, we would gather those references until we don't have any more space left and crash. v2: Modify the UAPI comment as per the mesa and libdrm UAPI comment. Libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/408 Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34493 Signed-off-by: Arunpravin Paneer Selvam Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 284ac25ab5c4..1fd96474e64c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -515,6 +515,12 @@ struct drm_amdgpu_userq_fence_info { }; struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the + * wait queue and maintain the fence driver references in it. + */ + __u32 waitq_id; + __u32 pad; /** * @syncobj_handles: The list of syncobj handles submitted by the user queue * job to get the va/value pairs. -- cgit v1.2.3 From dcbd5dcc956e2331414fd7020b4655df08deeb87 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 1 Apr 2025 08:11:39 +0300 Subject: drm/panel: make prepare/enable and disable/unprepare calls return void Now there are no users of the return value of the drm_panel_prepare(), drm_panel_unprepare(), drm_panel_enable() and drm_panel_disable() calls. Usually these calls are performed from the atomic callbacks, where it is impossible to return an error. Stop returning error codes and return void instead. 
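Callers consequently lose the error-check boilerplate. A minimal sketch of a caller after the change (the bridge structure and function names are invented for illustration):

    #include <drm/drm_panel.h>

    struct my_bridge {
            struct drm_panel *panel;    /* placeholder private data */
    };

    static void my_bridge_enable_panel(struct my_bridge *bridge)
    {
            /* No error codes to propagate from atomic callbacks anyway. */
            drm_panel_prepare(bridge->panel);
            drm_panel_enable(bridge->panel);
    }

    static void my_bridge_disable_panel(struct my_bridge *bridge)
    {
            drm_panel_disable(bridge->panel);
            drm_panel_unprepare(bridge->panel);
    }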
Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250401-panel-return-void-v1-7-93e1be33dc8d@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/drm/drm_panel.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 31d84f901c51..843fb756a295 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -316,11 +316,11 @@ void drm_panel_put(struct drm_panel *panel); void drm_panel_add(struct drm_panel *panel); void drm_panel_remove(struct drm_panel *panel); -int drm_panel_prepare(struct drm_panel *panel); -int drm_panel_unprepare(struct drm_panel *panel); +void drm_panel_prepare(struct drm_panel *panel); +void drm_panel_unprepare(struct drm_panel *panel); -int drm_panel_enable(struct drm_panel *panel); -int drm_panel_disable(struct drm_panel *panel); +void drm_panel_enable(struct drm_panel *panel); +void drm_panel_disable(struct drm_panel *panel); int drm_panel_get_modes(struct drm_panel *panel, struct drm_connector *connector); -- cgit v1.2.3 From 45bd443bfd8697a7da308c16c3e75e2bb353b3d1 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 18 Apr 2025 02:15:19 +0100 Subject: net: 802: Remove unused p8022 code p8022.c defines two external functions, register_8022_client() and unregister_8022_client(), the last use of which was removed in 2018 by commit 7a2e838d28cf ("staging: ipx: delete it from the tree") Remove the p8022.c file, it's corresponding header, and glue surrounding it. There was one place the header was included in vlan.c but it didn't use the functions it declared. There was a comment in net/802/Makefile about checking against net/core/Makefile, but that's at least 20 years old and there's no sign of net/core/Makefile mentioning it. Signed-off-by: Dr. David Alan Gilbert Link: https://patch.msgid.link/20250418011519.145320-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- include/net/p8022.h | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 include/net/p8022.h (limited to 'include') diff --git a/include/net/p8022.h b/include/net/p8022.h deleted file mode 100644 index a29e224ac498..000000000000 --- a/include/net/p8022.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_P8022_H -#define _NET_P8022_H - -struct net_device; -struct packet_type; -struct sk_buff; - -struct datalink_proto * -register_8022_client(unsigned char type, - int (*func)(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev)); -void unregister_8022_client(struct datalink_proto *proto); -#endif -- cgit v1.2.3 From f24303631489d250f330373a59b3412103a93b67 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 24 Mar 2025 09:12:50 +0200 Subject: property: Add functions to iterate named child There are a few use-cases where child nodes with a specific name need to be parsed. Code like: fwnode_for_each_child_node() if (fwnode_name_eq()) ... can be found from a various drivers/subsystems. Adding a macro for this can simplify things a bit. In a few cases the data from the found nodes is later added to an array, which is allocated based on the number of found nodes. One example of such use is the IIO subsystem's ADC channel nodes, where the relevant nodes are named as channel[@N]. Add helpers for iterating and counting device's sub-nodes with certain name instead of open-coding this in every user. 
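With the helpers in place, the open-coded pattern above reduces to something like the sketch below. The function name and the "reg" property handling are illustrative assumptions; the iteration and counting helpers are the ones added here:

    #include <linux/errno.h>
    #include <linux/property.h>

    static int my_adc_count_and_parse(struct device *dev)
    {
            unsigned int nchans;

            nchans = device_get_named_child_node_count(dev, "channel");
            if (!nchans)
                    return -ENODEV;

            /* 'child' is declared and auto-released by the scoped macro. */
            device_for_each_named_child_node_scoped(dev, child, "channel") {
                    u32 reg;

                    if (fwnode_property_read_u32(child, "reg", &reg))
                            return -EINVAL;
                    /* ... describe channel 'reg' ... */
            }

            return 0;
    }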
Suggested-by: Jonathan Cameron Signed-off-by: Matti Vaittinen Reviewed-by: Andy Shevchenko Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/2767173b7b18e974c0bac244688214bd3863ff06.1742560649.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/property.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index e214ecd241eb..3e83babac0b0 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -167,6 +167,10 @@ struct fwnode_handle *fwnode_get_next_available_child_node( for (child = fwnode_get_next_child_node(fwnode, NULL); child; \ child = fwnode_get_next_child_node(fwnode, child)) +#define fwnode_for_each_named_child_node(fwnode, child, name) \ + fwnode_for_each_child_node(fwnode, child) \ + if (!fwnode_name_eq(child, name)) { } else + #define fwnode_for_each_available_child_node(fwnode, child) \ for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\ child = fwnode_get_next_available_child_node(fwnode, child)) @@ -178,11 +182,19 @@ struct fwnode_handle *device_get_next_child_node(const struct device *dev, for (child = device_get_next_child_node(dev, NULL); child; \ child = device_get_next_child_node(dev, child)) +#define device_for_each_named_child_node(dev, child, name) \ + device_for_each_child_node(dev, child) \ + if (!fwnode_name_eq(child, name)) { } else + #define device_for_each_child_node_scoped(dev, child) \ for (struct fwnode_handle *child __free(fwnode_handle) = \ device_get_next_child_node(dev, NULL); \ child; child = device_get_next_child_node(dev, child)) +#define device_for_each_named_child_node_scoped(dev, child, name) \ + device_for_each_child_node_scoped(dev, child) \ + if (!fwnode_name_eq(child, name)) { } else + struct fwnode_handle *fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname); struct fwnode_handle *device_get_named_child_node(const struct device *dev, @@ -210,6 +222,14 @@ int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name); unsigned int device_get_child_node_count(const struct device *dev); +unsigned int fwnode_get_named_child_node_count(const struct fwnode_handle *fwnode, + const char *name); +static inline unsigned int device_get_named_child_node_count(const struct device *dev, + const char *name) +{ + return fwnode_get_named_child_node_count(dev_fwnode(dev), name); +} + static inline int device_property_read_u8(const struct device *dev, const char *propname, u8 *val) { -- cgit v1.2.3 From f3a8f870fa9c84b34aad4c72c8d0a1213ba13a36 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 24 Mar 2025 09:13:03 +0200 Subject: iio: adc: add helpers for parsing ADC nodes There are ADC ICs which may have some of the AIN pins usable for other functions. These ICs may have some of the AIN pins wired so that they should not be used for ADC. A common way of marking pins that can be used as ADC inputs is to add corresponding channel@N nodes in the device tree as described in the ADC binding yaml. Add couple of helper functions which can be used to retrieve the channel information from the device node. 
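A consumer might use the helpers roughly as sketched here. The channel template contents, the MY_ADC_MAX_CHANNEL limit and the assumption that the allocator returns the number of channels found are illustrative and not guaranteed by this excerpt:

    #include <linux/iio/adc-helpers.h>
    #include <linux/iio/iio.h>

    #define MY_ADC_MAX_CHANNEL      7       /* highest accepted reg value, assumed */

    static const struct iio_chan_spec my_adc_chan_template = {
            .type = IIO_VOLTAGE,
            .indexed = 1,
            .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
    };

    static int my_adc_init_channels(struct device *dev, struct iio_dev *indio_dev)
    {
            struct iio_chan_spec *chans;
            int ret;

            /* Allocates one iio_chan_spec per channel@N node below 'dev'. */
            ret = devm_iio_adc_device_alloc_chaninfo_se(dev, &my_adc_chan_template,
                                                        MY_ADC_MAX_CHANNEL, &chans);
            if (ret < 0)
                    return ret;

            indio_dev->channels = chans;
            indio_dev->num_channels = ret;

            return 0;
    }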
Signed-off-by: Matti Vaittinen Reviewed-by: Andy Shevchenko Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/f1d8b3e15237947738912c0d297b3e1e21d8b03e.1742560649.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/adc-helpers.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/iio/adc-helpers.h (limited to 'include') diff --git a/include/linux/iio/adc-helpers.h b/include/linux/iio/adc-helpers.h new file mode 100644 index 000000000000..56b092a2a4c4 --- /dev/null +++ b/include/linux/iio/adc-helpers.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * The industrial I/O ADC firmware property parsing helpers + * + * Copyright (c) 2025 Matti Vaittinen + */ + +#ifndef _INDUSTRIAL_IO_ADC_HELPERS_H_ +#define _INDUSTRIAL_IO_ADC_HELPERS_H_ + +#include + +struct device; +struct iio_chan_spec; + +static inline int iio_adc_device_num_channels(struct device *dev) +{ + return device_get_named_child_node_count(dev, "channel"); +} + +int devm_iio_adc_device_alloc_chaninfo_se(struct device *dev, + const struct iio_chan_spec *template, + int max_chan_id, + struct iio_chan_spec **cs); + +#endif /* _INDUSTRIAL_IO_ADC_HELPERS_H_ */ -- cgit v1.2.3 From cfed1969fcfe69d0b0db780efe2b6fedeaf7f2b3 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Fri, 14 Mar 2025 18:14:46 +0100 Subject: iio: trigger: stm32-lptimer: add support for stm32mp25 Add support for STM32MP25 SoC. Use newly introduced compatible to handle this new HW variant. Add new trigger definitions that can be used by the stm32 analog-to-digital converter. Use compatible data to identify them. Signed-off-by: Olivier Moysan Signed-off-by: Fabrice Gasnier Link: https://patch.msgid.link/20250314171451.3497789-4-fabrice.gasnier@foss.st.com Signed-off-by: Jonathan Cameron --- include/linux/iio/timer/stm32-lptim-trigger.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/iio/timer/stm32-lptim-trigger.h b/include/linux/iio/timer/stm32-lptim-trigger.h index a34dcf6a6001..ce3cf0addb2e 100644 --- a/include/linux/iio/timer/stm32-lptim-trigger.h +++ b/include/linux/iio/timer/stm32-lptim-trigger.h @@ -14,6 +14,15 @@ #define LPTIM1_OUT "lptim1_out" #define LPTIM2_OUT "lptim2_out" #define LPTIM3_OUT "lptim3_out" +#define LPTIM4_OUT "lptim4_out" +#define LPTIM5_OUT "lptim5_out" + +#define LPTIM1_CH1 "lptim1_ch1" +#define LPTIM1_CH2 "lptim1_ch2" +#define LPTIM2_CH1 "lptim2_ch1" +#define LPTIM2_CH2 "lptim2_ch2" +#define LPTIM3_CH1 "lptim3_ch1" +#define LPTIM4_CH1 "lptim4_ch1" #if IS_REACHABLE(CONFIG_IIO_STM32_LPTIMER_TRIGGER) bool is_stm32_lptim_trigger(struct iio_trigger *trig); -- cgit v1.2.3 From 5d1dff5b45b7e81206ada46ae996f58129a68a7c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 31 Mar 2025 13:13:17 +0100 Subject: iio: Adjust internals of handling of direct mode claiming to suit new API. Now there are no remaining callers of iio_device_claim_direct_mode() and iio_device_release_direct_mode() rename those functions to ensure they are not used in new drivers. Also make them now return booleans in line with the sparse friendly static inline wrappers. 
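Drivers use the boolean wrappers along the lines of this sketch, where my_adc_read_channel() is a stand-in for the device-specific read:

    #include <linux/iio/iio.h>

    /* Placeholder for the device-specific single-shot read. */
    static int my_adc_read_channel(struct iio_dev *indio_dev, int channel, int *val);

    static int my_adc_read_raw(struct iio_dev *indio_dev,
                               struct iio_chan_spec const *chan,
                               int *val, int *val2, long mask)
    {
            int ret;

            if (!iio_device_claim_direct(indio_dev))
                    return -EBUSY;

            ret = my_adc_read_channel(indio_dev, chan->channel, val);
            iio_device_release_direct(indio_dev);

            return ret ? ret : IIO_VAL_INT;
    }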
Reviewed-by: David Lechner Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250331121317.1694135-38-jic23@kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 07a0e8132e88..638cf2420fbd 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -659,8 +659,8 @@ void iio_device_unregister(struct iio_dev *indio_dev); int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, struct module *this_mod); int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); -int iio_device_claim_direct_mode(struct iio_dev *indio_dev); -void iio_device_release_direct_mode(struct iio_dev *indio_dev); +bool __iio_device_claim_direct(struct iio_dev *indio_dev); +void __iio_device_release_direct(struct iio_dev *indio_dev); /* * Helper functions that allow claim and release of direct mode @@ -671,9 +671,7 @@ void iio_device_release_direct_mode(struct iio_dev *indio_dev); */ static inline bool iio_device_claim_direct(struct iio_dev *indio_dev) { - int ret = iio_device_claim_direct_mode(indio_dev); - - if (ret) + if (!__iio_device_claim_direct(indio_dev)) return false; __acquire(iio_dev); @@ -683,7 +681,7 @@ static inline bool iio_device_claim_direct(struct iio_dev *indio_dev) static inline void iio_device_release_direct(struct iio_dev *indio_dev) { - iio_device_release_direct_mode(indio_dev); + __iio_device_release_direct(indio_dev); __release(indio_dev); } -- cgit v1.2.3 From 81fe5529e812d7176ff1e48783978504239a990e Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Thu, 3 Apr 2025 18:19:04 +0200 Subject: dt-bindings: iio: adc: adi,ad7606: add SPI offload properties Add #trigger-source-cells property to allow the BUSY output to be used as a SPI offload trigger source to indicate when a sample is ready to be read. Macros are added to adi,ad7606.h for the cell values to help with readability since they are arbitrary values. Signed-off-by: Angelo Dureghello Reviewed-by: Rob Herring (Arm) Reviewed-by: David Lechner Link: https://patch.msgid.link/20250403-wip-bl-spi-offload-ad7606-v1-1-1b00cb638b12@baylibre.com Signed-off-by: Jonathan Cameron --- include/dt-bindings/iio/adc/adi,ad7606.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/dt-bindings/iio/adc/adi,ad7606.h (limited to 'include') diff --git a/include/dt-bindings/iio/adc/adi,ad7606.h b/include/dt-bindings/iio/adc/adi,ad7606.h new file mode 100644 index 000000000000..f38a6d72b6dc --- /dev/null +++ b/include/dt-bindings/iio/adc/adi,ad7606.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_ADI_AD7606_H +#define _DT_BINDINGS_ADI_AD7606_H + +#define AD7606_TRIGGER_EVENT_BUSY 0 +#define AD7606_TRIGGER_EVENT_FRSTDATA 1 + +#endif /* _DT_BINDINGS_ADI_AD7606_H */ -- cgit v1.2.3 From 2086321576425361e8ea0052d9ef6eec55f99a6f Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 9 Apr 2025 20:36:30 +0200 Subject: iio: backend: add support for data source get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add backend support for getting the data source used. The ad3552r HDL implements an internal ramp generator, so adding the getter to allow data source get/set by debugfs. 
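A minimal consumer-side sketch built on the new getter (the wrapper name is made up; only iio_backend_data_source_get() is from this patch):

    #include <linux/iio/backend.h>

    static int my_dac_report_data_source(struct iio_backend *back, unsigned int chan)
    {
            enum iio_backend_data_source src;
            int ret;

            ret = iio_backend_data_source_get(back, chan, &src);
            if (ret)
                    return ret;

            /* e.g. expose 'src' (DMA vs. internal generator) through debugfs. */
            return src;
    }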
Reviewed-by: Nuno Sá Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250409-wip-bl-ad3552r-fixes-v5-3-fb429c3a6515@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/backend.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index e45b7dfbec35..e59d909cb659 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -84,6 +84,7 @@ enum iio_backend_interface_type { * @chan_disable: Disable one channel. * @data_format_set: Configure the data format for a specific channel. * @data_source_set: Configure the data source for a specific channel. + * @data_source_get: Data source getter for a specific channel. * @set_sample_rate: Configure the sampling rate for a specific channel. * @test_pattern_set: Configure a test pattern. * @chan_status: Get the channel status. @@ -115,6 +116,8 @@ struct iio_backend_ops { const struct iio_backend_data_fmt *data); int (*data_source_set)(struct iio_backend *back, unsigned int chan, enum iio_backend_data_source data); + int (*data_source_get)(struct iio_backend *back, unsigned int chan, + enum iio_backend_data_source *data); int (*set_sample_rate)(struct iio_backend *back, unsigned int chan, u64 sample_rate_hz); int (*test_pattern_set)(struct iio_backend *back, @@ -176,6 +179,8 @@ int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan, const struct iio_backend_data_fmt *data); int iio_backend_data_source_set(struct iio_backend *back, unsigned int chan, enum iio_backend_data_source data); +int iio_backend_data_source_get(struct iio_backend *back, unsigned int chan, + enum iio_backend_data_source *data); int iio_backend_set_sampling_freq(struct iio_backend *back, unsigned int chan, u64 sample_rate_hz); int iio_backend_test_pattern_set(struct iio_backend *back, -- cgit v1.2.3 From 74e5b13a1b0f10c5a5c6168f6915620a1d369fae Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 21 Apr 2025 15:52:50 -0700 Subject: lsm: Move security_netlink_send to under CONFIG_SECURITY_NETWORK security_netlink_send() is a networking hook, so it fits better under CONFIG_SECURITY_NETWORK. 
Signed-off-by: Song Liu Signed-off-by: Paul Moore --- include/linux/security.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index cc9b54d95d22..dba349629229 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -563,7 +563,6 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *ctx, int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value); int security_setprocattr(int lsmid, const char *name, void *value, size_t size); -int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, struct lsm_context *cp); int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp); @@ -1527,11 +1526,6 @@ static inline int security_setprocattr(int lsmid, char *name, void *value, return -EINVAL; } -static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb) -{ - return 0; -} - static inline int security_ismaclabel(const char *name) { return 0; @@ -1629,6 +1623,7 @@ static inline int security_watch_key(struct key *key) #ifdef CONFIG_SECURITY_NETWORK +int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk); int security_unix_may_send(struct socket *sock, struct socket *other); int security_socket_create(int family, int type, int protocol, int kern); @@ -1684,6 +1679,11 @@ int security_sctp_assoc_established(struct sctp_association *asoc, int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk); #else /* CONFIG_SECURITY_NETWORK */ +static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb) +{ + return 0; +} + static inline int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk) -- cgit v1.2.3 From 145436ae01193c0a379fd3ea9c4fbdf32863db1f Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Wed, 16 Apr 2025 19:14:49 +0200 Subject: net: phy: Add helper for getting MAC termination resistance Add helper which returns the MAC termination resistance value. Modifying the resistance to an appropriate value can reduce signal reflections and therefore improve signal quality. Reviewed-by: Russell King (Oracle) Signed-off-by: Dimitri Fedrau Link: https://patch.msgid.link/20250416-dp83822-mac-impedance-v3-3-028ac426cddb@liebherr.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index fb755358d965..066a28a4b64b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2040,6 +2040,9 @@ int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, enum ethtool_link_mode_bit_indices linkmode, u32 *val); +int phy_get_mac_termination(struct phy_device *phydev, struct device *dev, + u32 *val); + void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, bool *tx_pause, bool *rx_pause); -- cgit v1.2.3 From 0e0a7e3719bc8cbe6d6e30b3e81f21472ecba5bc Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Mon, 21 Apr 2025 18:16:32 -0700 Subject: xdp: create locked/unlocked instances of xdp redirect target setters Commit 03df156dd3a6 ("xdp: double protect netdev->xdp_flags with netdev->lock") introduces the netdev lock to xdp_set_features_flag(). 
The change includes a _locked version of the method, as it is possible for a driver to have already acquired the netdev lock before calling this helper. However, the same applies to xdp_features_(set|clear)_redirect_flags(), which ends up calling the unlocked version of xdp_set_features_flags() leading to deadlocks in GVE, which grabs the netdev lock as part of its suspend, reset, and shutdown processes: [ 833.265543] WARNING: possible recursive locking detected [ 833.270949] 6.15.0-rc1 #6 Tainted: G E [ 833.276271] -------------------------------------------- [ 833.281681] systemd-shutdow/1 is trying to acquire lock: [ 833.287090] ffff949d2b148c68 (&dev->lock){+.+.}-{4:4}, at: xdp_set_features_flag+0x29/0x90 [ 833.295470] [ 833.295470] but task is already holding lock: [ 833.301400] ffff949d2b148c68 (&dev->lock){+.+.}-{4:4}, at: gve_shutdown+0x44/0x90 [gve] [ 833.309508] [ 833.309508] other info that might help us debug this: [ 833.316130] Possible unsafe locking scenario: [ 833.316130] [ 833.322142] CPU0 [ 833.324681] ---- [ 833.327220] lock(&dev->lock); [ 833.330455] lock(&dev->lock); [ 833.333689] [ 833.333689] *** DEADLOCK *** [ 833.333689] [ 833.339701] May be due to missing lock nesting notation [ 833.339701] [ 833.346582] 5 locks held by systemd-shutdow/1: [ 833.351205] #0: ffffffffa9c89130 (system_transition_mutex){+.+.}-{4:4}, at: __se_sys_reboot+0xe6/0x210 [ 833.360695] #1: ffff93b399e5c1b8 (&dev->mutex){....}-{4:4}, at: device_shutdown+0xb4/0x1f0 [ 833.369144] #2: ffff949d19a471b8 (&dev->mutex){....}-{4:4}, at: device_shutdown+0xc2/0x1f0 [ 833.377603] #3: ffffffffa9eca050 (rtnl_mutex){+.+.}-{4:4}, at: gve_shutdown+0x33/0x90 [gve] [ 833.386138] #4: ffff949d2b148c68 (&dev->lock){+.+.}-{4:4}, at: gve_shutdown+0x44/0x90 [gve] Introduce xdp_features_(set|clear)_redirect_target_locked() versions which assume that the netdev lock has already been acquired before setting the XDP feature flag and update GVE to use the locked version. Fixes: 03df156dd3a6 ("xdp: double protect netdev->xdp_flags with netdev->lock") Tested-by: Mina Almasry Reviewed-by: Willem de Bruijn Reviewed-by: Harshitha Ramamurthy Signed-off-by: Joshua Washington Acked-by: Stanislav Fomichev Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20250422011643.3509287-1-joshwash@google.com Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 20e41b5ff319..b40f1f96cb11 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -618,7 +618,10 @@ bool bpf_dev_bound_kfunc_id(u32 btf_id); void xdp_set_features_flag(struct net_device *dev, xdp_features_t val); void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val); void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg); +void xdp_features_set_redirect_target_locked(struct net_device *dev, + bool support_sg); void xdp_features_clear_redirect_target(struct net_device *dev); +void xdp_features_clear_redirect_target_locked(struct net_device *dev); #else static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; } static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; } -- cgit v1.2.3 From 2ee97c190b2b6769d7c1688e80b84c951177a22b Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 18 Apr 2025 00:36:49 +0200 Subject: crypto: engine - Realign struct crypto_engine to save 8 bytes Realign struct crypto_engine to reduce its size by 8 bytes. 
Total size is now 192 bytes, allowing it to fit within 3 cachelines instead of 4. pahole output before: /* size: 200, cachelines: 4, members: 17 */ /* sum members: 183, holes: 3, sum holes: 17 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 8 bytes */ and after: /* size: 192, cachelines: 3, members: 17 */ /* sum members: 183, holes: 2, sum holes: 9 */ /* paddings: 1, sum paddings: 4 */ No functional changes intended. Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu --- include/crypto/internal/engine.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/engine.h b/include/crypto/internal/engine.h index fbf4be56cf12..b6a4ea2240fc 100644 --- a/include/crypto/internal/engine.h +++ b/include/crypto/internal/engine.h @@ -27,10 +27,10 @@ struct device; * @retry_support: indication that the hardware allows re-execution * of a failed backlog request * crypto-engine, in head position to keep order + * @rt: whether this queue is set to run as a realtime task * @list: link with the global crypto engine list * @queue_lock: spinlock to synchronise access to request queue * @queue: the crypto queue of the engine - * @rt: whether this queue is set to run as a realtime task * @prepare_crypt_hardware: a request will soon arrive from the queue * so the subsystem requests the driver to prepare the hardware * by issuing this call @@ -51,14 +51,13 @@ struct crypto_engine { bool running; bool retry_support; + bool rt; struct list_head list; spinlock_t queue_lock; struct crypto_queue queue; struct device *dev; - bool rt; - int (*prepare_crypt_hardware)(struct crypto_engine *engine); int (*unprepare_crypt_hardware)(struct crypto_engine *engine); int (*do_batch_requests)(struct crypto_engine *engine); -- cgit v1.2.3 From 7650f826f7b2d84782f9147c51687ff0364125e9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:58:41 +0800 Subject: crypto: shash - Handle partial blocks in API Provide an option to handle the partial blocks in the shash API. Almost every hash algorithm has a block size and are only able to hash partial blocks on finalisation. Rather than duplicating the partial block handling many times, add this functionality to the shash API. It is optional (e.g., hmac would never need this by relying on the partial block handling of the underlying hash), and to enable it set the bit CRYPTO_AHASH_ALG_BLOCK_ONLY. The export format is always that of the underlying hash export, plus the partial block buffer, followed by a single-byte for the partial block length. Set the bit CRYPTO_AHASH_ALG_FINAL_NONZERO to withhold an extra byte in the partial block. This will come in handy when this is extended to ahash where hardware often can't deal with a zero-length final. It will also be used for algorithms requiring an extra block for finalisation (e.g., cmac). As an optimisation, set the bit CRYPTO_AHASH_ALG_FINUP_MAX if the algorithm wishes to get as much data as possible instead of just the last partial block. The descriptor will be zeroed after finalisation. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 82 +++++++++++++++++++++++++++--------------- include/crypto/internal/hash.h | 15 ++++++++ include/linux/crypto.h | 2 ++ 3 files changed, 70 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 58ac1423dc38..5f87d1040a7c 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -85,6 +85,8 @@ struct ahash_request { * transformation object. 
Data processing can happen synchronously * [SHASH] or asynchronously [AHASH] at this point. Driver must not use * req->result. + * For block-only algorithms, @update must return the number + * of bytes to store in the API partial block buffer. * @final: **[mandatory]** Retrieve result from the driver. This function finalizes the * transformation and retrieves the resulting hash from the driver and * pushes it back to upper layers. No data processing happens at this @@ -905,6 +907,18 @@ int crypto_hash_digest(struct crypto_ahash *tfm, const u8 *data, */ int crypto_shash_export(struct shash_desc *desc, void *out); +/** + * crypto_shash_export_core() - extract core state for message digest + * @desc: reference to the operational state handle whose state is exported + * @out: output buffer of sufficient size that can hold the hash state + * + * Export the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the export creation was successful; < 0 if an error occurred + */ +int crypto_shash_export_core(struct shash_desc *desc, void *out); + /** * crypto_shash_import() - import operational state * @desc: reference to the operational state handle the state imported into @@ -919,6 +933,18 @@ int crypto_shash_export(struct shash_desc *desc, void *out); */ int crypto_shash_import(struct shash_desc *desc, const void *in); +/** + * crypto_shash_import_core() - import core state + * @desc: reference to the operational state handle the state imported into + * @in: buffer holding the state + * + * Import the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the import was successful; < 0 if an error occurred + */ +int crypto_shash_import_core(struct shash_desc *desc, const void *in); + /** * crypto_shash_init() - (re)initialize message digest * @desc: operational state handle that is already filled @@ -931,15 +957,25 @@ int crypto_shash_import(struct shash_desc *desc, const void *in); * Return: 0 if the message digest initialization was successful; < 0 if an * error occurred */ -static inline int crypto_shash_init(struct shash_desc *desc) -{ - struct crypto_shash *tfm = desc->tfm; +int crypto_shash_init(struct shash_desc *desc); - if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) - return -ENOKEY; - - return crypto_shash_alg(tfm)->init(desc); -} +/** + * crypto_shash_finup() - calculate message digest of buffer + * @desc: see crypto_shash_final() + * @data: see crypto_shash_update() + * @len: see crypto_shash_update() + * @out: see crypto_shash_final() + * + * This function is a "short-hand" for the function calls of + * crypto_shash_update and crypto_shash_final. The parameters have the same + * meaning as discussed for those separate functions. + * + * Context: Softirq or process context. 
+ * Return: 0 if the message digest creation was successful; < 0 if an error + * occurred + */ +int crypto_shash_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out); /** * crypto_shash_update() - add data to message digest for processing @@ -953,8 +989,11 @@ static inline int crypto_shash_init(struct shash_desc *desc) * Return: 0 if the message digest update was successful; < 0 if an error * occurred */ -int crypto_shash_update(struct shash_desc *desc, const u8 *data, - unsigned int len); +static inline int crypto_shash_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return crypto_shash_finup(desc, data, len, NULL); +} /** * crypto_shash_final() - calculate message digest @@ -970,25 +1009,10 @@ int crypto_shash_update(struct shash_desc *desc, const u8 *data, * Return: 0 if the message digest creation was successful; < 0 if an error * occurred */ -int crypto_shash_final(struct shash_desc *desc, u8 *out); - -/** - * crypto_shash_finup() - calculate message digest of buffer - * @desc: see crypto_shash_final() - * @data: see crypto_shash_update() - * @len: see crypto_shash_update() - * @out: see crypto_shash_final() - * - * This function is a "short-hand" for the function calls of - * crypto_shash_update and crypto_shash_final. The parameters have the same - * meaning as discussed for those separate functions. - * - * Context: Softirq or process context. - * Return: 0 if the message digest creation was successful; < 0 if an error - * occurred - */ -int crypto_shash_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out); +static inline int crypto_shash_final(struct shash_desc *desc, u8 *out) +{ + return crypto_shash_finup(desc, NULL, 0, out); +} static inline void shash_desc_zero(struct shash_desc *desc) { diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 45c728ac2621..1e80dd084a23 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -11,6 +11,15 @@ #include #include +/* Set this bit to handle partial blocks in the API. */ +#define CRYPTO_AHASH_ALG_BLOCK_ONLY 0x01000000 + +/* Set this bit if final requires at least one byte. */ +#define CRYPTO_AHASH_ALG_FINAL_NONZERO 0x02000000 + +/* Set this bit if finup can deal with multiple blocks. */ +#define CRYPTO_AHASH_ALG_FINUP_MAX 0x04000000 + #define HASH_FBREQ_ON_STACK(name, req) \ char __##name##_req[sizeof(struct ahash_request) + \ MAX_SYNC_HASH_REQSIZE] CRYPTO_MINALIGN_ATTR; \ @@ -281,5 +290,11 @@ static inline struct ahash_request *ahash_fbreq_on_stack_init( return req; } +/* Return the state size without partial block for block-only algorithms. */ +static inline unsigned int crypto_shash_coresize(struct crypto_shash *tfm) +{ + return crypto_shash_statesize(tfm) - crypto_shash_blocksize(tfm) - 1; +} + #endif /* _CRYPTO_INTERNAL_HASH_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b89b1b348095..f691ce01745e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -136,6 +136,8 @@ /* Set if the algorithm supports request chains and virtual addresses. */ #define CRYPTO_ALG_REQ_CHAIN 0x00040000 +/* The high bits 0xff000000 are reserved for type-specific flags. */ + /* * Transform masks and values (for crt_flags). */ -- cgit v1.2.3 From aa54e17020852858d6e1639f1c7ef516fd217e08 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:58:43 +0800 Subject: crypto: blake2b-generic - Use API partial block handling Use the Crypto API partial block handling. 
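The conversion follows the block-only pattern introduced by the earlier shash change: the algorithm sets CRYPTO_AHASH_ALG_BLOCK_ONLY and its ->update() consumes whole blocks only, returning the leftover byte count for the API to buffer. A rough, generic sketch of that pattern (the my_hash_* names and sizes are placeholders, not the blake2b code itself):

    #include <crypto/internal/hash.h>

    #define MY_HASH_BLOCK_SIZE      64              /* placeholder block size */

    struct my_hash_state { u32 h[8]; u64 count; };  /* placeholder state */

    /* Placeholder compression core working on whole blocks. */
    static void my_hash_compress(struct my_hash_state *st, const u8 *data,
                                 unsigned int nblocks);

    /* ->update() for a CRYPTO_AHASH_ALG_BLOCK_ONLY algorithm: process full
     * blocks, hand the partial remainder back to the shash core to buffer. */
    static int my_hash_update(struct shash_desc *desc, const u8 *data,
                              unsigned int len)
    {
            struct my_hash_state *st = shash_desc_ctx(desc);
            unsigned int remain = len % MY_HASH_BLOCK_SIZE;

            my_hash_compress(st, data, len / MY_HASH_BLOCK_SIZE);

            return remain;  /* fed back later via ->finup() */
    }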
Signed-off-by: Herbert Xu --- include/crypto/blake2b.h | 14 ++------- include/crypto/internal/blake2b.h | 66 +++++++++++++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h index 0c0176285349..68da368dc182 100644 --- a/include/crypto/blake2b.h +++ b/include/crypto/blake2b.h @@ -11,6 +11,8 @@ enum blake2b_lengths { BLAKE2B_BLOCK_SIZE = 128, BLAKE2B_HASH_SIZE = 64, BLAKE2B_KEY_SIZE = 64, + BLAKE2B_STATE_SIZE = 80, + BLAKE2B_DESC_SIZE = 96, BLAKE2B_160_HASH_SIZE = 20, BLAKE2B_256_HASH_SIZE = 32, @@ -25,7 +27,6 @@ struct blake2b_state { u64 f[2]; u8 buf[BLAKE2B_BLOCK_SIZE]; unsigned int buflen; - unsigned int outlen; }; enum blake2b_iv { @@ -40,7 +41,7 @@ enum blake2b_iv { }; static inline void __blake2b_init(struct blake2b_state *state, size_t outlen, - const void *key, size_t keylen) + size_t keylen) { state->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen); state->h[1] = BLAKE2B_IV1; @@ -52,15 +53,6 @@ static inline void __blake2b_init(struct blake2b_state *state, size_t outlen, state->h[7] = BLAKE2B_IV7; state->t[0] = 0; state->t[1] = 0; - state->f[0] = 0; - state->f[1] = 0; - state->buflen = 0; - state->outlen = outlen; - if (keylen) { - memcpy(state->buf, key, keylen); - memset(&state->buf[keylen], 0, BLAKE2B_BLOCK_SIZE - keylen); - state->buflen = BLAKE2B_BLOCK_SIZE; - } } #endif /* _CRYPTO_BLAKE2B_H */ diff --git a/include/crypto/internal/blake2b.h b/include/crypto/internal/blake2b.h index 982fe5e8471c..48dc9830400d 100644 --- a/include/crypto/internal/blake2b.h +++ b/include/crypto/internal/blake2b.h @@ -7,16 +7,27 @@ #ifndef _CRYPTO_INTERNAL_BLAKE2B_H #define _CRYPTO_INTERNAL_BLAKE2B_H +#include #include #include +#include +#include +#include +#include +#include #include - -void blake2b_compress_generic(struct blake2b_state *state, - const u8 *block, size_t nblocks, u32 inc); +#include static inline void blake2b_set_lastblock(struct blake2b_state *state) { state->f[0] = -1; + state->f[1] = 0; +} + +static inline void blake2b_set_nonlast(struct blake2b_state *state) +{ + state->f[0] = 0; + state->f[1] = 0; } typedef void (*blake2b_compress_t)(struct blake2b_state *state, @@ -30,6 +41,7 @@ static inline void __blake2b_update(struct blake2b_state *state, if (unlikely(!inlen)) return; + blake2b_set_nonlast(state); if (inlen > fill) { memcpy(state->buf + state->buflen, in, fill); (*compress)(state, state->buf, 1, BLAKE2B_BLOCK_SIZE); @@ -49,6 +61,7 @@ static inline void __blake2b_update(struct blake2b_state *state, } static inline void __blake2b_final(struct blake2b_state *state, u8 *out, + unsigned int outlen, blake2b_compress_t compress) { int i; @@ -59,13 +72,13 @@ static inline void __blake2b_final(struct blake2b_state *state, u8 *out, (*compress)(state, state->buf, 1, state->buflen); for (i = 0; i < ARRAY_SIZE(state->h); i++) __cpu_to_le64s(&state->h[i]); - memcpy(out, state->h, state->outlen); + memcpy(out, state->h, outlen); } /* Helper functions for shash implementations of BLAKE2b */ struct blake2b_tfm_ctx { - u8 key[BLAKE2B_KEY_SIZE]; + u8 key[BLAKE2B_BLOCK_SIZE]; unsigned int keylen; }; @@ -74,10 +87,13 @@ static inline int crypto_blake2b_setkey(struct crypto_shash *tfm, { struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(tfm); - if (keylen == 0 || keylen > BLAKE2B_KEY_SIZE) + if (keylen > BLAKE2B_KEY_SIZE) return -EINVAL; + BUILD_BUG_ON(BLAKE2B_KEY_SIZE > BLAKE2B_BLOCK_SIZE); + memcpy(tctx->key, key, keylen); + memset(tctx->key + keylen, 0, 
BLAKE2B_BLOCK_SIZE - keylen); tctx->keylen = keylen; return 0; @@ -89,8 +105,9 @@ static inline int crypto_blake2b_init(struct shash_desc *desc) struct blake2b_state *state = shash_desc_ctx(desc); unsigned int outlen = crypto_shash_digestsize(desc->tfm); - __blake2b_init(state, outlen, tctx->key, tctx->keylen); - return 0; + __blake2b_init(state, outlen, tctx->keylen); + return tctx->keylen ? + crypto_shash_update(desc, tctx->key, BLAKE2B_BLOCK_SIZE) : 0; } static inline int crypto_blake2b_update(struct shash_desc *desc, @@ -103,12 +120,43 @@ static inline int crypto_blake2b_update(struct shash_desc *desc, return 0; } +static inline int crypto_blake2b_update_bo(struct shash_desc *desc, + const u8 *in, unsigned int inlen, + blake2b_compress_t compress) +{ + struct blake2b_state *state = shash_desc_ctx(desc); + + blake2b_set_nonlast(state); + compress(state, in, inlen / BLAKE2B_BLOCK_SIZE, BLAKE2B_BLOCK_SIZE); + return inlen - round_down(inlen, BLAKE2B_BLOCK_SIZE); +} + static inline int crypto_blake2b_final(struct shash_desc *desc, u8 *out, blake2b_compress_t compress) { + unsigned int outlen = crypto_shash_digestsize(desc->tfm); struct blake2b_state *state = shash_desc_ctx(desc); - __blake2b_final(state, out, compress); + __blake2b_final(state, out, outlen, compress); + return 0; +} + +static inline int crypto_blake2b_finup(struct shash_desc *desc, const u8 *in, + unsigned int inlen, u8 *out, + blake2b_compress_t compress) +{ + struct blake2b_state *state = shash_desc_ctx(desc); + u8 buf[BLAKE2B_BLOCK_SIZE]; + int i; + + memcpy(buf, in, inlen); + memset(buf + inlen, 0, BLAKE2B_BLOCK_SIZE - inlen); + blake2b_set_lastblock(state); + compress(state, buf, 1, inlen); + for (i = 0; i < ARRAY_SIZE(state->h); i++) + __cpu_to_le64s(&state->h[i]); + memcpy(out, state->h, crypto_shash_digestsize(desc->tfm)); + memzero_explicit(buf, sizeof(buf)); return 0; } -- cgit v1.2.3 From cc28260ab4fbe110981bcffff5da50befcb5119e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:58:45 +0800 Subject: crypto: arm/blake2b - Use API partial block handling Use the Crypto API partial block handling. Also remove the unnecessary SIMD fallback path. Signed-off-by: Herbert Xu --- include/crypto/blake2b.h | 21 +++++++------ include/crypto/internal/blake2b.h | 62 --------------------------------------- 2 files changed, 10 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h index 68da368dc182..dd7694477e50 100644 --- a/include/crypto/blake2b.h +++ b/include/crypto/blake2b.h @@ -7,12 +7,20 @@ #include #include +struct blake2b_state { + /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */ + u64 h[8]; + u64 t[2]; + /* The true state ends here. The rest is temporary storage. */ + u64 f[2]; +}; + enum blake2b_lengths { BLAKE2B_BLOCK_SIZE = 128, BLAKE2B_HASH_SIZE = 64, BLAKE2B_KEY_SIZE = 64, - BLAKE2B_STATE_SIZE = 80, - BLAKE2B_DESC_SIZE = 96, + BLAKE2B_STATE_SIZE = offsetof(struct blake2b_state, f), + BLAKE2B_DESC_SIZE = sizeof(struct blake2b_state), BLAKE2B_160_HASH_SIZE = 20, BLAKE2B_256_HASH_SIZE = 32, @@ -20,15 +28,6 @@ enum blake2b_lengths { BLAKE2B_512_HASH_SIZE = 64, }; -struct blake2b_state { - /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. 
*/ - u64 h[8]; - u64 t[2]; - u64 f[2]; - u8 buf[BLAKE2B_BLOCK_SIZE]; - unsigned int buflen; -}; - enum blake2b_iv { BLAKE2B_IV0 = 0x6A09E667F3BCC908ULL, BLAKE2B_IV1 = 0xBB67AE8584CAA73BULL, diff --git a/include/crypto/internal/blake2b.h b/include/crypto/internal/blake2b.h index 48dc9830400d..3e09e2485306 100644 --- a/include/crypto/internal/blake2b.h +++ b/include/crypto/internal/blake2b.h @@ -33,48 +33,6 @@ static inline void blake2b_set_nonlast(struct blake2b_state *state) typedef void (*blake2b_compress_t)(struct blake2b_state *state, const u8 *block, size_t nblocks, u32 inc); -static inline void __blake2b_update(struct blake2b_state *state, - const u8 *in, size_t inlen, - blake2b_compress_t compress) -{ - const size_t fill = BLAKE2B_BLOCK_SIZE - state->buflen; - - if (unlikely(!inlen)) - return; - blake2b_set_nonlast(state); - if (inlen > fill) { - memcpy(state->buf + state->buflen, in, fill); - (*compress)(state, state->buf, 1, BLAKE2B_BLOCK_SIZE); - state->buflen = 0; - in += fill; - inlen -= fill; - } - if (inlen > BLAKE2B_BLOCK_SIZE) { - const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2B_BLOCK_SIZE); - /* Hash one less (full) block than strictly possible */ - (*compress)(state, in, nblocks - 1, BLAKE2B_BLOCK_SIZE); - in += BLAKE2B_BLOCK_SIZE * (nblocks - 1); - inlen -= BLAKE2B_BLOCK_SIZE * (nblocks - 1); - } - memcpy(state->buf + state->buflen, in, inlen); - state->buflen += inlen; -} - -static inline void __blake2b_final(struct blake2b_state *state, u8 *out, - unsigned int outlen, - blake2b_compress_t compress) -{ - int i; - - blake2b_set_lastblock(state); - memset(state->buf + state->buflen, 0, - BLAKE2B_BLOCK_SIZE - state->buflen); /* Padding */ - (*compress)(state, state->buf, 1, state->buflen); - for (i = 0; i < ARRAY_SIZE(state->h); i++) - __cpu_to_le64s(&state->h[i]); - memcpy(out, state->h, outlen); -} - /* Helper functions for shash implementations of BLAKE2b */ struct blake2b_tfm_ctx { @@ -110,16 +68,6 @@ static inline int crypto_blake2b_init(struct shash_desc *desc) crypto_shash_update(desc, tctx->key, BLAKE2B_BLOCK_SIZE) : 0; } -static inline int crypto_blake2b_update(struct shash_desc *desc, - const u8 *in, unsigned int inlen, - blake2b_compress_t compress) -{ - struct blake2b_state *state = shash_desc_ctx(desc); - - __blake2b_update(state, in, inlen, compress); - return 0; -} - static inline int crypto_blake2b_update_bo(struct shash_desc *desc, const u8 *in, unsigned int inlen, blake2b_compress_t compress) @@ -131,16 +79,6 @@ static inline int crypto_blake2b_update_bo(struct shash_desc *desc, return inlen - round_down(inlen, BLAKE2B_BLOCK_SIZE); } -static inline int crypto_blake2b_final(struct shash_desc *desc, u8 *out, - blake2b_compress_t compress) -{ - unsigned int outlen = crypto_shash_digestsize(desc->tfm); - struct blake2b_state *state = shash_desc_ctx(desc); - - __blake2b_final(state, out, outlen, compress); - return 0; -} - static inline int crypto_blake2b_finup(struct shash_desc *desc, const u8 *in, unsigned int inlen, u8 *out, blake2b_compress_t compress) -- cgit v1.2.3 From ef11957b0ad0a2e1c159c172865d6923f1731b57 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:58:47 +0800 Subject: crypto: ghash-generic - Use API partial block handling Use the Crypto API partial block handling. 
Signed-off-by: Herbert Xu --- include/crypto/ghash.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/ghash.h b/include/crypto/ghash.h index f832c9f2aca3..16904f2b5184 100644 --- a/include/crypto/ghash.h +++ b/include/crypto/ghash.h @@ -12,13 +12,14 @@ #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 +struct gf128mul_4k; + struct ghash_ctx { struct gf128mul_4k *gf128; }; struct ghash_desc_ctx { u8 buffer[GHASH_BLOCK_SIZE]; - u32 bytes; }; #endif -- cgit v1.2.3 From 867a2177c2c995d3a0c8b03f9be3b7f4fe84e0b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:58:57 +0800 Subject: crypto: riscv/ghash - Use API partial block handling Use the Crypto API partial block handling. As this was the last user relying on crypto/ghash.h for gf128mul.h, remove the unnecessary inclusion of gf128mul.h from crypto/ghash.h. Signed-off-by: Herbert Xu --- include/crypto/ghash.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/crypto/ghash.h b/include/crypto/ghash.h index 16904f2b5184..043d938e9a2c 100644 --- a/include/crypto/ghash.h +++ b/include/crypto/ghash.h @@ -7,7 +7,6 @@ #define __CRYPTO_GHASH_H__ #include -#include #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 -- cgit v1.2.3 From efd62c85525e212705368b24eb90ef10778190f5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:59:04 +0800 Subject: crypto: md5-generic - Use API partial block handling Use the Crypto API partial block handling. Signed-off-by: Herbert Xu --- include/crypto/md5.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/md5.h b/include/crypto/md5.h index cf9e9dec3d21..198b5d69b92f 100644 --- a/include/crypto/md5.h +++ b/include/crypto/md5.h @@ -8,6 +8,7 @@ #define MD5_HMAC_BLOCK_SIZE 64 #define MD5_BLOCK_WORDS 16 #define MD5_HASH_WORDS 4 +#define MD5_STATE_SIZE 24 #define MD5_H0 0x67452301UL #define MD5_H1 0xefcdab89UL @@ -18,8 +19,8 @@ extern const u8 md5_zero_message_hash[MD5_DIGEST_SIZE]; struct md5_state { u32 hash[MD5_HASH_WORDS]; - u32 block[MD5_BLOCK_WORDS]; u64 byte_count; + u32 block[MD5_BLOCK_WORDS]; }; #endif -- cgit v1.2.3 From 0865a89413652ecad551c9b782fa5146259640e4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:59:13 +0800 Subject: crypto: x86/sha1 - Use API partial block handling Use the Crypto API partial block handling. Also remove the unnecessary SIMD fallback path. 
Signed-off-by: Herbert Xu --- include/crypto/sha1.h | 1 + include/crypto/sha1_base.h | 42 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/crypto/sha1.h b/include/crypto/sha1.h index 044ecea60ac8..dd6de4a4d6e6 100644 --- a/include/crypto/sha1.h +++ b/include/crypto/sha1.h @@ -10,6 +10,7 @@ #define SHA1_DIGEST_SIZE 20 #define SHA1_BLOCK_SIZE 64 +#define SHA1_STATE_SIZE offsetof(struct sha1_state, buffer) #define SHA1_H0 0x67452301UL #define SHA1_H1 0xefcdab89UL diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h index 0c342ed0d038..b23cfad18ce2 100644 --- a/include/crypto/sha1_base.h +++ b/include/crypto/sha1_base.h @@ -10,10 +10,9 @@ #include #include -#include -#include +#include #include - +#include #include typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks); @@ -70,6 +69,19 @@ static inline int sha1_base_do_update(struct shash_desc *desc, return 0; } +static inline int sha1_base_do_update_blocks(struct shash_desc *desc, + const u8 *data, + unsigned int len, + sha1_block_fn *block_fn) +{ + unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE); + struct sha1_state *sctx = shash_desc_ctx(desc); + + sctx->count += len - remain; + block_fn(sctx, data, len / SHA1_BLOCK_SIZE); + return remain; +} + static inline int sha1_base_do_finalize(struct shash_desc *desc, sha1_block_fn *block_fn) { @@ -93,6 +105,29 @@ static inline int sha1_base_do_finalize(struct shash_desc *desc, return 0; } +static inline int sha1_base_do_finup(struct shash_desc *desc, + const u8 *src, unsigned int len, + sha1_block_fn *block_fn) +{ + unsigned int bit_offset = SHA1_BLOCK_SIZE / 8 - 1; + struct sha1_state *sctx = shash_desc_ctx(desc); + union { + __be64 b64[SHA1_BLOCK_SIZE / 4]; + u8 u8[SHA1_BLOCK_SIZE * 2]; + } block = {}; + + if (len >= bit_offset * 8) + bit_offset += SHA1_BLOCK_SIZE / 8; + memcpy(&block, src, len); + block.u8[len] = 0x80; + sctx->count += len; + block.b64[bit_offset] = cpu_to_be64(sctx->count << 3); + block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SHA1_BLOCK_SIZE); + memzero_explicit(&block, sizeof(block)); + + return 0; +} + static inline int sha1_base_finish(struct shash_desc *desc, u8 *out) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -102,7 +137,6 @@ static inline int sha1_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) put_unaligned_be32(sctx->state[i], digest++); - memzero_explicit(sctx, sizeof(*sctx)); return 0; } -- cgit v1.2.3 From a2d910b8466af942d34478b4c480360f0b3d29ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:59:20 +0800 Subject: crypto: sha1-generic - Use API partial block handling Use the Crypto API partial block handling. Signed-off-by: Herbert Xu --- include/crypto/sha1.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/crypto/sha1.h b/include/crypto/sha1.h index dd6de4a4d6e6..f48230b1413c 100644 --- a/include/crypto/sha1.h +++ b/include/crypto/sha1.h @@ -26,14 +26,6 @@ struct sha1_state { u8 buffer[SHA1_BLOCK_SIZE]; }; -struct shash_desc; - -extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); - /* * An implementation of SHA-1's compression function. Don't use in new code! 
* You shouldn't be using SHA-1, and even if you *have* to use SHA-1, this isn't -- cgit v1.2.3 From a86ee2d74013e071efaa18631f9aa313262e5b1f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:59:38 +0800 Subject: crypto: sha1_base - Remove partial block helpers Now that all sha1_base users have been converted to use the API partial block handling, remove the partial block helpers. Signed-off-by: Herbert Xu --- include/crypto/sha1_base.h | 61 ---------------------------------------------- 1 file changed, 61 deletions(-) (limited to 'include') diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h index b23cfad18ce2..62701d136c79 100644 --- a/include/crypto/sha1_base.h +++ b/include/crypto/sha1_base.h @@ -31,44 +31,6 @@ static inline int sha1_base_init(struct shash_desc *desc) return 0; } -static inline int sha1_base_do_update(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sha1_block_fn *block_fn) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; - - sctx->count += len; - - if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) { - int blocks; - - if (partial) { - int p = SHA1_BLOCK_SIZE - partial; - - memcpy(sctx->buffer + partial, data, p); - data += p; - len -= p; - - block_fn(sctx, sctx->buffer, 1); - } - - blocks = len / SHA1_BLOCK_SIZE; - len %= SHA1_BLOCK_SIZE; - - if (blocks) { - block_fn(sctx, data, blocks); - data += blocks * SHA1_BLOCK_SIZE; - } - partial = 0; - } - if (len) - memcpy(sctx->buffer + partial, data, len); - - return 0; -} - static inline int sha1_base_do_update_blocks(struct shash_desc *desc, const u8 *data, unsigned int len, @@ -82,29 +44,6 @@ static inline int sha1_base_do_update_blocks(struct shash_desc *desc, return remain; } -static inline int sha1_base_do_finalize(struct shash_desc *desc, - sha1_block_fn *block_fn) -{ - const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64); - struct sha1_state *sctx = shash_desc_ctx(desc); - __be64 *bits = (__be64 *)(sctx->buffer + bit_offset); - unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; - - sctx->buffer[partial++] = 0x80; - if (partial > bit_offset) { - memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial); - partial = 0; - - block_fn(sctx, sctx->buffer, 1); - } - - memset(sctx->buffer + partial, 0x0, bit_offset - partial); - *bits = cpu_to_be64(sctx->count << 3); - block_fn(sctx, sctx->buffer, 1); - - return 0; -} - static inline int sha1_base_do_finup(struct shash_desc *desc, const u8 *src, unsigned int len, sha1_block_fn *block_fn) -- cgit v1.2.3 From afc0a570bb6138713d74784a032e4eb5b7f919ca Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Apr 2025 10:17:03 +0100 Subject: PCI: host-generic: Extract an ECAM bridge creation helper from pci_host_common_probe() pci_host_common_probe() is an extremely useful helper, as it abstracts away most of the gunk that a "mostly-ECAM-compliant" device driver needs. However, it is structured as a probe function, meaning that a lot of the driver-specific setup has to happen in a .init() callback, after the bridge and config space have been instantiated. This is a bit awkward, and results in a number of convolutions that could be avoided if the host-common code was more like a library. Introduce a pci_host_common_init() helper that does exactly that, taking the platform device and a struct pci_ecam_op as parameters. This can then be called from the probe routine, and a lot of the code that isn't relevant to PCI setup moved away from the .init() callback. 
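As a rough sketch of the intended usage (the driver function and ops names here are illustrative, not from this patch), a host driver probe can now do its own setup first and only then create the ECAM bridge:

	static int my_pcie_probe(struct platform_device *pdev)
	{
		int ret;

		/* driver-specific setup (clocks, resets, PHY, ...) */
		ret = my_pcie_hw_setup(pdev);
		if (ret)
			return ret;

		/* then instantiate the generic ECAM host bridge */
		return pci_host_common_init(pdev, &my_ecam_ops);
	}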
This also removes the dependency on the device match data, which is an oddity. Signed-off-by: Marc Zyngier [mani: fixed spelling mistakes] Signed-off-by: Manivannan Sadhasivam Tested-by: Janne Grunau Reviewed-by: Rob Herring (Arm) Reviewed-by: Manivannan Sadhasivam Acked-by: Alyssa Rosenzweig Link: https://patch.msgid.link/20250401091713.2765724-4-maz@kernel.org --- include/linux/pci-ecam.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 3a10f8cfc3ad..bc2ca2c72ee2 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -97,6 +97,8 @@ extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */ #if IS_ENABLED(CONFIG_PCI_HOST_COMMON) /* for DT-based PCI controllers that support ECAM */ int pci_host_common_probe(struct platform_device *pdev); +int pci_host_common_init(struct platform_device *pdev, + const struct pci_ecam_ops *ops); void pci_host_common_remove(struct platform_device *pdev); #endif #endif -- cgit v1.2.3 From eba187a6e7141a1166a68c4d27b4ee5a27670b3b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:59:41 +0800 Subject: crypto: x86/sha256 - Use API partial block handling Use the Crypto API partial block handling. Also remove the unnecessary SIMD fallback path. Signed-off-by: Herbert Xu --- include/crypto/sha2.h | 5 +++ include/crypto/sha256_base.h | 80 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index b9e9281d76c9..d9b1b9932393 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -64,6 +64,11 @@ extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE]; extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE]; +struct crypto_sha256_state { + u32 state[SHA256_DIGEST_SIZE / 4]; + u64 count; +}; + struct sha256_state { u32 state[SHA256_DIGEST_SIZE / 4]; u64 count; diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h index e0418818d63c..727a1b63e1e9 100644 --- a/include/crypto/sha256_base.h +++ b/include/crypto/sha256_base.h @@ -8,15 +8,17 @@ #ifndef _CRYPTO_SHA256_BASE_H #define _CRYPTO_SHA256_BASE_H -#include -#include #include #include +#include #include #include +#include typedef void (sha256_block_fn)(struct sha256_state *sst, u8 const *src, int blocks); +typedef void (crypto_sha256_block_fn)(struct crypto_sha256_state *sst, + u8 const *src, int blocks); static inline int sha224_base_init(struct shash_desc *desc) { @@ -81,6 +83,64 @@ static inline int sha256_base_do_update(struct shash_desc *desc, return lib_sha256_base_do_update(sctx, data, len, block_fn); } +static inline int lib_sha256_base_do_update_blocks( + struct crypto_sha256_state *sctx, const u8 *data, unsigned int len, + crypto_sha256_block_fn *block_fn) +{ + unsigned int remain = len - round_down(len, SHA256_BLOCK_SIZE); + + sctx->count += len - remain; + block_fn(sctx, data, len / SHA256_BLOCK_SIZE); + return remain; +} + +static inline int sha256_base_do_update_blocks( + struct shash_desc *desc, const u8 *data, unsigned int len, + crypto_sha256_block_fn *block_fn) +{ + return lib_sha256_base_do_update_blocks(shash_desc_ctx(desc), data, + len, block_fn); +} + +static inline int lib_sha256_base_do_finup(struct crypto_sha256_state *sctx, + const u8 *src, unsigned int len, + crypto_sha256_block_fn *block_fn) +{ + unsigned int bit_offset = SHA256_BLOCK_SIZE / 8 - 1; + union { + __be64 b64[SHA256_BLOCK_SIZE / 4]; + u8 
u8[SHA256_BLOCK_SIZE * 2]; + } block = {}; + + if (len >= bit_offset * 8) + bit_offset += SHA256_BLOCK_SIZE / 8; + memcpy(&block, src, len); + block.u8[len] = 0x80; + sctx->count += len; + block.b64[bit_offset] = cpu_to_be64(sctx->count << 3); + block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SHA256_BLOCK_SIZE); + memzero_explicit(&block, sizeof(block)); + + return 0; +} + +static inline int sha256_base_do_finup(struct shash_desc *desc, + const u8 *src, unsigned int len, + crypto_sha256_block_fn *block_fn) +{ + struct crypto_sha256_state *sctx = shash_desc_ctx(desc); + + if (len >= SHA256_BLOCK_SIZE) { + int remain; + + remain = lib_sha256_base_do_update_blocks(sctx, src, len, + block_fn); + src += len - remain; + len = remain; + } + return lib_sha256_base_do_finup(sctx, src, len, block_fn); +} + static inline int lib_sha256_base_do_finalize(struct sha256_state *sctx, sha256_block_fn *block_fn) { @@ -111,15 +171,21 @@ static inline int sha256_base_do_finalize(struct shash_desc *desc, return lib_sha256_base_do_finalize(sctx, block_fn); } -static inline int lib_sha256_base_finish(struct sha256_state *sctx, u8 *out, - unsigned int digest_size) +static inline int __sha256_base_finish(u32 state[SHA256_DIGEST_SIZE / 4], + u8 *out, unsigned int digest_size) { __be32 *digest = (__be32 *)out; int i; for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be32)) - put_unaligned_be32(sctx->state[i], digest++); + put_unaligned_be32(state[i], digest++); + return 0; +} +static inline int lib_sha256_base_finish(struct sha256_state *sctx, u8 *out, + unsigned int digest_size) +{ + __sha256_base_finish(sctx->state, out, digest_size); memzero_explicit(sctx, sizeof(*sctx)); return 0; } @@ -127,9 +193,9 @@ static inline int lib_sha256_base_finish(struct sha256_state *sctx, u8 *out, static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) { unsigned int digest_size = crypto_shash_digestsize(desc->tfm); - struct sha256_state *sctx = shash_desc_ctx(desc); + struct crypto_sha256_state *sctx = shash_desc_ctx(desc); - return lib_sha256_base_finish(sctx, out, digest_size); + return __sha256_base_finish(sctx->state, out, digest_size); } #endif /* _CRYPTO_SHA256_BASE_H */ -- cgit v1.2.3 From e6c5597badf2998980639fb83e60696d0f2ce0dd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:59:45 +0800 Subject: crypto: riscv/sha256 - Use API partial block handling Use the Crypto API partial block handling. Signed-off-by: Herbert Xu --- include/crypto/sha256_base.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h index 727a1b63e1e9..b9d3583b6256 100644 --- a/include/crypto/sha256_base.h +++ b/include/crypto/sha256_base.h @@ -198,4 +198,7 @@ static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) return __sha256_base_finish(sctx->state, out, digest_size); } +void sha256_transform_blocks(struct crypto_sha256_state *sst, + const u8 *input, int blocks); + #endif /* _CRYPTO_SHA256_BASE_H */ -- cgit v1.2.3 From 9adeea13ed7f5218028cdc5b11df53e69c39a60f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 10:59:48 +0800 Subject: crypto: sha256-generic - Use API partial block handling Use the Crypto API partial block handling. 
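For completeness, the matching .finup shape built on the helpers added above looks roughly like this (names are illustrative; the block function operates on the new struct crypto_sha256_state):

	static int my_sha256_finup(struct shash_desc *desc, const u8 *data,
				   unsigned int len, u8 *out)
	{
		/* absorb the trailing bytes plus padding and the length field */
		sha256_base_do_finup(desc, data, len, my_sha256_block_fn);
		/* emit the big-endian digest */
		return sha256_base_finish(desc, out);
	}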
Signed-off-by: Herbert Xu --- include/crypto/sha2.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index d9b1b9932393..a913bad5dd3b 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -83,12 +83,6 @@ struct sha512_state { struct shash_desc; -extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); - extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len); -- cgit v1.2.3 From 8ba81fef400bf5dec895bc259d65c91bfece798a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:08 +0800 Subject: crypto: sha256_base - Remove partial block helpers Now that all sha256_base users have been converted to use the API partial block handling, remove the partial block helpers. Signed-off-by: Herbert Xu --- include/crypto/sha256_base.h | 50 ++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h index b9d3583b6256..08cd5e41d4fd 100644 --- a/include/crypto/sha256_base.h +++ b/include/crypto/sha256_base.h @@ -15,10 +15,8 @@ #include #include -typedef void (sha256_block_fn)(struct sha256_state *sst, u8 const *src, +typedef void (sha256_block_fn)(struct crypto_sha256_state *sst, u8 const *src, int blocks); -typedef void (crypto_sha256_block_fn)(struct crypto_sha256_state *sst, - u8 const *src, int blocks); static inline int sha224_base_init(struct shash_desc *desc) { @@ -42,6 +40,7 @@ static inline int lib_sha256_base_do_update(struct sha256_state *sctx, sha256_block_fn *block_fn) { unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + struct crypto_sha256_state *state = (void *)sctx; sctx->count += len; @@ -55,14 +54,14 @@ static inline int lib_sha256_base_do_update(struct sha256_state *sctx, data += p; len -= p; - block_fn(sctx, sctx->buf, 1); + block_fn(state, sctx->buf, 1); } blocks = len / SHA256_BLOCK_SIZE; len %= SHA256_BLOCK_SIZE; if (blocks) { - block_fn(sctx, data, blocks); + block_fn(state, data, blocks); data += blocks * SHA256_BLOCK_SIZE; } partial = 0; @@ -73,19 +72,9 @@ static inline int lib_sha256_base_do_update(struct sha256_state *sctx, return 0; } -static inline int sha256_base_do_update(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sha256_block_fn *block_fn) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - return lib_sha256_base_do_update(sctx, data, len, block_fn); -} - static inline int lib_sha256_base_do_update_blocks( struct crypto_sha256_state *sctx, const u8 *data, unsigned int len, - crypto_sha256_block_fn *block_fn) + sha256_block_fn *block_fn) { unsigned int remain = len - round_down(len, SHA256_BLOCK_SIZE); @@ -96,7 +85,7 @@ static inline int lib_sha256_base_do_update_blocks( static inline int sha256_base_do_update_blocks( struct shash_desc *desc, const u8 *data, unsigned int len, - crypto_sha256_block_fn *block_fn) + sha256_block_fn *block_fn) { return lib_sha256_base_do_update_blocks(shash_desc_ctx(desc), data, len, block_fn); @@ -104,7 +93,7 @@ static inline int sha256_base_do_update_blocks( static inline int lib_sha256_base_do_finup(struct crypto_sha256_state *sctx, const u8 *src, unsigned int len, - crypto_sha256_block_fn *block_fn) + sha256_block_fn *block_fn) { unsigned int bit_offset = SHA256_BLOCK_SIZE / 8 - 1; union { @@ -126,7 
+115,7 @@ static inline int lib_sha256_base_do_finup(struct crypto_sha256_state *sctx, static inline int sha256_base_do_finup(struct shash_desc *desc, const u8 *src, unsigned int len, - crypto_sha256_block_fn *block_fn) + sha256_block_fn *block_fn) { struct crypto_sha256_state *sctx = shash_desc_ctx(desc); @@ -144,23 +133,11 @@ static inline int sha256_base_do_finup(struct shash_desc *desc, static inline int lib_sha256_base_do_finalize(struct sha256_state *sctx, sha256_block_fn *block_fn) { - const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); - __be64 *bits = (__be64 *)(sctx->buf + bit_offset); unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + struct crypto_sha256_state *state = (void *)sctx; - sctx->buf[partial++] = 0x80; - if (partial > bit_offset) { - memset(sctx->buf + partial, 0x0, SHA256_BLOCK_SIZE - partial); - partial = 0; - - block_fn(sctx, sctx->buf, 1); - } - - memset(sctx->buf + partial, 0x0, bit_offset - partial); - *bits = cpu_to_be64(sctx->count << 3); - block_fn(sctx, sctx->buf, 1); - - return 0; + sctx->count -= partial; + return lib_sha256_base_do_finup(state, sctx->buf, partial, block_fn); } static inline int sha256_base_do_finalize(struct shash_desc *desc, @@ -182,12 +159,11 @@ static inline int __sha256_base_finish(u32 state[SHA256_DIGEST_SIZE / 4], return 0; } -static inline int lib_sha256_base_finish(struct sha256_state *sctx, u8 *out, - unsigned int digest_size) +static inline void lib_sha256_base_finish(struct sha256_state *sctx, u8 *out, + unsigned int digest_size) { __sha256_base_finish(sctx->state, out, digest_size); memzero_explicit(sctx, sizeof(*sctx)); - return 0; } static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) -- cgit v1.2.3 From b333c273ab43b96ca25469770d3319ec7ababd5a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:11 +0800 Subject: crypto: arm64/sha3-ce - Use API partial block handling Use the Crypto API partial block handling. Also remove the unnecessary SIMD fallback path. Signed-off-by: Herbert Xu --- include/crypto/sha3.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/sha3.h b/include/crypto/sha3.h index 080f60c2e6b1..661f196193cf 100644 --- a/include/crypto/sha3.h +++ b/include/crypto/sha3.h @@ -17,8 +17,10 @@ #define SHA3_512_DIGEST_SIZE (512 / 8) #define SHA3_512_BLOCK_SIZE (200 - 2 * SHA3_512_DIGEST_SIZE) +#define SHA3_STATE_SIZE 200 + struct sha3_state { - u64 st[25]; + u64 st[SHA3_STATE_SIZE / 8]; unsigned int rsiz; unsigned int rsizw; -- cgit v1.2.3 From 0d474be2676d9262afd0cf6a416e96b9277139a7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:15 +0800 Subject: crypto: sha3-generic - Use API partial block handling Use the Crypto API partial block handling. 
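For reference, the SHA3_STATE_SIZE of 200 bytes introduced above is simply the full 1600-bit Keccak state (25 64-bit lanes), and the existing block-size defines are the sponge rate, i.e. the state size minus twice the digest size:

	SHA3_224_BLOCK_SIZE = 200 - 2 * 28 = 144 bytes
	SHA3_256_BLOCK_SIZE = 200 - 2 * 32 = 136 bytes
	SHA3_384_BLOCK_SIZE = 200 - 2 * 48 = 104 bytes
	SHA3_512_BLOCK_SIZE = 200 - 2 * 64 =  72 bytes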
Signed-off-by: Herbert Xu --- include/crypto/sha3.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/crypto/sha3.h b/include/crypto/sha3.h index 661f196193cf..420b90c5f08a 100644 --- a/include/crypto/sha3.h +++ b/include/crypto/sha3.h @@ -5,6 +5,8 @@ #ifndef __CRYPTO_SHA3_H__ #define __CRYPTO_SHA3_H__ +#include + #define SHA3_224_DIGEST_SIZE (224 / 8) #define SHA3_224_BLOCK_SIZE (200 - 2 * SHA3_224_DIGEST_SIZE) @@ -19,6 +21,8 @@ #define SHA3_STATE_SIZE 200 +struct shash_desc; + struct sha3_state { u64 st[SHA3_STATE_SIZE / 8]; unsigned int rsiz; @@ -29,8 +33,5 @@ struct sha3_state { }; int crypto_sha3_init(struct shash_desc *desc); -int crypto_sha3_update(struct shash_desc *desc, const u8 *data, - unsigned int len); -int crypto_sha3_final(struct shash_desc *desc, u8 *out); #endif -- cgit v1.2.3 From 201e9ec3b621c9542441eb7bd34facbc1749b975 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:18 +0800 Subject: crypto: zynqmp-sha - Use API partial block handling Use the Crypto API partial block handling. As this was the last user of the extra fields in struct sha3_state, remove them. Signed-off-by: Herbert Xu --- include/crypto/sha3.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/crypto/sha3.h b/include/crypto/sha3.h index 420b90c5f08a..3c2559f51ada 100644 --- a/include/crypto/sha3.h +++ b/include/crypto/sha3.h @@ -25,11 +25,6 @@ struct shash_desc; struct sha3_state { u64 st[SHA3_STATE_SIZE / 8]; - unsigned int rsiz; - unsigned int rsizw; - - unsigned int partial; - u8 buf[SHA3_224_BLOCK_SIZE]; }; int crypto_sha3_init(struct shash_desc *desc); -- cgit v1.2.3 From ff3cb9de53baa3da7df0e08798a5638436d8aabc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:20 +0800 Subject: crypto: x86/sha512 - Use API partial block handling Use the Crypto API partial block handling. Also remove the unnecessary SIMD fallback path. 
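One detail worth noting in the sha512_base_do_finup() helper added below: SHA-512 keeps the message length as a 128-bit byte count split across count[0] (low word) and count[1] (high word), so the trailing length field is written as two big-endian 64-bit words holding the length in bits (annotated copy of the two lines from the patch):

	/* high 64 bits of the bit count, i.e. (count * 8) >> 64 */
	block.b64[bit_offset] = cpu_to_be64(sctx->count[1] << 3 |
					    sctx->count[0] >> 61);
	/* low 64 bits of the bit count, i.e. (count * 8) mod 2^64 */
	block.b64[bit_offset + 1] = cpu_to_be64(sctx->count[0] << 3);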
Signed-off-by: Herbert Xu --- include/crypto/sha2.h | 1 + include/crypto/sha512_base.h | 54 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index a913bad5dd3b..e9ad7ab955aa 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -19,6 +19,7 @@ #define SHA512_DIGEST_SIZE 64 #define SHA512_BLOCK_SIZE 128 +#define SHA512_STATE_SIZE 80 #define SHA224_H0 0xc1059ed8UL #define SHA224_H1 0x367cd507UL diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h index 679916a84cb2..8cb172e52dc0 100644 --- a/include/crypto/sha512_base.h +++ b/include/crypto/sha512_base.h @@ -10,10 +10,7 @@ #include #include -#include -#include #include - #include typedef void (sha512_block_fn)(struct sha512_state *sst, u8 const *src, @@ -93,6 +90,55 @@ static inline int sha512_base_do_update(struct shash_desc *desc, return 0; } +static inline int sha512_base_do_update_blocks(struct shash_desc *desc, + const u8 *data, + unsigned int len, + sha512_block_fn *block_fn) +{ + unsigned int remain = len - round_down(len, SHA512_BLOCK_SIZE); + struct sha512_state *sctx = shash_desc_ctx(desc); + + len -= remain; + sctx->count[0] += len; + if (sctx->count[0] < len) + sctx->count[1]++; + block_fn(sctx, data, len / SHA512_BLOCK_SIZE); + return remain; +} + +static inline int sha512_base_do_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, + sha512_block_fn *block_fn) +{ + unsigned int bit_offset = SHA512_BLOCK_SIZE / 8 - 2; + struct sha512_state *sctx = shash_desc_ctx(desc); + union { + __be64 b64[SHA512_BLOCK_SIZE / 4]; + u8 u8[SHA512_BLOCK_SIZE * 2]; + } block = {}; + + if (len >= SHA512_BLOCK_SIZE) { + int remain; + + remain = sha512_base_do_update_blocks(desc, src, len, block_fn); + src += len - remain; + len = remain; + } + + if (len >= bit_offset * 8) + bit_offset += SHA512_BLOCK_SIZE / 8; + memcpy(&block, src, len); + block.u8[len] = 0x80; + sctx->count[0] += len; + block.b64[bit_offset] = cpu_to_be64(sctx->count[1] << 3 | + sctx->count[0] >> 61); + block.b64[bit_offset + 1] = cpu_to_be64(sctx->count[0] << 3); + block_fn(sctx, block.u8, (bit_offset + 2) * 8 / SHA512_BLOCK_SIZE); + memzero_explicit(&block, sizeof(block)); + + return 0; +} + static inline int sha512_base_do_finalize(struct shash_desc *desc, sha512_block_fn *block_fn) { @@ -126,8 +172,6 @@ static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64)) put_unaligned_be64(sctx->state[i], digest++); - - memzero_explicit(sctx, sizeof(*sctx)); return 0; } -- cgit v1.2.3 From 561aab1104d80083886beaaeee39fa6482d6f157 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:25 +0800 Subject: crypto: riscv/sha512 - Use API partial block handling Use the Crypto API partial block handling. 
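The sha512_generic_block_fn() declaration exported below is presumably what allows the RISC-V glue code to fall back to the generic compression function when the vector unit is unavailable; the general shape of such a fallback (illustrative only, not taken from the driver) is:

	static void my_sha512_blocks(struct sha512_state *state, const u8 *src,
				     int blocks)
	{
		if (crypto_simd_usable())
			my_sha512_vector_blocks(state, src, blocks); /* accelerated */
		else
			sha512_generic_block_fn(state, src, blocks); /* generic */
	}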
Signed-off-by: Herbert Xu --- include/crypto/sha512_base.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h index 8cb172e52dc0..e9f302ec3ede 100644 --- a/include/crypto/sha512_base.h +++ b/include/crypto/sha512_base.h @@ -10,7 +10,10 @@ #include #include +#include +#include #include +#include #include typedef void (sha512_block_fn)(struct sha512_state *sst, u8 const *src, @@ -175,4 +178,7 @@ static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) return 0; } +void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src, + int blocks); + #endif /* _CRYPTO_SHA512_BASE_H */ -- cgit v1.2.3 From 216623af53062943ee4ef76a0b597ea6030ffd42 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:27 +0800 Subject: crypto: sha512-generic - Use API partial block handling Use the Crypto API partial block handling. Signed-off-by: Herbert Xu --- include/crypto/sha2.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index e9ad7ab955aa..abbd882f7849 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -82,14 +82,6 @@ struct sha512_state { u8 buf[SHA512_BLOCK_SIZE]; }; -struct shash_desc; - -extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, - unsigned int len); - -extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *hash); - /* * Stand-alone implementation of the SHA256 algorithm. It is designed to * have as little dependencies as possible so it can be used in the -- cgit v1.2.3 From f9fbc8bff1e19dc88ceeb24b39fd78631e5ead13 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:43 +0800 Subject: crypto: sha512_base - Remove partial block helpers Now that all sha512_base users have been converted to use the API partial block handling, remove the partial block helpers.
Signed-off-by: Herbert Xu --- include/crypto/sha512_base.h | 64 -------------------------------------------- 1 file changed, 64 deletions(-) (limited to 'include') diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h index e9f302ec3ede..aa814bab442d 100644 --- a/include/crypto/sha512_base.h +++ b/include/crypto/sha512_base.h @@ -53,46 +53,6 @@ static inline int sha512_base_init(struct shash_desc *desc) return 0; } -static inline int sha512_base_do_update(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sha512_block_fn *block_fn) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; - - sctx->count[0] += len; - if (sctx->count[0] < len) - sctx->count[1]++; - - if (unlikely((partial + len) >= SHA512_BLOCK_SIZE)) { - int blocks; - - if (partial) { - int p = SHA512_BLOCK_SIZE - partial; - - memcpy(sctx->buf + partial, data, p); - data += p; - len -= p; - - block_fn(sctx, sctx->buf, 1); - } - - blocks = len / SHA512_BLOCK_SIZE; - len %= SHA512_BLOCK_SIZE; - - if (blocks) { - block_fn(sctx, data, blocks); - data += blocks * SHA512_BLOCK_SIZE; - } - partial = 0; - } - if (len) - memcpy(sctx->buf + partial, data, len); - - return 0; -} - static inline int sha512_base_do_update_blocks(struct shash_desc *desc, const u8 *data, unsigned int len, @@ -142,30 +102,6 @@ static inline int sha512_base_do_finup(struct shash_desc *desc, const u8 *src, return 0; } -static inline int sha512_base_do_finalize(struct shash_desc *desc, - sha512_block_fn *block_fn) -{ - const int bit_offset = SHA512_BLOCK_SIZE - sizeof(__be64[2]); - struct sha512_state *sctx = shash_desc_ctx(desc); - __be64 *bits = (__be64 *)(sctx->buf + bit_offset); - unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; - - sctx->buf[partial++] = 0x80; - if (partial > bit_offset) { - memset(sctx->buf + partial, 0x0, SHA512_BLOCK_SIZE - partial); - partial = 0; - - block_fn(sctx, sctx->buf, 1); - } - - memset(sctx->buf + partial, 0x0, bit_offset - partial); - bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); - bits[1] = cpu_to_be64(sctx->count[0] << 3); - block_fn(sctx, sctx->buf, 1); - - return 0; -} - static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) { unsigned int digest_size = crypto_shash_digestsize(desc->tfm); -- cgit v1.2.3 From 8266393e9ba0bee395bc256ba56427ec5493e2d1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:45 +0800 Subject: crypto: sm3-generic - Use API partial block handling Use the Crypto API partial block handling. 
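As an aside, the SM3_STATE_SIZE of 40 bytes added below follows the same convention as MD5_STATE_SIZE (24) and SHA512_STATE_SIZE (80) earlier in the series: it is the size of the exported partial-block state, i.e. the chaining variables plus the byte counter but without the block buffer (8 * 4 + 8 = 40 for SM3, 4 * 4 + 8 = 24 for MD5, 8 * 8 + 16 = 80 for SHA-512).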
Signed-off-by: Herbert Xu --- include/crypto/sm3.h | 1 + include/crypto/sm3_base.h | 45 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h index d49211ba9a20..6dc95264a836 100644 --- a/include/crypto/sm3.h +++ b/include/crypto/sm3.h @@ -14,6 +14,7 @@ #define SM3_DIGEST_SIZE 32 #define SM3_BLOCK_SIZE 64 +#define SM3_STATE_SIZE 40 #define SM3_T1 0x79CC4519 #define SM3_T2 0x7A879D8A diff --git a/include/crypto/sm3_base.h b/include/crypto/sm3_base.h index 835896228e8e..9460589c8cb8 100644 --- a/include/crypto/sm3_base.h +++ b/include/crypto/sm3_base.h @@ -62,6 +62,49 @@ static inline int sm3_base_do_update(struct shash_desc *desc, return 0; } +static inline int sm3_base_do_update_blocks(struct shash_desc *desc, + const u8 *data, unsigned int len, + sm3_block_fn *block_fn) +{ + unsigned int remain = len - round_down(len, SM3_BLOCK_SIZE); + struct sm3_state *sctx = shash_desc_ctx(desc); + + sctx->count += len - remain; + block_fn(sctx, data, len / SM3_BLOCK_SIZE); + return remain; +} + +static inline int sm3_base_do_finup(struct shash_desc *desc, + const u8 *src, unsigned int len, + sm3_block_fn *block_fn) +{ + unsigned int bit_offset = SM3_BLOCK_SIZE / 8 - 1; + struct sm3_state *sctx = shash_desc_ctx(desc); + union { + __be64 b64[SM3_BLOCK_SIZE / 4]; + u8 u8[SM3_BLOCK_SIZE * 2]; + } block = {}; + + if (len >= SM3_BLOCK_SIZE) { + int remain; + + remain = sm3_base_do_update_blocks(desc, src, len, block_fn); + src += len - remain; + len = remain; + } + + if (len >= bit_offset * 8) + bit_offset += SM3_BLOCK_SIZE / 8; + memcpy(&block, src, len); + block.u8[len] = 0x80; + sctx->count += len; + block.b64[bit_offset] = cpu_to_be64(sctx->count << 3); + block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SM3_BLOCK_SIZE); + memzero_explicit(&block, sizeof(block)); + + return 0; +} + static inline int sm3_base_do_finalize(struct shash_desc *desc, sm3_block_fn *block_fn) { @@ -93,8 +136,6 @@ static inline int sm3_base_finish(struct shash_desc *desc, u8 *out) for (i = 0; i < SM3_DIGEST_SIZE / sizeof(__be32); i++) put_unaligned_be32(sctx->state[i], digest++); - - memzero_explicit(sctx, sizeof(*sctx)); return 0; } -- cgit v1.2.3 From 9939049085bb80205b5e7e5b2b1d594f2731917f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2025 11:00:57 +0800 Subject: crypto: lib/sm3 - Remove partial block helpers Now that all sm3_base users have been converted to use the API partial block handling, remove the partial block helpers as well as the lib/crypto functions. 
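With sm3_update()/sm3_final() gone from lib/crypto, an in-kernel user that still needs a one-shot SM3 digest goes through the shash API instead, roughly as follows (error handling trimmed; data/len are assumed to be in scope):

	struct crypto_shash *tfm = crypto_alloc_shash("sm3", 0, 0);
	u8 digest[SM3_DIGEST_SIZE];

	if (!IS_ERR(tfm)) {
		crypto_shash_tfm_digest(tfm, data, len, digest);
		crypto_free_shash(tfm);
	}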
Signed-off-by: Herbert Xu --- include/crypto/sm3.h | 2 -- include/crypto/sm3_base.h | 64 ++--------------------------------------------- 2 files changed, 2 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/crypto/sm3.h b/include/crypto/sm3.h index 6dc95264a836..c8d02c86c298 100644 --- a/include/crypto/sm3.h +++ b/include/crypto/sm3.h @@ -60,7 +60,5 @@ static inline void sm3_init(struct sm3_state *sctx) } void sm3_block_generic(struct sm3_state *sctx, u8 const *data, int blocks); -void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len); -void sm3_final(struct sm3_state *sctx, u8 *out); #endif diff --git a/include/crypto/sm3_base.h b/include/crypto/sm3_base.h index 9460589c8cb8..7c53570bc05e 100644 --- a/include/crypto/sm3_base.h +++ b/include/crypto/sm3_base.h @@ -11,9 +11,10 @@ #include #include -#include +#include #include #include +#include #include typedef void (sm3_block_fn)(struct sm3_state *sst, u8 const *src, int blocks); @@ -24,44 +25,6 @@ static inline int sm3_base_init(struct shash_desc *desc) return 0; } -static inline int sm3_base_do_update(struct shash_desc *desc, - const u8 *data, - unsigned int len, - sm3_block_fn *block_fn) -{ - struct sm3_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count % SM3_BLOCK_SIZE; - - sctx->count += len; - - if (unlikely((partial + len) >= SM3_BLOCK_SIZE)) { - int blocks; - - if (partial) { - int p = SM3_BLOCK_SIZE - partial; - - memcpy(sctx->buffer + partial, data, p); - data += p; - len -= p; - - block_fn(sctx, sctx->buffer, 1); - } - - blocks = len / SM3_BLOCK_SIZE; - len %= SM3_BLOCK_SIZE; - - if (blocks) { - block_fn(sctx, data, blocks); - data += blocks * SM3_BLOCK_SIZE; - } - partial = 0; - } - if (len) - memcpy(sctx->buffer + partial, data, len); - - return 0; -} - static inline int sm3_base_do_update_blocks(struct shash_desc *desc, const u8 *data, unsigned int len, sm3_block_fn *block_fn) @@ -105,29 +68,6 @@ static inline int sm3_base_do_finup(struct shash_desc *desc, return 0; } -static inline int sm3_base_do_finalize(struct shash_desc *desc, - sm3_block_fn *block_fn) -{ - const int bit_offset = SM3_BLOCK_SIZE - sizeof(__be64); - struct sm3_state *sctx = shash_desc_ctx(desc); - __be64 *bits = (__be64 *)(sctx->buffer + bit_offset); - unsigned int partial = sctx->count % SM3_BLOCK_SIZE; - - sctx->buffer[partial++] = 0x80; - if (partial > bit_offset) { - memset(sctx->buffer + partial, 0x0, SM3_BLOCK_SIZE - partial); - partial = 0; - - block_fn(sctx, sctx->buffer, 1); - } - - memset(sctx->buffer + partial, 0x0, bit_offset - partial); - *bits = cpu_to_be64(sctx->count << 3); - block_fn(sctx, sctx->buffer, 1); - - return 0; -} - static inline int sm3_base_finish(struct shash_desc *desc, u8 *out) { struct sm3_state *sctx = shash_desc_ctx(desc); -- cgit v1.2.3 From 9861f21ff16b6cd919144dae7ff355d5145a3474 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 14 Mar 2025 11:00:55 +0100 Subject: pmdomain: core: Add genpd helper to correct the usage/rejected counters In the cpuidle-psci-domain case the ->power_off() callback is usually returning zero to indicate success. This is because the actual call to the PSCI FW to enter the selected domain-idlestate, needs to be done after the ->power_off() callback has returned. When the call to the PSCI FW fails, this leads to receiving an incorrect tracking of the usage/rejected counts for the selected domain-idlestate. In other words, the presented debug-statistics for genpd may look better than what the actually are. 
To improve the correctness of the data, let's add a new genpd helper function, which enables the caller to adjust the usage/rejected counters for a domain-idlestate, in cases of errors during power-off. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250314100103.1294715-2-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index d56a78af4af1..6e808aeecbcb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -285,6 +285,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); int pm_genpd_remove(struct generic_pm_domain *genpd); +void pm_genpd_inc_rejected(struct generic_pm_domain *genpd, + unsigned int state_idx); struct device *dev_to_genpd_dev(struct device *dev); int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); @@ -336,6 +338,10 @@ static inline int pm_genpd_remove(struct generic_pm_domain *genpd) return -EOPNOTSUPP; } +static inline void pm_genpd_inc_rejected(struct generic_pm_domain *genpd, + unsigned int state_idx) +{ } + static inline struct device *dev_to_genpd_dev(struct device *dev) { return ERR_PTR(-EOPNOTSUPP); -- cgit v1.2.3 From 0a8a888167ddaaec7a292e5045782b8a240e6f3e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 14 Mar 2025 11:00:58 +0100 Subject: pmdomain: core: Add residency reflection for domain-idlestates to debugfs For regular cpuidle states we are reflecting over the selected/entered state to see if the sleep-duration meets the residency for the state. The output from the reflection is an "above" value to indicate the number of times the state was too deep and a "below" value for the number of times it was too shallow. Let's implement something similar for genpd's domain-idlestates along with genpd's governor and put the information in the genpd's debugfs. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250314100103.1294715-5-ulf.hansson@linaro.org --- include/linux/pm_domain.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 6e808aeecbcb..0b18160901a2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -142,6 +142,8 @@ struct genpd_governor_data { bool max_off_time_changed; ktime_t next_wakeup; ktime_t next_hrtimer; + ktime_t last_enter; + bool reflect_residency; bool cached_power_down_ok; bool cached_power_down_state_idx; }; @@ -153,6 +155,8 @@ struct genpd_power_state { s64 residency_ns; u64 usage; u64 rejected; + u64 above; + u64 below; struct fwnode_handle *fwnode; u64 idle_time; void *data; -- cgit v1.2.3 From b5c48210341129db7158703c00e4d2155f4cf0f3 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 10 Apr 2025 16:39:42 +0200 Subject: dt-bindings: power: mediatek: Support Dimensity 1200 MT6893 MTCMOS Add support for the Power Domains (MTCMOS) integrated into the MediaTek Dimensity 1200 (MT6893) SoC.
Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250410143944.475773-2-angelogioacchino.delregno@collabora.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/mediatek,mt6893-power.h | 35 +++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/dt-bindings/power/mediatek,mt6893-power.h (limited to 'include') diff --git a/include/dt-bindings/power/mediatek,mt6893-power.h b/include/dt-bindings/power/mediatek,mt6893-power.h new file mode 100644 index 000000000000..aeab51bb2ad8 --- /dev/null +++ b/include/dt-bindings/power/mediatek,mt6893-power.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Copyright (c) 2025 Collabora Ltd + * AngeloGioacchino Del Regno + */ + +#ifndef _DT_BINDINGS_POWER_MT6893_POWER_H +#define _DT_BINDINGS_POWER_MT6893_POWER_H + +#define MT6893_POWER_DOMAIN_CONN 0 +#define MT6893_POWER_DOMAIN_MFG0 1 +#define MT6893_POWER_DOMAIN_MFG1 2 +#define MT6893_POWER_DOMAIN_MFG2 3 +#define MT6893_POWER_DOMAIN_MFG3 4 +#define MT6893_POWER_DOMAIN_MFG4 5 +#define MT6893_POWER_DOMAIN_MFG5 6 +#define MT6893_POWER_DOMAIN_MFG6 7 +#define MT6893_POWER_DOMAIN_ISP 8 +#define MT6893_POWER_DOMAIN_ISP2 9 +#define MT6893_POWER_DOMAIN_IPE 10 +#define MT6893_POWER_DOMAIN_VDEC0 11 +#define MT6893_POWER_DOMAIN_VDEC1 12 +#define MT6893_POWER_DOMAIN_VENC0 13 +#define MT6893_POWER_DOMAIN_VENC1 14 +#define MT6893_POWER_DOMAIN_MDP 15 +#define MT6893_POWER_DOMAIN_DISP 16 +#define MT6893_POWER_DOMAIN_AUDIO 17 +#define MT6893_POWER_DOMAIN_ADSP 18 +#define MT6893_POWER_DOMAIN_CAM 19 +#define MT6893_POWER_DOMAIN_CAM_RAWA 20 +#define MT6893_POWER_DOMAIN_CAM_RAWB 21 +#define MT6893_POWER_DOMAIN_CAM_RAWC 22 +#define MT6893_POWER_DOMAIN_DP_TX 23 + +#endif /* _DT_BINDINGS_POWER_MT6893_POWER_H */ -- cgit v1.2.3 From a572dc467de241706ab92d61b3b3a0bca93450c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Larumbe?= Date: Wed, 23 Apr 2025 03:12:32 +0100 Subject: drm/panthor: Add driver IOCTL for setting BO labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow UM to label a BO for which it possesses a DRM handle. Signed-off-by: Adrián Larumbe Reviewed-by: Liviu Dudau Reviewed-by: Boris Brezillon Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20250423021238.1639175-3-adrian.larumbe@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panthor_drm.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 97e2c4510e69..ad9a70afea6c 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -127,6 +127,9 @@ enum drm_panthor_ioctl_id { /** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. */ DRM_PANTHOR_TILER_HEAP_DESTROY, + + /** @DRM_PANTHOR_BO_SET_LABEL: Label a BO. */ + DRM_PANTHOR_BO_SET_LABEL, }; /** @@ -977,6 +980,24 @@ struct drm_panthor_tiler_heap_destroy { __u32 pad; }; +/** + * struct drm_panthor_bo_set_label - Arguments passed to DRM_IOCTL_PANTHOR_BO_SET_LABEL + */ +struct drm_panthor_bo_set_label { + /** @handle: Handle of the buffer object to label. */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** + * @label: User pointer to a NUL-terminated string + * + * Length cannot be greater than 4096 + */ + __u64 label; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. 
@@ -1019,6 +1040,8 @@ enum { DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create), DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY = DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy), + DRM_IOCTL_PANTHOR_BO_SET_LABEL = + DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label), }; #if defined(__cplusplus) -- cgit v1.2.3 From 2798cf48d2670487f7ee22dfc5f31c07d0d3b135 Mon Sep 17 00:00:00 2001 From: Finley Xiao Date: Tue, 15 Apr 2025 11:23:13 +0800 Subject: dt-bindings: power: rockchip: Add support for RK3562 SoC According to a description from TRM, add all the power domains. Signed-off-by: Finley Xiao Signed-off-by: Kever Yang Reviewed-by: Heiko Stuebner Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250415032314.44997-1-kever.yang@rock-chips.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/rockchip,rk3562-power.h | 35 +++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/dt-bindings/power/rockchip,rk3562-power.h (limited to 'include') diff --git a/include/dt-bindings/power/rockchip,rk3562-power.h b/include/dt-bindings/power/rockchip,rk3562-power.h new file mode 100644 index 000000000000..5182c2427a55 --- /dev/null +++ b/include/dt-bindings/power/rockchip,rk3562-power.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2022-2024 Rockchip Electronics Co., Ltd. + */ +#ifndef __DT_BINDINGS_POWER_RK3562_POWER_H__ +#define __DT_BINDINGS_POWER_RK3562_POWER_H__ + +/* VD_CORE */ +#define RK3562_PD_CPU_0 0 +#define RK3562_PD_CPU_1 1 +#define RK3562_PD_CPU_2 2 +#define RK3562_PD_CPU_3 3 +#define RK3562_PD_CORE_ALIVE 4 + +/* VD_PMU */ +#define RK3562_PD_PMU 5 +#define RK3562_PD_PMU_ALIVE 6 + +/* VD_NPU */ +#define RK3562_PD_NPU 7 + +/* VD_GPU */ +#define RK3562_PD_GPU 8 + +/* VD_LOGIC */ +#define RK3562_PD_DDR 9 +#define RK3562_PD_VEPU 10 +#define RK3562_PD_VDPU 11 +#define RK3562_PD_VI 12 +#define RK3562_PD_VO 13 +#define RK3562_PD_RGA 14 +#define RK3562_PD_PHP 15 +#define RK3562_PD_LOGIC_ALIVE 16 + +#endif -- cgit v1.2.3 From 76a853f86c97b348dc96e75a6e6f94d8750715ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Mar 2025 13:49:39 +0100 Subject: wifi: free SKBTX_WIFI_STATUS skb tx_flags flag Jason mentioned at netdevconf that we've run out of tx_flags in the skb_shinfo(). Gain one bit back by removing the wifi bit. We can do that because the only userspace application for it (hostapd) doesn't change the setting on the socket, it just uses different sockets, and normally doesn't even use this any more, sending the frames over nl80211 instead. 
Reviewed-by: Jason Xing Link: https://patch.msgid.link/20250313134942.52ff54a140ec.If390bbdc46904cf451256ba989d7a056c457af6e@changeid Signed-off-by: Johannes Berg --- include/linux/skbuff.h | 3 --- include/net/sock.h | 2 -- 2 files changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b974a277975a..9ee39670e8f4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -481,9 +481,6 @@ enum { /* generate software time stamp on packet tx completion */ SKBTX_COMPLETION_TSTAMP = 1 << 3, - /* generate wifi status information (where possible) */ - SKBTX_WIFI_STATUS = 1 << 4, - /* determine hardware time stamp based on time or cycles */ SKBTX_HW_TSTAMP_NETDEV = 1 << 5, diff --git a/include/net/sock.h b/include/net/sock.h index 694f954258d4..36b219109790 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2736,8 +2736,6 @@ static inline void _sock_tx_timestamp(struct sock *sk, *tskey = atomic_inc_return(&sk->sk_tskey) - 1; } } - if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) - *tx_flags |= SKBTX_WIFI_STATUS; } static inline void sock_tx_timestamp(struct sock *sk, -- cgit v1.2.3 From 76f1cc98b23cefd1f0ae90c51f1fb837e5f46528 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 20 Apr 2025 10:31:20 +0100 Subject: io_uring/zcrx: add support for multiple ifqs Allow the user to register multiple ifqs / zcrx contexts. With that we can use multiple interfaces / interface queues in a single io_uring instance. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/668b03bee03b5216564482edcfefbc2ee337dd30.1745141261.git.asml.silence@gmail.com [axboe: fold in fix] Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 06d722289fc5..7e23e993280e 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -40,8 +40,6 @@ enum io_uring_cmd_flags { IO_URING_F_TASK_DEAD = (1 << 13), }; -struct io_zcrx_ifq; - struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -394,7 +392,8 @@ struct io_ring_ctx { struct wait_queue_head poll_wq; struct io_restriction restrictions; - struct io_zcrx_ifq *ifq; + /* Stores zcrx object pointers of type struct io_zcrx_ifq */ + struct xarray zcrx_ctxs; u32 pers_next; struct xarray personalities; -- cgit v1.2.3 From 996c15bd30a9caf5d3a32414a28503f3389fc96e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 29 Mar 2025 22:14:20 +0100 Subject: wifi: cfg80211/mac80211: remove more 5/10 MHz code We still have ieee80211_chandef_rate_flags() and all that, but all the users seem pretty much broken (deflink, etc.) Remove all the code. It's been two years since last anyone even vaguely entertained the notion of looking at this and fixing it. 
Link: https://patch.msgid.link/20250329221419.c31da7ae8c84.I1a3a4b6008134d66ca75a5bdfc004f4594da8145@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index efbd79c67be2..6df4e17f1437 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1096,43 +1096,6 @@ int cfg80211_chandef_primary(const struct cfg80211_chan_def *chandef, **/ int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef); -/** - * ieee80211_chanwidth_rate_flags - return rate flags for channel width - * @width: the channel width of the channel - * - * In some channel types, not all rates may be used - for example CCK - * rates may not be used in 5/10 MHz channels. - * - * Returns: rate flags which apply for this channel width - */ -static inline enum ieee80211_rate_flags -ieee80211_chanwidth_rate_flags(enum nl80211_chan_width width) -{ - switch (width) { - case NL80211_CHAN_WIDTH_5: - return IEEE80211_RATE_SUPPORTS_5MHZ; - case NL80211_CHAN_WIDTH_10: - return IEEE80211_RATE_SUPPORTS_10MHZ; - default: - break; - } - return 0; -} - -/** - * ieee80211_chandef_rate_flags - returns rate flags for a channel - * @chandef: channel definition for the channel - * - * See ieee80211_chanwidth_rate_flags(). - * - * Returns: rate flags which apply for this channel - */ -static inline enum ieee80211_rate_flags -ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef) -{ - return ieee80211_chanwidth_rate_flags(chandef->width); -} - /** * ieee80211_chandef_max_power - maximum transmission power for the chandef * -- cgit v1.2.3 From 4876376988081d636a4c4e5f03a5556386b49087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 3 Apr 2025 20:39:28 +0200 Subject: Revert "mac80211: Dynamically set CoDel parameters per station" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 484a54c2e597dbc4ace79c1687022282905afba0. The CoDel parameter change essentially disables CoDel on slow stations, with some questionable assumptions, as Dave pointed out in [0]. Quoting from there: But here are my pithy comments as to why this part of mac80211 is so wrong... static void sta_update_codel_params(struct sta_info *sta, u32 thr) { - if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { 1) sta->local->num_sta is the number of associated, rather than active, stations. "Active" stations in the last 50ms or so, might have been a better thing to use, but as most people have far more than that associated, we end up with really lousy codel parameters, all the time. Mistake numero uno! 2) The STA_SLOW_THRESHOLD was completely arbitrary in 2016. - sta->cparams.target = MS2TIME(50); This, by itself, was probably not too bad. 30ms might have been better, at the time, when we were battling powersave etc, but 20ms was enough, really, to cover most scenarios, even where we had low rate 2Ghz multicast to cope with. Even then, codel has a hard time finding any sane drop rate at all, with a target this high. - sta->cparams.interval = MS2TIME(300); But this was horrible, a total mistake, that is leading to codel being completely ineffective in almost any scenario on clients or APS. 100ms, even 80ms, here, would be vastly better than this insanity. I'm seeing 5+seconds of delay accumulated in a bunch of otherwise happily fq-ing APs.... 
100ms of observed jitter during a flow is enough. Certainly (in 2016) there were interactions with powersave that I did not understand, and still don't, but if you are transmitting in the first place, powersave shouldn't be a problemmmm..... - sta->cparams.ecn = false; At the time we were pretty nervous about ecn, I'm kind of sanguine about it now, and reliably indicating ecn seems better than turning it off for any reason. [...] In production, on p2p wireless, I've had 8ms and 80ms for target and interval for years now, and it works great. I think Dave's arguments above are basically sound on the face of it, and various experimentation with tighter CoDel parameters in the OpenWrt community have show promising results[1]. So I don't think there's any reason to keep this parameter fiddling; hence this revert. [0] https://lore.kernel.org/linux-wireless/CAA93jw6NJ2cmLmMauz0xAgC2MGbBq6n0ZiZzAdkK0u4b+O2yXg@mail.gmail.com/ [1] https://forum.openwrt.org/t/reducing-multiplexing-latencies-still-further-in-wifi/133605/130 Suggested-By: Dave Taht In-memory-of: Dave Taht Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250403183930.197716-1-toke@toke.dk Signed-off-by: Johannes Berg --- include/net/mac80211.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c498f685d01f..5349df596157 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5346,22 +5346,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, struct ieee80211_tx_rate *dest, int max_rates); -/** - * ieee80211_sta_set_expected_throughput - set the expected tpt for a station - * - * Call this function to notify mac80211 about a change in expected throughput - * to a station. A driver for a device that does rate control in firmware can - * call this function when the expected throughput estimate towards a station - * changes. The information is used to tune the CoDel AQM applied to traffic - * going towards that station (which can otherwise be too aggressive and cause - * slow stations to starve). - * - * @pubsta: the station to set throughput for. - * @thr: the current expected throughput in kbps. - */ -void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, - u32 thr); - /** * ieee80211_tx_rate_update - transmit rate update callback * -- cgit v1.2.3 From fcc2d3e11bcc8f01d52a8c419f49f86ff8343b7c Mon Sep 17 00:00:00 2001 From: Karthikeyan Kathirvel Date: Mon, 21 Apr 2025 16:45:05 +0530 Subject: wifi: ieee80211: define beacon protection bit field An AP supporting Beacon Protection should set bit 84 in the extended capabilities IE (9.4.2.25 in the 802.11be D7 spec). So the *4th* bit of the 10th byte should be checked to figure out whether beacon protection is enabled or disabled. 
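In practice that means a check along these lines (illustrative; ext_capab/ext_capab_len stand for the received Extended Capabilities element payload and its length):

	bool bcn_prot = ext_capab_len >= 11 &&
			(ext_capab[10] & WLAN_EXT_CAPA11_BCN_PROTECT);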
Signed-off-by: Karthikeyan Kathirvel Reviewed-by: Jeff Johnson Link: https://patch.msgid.link/20250421111505.3633992-1-karthikeyan.kathirvel@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 508d466de1cc..cbc3928aa504 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4087,6 +4087,9 @@ enum ieee80211_tdls_actioncode { /* Defines support for enhanced multi-bssid advertisement*/ #define WLAN_EXT_CAPA11_EMA_SUPPORT BIT(3) +/* Enable Beacon Protection */ +#define WLAN_EXT_CAPA11_BCN_PROTECT BIT(4) + /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 -- cgit v1.2.3 From 53160d0edf7336acaed4c74c6af8549d87c92ae6 Mon Sep 17 00:00:00 2001 From: Ramasamy Kaliappan Date: Thu, 27 Mar 2025 10:43:17 +0530 Subject: wifi: cfg80211: Add support to get EMLSR capabilities of non-AP MLD The Enhanced multi-link single-radio (EMLSR) operation allows a non-AP MLD with multiple receive chains to listen on one or more EMLSR links when the corresponding non-AP STA(s) affiliated with the non-AP MLD is (are) in the awake state. [IEEE 802.11be-2024, (35.3.17 Enhanced multi-link single-radio (EMLSR) operation)] An MLD which intends to enable EMLSR operations will set the EML Capabilities Present subfield to 1 and shall set the EMLSR Support subfield in the Common Info field of the Basic Multi-Link element to 1 in all Management frames that include the Basic Multi-Link element except Authentication frames. The EML capabilities contain information such as the EML Transition timeout, Padding delay and Transition delay. These fields need to be passed to drivers to trigger EMLSR operation and to transmit and receive the initial control frame and data frames. Add support to receive the EML Capabilities subfield that a non-AP MLD advertises during (re)association requests and send it to the underlying drivers during ADD/SET station. Signed-off-by: Ramasamy Kaliappan Signed-off-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20250327051320.3253783-2-quic_ramess@quicinc.com [accept EMLSR capabilities only for unassoc AP STA] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6df4e17f1437..87cb66fba621 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1733,6 +1733,9 @@ struct cfg80211_ttlm_params { * @supported_oper_classes_len: number of supported operating classes * @support_p2p_ps: information if station supports P2P PS mechanism * @airtime_weight: airtime scheduler weight for this station + * @eml_cap_present: Specifies if EML capabilities field (@eml_cap) is + * present/updated + * @eml_cap: EML capabilities of this station * @link_sta_params: link related params.
*/ struct station_parameters { @@ -1757,6 +1760,8 @@ struct station_parameters { u8 supported_oper_classes_len; int support_p2p_ps; u16 airtime_weight; + bool eml_cap_present; + u16 eml_cap; struct link_station_parameters link_sta_params; }; -- cgit v1.2.3 From 14e0f59a88cc22ceeb36e26b89b70b22292d23de Mon Sep 17 00:00:00 2001 From: Ramasamy Kaliappan Date: Thu, 27 Mar 2025 10:43:18 +0530 Subject: wifi: mac80211: update ML STA with EML capabilities When an AP and Non-AP MLD operates in EMLSR mode, EML capabilities advertised during Association contains information such as EMLSR transition delay, padding delay and transition timeout values. Save the EML capabilities information that is received during station addition and capabilities update in ieee80211_sta so that drivers can use it for triggering EMLSR operation. Signed-off-by: Ramasamy Kaliappan Signed-off-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20250327051320.3253783-3-quic_ramess@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5349df596157..c305ebfa6e45 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2488,6 +2488,7 @@ struct ieee80211_link_sta { * @max_amsdu_subframes: indicates the maximal number of MSDUs in a single * A-MSDU. Taken from the Extended Capabilities element. 0 means * unlimited. + * @eml_cap: EML capabilities of this MLO station * @cur: currently valid data as aggregated from the active links * For non MLO STA it will point to the deflink data. For MLO STA * ieee80211_sta_recalc_aggregates() must be called to update it. @@ -2522,6 +2523,7 @@ struct ieee80211_sta { bool mlo; bool spp_amsdu; u8 max_amsdu_subframes; + u16 eml_cap; struct ieee80211_sta_aggregates *cur; -- cgit v1.2.3 From 91ea0489dc97bfda72ed74f98ab66dc0ab4235c7 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Thu, 27 Mar 2025 10:43:19 +0530 Subject: wifi: ieee80211: Add helpers to fetch EMLSR delay and timeout values Add helpers to get EMLSR transition delay, padding delay and transition timeout values from EML capabilities field of Multi-link Element. Signed-off-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20250327051320.3253783-4-quic_ramess@quicinc.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 74 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index cbc3928aa504..15a87f522017 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -5618,6 +5618,80 @@ static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) return len >= fixed + elem_len; } +/** + * ieee80211_emlsr_pad_delay_in_us - Fetch the EMLSR Padding delay + * in microseconds + * @eml_cap: EML capabilities field value from common info field of + * the Multi-link element + * Return: the EMLSR Padding delay (in microseconds) encoded in the + * EML Capabilities field + */ + +static inline u32 ieee80211_emlsr_pad_delay_in_us(u16 eml_cap) +{ + /* IEEE Std 802.11be-2024 Table 9-417i—Encoding of the EMLSR + * Padding Delay subfield. 
+ */ + u32 pad_delay = u16_get_bits(eml_cap, + IEEE80211_EML_CAP_EMLSR_PADDING_DELAY); + + if (!pad_delay || + pad_delay > IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US) + return 0; + + return 32 * (1 << (pad_delay - 1)); +} + +/** + * ieee80211_emlsr_trans_delay_in_us - Fetch the EMLSR Transition + * delay in microseconds + * @eml_cap: EML capabilities field value from common info field of + * the Multi-link element + * Return: the EMLSR Transition delay (in microseconds) encoded in the + * EML Capabilities field + */ + +static inline u32 ieee80211_emlsr_trans_delay_in_us(u16 eml_cap) +{ + /* IEEE Std 802.11be-2024 Table 9-417j—Encoding of the EMLSR + * Transition Delay subfield. + */ + u32 trans_delay = + u16_get_bits(eml_cap, + IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY); + + /* invalid values also just use 0 */ + if (!trans_delay || + trans_delay > IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US) + return 0; + + return 16 * (1 << (trans_delay - 1)); +} + +/** + * ieee80211_eml_trans_timeout_in_us - Fetch the EMLSR Transition + * timeout value in microseconds + * @eml_cap: EML capabilities field value from common info field of + * the Multi-link element + * Return: the EMLSR Transition timeout (in microseconds) encoded in + * the EML Capabilities field + */ + +static inline u32 ieee80211_eml_trans_timeout_in_us(u16 eml_cap) +{ + /* IEEE Std 802.11be-2024 Table 9-417m—Encoding of the + * Transition Timeout subfield. + */ + u8 timeout = u16_get_bits(eml_cap, + IEEE80211_EML_CAP_TRANSITION_TIMEOUT); + + /* invalid values also just use 0 */ + if (!timeout || timeout > IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU) + return 0; + + return 128 * (1 << (timeout - 1)); +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ -- cgit v1.2.3 From 37523c3c47b3f3cc4c7d2ff47d28ee9ec99317c1 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Tue, 8 Apr 2025 11:44:59 -0700 Subject: wifi: nl80211: add link id of transmitted profile for MLO MBSSID During non-transmitted (nontx) profile configuration, interface index of the transmitted (tx) profile is used to retrieve the wireless device (wdev) associated with it. With MLO, this 'wdev' may be part of an MLD with more than one link, hence only interface index is not sufficient anymore to retrieve the correct tx profile. Add a new attribute to configure link id of tx profile. Signed-off-by: Rameshkumar Sundaram Co-developed-by: Muna Sinada Signed-off-by: Muna Sinada Co-developed-by: Aloka Dixit Signed-off-by: Aloka Dixit Link: https://patch.msgid.link/20250408184501.3715887-2-aloka.dixit@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 87cb66fba621..d1848dc8ec99 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1263,11 +1263,13 @@ struct cfg80211_crypto_settings { * struct cfg80211_mbssid_config - AP settings for multi bssid * * @tx_wdev: pointer to the transmitted interface in the MBSSID set + * @tx_link_id: link ID of the transmitted profile in an MLD. * @index: index of this AP in the multi bssid group. * @ema: set to true if the beacons should be sent out in EMA mode. 
*/ struct cfg80211_mbssid_config { struct wireless_dev *tx_wdev; + u8 tx_link_id; u8 index; bool ema; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ddcc4cda74af..e9ccf43fe3c6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -8036,6 +8036,11 @@ enum nl80211_sar_specs_attrs { * Setting this flag is permitted only if the driver advertises EMA support * by setting wiphy->ema_max_profile_periodicity to non-zero. * + * @NL80211_MBSSID_CONFIG_ATTR_TX_LINK_ID: Link ID of the transmitted profile. + * This parameter is mandatory when NL80211_ATTR_MBSSID_CONFIG attributes + * are sent for a non-transmitted profile and if the transmitted profile + * is part of an MLD. For all other cases this parameter is unnecessary. + * * @__NL80211_MBSSID_CONFIG_ATTR_LAST: Internal * @NL80211_MBSSID_CONFIG_ATTR_MAX: highest attribute */ @@ -8047,6 +8052,7 @@ enum nl80211_mbssid_config_attributes { NL80211_MBSSID_CONFIG_ATTR_INDEX, NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX, NL80211_MBSSID_CONFIG_ATTR_EMA, + NL80211_MBSSID_CONFIG_ATTR_TX_LINK_ID, /* keep last */ __NL80211_MBSSID_CONFIG_ATTR_LAST, -- cgit v1.2.3 From f600832794c91d7021d7337104734246b02a2b86 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Tue, 8 Apr 2025 11:45:00 -0700 Subject: wifi: mac80211: restructure tx profile retrieval for MLO MBSSID For MBSSID, each vif (struct ieee80211_vif) stores another vif pointer for the transmitting profile of MBSSID set. This won't suffice for MLO as there may be multiple links, each of which can be part of different MBSSID sets. Hence the information needs to be stored per-link. Additionally, the transmitted profile itself may be part of an MLD hence storing vif will not suffice either. Fix MLO by storing an instance of struct ieee80211_bss_conf for each link. Modify following operations to reflect the above structure updates: - channel switch completion - BSS color change completion - Removing nontransmitted links in ieee80211_stop_mbssid() - drivers retrieving the transmitted link for beacon templates. Signed-off-by: Rameshkumar Sundaram Co-developed-by: Muna Sinada Signed-off-by: Muna Sinada Co-developed-by: Aloka Dixit Signed-off-by: Aloka Dixit Link: https://patch.msgid.link/20250408184501.3715887-3-aloka.dixit@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c305ebfa6e45..fdafc37d17cc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -682,6 +682,9 @@ struct ieee80211_parsed_tpe { * responder functionality. * @ftmr_params: configurable lci/civic parameter when enabling FTM responder. * @nontransmitted: this BSS is a nontransmitted BSS profile + * @tx_bss_conf: Pointer to the BSS configuration of transmitting interface + * if MBSSID is enabled. This pointer is RCU-protected due to CSA finish + * and BSS color change flows accessing it. 
* @transmitter_bssid: the address of transmitter AP * @bssid_index: index inside the multiple BSSID set * @bssid_indicator: 2^bssid_indicator is the maximum number of APs in set @@ -804,6 +807,7 @@ struct ieee80211_bss_conf { struct ieee80211_ftm_responder_params *ftmr_params; /* Multiple BSSID data */ bool nontransmitted; + struct ieee80211_bss_conf __rcu *tx_bss_conf; u8 transmitter_bssid[ETH_ALEN]; u8 bssid_index; u8 bssid_indicator; @@ -2023,7 +2027,6 @@ enum ieee80211_neg_ttlm_res { * @txq: the multicast data TX queue * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see * &enum ieee80211_offload_flags. - * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled. */ struct ieee80211_vif { enum nl80211_iftype type; @@ -2052,8 +2055,6 @@ struct ieee80211_vif { bool probe_req_reg; bool rx_mcast_action_reg; - struct ieee80211_vif *mbssid_tx_vif; - /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; -- cgit v1.2.3 From 55df7c0c62c1727c399fc82150b7f72bee47d4c6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 23 Apr 2025 12:21:38 +1000 Subject: drm/ttm/xe: drop unused force_alloc flag This flag used to be used in the old memory tracking code, that code got migrated into the vmwgfx driver[1], and then got removed from the tree[2], but this piece got left behind. [1] f07069da6b4c ("drm/ttm: move memory accounting into vmwgfx v4") [2] 8aadeb8ad874 ("drm/vmwgfx: Remove the dedicated memory accounting") Cleanup the dead code. Reviewed-by: Matthew Brost Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_bo.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 903cd1030110..cf027558b6db 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -172,7 +172,6 @@ struct ttm_bo_kmap_obj { * @gfp_retry_mayfail: Set the __GFP_RETRY_MAYFAIL when allocation pages. * @allow_res_evict: Allow eviction of reserved BOs. Can be used when multiple * BOs share the same reservation object. - * @force_alloc: Don't check the memory account during suspend or CPU page * faults. Should only be used by TTM internally. * @resv: Reservation object to allow reserved evictions with. * @bytes_moved: Statistics on how many bytes have been moved. @@ -185,7 +184,6 @@ struct ttm_operation_ctx { bool no_wait_gpu; bool gfp_retry_mayfail; bool allow_res_evict; - bool force_alloc; struct dma_resv *resv; uint64_t bytes_moved; }; -- cgit v1.2.3 From bfa4477751e9909bb121eca860f44d1b4259d871 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 22 Apr 2025 17:05:31 -0600 Subject: PM: runtime: Define pm_runtime_put cleanup helper Define a cleanup helper for use with __free to automatically drop the device usage count when the pointer goes out of scope. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Link: https://patch.msgid.link/20250422230534.2295291-2-alex.williamson@redhat.com --- include/linux/pm_runtime.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7fb5a459847e..69d4b2929ee6 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -466,6 +466,8 @@ static inline int pm_runtime_put(struct device *dev) return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } +DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) + /** * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. 
* @dev: Target device. -- cgit v1.2.3 From 52358dd63e348c3b6c488acc105be1aeda8fb923 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 18 Apr 2025 11:04:01 +0200 Subject: net: phy: remove function stubs All callers of these functions depend on PHYLIB or select it directly or indirectly by selecting PHYLINK. Stubs make sense for optional functionality, but that's not the case here. MDIO_XGENE usually is selected by NET_XGENE which also selects PHYLIB. Add a dependency to PHYLIB nevertheless, in order not to break randconfig builds. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/f7a69a1f-60e9-4ac0-8b7c-481e0cc850e7@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 066a28a4b64b..3beaf225ee88 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1753,7 +1753,6 @@ int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum, struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); -#if IS_ENABLED(CONFIG_PHYLIB) int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id); struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode); struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode); @@ -1761,42 +1760,6 @@ struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); void phy_device_free(struct phy_device *phydev); -#else -static inline int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id) -{ - return 0; -} -static inline -struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode) -{ - return 0; -} - -static inline -struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode) -{ - return NULL; -} - -static inline -struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode) -{ - return NULL; -} - -static inline -struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45) -{ - return NULL; -} - -static inline int phy_device_register(struct phy_device *phy) -{ - return 0; -} - -static inline void phy_device_free(struct phy_device *phydev) { } -#endif /* CONFIG_PHYLIB */ void phy_device_remove(struct phy_device *phydev); int phy_get_c45_ids(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); -- cgit v1.2.3 From 834d97843e3bca86f17cc517885f54f3433427b2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Apr 2025 17:03:53 -0700 Subject: ipv6: Protect fib6_link_table() with spinlock. We will get rid of RTNL from RTM_NEWROUTE and SIOCADDRT. If the request specifies a new table ID, fib6_new_table() is called to create a new routing table. Two concurrent requests could specify the same table ID, so we need a lock to protect net->ipv6.fib_table_hash[h]. Let's add a spinlock to protect the hash bucket linkage. 
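For illustration, the linking path being protected could end up looking roughly like this (simplified sketch; the exact lock/unlock variant and the surrounding checks in fib6_link_table() may differ):

	static void fib6_link_table(struct net *net, struct fib6_table *tb)
	{
		unsigned int h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);

		/* serialize against a concurrent fib6_new_table() for the same ID */
		spin_lock_bh(&net->ipv6.fib_table_hash_lock);
		hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
		spin_unlock_bh(&net->ipv6.fib_table_hash_lock);
	}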
Signed-off-by: Kuniyuki Iwashima Acked-by: Paolo Abeni Link: https://patch.msgid.link/20250418000443.43734-13-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 5f2cfd84570a..47dc70d8100a 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -72,6 +72,7 @@ struct netns_ipv6 { struct rt6_statistics *rt6_stats; struct timer_list ip6_fib_timer; struct hlist_head *fib_table_hash; + spinlock_t fib_table_hash_lock; struct fib6_table *fib6_main_tbl; struct list_head fib6_walkers; rwlock_t fib6_walker_lock; -- cgit v1.2.3 From accb46b56bc3bc99ee69ba18b06ca60266ad6fca Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Apr 2025 17:03:54 -0700 Subject: ipv6: Defer fib6_purge_rt() in fib6_add_rt2node() to fib6_add(). The next patch adds a per-nexthop spinlock which protects nh->f6i_list. When rt->nh is not NULL, fib6_add_rt2node() will be called under the lock. fib6_add_rt2node() could call fib6_purge_rt() for another route, which could hold another nexthop lock. Then, a deadlock could happen between two nexthops. Let's defer fib6_purge_rt() until after fib6_add_rt2node(). Signed-off-by: Kuniyuki Iwashima Acked-by: Paolo Abeni Link: https://patch.msgid.link/20250418000443.43734-14-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/net/ip6_fib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7c87873ae211..88b0dd4d8e09 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -198,6 +198,7 @@ struct fib6_info { fib6_destroying:1, unused:4; + struct list_head purge_link; struct rcu_head rcu; struct nexthop *nh; struct fib6_nh fib6_nh[]; -- cgit v1.2.3 From 081efd18326e353c6fbfdeff903a83edde953f72 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Apr 2025 17:03:55 -0700 Subject: ipv6: Protect nh->f6i_list with spinlock and flag. We will get rid of RTNL from RTM_NEWROUTE and SIOCADDRT. Then, we may end up adding a route tied to a dying nexthop. The nexthop itself is not freed during the RCU grace period, but if we link a route after __remove_nexthop_fib() is called for the nexthop, the route will be leaked. To avoid the race between IPv6 route addition under RCU vs nexthop deletion under RTNL, let's add a dead flag and protect it and nh->f6i_list with a spinlock. __remove_nexthop_fib() acquires the nexthop's spinlock and marks the nexthop dead, then calls ip6_del_rt() for the linked routes one by one without the spinlock because fib6_purge_rt() acquires it later. While adding an IPv6 route, fib6_add() acquires the nexthop lock and checks the dead flag just before inserting the route. 
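A simplified sketch of the insertion side described above (hypothetical; the real fib6_add()/fib6_add_rt2node() code and its error handling differ):

	if (rt->nh) {
		spin_lock(&rt->nh->lock);
		if (rt->nh->dead) {
			/* nexthop already torn down by __remove_nexthop_fib() */
			spin_unlock(&rt->nh->lock);
			err = -EINVAL;
			goto out;
		}
		list_add(&rt->nh_list, &rt->nh->f6i_list);
		spin_unlock(&rt->nh->lock);
	}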
Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250418000443.43734-15-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/net/nexthop.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index d9fb44e8b321..572e69cda476 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -152,6 +152,8 @@ struct nexthop { u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; + bool dead; + spinlock_t lock; /* protect dead and f6i_list */ refcount_t refcnt; struct rcu_head rcu; -- cgit v1.2.3 From fe7f7ac8e0c708446ff017453add769ffc15deed Mon Sep 17 00:00:00 2001 From: Terry Junge Date: Wed, 12 Mar 2025 15:23:31 -0700 Subject: HID: usbhid: Eliminate recurrent out-of-bounds bug in usbhid_parse() Update struct hid_descriptor to better reflect the mandatory and optional parts of the HID Descriptor as per the USB HID 1.11 specification. Note: the kernel currently does not parse any optional HID class descriptors, only the mandatory report descriptor. Update all references to member element desc[0] to rpt_desc. Add a test to verify that the bLength and bNumDescriptors values are valid. Replace the for loop with direct access to the mandatory HID class descriptor member for the report descriptor. This eliminates the possibility of getting an out-of-bounds fault. Add a warning message if the HID descriptor contains any unsupported optional HID class descriptors. Reported-by: syzbot+c52569baf0c843f35495@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c52569baf0c843f35495 Fixes: f043bfc98c19 ("HID: usbhid: fix out-of-bounds bug") Cc: stable@vger.kernel.org Signed-off-by: Terry Junge Reviewed-by: Michael Kelley Signed-off-by: Jiri Kosina --- include/linux/hid.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index ef9a90ca0fbd..daae1d6d11a7 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -740,8 +740,9 @@ struct hid_descriptor { __le16 bcdHID; __u8 bCountryCode; __u8 bNumDescriptors; + struct hid_class_descriptor rpt_desc; - struct hid_class_descriptor desc[1]; + struct hid_class_descriptor opt_descs[]; } __attribute__ ((packed)); #define HID_DEVICE(b, g, ven, prod) \ -- cgit v1.2.3 From a058002358b7aaf14674b2a0daa5194bfa5720a1 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Thu, 10 Apr 2025 12:59:27 +0530 Subject: HID: quirks: Add HID_QUIRK_IGNORE_MOUSE quirk Some USB HID mice have both a HID driver and a separate USB driver. The existing hid_mouse_ignore_list in hid-quirks manages this, but unlike the other lists such as hid_ignore_list it is not yet configurable via usbhid.quirks. Thus, for HID devices where the vendor provides a USB driver only for the mouse and lets the keyboard be handled by the generic HID drivers, a configurable quirk spares the user from having to recompile hid-core just to add the device to the table.
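With the quirk exposed through the standard quirks mechanism, it can be selected without rebuilding hid-core, e.g. via a module or boot parameter of the form below (vendor and product IDs are placeholders; HID_QUIRK_IGNORE_MOUSE is BIT(14), i.e. 0x4000):

	usbhid.quirks=0x1234:0x5678:0x4000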
Signed-off-by: Aditya Garg Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index daae1d6d11a7..a1305210b2fd 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -357,6 +357,7 @@ struct hid_item { * | @HID_QUIRK_INPUT_PER_APP: * | @HID_QUIRK_X_INVERT: * | @HID_QUIRK_Y_INVERT: + * | @HID_QUIRK_IGNORE_MOUSE: * | @HID_QUIRK_SKIP_OUTPUT_REPORTS: * | @HID_QUIRK_SKIP_OUTPUT_REPORT_ID: * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP: @@ -382,6 +383,7 @@ struct hid_item { #define HID_QUIRK_INPUT_PER_APP BIT(11) #define HID_QUIRK_X_INVERT BIT(12) #define HID_QUIRK_Y_INVERT BIT(13) +#define HID_QUIRK_IGNORE_MOUSE BIT(14) #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) -- cgit v1.2.3 From 7a3be00771aa9786c7bb4cdb0ee36fee45f67d69 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 23 Apr 2025 15:48:40 +0530 Subject: OPP: Return opp from dev_pm_opp_get() For convenience of users, return back the pointer to the opp from dev_pm_opp_get(), so they can do: opp = dev_pm_opp_get(tmp_opp); No intentional functional impact. Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index c247317aae38..5e4c3428b139 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -161,7 +161,7 @@ struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, unsigned int *bw, int index); -void dev_pm_opp_get(struct dev_pm_opp *opp); +struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp); void dev_pm_opp_put(struct dev_pm_opp *opp); int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp); @@ -345,7 +345,10 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } -static inline void dev_pm_opp_get(struct dev_pm_opp *opp) {} +static inline struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp) +{ + return opp; +} static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} -- cgit v1.2.3 From ead694941686345bfd3f95100d889191cb9e3cda Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 23 Apr 2025 15:48:40 +0530 Subject: OPP: Return opp_table from dev_pm_opp_get_opp_table_ref() For convenience of users, return back the pointer to the opp_table from dev_pm_opp_get_opp_table_ref(), so they can do: opp_table = dev_pm_opp_get_opp_table_ref(tmp_table); No intentional functional impact. 
Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 5e4c3428b139..0deddfa91aca 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -100,7 +100,7 @@ struct dev_pm_opp_data { #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); -void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table); +struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table); void dev_pm_opp_put_opp_table(struct opp_table *opp_table); unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index); @@ -207,7 +207,10 @@ static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device * return ERR_PTR(-EOPNOTSUPP); } -static inline void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table) {} +static inline struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table) +{ + return opp_table; +} static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {} -- cgit v1.2.3 From 57493a145552fe8a9a926f25b14c324c441ca358 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 11 Apr 2025 01:55:20 +0200 Subject: drm: drv: implement __drm_dev_alloc() In the Rust DRM device abstraction we need to allocate a struct drm_device. Currently, there are two options, the deprecated drm_dev_alloc() (which does not support subclassing) and devm_drm_dev_alloc(). The latter supports subclassing, but also manages the initial reference through devres for the parent device. In Rust we want to conform with the subclassing pattern, but do not want to get the initial reference managed for us, since Rust has its own, idiomatic ways to properly deal with it. There are two options to achieve this. 1) Allocate the memory ourselves with a KBox. 2) Implement __drm_dev_alloc(), which supports subclassing, but is unmanaged. While (1) would be possible, it would be cumbersome, since it would require exporting drm_dev_init() and drmm_add_final_kfree(). Hence, go with option (2) and implement __drm_dev_alloc(). Reviewed-by: Maxime Ripard Reviewed-by: Alyssa Rosenzweig Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250410235546.43736-2-dakr@kernel.org Signed-off-by: Danilo Krummrich --- include/drm/drm_drv.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index a43d707b5f36..63b51942d606 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -473,6 +473,11 @@ drmm_cgroup_register_region(struct drm_device *dev, struct drm_device *drm_dev_alloc(const struct drm_driver *driver, struct device *parent); + +void *__drm_dev_alloc(struct device *parent, + const struct drm_driver *driver, + size_t size, size_t offset); + int drm_dev_register(struct drm_device *dev, unsigned long flags); void drm_dev_unregister(struct drm_device *dev); -- cgit v1.2.3 From 63a9362c7172c74f6df19195c3e7491d3ec123ca Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 14 Apr 2025 16:34:24 +0800 Subject: ASoC: codec: tpa6130a2: Remove tpa6130a2_platform_data There is no in-tree user to create the device using platform data 'struct tpa6130a2_platform_data', so drop the dead code. 
Signed-off-by: Peng Fan Link: https://patch.msgid.link/20250414-asoc-tpa6130a2-v1-2-5f4052e656a0@nxp.com Signed-off-by: Mark Brown --- include/sound/tpa6130a2-plat.h | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 include/sound/tpa6130a2-plat.h (limited to 'include') diff --git a/include/sound/tpa6130a2-plat.h b/include/sound/tpa6130a2-plat.h deleted file mode 100644 index a60930e36e93..000000000000 --- a/include/sound/tpa6130a2-plat.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * TPA6130A2 driver platform header - * - * Copyright (C) Nokia Corporation - * - * Author: Peter Ujfalusi - */ - -#ifndef TPA6130A2_PLAT_H -#define TPA6130A2_PLAT_H - -struct tpa6130a2_platform_data { - int power_gpio; -}; - -#endif -- cgit v1.2.3 From 0128816c42b52c6ee339718621aeda85855cd3be Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Thu, 10 Apr 2025 18:51:43 +0800 Subject: genirq: Fix typo in IRQ_NOTCONNECTED comment Fix a minor typo in the comment for IRQ_NOTCONNECTED: "distingiush" is corrected to "distinguish". Signed-off-by: Cheng-Yang Chou Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250410105144.214849-1-yphbchou0911@gmail.com --- include/linux/interrupt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c782a74d2a30..51b6484c0493 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -140,7 +140,7 @@ extern irqreturn_t no_action(int cpl, void *dev_id); /* * If a (PCI) device interrupt is not connected we set dev->irq to * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we - * can distingiush that case from other error returns. + * can distinguish that case from other error returns. * * 0x80000000 is guaranteed to be outside the available range of interrupts * and easy to distinguish from other possible incorrect values. -- cgit v1.2.3 From 49916e22d9530d6cf027e635a5d824c7d698d67f Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 18 Apr 2025 21:08:03 +0100 Subject: timers: Remove unused __round_jiffies(_up) Remove two trivial but long unused functions. __round_jiffies() has been unused since 2008's commit 9c133c469d38 ("Add round_jiffies_up and related routines") __round_jiffies_up() has been unused since 2019's commit 7ae3f6e130e8 ("powerpc/watchdog: Use hrtimers for per-CPU heartbeat") Remove them. Signed-off-by: Dr. 
David Alan Gilbert Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250418200803.427911-1-linux@treblig.org --- include/linux/timer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 10596d7c3a34..e17aac74b5b3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -172,12 +172,10 @@ extern void init_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); -unsigned long __round_jiffies(unsigned long j, int cpu); unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); -unsigned long __round_jiffies_up(unsigned long j, int cpu); unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); -- cgit v1.2.3 From 4094040b1a133206ed893da2cf5e17cc22f7ca7c Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 8 Apr 2025 11:45:59 +0300 Subject: mfd: rohm-bd96801: Support ROHM BD96802 From a software point of view, the ROHM BD96802 PMIC looks a lot like the ROHM BD96801 PMIC, just with a reduced number of voltage rails. Both PMICs provide two physical IRQ lines referred to as INTB and ERRB and contain blocks implementing regulator controls and a watchdog. Hence it makes sense to use the same MFD core for both PMICs. Add support for the ROHM BD96802 scalable companion PMIC to the BD96801 core driver. Signed-off-by: Matti Vaittinen Acked-by: Conor Dooley Link: https://lore.kernel.org/r/05957d194425a79a4f35f287695c3d9ca2ed1ae2.1744090658.git.mazziesaccount@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/rohm-bd96801.h | 2 ++ include/linux/mfd/rohm-bd96802.h | 74 ++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/rohm-generic.h | 1 + 3 files changed, 77 insertions(+) create mode 100644 include/linux/mfd/rohm-bd96802.h (limited to 'include') diff --git a/include/linux/mfd/rohm-bd96801.h b/include/linux/mfd/rohm-bd96801.h index e2d9e10b6364..68c8ac8ad409 100644 --- a/include/linux/mfd/rohm-bd96801.h +++ b/include/linux/mfd/rohm-bd96801.h @@ -40,7 +40,9 @@ * INTB status registers are at range 0x5c ... 0x63 */ #define BD96801_REG_INT_SYS_ERRB1 0x52 +#define BD96801_REG_INT_BUCK2_ERRB 0x56 #define BD96801_REG_INT_SYS_INTB 0x5c +#define BD96801_REG_INT_BUCK2_INTB 0x5e #define BD96801_REG_INT_LDO7_INTB 0x63 /* MASK registers */ diff --git a/include/linux/mfd/rohm-bd96802.h b/include/linux/mfd/rohm-bd96802.h new file mode 100644 index 000000000000..bf4b77944edf --- /dev/null +++ b/include/linux/mfd/rohm-bd96802.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2025 ROHM Semiconductors + * + * The digital interface of the BD96802 PMIC is a reduced version of the + * BD96801. Hence the BD96801 definitions are used for registers and masks + * while this header only holds the IRQ definitions - mainly to avoid gaps in + * IRQ numbers caused by the lack of some BUCKs / LDOs and their respective + * IRQs. 
+ */ + +#ifndef __LINUX_MFD_BD96802_H__ +#define __LINUX_MFD_BD96802_H__ + +/* ERRB IRQs */ +enum { + /* Reg 0x52, 0x53, 0x54 - ERRB system IRQs */ + BD96802_OTP_ERR_STAT, + BD96802_DBIST_ERR_STAT, + BD96802_EEP_ERR_STAT, + BD96802_ABIST_ERR_STAT, + BD96802_PRSTB_ERR_STAT, + BD96802_DRMOS1_ERR_STAT, + BD96802_DRMOS2_ERR_STAT, + BD96802_SLAVE_ERR_STAT, + BD96802_VREF_ERR_STAT, + BD96802_TSD_ERR_STAT, + BD96802_UVLO_ERR_STAT, + BD96802_OVLO_ERR_STAT, + BD96802_OSC_ERR_STAT, + BD96802_PON_ERR_STAT, + BD96802_POFF_ERR_STAT, + BD96802_CMD_SHDN_ERR_STAT, + BD96802_INT_SHDN_ERR_STAT, + + /* Reg 0x55 BUCK1 ERR IRQs */ + BD96802_BUCK1_PVIN_ERR_STAT, + BD96802_BUCK1_OVP_ERR_STAT, + BD96802_BUCK1_UVP_ERR_STAT, + BD96802_BUCK1_SHDN_ERR_STAT, + + /* Reg 0x56 BUCK2 ERR IRQs */ + BD96802_BUCK2_PVIN_ERR_STAT, + BD96802_BUCK2_OVP_ERR_STAT, + BD96802_BUCK2_UVP_ERR_STAT, + BD96802_BUCK2_SHDN_ERR_STAT, +}; + +/* INTB IRQs */ +enum { + /* Reg 0x5c (System INTB) */ + BD96802_TW_STAT, + BD96802_WDT_ERR_STAT, + BD96802_I2C_ERR_STAT, + BD96802_CHIP_IF_ERR_STAT, + + /* Reg 0x5d (BUCK1 INTB) */ + BD96802_BUCK1_OCPH_STAT, + BD96802_BUCK1_OCPL_STAT, + BD96802_BUCK1_OCPN_STAT, + BD96802_BUCK1_OVD_STAT, + BD96802_BUCK1_UVD_STAT, + BD96802_BUCK1_TW_CH_STAT, + + /* Reg 0x5e (BUCK2 INTB) */ + BD96802_BUCK2_OCPH_STAT, + BD96802_BUCK2_OCPL_STAT, + BD96802_BUCK2_OCPN_STAT, + BD96802_BUCK2_OVD_STAT, + BD96802_BUCK2_UVD_STAT, + BD96802_BUCK2_TW_CH_STAT, +}; + +#endif diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index e7d4e6afe388..11b86f9129e3 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -17,6 +17,7 @@ enum rohm_chip_type { ROHM_CHIP_TYPE_BD71837, ROHM_CHIP_TYPE_BD71847, ROHM_CHIP_TYPE_BD96801, + ROHM_CHIP_TYPE_BD96802, ROHM_CHIP_TYPE_AMOUNT }; -- cgit v1.2.3 From 6a309b489215f705c20cb4ed7f85d9c16f768e55 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 8 Apr 2025 11:46:36 +0300 Subject: mfd: bd96801: Support ROHM BD96805 The ROHM BD96805 is from the software perspective almost identical to the ROHM BD96801. The main difference is different voltage tuning ranges. Add support differentiating these PMICs based on the compatible, and invoking the regulator driver with correct IC type. Signed-off-by: Matti Vaittinen Acked-by: Conor Dooley Link: https://lore.kernel.org/r/8680097dc083f191bea56d3ac7c6fe5c005644ec.1744090658.git.mazziesaccount@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/rohm-generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 11b86f9129e3..867acf5baefc 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -18,6 +18,7 @@ enum rohm_chip_type { ROHM_CHIP_TYPE_BD71847, ROHM_CHIP_TYPE_BD96801, ROHM_CHIP_TYPE_BD96802, + ROHM_CHIP_TYPE_BD96805, ROHM_CHIP_TYPE_AMOUNT }; -- cgit v1.2.3 From fecc18a9f59ce9c36eb3622ae77bff5fa5c6d976 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 8 Apr 2025 11:47:10 +0300 Subject: mfd: bd96801: Support ROHM BD96806 The ROHM BD96806 is from the software perspective almost identical to the ROHM BD96802. The main difference is different voltage tuning ranges. Add support differentiating these PMICs based on the compatible, and invoking the regulator driver with correct IC type. 
Signed-off-by: Matti Vaittinen Acked-by: Conor Dooley Link: https://lore.kernel.org/r/ccc95ae33613648fdcba08915777d945412ac5c4.1744090658.git.mazziesaccount@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/rohm-generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 867acf5baefc..579e8dcfcca4 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -19,6 +19,7 @@ enum rohm_chip_type { ROHM_CHIP_TYPE_BD96801, ROHM_CHIP_TYPE_BD96802, ROHM_CHIP_TYPE_BD96805, + ROHM_CHIP_TYPE_BD96806, ROHM_CHIP_TYPE_AMOUNT }; -- cgit v1.2.3 From 7f8ce4d88b42fcbd3350370ec4d02e00979fc5a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 17 Apr 2025 20:16:11 +0200 Subject: pwm: Fix various formatting issues in kernel-doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Return and (where interesting) Context sections, fix some formatting and drop documenting the internal function __pwm_apply(). Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20250417181611.2693599-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index bf0469b2201d..63a17d2b4ec8 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -218,6 +218,8 @@ static inline void pwm_init_state(const struct pwm_device *pwm, * * pwm_get_state(pwm, &state); * duty = pwm_get_relative_duty_cycle(&state, 100); + * + * Returns: rounded relative duty cycle multiplied by @scale */ static inline unsigned int pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale) @@ -244,8 +246,8 @@ pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale) * pwm_set_relative_duty_cycle(&state, 50, 100); * pwm_apply_might_sleep(pwm, &state); * - * This functions returns -EINVAL if @duty_cycle and/or @scale are - * inconsistent (@scale == 0 or @duty_cycle > @scale). + * Returns: 0 on success or ``-EINVAL`` if @duty_cycle and/or @scale are + * inconsistent (@scale == 0 or @duty_cycle > @scale) */ static inline int pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, @@ -351,7 +353,7 @@ struct pwm_chip { * pwmchip_supports_waveform() - checks if the given chip supports waveform callbacks * @chip: The pwm_chip to test * - * Returns true iff the pwm chip support the waveform functions like + * Returns: true iff the pwm chip support the waveform functions like * pwm_set_waveform_might_sleep() and pwm_round_waveform_might_sleep() */ static inline bool pwmchip_supports_waveform(struct pwm_chip *chip) -- cgit v1.2.3 From 5c5c32d7abd970f18ffca89eb6ada399bce496e5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 24 Apr 2025 08:16:23 +0800 Subject: scripts/kernel-doc.py: don't create *.pyc files As reported by Andy, kernel-doc.py is creating a __pycache__ directory at build time. Disable creation of __pycache__ for the libraries used by kernel-doc.py, when executed via the build system or via scripts/find-unused-docs.sh. 
Reported-by: Andy Shevchenko Closes: https://lore.kernel.org/linux-doc/Z_zYXAJcTD-c3xTe@black.fi.intel.com/ Signed-off-by: Mauro Carvalho Chehab Tested-by: Andy Shevchenko Signed-off-by: Jonathan Corbet Message-ID: <158b962ed7cd104f7bbfe69f499ec1cc378864db.1745453655.git.mchehab+huawei@kernel.org> --- include/drm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/Makefile b/include/drm/Makefile index a7bd15d2803e..1df6962556ef 100644 --- a/include/drm/Makefile +++ b/include/drm/Makefile @@ -11,7 +11,7 @@ always-$(CONFIG_DRM_HEADER_TEST) += \ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = \ $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \ - $(srctree)/scripts/kernel-doc -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ + PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE -- cgit v1.2.3 From 309d28576f0a23931a36bf67324868448f79a77e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 27 Mar 2025 12:39:56 -0500 Subject: KVM: SVM: Fix SNP AP destroy race with VMRUN An AP destroy request for a target vCPU is typically followed by an RMPADJUST to remove the VMSA attribute from the page currently being used as the VMSA for the target vCPU. This can result in a vCPU that is about to VMRUN to exit with #VMEXIT_INVALID. This usually does not happen as APs are typically sitting in HLT when being destroyed and therefore the vCPU thread is not running at the time. However, if HLT is allowed inside the VM, then the vCPU could be about to VMRUN when the VMSA attribute is removed from the VMSA page, resulting in a #VMEXIT_INVALID when the vCPU actually issues the VMRUN and causing the guest to crash. An RMPADJUST against an in-use (already running) VMSA results in a #NPF for the vCPU issuing the RMPADJUST, so the VMSA attribute cannot be changed until the VMRUN for target vCPU exits. The Qemu command line option '-overcommit cpu-pm=on' is an example of allowing HLT inside the guest. Update the KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event to include the KVM_REQUEST_WAIT flag. The kvm_vcpu_kick() function will not wait for requests to be honored, so create kvm_make_request_and_kick() that will add a new event request and honor the KVM_REQUEST_WAIT flag. This will ensure that the target vCPU sees the AP destroy request before returning to the initiating vCPU should the target vCPU be in guest mode. 
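A minimal sketch of the intended call site (illustrative only, not the actual sev.c hunk):

	/*
	 * Make the request visible to the target vCPU and, since
	 * KVM_REQ_UPDATE_PROTECTED_GUEST_STATE now carries KVM_REQUEST_WAIT,
	 * wait until that vCPU has exited guest mode before continuing.
	 */
	kvm_make_request_and_kick(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);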
Fixes: e366f92ea99e ("KVM: SEV: Support SEV-SNP AP Creation NAE event") Signed-off-by: Tom Lendacky Link: https://lore.kernel.org/r/fe2c885bf35643dd224e91294edb6777d5df23a4.1743097196.git.thomas.lendacky@amd.com [sean: add a comment explaining the use of smp_send_reschedule()] Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1dedc421b3e3..c685fb417e92 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1505,7 +1505,16 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); -void kvm_vcpu_kick(struct kvm_vcpu *vcpu); + +#ifndef CONFIG_S390 +void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait); + +static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + __kvm_vcpu_kick(vcpu, false); +} +#endif + int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); @@ -2253,6 +2262,14 @@ static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) __kvm_make_request(req, vcpu); } +#ifndef CONFIG_S390 +static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu) +{ + kvm_make_request(req, vcpu); + __kvm_vcpu_kick(vcpu, req & KVM_REQUEST_WAIT); +} +#endif + static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) { return READ_ONCE(vcpu->requests); -- cgit v1.2.3 From fa4b8b3e3a11765a42066f23db7100c09a661c25 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 23 Apr 2025 10:17:24 -0700 Subject: drm/print: Add drm_coredump_printer_is_full Add drm_coredump_printer_is_full which indicates if a drm printer's output is full. Useful to short circuit coredump printing once printer's output is full. v2: - s/drm_printer_is_full/drm_coredump_printer_is_full (Jani) v3: - Bail if not a coredump printer (Michal) Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250423171725.597955-4-matthew.brost@intel.com --- include/drm/drm_print.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index f31eba1c7cab..ab017b05e175 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -344,6 +344,26 @@ drm_coredump_printer(struct drm_print_iterator *iter) return p; } +/** + * drm_coredump_printer_is_full() - DRM coredump printer output is full + * @p: DRM coredump printer + * + * DRM printer output is full, useful to short circuit coredump printing once + * printer is full. + * + * RETURNS: + * True if DRM coredump printer output buffer is full, False otherwise + */ +static inline bool drm_coredump_printer_is_full(struct drm_printer *p) +{ + struct drm_print_iterator *iterator = p->arg; + + if (p->printfn != __drm_printfn_coredump) + return true; + + return !iterator->remain; +} + /** * drm_seq_file_printer - construct a &drm_printer that outputs to &seq_file * @f: the &struct seq_file to output to -- cgit v1.2.3 From 39144062ea335495d659b08c9e3133ab746a0b1b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 23 Apr 2025 00:51:47 +0100 Subject: rxrpc: Remove deadcode Remove three functions that are no longer used. 
rxrpc_get_txbuf() last use was removed by 2020's commit 5e6ef4f1017c ("rxrpc: Make the I/O thread take over the call and local processor work") rxrpc_kernel_get_epoch() last use was removed by 2020's commit 44746355ccb1 ("afs: Don't get epoch from a server because it may be ambiguous") rxrpc_kernel_set_max_life() last use was removed by 2023's commit db099c625b13 ("rxrpc: Fix timeout of a call that hasn't yet been granted a channel") Both of the rxrpc_kernel_* functions were documented. Remove that documentation as well as the code. Signed-off-by: Dr. David Alan Gilbert Acked-by: David Howells Link: https://patch.msgid.link/20250422235147.146460-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- include/net/af_rxrpc.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f15341594cc8..0fb4c41c9bbf 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -88,9 +88,6 @@ int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx, unsigned int debug_id); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); -u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); -void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *, - unsigned long); int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val); int rxrpc_sock_set_security_keyring(struct sock *, struct key *); -- cgit v1.2.3 From bc2550b4e195754fbb24aac1f012d3dd9e3b4edc Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Wed, 23 Apr 2025 13:43:33 +0100 Subject: tcp: fastopen: note that a child socket was created tcp: fastopen: note that a child socket was created This uses up the last bit in a field of tcp_sock. Signed-off-by: Jeremy Harris Reviewed-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20250423124334.4916-2-jgh@exim.org Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1669d95bb0f9..a8af71623ba7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -385,7 +385,8 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. 
option */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */ - syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + syn_fastopen_child:1; /* created TFO passive child socket */ u8 keepalive_probes; /* num of allowed keep alive probes */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ -- cgit v1.2.3 From 2b13042d3636327eb50c8a0ee06f629d52d1b8fb Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Wed, 23 Apr 2025 13:43:34 +0100 Subject: tcp: fastopen: pass TFO child indication through getsockopt tcp: fastopen: pass TFO child indication through getsockopt Note that this uses up the last bit of a field in struct tcp_info Signed-off-by: Jeremy Harris Reviewed-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20250423124334.4916-3-jgh@exim.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index dc8fdc80e16b..bdac8c42fa82 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -184,6 +184,7 @@ enum tcp_fastopen_client_fail { #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ #define TCPI_OPT_USEC_TS 64 /* usec timestamps */ +#define TCPI_OPT_TFO_CHILD 128 /* child from a Fast Open option on SYN */ /* * Sender's congestion state indicating normal or abnormal situations -- cgit v1.2.3 From d57ee99831e336576359beb26e2b140511c99106 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Apr 2025 17:08:08 +0200 Subject: net: ethernet: mtk_wed: annotate RCU release in attach() There are some sparse warnings in wifi, and it seems that it's actually possible to annotate a function pointer with __releases(), making the sparse warnings go away. In a way that also serves as documentation that rcu_read_unlock() must be called in the attach method, so add that annotation. Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20250423150811.456205-2-johannes@sipsolutions.net Signed-off-by: Jakub Kicinski --- include/linux/soc/mediatek/mtk_wed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index a476648858a6..d8949a4ed0dc 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -192,7 +192,7 @@ struct mtk_wed_device { }; struct mtk_wed_ops { - int (*attach)(struct mtk_wed_device *dev); + int (*attach)(struct mtk_wed_device *dev) __releases(RCU); int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, void __iomem *regs, bool reset); int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, -- cgit v1.2.3 From eae324ca644554d5ce363186bee820a088bb74ab Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 20 Apr 2025 12:47:25 +0200 Subject: configfs: Add CONFIGFS_ATTR_PERM helper This new helper allows creating rw files with custom permissions. 
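Usage sketch (names are made up; as with CONFIGFS_ATTR, matching _show()/_store() handlers must exist for the chosen prefix and name):

	static ssize_t demo_secret_show(struct config_item *item, char *page);
	static ssize_t demo_secret_store(struct config_item *item,
					 const char *page, size_t count);

	/* root-only attribute instead of the default S_IRUGO | S_IWUSR */
	CONFIGFS_ATTR_PERM(demo_, secret, S_IRUSR | S_IWUSR);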
Signed-off-by: Richard Weinberger Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20250420104726.2963750-1-richard@nod.at Signed-off-by: Andreas Hindborg --- include/linux/configfs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index c771e9d0d0b9..698520b1bfdb 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -120,15 +120,19 @@ struct configfs_attribute { ssize_t (*store)(struct config_item *, const char *, size_t); }; -#define CONFIGFS_ATTR(_pfx, _name) \ +#define CONFIGFS_ATTR_PERM(_pfx, _name, _perm) \ static struct configfs_attribute _pfx##attr_##_name = { \ .ca_name = __stringify(_name), \ - .ca_mode = S_IRUGO | S_IWUSR, \ + .ca_mode = _perm, \ .ca_owner = THIS_MODULE, \ .show = _pfx##_name##_show, \ .store = _pfx##_name##_store, \ } +#define CONFIGFS_ATTR(_pfx, _name) CONFIGFS_ATTR_PERM( \ + _pfx, _name, S_IRUGO | S_IWUSR \ +) + #define CONFIGFS_ATTR_RO(_pfx, _name) \ static struct configfs_attribute _pfx##attr_##_name = { \ .ca_name = __stringify(_name), \ -- cgit v1.2.3 From be4e3097c1f800b0f39e7e60b2b28eb6603f5d06 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Wed, 23 Apr 2025 22:40:56 +0800 Subject: tty: Remove unused API tty_port_register_device_serdev() Remove API tty_port_register_device_serdev() which has no caller. Signed-off-by: Zijun Hu Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250423-remove_api-v1-1-fac673d09feb@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 1b861f2100b6..08f89a598366 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -147,9 +147,6 @@ struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); -struct device *tty_port_register_device_serdev(struct tty_port *port, - struct tty_driver *driver, unsigned index, - struct device *host, struct device *parent); struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *host, struct device *parent, void *drvdata, -- cgit v1.2.3 From 1404d3509c768732be51d0acf8330689936a692a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 25 Apr 2025 13:13:12 +0200 Subject: serial: switch uart_port::iotype to enum uart_iotype The inline-defined constants look weird. Instead, define a proper enum for them and type uart_port::iotype as that enum. This allows for proper checking in switch-case labels (somewhere, a default or UPIO_UNKNOWN label needs to be added/handled). 
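For example, a driver accessor can now switch over the enum and get compiler coverage for unhandled values (hypothetical snippet):

	switch (port->iotype) {
	case UPIO_PORT:
		outb(value, port->iobase + offset);
		break;
	case UPIO_MEM:
		writeb(value, port->membase + offset);
		break;
	case UPIO_MEM32:
		writel(value, port->membase + offset);
		break;
	case UPIO_UNKNOWN:
	default:
		/* access style not wired up for this port */
		break;
	}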
Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250425111315.1036184-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 743b4afaad4c..914b5e97e056 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -427,6 +427,18 @@ struct uart_icount { typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; +enum uart_iotype { + UPIO_UNKNOWN = -1, + UPIO_PORT = SERIAL_IO_PORT, /* 8b I/O port access */ + UPIO_HUB6 = SERIAL_IO_HUB6, /* Hub6 ISA card */ + UPIO_MEM = SERIAL_IO_MEM, /* driver-specific */ + UPIO_MEM32 = SERIAL_IO_MEM32, /* 32b little endian */ + UPIO_AU = SERIAL_IO_AU, /* Au1x00 and RT288x type IO */ + UPIO_TSI = SERIAL_IO_TSI, /* Tsi108/109 type IO */ + UPIO_MEM32BE = SERIAL_IO_MEM32BE, /* 32b big endian */ + UPIO_MEM16 = SERIAL_IO_MEM16, /* 16b little endian */ +}; + struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ @@ -469,23 +481,13 @@ struct uart_port { unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ - unsigned char iotype; /* io access style */ - -#define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ -#define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ -#define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ -#define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ -#define UPIO_MEM32 (SERIAL_IO_MEM32) /* 32b little endian */ -#define UPIO_AU (SERIAL_IO_AU) /* Au1x00 and RT288x type IO */ -#define UPIO_TSI (SERIAL_IO_TSI) /* Tsi108/109 type IO */ -#define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ -#define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ - unsigned char quirks; /* internal quirks */ /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) + enum uart_iotype iotype; /* io access style */ + unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ @@ -1101,8 +1103,8 @@ static inline bool uart_console_registered(struct uart_port *port) struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); -int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, - char **options); +int uart_parse_earlycon(char *p, enum uart_iotype *iotype, + resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, -- cgit v1.2.3 From 5117f28a7d78d00a44d03463115a0f295dbbb1ff Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 15 Apr 2025 12:35:58 +0100 Subject: comedi: remove the mapping of the Comedi buffer in vmalloc address space Now that all the code that accesses the Comedi buffer data does so page-by-page, using the `virt_addr` member of `struct comedi_buf_page` to point to the data of each page, do not linearly map the buffer into vmalloc address space (pointed to by the `prealloc_buf` member of `struct comedi_async`). That was only done for convenience, but was not done for those drivers that need a DMA coherent buffer, which is allocated in a single chunk. Remove the `prealloc_buf` member as it is no longer used. 
Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20250415114008.5977-4-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedidev.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h index c08416a7364b..4cb0400ad616 100644 --- a/include/linux/comedi/comedidev.h +++ b/include/linux/comedi/comedidev.h @@ -234,16 +234,12 @@ struct comedi_buf_page { * * A COMEDI data buffer is allocated as individual pages, either in * conventional memory or DMA coherent memory, depending on the attached, - * low-level hardware device. (The buffer pages also get mapped into the - * kernel's contiguous virtual address space pointed to by the 'prealloc_buf' - * member of &struct comedi_async.) + * low-level hardware device. * * The buffer is normally freed when the COMEDI device is detached from the * low-level driver (which may happen due to device removal), but if it happens * to be mmapped at the time, the pages cannot be freed until the buffer has - * been munmapped. That is what the reference counter is for. (The virtual - * address space pointed by 'prealloc_buf' is freed when the COMEDI device is - * detached.) + * been munmapped. That is what the reference counter is for. */ struct comedi_buf_map { struct device *dma_hw_dev; @@ -255,7 +251,6 @@ struct comedi_buf_map { /** * struct comedi_async - Control data for asynchronous COMEDI commands - * @prealloc_buf: Kernel virtual address of allocated acquisition buffer. * @prealloc_bufsz: Buffer size (in bytes). * @buf_map: Map of buffer pages. * @max_bufsize: Maximum allowed buffer size (in bytes). @@ -344,7 +339,6 @@ struct comedi_buf_map { * less than or equal to UINT_MAX). */ struct comedi_async { - void *prealloc_buf; unsigned int prealloc_bufsz; struct comedi_buf_map *buf_map; unsigned int max_bufsize; -- cgit v1.2.3 From dbc8c84d5c2e200c79393c17896bdc65e6daef30 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 20 Apr 2025 15:57:39 +0100 Subject: misc: rtsx: Remove deadcode The last uses of rtsx_ms_power_off_card3v3() and rtsx_sd_power_off_card3v3() were removed by 2019's commit bede03a579b3 ("misc: rtsx: Enable OCP for rts522a rts524a rts525a rts5260") The last use of rtsx_pci_transfer_data() was removed by 2024's commit d0f459259c13 ("memstick: rtsx_pci_ms: Remove Realtek PCI memstick driver") Remove them. Signed-off-by: Dr. 
David Alan Gilbert Link: https://lore.kernel.org/r/20250420145739.58337-1-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/rtsx_pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 4612ef09a0c7..3b4c36705a9b 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1312,8 +1312,6 @@ void rtsx_pci_add_cmd(struct rtsx_pcr *pcr, u8 cmd_type, u16 reg_addr, u8 mask, u8 data); void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr); int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout); -int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int num_sg, bool read, int timeout); int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, int num_sg, bool read); void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, -- cgit v1.2.3 From 142ba31d8b4aff086c4f080bd97d437232922177 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sun, 20 Apr 2025 12:19:09 +0800 Subject: PM: wakeup: Do not expose 4 device wakeup source APIs The following 4 APIs are only used by drivers/base/power/wakeup.c internally. - wakeup_source_create() - wakeup_source_destroy() - wakeup_source_add() - wakeup_source_remove() Do not expose them by making them as static functions. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250420-fix_power-v2-1-9b938d2283aa@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/pm_wakeup.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 51e0e8dd5f9e..c838b4a30f87 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -95,10 +95,6 @@ static inline void device_set_wakeup_path(struct device *dev) } /* drivers/base/power/wakeup.c */ -extern struct wakeup_source *wakeup_source_create(const char *name); -extern void wakeup_source_destroy(struct wakeup_source *ws); -extern void wakeup_source_add(struct wakeup_source *ws); -extern void wakeup_source_remove(struct wakeup_source *ws); extern struct wakeup_source *wakeup_source_register(struct device *dev, const char *name); extern void wakeup_source_unregister(struct wakeup_source *ws); @@ -129,17 +125,6 @@ static inline bool device_can_wakeup(struct device *dev) return dev->power.can_wakeup; } -static inline struct wakeup_source *wakeup_source_create(const char *name) -{ - return NULL; -} - -static inline void wakeup_source_destroy(struct wakeup_source *ws) {} - -static inline void wakeup_source_add(struct wakeup_source *ws) {} - -static inline void wakeup_source_remove(struct wakeup_source *ws) {} - static inline struct wakeup_source *wakeup_source_register(struct device *dev, const char *name) { -- cgit v1.2.3 From 477058411c45f225ddfbb4769e35a9a5a95cb826 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 25 Apr 2025 10:11:30 +0200 Subject: pidfs: register pid in pidfs Add simple helpers that allow a struct pid to be pinned via a pidfs dentry/inode. If no pidfs dentry exists a new one will be allocated for it. A reference is taken by pidfs on @pid. The reference must be released via pidfs_put_pid(). This will allow AF_UNIX sockets to allocate a dentry for the peer credentials pid at the time they are recorded where we know the task is still alive. When the task gets reaped its exit status is guaranteed to be recorded and a pidfd can be handed out for the reaped task. 
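As a rough illustration of the intended usage described above (not code from this series), an importer such as AF_UNIX could pin the peer's pid while the task is known to be alive and drop the reference when its own state is torn down. The struct and function names below are invented for the example.

	/* Hedged sketch: pin a struct pid via pidfs while the task is still
	 * alive so a pidfd can be handed out even after the task is reaped. */
	struct example_peer {
		struct pid *pid;
	};

	static int example_record_peer(struct example_peer *peer, struct pid *pid)
	{
		int err = pidfs_register_pid(pid);	/* pins @pid via a pidfs dentry/inode */

		if (err)
			return err;
		peer->pid = pid;
		return 0;
	}

	static void example_release_peer(struct example_peer *peer)
	{
		pidfs_put_pid(peer->pid);		/* releases the reference taken above */
	}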
Link: https://lore.kernel.org/20250425-work-pidfs-net-v2-1-450a19461e75@kernel.org Reviewed-by: Oleg Nesterov Reviewed-by: David Rheinsberg Signed-off-by: Christian Brauner --- include/linux/pidfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 05e6f8f4a026..2676890c4d0d 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -8,5 +8,8 @@ void pidfs_add_pid(struct pid *pid); void pidfs_remove_pid(struct pid *pid); void pidfs_exit(struct task_struct *tsk); extern const struct dentry_operations pidfs_dentry_operations; +int pidfs_register_pid(struct pid *pid); +void pidfs_get_pid(struct pid *pid); +void pidfs_put_pid(struct pid *pid); #endif /* _LINUX_PID_FS_H */ -- cgit v1.2.3 From a71f402acd71a942e59c16270ad61dee06de6e24 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 25 Apr 2025 10:11:32 +0200 Subject: pidfs: get rid of __pidfd_prepare() Fold it into pidfd_prepare() and rename PIDFD_CLONE to PIDFD_STALE to indicate that the passed pid might not have task linkage and no explicit check for that should be performed. Link: https://lore.kernel.org/20250425-work-pidfs-net-v2-3-450a19461e75@kernel.org Reviewed-by: Oleg Nesterov Reviewed-by: David Rheinsberg Signed-off-by: Christian Brauner --- include/linux/pid.h | 2 +- include/uapi/linux/pidfd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pid.h b/include/linux/pid.h index 311ecebd7d56..453ae6d8a68d 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -77,7 +77,7 @@ struct file; struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); -int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); +int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file); void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 2970ef44655a..8c1511edd0e9 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -12,7 +12,7 @@ #define PIDFD_THREAD O_EXCL #ifdef __KERNEL__ #include -#define PIDFD_CLONE CLONE_PIDFD +#define PIDFD_STALE CLONE_PIDFD #endif /* Flags for pidfd_send_signal(). */ -- cgit v1.2.3 From 6cccf837ac8d72e13c651f60d93545f9fb4e84ed Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:19 +0200 Subject: Revert "vt: create ucs_recompose.c using gen_ucs_recompose.py" This reverts commit 54af55b990eda5a6a0140a3cded8094b42c0c3b7. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved.
Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 4d3a34c288e5..b3a911866662 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,7 +30,6 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); -uint32_t ucs_recompose(uint32_t base, uint32_t combining); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -70,11 +69,6 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } - -static inline uint32_t ucs_recompose(uint32_t base, uint32_t combining) -{ - return 0; -} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From 67a4bb27461b0e3b39b27db688b5981b6eb62175 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:21 +0200 Subject: Revert "vt: update ucs_width.c using gen_ucs_width.py" This reverts commit 3a1ab63aa05b4736a7d30ae0a769385662f13def. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..7d778752dcef 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,7 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -bool ucs_is_zero_width(uint32_t cp); +static inline bool ucs_is_zero_width(uint32_t cp) +{ + /* coming soon */ + return false; +} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) -- cgit v1.2.3 From d3e92076c1af713e65edac109499c25c37f38c16 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:24 +0200 Subject: Revert "vt: properly support zero-width Unicode code points" This reverts commit e88391f730e46d208b7fb37b02611d24137af1ef. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved.
Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 7d778752dcef..caf079bcb8c9 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,11 +29,6 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -static inline bool ucs_is_zero_width(uint32_t cp) -{ - /* coming soon */ - return false; -} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -68,11 +63,6 @@ static inline bool ucs_is_double_width(uint32_t cp) { return false; } - -static inline bool ucs_is_zero_width(uint32_t cp) -{ - return false; -} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From e42e607aefc4132d508a0e5724b5d0975d0a53e8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:25 +0200 Subject: Revert "vt: move unicode processing to a separate file" This reverts commit 2acaf27cd7f4f32bfe8bf7335690618e2417e744. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index caf079bcb8c9..c35db4896c37 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,7 +28,6 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); -bool ucs_is_double_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -58,11 +57,6 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } - -static inline bool ucs_is_double_width(uint32_t cp) -{ - return false; -} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From 07bc3f442f47b4d158468c2e0146475bdf009091 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:04 -0400 Subject: vt: move unicode processing to a separate file This will make it easier to maintain. Also make it depend on CONFIG_CONSOLE_TRANSLATIONS.
Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-3-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index c35db4896c37..caf079bcb8c9 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,6 +28,7 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); +bool ucs_is_double_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -57,6 +58,11 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } + +static inline bool ucs_is_double_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From 95b05de0a56699392e67590d000df76fedec609a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:05 -0400 Subject: vt: properly support zero-width Unicode code points Zero-width Unicode code points are causing misalignment in vertically aligned content, disrupting the visual layout. Let's handle zero-width code points more intelligently. Double-width code points are stored in the screen grid followed by a white space code point to create the expected screen layout. When a double-width code point is followed by a zero-width code point in the console incoming bytestream (e.g., an emoji with a presentation selector) then we may replace the white space padding by that zero-width code point instead of dropping it. This maximize screen content information while preserving proper layout. If a zero-width code point is preceded by a single-width code point then the above trick is not possible and such zero-width code point must be dropped. VS16 (Variation Selector 16, U+FE0F) is special as it typically doubles the width of the preceding single-width code point. We handle that case by giving VS16 a width of 1 instead of 0 when that happens. 
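The decision logic described above can be summarised in a small sketch. It is illustrative only and not the actual vt code; the return convention (-1 meaning "drop the code point") is invented for the example.

	/* Hedged sketch of the width rules described above: 2 = double width,
	 * 1 = single width (including VS16 after a single-width base),
	 * 0 = stored in the padding cell of a preceding double-width code point,
	 * -1 = zero-width code point that has to be dropped. */
	static int example_width(u32 cp, bool prev_was_double_width)
	{
		if (ucs_is_double_width(cp))
			return 2;
		if (!ucs_is_zero_width(cp))
			return 1;
		if (cp == 0xFE0F && !prev_was_double_width)
			return 1;	/* VS16 widens the preceding single-width cp */
		if (prev_was_double_width)
			return 0;	/* may replace the white space padding */
		return -1;
	}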
Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-4-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index caf079bcb8c9..7d778752dcef 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,6 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); +static inline bool ucs_is_zero_width(uint32_t cp) +{ + /* coming soon */ + return false; +} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -63,6 +68,11 @@ static inline bool ucs_is_double_width(uint32_t cp) { return false; } + +static inline bool ucs_is_zero_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From 54cda9201c673fd1c5de189d961670999232e49d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:08 -0400 Subject: vt: use new tables in ucs.c This removes the table from ucs.c and substitutes the generated tables from ucs_width_table.h providing comprehensive ranges for double-width and zero-width Unicode code points. Also implements ucs_is_zero_width() to query the new zero-width table. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-7-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 7d778752dcef..b3a911866662 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,11 +29,7 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -static inline bool ucs_is_zero_width(uint32_t cp) -{ - /* coming soon */ - return false; -} +bool ucs_is_zero_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) -- cgit v1.2.3 From b5c574995d842d241d810f3a6a3ebb03c52d57fa Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:11 -0400 Subject: vt: support Unicode recomposition Try replacing any decomposed Unicode sequence by the corresponding recomposed code point. Code point to glyph correspondence works best after recomposition, and this applies mostly to single-width code points, so we can't preserve them in their decomposed form anyway.
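A minimal sketch of how a caller might use the new helper follows; the function and variable names are assumptions for illustration, not code from this patch. Per the stub in the !CONFIG_CONSOLE_TRANSLATIONS case, a return value of 0 means no precomposed form exists.

	/* Hedged sketch: prefer the precomposed code point when one exists. */
	static u32 example_combine(u32 base_cp, u32 combining_cp)
	{
		u32 recomposed = ucs_recompose(base_cp, combining_cp);

		/* When 0 is returned, the caller keeps the base code point and
		 * handles the combining mark separately, as described in the
		 * zero-width patch above. */
		return recomposed ? recomposed : base_cp;
	}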
Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-10-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..8167494229db 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); +u32 ucs_recompose(u32 base, u32 mark); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } + +static inline u32 ucs_recompose(u32 base, u32 mark) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From e2642509e3d60dc90b74d27b47e36cea71a96186 Mon Sep 17 00:00:00 2001 From: Shin Son Date: Wed, 23 Apr 2025 13:41:51 +0900 Subject: dt-bindings: clock: exynosautov920: add cpucl0 clock definitions Add cpucl0 clock definitions. CPUCL0 refers to CPU Cluster 0, which provide clock support for the CPUs on Exynosauto V920 SoC. Signed-off-by: Shin Son Link: https://lore.kernel.org/r/20250423044153.1288077-2-shin.son@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynosautov920.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynosautov920.h b/include/dt-bindings/clock/samsung,exynosautov920.h index 0c681f2ba3d0..c57a1d749700 100644 --- a/include/dt-bindings/clock/samsung,exynosautov920.h +++ b/include/dt-bindings/clock/samsung,exynosautov920.h @@ -162,6 +162,25 @@ #define DOUT_CLKCMU_TAA_NOC 146 #define DOUT_TCXO_DIV2 147 +/* CMU_CPUCL0 */ +#define CLK_FOUT_CPUCL0_PLL 1 + +#define CLK_MOUT_PLL_CPUCL0 2 +#define CLK_MOUT_CPUCL0_CLUSTER_USER 3 +#define CLK_MOUT_CPUCL0_DBG_USER 4 +#define CLK_MOUT_CPUCL0_SWITCH_USER 5 +#define CLK_MOUT_CPUCL0_CLUSTER 6 +#define CLK_MOUT_CPUCL0_CORE 7 + +#define CLK_DOUT_CLUSTER0_ACLK 8 +#define CLK_DOUT_CLUSTER0_ATCLK 9 +#define CLK_DOUT_CLUSTER0_MPCLK 10 +#define CLK_DOUT_CLUSTER0_PCLK 11 +#define CLK_DOUT_CLUSTER0_PERIPHCLK 12 +#define CLK_DOUT_CPUCL0_DBG_NOC 13 +#define CLK_DOUT_CPUCL0_DBG_PCLKDBG 14 +#define CLK_DOUT_CPUCL0_NOCP 15 + /* CMU_PERIC0 */ #define CLK_MOUT_PERIC0_IP_USER 1 #define CLK_MOUT_PERIC0_NOC_USER 2 -- cgit v1.2.3 From f5e5631df596ade5875ba1dc5d640611745d0c0d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 20 Feb 2025 18:20:26 +0200 Subject: devres: Move devm_*_action*() APIs to devres.h We have a newly created header linux/device/devres.h that gathers device managed APIs, so users won't need to include entire device.h for only these ones. Move devm_*_action*() APIs to devres.h as well. 
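For readers unfamiliar with these helpers, a typical use in a driver probe path looks roughly like the sketch below. It is illustrative only; the function names and the choice of a regulator as the managed resource are made up for the example.

	#include <linux/device.h>
	#include <linux/regulator/consumer.h>

	/* Hedged example: register a cleanup action that devres runs
	 * automatically on probe failure or device removal. */
	static void example_disable_regulator(void *data)
	{
		regulator_disable(data);
	}

	static int example_enable_with_cleanup(struct device *dev,
					       struct regulator *reg)
	{
		int ret = regulator_enable(reg);

		if (ret)
			return ret;
		/* If adding the action fails, it is executed immediately
		 * ("or_reset"), so the regulator is never left enabled. */
		return devm_add_action_or_reset(dev, example_disable_regulator, reg);
	}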
Reviewed-by: Raag Jadav Signed-off-by: Andy Shevchenko Reviewed-by: Zijun Hu Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250220162238.2738038-2-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/device.h | 38 -------------------------------------- include/linux/device/devres.h | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 79e49fe494b7..4940db137fff 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -281,44 +281,6 @@ int __must_check device_create_bin_file(struct device *dev, void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); -/* allows to add/remove a custom action to devres stack */ -int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data); - -/** - * devm_remove_action() - removes previously added custom action - * @dev: Device that owns the action - * @action: Function implementing the action - * @data: Pointer to data passed to @action implementation - * - * Removes instance of @action previously added by devm_add_action(). - * Both action and data should match one of the existing entries. - */ -static inline -void devm_remove_action(struct device *dev, void (*action)(void *), void *data) -{ - WARN_ON(devm_remove_action_nowarn(dev, action, data)); -} - -void devm_release_action(struct device *dev, void (*action)(void *), void *data); - -int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); -#define devm_add_action(dev, action, data) \ - __devm_add_action(dev, action, data, #action) - -static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), - void *data, const char *name) -{ - int ret; - - ret = __devm_add_action(dev, action, data, name); - if (ret) - action(data); - - return ret; -} -#define devm_add_action_or_reset(dev, action, data) \ - __devm_add_action_or_reset(dev, action, data, #action) - /** * devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h index 9b49f9915850..9cd1787ef28e 100644 --- a/include/linux/device/devres.h +++ b/include/linux/device/devres.h @@ -8,6 +8,7 @@ #include #include #include +#include struct device; struct device_node; @@ -126,4 +127,42 @@ void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int in #endif +/* allows to add/remove a custom action to devres stack */ +int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data); + +/** + * devm_remove_action() - removes previously added custom action + * @dev: Device that owns the action + * @action: Function implementing the action + * @data: Pointer to data passed to @action implementation + * + * Removes instance of @action previously added by devm_add_action(). + * Both action and data should match one of the existing entries. 
+ */ +static inline +void devm_remove_action(struct device *dev, void (*action)(void *), void *data) +{ + WARN_ON(devm_remove_action_nowarn(dev, action, data)); +} + +void devm_release_action(struct device *dev, void (*action)(void *), void *data); + +int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); +#define devm_add_action(dev, action, data) \ + __devm_add_action(dev, action, data, #action) + +static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), + void *data, const char *name) +{ + int ret; + + ret = __devm_add_action(dev, action, data, name); + if (ret) + action(data); + + return ret; +} +#define devm_add_action_or_reset(dev, action, data) \ + __devm_add_action_or_reset(dev, action, data, #action) + #endif /* _DEVICE_DEVRES_H_ */ -- cgit v1.2.3 From e383bb8f958444620d96386811aacf6a49757996 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 20 Feb 2025 18:20:27 +0200 Subject: devres: Add devm_is_action_added() helper In some code we would like to know if the action in device managed resources was added by devm_add_action() family of calls. Introduce a helper for that. Reviewed-by: Raag Jadav Signed-off-by: Andy Shevchenko Reviewed-by: Zijun Hu Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250220162238.2738038-3-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/device/devres.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h index 9cd1787ef28e..ae696d10faff 100644 --- a/include/linux/device/devres.h +++ b/include/linux/device/devres.h @@ -165,4 +165,6 @@ static inline int __devm_add_action_or_reset(struct device *dev, void (*action)( #define devm_add_action_or_reset(dev, action, data) \ __devm_add_action_or_reset(dev, action, data, #action) +bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data); + #endif /* _DEVICE_DEVRES_H_ */ -- cgit v1.2.3 From b15d97139ff14beb7c300f261e11d22d5a996941 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:11 +0200 Subject: mtd: spinand: Use more specific naming for the reset op SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really means by describing the expected bus topology in the reset macro name. Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 311f145eb4e8..d1b9b630bd83 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -20,7 +20,7 @@ * Standard SPI NAND flash operations */ -#define SPINAND_RESET_OP \ +#define SPINAND_RESET_1S_0_0_OP \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xff, 1), \ SPI_MEM_OP_NO_ADDR, \ SPI_MEM_OP_NO_DUMMY, \ -- cgit v1.2.3 From 7c8896dd4a2a27c84b04dcf0990e6f6b118cb6b2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 18 Apr 2025 16:01:24 +0800 Subject: iommu: Remove IOMMU_DEV_FEAT_SVA None of the drivers implement anything here anymore, remove the dead code. 
Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Tested-by: Zhangfei Gao Link: https://lore.kernel.org/r/20250418080130.1844424-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 062818b51822..f39af28acaeb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -318,18 +318,11 @@ struct iommu_iort_rmr_data { /** * enum iommu_dev_features - Per device IOMMU features - * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses - * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally - * enabling %IOMMU_DEV_FEAT_SVA requires - * %IOMMU_DEV_FEAT_IOPF, but some devices manage I/O Page - * Faults themselves instead of relying on the IOMMU. When - * supported, this feature must be enabled before and - * disabled after %IOMMU_DEV_FEAT_SVA. + * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. * * Device drivers enable a feature using iommu_dev_enable_feature(). */ enum iommu_dev_features { - IOMMU_DEV_FEAT_SVA, IOMMU_DEV_FEAT_IOPF, }; -- cgit v1.2.3 From f984fb09e60e0e2db5ad5be9d4cfcefb2008fda1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 18 Apr 2025 16:01:30 +0800 Subject: iommu: Remove iommu_dev_enable/disable_feature() No external drivers use these interfaces anymore. Furthermore, no existing iommu drivers implement anything in the callbacks. Remove them to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Tested-by: Zhangfei Gao Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/20250418080130.1844424-9-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f39af28acaeb..8d9119b000fe 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -316,16 +316,6 @@ struct iommu_iort_rmr_data { u32 num_sids; }; -/** - * enum iommu_dev_features - Per device IOMMU features - * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. - * - * Device drivers enable a feature using iommu_dev_enable_feature(). 
- */ -enum iommu_dev_features { - IOMMU_DEV_FEAT_IOPF, -}; - #define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */ #define IOMMU_FIRST_GLOBAL_PASID (1U) /*starting range for allocation */ #define IOMMU_PASID_INVALID (-1U) @@ -657,9 +647,6 @@ struct iommu_ops { bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ - int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); - int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - void (*page_response)(struct device *dev, struct iopf_fault *evt, struct iommu_page_response *msg); @@ -1128,9 +1115,6 @@ void dev_iommu_priv_set(struct device *dev, void *priv); extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); -int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); -int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); - int iommu_device_use_default_domain(struct device *dev); void iommu_device_unuse_default_domain(struct device *dev); @@ -1415,18 +1399,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, return -ENODEV; } -static inline int -iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) -{ - return -ENODEV; -} - -static inline int -iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) -{ - return -ENODEV; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; -- cgit v1.2.3 From 0d609a1450fab636c825c66344ab0ecfc1d3a98c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 13:35:48 -0300 Subject: iommu: Add domain_alloc_identity() virtio-iommu has a mode where the IDENTITY domain is actually a paging domain with an identity mapping covering some of the system address space manually created. To support this add a new domain_alloc_identity() op that accepts the struct device so that virtio can allocate and fully finalize a paging domain to return. Signed-off-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/2-v4-ff5fb6b03bd1+288-iommu_virtio_domains_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8d9119b000fe..7b636b92ac7c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -567,6 +567,9 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * @domain_alloc: allocate and return an iommu domain if success. Otherwise * NULL is returned. The domain is not fully initialized until * the caller iommu_domain_alloc() returns. + * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to + * use identity_domain instead. This should only be used + * if dynamic logic is necessary. * @domain_alloc_paging_flags: Allocate an iommu domain corresponding to the * input parameters as defined in * include/uapi/linux/iommufd.h. 
The @user_data can be @@ -625,6 +628,7 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_identity)(struct device *dev); struct iommu_domain *(*domain_alloc_paging_flags)( struct device *dev, u32 flags, const struct iommu_user_data *user_data); -- cgit v1.2.3 From 21c03574df19f0d77cb2e4d28bc02c79b21e656a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 13:35:51 -0300 Subject: iommu: Hide ops.domain_alloc behind CONFIG_FSL_PAMU fsl_pamu is the last user of domain_alloc(), and it is using it to create something weird that doesn't really fit into the iommu subsystem architecture. It is a not a paging domain since it doesn't have any map/unmap ops. It may be some special kind of identity domain. For now just leave it as is. Wrap it's definition in CONFIG_FSL_PAMU to discourage any new drivers from attempting to use it. Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/5-v4-ff5fb6b03bd1+288-iommu_virtio_domains_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7b636b92ac7c..5b64d0242e66 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -564,9 +564,7 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * op is allocated in the iommu driver and freed by the caller after * use. The information type is one of enum iommu_hw_info_type defined * in include/uapi/linux/iommufd.h. - * @domain_alloc: allocate and return an iommu domain if success. Otherwise - * NULL is returned. The domain is not fully initialized until - * the caller iommu_domain_alloc() returns. + * @domain_alloc: Do not use in new drivers * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to * use identity_domain instead. This should only be used * if dynamic logic is necessary. @@ -627,7 +625,9 @@ struct iommu_ops { void *(*hw_info)(struct device *dev, u32 *length, u32 *type); /* Domain allocation and freeing by the iommu driver */ +#if IS_ENABLED(CONFIG_FSL_PAMU) struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); +#endif struct iommu_domain *(*domain_alloc_identity)(struct device *dev); struct iommu_domain *(*domain_alloc_paging_flags)( struct device *dev, u32 flags, -- cgit v1.2.3 From da33e87bd2bfc63531cf7448a3cd7a3d42182f08 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 24 Apr 2025 18:41:28 +0100 Subject: iommu: Handle yet another race around registration Next up on our list of race windows to close is another one during iommu_device_register() - it's now OK again for multiple instances to run their bus_iommu_probe() in parallel, but an iommu_probe_device() can still also race against a running bus_iommu_probe(). As Johan has managed to prove, this has now become a lot more visible on DT platforms wth driver_async_probe where a client driver is attempting to probe in parallel with its IOMMU driver - although commit b46064a18810 ("iommu: Handle race with default domain setup") resolves this from the client driver's point of view, this isn't before of_iommu_configure() has had the chance to attempt to "replay" a probe that the bus walk hasn't even tried yet, and so still cause the out-of-order group allocation behaviour that we're trying to clean up (and now warning about). 
The most reliable thing to do here is to explicitly keep track of the "iommu_device_register() is still running" state, so we can then special-case the ops lookup for the replay path (based on dev->iommu again) to let that think it's still waiting for the IOMMU driver to appear at all. This still leaves the longstanding theoretical case of iommu_bus_notifier() being triggered during bus_iommu_probe(), but it's not so simple to defer a notifier, and nobody's ever reported that being a visible issue, so let's quietly kick that can down the road for now... Reported-by: Johan Hovold Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/88d54c1b48fed8279aa47d30f3d75173685bb26a.1745516488.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5b64d0242e66..bc5363e6b43e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -746,6 +746,7 @@ struct iommu_domain_ops { * @dev: struct device for sysfs handling * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs + * @ready: set once iommu_device_register() has completed successfully */ struct iommu_device { struct list_head list; @@ -754,6 +755,7 @@ struct iommu_device { struct device *dev; struct iommu_group *singleton_group; u32 max_pasids; + bool ready; }; /** -- cgit v1.2.3 From 19da081a28c95fe9b03ce952a2bf4a6f6bf5112c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 23 Apr 2025 17:22:28 +0800 Subject: crypto: api - Add crypto_request_clone and fb Add a helper to clone crypto requests and eliminate code duplication. Use kmemdup in the helper. Also add an fb field to crypto_tfm. This also happens to fix the existing implementations which were buggy. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504230118.1CxUaUoX-lkp@intel.com/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504230004.c7mrY0C6-lkp@intel.com/ Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 9 ++++++--- include/crypto/hash.h | 9 ++++++--- include/crypto/internal/acompress.h | 7 ++++++- include/crypto/internal/hash.h | 7 ++++++- include/linux/crypto.h | 11 ++++++++--- 5 files changed, 32 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 1b30290d6380..933c48a4855b 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -114,7 +114,6 @@ struct crypto_acomp { int (*compress)(struct acomp_req *req); int (*decompress)(struct acomp_req *req); unsigned int reqsize; - struct crypto_acomp *fb; struct crypto_tfm base; }; @@ -553,7 +552,11 @@ static inline struct acomp_req *acomp_request_on_stack_init( return req; } -struct acomp_req *acomp_request_clone(struct acomp_req *req, - size_t total, gfp_t gfp); +static inline struct acomp_req *acomp_request_clone(struct acomp_req *req, + size_t total, gfp_t gfp) +{ + return container_of(crypto_request_clone(&req->base, total, gfp), + struct acomp_req, base); +} #endif diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 5f87d1040a7c..68813a83443b 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -246,7 +246,6 @@ struct crypto_ahash { bool using_shash; /* Underlying algorithm is shash, not ahash */ unsigned int statesize; unsigned int reqsize; - struct crypto_ahash *fb; struct crypto_tfm base; }; @@ -1035,7 +1034,11 @@ static inline struct ahash_request *ahash_request_on_stack_init( return req; } -struct ahash_request *ahash_request_clone(struct ahash_request *req, - size_t total, gfp_t gfp); +static inline struct ahash_request *ahash_request_clone( + struct ahash_request *req, size_t total, gfp_t gfp) +{ + return container_of(crypto_request_clone(&req->base, total, gfp), + struct ahash_request, base); +} #endif /* _CRYPTO_HASH_H */ diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 7eda32619024..6550dad18e0f 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -220,13 +220,18 @@ static inline u32 acomp_request_flags(struct acomp_req *req) return crypto_request_flags(&req->base) & ~CRYPTO_ACOMP_REQ_PRIVATE; } +static inline struct crypto_acomp *crypto_acomp_fb(struct crypto_acomp *tfm) +{ + return __crypto_acomp_tfm(crypto_acomp_tfm(tfm)->fb); +} + static inline struct acomp_req *acomp_fbreq_on_stack_init( char *buf, struct acomp_req *old) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(old); struct acomp_req *req = (void *)buf; - acomp_request_set_tfm(req, tfm->fb); + acomp_request_set_tfm(req, crypto_acomp_fb(tfm)); req->base.flags = CRYPTO_TFM_REQ_ON_STACK; acomp_request_set_callback(req, acomp_request_flags(old), NULL, NULL); req->base.flags &= ~CRYPTO_ACOMP_REQ_PRIVATE; diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 1e80dd084a23..0bc0fefc9b3c 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -272,13 +272,18 @@ static inline bool crypto_ahash_req_chain(struct crypto_ahash *tfm) return crypto_tfm_req_chain(&tfm->base); } +static inline struct crypto_ahash *crypto_ahash_fb(struct crypto_ahash *tfm) +{ + return __crypto_ahash_cast(crypto_ahash_tfm(tfm)->fb); +} + static inline struct 
ahash_request *ahash_fbreq_on_stack_init( char *buf, struct ahash_request *old) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(old); struct ahash_request *req = (void *)buf; - ahash_request_set_tfm(req, tfm->fb); + ahash_request_set_tfm(req, crypto_ahash_fb(tfm)); req->base.flags = CRYPTO_TFM_REQ_ON_STACK; ahash_request_set_callback(req, ahash_request_flags(old), NULL, NULL); req->base.flags &= ~CRYPTO_AHASH_REQ_PRIVATE; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index f691ce01745e..fe75320ff9a3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include @@ -411,9 +411,11 @@ struct crypto_tfm { u32 crt_flags; int node; - + + struct crypto_tfm *fb; + void (*exit)(struct crypto_tfm *tfm); - + struct crypto_alg *__crt_alg; void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; @@ -509,5 +511,8 @@ static inline void crypto_request_set_tfm(struct crypto_async_request *req, req->flags &= ~CRYPTO_TFM_REQ_ON_STACK; } +struct crypto_async_request *crypto_request_clone( + struct crypto_async_request *req, size_t total, gfp_t gfp); + #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.3 From e3b49657228c114099b4591821465dc6f9884c61 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 24 Apr 2025 18:47:30 +0800 Subject: crypto: polyval-generic - Use API partial block handling Use the Crypto API partial block handling. The accelerated export format on x86/arm64 is easier to use so switch the generic polyval algorithm to use that format instead. Signed-off-by: Herbert Xu --- include/crypto/polyval.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/crypto/polyval.h b/include/crypto/polyval.h index 1d630f371f77..d2e63743e592 100644 --- a/include/crypto/polyval.h +++ b/include/crypto/polyval.h @@ -8,15 +8,7 @@ #ifndef _CRYPTO_POLYVAL_H #define _CRYPTO_POLYVAL_H -#include -#include - #define POLYVAL_BLOCK_SIZE 16 #define POLYVAL_DIGEST_SIZE 16 -void polyval_mul_non4k(u8 *op1, const u8 *op2); - -void polyval_update_non4k(const u8 *key, const u8 *in, - size_t nblocks, u8 *accumulator); - #endif -- cgit v1.2.3 From b75fa20c127eb736b0ac9b30be051f526a2316a9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Apr 2025 11:05:29 +0800 Subject: crypto: api - Add crypto_stack_request_init and initialise flags fully Add a helper to initialise crypto stack requests and use it for ahash and acomp. Make sure that the flags field is initialised fully in the helper to silence false-positive warnings from the compiler. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504250751.mdy28Ibr-lkp@intel.com/ Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 3 +-- include/crypto/hash.h | 3 +-- include/crypto/internal/acompress.h | 4 ++-- include/crypto/internal/hash.h | 4 ++-- include/linux/crypto.h | 8 ++++++++ 5 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 933c48a4855b..f1812e92d3e3 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -547,8 +547,7 @@ static inline struct acomp_req *acomp_request_on_stack_init( { struct acomp_req *req = (void *)buf; - acomp_request_set_tfm(req, tfm); - req->base.flags = CRYPTO_TFM_REQ_ON_STACK; + crypto_stack_request_init(&req->base, crypto_acomp_tfm(tfm)); return req; } diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 68813a83443b..c2497c300a28 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -1029,8 +1029,7 @@ static inline struct ahash_request *ahash_request_on_stack_init( { struct ahash_request *req = (void *)buf; - ahash_request_set_tfm(req, tfm); - req->base.flags = CRYPTO_TFM_REQ_ON_STACK; + crypto_stack_request_init(&req->base, crypto_ahash_tfm(tfm)); return req; } diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 6550dad18e0f..b72bb7a6a2b2 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -231,8 +231,8 @@ static inline struct acomp_req *acomp_fbreq_on_stack_init( struct crypto_acomp *tfm = crypto_acomp_reqtfm(old); struct acomp_req *req = (void *)buf; - acomp_request_set_tfm(req, crypto_acomp_fb(tfm)); - req->base.flags = CRYPTO_TFM_REQ_ON_STACK; + crypto_stack_request_init(&req->base, + crypto_acomp_tfm(crypto_acomp_fb(tfm))); acomp_request_set_callback(req, acomp_request_flags(old), NULL, NULL); req->base.flags &= ~CRYPTO_ACOMP_REQ_PRIVATE; req->base.flags |= old->base.flags & CRYPTO_ACOMP_REQ_PRIVATE; diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 0bc0fefc9b3c..33d45275f5bd 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -283,8 +283,8 @@ static inline struct ahash_request *ahash_fbreq_on_stack_init( struct crypto_ahash *tfm = crypto_ahash_reqtfm(old); struct ahash_request *req = (void *)buf; - ahash_request_set_tfm(req, crypto_ahash_fb(tfm)); - req->base.flags = CRYPTO_TFM_REQ_ON_STACK; + crypto_stack_request_init(&req->base, + crypto_ahash_tfm(crypto_ahash_fb(tfm))); ahash_request_set_callback(req, ahash_request_flags(old), NULL, NULL); req->base.flags &= ~CRYPTO_AHASH_REQ_PRIVATE; req->base.flags |= old->base.flags & CRYPTO_AHASH_REQ_PRIVATE; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index fe75320ff9a3..b8d875b11193 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -514,5 +514,13 @@ static inline void crypto_request_set_tfm(struct crypto_async_request *req, struct crypto_async_request *crypto_request_clone( struct crypto_async_request *req, size_t total, gfp_t gfp); +static inline void crypto_stack_request_init(struct crypto_async_request *req, + struct crypto_tfm *tfm) +{ + req->flags = 0; + crypto_request_set_tfm(req, tfm); + req->flags |= CRYPTO_TFM_REQ_ON_STACK; +} + #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.3 From 0f8d42bf128d349ad490e87d5574d211245e40f1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 27 Apr 2025 08:42:54 +0800 Subject: crypto: scatterwalk - Move 
skcipher walk and use it for memcpy_sglist Move the generic part of skcipher walk into scatterwalk, and use it to implement memcpy_sglist. This makes memcpy_sglist do the right thing when two distinct SG lists contain identical subsets (e.g., the AD part of AEAD). Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 12 ------- include/crypto/internal/skcipher.h | 48 +--------------------------- include/crypto/scatterwalk.h | 65 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 6999e10ea09e..f5f730969d72 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -107,18 +107,6 @@ struct crypto_queue { unsigned int max_qlen; }; -struct scatter_walk { - /* Must be the first member, see struct skcipher_walk. */ - union { - void *const addr; - - /* Private API field, do not touch. */ - union crypto_no_such_thing *__addr; - }; - struct scatterlist *sg; - unsigned int offset; -}; - struct crypto_attr_alg { char name[CRYPTO_MAX_ALG_NAME]; }; diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 0cad8e7364c8..d5aa535263f6 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -54,47 +55,6 @@ struct crypto_lskcipher_spawn { struct crypto_spawn base; }; -struct skcipher_walk { - union { - /* Virtual address of the source. */ - struct { - struct { - const void *const addr; - } virt; - } src; - - /* Private field for the API, do not use. */ - struct scatter_walk in; - }; - - union { - /* Virtual address of the destination. */ - struct { - struct { - void *const addr; - } virt; - } dst; - - /* Private field for the API, do not use. */ - struct scatter_walk out; - }; - - unsigned int nbytes; - unsigned int total; - - u8 *page; - u8 *buffer; - u8 *oiv; - void *iv; - - unsigned int ivsize; - - int flags; - unsigned int blocksize; - unsigned int stride; - unsigned int alignmask; -}; - static inline struct crypto_instance *skcipher_crypto_instance( struct skcipher_instance *inst) { @@ -211,7 +171,6 @@ void crypto_unregister_lskciphers(struct lskcipher_alg *algs, int count); int lskcipher_register_instance(struct crypto_template *tmpl, struct lskcipher_instance *inst); -int skcipher_walk_done(struct skcipher_walk *walk, int res); int skcipher_walk_virt(struct skcipher_walk *__restrict walk, struct skcipher_request *__restrict req, bool atomic); @@ -222,11 +181,6 @@ int skcipher_walk_aead_decrypt(struct skcipher_walk *__restrict walk, struct aead_request *__restrict req, bool atomic); -static inline void skcipher_walk_abort(struct skcipher_walk *walk) -{ - skcipher_walk_done(walk, -ECANCELED); -} - static inline void *crypto_skcipher_ctx(struct crypto_skcipher *tfm) { return crypto_tfm_ctx(&tfm->base); diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 94a8585f26b2..15ab743f68c8 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -11,11 +11,64 @@ #ifndef _CRYPTO_SCATTERWALK_H #define _CRYPTO_SCATTERWALK_H -#include - +#include #include #include #include +#include + +struct scatter_walk { + /* Must be the first member, see struct skcipher_walk. */ + union { + void *const addr; + + /* Private API field, do not touch. 
*/ + union crypto_no_such_thing *__addr; + }; + struct scatterlist *sg; + unsigned int offset; +}; + +struct skcipher_walk { + union { + /* Virtual address of the source. */ + struct { + struct { + const void *const addr; + } virt; + } src; + + /* Private field for the API, do not use. */ + struct scatter_walk in; + }; + + union { + /* Virtual address of the destination. */ + struct { + struct { + void *const addr; + } virt; + } dst; + + /* Private field for the API, do not use. */ + struct scatter_walk out; + }; + + unsigned int nbytes; + unsigned int total; + + u8 *page; + u8 *buffer; + u8 *oiv; + void *iv; + + unsigned int ivsize; + + int flags; + unsigned int blocksize; + unsigned int stride; + unsigned int alignmask; +}; static inline void scatterwalk_crypto_chain(struct scatterlist *head, struct scatterlist *sg, int num) @@ -243,4 +296,12 @@ struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2], struct scatterlist *src, unsigned int len); +int skcipher_walk_first(struct skcipher_walk *walk, bool atomic); +int skcipher_walk_done(struct skcipher_walk *walk, int res); + +static inline void skcipher_walk_abort(struct skcipher_walk *walk) +{ + skcipher_walk_done(walk, -ECANCELED); +} + #endif /* _CRYPTO_SCATTERWALK_H */ -- cgit v1.2.3 From 357be13962eac9dca146c15963ca6a28cab60cd1 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 25 Apr 2025 21:20:21 +0200 Subject: ACPICA: Drop stale comment about the header file content ACPICA commit 48e7c4cd893270dab8d05e6d75fbf23d0fcbb267 Commit a025731aec31 ("Restructure ACPI table files") restructed the header files and moved lots of tables that are in the ACPI specification to actbl2.h(e.g.: APIC/MADT, PCCT, PPTT, ..etc). This restructure made the comment stating this header file contains tables that are not in the ACPI specification incorrect. From that commit onwards it has remained as stale. Let us get rid of it as it might be misleading. Link: https://github.com/acpica/acpica/commit/48e7c4cd Reported-by: Sahil Kaushal Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/6168200.lOV4Wx5bFT@rjwysocki.net --- include/acpi/actbl2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 2e917a8f8bca..a3008bae6f48 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ /****************************************************************************** * - * Name: actbl2.h - ACPI Table Definitions (tables not in ACPI spec) + * Name: actbl2.h - ACPI Table Definitions * * Copyright (C) 2000 - 2023, Intel Corp. * -- cgit v1.2.3 From c5c967733c5e187bc8f9993f3e8acee65d3215f2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 15 Apr 2025 16:03:47 +0200 Subject: dt-bindings: interconnect: sm8650: document the MASTER_APSS_NOC Document the MASTER_APSS_NOC interconnect node for the SM8650 SoC system NoC. 
Signed-off-by: Neil Armstrong Acked-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/20250415-topic-sm8650-upstream-icc-apss-noc-v1-1-9e6bea3943d8@linaro.org Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,sm8650-rpmh.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,sm8650-rpmh.h b/include/dt-bindings/interconnect/qcom,sm8650-rpmh.h index 6c1eaf04e241..1216aa352d55 100644 --- a/include/dt-bindings/interconnect/qcom,sm8650-rpmh.h +++ b/include/dt-bindings/interconnect/qcom,sm8650-rpmh.h @@ -150,5 +150,6 @@ #define MASTER_A1NOC_SNOC 0 #define MASTER_A2NOC_SNOC 1 #define SLAVE_SNOC_GEM_NOC_SF 2 +#define MASTER_APSS_NOC 3 #endif -- cgit v1.2.3 From 1313057c369bb4da11bb1f8dffb570ac7b44a4d4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 31 Mar 2025 21:11:11 +0100 Subject: highmem: Add memcpy_folio() The folio equivalent of memcpy_page(). It should correctly and efficiently manage large folios: - If one, neither or both is highmem - If (either or both) offset+len crosses a page boundary - If the two offsets are congruent or not Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/highmem.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 5c6bea81a90e..fd72f66b872a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -404,6 +404,33 @@ static inline void memcpy_page(struct page *dst_page, size_t dst_off, kunmap_local(dst); } +static inline void memcpy_folio(struct folio *dst_folio, size_t dst_off, + struct folio *src_folio, size_t src_off, size_t len) +{ + VM_BUG_ON(dst_off + len > folio_size(dst_folio)); + VM_BUG_ON(src_off + len > folio_size(src_folio)); + + do { + char *dst = kmap_local_folio(dst_folio, dst_off); + const char *src = kmap_local_folio(src_folio, src_off); + size_t chunk = len; + + if (folio_test_highmem(dst_folio) && + chunk > PAGE_SIZE - offset_in_page(dst_off)) + chunk = PAGE_SIZE - offset_in_page(dst_off); + if (folio_test_highmem(src_folio) && + chunk > PAGE_SIZE - offset_in_page(src_off)) + chunk = PAGE_SIZE - offset_in_page(src_off); + memcpy(dst, src, chunk); + kunmap_local(src); + kunmap_local(dst); + + dst_off += chunk; + src_off += chunk; + len -= chunk; + } while (len > 0); +} + static inline void memset_page(struct page *page, size_t offset, int val, size_t len) { -- cgit v1.2.3 From a510c186abfc870e5c74cf953b646c8a2c07bee1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Apr 2025 10:20:20 -0700 Subject: compiler_types: Identify compiler versions for __builtin_dynamic_object_size Clarify when __builtin_dynamic_object_size() is available. All our supported Clang versions support it. GCC 12 and later support it. Link to documentation for both. Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250416172016.work.154-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/compiler_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 501cffddc2f4..20881cc761fa 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -449,6 +449,11 @@ struct ftrace_likely_data { /* * When the size of an allocated object is needed, use the best available * mechanism to find it. (For cases where sizeof() cannot be used.) 
+ * + * Optional: only supported since gcc >= 12 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Object-Size-Checking.html + * clang: https://clang.llvm.org/docs/LanguageExtensions.html#evaluating-object-size */ #if __has_builtin(__builtin_dynamic_object_size) #define __struct_size(p) __builtin_dynamic_object_size(p, 0) -- cgit v1.2.3 From 9a93048476e7cbdde00cdeebe66b6504995eac92 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Apr 2025 10:29:15 -0700 Subject: overflow: Clarify expectations for getting DEFINE_FLEX variable sizes Mention the use of __member_size() for DEFINE_FLEX variables as a hint for getting at the compile-time size of the resulting flexible array member. Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20250416172911.work.854-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/overflow.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0c7e3dcfe867..6ee67c20b575 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -419,6 +419,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Define a zeroed, on-stack, instance of @type structure with a trailing * flexible array member. * Use __struct_size(@name) to get compile-time size of it afterwards. + * Use __member_size(@name->member) to get compile-time size of @name members. */ #define DEFINE_RAW_FLEX(type, name, member, count) \ _DEFINE_FLEX(type, name, member, count, = {}) @@ -436,6 +437,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Define a zeroed, on-stack, instance of @TYPE structure with a trailing * flexible array member. * Use __struct_size(@NAME) to get compile-time size of it afterwards. + * Use __member_size(@NAME->member) to get compile-time size of @NAME members. */ #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) -- cgit v1.2.3 From 655862865c97ff55e4f3f2aaa7708f42f0ea3bd8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Apr 2025 16:14:24 -0700 Subject: mod_devicetable: Enlarge the maximum platform_device_id name length The 20 byte length of struct platform_device_id::name is not long enough for many devices (especially regulators), where the string initialization is getting truncated and missing the trailing NUL byte. 
This is seen with GCC 15's -Wunterminated-string-initialization option: drivers/regulator/hi6421v530-regulator.c:189:19: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization] 189 | { .name = "hi6421v530-regulator" }, | ^~~~~~~~~~~~~~~~~~~~~~ drivers/regulator/hi6421v600-regulator.c:278:19: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization] 278 | { .name = "hi6421v600-regulator" }, | ^~~~~~~~~~~~~~~~~~~~~~ drivers/regulator/lp87565-regulator.c:233:11: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization] 233 | { "lp87565-q1-regulator", }, | ^~~~~~~~~~~~~~~~~~~~~~ sound/soc/fsl/imx-pcm-rpmsg.c:818:19: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization] 818 | { .name = "rpmsg-micfil-channel" }, | ^~~~~~~~~~~~~~~~~~~~~~ drivers/iio/light/hid-sensor-als.c:457:25: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization] 457 | .name = "HID-SENSOR-LISS-0041", | ^~~~~~~~~~~~~~~~~~~~~~ drivers/iio/light/hid-sensor-prox.c:366:25: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (21 chars into 20 available) [-Wunterminated-string-initialization] 366 | .name = "HID-SENSOR-LISS-0226", | ^~~~~~~~~~~~~~~~~~~~~~ Increase the length to 24, slightly more than is currently being used by the affected drivers. The string is used in '%s' format strings and via the module code, which appears to do its own length encoding. This size was chosen because there was already a 4 byte hole in the structure: struct platform_device_id { char name[20]; /* 0 20 */ /* XXX 4 bytes hole, try to pack */ kernel_ulong_t driver_data; /* 24 8 */ /* size: 32, cachelines: 1, members: 2 */ /* sum members: 28, holes: 1, sum holes: 4 */ /* last cacheline: 32 bytes */ }; Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250415231420.work.066-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index bd7e60c0b72f..ebcee9328168 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -601,7 +601,7 @@ struct dmi_system_id { #define DMI_MATCH(a, b) { .slot = a, .substr = b } #define DMI_EXACT_MATCH(a, b) { .slot = a, .substr = b, .exact_match = 1 } -#define PLATFORM_NAME_SIZE 20 +#define PLATFORM_NAME_SIZE 24 #define PLATFORM_MODULE_PREFIX "platform:" struct platform_device_id { -- cgit v1.2.3 From 34dd0fecaa02d654c447d43a7e4c72f9b18b7033 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 25 Apr 2025 16:55:31 +0200 Subject: net: sched: generalize check for no-queue qdisc on TX queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "noqueue" qdisc can either be directly attached, or get default attached if net_device priv_flags has IFF_NO_QUEUE. 
In both cases, the allocated Qdisc structure gets its enqueue function pointer reset to NULL by noqueue_init() via noqueue_qdisc_ops. This is a common case for software virtual net_devices. For these devices with no-queue, the transmission path in __dev_queue_xmit() will bypass the qdisc layer, directly invoking the device driver's ndo_start_xmit (via dev_hard_start_xmit). In this mode the device driver is not allowed to ask for packets to be queued (either via returning NETDEV_TX_BUSY or stopping the TXQ). The simplest and most reliable way to identify this no-queue case is by checking if enqueue == NULL. The vrf driver currently open-codes this check (!qdisc->enqueue). While functionally correct, this low-level detail is better encapsulated in a dedicated helper for clarity and long-term maintainability. To make this behavior more explicit and reusable, this patch introduces a new helper: qdisc_txq_has_no_queue(). The helper will also be used by the veth driver in the next patch, which introduces optional qdisc-based backpressure. This is a non-functional change. Reviewed-by: David Ahern Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/174559293172.827981.7583862632045264175.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d48c657191cd..b6c177f7141c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -803,6 +803,14 @@ static inline bool qdisc_tx_changing(const struct net_device *dev) return false; } +/* "noqueue" qdisc identified by not having any enqueue, see noqueue_init() */ +static inline bool qdisc_txq_has_no_queue(const struct netdev_queue *txq) +{ + struct Qdisc *qdisc = rcu_access_pointer(txq->qdisc); + + return qdisc->enqueue == NULL; +} + /* Is the device using the noop qdisc on all queues? */ static inline bool qdisc_tx_is_noop(const struct net_device *dev) { -- cgit v1.2.3 From 0014af802193aa3547484b5db0f1a258bad28c81 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Apr 2025 15:55:53 +0200 Subject: netfilter: nf_tables: export set count and backend name to userspace nf_tables picks a suitable set backend implementation (bitmap, hash, rbtree..) based on the userspace requirements. Figuring out the chosen backend requires information about the set flags and the kernel version. Export this to userspace so nft can include this information in '--debug=netlink' output.
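As a rough sketch of how the set dump path could fill the two new attributes; the backend-name and element-count accessors below are hypothetical placeholders, not the actual nf_tables internals:

	/* Sketch only: nft_set_backend_name() and nft_set_count() stand in
	 * for whatever accessors nf_tables really uses internally.
	 */
	static int nf_tables_fill_set_backend(struct sk_buff *skb,
					      const struct nft_set *set)
	{
		if (nla_put_string(skb, NFTA_SET_TYPE, nft_set_backend_name(set)) ||
		    nla_put_be32(skb, NFTA_SET_COUNT, htonl(nft_set_count(set))))
			return -EMSGSIZE;
		return 0;
	}

With both attributes present, nft can show the chosen backend and the number of elements as part of its '--debug=netlink' output.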
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 49c944e78463..7d6bc19a0153 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -394,6 +394,8 @@ enum nft_set_field_attributes { * @NFTA_SET_HANDLE: set handle (NLA_U64) * @NFTA_SET_EXPR: set expression (NLA_NESTED: nft_expr_attributes) * @NFTA_SET_EXPRESSIONS: list of expressions (NLA_NESTED: nft_list_attributes) + * @NFTA_SET_TYPE: set backend type (NLA_STRING) + * @NFTA_SET_COUNT: number of set elements (NLA_U32) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -415,6 +417,8 @@ enum nft_set_attributes { NFTA_SET_HANDLE, NFTA_SET_EXPR, NFTA_SET_EXPRESSIONS, + NFTA_SET_TYPE, + NFTA_SET_COUNT, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) -- cgit v1.2.3 From c1f7375a246e5f810191a6c3031d2fa2b80e9f5e Mon Sep 17 00:00:00 2001 From: Jelle van der Waa Date: Mon, 14 Apr 2025 15:18:40 +0200 Subject: power: supply: support charge_types in extensions Similar to charge_behaviour, charge_types is an enum option where reading the property shows the supported values, with the active value surrounded by brackets. To be able to use it with a power_supply extension, a bitmask with the supported charge_types values has to be added to power_supply_ext. Signed-off-by: Jelle van der Waa Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250414131840.382756-2-jvanderwaa@redhat.com Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 888824592953..c4cb854971f5 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -288,6 +288,7 @@ struct power_supply_desc { struct power_supply_ext { const char *const name; u8 charge_behaviours; + u32 charge_types; const enum power_supply_property *properties; size_t num_properties; -- cgit v1.2.3 From 9cf2317b795d6cde0fccb8744b5a080a9586020e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 23 Apr 2025 22:26:32 -0500 Subject: scsi: target: Move I/O path stats to per CPU The atomic use in the main I/O path is causing perf issues when using higher performance backend devices and multiple queues. This moves the stats to per CPU. Combined with the next patch that moves the non_ordered/delayed_cmd_count to per CPU, this improves IOPS by up to 33% for 8K IOs when using 4 or more queues. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20250424032741.16216-2-michael.christie@oracle.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K.
Petersen --- include/target/target_core_base.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 97099a5e3f6c..05e3673607b8 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -669,15 +669,19 @@ struct se_lun_acl { struct se_ml_stat_grps ml_stat_grps; }; +struct se_dev_entry_io_stats { + u32 total_cmds; + u32 read_bytes; + u32 write_bytes; +}; + struct se_dev_entry { u64 mapped_lun; u64 pr_res_key; u64 creation_time; bool lun_access_ro; u32 attach_count; - atomic_long_t total_cmds; - atomic_long_t read_bytes; - atomic_long_t write_bytes; + struct se_dev_entry_io_stats __percpu *stats; /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ struct kref pr_kref; struct completion pr_comp; @@ -800,6 +804,12 @@ struct se_device_queue { struct se_cmd_queue sq; }; +struct se_dev_io_stats { + u32 total_cmds; + u32 read_bytes; + u32 write_bytes; +}; + struct se_device { /* Used for SAM Task Attribute ordering */ u32 dev_cur_ordered_id; @@ -821,9 +831,7 @@ struct se_device { atomic_long_t num_resets; atomic_long_t aborts_complete; atomic_long_t aborts_no_task; - atomic_long_t num_cmds; - atomic_long_t read_bytes; - atomic_long_t write_bytes; + struct se_dev_io_stats __percpu *stats; /* Active commands on this virtual SE device */ atomic_t non_ordered; bool ordered_sync_in_progress; -- cgit v1.2.3 From 268975a87c7b6f6b0ceb62df236c1e1b08b89379 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 23 Apr 2025 22:26:33 -0500 Subject: scsi: target: Move delayed/ordered tracking to per CPU The atomic use from the delayed/ordered tracking is causing perf issues when using higher perf backend devices and multiple queues. This moves the values to a per CPU counter. Combined with the per CPU stats patch, this improves IOPS by up to 33% for 8K IOS when using 4 or more queues from the initiator. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20250424032741.16216-3-michael.christie@oracle.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/target/target_core_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 05e3673607b8..a52d4967c0d3 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -157,6 +157,7 @@ enum se_cmd_flags_table { SCF_USE_CPUID = (1 << 16), SCF_TASK_ATTR_SET = (1 << 17), SCF_TREAT_READ_AS_NORMAL = (1 << 18), + SCF_TASK_ORDERED_SYNC = (1 << 19), }; /* @@ -833,9 +834,8 @@ struct se_device { atomic_long_t aborts_no_task; struct se_dev_io_stats __percpu *stats; /* Active commands on this virtual SE device */ - atomic_t non_ordered; + struct percpu_ref non_ordered; bool ordered_sync_in_progress; - atomic_t delayed_cmd_count; atomic_t dev_qf_count; u32 export_count; spinlock_t delayed_cmd_lock; -- cgit v1.2.3 From e43b8bb56e537bfc8d9076793091e7679020fc9c Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Thu, 20 Mar 2025 10:29:24 -0700 Subject: entry: Inline syscall_exit_to_user_mode() Similar to commit 221a164035fd ("entry: Move syscall_enter_from_user_mode() to header file"), move syscall_exit_to_user_mode() to the header file as well. Testing was done with the byte-unixbench syscall benchmark (which calls getpid) and QEMU. 
On riscv I measured a 7.09246% improvement, on x86 a 2.98843% improvement, on loongarch a 6.07954% improvement, and on s390 a 11.1328% improvement. The Intel bot also reported "kernel test robot noticed a 1.9% improvement of stress-ng.seek.ops_per_sec". Signed-off-by: Charlie Jenkins Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/all/20250320-riscv_optimize_entry-v6-4-63e187e26041@rivosinc.com Link: https://lore.kernel.org/linux-riscv/202502051555.85ae6844-lkp@intel.com/ --- include/linux/entry-common.h | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fc61d0205c97..f94f3fdf15fc 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -14,6 +14,7 @@ #include #include +#include /* * Define dummy _TIF work flags if not defined by the architecture or for @@ -366,6 +367,15 @@ static __always_inline void exit_to_user_mode(void) lockdep_hardirqs_on(CALLER_ADDR0); } +/** + * syscall_exit_work - Handle work before returning to user mode + * @regs: Pointer to current pt_regs + * @work: Current thread syscall work + * + * Do one-time syscall specific work. + */ +void syscall_exit_work(struct pt_regs *regs, unsigned long work); + /** * syscall_exit_to_user_mode_work - Handle work before returning to user mode * @regs: Pointer to currents pt_regs @@ -379,7 +389,30 @@ static __always_inline void exit_to_user_mode(void) * make the final state transitions. Interrupts must stay disabled between * return from this function and the invocation of exit_to_user_mode(). */ -void syscall_exit_to_user_mode_work(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + unsigned long nr = syscall_get_nr(current, regs); + + CT_WARN_ON(ct_state() != CT_STATE_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) + local_irq_enable(); + } + + rseq_syscall(regs); + + /* + * Do one-time syscall specific work. If these work items are + * enabled, we want to run them exactly once per syscall exit with + * interrupts enabled. + */ + if (unlikely(work & SYSCALL_WORK_EXIT)) + syscall_exit_work(regs, work); + local_irq_disable_exit_to_user(); + exit_to_user_mode_prepare(regs); +} /** * syscall_exit_to_user_mode - Handle work before returning to user mode @@ -410,7 +443,13 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * exit_to_user_mode(). This function is preferred unless there is a * compelling architectural reason to use the separate functions. */ -void syscall_exit_to_user_mode(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) +{ + instrumentation_begin(); + syscall_exit_to_user_mode_work(regs); + instrumentation_end(); + exit_to_user_mode(); +} /** * irqentry_enter_from_user_mode - Establish state before invoking the irq handler -- cgit v1.2.3 From d54e34c58aa224bdd0b9ea0f429c5a90d91db2c7 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:12 +0200 Subject: mtd: spinand: Use more specific naming for the write enable/disable op SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. 
Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really means by describing the expected bus topology in the write enable/disable macro names. Reviewed-by: Tudor Ambarus [Miquel: Fixed conflicts with -next by updating esmt and micron drivers] Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index d1b9b630bd83..87d8f9f2017e 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -26,7 +26,7 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_WR_EN_DIS_OP(enable) \ +#define SPINAND_WR_EN_DIS_1S_0_0_OP(enable) \ SPI_MEM_OP(SPI_MEM_OP_CMD((enable) ? 0x06 : 0x04, 1), \ SPI_MEM_OP_NO_ADDR, \ SPI_MEM_OP_NO_DUMMY, \ -- cgit v1.2.3 From 2a294fa215289aff4b7b0c0a70f24dcd621df497 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:13 +0200 Subject: mtd: spinand: Use more specific naming for the read ID op SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really means by describing the expected bus topology in the read ID macro name. Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 87d8f9f2017e..dfb5c74cad6d 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -32,7 +32,7 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_READID_OP(naddr, ndummy, buf, len) \ +#define SPINAND_READID_1S_1S_1S_OP(naddr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1), \ SPI_MEM_OP_ADDR(naddr, 0, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ -- cgit v1.2.3 From 429330cd1cfe860133d1fbdd49e9e081524333cf Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:14 +0200 Subject: mtd: spinand: Use more specific naming for the get/set feature ops SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really mean by describing the expected bus topology in the get/set feature macro names. 
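For readers not familiar with the convention, the three groups in the new names describe the bus used for the command, address and data phases respectively: the number gives the bus width and S/D distinguishes single rate from DTR, so 1S_1S_1S means all three phases run on a single-bit SDR bus, while 1S_1D_4D means a single-bit SDR command followed by DTR address and quad DTR data phases. A hedged sketch of a call site after the rename; the register address 0xc0 and the scratch buffer are only illustrative:

	/* Same opcode (0x0f) and single-bit phases as the old
	 * SPINAND_GET_FEATURE_OP(); only the macro name changed.
	 */
	struct spi_mem_op op = SPINAND_GET_FEATURE_1S_1S_1S_OP(0xc0,
								spinand->scratchbuf);
	int ret = spi_mem_exec_op(spinand->spimem, &op);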
Reviewed-by: Tudor Ambarus [Miquel: Fixed conflicts with -next by updating macronix driver] Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index dfb5c74cad6d..8ea40d838dc2 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -38,13 +38,13 @@ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) -#define SPINAND_SET_FEATURE_OP(reg, valptr) \ +#define SPINAND_SET_FEATURE_1S_1S_1S_OP(reg, valptr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x1f, 1), \ SPI_MEM_OP_ADDR(1, reg, 1), \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_OUT(1, valptr, 1)) -#define SPINAND_GET_FEATURE_OP(reg, valptr) \ +#define SPINAND_GET_FEATURE_1S_1S_1S_OP(reg, valptr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0f, 1), \ SPI_MEM_OP_ADDR(1, reg, 1), \ SPI_MEM_OP_NO_DUMMY, \ -- cgit v1.2.3 From 7e8533b273ee9e7243cc57afec835c20135ebbb2 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:15 +0200 Subject: mtd: spinand: Use more specific naming for the erase op SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really means by describing the expected bus topology in the erase macro name. Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 8ea40d838dc2..964f1244d6a6 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -50,7 +50,7 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_IN(1, valptr, 1)) -#define SPINAND_BLK_ERASE_OP(addr) \ +#define SPINAND_BLK_ERASE_1S_1S_0_OP(addr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xd8, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ -- cgit v1.2.3 From 7528c97c0c2ac4c6c09b5aae52382958a57122fe Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:16 +0200 Subject: mtd: spinand: Use more specific naming for the page read op SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really means by describing the expected bus topology in the page read macro name. 
Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 964f1244d6a6..df07bc55aa4d 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -56,7 +56,7 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_PAGE_READ_OP(addr) \ +#define SPINAND_PAGE_READ_1S_1S_0_OP(addr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x13, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ -- cgit v1.2.3 From ea2087d4e66d0b927918cc9048576aca6a0446ad Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:17 +0200 Subject: mtd: spinand: Use more specific naming for the (single) read from cache ops SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really mean by describing the expected bus topology in the (single) read from cache macro names. Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index df07bc55aa4d..c702e1ed8fe8 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -62,32 +62,32 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_PAGE_READ_FROM_CACHE_OP(addr, ndummy, buf, len, ...) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(addr, ndummy, buf, len, ...) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1), \ SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) -#define SPINAND_PAGE_READ_FROM_CACHE_FAST_OP(addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) -#define SPINAND_PAGE_READ_FROM_CACHE_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) -#define SPINAND_PAGE_READ_FROM_CACHE_FAST_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) -#define SPINAND_PAGE_READ_FROM_CACHE_DTR_OP(addr, ndummy, buf, len, freq) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0d, 1), \ SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ -- cgit v1.2.3 From 684f7105e8534f6500de389c089ba204cb1c8058 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:18 +0200 Subject: mtd: spinand: Use more specific naming for the (dual output) read from cache ops SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. 
Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really mean by describing the expected bus topology in the (dual output) read from cache macro names. Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index c702e1ed8fe8..f84991d05d85 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -94,19 +94,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 1), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 2)) -#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 2)) -#define SPINAND_PAGE_READ_FROM_CACHE_X2_DTR_OP(addr, ndummy, buf, len, freq) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3d, 1), \ SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ -- cgit v1.2.3 From d9de177996d74c0cc44220003953ba2d2bece0ac Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:19 +0200 Subject: mtd: spinand: Use more specific naming for the (dual IO) read from cache ops SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really mean by describing the expected bus topology in the (dual IO) read from cache macro names. While at modifying them, better reordering the macros to group them all by bus topology which now feels more intuitive. 
Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index f84991d05d85..a8a963f57d43 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -113,6 +113,25 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ + SPI_MEM_OP_ADDR(2, addr, 2), \ + SPI_MEM_OP_DUMMY(ndummy, 2), \ + SPI_MEM_OP_DATA_IN(len, buf, 2)) + +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ + SPI_MEM_OP_ADDR(3, addr, 2), \ + SPI_MEM_OP_DUMMY(ndummy, 2), \ + SPI_MEM_OP_DATA_IN(len, buf, 2)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbd, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 2), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 2), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) + #define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ @@ -132,25 +151,6 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ - SPI_MEM_OP_ADDR(2, addr, 2), \ - SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) - -#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP_3A(addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ - SPI_MEM_OP_ADDR(3, addr, 2), \ - SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) - -#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_DTR_OP(addr, ndummy, buf, len, freq) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(0xbd, 1), \ - SPI_MEM_DTR_OP_ADDR(2, addr, 2), \ - SPI_MEM_DTR_OP_DUMMY(ndummy, 2), \ - SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ - SPI_MEM_OP_MAX_FREQ(freq)) - #define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ -- cgit v1.2.3 From 1deae734cc1c7e976d588e7d8f46af2bb9ef5656 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:20 +0200 Subject: mtd: spinand: Use more specific naming for the (quad output) read from cache ops SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really mean by describing the expected bus topology in the (quad output) read from cache macro names. 
Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index a8a963f57d43..cbeec0f53f88 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -132,19 +132,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) -#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) -#define SPINAND_PAGE_READ_FROM_CACHE_X4_DTR_OP(addr, ndummy, buf, len, freq) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6d, 1), \ SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ -- cgit v1.2.3 From 9c6911072c6e8b128ccdb7dd00efa13c47513074 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:21 +0200 Subject: mtd: spinand: Use more specific naming for the (quad IO) read from cache ops SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really mean by describing the expected bus topology in the (quad IO) read from cache macro names. 
Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index cbeec0f53f88..70f3e69e56b8 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -151,19 +151,19 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) -#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(3, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) -#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_DTR_OP(addr, ndummy, buf, len, freq) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xed, 1), \ SPI_MEM_DTR_OP_ADDR(2, addr, 4), \ SPI_MEM_DTR_OP_DUMMY(ndummy, 4), \ -- cgit v1.2.3 From 36e461894cf31e33ebd869f3fd85c5d7c68a22bc Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:22 +0200 Subject: mtd: spinand: Use more specific naming for the program execution op SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really means by describing the expected bus topology in the program execution macro name. Acked-by: Tudor Ambarus [Miquel: Fixed conflicts with -next by updating esmt and micron drivers] Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 70f3e69e56b8..d84206b9e824 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -170,7 +170,7 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PROG_EXEC_OP(addr) \ +#define SPINAND_PROG_EXEC_1S_1S_0_OP(addr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ -- cgit v1.2.3 From 07cdbae7f84190983f9b07133ce5b6f2634c95cd Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:23 +0200 Subject: mtd: spinand: Use more specific naming for the (single) program load op SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really means by describing the expected bus topology in the (single) program load macro name. While at modifying it, better add the missing_ OP suffix to align with all the other macros of the same kind. 
Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index d84206b9e824..d1cc2cdbd4a1 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -176,7 +176,7 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_PROG_LOAD(reset, addr, buf, len) \ +#define SPINAND_PROG_LOAD_1S_1S_1S_OP(reset, addr, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x02 : 0x84, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ -- cgit v1.2.3 From ac3a4b17e03b079c00eb61456364bcdbf65f3436 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:24 +0200 Subject: mtd: spinand: Use more specific naming for the (quad) program load op SPI operations have been initially described through macros implicitly implying the use of a single SPI SDR bus. Macros for supporting dual and quad I/O transfers have been added on top, generally inspired by vendor naming, followed by DTR operations. Soon we might see octal and even octal DTR operations as well (including the opcode byte). Let's clarify what the macro really means by describing the expected bus topology in the (quad) program load macro name. While at modifying it, better add the missing_ OP suffix to align with all the other macros of the same kind. Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index d1cc2cdbd4a1..c70d17e0b308 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -182,7 +182,7 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_OUT(len, buf, 1)) -#define SPINAND_PROG_LOAD_X4(reset, addr, buf, len) \ +#define SPINAND_PROG_LOAD_1S_1S_4S_OP(reset, addr, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x32 : 0x34, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ -- cgit v1.2.3 From 51b252cce172cbfb21dfd5e544dcbefc649f3daa Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 Apr 2025 11:19:25 +0200 Subject: mtd: spinand: Define octal operations SPI NAND chips may support octal "read from cache" and "program load" transfers. List the opcodes by defining the relevant macros describing these operations. However, due to the hardware available I had, 0x82 and 0xc2 are untested and given as reference, only 0xc4 could be (successfully) tested. Controllers supporting operations mixing SDR and DTR operations might even leverage octal DTR data I/O transfers. 
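As a sketch of where these new macros would be consumed, a chip driver could advertise the octal variants in its read-from-cache op list; the dummy-cycle counts and maximum frequency below are placeholders, not datasheet values:

	static SPINAND_OP_VARIANTS(read_cache_variants,
			SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(0, 2, NULL, 0, 104 * HZ_PER_MHZ),
			SPINAND_PAGE_READ_FROM_CACHE_1S_1S_8S_OP(0, 1, NULL, 0, 104 * HZ_PER_MHZ),
			SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(0, 1, NULL, 0),
			SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(0, 1, NULL, 0));

The core then selects a variant the controller actually supports from this list, exactly as it already does for the existing single, dual and quad entries.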
Acked-by: Tudor Ambarus Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index c70d17e0b308..811a0f356315 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -170,6 +170,27 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_8S_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x8b, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 8), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xcb, 1), \ + SPI_MEM_OP_ADDR(2, addr, 8), \ + SPI_MEM_OP_DUMMY(ndummy, 8), \ + SPI_MEM_OP_DATA_IN(len, buf, 8), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_8D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x9d, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 8), \ + SPI_MEM_OP_MAX_FREQ(freq)) + #define SPINAND_PROG_EXEC_1S_1S_0_OP(addr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ @@ -188,6 +209,18 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_OUT(len, buf, 4)) +#define SPINAND_PROG_LOAD_1S_1S_8S_OP(addr, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x82, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(len, buf, 8)) + +#define SPINAND_PROG_LOAD_1S_8S_8S_OP(reset, addr, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0xc2 : 0xc4, 1), \ + SPI_MEM_OP_ADDR(2, addr, 8), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(len, buf, 8)) + /** * Standard SPI NAND flash commands */ -- cgit v1.2.3 From ee000969f28bf579d3772bf7c0ae8aff86586e20 Mon Sep 17 00:00:00 2001 From: Md Sadre Alam Date: Thu, 10 Apr 2025 15:30:17 +0530 Subject: mtd: rawnand: qcom: Pass 18 bit offset from NANDc base to BAM base The BAM command descriptor provides only 18 bits to specify the BAM register offset. Additionally, in the BAM command descriptor, the BAM register offset is supposed to be specified as "(NANDc base - BAM base) + reg_off". Since, the BAM controller expecting the value in the form of "NANDc base - BAM base", so that added a new field 'bam_offset' in the NAND properties structure and use it while preparing the command descriptor. Previously, the driver was specifying the NANDc base address in the BAM command descriptor. 
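A minimal sketch of what the new field buys us, with an invented SoC value; only the shape matters, the real per-SoC offsets live in the driver's qcom_nandc_props tables:

	/* Example only: 0x30000 is an illustrative NANDc-to-BAM distance. */
	static const struct qcom_nandc_props example_props = {
		.supports_bam	= true,
		.bam_offset	= 0x30000,
	};

	/* Register offset as the BAM expects it in a command descriptor:
	 * "(NANDc base - BAM base) + reg_off", i.e. bam_offset + reg_off,
	 * instead of the absolute NANDc physical address used before.
	 */
	u32 bam_reg = nandc->props->bam_offset + reg_off;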
Cc: stable@vger.kernel.org Fixes: 8d6b6d7e135e ("mtd: nand: qcom: support for command descriptor formation") Tested-by: Lakshmi Sowjanya D Signed-off-by: Md Sadre Alam Acked-by: Mark Brown Tested-by: Gabor Juhos # on IPQ9574 Reviewed-by: Manivannan Sadhasivam Signed-off-by: Miquel Raynal --- include/linux/mtd/nand-qpic-common.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index cd7172e6c1bb..e8462deda6db 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -199,9 +199,6 @@ */ #define dev_cmd_reg_addr(nandc, reg) ((nandc)->props->dev_cmd_reg_start + (reg)) -/* Returns the NAND register physical address */ -#define nandc_reg_phys(chip, offset) ((chip)->base_phys + (offset)) - /* Returns the dma address for reg read buffer */ #define reg_buf_dma_addr(chip, vaddr) \ ((chip)->reg_read_dma + \ @@ -454,6 +451,7 @@ struct qcom_nand_controller { struct qcom_nandc_props { u32 ecc_modes; u32 dev_cmd_reg_start; + u32 bam_offset; bool supports_bam; bool nandc_part_of_qpic; bool qpic_version2; -- cgit v1.2.3 From 73db799bf5efc5a04654bb3ff6c9bf63a0dfa473 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: Thu, 27 Mar 2025 20:59:26 +0100 Subject: PM: runtime: Add new devm functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `devm_pm_runtime_set_active_enabled()` and `devm_pm_runtime_get_noresume()` for simplifying common cases in drivers. Signed-off-by: Bence Csókás Link: https://patch.msgid.link/20250327195928.680771-3-csokas.bence@prolan.hu Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7fb5a459847e..756b842dcd30 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -96,7 +96,9 @@ extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); extern void pm_runtime_release_supplier(struct device_link *link); +int devm_pm_runtime_set_active_enabled(struct device *dev); extern int devm_pm_runtime_enable(struct device *dev); +int devm_pm_runtime_get_noresume(struct device *dev); /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. @@ -294,7 +296,9 @@ static inline bool pm_runtime_blocked(struct device *dev) { return true; } static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} +static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; } static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } +static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; } static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} -- cgit v1.2.3 From 32607a332cfea5a4b2a185f3e3d605a9bf4f8df0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 24 Apr 2025 10:35:18 -0400 Subject: ipv4: prefer multipath nexthop that matches source address With multipath routes, try to ensure that packets leave on the device that is associated with the source address. 
Avoid the following tcpdump example: veth0 Out IP 10.1.0.2.38640 > 10.2.0.3.8000: Flags [S] veth1 Out IP 10.1.0.2.38648 > 10.2.0.3.8000: Flags [S] Which can happen easily with the most straightforward setup: ip addr add 10.0.0.1/24 dev veth0 ip addr add 10.1.0.1/24 dev veth1 ip route add 10.2.0.3 nexthop via 10.0.0.2 dev veth0 \ nexthop via 10.1.0.2 dev veth1 This is apparently considered WAI, based on the comment in ip_route_output_key_hash_rcu: * 2. Moreover, we are allowed to send packets with saddr * of another iface. --ANK It may be ok for some uses of multipath, but not all. For instance, when using two ISPs, a router may drop packets with unknown source. The behavior occurs because tcp_v4_connect makes three route lookups when establishing a connection: 1. ip_route_connect calls to select a source address, with saddr zero. 2. ip_route_connect calls again now that saddr and daddr are known. 3. ip_route_newports calls again after a source port is also chosen. With a route with multiple nexthops, each lookup may make a different choice depending on available entropy to fib_select_multipath. So it is possible for 1 to select the saddr from the first entry, but 3 to select the second entry. Leading to the above situation. Address this by preferring a match that matches the flowi4 saddr. This will make 2 and 3 make the same choice as 1. Continue to update the backup choice until a choice that matches saddr is found. Do this in fib_select_multipath itself, rather than passing an fl4_oif constraint, to avoid changing non-multipath route selection. Commit e6b45241c57a ("ipv4: reset flowi parameters on route connect") shows how that may cause regressions. Also read ipv4.sysctl_fib_multipath_use_neigh only once. No need to refresh in the loop. This does not happen in IPv6, which performs only one lookup. Signed-off-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250424143549.669426-2-willemdebruijn.kernel@gmail.com Signed-off-by: Paolo Abeni --- include/net/ip_fib.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e3864b74e92a..48bb3cf41469 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -574,7 +574,8 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net, int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); -void fib_select_multipath(struct fib_result *res, int hash); +void fib_select_multipath(struct fib_result *res, int hash, + const struct flowi4 *fl4); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); -- cgit v1.2.3 From 65e9024643c7512ade3aedbb341e11d77ed7abc2 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 24 Apr 2025 10:35:19 -0400 Subject: ip: load balance tcp connections to single dst addr and port Load balance new TCP connections across nexthops also when they connect to the same service at a single remote address and port. This affects only port-based multipath hashing: fib_multipath_hash_policy 1 or 3. Local connections must choose both a source address and port when connecting to a remote service, in ip_route_connect. This "chicken-and-egg problem" (commit 2d7192d6cbab ("ipv4: Sanitize and simplify ip_route_{connect,newports}()")) is resolved by first selecting a source address, by looking up a route using the zero wildcard source port and address. 
As a result multiple connections to the same destination address and port have no entropy in fib_multipath_hash. This is not a problem when forwarding, as skb-based hashing has a 4-tuple. Nor when establishing UDP connections, as autobind there selects a port before reaching ip_route_connect. Load balance also TCP, by using a random port in fib_multipath_hash. Port assignment in inet_hash_connect is not atomic with ip_route_connect. Thus ports are unpredictable, effectively random. Implementation details: Do not actually pass a random fl4_sport, as that affects not only hashing, but routing more broadly, and can match a source port based policy route, which existing wildcard port 0 will not. Instead, define a new wildcard flowi flag that is used only for hashing. Selecting a random source is equivalent to just selecting a random hash entirely. But for code clarity, follow the normal 4-tuple hash process and only update this field. fib_multipath_hash can be reached with zero sport from other code paths, so explicitly pass this flowi flag, rather than trying to infer this case in the function itself. Signed-off-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250424143549.669426-3-willemdebruijn.kernel@gmail.com Signed-off-by: Paolo Abeni --- include/net/flow.h | 1 + include/net/route.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 2a3f0c42f092..a1839c278d87 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -39,6 +39,7 @@ struct flowi_common { #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 #define FLOWI_FLAG_L3MDEV_OIF 0x04 +#define FLOWI_FLAG_ANY_SPORT 0x08 __u32 flowic_secid; kuid_t flowic_uid; __u32 flowic_multipath_hash; diff --git a/include/net/route.h b/include/net/route.h index c605fd5ec0c0..8e39aa822cf9 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -326,6 +326,9 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, if (inet_test_bit(TRANSPARENT, sk)) flow_flags |= FLOWI_FLAG_ANYSRC; + if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !sport) + flow_flags |= FLOWI_FLAG_ANY_SPORT; + flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, src, dport, sport, sk->sk_uid); -- cgit v1.2.3 From 9f52aecc952ddf307571517d5c91136c8c4e87c9 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Wed, 18 Sep 2024 11:53:27 +0800 Subject: coresight: Fixes device's owner field for registered using coresight_init_driver() The coresight_init_driver() of the coresight-core module is called from the sub coresgiht device (such as tmc/stm/funnle/...) module. It calls amba_driver_register() and Platform_driver_register(), which are macro functions that use the coresight-core's module to initialize the caller's owner field. Therefore, when the sub coresight device calls coresight_init_driver(), an incorrect THIS_MODULE value is captured. The sub coesgiht modules can be removed while their callbacks are running, resulting in a general protection failure. Add module parameter to coresight_init_driver() so can be called with the module of the callback. 
Fixes: 075b7cd7ad7d ("coresight: Add helpers registering/removing both AMBA and platform drivers") Signed-off-by: Junhao He Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240918035327.9710-1-hejunhao3@huawei.com --- include/linux/coresight.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index d79a242b271d..cfcf6e4707ed 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -723,7 +723,7 @@ coresight_find_output_type(struct coresight_platform_data *pdata, union coresight_dev_subtype subtype); int coresight_init_driver(const char *drv, struct amba_driver *amba_drv, - struct platform_driver *pdev_drv); + struct platform_driver *pdev_drv, struct module *owner); void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); -- cgit v1.2.3 From 0091d9241ea24c5275be4a3e5a032862fd9de9ec Mon Sep 17 00:00:00 2001 From: Steven Chen Date: Mon, 21 Apr 2025 15:25:09 -0700 Subject: kexec: define functions to map and unmap segments Implement kimage_map_segment() to enable IMA to map the measurement log list to the kimage structure during the kexec 'load' stage. This function gathers the source pages within the specified address range, and maps them to a contiguous virtual address range. This is a preparation for later usage. Implement kimage_unmap_segment() for unmapping segments using vunmap(). Cc: Eric Biederman Cc: Baoquan He Cc: Vivek Goyal Cc: Dave Young Co-developed-by: Tushar Sugandhi Signed-off-by: Tushar Sugandhi Signed-off-by: Steven Chen Acked-by: Baoquan He Tested-by: Stefan Berger # ppc64/kvm Signed-off-by: Mimi Zohar --- include/linux/kexec.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c8971861521a..805743208d19 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -474,13 +474,19 @@ extern bool kexec_file_dbg_print; #define kexec_dprintk(fmt, arg...) \ do { if (kexec_file_dbg_print) pr_info(fmt, ##arg); } while (0) +extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size); +extern void kimage_unmap_segment(void *buffer); #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; +struct kimage; static inline void __crash_kexec(struct pt_regs *regs) { } static inline void crash_kexec(struct pt_regs *regs) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } static inline int kexec_crash_loaded(void) { return 0; } +static inline void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size) +{ return NULL; } +static inline void kimage_unmap_segment(void *buffer) { } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ -- cgit v1.2.3 From 9ee8888a80fe2bd20ce929ffbc1dedd57607a778 Mon Sep 17 00:00:00 2001 From: Steven Chen Date: Mon, 21 Apr 2025 15:25:10 -0700 Subject: ima: kexec: skip IMA segment validation after kexec soft reboot Currently, the function kexec_calculate_store_digests() calculates and stores the digest of the segment during the kexec_file_load syscall, where the IMA segment is also allocated. Later, the IMA segment will be updated with the measurement log at the kexec execute stage when a kexec reboot is initiated. Therefore, the digests should be updated for the IMA segment in the normal case. 
The problem is that the content of memory segments carried over to the new kernel during the kexec systemcall can be changed at kexec 'execute' stage, but the size and the location of the memory segments cannot be changed at kexec 'execute' stage. To address this, skip the calculation and storage of the digest for the IMA segment in kexec_calculate_store_digests() so that it is not added to the purgatory_sha_regions. With this change, the IMA segment is not included in the digest calculation, storage, and verification. Cc: Eric Biederman Cc: Baoquan He Cc: Vivek Goyal Cc: Dave Young Co-developed-by: Tushar Sugandhi Signed-off-by: Tushar Sugandhi Signed-off-by: Steven Chen Reviewed-by: Stefan Berger Acked-by: Baoquan He Tested-by: Stefan Berger # ppc64/kvm [zohar@linux.ibm.com: Fixed Signed-off-by tag to match author's email ] Signed-off-by: Mimi Zohar --- include/linux/kexec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 805743208d19..53ef1b6c8712 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -369,6 +369,9 @@ struct kimage { phys_addr_t ima_buffer_addr; size_t ima_buffer_size; + + unsigned long ima_segment_index; + bool is_ima_segment_index_set; #endif /* Core ELF header buffer */ -- cgit v1.2.3 From f18e502db673c75f762d47101dafcf58f30e2733 Mon Sep 17 00:00:00 2001 From: Steven Chen Date: Mon, 21 Apr 2025 15:25:11 -0700 Subject: ima: kexec: define functions to copy IMA log at soft boot The IMA log is currently copied to the new kernel during kexec 'load' using ima_dump_measurement_list(). However, the log copied at kexec 'load' may result in loss of IMA measurements that only occurred after kexec "load'. Setup the needed infrastructure to move the IMA log copy from kexec 'load' to 'execute'. Define a new IMA hook ima_update_kexec_buffer() as a stub function. It will be used to call ima_dump_measurement_list() during kexec 'execute'. Implement ima_kexec_post_load() function to be invoked after the new Kernel image has been loaded for kexec. ima_kexec_post_load() maps the IMA buffer to a segment in the newly loaded Kernel. It also registers the reboot notifier_block to trigger ima_update_kexec_buffer() at kexec 'execute'. Set the priority of register_reboot_notifier to INT_MIN to ensure that the IMA log copy operation will happen at the end of the operation chain, so that all the IMA measurement records extended into the TPM are copied Co-developed-by: Tushar Sugandhi Signed-off-by: Tushar Sugandhi Cc: Eric Biederman Cc: Baoquan He Cc: Vivek Goyal Cc: Dave Young Signed-off-by: Steven Chen Reviewed-by: Stefan Berger Acked-by: Baoquan He Tested-by: Stefan Berger # ppc64/kvm Signed-off-by: Mimi Zohar --- include/linux/ima.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ima.h b/include/linux/ima.h index 0bae61a15b60..8e29cb4e6a01 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -32,6 +32,9 @@ static inline void ima_appraise_parse_cmdline(void) {} #ifdef CONFIG_IMA_KEXEC extern void ima_add_kexec_buffer(struct kimage *image); +extern void ima_kexec_post_load(struct kimage *image); +#else +static inline void ima_kexec_post_load(struct kimage *image) {} #endif #else -- cgit v1.2.3 From 74a2bd0bfb0675cb8f276f2fe3ede0c7f9e78fa3 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 12 Mar 2025 22:19:49 -0400 Subject: nodemask: drop nodes_shift nodes_shift_{left,right} are not used. Drop them. 
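A simplified sketch of the wiring described above; the exact names used in ima_kexec.c may differ, and error handling is omitted:

	static void *ima_kexec_buffer;

	/* ima_update_kexec_buffer() is the new IMA hook, implemented as a
	 * reboot notifier callback.
	 */
	static struct notifier_block update_buffer_nb = {
		.notifier_call	= ima_update_kexec_buffer,
		.priority	= INT_MIN,	/* run last, after all other reboot notifiers */
	};

	void ima_kexec_post_load(struct kimage *image)
	{
		if (!image->ima_buffer_addr)
			return;

		ima_kexec_buffer = kimage_map_segment(image,
						      image->ima_buffer_addr,
						      image->ima_buffer_size);
		register_reboot_notifier(&update_buffer_nb);
	}

At kexec 'execute', the notifier calls ima_dump_measurement_list() to copy the up-to-date log into the mapped segment, and kimage_unmap_segment() then releases the mapping.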
Signed-off-by: Yury Norov [NVIDIA] --- include/linux/nodemask.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index f0ac0633366b..0aa6b63aa747 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -39,9 +39,6 @@ * int nodes_full(mask) Is mask full (all bits sets)? * int nodes_weight(mask) Hamming weight - number of set bits * - * void nodes_shift_right(dst, src, n) Shift right - * void nodes_shift_left(dst, src, n) Shift left - * * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, @@ -247,22 +244,6 @@ static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int n return bitmap_weight(srcp->bits, nbits); } -#define nodes_shift_right(dst, src, n) \ - __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES) -static __always_inline void __nodes_shift_right(nodemask_t *dstp, - const nodemask_t *srcp, int n, int nbits) -{ - bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); -} - -#define nodes_shift_left(dst, src, n) \ - __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES) -static __always_inline void __nodes_shift_left(nodemask_t *dstp, - const nodemask_t *srcp, int n, int nbits) -{ - bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); -} - /* FIXME: better would be to fix all architectures to never return > MAX_NUMNODES, then the silly min_ts could be dropped. */ -- cgit v1.2.3 From 4923c2c5b66fe8dea1df5e16d61f15c1dbea5ba1 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 12 Mar 2025 22:19:50 -0400 Subject: cpumask: add non-atomic __assign_cpu() Similarly to atomic, add a non-atomic version. Signed-off-by: Yury Norov [NVIDIA] --- include/linux/cpumask.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f9a868384083..ba1e232e0200 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1074,6 +1074,9 @@ void init_cpu_possible(const struct cpumask *src); #define assign_cpu(cpu, mask, val) \ assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) +#define __assign_cpu(cpu, mask, val) \ + __assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) + #define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) #define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_enabled_mask, (enabled)) #define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) -- cgit v1.2.3 From 791a9b25ce2e6ecbe404ee32eed8a96a17e52896 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 12 Mar 2025 22:19:52 -0400 Subject: cpumask: drop cpumask_assign_cpu() Commit decde1fa209323c7 ("cpumask: Add assign cpu") was merged bypassing cpumasks reviewers. It adds atomic and non-atomic cpumask_assign_cpu() helpers. In the same merge window, commit 5c563ee90a22d3 ("cpumask: introduce assign_cpu() macro") added the same functionality. So now we have it duplicated. __cpumask_assign_cpu() has never been used since introducing, and because this series reworks the only user of cpumask_assign_cpu(), both functions become a dead code. 
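As a usage note, a hedged sketch contrasting the atomic assign_cpu() with the new non-atomic __assign_cpu(); the mask and the functions around it are invented for illustration:

    /* Illustrative only: a driver-private mask of CPUs currently in use. */
    static struct cpumask my_active_mask;

    static void mark_cpu_active(unsigned int cpu, bool active)
    {
            /* Atomic form: safe against concurrent updaters. */
            assign_cpu(cpu, &my_active_mask, active);
    }

    static void mark_cpu_active_locked(unsigned int cpu, bool active)
    {
            /* Non-atomic form: the caller already serializes updates. */
            __assign_cpu(cpu, &my_active_mask, active);
    }
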
Signed-off-by: Yury Norov [NVIDIA] --- include/linux/cpumask.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ba1e232e0200..beff4d26e605 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -558,22 +558,6 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -/** - * cpumask_assign_cpu - assign a cpu in a cpumask - * @cpu: cpu number (< nr_cpu_ids) - * @dstp: the cpumask pointer - * @bool: the value to assign - */ -static __always_inline void cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) -{ - assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); -} - -static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) -{ - __assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); -} - /** * cpumask_test_cpu - test for a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) -- cgit v1.2.3 From 31299a5e0211241171b2222c5633aad4763bf700 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 26 Mar 2025 00:59:56 +0900 Subject: bits: add comments and newlines to #if, #else and #endif directives This is a preparation for the upcoming GENMASK_U*() and BIT_U*() changes. After introducing those new macros, there will be a lot of scrolling between the #if, #else and #endif. Add a comment to the #else and #endif preprocessor macros to help keep track of which context we are in. Also, add new lines to better visually separate the non-asm and asm sections. Signed-off-by: Vincent Mailhol Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bits.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bits.h b/include/linux/bits.h index 14fd0ca9a6cd..e1e517769140 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -19,16 +19,21 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #if !defined(__ASSEMBLY__) + #include #include + #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) -#else + +#else /* defined(__ASSEMBLY__) */ + /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ #define GENMASK_INPUT_CHECK(h, l) 0 -#endif + +#endif /* !defined(__ASSEMBLY__) */ #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -- cgit v1.2.3 From 19408200c094858d952a90bf4977733dc89a4df5 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 26 Mar 2025 00:59:57 +0900 Subject: bits: introduce fixed-type GENMASK_U*() Add GENMASK_TYPE() which generalizes __GENMASK() to support different types, and implement fixed-types versions of GENMASK() based on it. The fixed-type version allows more strict checks to the min/max values accepted, which is useful for defining registers like implemented by i915 and xe drivers with their REG_GENMASK*() macros. The strict checks rely on shift-count-overflow compiler check to fail the build if a number outside of the range allowed is passed. Example: #define FOO_MASK GENMASK_U32(33, 4) will generate a warning like: include/linux/bits.h:51:27: error: right shift count >= width of type [-Werror=shift-count-overflow] 51 | type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) | ^~ The result is casted to the corresponding fixed width type. For example, GENMASK_U8() returns an u8. 
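As a positive usage sketch (the register and field names below are invented, and FIELD_PREP() comes from <linux/bitfield.h>):

    /* Hypothetical 32-bit control register layout. */
    #define CTRL_MODE_MASK      GENMASK_U32(3, 0)    /* typed as u32 */
    #define CTRL_THRESH_MASK    GENMASK_U32(15, 8)   /* typed as u32 */

    u32 ctrl = FIELD_PREP(CTRL_MODE_MASK, 2) |
               FIELD_PREP(CTRL_THRESH_MASK, 0x40);

    /* GENMASK_U32(33, 15) here would fail the build as described above. */
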
Note that because of the C promotion rules, GENMASK_U8() and GENMASK_U16() will immediately be promoted to int if used in an expression. Regardless, the main goal is not to get the correct type, but rather to enforce more checks at compile time. While GENMASK_TYPE() is crafted to cover all variants, including the already existing GENMASK(), GENMASK_ULL() and GENMASK_U128(), for the moment, only use it for the newly introduced GENMASK_U*(). The consolidation will be done in a separate change. Co-developed-by: Yury Norov Signed-off-by: Lucas De Marchi Acked-by: Jani Nikula Signed-off-by: Vincent Mailhol Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bitops.h | 1 - include/linux/bits.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index c1cb53cf2f0f..9be2d50da09a 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -8,7 +8,6 @@ #include -#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) diff --git a/include/linux/bits.h b/include/linux/bits.h index e1e517769140..9718c5ae5fc3 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -12,6 +12,7 @@ #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) /* * Create a contiguous bitmask starting at bit position @l and ending at @@ -20,11 +21,40 @@ */ #if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * GENMASK_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * something not available in asm. Nevertheless, fixed width integers is a C + * concept. Assembly code can rely on the long and long long versions instead. + */ + #include #include +#include #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) +/* + * Generate a mask for the specified type @t. Additional checks are made to + * guarantee the value returned fits in that type, relying on + * -Wshift-count-overflow compiler check to detect incompatible arguments. + * For example, all these create build errors or warnings: + * + * - GENMASK(15, 20): wrong argument order + * - GENMASK(72, 15): doesn't fit unsigned long + * - GENMASK_U32(33, 15): doesn't fit in a u32 + */ +#define GENMASK_TYPE(t, h, l) \ + ((t)(GENMASK_INPUT_CHECK(h, l) + \ + (type_max(t) << (l) & \ + type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) + +#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) +#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) +#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) +#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) + #else /* defined(__ASSEMBLY__) */ /* -- cgit v1.2.3 From 5b572e8a9f3dcd6e3ba80ec5b5ec85472c88cb4c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Mar 2025 00:59:58 +0900 Subject: bits: introduce fixed-type BIT_U*() Implement fixed-type BIT_U*() to help drivers add stricter checks, like it was done for GENMASK_U*(). 
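A hedged usage sketch of the new fixed-type bit macros (the flag names are invented for illustration):

    /* Hypothetical device flags, each checked against the width of its type. */
    #define STATUS_READY    BIT_U8(0)     /* u8  */
    #define STATUS_ERROR    BIT_U8(7)     /* u8  */
    #define IRQ_RX_DONE     BIT_U32(17)   /* u32 */

    /* BIT_U8(8) or BIT_U32(40) would trip -Wshift-count-overflow at build time. */
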
Signed-off-by: Lucas De Marchi Acked-by: Jani Nikula Co-developed-by: Vincent Mailhol Signed-off-by: Vincent Mailhol Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bits.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bits.h b/include/linux/bits.h index 9718c5ae5fc3..7ad056219115 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -24,7 +24,7 @@ /* * Missing asm support * - * GENMASK_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(), * something not available in asm. Nevertheless, fixed width integers is a C * concept. Assembly code can rely on the long and long long versions instead. */ @@ -55,6 +55,24 @@ #define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) #define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +/* + * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The + * following examples generate compiler warnings due to -Wshift-count-overflow: + * + * - BIT_U8(8) + * - BIT_U32(-1) + * - BIT_U32(40) + */ +#define BIT_INPUT_CHECK(type, nr) \ + BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type))) + +#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr))) + +#define BIT_U8(nr) BIT_TYPE(u8, nr) +#define BIT_U16(nr) BIT_TYPE(u16, nr) +#define BIT_U32(nr) BIT_TYPE(u32, nr) +#define BIT_U64(nr) BIT_TYPE(u64, nr) + #else /* defined(__ASSEMBLY__) */ /* -- cgit v1.2.3 From 243c90e917f5cfc99821e5104d1c8a81c11cda4c Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sat, 29 Mar 2025 01:48:50 +0900 Subject: build_bug.h: more user friendly error messages in BUILD_BUG_ON_ZERO() __BUILD_BUG_ON_ZERO_MSG(), as introduced in [1], makes it possible to do a static assertions in expressions. The direct benefit is to provide a meaningful error message instead of the cryptic negative bitfield size error message currently returned by BUILD_BUG_ON_ZERO(): ./include/linux/build_bug.h:16:51: error: negative width in bit-field '' 16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) | ^ Get rid of BUILD_BUG_ON_ZERO()'s bitfield size hack. Instead rely on __BUILD_BUG_ON_ZERO_MSG() which in turn relies on C11's _Static_assert(). Use some macro magic, similarly to static_assert(), to either use an optional error message provided by the user or, when omitted, to produce a default error message by stringifying the tested expression. With this, for example: BUILD_BUG_ON_ZERO(1 > 0) would now throw: ./include/linux/compiler.h:197:62: error: static assertion failed: "1 > 0 is true" 197 | define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) | ^~~~~~~~~~~~~~ Finally, __BUILD_BUG_ON_ZERO_MSG() is already guarded by an: #ifdef __CHECKER__ So no need any more for that guard clause for BUILD_BUG_ON_ZERO(). Remove it. 
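For illustration, the existing expression-context style of use keeps working with the default message, and a caller may now pass its own. SLOT_INDEX() below is an invented macro and only works for compile-time constant arguments:

    /* Existing kernel use, now reporting "... is true" on failure: */
    #define __must_be_array(a)  BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))

    /* Hypothetical use with an explicit message: */
    #define SLOT_INDEX(i) \
            (BUILD_BUG_ON_ZERO((i) >= 8, "slot index out of range") + (i))
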
[1] commit d7a516c6eeae ("compiler.h: Fix undefined BUILD_BUG_ON_ZERO()") Link: https://git.kernel.org/torvalds/c/d7a516c6eeae Signed-off-by: Vincent Mailhol Link: https://git.kernel.org/next/linux-next/c/b88937277df Reviewed-by: Kees Cook Signed-off-by: Yury Norov --- include/linux/build_bug.h | 10 +++++----- include/linux/compiler.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 3aa3640f8c18..2cfbb4c65c78 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -4,17 +4,17 @@ #include -#ifdef __CHECKER__ -#define BUILD_BUG_ON_ZERO(e) (0) -#else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). + * + * Take an error message as an optional second argument. If omitted, + * default to the stringification of the tested expression. */ -#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) -#endif /* __CHECKER__ */ +#define BUILD_BUG_ON_ZERO(e, ...) \ + __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") /* Force a compilation error if a constant expression is not a power of 2 */ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 27725f1ab5ab..6f04a1d8c720 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -192,9 +192,9 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, }) #ifdef __CHECKER__ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) (0) #else /* __CHECKER__ */ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);})) #endif /* __CHECKER__ */ /* &a[0] degrades to a pointer: a different type from an array */ -- cgit v1.2.3 From 99c712d788c46452289beada638476b68b5a773b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Apr 2025 19:17:15 +0300 Subject: bitmap-str: Get rid of 'extern' for function prototypes The bitmap-str.h uses mixed style for function prototypes. Drop the 'extern' as it easier to read and makes style aligned with a new code in the kernel. 
Signed-off-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bitmap-str.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h index 17caeca94cab..d758b4809a3a 100644 --- a/include/linux/bitmap-str.h +++ b/include/linux/bitmap-str.h @@ -4,10 +4,10 @@ int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); -extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); -extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); +int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, + loff_t off, size_t count); +int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, + loff_t off, size_t count); int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, -- cgit v1.2.3 From 89a44a808814d4717a8bf945ddebd5c39bfffcf4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Apr 2025 19:17:16 +0300 Subject: bitmap-str: Add missing header(s) bitmap-str.h is not self-contained, it uses bool type that is provided in types.h and it uses __user annotation that is guaranteed to be included with types.h. Add missing header(s) to follow IWYU (Include What You Use) principle. Signed-off-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bitmap-str.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h index d758b4809a3a..53d3e1b32d3d 100644 --- a/include/linux/bitmap-str.h +++ b/include/linux/bitmap-str.h @@ -2,6 +2,8 @@ #ifndef __LINUX_BITMAP_STR_H #define __LINUX_BITMAP_STR_H +#include + int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, -- cgit v1.2.3 From a256ae22570ee4c3427fdc703a58a89afee6a332 Mon Sep 17 00:00:00 2001 From: Luo Jie Date: Thu, 17 Apr 2025 18:47:08 +0800 Subject: bitfield: Add FIELD_MODIFY() helper Add a helper for replacing the contents of bitfield in memory with the specified value. Even though a helper xxx_replace_bits() is available, it is not well documented, and only reports errors at the run time, which will not be helpful to catch possible overflow errors due to incorrect parameter types used. FIELD_MODIFY(REG_FIELD_C, ®, c) is the wrapper to the code below. reg &= ~REG_FIELD_C; reg |= FIELD_PREP(REG_FIELD_C, c); Yury: trim commit message, align backslashes. 
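A hedged sketch of the new helper in a typical read-modify-write sequence; the register offset, mask and base pointer are invented for illustration:

    #define PHY_CTRL             0x10
    #define PHY_CTRL_SPEED_MASK  GENMASK(7, 4)

    u32 val = readl(base + PHY_CTRL);
    FIELD_MODIFY(PHY_CTRL_SPEED_MASK, &val, 3);   /* replace only the SPEED field */
    writel(val, base + PHY_CTRL);
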
Signed-off-by: Luo Jie Signed-off-by: Yury Norov --- include/linux/bitfield.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 63928f173223..6d9a53db54b6 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -8,6 +8,7 @@ #define _LINUX_BITFIELD_H #include +#include #include /* @@ -38,8 +39,7 @@ * FIELD_PREP(REG_FIELD_D, 0x40); * * Modify: - * reg &= ~REG_FIELD_C; - * reg |= FIELD_PREP(REG_FIELD_C, c); + * FIELD_MODIFY(REG_FIELD_C, ®, c); */ #define __bf_shf(x) (__builtin_ffsll(x) - 1) @@ -156,6 +156,23 @@ (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ }) +/** + * FIELD_MODIFY() - modify a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg_p: pointer to the memory that should be updated + * @_val: value to store in the bitfield + * + * FIELD_MODIFY() modifies the set of bits in @_reg_p specified by @_mask, + * by replacing them with the bitfield value passed in as @_val. + */ +#define FIELD_MODIFY(_mask, _reg_p, _val) \ + ({ \ + typecheck_pointer(_reg_p); \ + __BF_FIELD_CHECK(_mask, *(_reg_p), _val, "FIELD_MODIFY: "); \ + *(_reg_p) &= ~(_mask); \ + *(_reg_p) |= (((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask)); \ + }) + extern void __compiletime_error("value doesn't fit into mask") __field_overflow(void); extern void __compiletime_error("bad bitfield mask") -- cgit v1.2.3 From 9fa6a693ad8dc0cd34833632883767083b8fc1f5 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Tue, 29 Apr 2025 19:10:54 +0800 Subject: ALSA: hda/tas2781: Remove tas2781_spi_fwlib.c and leverage SND_SOC_TAS2781_FMWLIB Most codes in tas2781_spi_fwlib.c are same as tas2781-fmwlib.c, mainly for firmware parsing, only differece is the register reading, bit update and book switching in i2c and spi. The main purpose of this patch is for code cleaup and arrange the shared part for i2c and spi. Signed-off-by: Shenghao Ding Acked-by: Mark Brown Link: https://patch.msgid.link/20250429111055.567-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- include/sound/tas2781-comlib-i2c.h | 37 +++++++++++++++++++++++++++++++ include/sound/tas2781.h | 45 ++++++++++++++++---------------------- 2 files changed, 56 insertions(+), 26 deletions(-) create mode 100644 include/sound/tas2781-comlib-i2c.h (limited to 'include') diff --git a/include/sound/tas2781-comlib-i2c.h b/include/sound/tas2781-comlib-i2c.h new file mode 100644 index 000000000000..a1afa5c444ba --- /dev/null +++ b/include/sound/tas2781-comlib-i2c.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// +// ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier +// +// Copyright (C) 2025 Texas Instruments Incorporated +// https://www.ti.com +// +// The TAS2563/TAS2781 driver implements a flexible and configurable +// algo coefficient setting for one, two, or even multiple +// TAS2563/TAS2781 chips. 
+// +// Author: Shenghao Ding +// + +#ifndef __TAS2781_COMLIB_I2C_H__ +#define __TAS2781_COMLIB_I2C_H__ + +void tasdevice_reset(struct tasdevice_priv *tas_dev); +int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, + struct module *module, + void (*cont)(const struct firmware *fw, void *context)); +struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c); +int tasdevice_init(struct tasdevice_priv *tas_priv); +int tasdev_chn_switch(struct tasdevice_priv *tas_priv, + unsigned short chn); +int tasdevice_dev_update_bits( + struct tasdevice_priv *tasdevice, unsigned short chn, + unsigned int reg, unsigned int mask, unsigned int value); +int tasdevice_amp_putvol(struct tasdevice_priv *tas_priv, + struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc); +int tasdevice_amp_getvol(struct tasdevice_priv *tas_priv, + struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc); +int tasdevice_digital_getvol(struct tasdevice_priv *tas_priv, + struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc); +int tasdevice_digital_putvol(struct tasdevice_priv *tas_priv, + struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc); +#endif /* __TAS2781_COMLIB_I2C_H__ */ diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index eff011444cc8..8192a94d783a 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -32,6 +32,8 @@ SNDRV_PCM_FMTBIT_S24_LE | \ SNDRV_PCM_FMTBIT_S32_LE) +#define TASDEVICE_CRC8_POLYNOMIAL 0x4d + /* PAGE Control Register (available in page0 of each book) */ #define TASDEVICE_PAGE_SELECT 0x00 #define TASDEVICE_BOOKCTL_PAGE 0x00 @@ -193,6 +195,7 @@ struct tasdevice_priv { bool force_fwload_status; bool playback_started; bool isacpi; + bool isspi; bool is_user_space_calidata; unsigned int global_addr; @@ -210,41 +213,31 @@ struct tasdevice_priv { int (*tasdevice_load_block)(struct tasdevice_priv *tas_priv, struct tasdev_blk *block); + int (*change_chn_book)(struct tasdevice_priv *tas_priv, + unsigned short chn, int book); + int (*update_bits)(struct tasdevice_priv *tas_priv, + unsigned short chn, unsigned int reg, unsigned int mask, + unsigned int value); + int (*dev_read)(struct tasdevice_priv *tas_priv, + unsigned short chn, unsigned int reg, unsigned int *value); + int (*dev_bulk_read)(struct tasdevice_priv *tas_priv, + unsigned short chn, unsigned int reg, unsigned char *p_data, + unsigned int n_length); int (*save_calibration)(struct tasdevice_priv *tas_priv); void (*apply_calibration)(struct tasdevice_priv *tas_priv); }; -void tasdevice_reset(struct tasdevice_priv *tas_dev); -int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, - struct module *module, - void (*cont)(const struct firmware *fw, void *context)); -struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c); -int tasdevice_init(struct tasdevice_priv *tas_priv); -void tasdevice_remove(struct tasdevice_priv *tas_priv); -int tasdevice_save_calibration(struct tasdevice_priv *tas_priv); -void tasdevice_apply_calibration(struct tasdevice_priv *tas_priv); -int tasdev_chn_switch(struct tasdevice_priv *tas_priv, - unsigned short chn); int tasdevice_dev_read(struct tasdevice_priv *tas_priv, unsigned short chn, unsigned int reg, unsigned int *value); +int tasdevice_dev_bulk_read(struct tasdevice_priv *tas_priv, + unsigned short chn, unsigned int reg, unsigned char *p_data, + unsigned int n_length); int tasdevice_dev_write(struct tasdevice_priv *tas_priv, unsigned short chn, unsigned int reg, unsigned int value); int tasdevice_dev_bulk_write( 
struct tasdevice_priv *tas_priv, unsigned short chn, unsigned int reg, unsigned char *p_data, unsigned int n_length); -int tasdevice_dev_bulk_read(struct tasdevice_priv *tas_priv, - unsigned short chn, unsigned int reg, unsigned char *p_data, - unsigned int n_length); -int tasdevice_dev_update_bits( - struct tasdevice_priv *tasdevice, unsigned short chn, - unsigned int reg, unsigned int mask, unsigned int value); -int tasdevice_amp_putvol(struct tasdevice_priv *tas_priv, - struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc); -int tasdevice_amp_getvol(struct tasdevice_priv *tas_priv, - struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc); -int tasdevice_digital_putvol(struct tasdevice_priv *tas_priv, - struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc); -int tasdevice_digital_getvol(struct tasdevice_priv *tas_priv, - struct snd_ctl_elem_value *ucontrol, struct soc_mixer_control *mc); - +void tasdevice_remove(struct tasdevice_priv *tas_priv); +int tasdevice_save_calibration(struct tasdevice_priv *tas_priv); +void tasdevice_apply_calibration(struct tasdevice_priv *tas_priv); #endif /* __TAS2781_H__ */ -- cgit v1.2.3 From d12ddda5239826ca6978eaadea1b9280762c830a Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 10 Apr 2025 12:00:49 +0200 Subject: media: uapi: cec-funcs.h: use CEC_LOG_ADDR_BROADCAST The cec-funcs.h header sets the destination to 0xf for those messages that can only be broadcast. Instead of writing: msg->msg[0] |= 0xf; /* broadcast */ just write: msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; which is more descriptive and allows us to drop the comment. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec-funcs.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h index d58fa1cdcb08..189ecf0e13cd 100644 --- a/include/uapi/linux/cec-funcs.h +++ b/include/uapi/linux/cec-funcs.h @@ -14,7 +14,7 @@ static inline void cec_msg_active_source(struct cec_msg *msg, __u16 phys_addr) { msg->len = 4; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_ACTIVE_SOURCE; msg->msg[2] = phys_addr >> 8; msg->msg[3] = phys_addr & 0xff; @@ -59,7 +59,7 @@ static inline void cec_msg_request_active_source(struct cec_msg *msg, int reply) { msg->len = 2; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_REQUEST_ACTIVE_SOURCE; msg->reply = reply ? 
CEC_MSG_ACTIVE_SOURCE : 0; } @@ -68,7 +68,7 @@ static inline void cec_msg_routing_information(struct cec_msg *msg, __u16 phys_addr) { msg->len = 4; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_ROUTING_INFORMATION; msg->msg[2] = phys_addr >> 8; msg->msg[3] = phys_addr & 0xff; @@ -86,7 +86,7 @@ static inline void cec_msg_routing_change(struct cec_msg *msg, __u16 new_phys_addr) { msg->len = 6; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_ROUTING_CHANGE; msg->msg[2] = orig_phys_addr >> 8; msg->msg[3] = orig_phys_addr & 0xff; @@ -106,7 +106,7 @@ static inline void cec_ops_routing_change(const struct cec_msg *msg, static inline void cec_msg_set_stream_path(struct cec_msg *msg, __u16 phys_addr) { msg->len = 4; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_SET_STREAM_PATH; msg->msg[2] = phys_addr >> 8; msg->msg[3] = phys_addr & 0xff; @@ -791,7 +791,7 @@ static inline void cec_msg_report_physical_addr(struct cec_msg *msg, __u16 phys_addr, __u8 prim_devtype) { msg->len = 5; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_REPORT_PHYSICAL_ADDR; msg->msg[2] = phys_addr >> 8; msg->msg[3] = phys_addr & 0xff; @@ -817,7 +817,7 @@ static inline void cec_msg_set_menu_language(struct cec_msg *msg, const char *language) { msg->len = 5; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_SET_MENU_LANGUAGE; memcpy(msg->msg + 2, language, 3); } @@ -850,7 +850,7 @@ static inline void cec_msg_report_features(struct cec_msg *msg, __u8 rc_profile, __u8 dev_features) { msg->len = 6; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_REPORT_FEATURES; msg->msg[2] = cec_version; msg->msg[3] = all_device_types; @@ -1092,7 +1092,7 @@ static inline void cec_msg_tuner_step_increment(struct cec_msg *msg) static inline void cec_msg_device_vendor_id(struct cec_msg *msg, __u32 vendor_id) { msg->len = 5; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_DEVICE_VENDOR_ID; msg->msg[2] = vendor_id >> 16; msg->msg[3] = (vendor_id >> 8) & 0xff; @@ -1655,7 +1655,7 @@ static inline void cec_msg_report_current_latency(struct cec_msg *msg, __u8 audio_out_delay) { msg->len = 6; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_REPORT_CURRENT_LATENCY; msg->msg[2] = phys_addr >> 8; msg->msg[3] = phys_addr & 0xff; @@ -1687,7 +1687,7 @@ static inline void cec_msg_request_current_latency(struct cec_msg *msg, __u16 phys_addr) { msg->len = 4; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_REQUEST_CURRENT_LATENCY; msg->msg[2] = phys_addr >> 8; msg->msg[3] = phys_addr & 0xff; @@ -1707,7 +1707,7 @@ static inline void cec_msg_cdc_hec_inquire_state(struct cec_msg *msg, __u16 phys_addr2) { msg->len = 9; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_CDC_MESSAGE; /* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */ msg->msg[4] = CEC_MSG_CDC_HEC_INQUIRE_STATE; @@ -1737,7 +1737,7 @@ static inline void cec_msg_cdc_hec_report_state(struct cec_msg *msg, __u16 hec_field) { msg->len = has_field ? 
10 : 8; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_CDC_MESSAGE; /* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */ msg->msg[4] = CEC_MSG_CDC_HEC_REPORT_STATE; @@ -1782,7 +1782,7 @@ static inline void cec_msg_cdc_hec_set_state(struct cec_msg *msg, __u16 phys_addr5) { msg->len = 10; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_CDC_MESSAGE; /* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */ msg->msg[4] = CEC_MSG_CDC_HEC_INQUIRE_STATE; @@ -1832,7 +1832,7 @@ static inline void cec_msg_cdc_hec_set_state_adjacent(struct cec_msg *msg, __u8 hec_set_state) { msg->len = 8; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_CDC_MESSAGE; /* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */ msg->msg[4] = CEC_MSG_CDC_HEC_SET_STATE_ADJACENT; @@ -1857,7 +1857,7 @@ static inline void cec_msg_cdc_hec_request_deactivation(struct cec_msg *msg, __u16 phys_addr3) { msg->len = 11; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_CDC_MESSAGE; /* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */ msg->msg[4] = CEC_MSG_CDC_HEC_REQUEST_DEACTIVATION; @@ -1884,7 +1884,7 @@ static inline void cec_ops_cdc_hec_request_deactivation(const struct cec_msg *ms static inline void cec_msg_cdc_hec_notify_alive(struct cec_msg *msg) { msg->len = 5; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_CDC_MESSAGE; /* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */ msg->msg[4] = CEC_MSG_CDC_HEC_NOTIFY_ALIVE; @@ -1899,7 +1899,7 @@ static inline void cec_ops_cdc_hec_notify_alive(const struct cec_msg *msg, static inline void cec_msg_cdc_hec_discover(struct cec_msg *msg) { msg->len = 5; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_CDC_MESSAGE; /* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */ msg->msg[4] = CEC_MSG_CDC_HEC_DISCOVER; @@ -1916,7 +1916,7 @@ static inline void cec_msg_cdc_hpd_set_state(struct cec_msg *msg, __u8 hpd_state) { msg->len = 6; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_CDC_MESSAGE; /* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */ msg->msg[4] = CEC_MSG_CDC_HPD_SET_STATE; @@ -1938,7 +1938,7 @@ static inline void cec_msg_cdc_hpd_report_state(struct cec_msg *msg, __u8 hpd_error) { msg->len = 6; - msg->msg[0] |= 0xf; /* broadcast */ + msg->msg[0] |= CEC_LOG_ADDR_BROADCAST; msg->msg[1] = CEC_MSG_CDC_MESSAGE; /* msg[2] and msg[3] (phys_addr) are filled in by the CEC framework */ msg->msg[4] = CEC_MSG_CDC_HPD_REPORT_STATE; -- cgit v1.2.3 From 007c07168ac0c64387be500f6604b09ace3f3bdc Mon Sep 17 00:00:00 2001 From: Su Hui Date: Wed, 30 Apr 2025 11:27:32 +0800 Subject: time/jiffies: Change register_refined_jiffies() to void __init register_refined_jiffies() is only used in setup code and always returns 0. Mark it as __init to save some bytes and change it to void. 
Signed-off-by: Su Hui Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250430032734.2079290-2-suhui@nfschina.com --- include/linux/jiffies.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 0ea8c9887429..91b20788273d 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -59,7 +59,7 @@ /* LATCH is used in the interval timer and ftape setup. */ #define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */ -extern int register_refined_jiffies(long clock_tick_rate); +extern void register_refined_jiffies(long clock_tick_rate); /* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ #define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) -- cgit v1.2.3 From 3c50137aa4c80c532cfcd7444a36b21710189ebf Mon Sep 17 00:00:00 2001 From: Shin Son Date: Mon, 28 Apr 2025 20:35:14 +0900 Subject: dt-bindings: clock: exynosautov920: add cpucl1/2 clock definitions Add cpucl1 and cpucl2 clock definitions. CPUCL1/2 refer to CPU Cluster 1 and CPU Cluster 2, which provide clock support for the CPUs on Exynosauto V920 SoC. Signed-off-by: Shin Son Link: https://lore.kernel.org/r/20250428113517.426987-2-shin.son@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynosautov920.h | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynosautov920.h b/include/dt-bindings/clock/samsung,exynosautov920.h index c57a1d749700..5e6896e9627f 100644 --- a/include/dt-bindings/clock/samsung,exynosautov920.h +++ b/include/dt-bindings/clock/samsung,exynosautov920.h @@ -181,6 +181,38 @@ #define CLK_DOUT_CPUCL0_DBG_PCLKDBG 14 #define CLK_DOUT_CPUCL0_NOCP 15 +/* CMU_CPUCL1 */ +#define CLK_FOUT_CPUCL1_PLL 1 + +#define CLK_MOUT_PLL_CPUCL1 2 +#define CLK_MOUT_CPUCL1_CLUSTER_USER 3 +#define CLK_MOUT_CPUCL1_SWITCH_USER 4 +#define CLK_MOUT_CPUCL1_CLUSTER 5 +#define CLK_MOUT_CPUCL1_CORE 6 + +#define CLK_DOUT_CLUSTER1_ACLK 7 +#define CLK_DOUT_CLUSTER1_ATCLK 8 +#define CLK_DOUT_CLUSTER1_MPCLK 9 +#define CLK_DOUT_CLUSTER1_PCLK 10 +#define CLK_DOUT_CLUSTER1_PERIPHCLK 11 +#define CLK_DOUT_CPUCL1_NOCP 12 + +/* CMU_CPUCL2 */ +#define CLK_FOUT_CPUCL2_PLL 1 + +#define CLK_MOUT_PLL_CPUCL2 2 +#define CLK_MOUT_CPUCL2_CLUSTER_USER 3 +#define CLK_MOUT_CPUCL2_SWITCH_USER 4 +#define CLK_MOUT_CPUCL2_CLUSTER 5 +#define CLK_MOUT_CPUCL2_CORE 6 + +#define CLK_DOUT_CLUSTER2_ACLK 7 +#define CLK_DOUT_CLUSTER2_ATCLK 8 +#define CLK_DOUT_CLUSTER2_MPCLK 9 +#define CLK_DOUT_CLUSTER2_PCLK 10 +#define CLK_DOUT_CLUSTER2_PERIPHCLK 11 +#define CLK_DOUT_CPUCL2_NOCP 12 + /* CMU_PERIC0 */ #define CLK_MOUT_PERIC0_IP_USER 1 #define CLK_MOUT_PERIC0_NOC_USER 2 -- cgit v1.2.3 From 6c9ffa2ae48e8fd44ba5c6ffdc16e62a62bac38a Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Fri, 25 Apr 2025 13:18:18 +0200 Subject: power: supply: add inhibit-charge-awake to charge_behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OneXPlayer devices have a charge inhibit feature that allows the user to select between it being active always or only when the device is on. Therefore, add attribute inhibit-charge-awake to charge_behaviour to allow the user to select that charge should be paused only when the device is awake. Reviewed-by: Hans de Goede Reviewed-by: Thomas Weißschuh Reviewed-by: Derek J. 
Clark Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250425111821.88746-14-lkml@antheas.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 888824592953..cbec930430a7 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -212,6 +212,7 @@ enum power_supply_usb_type { enum power_supply_charge_behaviour { POWER_SUPPLY_CHARGE_BEHAVIOUR_AUTO = 0, POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE, + POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE_AWAKE, POWER_SUPPLY_CHARGE_BEHAVIOUR_FORCE_DISCHARGE, }; -- cgit v1.2.3 From 144530c15ec7fa95b29812d86f4be527338ea204 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 25 Apr 2025 13:46:16 -0700 Subject: pds_core: remove extra name description Fix the kernel-doc complaint include/linux/pds/pds_adminq.h:481: warning: Excess struct member 'name' description in 'pds_core_lif_getattr_comp' Reviewed-by: Simon Horman Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- include/linux/pds/pds_adminq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index ddd111f04ca0..339156113fa5 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -463,7 +463,6 @@ struct pds_core_lif_getattr_cmd { * @rsvd: Word boundary padding * @comp_index: Index in the descriptor ring for which this is the completion * @state: LIF state (enum pds_core_lif_state) - * @name: LIF name string, 0 terminated * @features: Features (enum pds_core_hw_features) * @rsvd2: Word boundary padding * @color: Color bit -- cgit v1.2.3 From 7c4f4c4fa9b6fbb7e483bebd02f7b9cbc20ca5cc Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 25 Apr 2025 13:46:17 -0700 Subject: pds_core: smaller adminq poll starting interval Shorten the adminq poll starting interval in order to notice any transaction errors more quickly. Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/pds/pds_adminq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index 339156113fa5..40ff0ec2b879 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -4,7 +4,7 @@ #ifndef _PDS_CORE_ADMINQ_H_ #define _PDS_CORE_ADMINQ_H_ -#define PDSC_ADMINQ_MAX_POLL_INTERVAL 256 +#define PDSC_ADMINQ_MAX_POLL_INTERVAL 256000 /* usecs */ enum pds_core_adminq_flags { PDS_AQ_FLAG_FASTPOLL = BIT(1), /* completion poll at 1ms */ -- cgit v1.2.3 From fc7fed6f77f94f2fd9a7557020503e146eb0ce38 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 25 Mar 2025 11:58:46 +0000 Subject: coresight: Convert tag clear function to take a struct csdev_access The self hosted claim tag will be reset on device probe in a later commit. We'll want to do this before coresight_register() is called so won't have a coresight_device and have to use csdev_access instead. Also make them public and create locked and unlocked versions for later use. These look functions look like they set the whole tags register as one value, but they only set and clear the self hosted bit using a SET/CLR bits mechanism so also rename the functions to reflect this better. 
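A hedged sketch of the intended probe-time use, before a coresight_device exists; the drvdata/base names are illustrative, while CSDEV_ACCESS_IOMEM() is the existing helper for building a csdev_access from an MMIO base:

    /* Illustrative probe fragment: clear a stale self-hosted claim tag
     * (e.g. left over from a previous kernel) before coresight_register().
     */
    struct csdev_access access = CSDEV_ACCESS_IOMEM(drvdata->base);

    coresight_clear_self_claim_tag(&access);
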
Reviewed-by: Leo Yan Reviewed-by: Yeoreum Yun Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250325-james-coresight-claim-tags-v4-1-dfbd3822b2e5@linaro.org --- include/linux/coresight.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index cfcf6e4707ed..b89692d9ceac 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -685,7 +685,8 @@ extern int coresight_timeout_action(struct csdev_access *csa, u32 offset, extern int coresight_claim_device(struct coresight_device *csdev); extern int coresight_claim_device_unlocked(struct coresight_device *csdev); - +void coresight_clear_self_claim_tag(struct csdev_access *csa); +void coresight_clear_self_claim_tag_unlocked(struct csdev_access *csa); extern void coresight_disclaim_device(struct coresight_device *csdev); extern void coresight_disclaim_device_unlocked(struct coresight_device *csdev); extern char *coresight_alloc_device_name(struct coresight_dev_list *devs, -- cgit v1.2.3 From e6e6b692865d333d79d25c761c53b19e73e9653f Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 25 Mar 2025 11:58:52 +0000 Subject: coresight: Remove extern from function declarations Function declarations are extern by default so remove the extra noise and inconsistency. Reviewed-by: Leo Yan Reviewed-by: Yeoreum Yun Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250325-james-coresight-claim-tags-v4-7-dfbd3822b2e5@linaro.org --- include/linux/coresight.h | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index b89692d9ceac..8abdd8b5c791 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -671,28 +671,27 @@ static inline void coresight_set_mode(struct coresight_device *csdev, local_set(&csdev->mode, new_mode); } -extern struct coresight_device * -coresight_register(struct coresight_desc *desc); -extern void coresight_unregister(struct coresight_device *csdev); -extern int coresight_enable_sysfs(struct coresight_device *csdev); -extern void coresight_disable_sysfs(struct coresight_device *csdev); -extern int coresight_timeout(struct csdev_access *csa, u32 offset, - int position, int value); +struct coresight_device *coresight_register(struct coresight_desc *desc); +void coresight_unregister(struct coresight_device *csdev); +int coresight_enable_sysfs(struct coresight_device *csdev); +void coresight_disable_sysfs(struct coresight_device *csdev); +int coresight_timeout(struct csdev_access *csa, u32 offset, int position, int value); typedef void (*coresight_timeout_cb_t) (struct csdev_access *, u32, int, int); -extern int coresight_timeout_action(struct csdev_access *csa, u32 offset, - int position, int value, - coresight_timeout_cb_t cb); +int coresight_timeout_action(struct csdev_access *csa, u32 offset, int position, int value, + coresight_timeout_cb_t cb); +int coresight_claim_device(struct coresight_device *csdev); +int coresight_claim_device_unlocked(struct coresight_device *csdev); -extern int coresight_claim_device(struct coresight_device *csdev); -extern int coresight_claim_device_unlocked(struct coresight_device *csdev); +int coresight_claim_device(struct coresight_device *csdev); +int coresight_claim_device_unlocked(struct coresight_device *csdev); void coresight_clear_self_claim_tag(struct csdev_access *csa); 
void coresight_clear_self_claim_tag_unlocked(struct csdev_access *csa); -extern void coresight_disclaim_device(struct coresight_device *csdev); -extern void coresight_disclaim_device_unlocked(struct coresight_device *csdev); -extern char *coresight_alloc_device_name(struct coresight_dev_list *devs, +void coresight_disclaim_device(struct coresight_device *csdev); +void coresight_disclaim_device_unlocked(struct coresight_device *csdev); +char *coresight_alloc_device_name(struct coresight_dev_list *devs, struct device *dev); -extern bool coresight_loses_context_with_cpu(struct device *dev); +bool coresight_loses_context_with_cpu(struct device *dev); u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset); u32 coresight_read32(struct coresight_device *csdev, u32 offset); @@ -705,8 +704,8 @@ void coresight_relaxed_write64(struct coresight_device *csdev, u64 val, u32 offset); void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset); -extern int coresight_get_cpu(struct device *dev); -extern int coresight_get_static_trace_id(struct device *dev, u32 *id); +int coresight_get_cpu(struct device *dev); +int coresight_get_static_trace_id(struct device *dev, u32 *id); struct coresight_platform_data *coresight_get_platform_data(struct device *dev); struct coresight_connection * -- cgit v1.2.3 From 468d8b462ac64659caec53eff34f02963d5f52c8 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Tue, 15 Apr 2025 21:15:45 -0500 Subject: iidc/ice/irdma: Rename IDC header file To prepare for the IDC upgrade to support different CORE PCI drivers, rename header file from iidc.h to iidc_rdma.h since this files functionality is specifically for RDMA support. Use net/dscp.h include in irdma osdep.h and DSCP_MAX type.h, instead of iidc header and define. Reviewed-by: Przemek Kitszel Signed-off-by: Dave Ertman Signed-off-by: Tatyana Nikolova Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc.h | 109 ------------------------------------ include/linux/net/intel/iidc_rdma.h | 109 ++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 109 deletions(-) delete mode 100644 include/linux/net/intel/iidc.h create mode 100644 include/linux/net/intel/iidc_rdma.h (limited to 'include') diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h deleted file mode 100644 index 13274c3def66..000000000000 --- a/include/linux/net/intel/iidc.h +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2021, Intel Corporation. 
*/ - -#ifndef _IIDC_H_ -#define _IIDC_H_ - -#include -#include -#include -#include -#include -#include - -enum iidc_event_type { - IIDC_EVENT_BEFORE_MTU_CHANGE, - IIDC_EVENT_AFTER_MTU_CHANGE, - IIDC_EVENT_BEFORE_TC_CHANGE, - IIDC_EVENT_AFTER_TC_CHANGE, - IIDC_EVENT_CRIT_ERR, - IIDC_EVENT_NBITS /* must be last */ -}; - -enum iidc_reset_type { - IIDC_PFR, - IIDC_CORER, - IIDC_GLOBR, -}; - -enum iidc_rdma_protocol { - IIDC_RDMA_PROTOCOL_IWARP = BIT(0), - IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1), -}; - -#define IIDC_MAX_USER_PRIORITY 8 -#define IIDC_MAX_DSCP_MAPPING 64 -#define IIDC_DSCP_PFC_MODE 0x1 - -/* Struct to hold per RDMA Qset info */ -struct iidc_rdma_qset_params { - /* Qset TEID returned to the RDMA driver in - * ice_add_rdma_qset and used by RDMA driver - * for calls to ice_del_rdma_qset - */ - u32 teid; /* Qset TEID */ - u16 qs_handle; /* RDMA driver provides this */ - u16 vport_id; /* VSI index */ - u8 tc; /* TC branch the Qset should belong to */ -}; - -struct iidc_qos_info { - u64 tc_ctx; - u8 rel_bw; - u8 prio_type; - u8 egress_virt_up; - u8 ingress_virt_up; -}; - -/* Struct to pass QoS info */ -struct iidc_qos_params { - struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; - u8 up2tc[IIDC_MAX_USER_PRIORITY]; - u8 vport_relative_bw; - u8 vport_priority_type; - u8 num_tc; - u8 pfc_mode; - u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; -}; - -struct iidc_event { - DECLARE_BITMAP(type, IIDC_EVENT_NBITS); - u32 reg; -}; - -struct ice_pf; - -int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type); -int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); -void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); -int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); -void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); - -/* Structure representing auxiliary driver tailored information about the core - * PCI dev, each auxiliary driver using the IIDC interface will have an - * instance of this struct dedicated to it. - */ - -struct iidc_auxiliary_dev { - struct auxiliary_device adev; - struct ice_pf *pf; -}; - -/* structure representing the auxiliary driver. This struct is to be - * allocated and populated by the auxiliary driver's owner. The core PCI - * driver will access these ops by performing a container_of on the - * auxiliary_device->dev.driver. - */ -struct iidc_auxiliary_drv { - struct auxiliary_driver adrv; - /* This event_handler is meant to be a blocking call. For instance, - * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not - * return until the auxiliary driver is ready for the MTU change to - * happen. - */ - void (*event_handler)(struct ice_pf *pf, struct iidc_event *event); -}; - -#endif /* _IIDC_H_*/ diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h new file mode 100644 index 000000000000..0cd75404e459 --- /dev/null +++ b/include/linux/net/intel/iidc_rdma.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021, Intel Corporation. 
*/ + +#ifndef _IIDC_RDMA_H_ +#define _IIDC_RDMA_H_ + +#include +#include +#include +#include +#include +#include + +enum iidc_event_type { + IIDC_EVENT_BEFORE_MTU_CHANGE, + IIDC_EVENT_AFTER_MTU_CHANGE, + IIDC_EVENT_BEFORE_TC_CHANGE, + IIDC_EVENT_AFTER_TC_CHANGE, + IIDC_EVENT_CRIT_ERR, + IIDC_EVENT_NBITS /* must be last */ +}; + +enum iidc_reset_type { + IIDC_PFR, + IIDC_CORER, + IIDC_GLOBR, +}; + +enum iidc_rdma_protocol { + IIDC_RDMA_PROTOCOL_IWARP = BIT(0), + IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1), +}; + +#define IIDC_MAX_USER_PRIORITY 8 +#define IIDC_MAX_DSCP_MAPPING 64 +#define IIDC_DSCP_PFC_MODE 0x1 + +/* Struct to hold per RDMA Qset info */ +struct iidc_rdma_qset_params { + /* Qset TEID returned to the RDMA driver in + * ice_add_rdma_qset and used by RDMA driver + * for calls to ice_del_rdma_qset + */ + u32 teid; /* Qset TEID */ + u16 qs_handle; /* RDMA driver provides this */ + u16 vport_id; /* VSI index */ + u8 tc; /* TC branch the Qset should belong to */ +}; + +struct iidc_qos_info { + u64 tc_ctx; + u8 rel_bw; + u8 prio_type; + u8 egress_virt_up; + u8 ingress_virt_up; +}; + +/* Struct to pass QoS info */ +struct iidc_qos_params { + struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; + u8 up2tc[IIDC_MAX_USER_PRIORITY]; + u8 vport_relative_bw; + u8 vport_priority_type; + u8 num_tc; + u8 pfc_mode; + u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; +}; + +struct iidc_event { + DECLARE_BITMAP(type, IIDC_EVENT_NBITS); + u32 reg; +}; + +struct ice_pf; + +int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); +int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); +int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type); +int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); +void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); +int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); +void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); + +/* Structure representing auxiliary driver tailored information about the core + * PCI dev, each auxiliary driver using the IIDC interface will have an + * instance of this struct dedicated to it. + */ + +struct iidc_auxiliary_dev { + struct auxiliary_device adev; + struct ice_pf *pf; +}; + +/* structure representing the auxiliary driver. This struct is to be + * allocated and populated by the auxiliary driver's owner. The core PCI + * driver will access these ops by performing a container_of on the + * auxiliary_device->dev.driver. + */ +struct iidc_auxiliary_drv { + struct auxiliary_driver adrv; + /* This event_handler is meant to be a blocking call. For instance, + * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not + * return until the auxiliary driver is ready for the MTU change to + * happen. + */ + void (*event_handler)(struct ice_pf *pf, struct iidc_event *event); +}; + +#endif /* _IIDC_RDMA_H_*/ -- cgit v1.2.3 From 7d585426a6538a717f55a1bb0b614d465bd03805 Mon Sep 17 00:00:00 2001 From: Arun R Murthy Date: Thu, 24 Apr 2025 20:45:14 +0530 Subject: drm/display/dp: Export fn to calculate link symbol cycles Unify the function to calculate the link symbol cycles for both dsc and non-dsc case and export the function so that it can be used in the respective platform display drivers for other calculations. 
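A hedged example call for the exported helper (prototype added below); the parameter values are assumptions for a 4-lane 8b/10b SST link without DSC, not a statement of the helper's exact contract:

    /* 1920 active pixels per line, 24 bpp expressed in 1/16 bpp units,
     * 8-bit data symbols (8b/10b), no DSC slices, SST.
     */
    int cycles = drm_dp_link_symbol_cycles(4, 1920, 0, 24 * 16, 8, false);
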
v2: unify the fn for both dsc and non-dsc case (Imre) v3: rename drm_dp_link_symbol_cycles to drm_dp_link_data_symbol_cycles retain slice_eoc_cycles as is (Imre) v4: Expose only drm_dp_link_symbol_cycles() (Imre) v6: Add slice pixels which was removed unknowingly (Vinod) Signed-off-by: Arun R Murthy Reviewed-by: Imre Deak Reviewed-by: Vinod Govindapillai Acked-by: Maarten Lankhorst Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250424-hblank-v7-1-8b002f1506cc@intel.com --- include/drm/display/drm_dp_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 89a34dff85a4..36ca21d6a399 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -879,5 +879,7 @@ int drm_dp_bw_channel_coding_efficiency(bool is_uhbr); int drm_dp_max_dprx_data_rate(int max_link_rate, int max_lanes); ssize_t drm_dp_vsc_sdp_pack(const struct drm_dp_vsc_sdp *vsc, struct dp_sdp *sdp); +int drm_dp_link_symbol_cycles(int lane_count, int pixels, int dsc_slice_count, + int bpp_x16, int symbol_size, bool is_mst); #endif /* _DRM_DP_HELPER_H_ */ -- cgit v1.2.3 From 118c40b7b50340bf7ff7e0adee8e3bab6e552c82 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Mar 2025 18:21:44 +0100 Subject: kbuild: require gcc-8 and binutils-2.30 Commit a3e8fe814ad1 ("x86/build: Raise the minimum GCC version to 8.1") raised the minimum compiler version as enforced by Kbuild to gcc-8.1 and clang-15 for x86. This is actually the same gcc version that has been discussed as the minimum for all architectures several times in the past, with little objection. A previous concern was the kernel for SLE15-SP7 needing to be built with gcc-7. As this ended up still using linux-6.4 and there is no plan for an SP8, this is no longer a problem. Change it for all architectures and adjust the documentation accordingly. A few version checks can be removed in the process. The binutils version 2.30 is the lowest version used in combination with gcc-8 on common distros, so use that as the corresponding minimum. Link: https://lore.kernel.org/lkml/20240925150059.3955569-32-ardb+git@google.com/ Link: https://lore.kernel.org/lkml/871q7yxrgv.wl-tiwai@suse.de/ Acked-by: Mark Rutland Signed-off-by: Arnd Bergmann --- include/linux/unroll.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/unroll.h b/include/linux/unroll.h index 863fb69f6a7e..186b71de740f 100644 --- a/include/linux/unroll.h +++ b/include/linux/unroll.h @@ -11,10 +11,8 @@ #ifdef CONFIG_CC_IS_CLANG #define __pick_unrolled(x, y) _Pragma(#x) -#elif CONFIG_GCC_VERSION >= 80000 -#define __pick_unrolled(x, y) _Pragma(#y) #else -#define __pick_unrolled(x, y) /* not supported */ +#define __pick_unrolled(x, y) _Pragma(#y) #endif /** -- cgit v1.2.3 From 2605e4ab6615ef43361b18fc6d08dd884896aad8 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 11 Apr 2025 17:38:54 +0800 Subject: ACPI: CPPC: Modify cppc_get_auto_sel_caps() to cppc_get_auto_sel() Modify cppc_get_auto_sel_caps() to cppc_get_auto_sel(). Using a cppc_perf_caps to carry the value is unnecessary. Add a check to ensure the pointer 'enable' is not null. Reviewed-by: Pierre Gondois Signed-off-by: Lifeng Zheng Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20250411093855.982491-8-zhenglifeng1@huawei.com Signed-off-by: Rafael J. 
Wysocki --- include/acpi/cppc_acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 62d368bcd9ec..31767c65be20 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -159,7 +159,7 @@ extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); -extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); +extern int cppc_get_auto_sel(int cpu, bool *enable); extern int cppc_set_auto_sel(int cpu, bool enable); extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); @@ -229,11 +229,11 @@ static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf) { return -EOPNOTSUPP; } -static inline int cppc_set_auto_sel(int cpu, bool enable) +static inline int cppc_get_auto_sel(int cpu, bool *enable) { return -EOPNOTSUPP; } -static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps) +static inline int cppc_set_auto_sel(int cpu, bool enable) { return -EOPNOTSUPP; } -- cgit v1.2.3 From f35e5b3ccfd3516bc36b08e101678b036309397c Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 11 Apr 2025 17:38:55 +0800 Subject: ACPI: CPPC: Add three functions related to autonomous selection cppc_set_epp() - write energy performance preference register value, based on ACPI 6.5, s8.4.6.1.7 cppc_get_auto_act_window() - read autonomous activity window register value, based on ACPI 6.5, s8.4.6.1.6 cppc_set_auto_act_window() - write autonomous activity window register value, based on ACPI 6.5, s8.4.6.1.6 Reviewed-by: Pierre Gondois Signed-off-by: Lifeng Zheng Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20250411093855.982491-9-zhenglifeng1@huawei.com Signed-off-by: Rafael J. Wysocki --- include/acpi/cppc_acpi.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 31767c65be20..325e9543e08f 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -32,6 +32,15 @@ #define CMD_READ 0 #define CMD_WRITE 1 +#define CPPC_AUTO_ACT_WINDOW_SIG_BIT_SIZE (7) +#define CPPC_AUTO_ACT_WINDOW_EXP_BIT_SIZE (3) +#define CPPC_AUTO_ACT_WINDOW_MAX_SIG ((1 << CPPC_AUTO_ACT_WINDOW_SIG_BIT_SIZE) - 1) +#define CPPC_AUTO_ACT_WINDOW_MAX_EXP ((1 << CPPC_AUTO_ACT_WINDOW_EXP_BIT_SIZE) - 1) +/* CPPC_AUTO_ACT_WINDOW_MAX_SIG is 127, so 128 and 129 will decay to 127 when writing */ +#define CPPC_AUTO_ACT_WINDOW_SIG_CARRY_THRESH 129 + +#define CPPC_ENERGY_PERF_MAX (0xFF) + /* Each register has the folowing format. 
*/ struct cpc_reg { u8 descriptor; @@ -159,6 +168,9 @@ extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); +extern int cppc_set_epp(int cpu, u64 epp_val); +extern int cppc_get_auto_act_window(int cpu, u64 *auto_act_window); +extern int cppc_set_auto_act_window(int cpu, u64 auto_act_window); extern int cppc_get_auto_sel(int cpu, bool *enable); extern int cppc_set_auto_sel(int cpu, bool enable); extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); @@ -229,6 +241,18 @@ static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf) { return -EOPNOTSUPP; } +static inline int cppc_set_epp(int cpu, u64 epp_val) +{ + return -EOPNOTSUPP; +} +static inline int cppc_get_auto_act_window(int cpu, u64 *auto_act_window) +{ + return -EOPNOTSUPP; +} +static inline int cppc_set_auto_act_window(int cpu, u64 auto_act_window) +{ + return -EOPNOTSUPP; +} static inline int cppc_get_auto_sel(int cpu, bool *enable) { return -EOPNOTSUPP; -- cgit v1.2.3 From 97b5631aae6896369712d6b7131afbc95c753587 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Tue, 15 Apr 2025 21:15:46 -0500 Subject: iidc/ice/irdma: Rename to iidc_* convention In preparation of supporting more than a single core PCI driver for RDMA, homogenize naming to iidc_rdma_* and IIDC_RDMA_* form. Reviewed-by: Przemek Kitszel Signed-off-by: Dave Ertman Signed-off-by: Tatyana Nikolova Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc_rdma.h | 38 +++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h index 0cd75404e459..2b24a9912fa0 100644 --- a/include/linux/net/intel/iidc_rdma.h +++ b/include/linux/net/intel/iidc_rdma.h @@ -11,16 +11,16 @@ #include #include -enum iidc_event_type { - IIDC_EVENT_BEFORE_MTU_CHANGE, - IIDC_EVENT_AFTER_MTU_CHANGE, - IIDC_EVENT_BEFORE_TC_CHANGE, - IIDC_EVENT_AFTER_TC_CHANGE, - IIDC_EVENT_CRIT_ERR, - IIDC_EVENT_NBITS /* must be last */ +enum iidc_rdma_event_type { + IIDC_RDMA_EVENT_BEFORE_MTU_CHANGE, + IIDC_RDMA_EVENT_AFTER_MTU_CHANGE, + IIDC_RDMA_EVENT_BEFORE_TC_CHANGE, + IIDC_RDMA_EVENT_AFTER_TC_CHANGE, + IIDC_RDMA_EVENT_CRIT_ERR, + IIDC_RDMA_EVENT_NBITS /* must be last */ }; -enum iidc_reset_type { +enum iidc_rdma_reset_type { IIDC_PFR, IIDC_CORER, IIDC_GLOBR, @@ -47,7 +47,7 @@ struct iidc_rdma_qset_params { u8 tc; /* TC branch the Qset should belong to */ }; -struct iidc_qos_info { +struct iidc_rdma_qos_info { u64 tc_ctx; u8 rel_bw; u8 prio_type; @@ -56,8 +56,8 @@ struct iidc_qos_info { }; /* Struct to pass QoS info */ -struct iidc_qos_params { - struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; +struct iidc_rdma_qos_params { + struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; u8 up2tc[IIDC_MAX_USER_PRIORITY]; u8 vport_relative_bw; u8 vport_priority_type; @@ -66,8 +66,8 @@ struct iidc_qos_params { u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; }; -struct iidc_event { - DECLARE_BITMAP(type, IIDC_EVENT_NBITS); +struct iidc_rdma_event { + DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS); u32 reg; }; @@ -75,9 +75,11 @@ struct ice_pf; int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_rdma_request_reset(struct ice_pf *pf, enum 
iidc_reset_type reset_type); +int ice_rdma_request_reset(struct ice_pf *pf, + enum iidc_rdma_reset_type reset_type); int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); -void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); +void ice_get_qos_params(struct ice_pf *pf, + struct iidc_rdma_qos_params *qos); int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); @@ -86,7 +88,7 @@ void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); * instance of this struct dedicated to it. */ -struct iidc_auxiliary_dev { +struct iidc_rdma_core_auxiliary_dev { struct auxiliary_device adev; struct ice_pf *pf; }; @@ -96,14 +98,14 @@ struct iidc_auxiliary_dev { * driver will access these ops by performing a container_of on the * auxiliary_device->dev.driver. */ -struct iidc_auxiliary_drv { +struct iidc_rdma_core_auxiliary_drv { struct auxiliary_driver adrv; /* This event_handler is meant to be a blocking call. For instance, * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not * return until the auxiliary driver is ready for the MTU change to * happen. */ - void (*event_handler)(struct ice_pf *pf, struct iidc_event *event); + void (*event_handler)(struct ice_pf *pf, struct iidc_rdma_event *event); }; #endif /* _IIDC_RDMA_H_*/ -- cgit v1.2.3 From d9251a560ba67bbedd53b81aee32e1ad95f42000 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Tue, 15 Apr 2025 21:15:47 -0500 Subject: iidc/ice/irdma: Break iidc.h into two headers In preparation of supporting more than a single core PCI driver for RDMA, break the iidc_rdma.h header file into two more focused headers. Only the elements universal to all Intel drivers will remain in the generic iidc_rdma.h header. Move the ice specific information to an ice specific header file named iidc_rdma_ice.h. Reviewed-by: Przemek Kitszel Signed-off-by: Dave Ertman Signed-off-by: Tatyana Nikolova Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc_rdma.h | 14 +------------- include/linux/net/intel/iidc_rdma_ice.h | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 include/linux/net/intel/iidc_rdma_ice.h (limited to 'include') diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h index 2b24a9912fa0..1e8136395154 100644 --- a/include/linux/net/intel/iidc_rdma.h +++ b/include/linux/net/intel/iidc_rdma.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2021, Intel Corporation. */ +/* Copyright (C) 2021-2025, Intel Corporation. */ #ifndef _IIDC_RDMA_H_ #define _IIDC_RDMA_H_ @@ -71,18 +71,6 @@ struct iidc_rdma_event { u32 reg; }; -struct ice_pf; - -int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_rdma_request_reset(struct ice_pf *pf, - enum iidc_rdma_reset_type reset_type); -int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); -void ice_get_qos_params(struct ice_pf *pf, - struct iidc_rdma_qos_params *qos); -int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); -void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); - /* Structure representing auxiliary driver tailored information about the core * PCI dev, each auxiliary driver using the IIDC interface will have an * instance of this struct dedicated to it. 
diff --git a/include/linux/net/intel/iidc_rdma_ice.h b/include/linux/net/intel/iidc_rdma_ice.h new file mode 100644 index 000000000000..78d10003d776 --- /dev/null +++ b/include/linux/net/intel/iidc_rdma_ice.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021-2025, Intel Corporation. */ + +#ifndef _IIDC_RDMA_ICE_H_ +#define _IIDC_RDMA_ICE_H_ + +struct ice_pf; + +int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); +int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); +int ice_rdma_request_reset(struct ice_pf *pf, + enum iidc_rdma_reset_type reset_type); +int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); +void ice_get_qos_params(struct ice_pf *pf, + struct iidc_rdma_qos_params *qos); +int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); +void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); + +#endif /* _IIDC_RDMA_ICE_H_*/ -- cgit v1.2.3 From 8239b771b94b639556c1987185fd82b2a896c923 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 15 Apr 2025 21:15:48 -0500 Subject: ice: Replace ice specific DSCP mapping num with a kernel define Replace ice driver specific DSCP mapping number defines ICE_DSCP_NUM_VAL and IIDC_MAX_DSCP_MAPPING with an equivalent kernel define DSCP_MAX. Reviewed-by: Przemek Kitszel Signed-off-by: Tatyana Nikolova Signed-off-by: Dave Ertman Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc_rdma.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h index 1e8136395154..7f1910289534 100644 --- a/include/linux/net/intel/iidc_rdma.h +++ b/include/linux/net/intel/iidc_rdma.h @@ -10,6 +10,7 @@ #include #include #include +#include enum iidc_rdma_event_type { IIDC_RDMA_EVENT_BEFORE_MTU_CHANGE, @@ -32,7 +33,6 @@ enum iidc_rdma_protocol { }; #define IIDC_MAX_USER_PRIORITY 8 -#define IIDC_MAX_DSCP_MAPPING 64 #define IIDC_DSCP_PFC_MODE 0x1 /* Struct to hold per RDMA Qset info */ @@ -63,7 +63,7 @@ struct iidc_rdma_qos_params { u8 vport_priority_type; u8 num_tc; u8 pfc_mode; - u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; + u8 dscp_map[DSCP_MAX]; }; struct iidc_rdma_event { -- cgit v1.2.3 From 687cac9559d8e9277830bdfb68b57d7403695b1d Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 24 Apr 2025 09:59:54 +0100 Subject: memory: renesas-rpc-if: Add RZ/G3E xSPI support Add support for RZ/G3E xSPI. Compared to RPC-IF, it can support writes on memory-mapped area. Introduce struct rpcif_impl for holding the function pointers and data to handle the differences between xspi and rpc-if interface. 
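
An illustrative consumer sketch (not part of the patch): it assumes the core sets rpc->xspi when the controller is an XSPI_RZ_G3E instance, and only then uses the new memory-mapped write path. The function name and error policy are made up; the struct fields and xspi_dirmap_write() prototype are taken from the hunk below.

#include <linux/errno.h>
#include <memory/renesas-rpc-if.h>

static ssize_t example_dirmap_write(struct rpcif *rpc, u64 offs, size_t len,
				    const void *buf)
{
	/* Only the xSPI variant supports writes on the memory-mapped area. */
	if (!rpc->xspi)
		return -EOPNOTSUPP;

	return xspi_dirmap_write(rpc->dev, offs, len, buf);
}
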
Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20250424090000.136804-7-biju.das.jz@bp.renesas.com Signed-off-by: Krzysztof Kozlowski --- include/memory/renesas-rpc-if.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h index b8fa30fd6b50..53663c4e5ae3 100644 --- a/include/memory/renesas-rpc-if.h +++ b/include/memory/renesas-rpc-if.h @@ -61,12 +61,14 @@ enum rpcif_type { RPCIF_RCAR_GEN3, RPCIF_RCAR_GEN4, RPCIF_RZ_G2L, + XSPI_RZ_G3E, }; struct rpcif { struct device *dev; void __iomem *dirmap; size_t size; + bool xspi; }; int rpcif_sw_init(struct rpcif *rpc, struct device *dev); @@ -75,5 +77,7 @@ void rpcif_prepare(struct device *dev, const struct rpcif_op *op, u64 *offs, size_t *len); int rpcif_manual_xfer(struct device *dev); ssize_t rpcif_dirmap_read(struct device *dev, u64 offs, size_t len, void *buf); +ssize_t xspi_dirmap_write(struct device *dev, u64 offs, size_t len, + const void *buf); #endif // __RENESAS_RPC_IF_H -- cgit v1.2.3 From 1b3f2bd04d90f61e1f291b5e365b9bc4ce0ea7c7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 29 Apr 2025 19:46:21 -0700 Subject: x86/devmem: Remove duplicate range_is_allowed() definition 17 years ago, Venki suggested [1] "A future improvement would be to avoid the range_is_allowed duplication". The only thing preventing a common implementation is that phys_mem_access_prot_allowed() expects the range check to exit immediately when PAT is disabled [2]. I.e. there is no cache conflict to manage in that case. This cleanup was noticed on the path to considering changing range_is_allowed() policy to blanket deny /dev/mem for private (confidential computing) memory. Note, however that phys_mem_access_prot_allowed() has long since stopped being relevant for managing cache-type validation due to [3], and [4]. Commit 0124cecfc85a ("x86, PAT: disable /dev/mem mmap RAM with PAT") [1] Commit 9e41bff2708e ("x86: fix /dev/mem mmap breakage when PAT is disabled") [2] Commit 1886297ce0c8 ("x86/mm/pat: Fix BUG_ON() in mmap_mem() on QEMU/i386") [3] Commit 0c3c8a18361a ("x86, PAT: Remove duplicate memtype reserve in devmem mmap") [4] Signed-off-by: Dan Williams Signed-off-by: Dave Hansen Reviewed-by: Nikolay Borisov Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/all/20250430024622.1134277-2-dan.j.williams%40intel.com --- include/linux/io.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/io.h b/include/linux/io.h index 6a6bc4d46d0a..0642c7ee41db 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -183,4 +183,25 @@ static inline void arch_io_free_memtype_wc(resource_size_t base, int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start, resource_size_t size); +#ifdef CONFIG_STRICT_DEVMEM +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; + + while (cursor < to) { + if (!devmem_is_allowed(pfn)) + return 0; + cursor += PAGE_SIZE; + pfn++; + } + return 1; +} +#else +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + return 1; +} +#endif #endif /* _LINUX_IO_H */ -- cgit v1.2.3 From 3e021f3b8115614127dd04c5615ccbcc2706bdd5 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 16 Apr 2025 06:27:10 +0000 Subject: ASoC: soc-utils: add snd_soc_dlc_is_dummy() We have snd_soc_xxx_is_dummy() functions, but not for dlc. Let's add it. 
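
A hedged usage sketch: the prototype comes from the hunk below, while the surrounding function and the for_each_link_codecs() iteration are assumptions about how a caller might use it.

#include <sound/soc.h>

static bool example_link_has_dummy_codec(struct snd_soc_dai_link *link)
{
	struct snd_soc_dai_link_component *codec;
	int i;

	for_each_link_codecs(link, i, codec)
		if (snd_soc_dlc_is_dummy(codec))
			return true;

	return false;
}
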
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87cydc8vup.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 952ed77b8c87..21903984af49 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -935,7 +935,7 @@ snd_soc_link_to_platform(struct snd_soc_dai_link *link, int n) { extern struct snd_soc_dai_link_component null_dailink_component[0]; extern struct snd_soc_dai_link_component snd_soc_dummy_dlc; - +int snd_soc_dlc_is_dummy(struct snd_soc_dai_link_component *dlc); struct snd_soc_codec_conf { /* -- cgit v1.2.3 From a3e1c0ad835702555d90565584ab6f723adf7f94 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 29 Apr 2025 08:04:46 +0200 Subject: net: phy: factor out provider part from mdio_bus.c After 52358dd63e34 ("net: phy: remove function stubs") there's a problem if CONFIG_MDIO_BUS is set, but CONFIG_PHYLIB is not. mdiobus_scan() uses phylib functions like get_phy_device(). Bringing back the stub wouldn't make much sense, because it would allow to compile mdiobus_scan(), but the function would be unusable. The stub returned NULL, and we have the following in mdiobus_scan(): phydev = get_phy_device(bus, addr, c45); if (IS_ERR(phydev)) return phydev; So calling mdiobus_scan() w/o CONFIG_PHYLIB would cause a crash later in mdiobus_scan(). In general the PHYLIB functionality isn't optional here. Consequently, MDIO bus providers depend on PHYLIB. Therefore factor it out and build it together with the libphy core modules. In addition make all MDIO bus providers under /drivers/net/mdio depend on PHYLIB. Same applies to enetc MDIO bus provider. Note that PHYLIB selects MDIO_DEVRES, therefore we can omit this here. Fixes: 52358dd63e34 ("net: phy: remove function stubs") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504270639.mT0lh2o1-lkp@intel.com/ Reviewed-by: Jacob Keller Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/c74772a9-dab6-44bf-a657-389df89d85c2@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 3beaf225ee88..d62d292024bc 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2062,6 +2062,7 @@ int __phy_hwtstamp_set(struct phy_device *phydev, struct netlink_ext_ack *extack); extern const struct bus_type mdio_bus_type; +extern const struct class mdio_bus_class; struct mdio_board_info { const char *bus_id; -- cgit v1.2.3 From 8c5d8c0b9e8184a86baede72ca392f90d867d22e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 23 Apr 2025 14:21:28 +0530 Subject: OPP: Define and use scope-based cleanup helpers Define and use scope-based cleanup helpers for `struct opp` and `struct opp_table`. No intentional functional impact. 
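
A minimal sketch of the consumer pattern these helpers enable (the surrounding function is illustrative; the __free(put_opp_table) annotation is the point, and the lookup/count calls are the existing OPP API):

#include <linux/err.h>
#include <linux/pm_opp.h>

static int example_count_opps(struct device *dev)
{
	struct opp_table *opp_table __free(put_opp_table) =
					dev_pm_opp_get_opp_table(dev);

	if (IS_ERR(opp_table))
		return PTR_ERR(opp_table);

	/* No explicit dev_pm_opp_put_opp_table() needed on any return path. */
	return dev_pm_opp_get_opp_count(dev);
}
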
Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0deddfa91aca..8313ed981535 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -11,6 +11,7 @@ #ifndef __LINUX_OPP_H__ #define __LINUX_OPP_H__ +#include #include #include #include @@ -579,6 +580,12 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta } #endif +/* Scope based cleanup macro for OPP reference counting */ +DEFINE_FREE(put_opp, struct dev_pm_opp *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put(_T)) + +/* Scope based cleanup macro for OPP table reference counting */ +DEFINE_FREE(put_opp_table, struct opp_table *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put_opp_table(_T)) + /* OPP Configuration helpers */ static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, -- cgit v1.2.3 From ee3de3cf7035aaf289085111fd0cfaddc93f0409 Mon Sep 17 00:00:00 2001 From: Praveen Talari Date: Fri, 2 May 2025 10:58:22 +0530 Subject: OPP: Add dev_pm_opp_set_level() To configure a device to a specific performance level, consumer drivers currently need to determine the OPP based on the exact level and then set it, resulting in code duplication across drivers. The new helper API, dev_pm_opp_set_level(), addresses this issue by providing a streamlined method for consumer drivers to find and set the OPP based on the desired performance level, thereby eliminating redundancy. Signed-off-by: Praveen Talari [ Viresh: Lot of fixes in the code, and rebased over latest changes. Fixed commit log too. ] Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 8313ed981535..cf477beae4bb 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -197,6 +197,7 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) void dev_pm_opp_remove_table(struct device *dev); void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask); int dev_pm_opp_sync_regulators(struct device *dev); + #else static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) { @@ -717,4 +718,14 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) return dev_pm_opp_get_freq_indexed(opp, 0); } +static inline int dev_pm_opp_set_level(struct device *dev, unsigned int level) +{ + struct dev_pm_opp *opp __free(put_opp) = dev_pm_opp_find_level_exact(dev, level); + + if (IS_ERR(opp)) + return PTR_ERR(opp); + + return dev_pm_opp_set_opp(dev, opp); +} + #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 2e9b2ee2ba403cbe270a8256b8794ef5ad19b38d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 30 Apr 2025 10:52:49 +0800 Subject: iommu: Cleanup comments for dev_enable/disable_feat The dev_enable/disable_feat ops have been removed by commit ("iommu: Remove iommu_dev_enable/disable_feature()"). Cleanup the comments to make the code clean. 
Signed-off-by: Lu Baolu Reviewed-by: Vasant Hegde Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250430025249.2371751-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bc5363e6b43e..56b4a1c2e031 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -590,8 +590,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) - * @dev_enable/disable_feat: per device entries to enable/disable - * iommu specific features. * @page_response: handle page request response * @def_domain_type: device default domain type, return value: * - IOMMU_DOMAIN_IDENTITY: must use an identity domain -- cgit v1.2.3 From b64b134942c8cf4801ea288b3fd38b509aedec21 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 30 Apr 2025 02:29:03 +0300 Subject: media: renesas: vsp1: Expose color space through the DRM API Now that the VSP1 driver supports color spaces, expose them through the API used by the DU driver. This allows configuring the YCbCr encoding and quantization used by each plane, ensuring correct color rendering. Signed-off-by: Laurent Pinchart Reviewed-by: Tomi Valkeinen Link: https://lore.kernel.org/r/20250429232904.26413-9-laurent.pinchart+renesas@ideasonboard.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/vsp1.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/media/vsp1.h b/include/media/vsp1.h index 48f4a5023d81..4ea6352fd63f 100644 --- a/include/media/vsp1.h +++ b/include/media/vsp1.h @@ -52,6 +52,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int pipe_index, * @alpha: alpha value (0: fully transparent, 255: fully opaque) * @zpos: Z position of the plane (from 0 to number of planes minus 1) * @premult: true for premultiplied alpha + * @color_encoding: color encoding (valid for YUV formats only) + * @color_range: color range (valid for YUV formats only) */ struct vsp1_du_atomic_config { u32 pixelformat; @@ -62,6 +64,8 @@ struct vsp1_du_atomic_config { unsigned int alpha; unsigned int zpos; bool premult; + enum v4l2_ycbcr_encoding color_encoding; + enum v4l2_quantization color_range; }; /** -- cgit v1.2.3 From b5325b2a270fcaf7b2a9a0f23d422ca8a5a8bdea Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 14 Apr 2025 15:55:07 +0200 Subject: coredump: hand a pidfd to the usermode coredump helper Give userspace a way to instruct the kernel to install a pidfd into the usermode helper process. This makes coredump handling a lot more reliable for userspace. In parallel with this commit we already have systemd adding support for this in [1]. We create a pidfs file for the coredumping process when we process the corename pattern. When the usermode helper process is forked we then install the pidfs file as file descriptor three into the usermode helpers file descriptor table so it's available to the exec'd program. Since usermode helpers are either children of the system_unbound_wq workqueue or kthreadd we know that the file descriptor table is empty and can thus always use three as the file descriptor number. Note, that we'll install a pidfd for the thread-group leader even if a subthread is calling do_coredump(). 
We know that task linkage hasn't been removed due to delay_group_leader() and even if this @current isn't the actual thread-group leader we know that the thread-group leader cannot be reaped until @current has exited. Link: https://github.com/systemd/systemd/pull/37125 [1] Link: https://lore.kernel.org/20250414-work-coredump-v2-3-685bf231f828@kernel.org Tested-by: Luca Boccassi Reviewed-by: Oleg Nesterov Signed-off-by: Christian Brauner --- include/linux/coredump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 77e6e195d1d6..76e41805b92d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,6 +28,7 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + struct pid *pid; }; extern unsigned int core_file_note_size_limit; -- cgit v1.2.3 From 66d454e99d71857faf249486912e381ec83760b4 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Fri, 2 May 2025 09:15:21 -0700 Subject: bpf: udp: Make sure iter->batch always contains a full bucket snapshot Require that iter->batch always contains a full bucket snapshot. This invariant is important to avoid skipping or repeating sockets during iteration when combined with the next few patches. Before, there were two cases where a call to bpf_iter_udp_batch may only capture part of a bucket: 1. When bpf_iter_udp_realloc_batch() returns -ENOMEM [1]. 2. When more sockets are added to the bucket while calling bpf_iter_udp_realloc_batch(), making the updated batch size insufficient [2]. In cases where the batch size only covers part of a bucket, it is possible to forget which sockets were already visited, especially if we have to process a bucket in more than two batches. This forces us to choose between repeating or skipping sockets, so don't allow this: 1. Stop iteration and propagate -ENOMEM up to userspace if reallocation fails instead of continuing with a partial batch. 2. Try bpf_iter_udp_realloc_batch() with GFP_USER just as before, but if we still aren't able to capture the full bucket, call bpf_iter_udp_realloc_batch() again while holding the bucket lock to guarantee the bucket does not change. On the second attempt use GFP_NOWAIT since we hold onto the spin lock. Introduce the udp_portaddr_for_each_entry_from macro and use it instead of udp_portaddr_for_each_entry to make it possible to continue iteration from an arbitrary socket. This is required for this patch in the GFP_NOWAIT case to allow us to fill the rest of a batch starting from the middle of a bucket and the later patch which skips sockets that were already seen. Testing all scenarios directly is a bit difficult, but I did some manual testing to exercise the code paths where GFP_NOWAIT is used and where ERR_PTR(err) is returned. I used the realloc test case included later in this series to trigger a scenario where a realloc happens inside bpf_iter_udp_batch and made a small code tweak to force the first realloc attempt to allocate a too-small batch, thus requiring another attempt with GFP_NOWAIT. Some printks showed both reallocs with the tests passing: Apr 25 23:16:24 crow kernel: go again GFP_USER Apr 25 23:16:24 crow kernel: go again GFP_NOWAIT With this setup, I also forced each of the bpf_iter_udp_realloc_batch calls to return -ENOMEM to ensure that iteration ends and that the read() in userspace fails. 
[1]: https://lore.kernel.org/bpf/CABi4-ogUtMrH8-NVB6W8Xg_F_KDLq=yy-yu-tKr2udXE2Mu1Lg@mail.gmail.com/ [2]: https://lore.kernel.org/bpf/7ed28273-a716-4638-912d-f86f965e54bb@linux.dev/ Signed-off-by: Jordan Rife Signed-off-by: Martin KaFai Lau --- include/linux/udp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 895240177f4f..4e1a672af4c5 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -216,6 +216,9 @@ static inline void udp_allow_gso(struct sock *sk) #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry_from(__sk) \ + hlist_for_each_entry_from(__sk, __sk_common.skc_portaddr_node) + #define udp_portaddr_for_each_entry_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) -- cgit v1.2.3 From 71ded61bee2af97a218f9dfa0f4019cc4cd3b029 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Mar 2025 23:14:20 -0800 Subject: configfs-tsm: Namespace TSM report symbols In preparation for new + common TSM (TEE Security Manager) infrastructure, namespace the TSM report symbols in tsm.h with an _REPORT suffix to differentiate them from other incoming tsm work. Cc: Yilun Xu Cc: Samuel Ortiz Cc: Tom Lendacky Cc: Sami Mujawar Cc: Steven Price Reviewed-by: Alexey Kardashevskiy Reviewed-by: Suzuki K Poulose Reviewed-by: Kai Huang Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Steven Price Link: https://patch.msgid.link/174107246021.1288555.7203769833791489618.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/tsm.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/tsm.h b/include/linux/tsm.h index 11b0c525be30..431054810dca 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -6,17 +6,17 @@ #include #include -#define TSM_INBLOB_MAX 64 -#define TSM_OUTBLOB_MAX SZ_32K +#define TSM_REPORT_INBLOB_MAX 64 +#define TSM_REPORT_OUTBLOB_MAX SZ_32K /* * Privilege level is a nested permission concept to allow confidential * guests to partition address space, 4-levels are supported. 
*/ -#define TSM_PRIVLEVEL_MAX 3 +#define TSM_REPORT_PRIVLEVEL_MAX 3 /** - * struct tsm_desc - option descriptor for generating tsm report blobs + * struct tsm_report_desc - option descriptor for generating tsm report blobs * @privlevel: optional privilege level to associate with @outblob * @inblob_len: sizeof @inblob * @inblob: arbitrary input data @@ -24,10 +24,10 @@ * @service_guid: optional service-provider service guid to attest * @service_manifest_version: optional service-provider service manifest version requested */ -struct tsm_desc { +struct tsm_report_desc { unsigned int privlevel; size_t inblob_len; - u8 inblob[TSM_INBLOB_MAX]; + u8 inblob[TSM_REPORT_INBLOB_MAX]; char *service_provider; guid_t service_guid; unsigned int service_manifest_version; @@ -44,7 +44,7 @@ struct tsm_desc { * @manifestblob: (optional) manifest data associated with the report */ struct tsm_report { - struct tsm_desc desc; + struct tsm_report_desc desc; size_t outblob_len; u8 *outblob; size_t auxblob_len; @@ -88,7 +88,7 @@ enum tsm_bin_attr_index { }; /** - * struct tsm_ops - attributes and operations for tsm instances + * struct tsm_report_ops - attributes and operations for tsm_report instances * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider * @privlevel_floor: convey base privlevel for nested scenarios * @report_new: Populate @report with the report blob and auxblob @@ -99,7 +99,7 @@ enum tsm_bin_attr_index { * Implementation specific ops, only one is expected to be registered at * a time i.e. only one of "sev-guest", "tdx-guest", etc. */ -struct tsm_ops { +struct tsm_report_ops { const char *name; unsigned int privlevel_floor; int (*report_new)(struct tsm_report *report, void *data); @@ -107,6 +107,6 @@ struct tsm_ops { bool (*report_bin_attr_visible)(int n); }; -int tsm_register(const struct tsm_ops *ops, void *priv); -int tsm_unregister(const struct tsm_ops *ops); +int tsm_report_register(const struct tsm_report_ops *ops, void *priv); +int tsm_report_unregister(const struct tsm_report_ops *ops); #endif /* __TSM_H */ -- cgit v1.2.3 From ca732e990fc8222a2d6782ae750304719e212fe8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 1 May 2025 12:45:11 +0100 Subject: net: stmmac: add get_interfaces() platform method Add a get_interfaces() platform method to allow platforms to indicate to phylink which interface modes they support - which then allows phylink to validate on initialisation that the configured PHY interface mode is actually supported. 
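
A hypothetical platform implementation (glue-layer names are invented; the callback prototype matches the hunk below). The glue driver reports which PHY interface modes the integration supports so phylink can reject a mismatching configuration at init time:

#include <linux/phy.h>
#include <linux/stmmac.h>

static void example_get_interfaces(struct stmmac_priv *priv, void *bsp_priv,
				   unsigned long *interfaces)
{
	/* This hypothetical board design only wires up RGMII and SGMII. */
	__set_bit(PHY_INTERFACE_MODE_RGMII, interfaces);
	__set_bit(PHY_INTERFACE_MODE_SGMII, interfaces);
}

/* in the glue driver's probe: plat_dat->get_interfaces = example_get_interfaces; */
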
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uASLn-0021Qd-Mi@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 8aed09d65b4a..537bced69c46 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -233,6 +233,8 @@ struct plat_stmmacenet_data { u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; + void (*get_interfaces)(struct stmmac_priv *priv, void *bsp_priv, + unsigned long *interfaces); int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); -- cgit v1.2.3 From 9d165dc58055d98658941a33fef9e5da866af3e9 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 1 May 2025 12:45:27 +0100 Subject: net: stmmac: remove speed_mode_2500() method Remove the speed_mode_2500() platform method which is no longer used or necessary, being superseded by the more flexible get_interfaces() method. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1uASM3-0021R3-2B@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 537bced69c46..26ddf95d23f9 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -241,7 +241,6 @@ struct plat_stmmacenet_data { int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); - void (*speed_mode_2500)(struct net_device *ndev, void *priv); int (*mac_finish)(struct net_device *ndev, void *priv, unsigned int mode, -- cgit v1.2.3 From 3efa66ce6ee1b55ab687b316e48e1e9ddc1f780a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 16 Apr 2025 18:29:01 +0200 Subject: rcuref: Provide rcuref_is_dead() rcuref_read() returns the number of references that are currently held. If 0 is returned then it is not safe to assume that the object ca be scheduled for deconstruction because it is marked DEAD. This happens if the return value of rcuref_put() is ignored and assumptions are made. If 0 is returned then the counter transitioned from 0 to RCUREF_NOREF. If rcuref_put() did not return to the caller then the counter did not yet transition from RCUREF_NOREF to RCUREF_DEAD. This means that there is still a chance that the counter will transition from RCUREF_NOREF to 0 meaning it is still valid and must not be deconstructed. In this brief window rcuref_read() will return 0. Provide rcuref_is_dead() to determine if the counter is marked as RCUREF_DEAD. 
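
A minimal sketch of the intended distinction (struct and field names are made up): a zero return from rcuref_read() may be transient, only the DEAD marking is final.

#include <linux/rcuref.h>

struct example_obj {
	rcuref_t ref;
	/* ... */
};

static bool example_obj_is_being_destroyed(struct example_obj *obj)
{
	/*
	 * rcuref_read() == 0 only means no references are held right now;
	 * the counter can still be revived. A counter marked DEAD means a
	 * previous rcuref_put() returned true and teardown may proceed.
	 */
	return rcuref_is_dead(&obj->ref);
}
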
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250416162921.513656-2-bigeasy@linutronix.de --- include/linux/rcuref.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h index 6322d8c1c6b4..2fb2af6d9824 100644 --- a/include/linux/rcuref.h +++ b/include/linux/rcuref.h @@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt) * rcuref_read - Read the number of held reference counts of a rcuref * @ref: Pointer to the reference count * - * Return: The number of held references (0 ... N) + * Return: The number of held references (0 ... N). The value 0 does not + * indicate that it is safe to schedule the object, protected by this reference + * counter, for deconstruction. + * If you want to know if the reference counter has been marked DEAD (as + * signaled by rcuref_put()) please use rcuread_is_dead(). */ static inline unsigned int rcuref_read(rcuref_t *ref) { @@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref) return c >= RCUREF_RELEASED ? 0 : c + 1; } +/** + * rcuref_is_dead - Check if the rcuref has been already marked dead + * @ref: Pointer to the reference count + * + * Return: True if the object has been marked DEAD. This signals that a previous + * invocation of rcuref_put() returned true on this reference counter meaning + * the protected object can safely be scheduled for deconstruction. + * Otherwise, returns false. + */ +static inline bool rcuref_is_dead(rcuref_t *ref) +{ + unsigned int c = atomic_read(&ref->refcnt); + + return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF); +} + extern __must_check bool rcuref_get_slowpath(rcuref_t *ref); /** -- cgit v1.2.3 From 55284f70134f01fdc9cc4c4905551cc1f37abd34 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Apr 2025 18:29:02 +0200 Subject: mm: Add vmalloc_huge_node() To enable node specific hash-tables using huge pages if possible. [bigeasy: use __vmalloc_node_range_noprof(), add nommu bits, inline vmalloc_huge] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250416162921.513656-3-bigeasy@linutronix.de --- include/linux/vmalloc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e3..de95794777ad 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -168,8 +168,13 @@ void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_m int node, const void *caller) __alloc_size(1); #define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__)) -void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); -#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__)) +void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1); +#define vmalloc_huge_node(...) alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__)) + +static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) +{ + return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE); +} extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); #define __vmalloc_array(...) 
alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__)) -- cgit v1.2.3 From 80367ad01d93ac781b0e1df246edaf006928002f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 16 Apr 2025 18:29:12 +0200 Subject: futex: Add basic infrastructure for local task local hash The futex hash is system wide and shared by all tasks. Each slot is hashed based on futex address and the VMA of the thread. Due to randomized VMAs (and memory allocations) the same logical lock (pointer) can end up in a different hash bucket on each invocation of the application. This in turn means that different applications may share a hash bucket on the first invocation but not on the second and it is not always clear which applications will be involved. This can result in high latency's to acquire the futex_hash_bucket::lock especially if the lock owner is limited to a CPU and can not be effectively PI boosted. Introduce basic infrastructure for process local hash which is shared by all threads of process. This hash will only be used for a PROCESS_PRIVATE FUTEX operation. The hashmap can be allocated via: prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, num); A `num' of 0 means that the global hash is used instead of a private hash. Other values for `num' specify the number of slots for the hash and the number must be power of two, starting with two. The prctl() returns zero on success. This function can only be used before a thread is created. The current status for the private hash can be queried via: num = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); which return the current number of slots. The value 0 means that the global hash is used. Values greater than 0 indicate the number of slots that are used. A negative number indicates an error. For optimisation, for the private hash jhash2() uses only two arguments the address and the offset. This omits the VMA which is always the same. [peterz: Use 0 for global hash. A bit shuffling and renaming. 
] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250416162921.513656-13-bigeasy@linutronix.de --- include/linux/futex.h | 26 ++++++++++++++++++++++++-- include/linux/mm_types.h | 5 ++++- include/uapi/linux/prctl.h | 5 +++++ 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index b70df27d7e85..8f1be08bef18 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,11 +4,11 @@ #include #include +#include #include struct inode; -struct mm_struct; struct task_struct; /* @@ -77,7 +77,22 @@ void futex_exec_release(struct task_struct *tsk); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -#else +int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4); + +#ifdef CONFIG_FUTEX_PRIVATE_HASH +void futex_hash_free(struct mm_struct *mm); + +static inline void futex_mm_init(struct mm_struct *mm) +{ + mm->futex_phash = NULL; +} + +#else /* !CONFIG_FUTEX_PRIVATE_HASH */ +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } +#endif /* CONFIG_FUTEX_PRIVATE_HASH */ + +#else /* !CONFIG_FUTEX */ static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_exit_recursive(struct task_struct *tsk) { } static inline void futex_exit_release(struct task_struct *tsk) { } @@ -88,6 +103,13 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val, { return -EINVAL; } +static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) +{ + return -EINVAL; +} +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } + #endif #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 56d07edd01f9..a4b5661e4177 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -31,6 +31,7 @@ #define INIT_PASID 0 struct address_space; +struct futex_private_hash; struct mem_cgroup; /* @@ -1031,7 +1032,9 @@ struct mm_struct { */ seqcount_t mm_lock_seq; #endif - +#ifdef CONFIG_FUTEX_PRIVATE_HASH + struct futex_private_hash *futex_phash; +#endif unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 15c18ef4eb11..3b93fb906e3c 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -364,4 +364,9 @@ struct prctl_mm_map { # define PR_TIMER_CREATE_RESTORE_IDS_ON 1 # define PR_TIMER_CREATE_RESTORE_IDS_GET 2 +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 7c4f75a21f636486d2969d9b6680403ea8483539 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 16 Apr 2025 18:29:13 +0200 Subject: futex: Allow automatic allocation of process wide futex hash Allocate a private futex hash with 16 slots if a task forks its first thread. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250416162921.513656-14-bigeasy@linutronix.de --- include/linux/futex.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 8f1be08bef18..1d3f7555825e 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -80,6 +80,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4); #ifdef CONFIG_FUTEX_PRIVATE_HASH +int futex_hash_allocate_default(void); void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) @@ -88,6 +89,7 @@ static inline void futex_mm_init(struct mm_struct *mm) } #else /* !CONFIG_FUTEX_PRIVATE_HASH */ +static inline int futex_hash_allocate_default(void) { return 0; } static inline void futex_hash_free(struct mm_struct *mm) { } static inline void futex_mm_init(struct mm_struct *mm) { } #endif /* CONFIG_FUTEX_PRIVATE_HASH */ @@ -107,6 +109,10 @@ static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsig { return -EINVAL; } +static inline int futex_hash_allocate_default(void) +{ + return 0; +} static inline void futex_hash_free(struct mm_struct *mm) { } static inline void futex_mm_init(struct mm_struct *mm) { } -- cgit v1.2.3 From bd54df5ea7cadac520e346d5f0fe5d58e635b6ba Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 16 Apr 2025 18:29:14 +0200 Subject: futex: Allow to resize the private local hash The mm_struct::futex_hash_lock guards the futex_hash_bucket assignment/ replacement. The futex_hash_allocate()/ PR_FUTEX_HASH_SET_SLOTS operation can now be invoked at runtime and resize an already existing internal private futex_hash_bucket to another size. The reallocation is based on an idea by Thomas Gleixner: The initial allocation of struct futex_private_hash sets the reference count to one. Every user acquires a reference on the local hash before using it and drops it after it enqueued itself on the hash bucket. There is no reference held while the task is scheduled out while waiting for the wake up. The resize process allocates a new struct futex_private_hash and drops the initial reference. Synchronized with mm_struct::futex_hash_lock it is checked if the reference counter for the currently used mm_struct::futex_phash is marked as DEAD. If so, then all users enqueued on the current private hash are requeued on the new private hash and the new private hash is set to mm_struct::futex_phash. Otherwise the newly allocated private hash is saved as mm_struct::futex_phash_new and the rehashing and reassigning is delayed to the futex_hash() caller once the reference counter is marked DEAD. The replacement is not performed at rcuref_put() time because certain callers, such as futex_wait_queue(), drop their reference after changing the task state. This change will be destroyed once the futex_hash_lock is acquired. The user can change the number slots with PR_FUTEX_HASH_SET_SLOTS multiple times. An increase and decrease is allowed and request blocks until the assignment is done. The private hash allocated at thread creation is changed from 16 to 16 <= 4 * number_of_threads <= global_hash_size where number_of_threads can not exceed the number of online CPUs. Should the user PR_FUTEX_HASH_SET_SLOTS then the auto scaling is disabled. 
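
A userspace sketch of the prctl() usage described above (error handling trimmed; the fallback defines mirror the uapi additions in this series, and the fourth argument is the flags word introduced by the immutability patch later in this log, left at 0 here):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_FUTEX_HASH
# define PR_FUTEX_HASH			78
# define PR_FUTEX_HASH_SET_SLOTS	1
# define PR_FUTEX_HASH_GET_SLOTS	2
#endif

int main(void)
{
	/* Request a 64-slot private hash; 0 switches back to the global hash. */
	if (prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 64, 0))
		perror("PR_FUTEX_HASH_SET_SLOTS");

	printf("futex hash slots: %d\n",
	       prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS));

	return 0;
}
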
[peterz: reorganize the code to avoid state tracking and simplify new object handling, block the user until changes are in effect, allow increase and decrease of the hash]. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250416162921.513656-15-bigeasy@linutronix.de --- include/linux/futex.h | 3 ++- include/linux/mm_types.h | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 1d3f7555825e..40bc778b2bb4 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -85,7 +85,8 @@ void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) { - mm->futex_phash = NULL; + rcu_assign_pointer(mm->futex_phash, NULL); + mutex_init(&mm->futex_hash_lock); } #else /* !CONFIG_FUTEX_PRIVATE_HASH */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index a4b5661e4177..32ba5126e221 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1033,7 +1033,9 @@ struct mm_struct { seqcount_t mm_lock_seq; #endif #ifdef CONFIG_FUTEX_PRIVATE_HASH - struct futex_private_hash *futex_phash; + struct mutex futex_hash_lock; + struct futex_private_hash __rcu *futex_phash; + struct futex_private_hash *futex_phash_new; #endif unsigned long hiwater_rss; /* High-watermark of RSS usage */ -- cgit v1.2.3 From 63e8595c060a1fef421e3eecfc05ad882dafb8ac Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 16 Apr 2025 18:29:15 +0200 Subject: futex: Allow to make the private hash immutable My initial testing showed that: perf bench futex hash reported less operations/sec with private hash. After using the same amount of buckets in the private hash as used by the global hash then the operations/sec were about the same. This changed once the private hash became resizable. This feature added an RCU section and reference counting via atomic inc+dec operation into the hot path. The reference counting can be avoided if the private hash is made immutable. Extend PR_FUTEX_HASH_SET_SLOTS by a fourth argument which denotes if the private should be made immutable. Once set (to true) the a further resize is not allowed (same if set to global hash). Add PR_FUTEX_HASH_GET_IMMUTABLE which returns true if the hash can not be changed. Update "perf bench" suite. For comparison, results of "perf bench futex hash -s": - Xeon CPU E5-2650, 2 NUMA nodes, total 32 CPUs: - Before the introducing task local hash shared Averaged 1.487.148 operations/sec (+- 0,53%), total secs = 10 private Averaged 2.192.405 operations/sec (+- 0,07%), total secs = 10 - With the series shared Averaged 1.326.342 operations/sec (+- 0,41%), total secs = 10 -b128 Averaged 141.394 operations/sec (+- 1,15%), total secs = 10 -Ib128 Averaged 851.490 operations/sec (+- 0,67%), total secs = 10 -b8192 Averaged 131.321 operations/sec (+- 2,13%), total secs = 10 -Ib8192 Averaged 1.923.077 operations/sec (+- 0,61%), total secs = 10 128 is the default allocation of hash buckets. 8192 was the previous amount of allocated hash buckets. 
- Xeon(R) CPU E7-8890 v3, 4 NUMA nodes, total 144 CPUs: - Before the introducing task local hash shared Averaged 1.810.936 operations/sec (+- 0,26%), total secs = 20 private Averaged 2.505.801 operations/sec (+- 0,05%), total secs = 20 - With the series shared Averaged 1.589.002 operations/sec (+- 0,25%), total secs = 20 -b1024 Averaged 42.410 operations/sec (+- 0,20%), total secs = 20 -Ib1024 Averaged 740.638 operations/sec (+- 1,51%), total secs = 20 -b65536 Averaged 48.811 operations/sec (+- 1,35%), total secs = 20 -Ib65536 Averaged 1.963.165 operations/sec (+- 0,18%), total secs = 20 1024 is the default allocation of hash buckets. 65536 was the previous amount of allocated hash buckets. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Acked-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20250416162921.513656-16-bigeasy@linutronix.de --- include/uapi/linux/prctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 3b93fb906e3c..43dec6eed559 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -367,6 +367,8 @@ struct prctl_mm_map { /* FUTEX hash management */ #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From cec199c5e39bde7191a08087cc3d002ccfab31ff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Apr 2025 18:29:16 +0200 Subject: futex: Implement FUTEX2_NUMA Extend the futex2 interface to be numa aware. When FUTEX2_NUMA is specified for a futex, the user value is extended to two words (of the same size). The first is the user value we all know, the second one will be the node to place this futex on. struct futex_numa_32 { u32 val; u32 node; }; When node is set to ~0, WAIT will set it to the current node_id such that WAKE knows where to find it. If userspace corrupts the node value between WAIT and WAKE, the futex will not be found and no wakeup will happen. When FUTEX2_NUMA is not set, the node is simply an extension of the hash, such that traditional futexes are still interleaved over the nodes. This is done to avoid having to have a separate !numa hash-table. [bigeasy: ensure to have at least hashsize of 4 in futex_init(), add pr_info() for size and allocation information. Cast the naddr math to void*] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250416162921.513656-17-bigeasy@linutronix.de --- include/linux/futex.h | 3 +++ include/uapi/linux/futex.h | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 40bc778b2bb4..eccc99751bd9 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -34,6 +34,7 @@ union futex_key { u64 i_seq; unsigned long pgoff; unsigned int offset; + /* unsigned int node; */ } shared; struct { union { @@ -42,11 +43,13 @@ union futex_key { }; unsigned long address; unsigned int offset; + /* unsigned int node; */ } private; struct { u64 ptr; unsigned long word; unsigned int offset; + unsigned int node; /* NOT hashed! 
*/ } both; }; diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index d2ee625ea189..6b94da467e70 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -74,6 +74,13 @@ /* do not use */ #define FUTEX_32 FUTEX2_SIZE_U32 /* historical accident :-( */ +/* + * When FUTEX2_NUMA doubles the futex word, the second word is a node value. + * The special value -1 indicates no-node. This is the same value as + * NUMA_NO_NODE, except that value is not ABI, this is. + */ +#define FUTEX_NO_NODE (-1) + /* * Max numbers of elements in a futex_waitv array */ -- cgit v1.2.3 From c042c505210dc3453f378df432c10fff3d471bc5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Apr 2025 18:29:17 +0200 Subject: futex: Implement FUTEX2_MPOL Extend the futex2 interface to be aware of mempolicy. When FUTEX2_MPOL is specified and there is a MPOL_PREFERRED or home_node specified covering the futex address, use that hash-map. Notably, in this case the futex will go to the global node hashtable, even if it is a PRIVATE futex. When FUTEX2_NUMA|FUTEX2_MPOL is specified and the user specified node value is FUTEX_NO_NODE, the MPOL lookup (as described above) will be tried first before reverting to setting node to the local node. [bigeasy: add CONFIG_FUTEX_MPOL, add MPOL to FUTEX2_VALID_MASK, write the node only to user if FUTEX_NO_NODE was supplied] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250416162921.513656-18-bigeasy@linutronix.de --- include/linux/mmap_lock.h | 4 ++++ include/uapi/linux/futex.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 4706c6769902..e0eddfd306ef 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -7,6 +7,7 @@ #include #include #include +#include #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -211,6 +212,9 @@ static inline void mmap_read_unlock(struct mm_struct *mm) up_read(&mm->mmap_lock); } +DEFINE_GUARD(mmap_read_lock, struct mm_struct *, + mmap_read_lock(_T), mmap_read_unlock(_T)) + static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { __mmap_lock_trace_released(mm, false); diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 6b94da467e70..7e2744ec8933 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -63,7 +63,7 @@ #define FUTEX2_SIZE_U32 0x02 #define FUTEX2_SIZE_U64 0x03 #define FUTEX2_NUMA 0x04 - /* 0x08 */ +#define FUTEX2_MPOL 0x08 /* 0x10 */ /* 0x20 */ /* 0x40 */ -- cgit v1.2.3 From 4862c8861d902d43645a493e441c4478be1c6c44 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 29 Apr 2025 18:50:17 +0200 Subject: dm: Allow .prepare_ioctl to handle ioctls directly This adds a 'bool *forward' parameter to .prepare_ioctl, which allows device mapper targets to accept ioctls to themselves instead of the underlying device. If the target already fully handled the ioctl, it sets *forward to false and device mapper won't forward it to the underlying device any more. In order for targets to actually know what the ioctl is about and how to handle it, pass also cmd and arg. 
As long as targets restrict themselves to interpreting ioctls of type DM_IOCTL, this is a backwards compatible change because previously, any such ioctl would have been passed down through all device mapper layers until it reached a device that can't understand the ioctl and would return an error. Signed-off-by: Kevin Wolf Reviewed-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka --- include/linux/device-mapper.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index bcc6d7b69470..cb95951547ab 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -93,7 +93,14 @@ typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, typedef int (*dm_message_fn) (struct dm_target *ti, unsigned int argc, char **argv, char *result, unsigned int maxlen); -typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev); +/* + * Called with *forward == true. If it remains true, the ioctl should be + * forwarded to bdev. If it is reset to false, the target already fully handled + * the ioctl and the return value is the return value for the whole ioctl. + */ +typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev, + unsigned int cmd, unsigned long arg, + bool *forward); #ifdef CONFIG_BLK_DEV_ZONED typedef int (*dm_report_zones_fn) (struct dm_target *ti, -- cgit v1.2.3 From 7734fb4ad98c3fdaf0fde82978ef8638195a5285 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 29 Apr 2025 18:50:18 +0200 Subject: dm mpath: Interface for explicit probing of active paths Multipath cannot directly provide failover for ioctls in the kernel because it doesn't know what each ioctl means and which result could indicate a path error. Userspace generally knows what the ioctl it issued means and if it might be a path error, but neither does it know which path the ioctl took nor does it necessarily have the privileges to fail a path using the control device. In order to allow userspace to address this situation, implement a DM_MPATH_PROBE_PATHS ioctl that prompts the dm-mpath driver to probe all active paths in the current path group to see whether they still work, and fail them if not. If this returns success, userspace can retry the ioctl and expect that the previously hit bad path is now failed (or working again). The immediate motivation for this is the use of SG_IO in QEMU for SCSI passthrough. Following a failed SG_IO ioctl, QEMU will trigger probing to ensure that all active paths are actually alive, so that retrying SG_IO at least has a lower chance of failing due to a path error. However, the problem is broader than just SG_IO (it affects any ioctl), and if applications need failover support for other ioctls, the same probing can be used. This is not implemented on the DM control device, but on the DM mpath block devices, to allow all users who have access to such a block device to make use of this interface, specifically to implement failover for ioctls. For the same reason, it is also unprivileged. Its implementation is effectively just a bunch of reads, which could already be issued by userspace, just without any guarantee that all the rights paths are selected. The probing implemented here is done fully synchronously path by path; probing all paths concurrently is left as an improvement for the future. 
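From userspace the intended retry flow is small; a hedged sketch (error handling trimmed, the wrapper name is made up) issued against the dm-mpath block device fd:

    #include <sys/ioctl.h>
    #include <linux/dm-ioctl.h>

    /* Retry an ioctl once after asking dm-mpath to probe its active paths. */
    static int ioctl_with_path_probe(int fd, unsigned long request, void *argp)
    {
            int ret = ioctl(fd, request, argp);

            if (ret < 0 && ioctl(fd, DM_MPATH_PROBE_PATHS) == 0)
                    ret = ioctl(fd, request, argp);

            return ret;
    }
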
Co-developed-by: Hanna Czenczek Signed-off-by: Hanna Czenczek Signed-off-by: Kevin Wolf Reviewed-by: Benjamin Marzinski Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka --- include/uapi/linux/dm-ioctl.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index b08c7378164d..3225e025e30e 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -258,10 +258,12 @@ enum { DM_DEV_SET_GEOMETRY_CMD, DM_DEV_ARM_POLL_CMD, DM_GET_TARGET_VERSION_CMD, + DM_MPATH_PROBE_PATHS_CMD, }; #define DM_IOCTL 0xfd +/* Control device ioctls */ #define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) #define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) #define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl) @@ -285,10 +287,13 @@ enum { #define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl) #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) +/* Block device ioctls */ +#define DM_MPATH_PROBE_PATHS _IO(DM_IOCTL, DM_MPATH_PROBE_PATHS_CMD) + #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 49 +#define DM_VERSION_MINOR 50 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2025-01-17)" +#define DM_VERSION_EXTRA "-ioctl (2025-04-28)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 74a43a2cf5e8a3eeab3b55a2e64b33281f5ac554 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:05 +0800 Subject: crypto: lib/sha256 - Move partial block handling out Extract the common partial block handling into a helper macro that can be reused by other library code. Also delete the unused sha256_base_do_finalize function. Signed-off-by: Herbert Xu --- include/crypto/internal/blockhash.h | 52 +++++++++++++++++++++++++++++++++++++ include/crypto/sha2.h | 9 +++++-- include/crypto/sha256_base.h | 38 +++------------------------ 3 files changed, 62 insertions(+), 37 deletions(-) create mode 100644 include/crypto/internal/blockhash.h (limited to 'include') diff --git a/include/crypto/internal/blockhash.h b/include/crypto/internal/blockhash.h new file mode 100644 index 000000000000..52d9d4c82493 --- /dev/null +++ b/include/crypto/internal/blockhash.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Handle partial blocks for block hash. 
+ * + * Copyright (c) 2015 Linaro Ltd + * Copyright (c) 2025 Herbert Xu + */ + +#ifndef _CRYPTO_INTERNAL_BLOCKHASH_H +#define _CRYPTO_INTERNAL_BLOCKHASH_H + +#include +#include + +#define BLOCK_HASH_UPDATE_BASE(block_fn, state, src, nbytes, bs, dv, \ + buf, buflen) \ + ({ \ + typeof(block_fn) *_block_fn = &(block_fn); \ + typeof(state + 0) _state = (state); \ + unsigned int _buflen = (buflen); \ + size_t _nbytes = (nbytes); \ + unsigned int _bs = (bs); \ + const u8 *_src = (src); \ + u8 *_buf = (buf); \ + while ((_buflen + _nbytes) >= _bs) { \ + const u8 *data = _src; \ + size_t len = _nbytes; \ + size_t blocks; \ + int remain; \ + if (_buflen) { \ + remain = _bs - _buflen; \ + memcpy(_buf + _buflen, _src, remain); \ + data = _buf; \ + len = _bs; \ + } \ + remain = len % bs; \ + blocks = (len - remain) / (dv); \ + (*_block_fn)(_state, data, blocks); \ + _src += len - remain - _buflen; \ + _nbytes -= len - remain - _buflen; \ + _buflen = 0; \ + } \ + memcpy(_buf + _buflen, _src, _nbytes); \ + _buflen += _nbytes; \ + }) + +#define BLOCK_HASH_UPDATE(block, state, src, nbytes, bs, buf, buflen) \ + BLOCK_HASH_UPDATE_BASE(block, state, src, nbytes, bs, 1, buf, buflen) +#define BLOCK_HASH_UPDATE_BLOCKS(block, state, src, nbytes, bs, buf, buflen) \ + BLOCK_HASH_UPDATE_BASE(block, state, src, nbytes, bs, bs, buf, buflen) + +#endif /* _CRYPTO_INTERNAL_BLOCKHASH_H */ diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index abbd882f7849..f873c2207b1e 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -71,8 +71,13 @@ struct crypto_sha256_state { }; struct sha256_state { - u32 state[SHA256_DIGEST_SIZE / 4]; - u64 count; + union { + struct crypto_sha256_state ctx; + struct { + u32 state[SHA256_DIGEST_SIZE / 4]; + u64 count; + }; + }; u8 buf[SHA256_BLOCK_SIZE]; }; diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h index 08cd5e41d4fd..9f284bed5a51 100644 --- a/include/crypto/sha256_base.h +++ b/include/crypto/sha256_base.h @@ -8,6 +8,7 @@ #ifndef _CRYPTO_SHA256_BASE_H #define _CRYPTO_SHA256_BASE_H +#include #include #include #include @@ -40,35 +41,10 @@ static inline int lib_sha256_base_do_update(struct sha256_state *sctx, sha256_block_fn *block_fn) { unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; - struct crypto_sha256_state *state = (void *)sctx; sctx->count += len; - - if (unlikely((partial + len) >= SHA256_BLOCK_SIZE)) { - int blocks; - - if (partial) { - int p = SHA256_BLOCK_SIZE - partial; - - memcpy(sctx->buf + partial, data, p); - data += p; - len -= p; - - block_fn(state, sctx->buf, 1); - } - - blocks = len / SHA256_BLOCK_SIZE; - len %= SHA256_BLOCK_SIZE; - - if (blocks) { - block_fn(state, data, blocks); - data += blocks * SHA256_BLOCK_SIZE; - } - partial = 0; - } - if (len) - memcpy(sctx->buf + partial, data, len); - + BLOCK_HASH_UPDATE_BLOCKS(block_fn, &sctx->ctx, data, len, + SHA256_BLOCK_SIZE, sctx->buf, partial); return 0; } @@ -140,14 +116,6 @@ static inline int lib_sha256_base_do_finalize(struct sha256_state *sctx, return lib_sha256_base_do_finup(state, sctx->buf, partial, block_fn); } -static inline int sha256_base_do_finalize(struct shash_desc *desc, - sha256_block_fn *block_fn) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - return lib_sha256_base_do_finalize(sctx, block_fn); -} - static inline int __sha256_base_finish(u32 state[SHA256_DIGEST_SIZE / 4], u8 *out, unsigned int digest_size) { -- cgit v1.2.3 From 9b84cb897803c484e15eb1885cd45a895ce1e436 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:07 
+0800 Subject: crypto: lib/poly1305 - Add block-only interface Add a block-only interface for poly1305. Implement the generic code first. Also use the generic partial block helper. Signed-off-by: Herbert Xu --- include/crypto/internal/poly1305.h | 28 ++++++++++++++++++++++++++-- include/crypto/poly1305.h | 25 +++++++++++++++++++------ 2 files changed, 45 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h index e614594f88c1..c60315f47562 100644 --- a/include/crypto/internal/poly1305.h +++ b/include/crypto/internal/poly1305.h @@ -6,9 +6,8 @@ #ifndef _CRYPTO_INTERNAL_POLY1305_H #define _CRYPTO_INTERNAL_POLY1305_H -#include -#include #include +#include /* * Poly1305 core functions. These only accept whole blocks; the caller must @@ -31,4 +30,29 @@ void poly1305_core_blocks(struct poly1305_state *state, void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], void *dst); +void poly1305_block_init_arch(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +void poly1305_block_init_generic(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit); + +static inline void poly1305_blocks_generic(struct poly1305_block_state *state, + const u8 *src, unsigned int len, + u32 padbit) +{ + poly1305_core_blocks(&state->h, &state->core_r, src, + len / POLY1305_BLOCK_SIZE, padbit); +} + +void poly1305_emit_arch(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]); + +static inline void poly1305_emit_generic(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]) +{ + poly1305_core_emit(state, nonce, digest); +} + #endif diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 6e21ec2d1dc2..027d74842cd5 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -7,7 +7,6 @@ #define _CRYPTO_POLY1305_H #include -#include #define POLY1305_BLOCK_SIZE 16 #define POLY1305_KEY_SIZE 32 @@ -38,6 +37,17 @@ struct poly1305_state { }; }; +/* Combined state for block function. */ +struct poly1305_block_state { + /* accumulator */ + struct poly1305_state h; + /* key */ + union { + struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; + struct poly1305_core_key core_r; + }; +}; + struct poly1305_desc_ctx { /* partial buffer */ u8 buf[POLY1305_BLOCK_SIZE]; @@ -45,12 +55,15 @@ struct poly1305_desc_ctx { unsigned int buflen; /* finalize key */ u32 s[4]; - /* accumulator */ - struct poly1305_state h; - /* key */ union { - struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; - struct poly1305_core_key core_r; + struct { + struct poly1305_state h; + union { + struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; + struct poly1305_core_key core_r; + }; + }; + struct poly1305_block_state state; }; }; -- cgit v1.2.3 From 10a6d72ea355b730aa9702da0fd36aef0898a80e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Apr 2025 12:56:28 +0800 Subject: crypto: lib/poly1305 - Use block-only interface Now that every architecture provides a block function, use that to implement the lib/poly1305 and remove the old per-arch code. 
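Callers keep the same three-call pattern; a minimal sketch of the library interface as it looks after this change (illustrative only, the wrapper name is made up):

    #include <crypto/poly1305.h>

    /* One-shot Poly1305 MAC over a buffer using the library API. */
    static void example_poly1305_mac(const u8 key[POLY1305_KEY_SIZE],
                                     const u8 *msg, unsigned int len,
                                     u8 mac[POLY1305_DIGEST_SIZE])
    {
            struct poly1305_desc_ctx desc;

            poly1305_init(&desc, key);
            poly1305_update(&desc, msg, len);
            poly1305_final(&desc, mac);
    }
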
Signed-off-by: Herbert Xu --- include/crypto/poly1305.h | 53 ++++++----------------------------------------- 1 file changed, 6 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index 027d74842cd5..e54abda8cfe9 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -55,55 +55,14 @@ struct poly1305_desc_ctx { unsigned int buflen; /* finalize key */ u32 s[4]; - union { - struct { - struct poly1305_state h; - union { - struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; - struct poly1305_core_key core_r; - }; - }; - struct poly1305_block_state state; - }; + struct poly1305_block_state state; }; -void poly1305_init_arch(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]); -void poly1305_init_generic(struct poly1305_desc_ctx *desc, - const u8 key[POLY1305_KEY_SIZE]); - -static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_init_arch(desc, key); - else - poly1305_init_generic(desc, key); -} - -void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes); -void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, - unsigned int nbytes); - -static inline void poly1305_update(struct poly1305_desc_ctx *desc, - const u8 *src, unsigned int nbytes) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_update_arch(desc, src, nbytes); - else - poly1305_update_generic(desc, src, nbytes); -} - -void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest); -void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest); - -static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_final_arch(desc, digest); - else - poly1305_final_generic(desc, digest); -} +void poly1305_init(struct poly1305_desc_ctx *desc, + const u8 key[POLY1305_KEY_SIZE]); +void poly1305_update(struct poly1305_desc_ctx *desc, + const u8 *src, unsigned int nbytes); +void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest); #if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305) bool poly1305_is_arch_optimized(void); -- cgit v1.2.3 From 950e5c84118c9e5b06bb9a9b64edf989ee4034df Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:26 -0700 Subject: crypto: sha256 - support arch-optimized lib and expose through shash As has been done for various other algorithms, rework the design of the SHA-256 library to support arch-optimized implementations, and make crypto/sha256.c expose both generic and arch-optimized shash algorithms that wrap the library functions. This allows users of the SHA-256 library functions to take advantage of the arch-optimized code, and this makes it much simpler to integrate SHA-256 for each architecture. Note that sha256_base.h is not used in the new design. It will be removed once all the architecture-specific code has been updated. Move the generic block function into its own module to avoid a circular dependency from libsha256.ko => sha256-$ARCH.ko => libsha256.ko. Signed-off-by: Eric Biggers Add export and import functions to maintain existing export format. 
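For library callers nothing changes at the call sites; a minimal sketch of the interface being rewired underneath (prototypes as they stand at the end of this series, wrapper name made up):

    #include <crypto/sha2.h>

    /* Incremental SHA-256 via the library API. */
    static void example_sha256(const u8 *data, size_t len,
                               u8 digest[SHA256_DIGEST_SIZE])
    {
            struct sha256_state sctx;

            sha256_init(&sctx);
            sha256_update(&sctx, data, len);
            sha256_final(&sctx, digest);
    }
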
Signed-off-by: Herbert Xu --- include/crypto/internal/sha2.h | 28 ++++++++++++++++++++++++++++ include/crypto/sha2.h | 15 +++------------ include/crypto/sha256_base.h | 9 ++++++--- 3 files changed, 37 insertions(+), 15 deletions(-) create mode 100644 include/crypto/internal/sha2.h (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h new file mode 100644 index 000000000000..d641c67abcbc --- /dev/null +++ b/include/crypto/internal/sha2.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _CRYPTO_INTERNAL_SHA2_H +#define _CRYPTO_INTERNAL_SHA2_H + +#include + +void sha256_update_generic(struct sha256_state *sctx, + const u8 *data, size_t len); +void sha256_final_generic(struct sha256_state *sctx, + u8 out[SHA256_DIGEST_SIZE]); +void sha224_final_generic(struct sha256_state *sctx, + u8 out[SHA224_DIGEST_SIZE]); + +#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) +bool sha256_is_arch_optimized(void); +#else +static inline bool sha256_is_arch_optimized(void) +{ + return false; +} +#endif +void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); + +#endif /* _CRYPTO_INTERNAL_SHA2_H */ diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index f873c2207b1e..9a56286d736d 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -13,6 +13,7 @@ #define SHA256_DIGEST_SIZE 32 #define SHA256_BLOCK_SIZE 64 +#define SHA256_STATE_WORDS 8 #define SHA384_DIGEST_SIZE 48 #define SHA384_BLOCK_SIZE 128 @@ -66,7 +67,7 @@ extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE]; extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE]; struct crypto_sha256_state { - u32 state[SHA256_DIGEST_SIZE / 4]; + u32 state[SHA256_STATE_WORDS]; u64 count; }; @@ -74,7 +75,7 @@ struct sha256_state { union { struct crypto_sha256_state ctx; struct { - u32 state[SHA256_DIGEST_SIZE / 4]; + u32 state[SHA256_STATE_WORDS]; u64 count; }; }; @@ -87,16 +88,6 @@ struct sha512_state { u8 buf[SHA512_BLOCK_SIZE]; }; -/* - * Stand-alone implementation of the SHA256 algorithm. It is designed to - * have as little dependencies as possible so it can be used in the - * kexec_file purgatory. In other cases you should generally use the - * hash APIs from include/crypto/hash.h. Especially when hashing large - * amounts of data as those APIs may be hw-accelerated. - * - * For details see lib/crypto/sha256.c - */ - static inline void sha256_init(struct sha256_state *sctx) { sctx->state[0] = SHA256_H0; diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h index 9f284bed5a51..804361731a7a 100644 --- a/include/crypto/sha256_base.h +++ b/include/crypto/sha256_base.h @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include @@ -142,7 +142,10 @@ static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) return __sha256_base_finish(sctx->state, out, digest_size); } -void sha256_transform_blocks(struct crypto_sha256_state *sst, - const u8 *input, int blocks); +static inline void sha256_transform_blocks(struct crypto_sha256_state *sst, + const u8 *input, int blocks) +{ + sha256_blocks_generic(sst->state, input, blocks); +} #endif /* _CRYPTO_SHA256_BASE_H */ -- cgit v1.2.3 From 5aab01777fd38ff820bd7ae6cbc15d03b85de92f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:37 -0700 Subject: crypto: sha256 - remove sha256_base.h sha256_base.h is no longer used, so remove it. 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/sha256_base.h | 151 ------------------------------------------- 1 file changed, 151 deletions(-) delete mode 100644 include/crypto/sha256_base.h (limited to 'include') diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h deleted file mode 100644 index 804361731a7a..000000000000 --- a/include/crypto/sha256_base.h +++ /dev/null @@ -1,151 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha256_base.h - core logic for SHA-256 implementations - * - * Copyright (C) 2015 Linaro Ltd - */ - -#ifndef _CRYPTO_SHA256_BASE_H -#define _CRYPTO_SHA256_BASE_H - -#include -#include -#include -#include -#include -#include -#include - -typedef void (sha256_block_fn)(struct crypto_sha256_state *sst, u8 const *src, - int blocks); - -static inline int sha224_base_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - sha224_init(sctx); - return 0; -} - -static inline int sha256_base_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - sha256_init(sctx); - return 0; -} - -static inline int lib_sha256_base_do_update(struct sha256_state *sctx, - const u8 *data, - unsigned int len, - sha256_block_fn *block_fn) -{ - unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; - - sctx->count += len; - BLOCK_HASH_UPDATE_BLOCKS(block_fn, &sctx->ctx, data, len, - SHA256_BLOCK_SIZE, sctx->buf, partial); - return 0; -} - -static inline int lib_sha256_base_do_update_blocks( - struct crypto_sha256_state *sctx, const u8 *data, unsigned int len, - sha256_block_fn *block_fn) -{ - unsigned int remain = len - round_down(len, SHA256_BLOCK_SIZE); - - sctx->count += len - remain; - block_fn(sctx, data, len / SHA256_BLOCK_SIZE); - return remain; -} - -static inline int sha256_base_do_update_blocks( - struct shash_desc *desc, const u8 *data, unsigned int len, - sha256_block_fn *block_fn) -{ - return lib_sha256_base_do_update_blocks(shash_desc_ctx(desc), data, - len, block_fn); -} - -static inline int lib_sha256_base_do_finup(struct crypto_sha256_state *sctx, - const u8 *src, unsigned int len, - sha256_block_fn *block_fn) -{ - unsigned int bit_offset = SHA256_BLOCK_SIZE / 8 - 1; - union { - __be64 b64[SHA256_BLOCK_SIZE / 4]; - u8 u8[SHA256_BLOCK_SIZE * 2]; - } block = {}; - - if (len >= bit_offset * 8) - bit_offset += SHA256_BLOCK_SIZE / 8; - memcpy(&block, src, len); - block.u8[len] = 0x80; - sctx->count += len; - block.b64[bit_offset] = cpu_to_be64(sctx->count << 3); - block_fn(sctx, block.u8, (bit_offset + 1) * 8 / SHA256_BLOCK_SIZE); - memzero_explicit(&block, sizeof(block)); - - return 0; -} - -static inline int sha256_base_do_finup(struct shash_desc *desc, - const u8 *src, unsigned int len, - sha256_block_fn *block_fn) -{ - struct crypto_sha256_state *sctx = shash_desc_ctx(desc); - - if (len >= SHA256_BLOCK_SIZE) { - int remain; - - remain = lib_sha256_base_do_update_blocks(sctx, src, len, - block_fn); - src += len - remain; - len = remain; - } - return lib_sha256_base_do_finup(sctx, src, len, block_fn); -} - -static inline int lib_sha256_base_do_finalize(struct sha256_state *sctx, - sha256_block_fn *block_fn) -{ - unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; - struct crypto_sha256_state *state = (void *)sctx; - - sctx->count -= partial; - return lib_sha256_base_do_finup(state, sctx->buf, partial, block_fn); -} - -static inline int __sha256_base_finish(u32 state[SHA256_DIGEST_SIZE / 4], - u8 *out, unsigned int digest_size) -{ - __be32 *digest = 
(__be32 *)out; - int i; - - for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be32)) - put_unaligned_be32(state[i], digest++); - return 0; -} - -static inline void lib_sha256_base_finish(struct sha256_state *sctx, u8 *out, - unsigned int digest_size) -{ - __sha256_base_finish(sctx->state, out, digest_size); - memzero_explicit(sctx, sizeof(*sctx)); -} - -static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) -{ - unsigned int digest_size = crypto_shash_digestsize(desc->tfm); - struct crypto_sha256_state *sctx = shash_desc_ctx(desc); - - return __sha256_base_finish(sctx->state, out, digest_size); -} - -static inline void sha256_transform_blocks(struct crypto_sha256_state *sst, - const u8 *input, int blocks) -{ - sha256_blocks_generic(sst->state, input, blocks); -} - -#endif /* _CRYPTO_SHA256_BASE_H */ -- cgit v1.2.3 From 7350fef56b7629a539897c332d7324629580671d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 10:00:38 -0700 Subject: crypto: lib/sha256 - improve function prototypes Follow best practices by changing the length parameters to size_t and explicitly specifying the length of the output digest arrays. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/sha2.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index 9a56286d736d..9853cd2d1291 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -100,9 +100,9 @@ static inline void sha256_init(struct sha256_state *sctx) sctx->state[7] = SHA256_H7; sctx->count = 0; } -void sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len); -void sha256_final(struct sha256_state *sctx, u8 *out); -void sha256(const u8 *data, unsigned int len, u8 *out); +void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len); +void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]); +void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); static inline void sha224_init(struct sha256_state *sctx) { @@ -117,6 +117,6 @@ static inline void sha224_init(struct sha256_state *sctx) sctx->count = 0; } /* Simply use sha256_update as it is equivalent to sha224_update. */ -void sha224_final(struct sha256_state *sctx, u8 *out); +void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]); #endif /* _CRYPTO_SHA2_H */ -- cgit v1.2.3 From 0e2392b6513cb138e2915ad08e33ac029358e957 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 29 Apr 2025 16:43:22 +0800 Subject: crypto: streebog - Use API partial block handling Use the Crypto API partial block handling. 
Signed-off-by: Herbert Xu --- include/crypto/streebog.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/crypto/streebog.h b/include/crypto/streebog.h index cae1b4a01971..570f720a113b 100644 --- a/include/crypto/streebog.h +++ b/include/crypto/streebog.h @@ -23,15 +23,10 @@ struct streebog_uint512 { }; struct streebog_state { - union { - u8 buffer[STREEBOG_BLOCK_SIZE]; - struct streebog_uint512 m; - }; struct streebog_uint512 hash; struct streebog_uint512 h; struct streebog_uint512 N; struct streebog_uint512 Sigma; - size_t fillsize; }; #endif /* !_CRYPTO_STREEBOG_H_ */ -- cgit v1.2.3 From 63ff962c058a0b32a6c512a9fcd5aae11ebcb7e1 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Wed, 30 Apr 2025 14:59:53 +0300 Subject: crypto: rng - fix documentation for crypto_rng_alg() Current documentation states that crypto_rng_alg() returns the cra_name of the rng algorithm, but it actually returns a 'struct rng_alg' pointer from a RNG handle. Update documentation to reflect this. Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu --- include/crypto/rng.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/crypto/rng.h b/include/crypto/rng.h index 5ac4388f50e1..f8224cc390f8 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h @@ -102,12 +102,10 @@ static inline struct rng_alg *__crypto_rng_alg(struct crypto_alg *alg) } /** - * crypto_rng_alg - obtain name of RNG - * @tfm: cipher handle - * - * Return the generic name (cra_name) of the initialized random number generator + * crypto_rng_alg() - obtain 'struct rng_alg' pointer from RNG handle + * @tfm: RNG handle * - * Return: generic name string + * Return: Pointer to 'struct rng_alg', derived from @tfm RNG handle */ static inline struct rng_alg *crypto_rng_alg(struct crypto_rng *tfm) { -- cgit v1.2.3 From 0efae411280988b43d5e5042aa047eeb276d1dac Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 1 May 2025 20:37:30 +0800 Subject: crypto: acomp - Clone folios properly The folios contain references to the request itself so they must be setup again in the cloned request. Fixes: 5f3437e9c89e ("crypto: acomp - Simplify folio handling") Signed-off-by: Herbert Xu --- include/crypto/acompress.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index f1812e92d3e3..9eacb9fa375d 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -551,11 +551,7 @@ static inline struct acomp_req *acomp_request_on_stack_init( return req; } -static inline struct acomp_req *acomp_request_clone(struct acomp_req *req, - size_t total, gfp_t gfp) -{ - return container_of(crypto_request_clone(&req->base, total, gfp), - struct acomp_req, base); -} +struct acomp_req *acomp_request_clone(struct acomp_req *req, + size_t total, gfp_t gfp); #endif -- cgit v1.2.3 From 8fd17374be8f220c26bec2b482cabf51ebbaed80 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 1 May 2025 20:37:32 +0800 Subject: crypto: api - Rename CRYPTO_ALG_REQ_CHAIN to CRYPTO_ALG_REQ_VIRT As chaining has been removed, all that remains of REQ_CHAIN is just virtual address support. Rename it before the reintroduction of batching creates confusion. 
Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 4 ++-- include/crypto/internal/acompress.h | 4 ++-- include/crypto/internal/hash.h | 4 ++-- include/linux/crypto.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index f5f730969d72..423e57eca351 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -255,9 +255,9 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK; } -static inline bool crypto_tfm_req_chain(struct crypto_tfm *tfm) +static inline bool crypto_tfm_req_virt(struct crypto_tfm *tfm) { - return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_CHAIN; + return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_VIRT; } static inline u32 crypto_request_flags(struct crypto_async_request *req) diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index b72bb7a6a2b2..ffffd88bbbad 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -186,9 +186,9 @@ static inline bool acomp_request_isnondma(struct acomp_req *req) CRYPTO_ACOMP_REQ_DST_NONDMA); } -static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm) +static inline bool crypto_acomp_req_virt(struct crypto_acomp *tfm) { - return crypto_tfm_req_chain(&tfm->base); + return crypto_tfm_req_virt(&tfm->base); } void crypto_acomp_free_streams(struct crypto_acomp_streams *s); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 33d45275f5bd..e911f32f46dc 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -267,9 +267,9 @@ static inline bool ahash_request_isvirt(struct ahash_request *req) return req->base.flags & CRYPTO_AHASH_REQ_VIRT; } -static inline bool crypto_ahash_req_chain(struct crypto_ahash *tfm) +static inline bool crypto_ahash_req_virt(struct crypto_ahash *tfm) { - return crypto_tfm_req_chain(&tfm->base); + return crypto_tfm_req_virt(&tfm->base); } static inline struct crypto_ahash *crypto_ahash_fb(struct crypto_ahash *tfm) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b8d875b11193..b50f1954d1bb 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -133,8 +133,8 @@ */ #define CRYPTO_ALG_FIPS_INTERNAL 0x00020000 -/* Set if the algorithm supports request chains and virtual addresses. */ -#define CRYPTO_ALG_REQ_CHAIN 0x00040000 +/* Set if the algorithm supports virtual addresses. */ +#define CRYPTO_ALG_REQ_VIRT 0x00040000 /* The high bits 0xff000000 are reserved for type-specific flags. */ -- cgit v1.2.3 From 5b90a779bc547939421bfeb333e470658ba94fb6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 May 2025 13:30:53 +0800 Subject: crypto: lib/sha256 - Add helpers for block-based shash Add an internal sha256_finup helper and move the finalisation code from __sha256_final into it. Also add sha256_choose_blocks and CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD so that the Crypto API can use the SIMD block function unconditionally. The Crypto API must not be used in hard IRQs and there is no reason to have a fallback path for hardirqs. 
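The helper added in the hunk below folds padding, length encoding and the final block calls into one place; a hedged sketch of a caller finishing a digest from a partial block (wrapper name made up, no forcing so sha256_choose_blocks() picks generic, SIMD or arch code):

    #include <crypto/internal/sha2.h>

    static void example_sha256_finish(struct crypto_sha256_state *sctx,
                                      u8 buf[SHA256_BLOCK_SIZE], size_t partial,
                                      u8 out[SHA256_DIGEST_SIZE])
    {
            sha256_finup(sctx, buf, partial, out, SHA256_DIGEST_SIZE,
                         false, false);
    }
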
Signed-off-by: Herbert Xu --- include/crypto/internal/sha2.h | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index d641c67abcbc..fff156f66edc 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -3,7 +3,12 @@ #ifndef _CRYPTO_INTERNAL_SHA2_H #define _CRYPTO_INTERNAL_SHA2_H +#include #include +#include +#include +#include +#include void sha256_update_generic(struct sha256_state *sctx, const u8 *data, size_t len); @@ -24,5 +29,45 @@ void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); +void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks); + +static inline void sha256_choose_blocks( + u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, + bool force_generic, bool force_simd) +{ + if (!IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) || force_generic) + sha256_blocks_generic(state, data, nblocks); + else if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD) && + (force_simd || crypto_simd_usable())) + sha256_blocks_simd(state, data, nblocks); + else + sha256_blocks_arch(state, data, nblocks); +} + +static __always_inline void sha256_finup( + struct crypto_sha256_state *sctx, u8 buf[SHA256_BLOCK_SIZE], + size_t len, u8 out[SHA256_DIGEST_SIZE], size_t digest_size, + bool force_generic, bool force_simd) +{ + const size_t bit_offset = SHA256_BLOCK_SIZE - 8; + __be64 *bits = (__be64 *)&buf[bit_offset]; + int i; + + buf[len++] = 0x80; + if (len > bit_offset) { + memset(&buf[len], 0, SHA256_BLOCK_SIZE - len); + sha256_choose_blocks(sctx->state, buf, 1, force_generic, + force_simd); + len = 0; + } + + memset(&buf[len], 0, bit_offset - len); + *bits = cpu_to_be64(sctx->count << 3); + sha256_choose_blocks(sctx->state, buf, 1, force_generic, force_simd); + + for (i = 0; i < digest_size; i += 4) + put_unaligned_be32(sctx->state[i / 4], out + i); +} #endif /* _CRYPTO_INTERNAL_SHA2_H */ -- cgit v1.2.3 From ff8f037d394f0900597ba527388a6eb95cd02695 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 May 2025 13:30:56 +0800 Subject: crypto: sha256 - Use the partial block API for generic The shash interface already handles partial blocks, use it for sha224-generic and sha256-generic instead of going through the lib/sha256 interface. 
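With the block-level init helpers added in the hunk below, the generic shash init callbacks reduce to one line each; a rough sketch (callback names are illustrative):

    /* Illustrative shash init callbacks built on the new block-level helpers. */
    static int example_sha256_init(struct shash_desc *desc)
    {
            sha256_block_init(shash_desc_ctx(desc));
            return 0;
    }

    static int example_sha224_init(struct shash_desc *desc)
    {
            sha224_block_init(shash_desc_ctx(desc));
            return 0;
    }
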
Signed-off-by: Herbert Xu --- include/crypto/sha2.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index 9853cd2d1291..4912572578dc 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -88,7 +88,7 @@ struct sha512_state { u8 buf[SHA512_BLOCK_SIZE]; }; -static inline void sha256_init(struct sha256_state *sctx) +static inline void sha256_block_init(struct crypto_sha256_state *sctx) { sctx->state[0] = SHA256_H0; sctx->state[1] = SHA256_H1; @@ -100,11 +100,16 @@ static inline void sha256_init(struct sha256_state *sctx) sctx->state[7] = SHA256_H7; sctx->count = 0; } + +static inline void sha256_init(struct sha256_state *sctx) +{ + sha256_block_init(&sctx->ctx); +} void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len); void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]); void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); -static inline void sha224_init(struct sha256_state *sctx) +static inline void sha224_block_init(struct crypto_sha256_state *sctx) { sctx->state[0] = SHA224_H0; sctx->state[1] = SHA224_H1; @@ -116,6 +121,11 @@ static inline void sha224_init(struct sha256_state *sctx) sctx->state[7] = SHA224_H7; sctx->count = 0; } + +static inline void sha224_init(struct sha256_state *sctx) +{ + sha224_block_init(&sctx->ctx); +} /* Simply use sha256_update as it is equivalent to sha224_update. */ void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]); -- cgit v1.2.3 From 3007e90572d0c5fd409c3d2fa8cedcbd5cb06d4b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 May 2025 13:31:09 +0800 Subject: crypto: lib/sha256 - Use generic block helper Use the BLOCK_HASH_UPDATE_BLOCKS helper instead of duplicating partial block handling. Also remove the unused lib/sha256 force-generic interface. Signed-off-by: Herbert Xu --- include/crypto/internal/sha2.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index fff156f66edc..b9bccd3ff57f 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -10,13 +10,6 @@ #include #include -void sha256_update_generic(struct sha256_state *sctx, - const u8 *data, size_t len); -void sha256_final_generic(struct sha256_state *sctx, - u8 out[SHA256_DIGEST_SIZE]); -void sha224_final_generic(struct sha256_state *sctx, - u8 out[SHA224_DIGEST_SIZE]); - #if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) bool sha256_is_arch_optimized(void); #else -- cgit v1.2.3 From ecd71c95a60e7298acfabe81189439f350bd0e18 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 4 May 2025 14:13:50 +0800 Subject: crypto: zynqmp-sha - Fix partial block implementation The zynqmp-sha partial block was based on an old design of the partial block API where the leftover calculation was done in the Crypto API. As the leftover calculation is now done by the algorithm, fix this by passing the partial blocks to the fallback. Also zero the stack descriptors. 
Fixes: 201e9ec3b621 ("crypto: zynqmp-sha - Use API partial block handling") Signed-off-by: Herbert Xu --- include/crypto/sha3.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/crypto/sha3.h b/include/crypto/sha3.h index 3c2559f51ada..41e1b83a6d91 100644 --- a/include/crypto/sha3.h +++ b/include/crypto/sha3.h @@ -9,15 +9,19 @@ #define SHA3_224_DIGEST_SIZE (224 / 8) #define SHA3_224_BLOCK_SIZE (200 - 2 * SHA3_224_DIGEST_SIZE) +#define SHA3_224_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_224_BLOCK_SIZE + 1 #define SHA3_256_DIGEST_SIZE (256 / 8) #define SHA3_256_BLOCK_SIZE (200 - 2 * SHA3_256_DIGEST_SIZE) +#define SHA3_256_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_256_BLOCK_SIZE + 1 #define SHA3_384_DIGEST_SIZE (384 / 8) #define SHA3_384_BLOCK_SIZE (200 - 2 * SHA3_384_DIGEST_SIZE) +#define SHA3_384_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_384_BLOCK_SIZE + 1 #define SHA3_512_DIGEST_SIZE (512 / 8) #define SHA3_512_BLOCK_SIZE (200 - 2 * SHA3_512_DIGEST_SIZE) +#define SHA3_512_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_512_BLOCK_SIZE + 1 #define SHA3_STATE_SIZE 200 -- cgit v1.2.3 From 2b1a29ce3360570aeff053d1315cd504d94eab31 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 4 May 2025 21:33:14 +0800 Subject: crypto: shash - Cap state size to HASH_MAX_STATESIZE Now that all shash algorithms have converted over to the generic export format, limit the shash state size to HASH_MAX_STATESIZE. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index c2497c300a28..e0321b5ec363 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -167,8 +167,11 @@ struct shash_desc { #define HASH_MAX_DIGESTSIZE 64 +/* Worst case is sha3-224. */ +#define HASH_MAX_STATESIZE 200 + 144 + 1 + /* - * Worst case is hmac(sha-224-s390). Its context is a nested 'shash_desc' + * Worst case is hmac(sha3-224-s390). Its context is a nested 'shash_desc' * containing a 'struct s390_sha_ctx'. */ #define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360) -- cgit v1.2.3 From 1052671ca118b79fa3f5de281bba850aaf20bbf5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 4 May 2025 21:33:21 +0800 Subject: crypto: ahash - Add core export and import Add crypto_ahash_export_core and crypto_ahash_import_core. For now they only differ from the normal export/import functions when going through shash. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index e0321b5ec363..1760662ad70a 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -506,6 +506,18 @@ int crypto_ahash_digest(struct ahash_request *req); */ int crypto_ahash_export(struct ahash_request *req, void *out); +/** + * crypto_ahash_export_core() - extract core state for message digest + * @req: reference to the ahash_request handle whose state is exported + * @out: output buffer of sufficient size that can hold the hash state + * + * Export the hash state without the partial block buffer. + * + * Context: Softirq or process context. 
+ * Return: 0 if the export creation was successful; < 0 if an error occurred + */ +int crypto_ahash_export_core(struct ahash_request *req, void *out); + /** * crypto_ahash_import() - import message digest state * @req: reference to ahash_request handle the state is imported into @@ -519,6 +531,18 @@ int crypto_ahash_export(struct ahash_request *req, void *out); */ int crypto_ahash_import(struct ahash_request *req, const void *in); +/** + * crypto_ahash_import_core() - import core state + * @req: reference to ahash_request handle the state is imported into + * @in: buffer holding the state + * + * Import the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the import was successful; < 0 if an error occurred + */ +int crypto_ahash_import_core(struct ahash_request *req, const void *in); + /** * crypto_ahash_init() - (re)initialize message digest handle * @req: ahash_request handle that already is initialized with all necessary -- cgit v1.2.3 From 88bca957e87e9a8a274213db257a744170b7248a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 4 May 2025 21:33:23 +0800 Subject: crypto: ahash - Add HASH_REQUEST_ZERO Add a helper to zero hash stack requests that were never cloned off the stack. Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index e911f32f46dc..f2bbdb74e11a 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -301,5 +301,9 @@ static inline unsigned int crypto_shash_coresize(struct crypto_shash *tfm) return crypto_shash_statesize(tfm) - crypto_shash_blocksize(tfm) - 1; } +/* This can only be used if the request was never cloned. */ +#define HASH_REQUEST_ZERO(name) \ + memzero_explicit(__##name##_req, sizeof(__##name##_req)) + #endif /* _CRYPTO_INTERNAL_HASH_H */ -- cgit v1.2.3 From 15b39ced6247666f616e3b0b8eeb42cd4a8a11f2 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 3 May 2025 00:52:15 +0100 Subject: ALSA: pcm: Remove unused snd_pcm_rate_range_to_bits The last use of snd_pcm_rate_range_to_bits() was removed in 2016 by commit b6b6e4d670c9 ("ASoC: topology: Fix setting of stream rates, rate_min and rate_max") Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250502235219.1000429-2-linux@treblig.org --- include/sound/pcm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 8becb4504887..a268de860259 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -1251,8 +1251,6 @@ unsigned int snd_pcm_rate_to_rate_bit(unsigned int rate); unsigned int snd_pcm_rate_bit_to_rate(unsigned int rate_bit); unsigned int snd_pcm_rate_mask_intersect(unsigned int rates_a, unsigned int rates_b); -unsigned int snd_pcm_rate_range_to_bits(unsigned int rate_min, - unsigned int rate_max); /** * snd_pcm_set_runtime_buffer - Set the PCM runtime buffer -- cgit v1.2.3 From a5f2dd266c0b5b706ebc39d64c1caaec636f8fbf Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 3 May 2025 00:52:16 +0100 Subject: ALSA: pcm: Remove unused snd_dmaengine_pcm_open_request_chan snd_dmaengine_pcm_open_request_chan() last use was removed in 2022's commit b401d1fd8053 ("ASoC: pxa: remove unused board support") Remove it. Signed-off-by: Dr. 
David Alan Gilbert Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250502235219.1000429-3-linux@treblig.org --- include/sound/dmaengine_pcm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index f6baa9a01868..1ef13bcdc43f 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -38,8 +38,6 @@ int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream, int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream); int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream); -int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream, - dma_filter_fn filter_fn, void *filter_data); int snd_dmaengine_pcm_close_release_chan(struct snd_pcm_substream *substream); struct dma_chan *snd_dmaengine_pcm_request_channel(dma_filter_fn filter_fn, -- cgit v1.2.3 From 625a4681666adf626afc87a752974325f65a8c32 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 3 May 2025 00:52:18 +0100 Subject: ALSA: core: Remove unused snd_device_get_state snd_device_get_state() last use was removed in 2022 by commit 7e1afce5866e ("ALSA: usb-audio: Inform the delayed registration more properly") Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250502235219.1000429-5-linux@treblig.org --- include/sound/core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 1f3f5dccd736..64327e971122 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -326,7 +326,6 @@ void snd_device_disconnect(struct snd_card *card, void *device_data); void snd_device_disconnect_all(struct snd_card *card); void snd_device_free(struct snd_card *card, void *device_data); void snd_device_free_all(struct snd_card *card); -int snd_device_get_state(struct snd_card *card, void *device_data); /* isadma.c */ -- cgit v1.2.3 From 307addcc5672b0d340c0e08df5298d08c15bab75 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 3 May 2025 00:52:19 +0100 Subject: ALSA: core: Remove unused snd_jack_set_parent snd_jack_set_parent() was added as part of 2008's commit e76d8ceaaff9 ("ALSA: Add jack reporting API") but hasn't been used. Remove it. Signed-off-by: Dr. 
David Alan Gilbert Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250502235219.1000429-6-linux@treblig.org --- include/sound/jack.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/sound/jack.h b/include/sound/jack.h index 1ed90e2109e9..36dc104c1145 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -79,7 +79,6 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, struct snd_jack **jack, bool initial_kctl, bool phantom_jack); int snd_jack_add_new_kctl(struct snd_jack *jack, const char * name, int mask); #ifdef CONFIG_SND_JACK_INPUT_DEV -void snd_jack_set_parent(struct snd_jack *jack, struct device *parent); int snd_jack_set_key(struct snd_jack *jack, enum snd_jack_types type, int keytype); #endif @@ -104,11 +103,6 @@ static inline void snd_jack_report(struct snd_jack *jack, int status) #endif #if !defined(CONFIG_SND_JACK) || !defined(CONFIG_SND_JACK_INPUT_DEV) -static inline void snd_jack_set_parent(struct snd_jack *jack, - struct device *parent) -{ -} - static inline int snd_jack_set_key(struct snd_jack *jack, enum snd_jack_types type, int keytype) -- cgit v1.2.3 From 6b3754009f871083b114daacbad5b87a494da4b8 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Fri, 11 Apr 2025 14:41:10 +0200 Subject: reset: Add devm_reset_control_array_get_exclusive_released() Add the released variant of devm_reset_control_array_get_exclusive(). Needed by spi-smt32-ospi driver as same reset line is ulso used by stm32-omm driver. Signed-off-by: Patrice Chotard Reviewed-by: Philipp Zabel Link: https://lore.kernel.org/r/20250411-b4-upstream_ospi_reset_update-v2-1-4de7f5dd2a91@foss.st.com Signed-off-by: Philipp Zabel --- include/linux/reset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index 2986ced69a02..840d75d172f6 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -1004,6 +1004,12 @@ devm_reset_control_array_get_exclusive(struct device *dev) return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE); } +static inline struct reset_control * +devm_reset_control_array_get_exclusive_released(struct device *dev) +{ + return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE_RELEASED); +} + static inline struct reset_control * devm_reset_control_array_get_shared(struct device *dev) { -- cgit v1.2.3 From 5d2ea5aebbb2f3ebde4403f9c55b2b057e5dd2d6 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 28 Apr 2025 14:34:07 +0300 Subject: RDMA/mlx5: Fix error flow upon firmware failure for RQ destruction Upon RQ destruction if the firmware command fails which is the last resource to be destroyed some SW resources were already cleaned regardless of the failure. Now properly rollback the object to its original state upon such failure. In order to avoid a use-after free in case someone tries to destroy the object again, which results in the following kernel trace: refcount_t: underflow; use-after-free. 
WARNING: CPU: 0 PID: 37589 at lib/refcount.c:28 refcount_warn_saturate+0xf4/0x148 Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) rfkill mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) psample mlxfw(OE) mlx_compat(OE) macsec tls pci_hyperv_intf sunrpc vfat fat virtio_net net_failover failover fuse loop nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce virtio_console virtio_gpu virtio_blk virtio_dma_buf virtio_mmio dm_mirror dm_region_hash dm_log dm_mod xpmem(OE) CPU: 0 UID: 0 PID: 37589 Comm: python3 Kdump: loaded Tainted: G OE ------- --- 6.12.0-54.el10.aarch64 #1 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : refcount_warn_saturate+0xf4/0x148 lr : refcount_warn_saturate+0xf4/0x148 sp : ffff80008b81b7e0 x29: ffff80008b81b7e0 x28: ffff000133d51600 x27: 0000000000000001 x26: 0000000000000000 x25: 00000000ffffffea x24: ffff00010ae80f00 x23: ffff00010ae80f80 x22: ffff0000c66e5d08 x21: 0000000000000000 x20: ffff0000c66e0000 x19: ffff00010ae80340 x18: 0000000000000006 x17: 0000000000000000 x16: 0000000000000020 x15: ffff80008b81b37f x14: 0000000000000000 x13: 2e656572662d7265 x12: ffff80008283ef78 x11: ffff80008257efd0 x10: ffff80008283efd0 x9 : ffff80008021ed90 x8 : 0000000000000001 x7 : 00000000000bffe8 x6 : c0000000ffff7fff x5 : ffff0001fb8e3408 x4 : 0000000000000000 x3 : ffff800179993000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000133d51600 Call trace: refcount_warn_saturate+0xf4/0x148 mlx5_core_put_rsc+0x88/0xa0 [mlx5_ib] mlx5_core_destroy_rq_tracked+0x64/0x98 [mlx5_ib] mlx5_ib_destroy_wq+0x34/0x80 [mlx5_ib] ib_destroy_wq_user+0x30/0xc0 [ib_core] uverbs_free_wq+0x28/0x58 [ib_uverbs] destroy_hw_idr_uobject+0x34/0x78 [ib_uverbs] uverbs_destroy_uobject+0x48/0x240 [ib_uverbs] __uverbs_cleanup_ufile+0xd4/0x1a8 [ib_uverbs] uverbs_destroy_ufile_hw+0x48/0x120 [ib_uverbs] ib_uverbs_close+0x2c/0x100 [ib_uverbs] __fput+0xd8/0x2f0 __fput_sync+0x50/0x70 __arm64_sys_close+0x40/0x90 invoke_syscall.constprop.0+0x74/0xd0 do_el0_svc+0x48/0xe8 el0_svc+0x44/0x1d0 el0t_64_sync_handler+0x120/0x130 el0t_64_sync+0x1a4/0x1a8 Fixes: e2013b212f9f ("net/mlx5_core: Add RQ and SQ event handling") Signed-off-by: Patrisious Haddad Link: https://patch.msgid.link/3181433ccdd695c63560eeeb3f0c990961732101.1745839855.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d1dfbad9a447..e6ba8f4f4bd1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -398,6 +398,7 @@ struct mlx5_core_rsc_common { enum mlx5_res_type res; refcount_t refcount; struct completion free; + bool invalid; }; struct mlx5_uars_page { -- cgit v1.2.3 From fc3817fb499605a8e9cfebdc2615da0ec114b281 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 9 Apr 2025 17:09:23 +0530 Subject: drm: add drm_file_err function to add process info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a drm helper function which appends the process information for the drm_file over drm_err formatted output. 
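A hedged usage sketch (the message and wrapper are made up; only the drm_file_err() call reflects the new helper):

    /* Report a client-attributed error; process info is appended automatically. */
    static void example_report_error(struct drm_file *file_priv, int err)
    {
            drm_file_err(file_priv, "job submission failed (%d)\n", err);
    }
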
v5: change to macro from function (Christian Koenig) add helper functions for lock/unlock (Christian Koenig) v6: remove __maybe_unused and make function inline (Jani Nikula) remove drm_print.h v7: Use va_format and %pV to concatenate fmt and vargs (Jani Nikula) v8: Code formatting and typos (Ursulin tvrtko) Signed-off-by: Sunil Khatri Reviewed-by: Tvrtko Ursulin Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/drm_file.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 94d365b22505..5c3b2aa3e69d 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -446,6 +446,9 @@ static inline bool drm_is_accel_client(const struct drm_file *file_priv) return file_priv->minor->type == DRM_MINOR_ACCEL; } +__printf(2, 3) +void drm_file_err(struct drm_file *file_priv, const char *fmt, ...); + void drm_file_update_pid(struct drm_file *); struct drm_minor *drm_minor_acquire(struct xarray *minors_xa, unsigned int minor_id); -- cgit v1.2.3 From 575ec9b0c2f11f40535ea737ed5a64792780d1ef Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 25 Apr 2025 19:36:50 +0530 Subject: dma-fence: Add helper to sort and deduplicate dma_fence arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export a new helper function `dma_fence_dedup_array()` that sorts an array of dma_fence pointers by context, then deduplicates the array by retaining only the most recent fence per context. This utility is useful when merging or optimizing sets of fences where redundant entries from the same context can be pruned. The operation is performed in-place and releases references to dropped fences using dma_fence_put(). v2: - Export this code from dma-fence-unwrap.c(by Christian). v3: - To split this in a dma_buf patch and amd userq patch(by Sunil). - No need to add a new function just re-use existing(by Christian). v4: - Export dma_fence_dedub_array and use it(by Christian). Cc: Alex Deucher Cc: Arunpravin Paneer Selvam Reviewed-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- include/linux/dma-fence-unwrap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h index 66b1e56fbb81..62df222fe0f1 100644 --- a/include/linux/dma-fence-unwrap.h +++ b/include/linux/dma-fence-unwrap.h @@ -52,6 +52,8 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, struct dma_fence_unwrap *cursors); +int dma_fence_dedup_array(struct dma_fence **array, int num_fences); + /** * dma_fence_unwrap_merge - unwrap and merge fences * -- cgit v1.2.3 From a9437f6a1d8d0b3787fe6ff03d9aab4d3fe9b940 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 May 2025 10:11:24 +0200 Subject: scsi: remove the no_highmem flag in the host All users are gone now. 
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: John Garry Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250505081138.3435992-6-hch@lst.de Signed-off-by: Jens Axboe --- include/scsi/scsi_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 26bc23419cfd..c53812b9026f 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -670,8 +670,6 @@ struct Scsi_Host { /* The transport requires the LUN bits NOT to be stored in CDB[1] */ unsigned no_scsi2_lun_in_cdb:1; - unsigned no_highmem:1; - /* * Optional work queue to be utilized by the transport */ -- cgit v1.2.3 From eeadd68e2a5f6bfe0bf1038ec49e3a8d99eb5fe8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 May 2025 10:11:25 +0200 Subject: block: remove bounce buffering support The block layer bounce buffering support is unused now, remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: John Garry Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250505081138.3435992-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - include/linux/blkdev.h | 5 +---- include/trace/events/block.h | 15 --------------- include/uapi/linux/blktrace_api.h | 2 +- 4 files changed, 2 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dce7615c35e7..5a46067e85b1 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -286,7 +286,6 @@ struct bio { enum { BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */ BIO_CLONED, /* doesn't own data */ - BIO_BOUNCED, /* bio is a bounce bio */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3d74f9dae8e..5ccb961ee2ae 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -331,9 +331,6 @@ typedef unsigned int __bitwise blk_features_t; /* skip this queue in blk_mq_(un)quiesce_tagset */ #define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13)) -/* bounce all highmem pages */ -#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14)) - /* undocumented magic for bcache */ #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) @@ -347,7 +344,7 @@ typedef unsigned int __bitwise blk_features_t; */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \ + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | \ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) /* internal flags in queue_limits.flags */ diff --git a/include/trace/events/block.h b/include/trace/events/block.h index bd0ea07338eb..ad36e73b8579 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -360,21 +360,6 @@ DECLARE_EVENT_CLASS(block_bio, __entry->nr_sector, __entry->comm) ); -/** - * block_bio_bounce - used bounce buffer when processing block operation - * @bio: block operation - * - * A bounce buffer was used to handle the block operation @bio in @q. - * This occurs when hardware limitations prevent a direct transfer of - * data between the @bio data memory area and the IO device. Use of a - * bounce buffer requires extra copying of data and decreases - * performance. 
- */ -DEFINE_EVENT(block_bio, block_bio_bounce, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) -); - /** * block_bio_backmerge - merging block operation to the end of an existing operation * @bio: new block operation to merge diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 690621b610e5..1bfb635e309b 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -49,7 +49,7 @@ enum blktrace_act { __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */ __BLK_TA_INSERT, /* insert request */ __BLK_TA_SPLIT, /* bio was split */ - __BLK_TA_BOUNCE, /* bio was bounced */ + __BLK_TA_BOUNCE, /* unused, was: bio was bounced */ __BLK_TA_REMAP, /* bio was remapped */ __BLK_TA_ABORT, /* request aborted */ __BLK_TA_DRV_DATA, /* driver-specific binary data */ -- cgit v1.2.3 From 194df9f66db8d6f74f03c78c2ad47b74a5a8b886 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 May 2025 10:11:26 +0200 Subject: mm: remove NR_BOUNCE zone stat The stat is always 0 now, so remove it and hardwire the user visible output to 0. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250505081138.3435992-8-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/mmzone.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6ccec1bf2896..b1c459f7a485 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -148,7 +148,6 @@ enum zone_stat_item { NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ /* Second 128 byte cacheline */ - NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif -- cgit v1.2.3 From 4210f21c004a18aad11c55bdaf552e649a4fd286 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Fri, 2 May 2025 13:03:07 +0200 Subject: dt-bindings: clock: rk3576: add IOC gated clocks Certain clocks on the RK3576 are additionally essentially "gated" behind some bit toggles in the IOC GRF range. Downstream ungates these by adding a separate clock driver that maps over the GRF range and leaks their implementation of this into the DT. Instead, define some new clock IDs for these, so that consumers of these types of clocks can properly articulate which clock they're using, so that we can then add them to the clock driver for SoCs that need them. 
Acked-by: Krzysztof Kozlowski Signed-off-by: Nicolas Frattaroli Link: https://lore.kernel.org/r/20250502-rk3576-sai-v3-1-376cef19dd7c@collabora.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rockchip,rk3576-cru.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rockchip,rk3576-cru.h b/include/dt-bindings/clock/rockchip,rk3576-cru.h index f576e61bec70..ded5ce42e62a 100644 --- a/include/dt-bindings/clock/rockchip,rk3576-cru.h +++ b/include/dt-bindings/clock/rockchip,rk3576-cru.h @@ -594,4 +594,14 @@ #define SCMI_ARMCLK_B 11 #define SCMI_CLK_GPU 456 +/* IOC-controlled output clocks */ +#define CLK_SAI0_MCLKOUT_TO_IO 571 +#define CLK_SAI1_MCLKOUT_TO_IO 572 +#define CLK_SAI2_MCLKOUT_TO_IO 573 +#define CLK_SAI3_MCLKOUT_TO_IO 574 +#define CLK_SAI4_MCLKOUT_TO_IO 575 +#define CLK_FSPI0_TO_IO 576 +#define CLK_FSPI1_TO_IO 577 + #endif -- cgit v1.2.3 From f4fcfdda2fd8834c62dcb9bfddcf1f89d190b70e Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 23 Apr 2025 14:42:13 -0500 Subject: of: reserved_mem: Add functions to parse "memory-region" Drivers with "memory-region" properties currently have to do their own parsing of "memory-region" properties. The result is all the drivers have similar patterns of a call to parse "memory-region" and then get the region's address and size. As this is a standard property, it should have common functions for drivers to use. Add new functions to count the number of regions and retrieve the region's address as a resource. Reviewed-by: Daniel Baluta Acked-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20250423-dt-memory-region-v2-v2-1-2fbd6ebd3c88@kernel.org Signed-off-by: Rob Herring (Arm) --- include/linux/of_reserved_mem.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index e338282da652..f573423359f4 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -7,6 +7,7 @@ struct of_phandle_args; struct reserved_mem_ops; +struct resource; struct reserved_mem { const char *name; @@ -39,6 +40,12 @@ int of_reserved_mem_device_init_by_name(struct device *dev, void of_reserved_mem_device_release(struct device *dev); struct reserved_mem *of_reserved_mem_lookup(struct device_node *np); +int of_reserved_mem_region_to_resource(const struct device_node *np, + unsigned int idx, struct resource *res); +int of_reserved_mem_region_to_resource_byname(const struct device_node *np, + const char *name, struct resource *res); +int of_reserved_mem_region_count(const struct device_node *np); + #else #define RESERVEDMEM_OF_DECLARE(name, compat, init) \ @@ -63,6 +70,25 @@ static inline struct reserved_mem *of_reserved_mem_lookup(struct device_node *np { return NULL; } + +static inline int of_reserved_mem_region_to_resource(const struct device_node *np, + unsigned int idx, + struct resource *res) +{ + return -ENOSYS; +} + +static inline int of_reserved_mem_region_to_resource_byname(const struct device_node *np, + const char *name, + struct resource *res) +{ + return -ENOSYS; +} + +static inline int of_reserved_mem_region_count(const struct device_node *np) +{ + return 0; +} #endif /** -- cgit v1.2.3 From 8ce5ace8e279d39f249fd5ead33895d04263ea36 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 5 May 2025 15:14:08 +0100 Subject: ASoC: sdw_utils: Remove num_platforms from simple DAI helper There is no point in
passing num_platforms into asoc_sdw_init_simple_dai_link(). Firstly, as a single pointer for the component name is passed in only a single string can be passed and secondly if it is a complex DAI with multiple platforms it would make more sense to use asoc_sdw_init_dai_link(). Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250505141409.2614010-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index d8bd5d37131a..b63021f5afaf 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -159,9 +159,8 @@ void asoc_sdw_init_dai_link(struct device *dev, struct snd_soc_dai_link *dai_lin int asoc_sdw_init_simple_dai_link(struct device *dev, struct snd_soc_dai_link *dai_links, int *be_id, char *name, int playback, int capture, const char *cpu_dai_name, const char *platform_comp_name, - int num_platforms, const char *codec_name, - const char *codec_dai_name, int no_pcm, - int (*init)(struct snd_soc_pcm_runtime *rtd), + const char *codec_name, const char *codec_dai_name, + int no_pcm, int (*init)(struct snd_soc_pcm_runtime *rtd), const struct snd_soc_ops *ops); int asoc_sdw_count_sdw_endpoints(struct snd_soc_card *card, int *num_devs, int *num_ends); -- cgit v1.2.3 From 23227e71b69af95e421e263302d13f426c548155 Mon Sep 17 00:00:00 2001 From: Guan-Chun Wu <409411716@gms.tku.edu.tw> Date: Sat, 3 May 2025 16:12:03 +0800 Subject: workqueue: fix typo in comment Fix a duplicated word "that that" in the comment describing the @max_active behavior for unbound workqueues. Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw> Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b0dc957c3e56..a6dacc0eb688 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -480,7 +480,7 @@ void workqueue_softirq_dead(unsigned int cpu); * executing at most one work item for the workqueue. * * For unbound workqueues, @max_active limits the number of in-flight work items - * for the whole system. e.g. @max_active of 16 indicates that that there can be + * for the whole system. e.g. @max_active of 16 indicates that there can be * at most 16 work items executing for the workqueue in the whole system. * * As sharing the same active counter for an unbound workqueue across multiple -- cgit v1.2.3 From 320a66f84022028f1277bf568a5e8987eac6e797 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 1 May 2025 01:24:02 +0100 Subject: strparser: Remove unused __strp_unpause The last use of __strp_unpause() was removed in 2022 by commit 84c61fe1a75b ("tls: rx: do not use the standard strparser") Remove it. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250501002402.308843-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- include/net/strparser.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/strparser.h b/include/net/strparser.h index 0a83010b3a64..0ed73e364faa 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -114,8 +114,6 @@ static inline void strp_pause(struct strparser *strp) /* May be called without holding lock for attached socket */ void strp_unpause(struct strparser *strp); -/* Must be called with process lock held (lock_sock) */ -void __strp_unpause(struct strparser *strp); static inline void save_strp_stats(struct strparser *strp, struct strp_aggr_stats *agg_stats) -- cgit v1.2.3 From 8c0821ccaa1e53c30f6a51d034171e20d522a9c0 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Mon, 7 Apr 2025 16:16:41 +0100 Subject: ASoC: cs35l56: Add struct to index firmware registers Firmware based registers may be different addresses across different device ids and revision ids. Create a structure to store and access these addresses. Signed-off-by: Stefan Binding Reviewed-by: Richard Fitzgerald Link: https://patch.msgid.link/20250407151842.143393-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 5d653a3491d0..d712cb79652b 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -267,6 +267,14 @@ struct cs35l56_spi_payload { } __packed; static_assert(sizeof(struct cs35l56_spi_payload) == 10); +struct cs35l56_fw_reg { + unsigned int fw_ver; + unsigned int halo_state; + unsigned int pm_cur_stat; + unsigned int prot_sts; + unsigned int transducer_actual_ps; +}; + struct cs35l56_base { struct device *dev; struct regmap *regmap; @@ -283,6 +291,7 @@ struct cs35l56_base { struct cirrus_amp_cal_data cal_data; struct gpio_desc *reset_gpio; struct cs35l56_spi_payload *spi_payload_buf; + const struct cs35l56_fw_reg *fw_reg; }; static inline bool cs35l56_is_otp_register(unsigned int reg) @@ -311,6 +320,8 @@ extern const struct regmap_config cs35l56_regmap_i2c; extern const struct regmap_config cs35l56_regmap_spi; extern const struct regmap_config cs35l56_regmap_sdw; +extern const struct cs35l56_fw_reg cs35l56_fw_reg; + extern const struct cirrus_amp_cal_controls cs35l56_calibration_controls; extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; -- cgit v1.2.3 From a3ec669efcee2cffd8ca020df777adbf79ac0b75 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Mon, 7 Apr 2025 16:16:42 +0100 Subject: ASoC: cs35l56: Add Mute, Volume and Posture registers to firmware register list Registers to set Mute, Volume and Posture are inside firmware, which means they should be added to the list of registers set inside firmware, in case they vary across Device or Revision. These three registers are also used for controls, so additional handling is required to be able to obtain and set the register inside ALSA controls. 
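For illustration, a rough sketch of the indirection this register table enables; 'cs35l56' is a hypothetical pointer to the driver's struct cs35l56_base, only the struct members come from this series:

unsigned int mute;
int ret;

/* the same code path works on any device/revision, the per-variant
 * cs35l56_fw_reg table supplies the actual firmware register address
 */
ret = regmap_read(cs35l56->regmap, cs35l56->fw_reg->user_mute, &mute);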
Signed-off-by: Stefan Binding Reviewed-by: Richard Fitzgerald Link: https://patch.msgid.link/20250407151842.143393-4-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index d712cb79652b..3abe4fbd2085 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -273,6 +273,9 @@ struct cs35l56_fw_reg { unsigned int pm_cur_stat; unsigned int prot_sts; unsigned int transducer_actual_ps; + unsigned int user_mute; + unsigned int user_volume; + unsigned int posture_number; }; struct cs35l56_base { -- cgit v1.2.3 From 978858791cedaf1dbbbaeb1e37126611b7c28a9e Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Mon, 7 Apr 2025 16:16:43 +0100 Subject: ASoC: cs35l56: Add initial support for CS35L63 for I2C and SoundWire CS35L63 uses a similar control interface to CS35L56 so support for it can be added into the CS35L56 driver. New regmap configs have been added to support CS35L63. CS35L63 only has SoundWire and I2C control interfaces. Signed-off-by: Stefan Binding Reviewed-by: Richard Fitzgerald Link: https://patch.msgid.link/20250407151842.143393-5-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 3abe4fbd2085..e16e1a94c8a1 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -104,6 +104,15 @@ #define CS35L56_DSP1_PMEM_0 0x3800000 #define CS35L56_DSP1_PMEM_5114 0x3804FE8 +#define CS35L63_DSP1_FW_VER CS35L56_DSP1_FW_VER +#define CS35L63_DSP1_HALO_STATE 0x280396C +#define CS35L63_DSP1_PM_CUR_STATE 0x28042C8 +#define CS35L63_PROTECTION_STATUS 0x340009C +#define CS35L63_TRANSDUCER_ACTUAL_PS 0x34000F4 +#define CS35L63_MAIN_RENDER_USER_MUTE 0x3400020 +#define CS35L63_MAIN_RENDER_USER_VOLUME 0x3400028 +#define CS35L63_MAIN_POSTURE_NUMBER 0x3400068 + /* DEVID */ #define CS35L56_DEVID_MASK 0x00FFFFFF @@ -322,8 +331,11 @@ static inline bool cs35l56_is_spi(struct cs35l56_base *cs35l56) extern const struct regmap_config cs35l56_regmap_i2c; extern const struct regmap_config cs35l56_regmap_spi; extern const struct regmap_config cs35l56_regmap_sdw; +extern const struct regmap_config cs35l63_regmap_i2c; +extern const struct regmap_config cs35l63_regmap_sdw; extern const struct cs35l56_fw_reg cs35l56_fw_reg; +extern const struct cs35l56_fw_reg cs35l63_fw_reg; extern const struct cirrus_amp_cal_controls cs35l56_calibration_controls; -- cgit v1.2.3 From 406fbc4d0fb34c16718551bb8f4c776710f63b55 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Mon, 7 Apr 2025 16:16:44 +0100 Subject: ASoC: cs35l56: Read Silicon ID from DIE_STS registers for CS35L63 On CS35L63 the DIE_STS registers are populated by the Firmware from OTP, so the driver can read these registers directly, rather than obtaining them from OTP. 
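A minimal sketch of that direct read; the surrounding probe code and error handling are omitted and hypothetical, the register defines come from the patch:

unsigned int die_sts1, die_sts2;

/* on CS35L63 the firmware has already populated these from OTP */
regmap_read(cs35l56->regmap, CS35L56_DIE_STS1, &die_sts1);
regmap_read(cs35l56->regmap, CS35L56_DIE_STS2, &die_sts2);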
Signed-off-by: Stefan Binding Reviewed-by: Richard Fitzgerald Link: https://patch.msgid.link/20250407151842.143393-6-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index e16e1a94c8a1..63f2c63f7c59 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -71,6 +71,8 @@ #define CS35L56_DSP_VIRTUAL1_MBOX_6 0x0011034 #define CS35L56_DSP_VIRTUAL1_MBOX_7 0x0011038 #define CS35L56_DSP_VIRTUAL1_MBOX_8 0x001103C +#define CS35L56_DIE_STS1 0x0017040 +#define CS35L56_DIE_STS2 0x0017044 #define CS35L56_DSP_RESTRICT_STS1 0x00190F0 #define CS35L56_DSP1_XMEM_PACKED_0 0x2000000 #define CS35L56_DSP1_XMEM_PACKED_6143 0x2005FFC -- cgit v1.2.3 From ac8f09b9210c48934c78fdc6bc167e660eaac928 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 2 May 2025 00:38:15 +0100 Subject: sctp: Remove unused sctp_assoc_del_peer and sctp_chunk_iif sctp_assoc_del_peer() last use was removed in 2015 by commit 73e6742027f5 ("sctp: Do not try to search for the transport twice") which now uses rm_peer instead of del_peer. sctp_chunk_iif() last use was removed in 2016 by commit 1f45f78f8e51 ("sctp: allow GSO frags to access the chunk too") Remove them. Signed-off-by: Dr. David Alan Gilbert Acked-by: Xin Long Link: https://patch.msgid.link/20250501233815.99832-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- include/net/sctp/sm.h | 1 - include/net/sctp/structs.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 64c42bd56bb2..3bfd261a53cc 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -161,7 +161,6 @@ const struct sctp_sm_table_entry *sctp_sm_lookup_event( enum sctp_event_type event_type, enum sctp_state state, union sctp_subtype event_subtype); -int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, gfp_t gfp); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index dcd288fa1bb6..1ad7ce71d0a7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -2152,8 +2152,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *, const union sctp_addr *address, const gfp_t gfp, const int peer_state); -void sctp_assoc_del_peer(struct sctp_association *asoc, - const union sctp_addr *addr); void sctp_assoc_rm_peer(struct sctp_association *asoc, struct sctp_transport *peer); void sctp_assoc_control_transport(struct sctp_association *asoc, -- cgit v1.2.3 From a25e7962db0d79882c9d32fd2a67a02e79129c0e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 May 2025 10:01:38 +0300 Subject: PCI/P2PDMA: Refactor the p2pdma mapping helpers The current scheme with a single helper to determine the P2P status and map a scatterlist segment force users to always use the map_sg helper to DMA map, which we're trying to get away from because they are very cache inefficient. Refactor the code so that there is a single helper that checks the P2P state for a page, including the result that it is not a P2P page to simplify the callers, and a second one to perform the address translation for a bus mapped P2P transfer that does not depend on the scatterlist structure. 
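A rough sketch of how a caller might use the two helpers described here; the surrounding variables (dev, page, offset, len, dir) are hypothetical, while the helper names, the state structure and the enum values come from the patch:

struct pci_p2pdma_map_state state = {};
dma_addr_t dma_addr;

switch (pci_p2pdma_state(&state, dev, page)) {
case PCI_P2PDMA_MAP_NONE:
case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
        /* normal DMA mapping, possibly traversing the host bridge */
        dma_addr = dma_map_page(dev, page, offset, len, dir);
        break;
case PCI_P2PDMA_MAP_BUS_ADDR:
        /* direct PCI bus address, no dma_map_*()/dma_unmap_*() involved */
        dma_addr = pci_p2pdma_bus_addr_map(&state, page_to_phys(page) + offset);
        break;
default:
        /* PCI_P2PDMA_MAP_NOT_SUPPORTED */
        return -EREMOTEIO;
}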
Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Tested-by: Jens Axboe Reviewed-by: Luis Chamberlain Reviewed-by: Lu Baolu Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski --- include/linux/dma-map-ops.h | 51 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index e172522cd936..c3086edeccc6 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -443,6 +443,11 @@ enum pci_p2pdma_map_type { */ PCI_P2PDMA_MAP_UNKNOWN = 0, + /* + * Not a PCI P2PDMA transfer. + */ + PCI_P2PDMA_MAP_NONE, + /* * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will * traverse the host bridge and the host bridge is not in the @@ -471,21 +476,47 @@ enum pci_p2pdma_map_type { struct pci_p2pdma_map_state { struct dev_pagemap *pgmap; - int map; + enum pci_p2pdma_map_type map; u64 bus_off; }; -#ifdef CONFIG_PCI_P2PDMA -enum pci_p2pdma_map_type -pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, - struct scatterlist *sg); -#else /* CONFIG_PCI_P2PDMA */ +/* helper for pci_p2pdma_state(), do not use directly */ +void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, + struct device *dev, struct page *page); + +/** + * pci_p2pdma_state - check the P2P transfer state of a page + * @state: P2P state structure + * @dev: device to transfer to/from + * @page: page to map + * + * Check if @page is a PCI P2PDMA page, and if yes of what kind. Returns the + * map type, and updates @state with all information needed for a P2P transfer. + */ static inline enum pci_p2pdma_map_type -pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, - struct scatterlist *sg) +pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev, + struct page *page) +{ + if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { + if (state->pgmap != page_pgmap(page)) + __pci_p2pdma_update_state(state, dev, page); + return state->map; + } + return PCI_P2PDMA_MAP_NONE; +} + +/** + * pci_p2pdma_bus_addr_map - map a PCI_P2PDMA_MAP_BUS_ADDR P2P transfer + * @state: P2P state structure + * @paddr: physical address to map + * + * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer. + */ +static inline dma_addr_t +pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr) { - return PCI_P2PDMA_MAP_NOT_SUPPORTED; + WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR); + return paddr + state->bus_off; } -#endif /* CONFIG_PCI_P2PDMA */ #endif /* _LINUX_DMA_MAP_OPS_H */ -- cgit v1.2.3 From ca2c2e4a78c619bcf1c1df29fee5981035f3fcbc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 May 2025 10:01:39 +0300 Subject: dma-mapping: move the PCI P2PDMA mapping helpers to pci-p2pdma.h To support the upcoming non-scatterlist mapping helpers, we need to go back to have them called outside of the DMA API. Thus move them out of dma-map-ops.h, which is only for DMA API implementations to pci-p2pdma.h, which is for driver use. Note that the core helper is still not exported as the mapping is expected to be done only by very highlevel subsystem code at least for now. 
Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Tested-by: Jens Axboe Reviewed-by: Luis Chamberlain Reviewed-by: Lu Baolu Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski --- include/linux/dma-map-ops.h | 85 --------------------------------------------- include/linux/pci-p2pdma.h | 85 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index c3086edeccc6..f48e5fb88bd5 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -434,89 +434,4 @@ static inline void debug_dma_dump_mappings(struct device *dev) #endif /* CONFIG_DMA_API_DEBUG */ extern const struct dma_map_ops dma_dummy_ops; - -enum pci_p2pdma_map_type { - /* - * PCI_P2PDMA_MAP_UNKNOWN: Used internally for indicating the mapping - * type hasn't been calculated yet. Functions that return this enum - * never return this value. - */ - PCI_P2PDMA_MAP_UNKNOWN = 0, - - /* - * Not a PCI P2PDMA transfer. - */ - PCI_P2PDMA_MAP_NONE, - - /* - * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will - * traverse the host bridge and the host bridge is not in the - * allowlist. DMA Mapping routines should return an error when - * this is returned. - */ - PCI_P2PDMA_MAP_NOT_SUPPORTED, - - /* - * PCI_P2PDMA_BUS_ADDR: Indicates that two devices can talk to - * each other directly through a PCI switch and the transaction will - * not traverse the host bridge. Such a mapping should program - * the DMA engine with PCI bus addresses. - */ - PCI_P2PDMA_MAP_BUS_ADDR, - - /* - * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk - * to each other, but the transaction traverses a host bridge on the - * allowlist. In this case, a normal mapping either with CPU physical - * addresses (in the case of dma-direct) or IOVA addresses (in the - * case of IOMMUs) should be used to program the DMA engine. - */ - PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, -}; - -struct pci_p2pdma_map_state { - struct dev_pagemap *pgmap; - enum pci_p2pdma_map_type map; - u64 bus_off; -}; - -/* helper for pci_p2pdma_state(), do not use directly */ -void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, - struct device *dev, struct page *page); - -/** - * pci_p2pdma_state - check the P2P transfer state of a page - * @state: P2P state structure - * @dev: device to transfer to/from - * @page: page to map - * - * Check if @page is a PCI P2PDMA page, and if yes of what kind. Returns the - * map type, and updates @state with all information needed for a P2P transfer. - */ -static inline enum pci_p2pdma_map_type -pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev, - struct page *page) -{ - if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { - if (state->pgmap != page_pgmap(page)) - __pci_p2pdma_update_state(state, dev, page); - return state->map; - } - return PCI_P2PDMA_MAP_NONE; -} - -/** - * pci_p2pdma_bus_addr_map - map a PCI_P2PDMA_MAP_BUS_ADDR P2P transfer - * @state: P2P state structure - * @paddr: physical address to map - * - * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer. 
- */ -static inline dma_addr_t -pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr) -{ - WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR); - return paddr + state->bus_off; -} - #endif /* _LINUX_DMA_MAP_OPS_H */ diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 2c07aa6b7665..075c20b161d9 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -104,4 +104,89 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client) return pci_p2pmem_find_many(&client, 1); } +enum pci_p2pdma_map_type { + /* + * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before + * the mapping type has been calculated. Exported routines for the API + * will never return this value. + */ + PCI_P2PDMA_MAP_UNKNOWN = 0, + + /* + * Not a PCI P2PDMA transfer. + */ + PCI_P2PDMA_MAP_NONE, + + /* + * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will + * traverse the host bridge and the host bridge is not in the + * allowlist. DMA Mapping routines should return an error when + * this is returned. + */ + PCI_P2PDMA_MAP_NOT_SUPPORTED, + + /* + * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to + * each other directly through a PCI switch and the transaction will + * not traverse the host bridge. Such a mapping should program + * the DMA engine with PCI bus addresses. + */ + PCI_P2PDMA_MAP_BUS_ADDR, + + /* + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk + * to each other, but the transaction traverses a host bridge on the + * allowlist. In this case, a normal mapping either with CPU physical + * addresses (in the case of dma-direct) or IOVA addresses (in the + * case of IOMMUs) should be used to program the DMA engine. + */ + PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, +}; + +struct pci_p2pdma_map_state { + struct dev_pagemap *pgmap; + enum pci_p2pdma_map_type map; + u64 bus_off; +}; + +/* helper for pci_p2pdma_state(), do not use directly */ +void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, + struct device *dev, struct page *page); + +/** + * pci_p2pdma_state - check the P2P transfer state of a page + * @state: P2P state structure + * @dev: device to transfer to/from + * @page: page to map + * + * Check if @page is a PCI P2PDMA page, and if yes of what kind. Returns the + * map type, and updates @state with all information needed for a P2P transfer. + */ +static inline enum pci_p2pdma_map_type +pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev, + struct page *page) +{ + if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { + if (state->pgmap != page_pgmap(page)) + __pci_p2pdma_update_state(state, dev, page); + return state->map; + } + return PCI_P2PDMA_MAP_NONE; +} + +/** + * pci_p2pdma_bus_addr_map - Translate a physical address to a bus address + * for a PCI_P2PDMA_MAP_BUS_ADDR transfer. + * @state: P2P state structure + * @paddr: physical address to map + * + * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer. 
+ */ +static inline dma_addr_t +pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr) +{ + WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR); + return paddr + state->bus_off; +} + #endif /* _LINUX_PCI_P2P_H */ -- cgit v1.2.3 From 5c87cffe2d3853cfae61e9373ee98a0409839178 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 May 2025 10:01:40 +0300 Subject: iommu: generalize the batched sync after map interface For the upcoming IOVA-based DMA API we want to batch the ops->iotlb_sync_map() call after mapping multiple IOVAs from dma-iommu without having a scatterlist. Improve the API. Add a wrapper for the map_sync as iommu_sync_map() so that callers don't need to poke into the methods directly. Formalize __iommu_map() into iommu_map_nosync() which requires the caller to call iommu_sync_map() after all maps are completed. Refactor the existing sanity checks from all the different layers into iommu_map_nosync(). Signed-off-by: Christoph Hellwig Acked-by: Will Deacon Tested-by: Jens Axboe Reviewed-by: Jason Gunthorpe Reviewed-by: Luis Chamberlain Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski --- include/linux/iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ccce8a751e2a..ce472af8e9c3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -872,6 +872,10 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); +int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); +int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, -- cgit v1.2.3 From 393cf700e6242032fbad51b248111ab6740ce8ad Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 May 2025 10:01:42 +0300 Subject: dma-mapping: Provide an interface to allow allocate IOVA The existing .map_pages() callback provides both allocating of IOVA and linking DMA pages. That combination works great for most of the callers who use it in control paths, but is less effective in fast paths where there may be multiple calls to map_page(). These advanced callers already manage their data in some sort of database and can perform IOVA allocation in advance, leaving range linkage operation to be in fast path. Provide an interface to allocate/deallocate IOVA and next patch link/unlink DMA ranges to that specific IOVA. In the new API a DMA mapping transaction is identified by a struct dma_iova_state, which holds some recomputed information for the transaction which does not change for each page being mapped, so add a check if IOVA can be used for the specific transaction. The API is exported from dma-iommu as it is the only implementation supported, the namespace is clearly different from iommu_* functions which are not allowed to be used. This code layout allows us to save function call per API call used in datapath as well as a lot of boilerplate code. 
Reviewed-by: Christoph Hellwig Tested-by: Jens Axboe Reviewed-by: Luis Chamberlain Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski --- include/linux/dma-mapping.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index b79925b1c433..de7f73810d54 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -72,6 +72,22 @@ #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) +struct dma_iova_state { + dma_addr_t addr; + u64 __size; +}; + +/* + * Use the high bit to mark if we used swiotlb for one or more ranges. + */ +#define DMA_IOVA_USE_SWIOTLB (1ULL << 63) + +static inline size_t dma_iova_size(struct dma_iova_state *state) +{ + /* Casting is needed for 32-bits systems */ + return (size_t)(state->__size & ~DMA_IOVA_USE_SWIOTLB); +} + #ifdef CONFIG_DMA_API_DEBUG void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); void debug_dma_map_single(struct device *dev, const void *addr, @@ -277,6 +293,38 @@ static inline int dma_mmap_noncontiguous(struct device *dev, } #endif /* CONFIG_HAS_DMA */ +#ifdef CONFIG_IOMMU_DMA +/** + * dma_use_iova - check if the IOVA API is used for this state + * @state: IOVA state + * + * Return %true if the DMA transfers uses the dma_iova_*() calls or %false if + * they can't be used. + */ +static inline bool dma_use_iova(struct dma_iova_state *state) +{ + return state->__size != 0; +} + +bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t size); +void dma_iova_free(struct device *dev, struct dma_iova_state *state); +#else /* CONFIG_IOMMU_DMA */ +static inline bool dma_use_iova(struct dma_iova_state *state) +{ + return false; +} +static inline bool dma_iova_try_alloc(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t size) +{ + return false; +} +static inline void dma_iova_free(struct device *dev, + struct dma_iova_state *state) +{ +} +#endif /* CONFIG_IOMMU_DMA */ + #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); -- cgit v1.2.3 From 433a76207dcf5facc0183acb790f6e8398585258 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 May 2025 10:01:44 +0300 Subject: dma-mapping: Implement link/unlink ranges API Introduce new DMA APIs to perform DMA linkage of buffers in layers higher than DMA. In proposed API, the callers will perform the following steps. In map path: if (dma_can_use_iova(...)) dma_iova_alloc() for (page in range) dma_iova_link_next(...) dma_iova_sync(...) else /* Fallback to legacy map pages */ for (all pages) dma_map_page(...) In unmap path: if (dma_can_use_iova(...)) dma_iova_destroy() else for (all pages) dma_unmap_page(...) 
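Fleshing that pseudocode out a little, a hypothetical driver mapping one physically contiguous buffer could look roughly like this; only the dma_iova_*() calls, dma_use_iova() and state.addr come from these patches, the variables dev, phys, size and dir are assumptions for the example:

struct dma_iova_state state = {};
int ret;

if (dma_iova_try_alloc(dev, &state, phys, size)) {
        /* IOVA space reserved; link the physical range into it */
        ret = dma_iova_link(dev, &state, phys, 0, size, dir, 0);
        if (ret) {
                dma_iova_free(dev, &state);
                return ret;
        }
        /* flush the IOTLB once for the whole linked range */
        ret = dma_iova_sync(dev, &state, 0, size);
        if (ret) {
                dma_iova_destroy(dev, &state, size, dir, 0);
                return ret;
        }
        /* ... hand state.addr to the device ... */
} else {
        /* fall back to the classic dma_map_page()/dma_map_sg() path */
}

/* unmap path */
if (dma_use_iova(&state))
        dma_iova_destroy(dev, &state, size, dir, 0);
/* else: dma_unmap_page()/dma_unmap_sg() as before */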
Reviewed-by: Christoph Hellwig Tested-by: Jens Axboe Reviewed-by: Luis Chamberlain Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski --- include/linux/dma-mapping.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index de7f73810d54..a71e110f1e9d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -309,6 +309,17 @@ static inline bool dma_use_iova(struct dma_iova_state *state) bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state, phys_addr_t phys, size_t size); void dma_iova_free(struct device *dev, struct dma_iova_state *state); +void dma_iova_destroy(struct device *dev, struct dma_iova_state *state, + size_t mapped_len, enum dma_data_direction dir, + unsigned long attrs); +int dma_iova_sync(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size); +int dma_iova_link(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void dma_iova_unlink(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); #else /* CONFIG_IOMMU_DMA */ static inline bool dma_use_iova(struct dma_iova_state *state) { @@ -323,6 +334,27 @@ static inline void dma_iova_free(struct device *dev, struct dma_iova_state *state) { } +static inline void dma_iova_destroy(struct device *dev, + struct dma_iova_state *state, size_t mapped_len, + enum dma_data_direction dir, unsigned long attrs) +{ +} +static inline int dma_iova_sync(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size) +{ + return -EOPNOTSUPP; +} +static inline int dma_iova_link(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t offset, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return -EOPNOTSUPP; +} +static inline void dma_iova_unlink(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ +} #endif /* CONFIG_IOMMU_DMA */ #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) -- cgit v1.2.3 From 5f3b133a23c545272b6bc1e818a5a35e1ca84b1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 May 2025 10:01:45 +0300 Subject: dma-mapping: add a dma_need_unmap helper Add helper that allows a driver to skip calling dma_unmap_* if the DMA layer can guarantee that they are no-nops. Signed-off-by: Christoph Hellwig Tested-by: Jens Axboe Reviewed-by: Luis Chamberlain Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski --- include/linux/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index a71e110f1e9d..d2f358c5a25d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -406,6 +406,7 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return dma_dev_need_sync(dev) ? 
__dma_need_sync(dev, dma_addr) : false; } +bool dma_need_unmap(struct device *dev); #else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ static inline bool dma_dev_need_sync(const struct device *dev) { @@ -431,6 +432,10 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return false; } +static inline bool dma_need_unmap(struct device *dev) +{ + return false; +} #endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ struct page *dma_alloc_pages(struct device *dev, size_t size, -- cgit v1.2.3 From 454ad0169cf7bb09e622c5bc40275ea725ab3a8a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 5 Mar 2025 11:22:01 +0200 Subject: media: common: Add v4l2_find_nearest_size_conditional() v4l2_find_nearest_size() returns a mode from sensor driver's mode list that is a best match width and height wise for the sensor. Some drivers have different set of available modes depending on the number of lanes. While this could be handled within a driver by providing different lists of modes, provide a helper v4l2_find_nearest_size_conditional() to ignore modes that aren't available. Also use size_t for the array index and remove extra commas while at it. Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-common.h | 58 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index fda903bb3674..0a43f56578bc 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -390,38 +390,72 @@ void v4l_bound_align_image(unsigned int *width, unsigned int wmin, unsigned int salign); /** - * v4l2_find_nearest_size - Find the nearest size among a discrete - * set of resolutions contained in an array of a driver specific struct. + * v4l2_find_nearest_size_conditional - Find the nearest size among a discrete + * set of resolutions contained in an array of a driver specific struct, + * with conditionally exlusion of certain modes * * @array: a driver specific array of image sizes * @array_size: the length of the driver specific array of image sizes * @width_field: the name of the width field in the driver specific struct * @height_field: the name of the height field in the driver specific struct - * @width: desired width. - * @height: desired height. + * @width: desired width + * @height: desired height + * @func: ignores mode if returns false + * @context: context for the function * * Finds the closest resolution to minimize the width and height differences * between what requested and the supported resolutions. The size of the width * and height fields in the driver specific must equal to that of u32, i.e. four - * bytes. + * bytes. @func is called for each mode considered, a mode is ignored if @func + * returns false for it. * * Returns the best match or NULL if the length of the array is zero. 
*/ -#define v4l2_find_nearest_size(array, array_size, width_field, height_field, \ - width, height) \ +#define v4l2_find_nearest_size_conditional(array, array_size, width_field, \ + height_field, width, height, \ + func, context) \ ({ \ BUILD_BUG_ON(sizeof((array)->width_field) != sizeof(u32) || \ sizeof((array)->height_field) != sizeof(u32)); \ - (typeof(&(array)[0]))__v4l2_find_nearest_size( \ + (typeof(&(array)[0]))__v4l2_find_nearest_size_conditional( \ (array), array_size, sizeof(*(array)), \ offsetof(typeof(*(array)), width_field), \ offsetof(typeof(*(array)), height_field), \ - width, height); \ + width, height, func, context); \ }) const void * -__v4l2_find_nearest_size(const void *array, size_t array_size, - size_t entry_size, size_t width_offset, - size_t height_offset, s32 width, s32 height); +__v4l2_find_nearest_size_conditional(const void *array, size_t array_size, + size_t entry_size, size_t width_offset, + size_t height_offset, s32 width, + s32 height, + bool (*func)(const void *array, + size_t index, + const void *context), + const void *context); + +/** + * v4l2_find_nearest_size - Find the nearest size among a discrete set of + * resolutions contained in an array of a driver specific struct + * + * @array: a driver specific array of image sizes + * @array_size: the length of the driver specific array of image sizes + * @width_field: the name of the width field in the driver specific struct + * @height_field: the name of the height field in the driver specific struct + * @width: desired width + * @height: desired height + * + * Finds the closest resolution to minimize the width and height differences + * between what requested and the supported resolutions. The size of the width + * and height fields in the driver specific must equal to that of u32, i.e. four + * bytes. + * + * Returns the best match or NULL if the length of the array is zero. + */ +#define v4l2_find_nearest_size(array, array_size, width_field, \ + height_field, width, height) \ + v4l2_find_nearest_size_conditional(array, array_size, width_field, \ + height_field, width, height, NULL, \ + NULL) /** * v4l2_g_parm_cap - helper routine for vidioc_g_parm to fill this in by -- cgit v1.2.3 From 56dee46ff47f0ef9944dddd1fd137c94b7c2d9de Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 5 May 2025 22:17:40 +0800 Subject: block: move ELEVATOR_FLAG_DISABLE_WBT a request queue flag ELEVATOR_FLAG_DISABLE_WBT is only used by BFQ to disallow wbt when BFQ is in use. The flag is set in BFQ's init(), and cleared in BFQ's exit(). Making it as request queue flag, so that we can avoid to deal with elevator switch race. Also it isn't graceful to checking one scheduler flag in wbt_enable_default(). 
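A minimal sketch of the intended use of the new queue flag; the exact call sites in BFQ and wbt are paraphrased from the commit message, not taken from this patch:

/* BFQ init() */
blk_queue_flag_set(QUEUE_FLAG_DISABLE_WBT_DEF, q);

/* BFQ exit() */
blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, q);

/* wbt_enable_default() */
if (blk_queue_disable_wbt(q))
        return;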
Reviewed-by: Christoph Hellwig Reviewed-by: Nilay Shroff Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250505141805.2751237-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5ccb961ee2ae..c36d7a1c2cc0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -641,6 +641,7 @@ enum { QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */ QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */ QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ + QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_MAX }; @@ -676,6 +677,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) #define blk_queue_skip_tagset_quiesce(q) \ ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE) +#define blk_queue_disable_wbt(q) \ + test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -- cgit v1.2.3 From 98e68f67020ce30e1a4d8e2d05d85a453738dfb8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 5 May 2025 22:17:46 +0800 Subject: block: prevent adding/deleting disk during updating nr_hw_queues Both adding/deleting disk code are reader of `nr_hw_queues`, so we can't allow them in-progress when updating nr_hw_queues, kernel panic and kasan has been reported in [1]. Prevent adding/deleting disk during updating nr_hw_queues by adding rw_semaphore to tagset, write lock is grabbed in blk_mq_update_nr_hw_queues(), and read lock is acquired when adding/deleting disk. Also mark GFP_NOIO allocation scope for adding/deleting disk because blk_mq_update_nr_hw_queues() is part of some driver's error handler. This way avoids lot of trouble. 
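A sketch of the locking scheme the commit message describes; the function bodies are hypothetical, only the update_nr_hwq_lock field of struct blk_mq_tag_set comes from the patch:

/* blk_mq_update_nr_hw_queues() */
down_write(&set->update_nr_hwq_lock);
/* ... change the hardware queue count ... */
up_write(&set->update_nr_hwq_lock);

/* add_disk()/del_gendisk() style paths */
down_read(&set->update_nr_hwq_lock);
/* ... add or remove the disk, nr_hw_queues cannot change here ... */
up_read(&set->update_nr_hwq_lock);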
Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Nilay Shroff Suggested-by: Nilay Shroff Reported-by: Nilay Shroff Closes: https://lore.kernel.org/linux-block/a5896cdb-a59a-4a37-9f99-20522f5d2987@linux.ibm.com/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250505141805.2751237-9-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8eb9b3310167..ef84d53095a6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -9,6 +9,7 @@ #include #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -527,6 +528,8 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; + + struct rw_semaphore update_nr_hwq_lock; }; /** -- cgit v1.2.3 From 21eed794ab4bd1a6c82a55df4416d18fb4d21da9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 5 May 2025 22:17:58 +0800 Subject: block: add new helper for disabling elevator switch when deleting disk Add new helper disable_elv_switch() and new flag QUEUE_FLAG_NO_ELV_SWITCH for disabling elevator switch before deleting disk: - originally flag QUEUE_FLAG_REGISTERED is added for preventing elevator switch during removing disk, but this flag has been used widely for other purposes, so add one new flag for disabling elevator switch only - for avoiding deadlock risk, we have to move elevator queue register/unregister out of elevator lock and queue freeze, which will be done in next patch. However, this way adds small race window between elevator switch and deleting ->queue_kobj, in which elevator queue register/unregister could be run concurrently. The added helper will be used for avoiding the race in the following patch. 
- drain in-progress elevator switch before deleting disk Suggested-by: Nilay Shroff Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Reviewed-by: Nilay Shroff Link: https://lore.kernel.org/r/20250505141805.2751237-21-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c36d7a1c2cc0..3aa1fd637d57 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -642,6 +642,7 @@ enum { QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */ QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ + QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ QUEUE_FLAG_MAX }; @@ -679,6 +680,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE) #define blk_queue_disable_wbt(q) \ test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags) +#define blk_queue_no_elv_switch(q) \ + test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -- cgit v1.2.3 From 78c271344b6f64ce24c845e54903e09928cf2061 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 5 May 2025 22:18:03 +0800 Subject: block: move wbt_enable_default() out of queue freezing from sched ->exit() scheduler's ->exit() is called with queue frozen and elevator lock is held, and wbt_enable_default() can't be called with queue frozen, otherwise the following lockdep warning is triggered: #6 (&q->rq_qos_mutex){+.+.}-{4:4}: #5 (&eq->sysfs_lock){+.+.}-{4:4}: #4 (&q->elevator_lock){+.+.}-{4:4}: #3 (&q->q_usage_counter(io)#3){++++}-{0:0}: #2 (fs_reclaim){+.+.}-{0:0}: #1 (&sb->s_type->i_mutex_key#3){+.+.}-{4:4}: #0 (&q->debugfs_mutex){+.+.}-{4:4}: Fix the issue by moving wbt_enable_default() out of bfq's exit(), and call it from elevator_change_done(). Meantime add disk->rqos_state_mutex for covering wbt state change, which matches the purpose more than ->elevator_lock. Reviewed-by: Hannes Reinecke Reviewed-by: Nilay Shroff Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250505141805.2751237-26-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3aa1fd637d57..94323f303b37 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -218,6 +218,8 @@ struct gendisk { * devices that do not have multiple independent access ranges. */ struct blk_independent_access_ranges *ia_ranges; + + struct mutex rqos_state_mutex; /* rqos state change mutex */ }; /** -- cgit v1.2.3 From 732f25a2895a8c1c54fb56544f0b1e23770ef4d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2025 17:47:22 +0530 Subject: fs: add a write stream field to the kiocb Prepare for io_uring passthrough of write streams. The write stream field in the kiocb structure fits into an existing 2-byte hole, so its size is not changed. 
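For illustration only, the kind of plumbing this enables in a write submission path; the bio field is added by a following patch in the series and the surrounding code is hypothetical:

/* propagate the per-I/O placement hint from the kiocb into the bio */
bio->bi_write_stream = iocb->ki_write_stream;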
Reviewed-by: Hannes Reinecke Reviewed-by: Nitesh Shetty Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20250506121732.8211-2-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..d5988867fe31 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -408,6 +408,7 @@ struct kiocb { void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ + u8 ki_write_stream; union { /* * Only used for async buffered reads, where it denotes the -- cgit v1.2.3 From 5006f85ea23ea0bda9a8e31fdda126f4fca48f20 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2025 17:47:23 +0530 Subject: block: add a bi_write_stream field Add the ability to pass a write stream for placement control in the bio. The new field fits in an existing hole, so does not change the size of the struct. Reviewed-by: Hannes Reinecke Reviewed-by: Nitesh Shetty Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20250506121732.8211-3-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 5a46067e85b1..f38425338c3f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -220,6 +220,7 @@ struct bio { unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; enum rw_hint bi_write_hint; + u8 bi_write_stream; blk_status_t bi_status; atomic_t __bi_remaining; -- cgit v1.2.3 From d2f526ba27d29c442542f7c5df0a86ef0b576716 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 6 May 2025 17:47:24 +0530 Subject: block: introduce max_write_streams queue limit Drivers with hardware that support write streams need a way to export how many are available so applications can generically query this. 
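A hypothetical in-kernel sanity check against the new limit, using the bdev_max_write_streams() helper added by this patch; streams are assumed to be numbered from 1, with 0 meaning "no stream":

if (bio->bi_write_stream > bdev_max_write_streams(bio->bi_bdev))
        return -EINVAL;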
Reviewed-by: Hannes Reinecke Reviewed-by: Nitesh Shetty Signed-off-by: Keith Busch [hch: renamed hints to streams, removed stacking] Signed-off-by: Christoph Hellwig Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20250506121732.8211-4-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 94323f303b37..13e1426ad38d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -404,6 +404,8 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; + unsigned short max_write_streams; + unsigned int max_open_zones; unsigned int max_active_zones; @@ -1270,6 +1272,13 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev) return queue_max_segments(bdev_get_queue(bdev)); } +static inline unsigned short bdev_max_write_streams(struct block_device *bdev) +{ + if (bdev_is_partition(bdev)) + return 0; + return bdev_limits(bdev)->max_write_streams; +} + static inline unsigned queue_logical_block_size(const struct request_queue *q) { return q->limits.logical_block_size; -- cgit v1.2.3 From c23acfac10786ac5062a0615e23e68b913ac8da0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2025 17:47:25 +0530 Subject: block: introduce a write_stream_granularity queue limit Export the granularity that write streams should be discarded with, as it is essential for making good use of them. Reviewed-by: Hannes Reinecke Reviewed-by: Nitesh Shetty Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20250506121732.8211-5-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 13e1426ad38d..52b7a27c46e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -405,6 +405,7 @@ struct queue_limits { unsigned short max_discard_segments; unsigned short max_write_streams; + unsigned int write_stream_granularity; unsigned int max_open_zones; unsigned int max_active_zones; -- cgit v1.2.3 From 02040353f4fedb823f011f27962325f328d0689f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 6 May 2025 17:47:27 +0530 Subject: io_uring: enable per-io write streams Allow userspace to pass a per-I/O write stream in the SQE: __u8 write_stream; The __u8 type matches the size the filesystems and block layer support. Application can query the supported values from the block devices max_write_streams sysfs attribute. Unsupported values are ignored by file operations that do not support write streams or rejected with an error by those that support them. 
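From userspace the field could be set like this; a sketch assuming liburing and uapi headers new enough to expose the write_stream member, with fd, buf, len and offset as placeholders:

struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

io_uring_prep_write(sqe, fd, buf, len, offset);
sqe->write_stream = 1;  /* place this write on stream 1 */
io_uring_submit(&ring);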
Reviewed-by: Hannes Reinecke Reviewed-by: Nitesh Shetty Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20250506121732.8211-7-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8f1fc12bac46..50e372ea97c5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -93,6 +93,10 @@ struct io_uring_sqe { __u16 addr_len; __u16 __pad3[1]; }; + struct { + __u8 write_stream; + __u8 __pad4[3]; + }; }; union { struct { -- cgit v1.2.3 From ee203d3d86113559b77b1723e0d10909ebbd66ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2025 17:47:30 +0530 Subject: nvme: add FDP definitions Add the config feature result, config log page, and management receive commands needed for FDP. Partially based on a patch from Kanchan Joshi . Reviewed-by: Hannes Reinecke Reviewed-by: Nitesh Shetty Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20250506121732.8211-10-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/linux/nvme.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2479ed10f53e..51308f65b72f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -303,6 +303,7 @@ enum nvme_ctrl_attr { NVME_CTRL_ATTR_TBKAS = (1 << 6), NVME_CTRL_ATTR_ELBAS = (1 << 15), NVME_CTRL_ATTR_RHII = (1 << 18), + NVME_CTRL_ATTR_FDPS = (1 << 19), }; struct nvme_id_ctrl { @@ -689,6 +690,44 @@ struct nvme_rotational_media_log { __u8 rsvd24[488]; }; +struct nvme_fdp_config { + __u8 flags; +#define FDPCFG_FDPE (1U << 0) + __u8 fdpcidx; + __le16 reserved; +}; + +struct nvme_fdp_ruh_desc { + __u8 ruht; + __u8 reserved[3]; +}; + +struct nvme_fdp_config_desc { + __le16 dsze; + __u8 fdpa; + __u8 vss; + __le32 nrg; + __le16 nruh; + __le16 maxpids; + __le32 nns; + __le64 runs; + __le32 erutl; + __u8 rsvd28[36]; + struct nvme_fdp_ruh_desc ruhs[]; +}; + +struct nvme_fdp_config_log { + __le16 numfdpc; + __u8 ver; + __u8 rsvd3; + __le32 sze; + __u8 rsvd8[8]; + /* + * This is followed by variable number of nvme_fdp_config_desc + * structures, but sparse doesn't like nested variable sized arrays. 
+ */ +}; + struct nvme_smart_log { __u8 critical_warning; __u8 temperature[2]; @@ -915,6 +954,7 @@ enum nvme_opcode { nvme_cmd_resv_register = 0x0d, nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_acquire = 0x11, + nvme_cmd_io_mgmt_recv = 0x12, nvme_cmd_resv_release = 0x15, nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, @@ -936,6 +976,7 @@ enum nvme_opcode { nvme_opcode_name(nvme_cmd_resv_register), \ nvme_opcode_name(nvme_cmd_resv_report), \ nvme_opcode_name(nvme_cmd_resv_acquire), \ + nvme_opcode_name(nvme_cmd_io_mgmt_recv), \ nvme_opcode_name(nvme_cmd_resv_release), \ nvme_opcode_name(nvme_cmd_zone_mgmt_send), \ nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \ @@ -1087,6 +1128,7 @@ enum { NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRACT = 1 << 13, NVME_RW_DTYPE_STREAMS = 1 << 4, + NVME_RW_DTYPE_DPLCMT = 2 << 4, NVME_WZ_DEAC = 1 << 9, }; @@ -1174,6 +1216,38 @@ struct nvme_zone_mgmt_recv_cmd { __le32 cdw14[2]; }; +struct nvme_io_mgmt_recv_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le64 rsvd2[2]; + union nvme_data_ptr dptr; + __u8 mo; + __u8 rsvd11; + __u16 mos; + __le32 numd; + __le32 cdw12[4]; +}; + +enum { + NVME_IO_MGMT_RECV_MO_RUHS = 1, +}; + +struct nvme_fdp_ruh_status_desc { + __le16 pid; + __le16 ruhid; + __le32 earutr; + __le64 ruamw; + __u8 reserved[16]; +}; + +struct nvme_fdp_ruh_status { + __u8 rsvd0[14]; + __le16 nruhsd; + struct nvme_fdp_ruh_status_desc ruhsd[]; +}; + enum { NVME_ZRA_ZONE_REPORT = 0, NVME_ZRASF_ZONE_REPORT_ALL = 0, @@ -1309,6 +1383,7 @@ enum { NVME_FEAT_PLM_WINDOW = 0x14, NVME_FEAT_HOST_BEHAVIOR = 0x16, NVME_FEAT_SANITIZE = 0x17, + NVME_FEAT_FDP = 0x1d, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -1329,6 +1404,7 @@ enum { NVME_LOG_ANA = 0x0c, NVME_LOG_FEATURES = 0x12, NVME_LOG_RMI = 0x16, + NVME_LOG_FDP_CONFIGS = 0x20, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), @@ -1923,6 +1999,7 @@ struct nvme_command { struct nvmf_auth_receive_command auth_receive; struct nvme_dbbuf dbbuf; struct nvme_directive_cmd directive; + struct nvme_io_mgmt_recv_cmd imr; }; }; -- cgit v1.2.3 From e48a906af6b3cfd09c50af14a8ea7a5cf45b39ab Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 6 May 2025 19:33:08 +0800 Subject: ASoC: SOF: add disable_function_topology flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SOF driver will load required function topologies dynamically. However, we prefer using the monolithic topology. Add a flag to allow user not using the function topologies. 
Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250506113311.45487-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/sof.h b/include/sound/sof.h index 64fd5504cb2b..eddea82c7b5a 100644 --- a/include/sound/sof.h +++ b/include/sound/sof.h @@ -106,6 +106,7 @@ struct snd_sof_pdata { const char *fw_filename; const char *tplg_filename_prefix; const char *tplg_filename; + bool disable_function_topology; /* loadable external libraries available under this directory */ const char *fw_lib_prefix; -- cgit v1.2.3 From 86f6e4791c40c33891299d95c055e5d06d396284 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 6 May 2025 15:29:32 +0800 Subject: ASoC: codec: cs42l56: Drop cs42l56.h There is no in-tree user of "include/sound/cs42l56.h", so move 'struct cs42l56_platform_data' to cs42l56.c and remove the header file. And platform data is mostly for platforms that create devices non using device tree. CS42L56 is a discontinued product, there is less possibility that new users will use legacy method to create devices. So drop cs42l56.h to prepare using GPIOD API. Signed-off-by: Peng Fan Link: https://patch.msgid.link/20250506-csl42x-v3-2-e9496db544c4@nxp.com Signed-off-by: Mark Brown --- include/sound/cs42l56.h | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 include/sound/cs42l56.h (limited to 'include') diff --git a/include/sound/cs42l56.h b/include/sound/cs42l56.h deleted file mode 100644 index 62e9f7a3b414..000000000000 --- a/include/sound/cs42l56.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/sound/cs42l56.h -- Platform data for CS42L56 - * - * Copyright (c) 2014 Cirrus Logic Inc. - */ - -#ifndef __CS42L56_H -#define __CS42L56_H - -struct cs42l56_platform_data { - - /* GPIO for Reset */ - unsigned int gpio_nreset; - - /* MICBIAS Level. Check datasheet Pg48 */ - unsigned int micbias_lvl; - - /* Analog Input 1A Reference 0=Single 1=Pseudo-Differential */ - unsigned int ain1a_ref_cfg; - - /* Analog Input 2A Reference 0=Single 1=Pseudo-Differential */ - unsigned int ain2a_ref_cfg; - - /* Analog Input 1B Reference 0=Single 1=Pseudo-Differential */ - unsigned int ain1b_ref_cfg; - - /* Analog Input 2B Reference 0=Single 1=Pseudo-Differential */ - unsigned int ain2b_ref_cfg; - - /* Charge Pump Freq. Check datasheet Pg62 */ - unsigned int chgfreq; - - /* HighPass Filter Right Channel Corner Frequency */ - unsigned int hpfb_freq; - - /* HighPass Filter Left Channel Corner Frequency */ - unsigned int hpfa_freq; - - /* Adaptive Power Control for LO/HP */ - unsigned int adaptive_pwr; - -}; - -#endif /* __CS42L56_H */ -- cgit v1.2.3 From 43ef0dccbc2528924c4b03a902fa39502faabb16 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 6 May 2025 15:29:35 +0800 Subject: ASoC: codec: cs42l73: Drop cs42l73.h There is no in-tree user of "include/sound/cs42l56.h", so move 'struct cs42l73_platform_data ' to cs42l73.c and remove the header file. And platform data is mostly for legacy platforms that create devices non using device tree. So drop cs42l73.h to prepare using GPIOD API. 
Signed-off-by: Peng Fan Link: https://patch.msgid.link/20250506-csl42x-v3-5-e9496db544c4@nxp.com Signed-off-by: Mark Brown --- include/sound/cs42l73.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/sound/cs42l73.h (limited to 'include') diff --git a/include/sound/cs42l73.h b/include/sound/cs42l73.h deleted file mode 100644 index 5a93393b6124..000000000000 --- a/include/sound/cs42l73.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/sound/cs42l73.h -- Platform data for CS42L73 - * - * Copyright (c) 2012 Cirrus Logic Inc. - */ - -#ifndef __CS42L73_H -#define __CS42L73_H - -struct cs42l73_platform_data { - /* RST GPIO */ - unsigned int reset_gpio; - unsigned int chgfreq; - int jack_detection; - unsigned int mclk_freq; -}; - -#endif /* __CS42L73_H */ -- cgit v1.2.3 From 772c036befb875c904731fb309fb9d2e065ba3f8 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 6 May 2025 15:29:38 +0800 Subject: ASoC: codec: cs42l52: Drop cs42l52.h There is no in-tree user of "include/sound/cs42l52.h", so move 'struct cs42l52_platform_data ' to cs42l52.c and remove the header file. And platform data is mostly for legacy platforms that create devices non using device tree. So drop cs42l52.h to prepare using GPIOD API. Signed-off-by: Peng Fan Link: https://patch.msgid.link/20250506-csl42x-v3-8-e9496db544c4@nxp.com Signed-off-by: Mark Brown --- include/sound/cs42l52.h | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 include/sound/cs42l52.h (limited to 'include') diff --git a/include/sound/cs42l52.h b/include/sound/cs42l52.h deleted file mode 100644 index c20649666abe..000000000000 --- a/include/sound/cs42l52.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/sound/cs42l52.h -- Platform data for CS42L52 - * - * Copyright (c) 2012 Cirrus Logic Inc. - */ - -#ifndef __CS42L52_H -#define __CS42L52_H - -struct cs42l52_platform_data { - - /* MICBIAS Level. Check datasheet Pg48 */ - unsigned int micbias_lvl; - - /* MICA mode selection Differential or Single-ended */ - bool mica_diff_cfg; - - /* MICB mode selection Differential or Single-ended */ - bool micb_diff_cfg; - - /* Charge Pump Freq. Check datasheet Pg73 */ - unsigned int chgfreq; - - /* Reset GPIO */ - unsigned int reset_gpio; -}; - -#endif /* __CS42L52_H */ -- cgit v1.2.3 From bb5eb8a5b222fa5092f60d5555867a05ebc3bdf2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 22 Apr 2025 19:56:38 +0800 Subject: f2fs: fix to bail out in get_new_segment() ------------[ cut here ]------------ WARNING: CPU: 3 PID: 579 at fs/f2fs/segment.c:2832 new_curseg+0x5e8/0x6dc pc : new_curseg+0x5e8/0x6dc Call trace: new_curseg+0x5e8/0x6dc f2fs_allocate_data_block+0xa54/0xe28 do_write_page+0x6c/0x194 f2fs_do_write_node_page+0x38/0x78 __write_node_page+0x248/0x6d4 f2fs_sync_node_pages+0x524/0x72c f2fs_write_checkpoint+0x4bc/0x9b0 __checkpoint_and_complete_reqs+0x80/0x244 issue_checkpoint_thread+0x8c/0xec kthread+0x114/0x1bc ret_from_fork+0x10/0x20 get_new_segment() detects inconsistent status in between free_segmap and free_secmap, let's record such error into super block, and bail out get_new_segment() instead of continue using the segment. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index c24f8bc01045..5206d63b3386 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -78,6 +78,7 @@ enum stop_cp_reason { STOP_CP_REASON_UPDATE_INODE, STOP_CP_REASON_FLUSH_FAIL, STOP_CP_REASON_NO_SEGMENT, + STOP_CP_REASON_CORRUPTED_FREE_BITMAP, STOP_CP_REASON_MAX, }; -- cgit v1.2.3 From a5c98e9424573649e59988199a3356a79c9e1fd9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 May 2025 13:17:18 +0100 Subject: io_uring/zcrx: dmabuf backed zerocopy receive Add support for dmabuf backed zcrx areas. To use it, the user should pass IORING_ZCRX_AREA_DMABUF in the struct io_uring_zcrx_area_reg flags field and pass a dmabuf fd in the dmabuf_fd field. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/20bb1890e60a82ec945ab36370d1fd54be414ab6.1746097431.git.asml.silence@gmail.com Link: https://lore.kernel.org/io-uring/6e37db97303212bbd8955f9501cf99b579f8aece.1746547722.git.asml.silence@gmail.com [axboe: fold in fixup] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 130f3bc71a69..5ce096090b0c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -990,12 +990,16 @@ struct io_uring_zcrx_offsets { __u64 __resv[2]; }; +enum io_uring_zcrx_area_flags { + IORING_ZCRX_AREA_DMABUF = 1, +}; + struct io_uring_zcrx_area_reg { __u64 addr; __u64 len; __u64 rq_area_token; __u32 flags; - __u32 __resv1; + __u32 dmabuf_fd; __u64 __resv2[2]; }; -- cgit v1.2.3 From 2dbf6e0df447d1542f8fd158b17a06d2e8ede15e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 May 2025 21:04:08 -0400 Subject: kill vfs_submount() The last remaining user of vfs_submount() (tracefs) is easy to convert to fs_context_for_submount(); do that and bury that thing, along with SB_SUBMOUNT Reviewed-by: Jan Kara Acked-by: Steven Rostedt (Google) Tested-by: Steven Rostedt (Google) Signed-off-by: Al Viro --- include/linux/fs.h | 1 - include/linux/mount.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..515e702d98ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1240,7 +1240,6 @@ extern int send_sigurg(struct file *file); /* These sb flags are internal to the kernel */ #define SB_DEAD BIT(21) #define SB_DYING BIT(24) -#define SB_SUBMOUNT BIT(26) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) #define SB_BORN BIT(29) diff --git a/include/linux/mount.h b/include/linux/mount.h index dcc17ce8a959..d4eb90a367af 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -98,9 +98,6 @@ extern struct vfsmount *vfs_create_mount(struct fs_context *fc); extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); -extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, - struct file_system_type *type, - const char *name, void *data); extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); -- cgit v1.2.3 From 20b4f0b4cdfec106bb73afd47de7d121b723f723 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Wed, 30 Apr 2025 00:45:57 +0100 Subject: rpmsg: core: Remove deadcode rpmsg_send_offchannel() and rpmsg_trysend_offchannel() have been unused since they were added in 2011's commit bcabbccabffe ("rpmsg: add virtio-based remote processor messaging bus") Remove them and associated docs. Signed-off-by: Dr. David Alan Gilbert Acked-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20250429234600.301083-2-linux@treblig.org Signed-off-by: Mathieu Poirier --- include/linux/rpmsg.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 90d8e4475f80..fb7ab9165645 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -184,13 +184,9 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *, int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len); int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); -int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len); int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len); int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); -int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len); __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, poll_table *wait); @@ -271,15 +267,6 @@ static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, } -static inline int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, - u32 dst, void *data, int len) -{ - /* This shouldn't be possible */ - WARN_ON(1); - - return -ENXIO; -} - static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) { /* This shouldn't be possible */ @@ -297,15 +284,6 @@ static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, return -ENXIO; } -static inline int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, - u32 dst, void *data, int len) -{ - /* This shouldn't be possible */ - WARN_ON(1); - - return -ENXIO; -} - static inline __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, poll_table *wait) { -- cgit v1.2.3 From 7a0c1872ee7db25d711ac29a55f36844a7108308 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 27 Mar 2025 12:52:14 +0000 Subject: clk: test: Forward-declare struct of_phandle_args in kunit/clk.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a forward-declare of struct of_phandle_args to prevent the compiler warning: ../include/kunit/clk.h:29:63: warning: ‘struct of_phandle_args’ declared inside parameter list will not be visible outside of this definition or declaration struct clk_hw *(*get)(struct of_phandle_args *clkspec, void *data), Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20250327125214.82598-1-rf@opensource.cirrus.com Fixes: a82fcb16d977 ("clk: test: Add test managed of_clk_add_hw_provider()") Signed-off-by: Stephen Boyd --- include/kunit/clk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/kunit/clk.h b/include/kunit/clk.h index 0afae7688157..f226044cc78d 100644 --- a/include/kunit/clk.h +++ b/include/kunit/clk.h @@ -6,6 +6,7 @@ struct clk; struct clk_hw; struct device; struct device_node; +struct of_phandle_args; struct kunit; struct clk * -- cgit v1.2.3 From e4b700d38957526fbcec6d7bb5890ad4c1192241 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Fri, 18 Apr 2025 10:03:20 +0800 
Subject: dt-bindings: soc: sophgo: Add SG2044 top syscon device The SG2044 top syscon device provide PLL clock control and some other misc feature of the SoC. Add the compatible string for SG2044 top syscon device. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250418020325.421257-2-inochiama@gmail.com Signed-off-by: Inochi Amaoto Signed-off-by: Chen Wang Signed-off-by: Chen Wang --- include/dt-bindings/clock/sophgo,sg2044-pll.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/dt-bindings/clock/sophgo,sg2044-pll.h (limited to 'include') diff --git a/include/dt-bindings/clock/sophgo,sg2044-pll.h b/include/dt-bindings/clock/sophgo,sg2044-pll.h new file mode 100644 index 000000000000..817d45e700cc --- /dev/null +++ b/include/dt-bindings/clock/sophgo,sg2044-pll.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + */ + +#ifndef __DT_BINDINGS_SOPHGO_SG2044_PLL_H__ +#define __DT_BINDINGS_SOPHGO_SG2044_PLL_H__ + +#define CLK_FPLL0 0 +#define CLK_FPLL1 1 +#define CLK_FPLL2 2 +#define CLK_DPLL0 3 +#define CLK_DPLL1 4 +#define CLK_DPLL2 5 +#define CLK_DPLL3 6 +#define CLK_DPLL4 7 +#define CLK_DPLL5 8 +#define CLK_DPLL6 9 +#define CLK_DPLL7 10 +#define CLK_MPLL0 11 +#define CLK_MPLL1 12 +#define CLK_MPLL2 13 +#define CLK_MPLL3 14 +#define CLK_MPLL4 15 +#define CLK_MPLL5 16 + +#endif /* __DT_BINDINGS_SOPHGO_SG2044_PLL_H__ */ -- cgit v1.2.3 From 1a215904986e48ce04ac8f24ecb5b18cb6beaf43 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Fri, 18 Apr 2025 10:03:22 +0800 Subject: dt-bindings: clock: sophgo: add clock controller for SG2044 The clock controller on the SG2044 provides common clock function for all IPs on the SoC. This device requires PLL clock to function normally. Add definition for the clock controller of the SG2044 SoC. 
Reviewed-by: Chen Wang Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250418020325.421257-4-inochiama@gmail.com Signed-off-by: Inochi Amaoto Signed-off-by: Chen Wang Signed-off-by: Chen Wang --- include/dt-bindings/clock/sophgo,sg2044-clk.h | 153 ++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 include/dt-bindings/clock/sophgo,sg2044-clk.h (limited to 'include') diff --git a/include/dt-bindings/clock/sophgo,sg2044-clk.h b/include/dt-bindings/clock/sophgo,sg2044-clk.h new file mode 100644 index 000000000000..d9adca42548e --- /dev/null +++ b/include/dt-bindings/clock/sophgo,sg2044-clk.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2024 Inochi Amaoto + */ + +#ifndef __DT_BINDINGS_SOPHGO_SG2044_CLK_H__ +#define __DT_BINDINGS_SOPHGO_SG2044_CLK_H__ + +#define CLK_DIV_AP_SYS_FIXED 0 +#define CLK_DIV_AP_SYS_MAIN 1 +#define CLK_DIV_RP_SYS_FIXED 2 +#define CLK_DIV_RP_SYS_MAIN 3 +#define CLK_DIV_TPU_SYS_FIXED 4 +#define CLK_DIV_TPU_SYS_MAIN 5 +#define CLK_DIV_NOC_SYS_FIXED 6 +#define CLK_DIV_NOC_SYS_MAIN 7 +#define CLK_DIV_VC_SRC0_FIXED 8 +#define CLK_DIV_VC_SRC0_MAIN 9 +#define CLK_DIV_VC_SRC1_FIXED 10 +#define CLK_DIV_VC_SRC1_MAIN 11 +#define CLK_DIV_CXP_MAC_FIXED 12 +#define CLK_DIV_CXP_MAC_MAIN 13 +#define CLK_DIV_DDR0_FIXED 14 +#define CLK_DIV_DDR0_MAIN 15 +#define CLK_DIV_DDR1_FIXED 16 +#define CLK_DIV_DDR1_MAIN 17 +#define CLK_DIV_DDR2_FIXED 18 +#define CLK_DIV_DDR2_MAIN 19 +#define CLK_DIV_DDR3_FIXED 20 +#define CLK_DIV_DDR3_MAIN 21 +#define CLK_DIV_DDR4_FIXED 22 +#define CLK_DIV_DDR4_MAIN 23 +#define CLK_DIV_DDR5_FIXED 24 +#define CLK_DIV_DDR5_MAIN 25 +#define CLK_DIV_DDR6_FIXED 26 +#define CLK_DIV_DDR6_MAIN 27 +#define CLK_DIV_DDR7_FIXED 28 +#define CLK_DIV_DDR7_MAIN 29 +#define CLK_DIV_TOP_50M 30 +#define CLK_DIV_TOP_AXI0 31 +#define CLK_DIV_TOP_AXI_HSPERI 32 +#define CLK_DIV_TIMER0 33 +#define CLK_DIV_TIMER1 34 +#define CLK_DIV_TIMER2 35 +#define CLK_DIV_TIMER3 36 +#define CLK_DIV_TIMER4 37 +#define CLK_DIV_TIMER5 38 +#define CLK_DIV_TIMER6 39 +#define CLK_DIV_TIMER7 40 +#define CLK_DIV_CXP_TEST_PHY 41 +#define CLK_DIV_CXP_TEST_ETH_PHY 42 +#define CLK_DIV_C2C0_TEST_PHY 43 +#define CLK_DIV_C2C1_TEST_PHY 44 +#define CLK_DIV_PCIE_1G 45 +#define CLK_DIV_UART_500M 46 +#define CLK_DIV_GPIO_DB 47 +#define CLK_DIV_SD 48 +#define CLK_DIV_SD_100K 49 +#define CLK_DIV_EMMC 50 +#define CLK_DIV_EMMC_100K 51 +#define CLK_DIV_EFUSE 52 +#define CLK_DIV_TX_ETH0 53 +#define CLK_DIV_PTP_REF_I_ETH0 54 +#define CLK_DIV_REF_ETH0 55 +#define CLK_DIV_PKA 56 +#define CLK_MUX_DDR0 57 +#define CLK_MUX_DDR1 58 +#define CLK_MUX_DDR2 59 +#define CLK_MUX_DDR3 60 +#define CLK_MUX_DDR4 61 +#define CLK_MUX_DDR5 62 +#define CLK_MUX_DDR6 63 +#define CLK_MUX_DDR7 64 +#define CLK_MUX_NOC_SYS 65 +#define CLK_MUX_TPU_SYS 66 +#define CLK_MUX_RP_SYS 67 +#define CLK_MUX_AP_SYS 68 +#define CLK_MUX_VC_SRC0 69 +#define CLK_MUX_VC_SRC1 70 +#define CLK_MUX_CXP_MAC 71 +#define CLK_GATE_AP_SYS 72 +#define CLK_GATE_RP_SYS 73 +#define CLK_GATE_TPU_SYS 74 +#define CLK_GATE_NOC_SYS 75 +#define CLK_GATE_VC_SRC0 76 +#define CLK_GATE_VC_SRC1 77 +#define CLK_GATE_DDR0 78 +#define CLK_GATE_DDR1 79 +#define CLK_GATE_DDR2 80 +#define CLK_GATE_DDR3 81 +#define CLK_GATE_DDR4 82 +#define CLK_GATE_DDR5 83 +#define CLK_GATE_DDR6 84 +#define CLK_GATE_DDR7 85 +#define CLK_GATE_TOP_50M 86 +#define CLK_GATE_SC_RX 87 +#define CLK_GATE_SC_RX_X0Y1 88 +#define CLK_GATE_TOP_AXI0 89 +#define CLK_GATE_INTC0 90 +#define CLK_GATE_INTC1 91 +#define CLK_GATE_INTC2 92 
+#define CLK_GATE_INTC3 93 +#define CLK_GATE_MAILBOX0 94 +#define CLK_GATE_MAILBOX1 95 +#define CLK_GATE_MAILBOX2 96 +#define CLK_GATE_MAILBOX3 97 +#define CLK_GATE_TOP_AXI_HSPERI 98 +#define CLK_GATE_APB_TIMER 99 +#define CLK_GATE_TIMER0 100 +#define CLK_GATE_TIMER1 101 +#define CLK_GATE_TIMER2 102 +#define CLK_GATE_TIMER3 103 +#define CLK_GATE_TIMER4 104 +#define CLK_GATE_TIMER5 105 +#define CLK_GATE_TIMER6 106 +#define CLK_GATE_TIMER7 107 +#define CLK_GATE_CXP_CFG 108 +#define CLK_GATE_CXP_MAC 109 +#define CLK_GATE_CXP_TEST_PHY 110 +#define CLK_GATE_CXP_TEST_ETH_PHY 111 +#define CLK_GATE_PCIE_1G 112 +#define CLK_GATE_C2C0_TEST_PHY 113 +#define CLK_GATE_C2C1_TEST_PHY 114 +#define CLK_GATE_UART_500M 115 +#define CLK_GATE_APB_UART 116 +#define CLK_GATE_APB_SPI 117 +#define CLK_GATE_AHB_SPIFMC 118 +#define CLK_GATE_APB_I2C 119 +#define CLK_GATE_AXI_DBG_I2C 120 +#define CLK_GATE_GPIO_DB 121 +#define CLK_GATE_APB_GPIO_INTR 122 +#define CLK_GATE_APB_GPIO 123 +#define CLK_GATE_SD 124 +#define CLK_GATE_AXI_SD 125 +#define CLK_GATE_SD_100K 126 +#define CLK_GATE_EMMC 127 +#define CLK_GATE_AXI_EMMC 128 +#define CLK_GATE_EMMC_100K 129 +#define CLK_GATE_EFUSE 130 +#define CLK_GATE_APB_EFUSE 131 +#define CLK_GATE_SYSDMA_AXI 132 +#define CLK_GATE_TX_ETH0 133 +#define CLK_GATE_AXI_ETH0 134 +#define CLK_GATE_PTP_REF_I_ETH0 135 +#define CLK_GATE_REF_ETH0 136 +#define CLK_GATE_APB_RTC 137 +#define CLK_GATE_APB_PWM 138 +#define CLK_GATE_APB_WDT 139 +#define CLK_GATE_AXI_SRAM 140 +#define CLK_GATE_AHB_ROM 141 +#define CLK_GATE_PKA 142 + +#endif /* __DT_BINDINGS_SOPHGO_SG2044_CLK_H__ */ -- cgit v1.2.3 From 429ac6211494c12b668dac59811ea8a96db6d757 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 5 May 2025 13:45:11 +0200 Subject: devlink: define enum for attr types of dynamic attributes Devlink param and health reporter fmsg use attributes with dynamic type which is determined according to a different type. Currently used values are NLA_*. The problem is, they are not part of UAPI. They may change which would cause a break. To make this future safe, introduce a enum that shadows NLA_* values in it and is part of UAPI. Also, this allows to possibly carry types that are unrelated to NLA_* values. Signed-off-by: Saeed Mahameed Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20250505114513.53370-3-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 9401aa343673..a5ee0f13740a 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -385,6 +385,21 @@ enum devlink_linecard_state { DEVLINK_LINECARD_STATE_MAX = __DEVLINK_LINECARD_STATE_MAX - 1 }; +/* Variable attribute type. */ +enum devlink_var_attr_type { + /* Following values relate to the internal NLA_* values */ + DEVLINK_VAR_ATTR_TYPE_U8 = 1, + DEVLINK_VAR_ATTR_TYPE_U16, + DEVLINK_VAR_ATTR_TYPE_U32, + DEVLINK_VAR_ATTR_TYPE_U64, + DEVLINK_VAR_ATTR_TYPE_STRING, + DEVLINK_VAR_ATTR_TYPE_FLAG, + DEVLINK_VAR_ATTR_TYPE_NUL_STRING = 10, + DEVLINK_VAR_ATTR_TYPE_BINARY, + __DEVLINK_VAR_ATTR_TYPE_CUSTOM_BASE = 0x80, + /* Any possible custom types, unrelated to NLA_* values go below */ +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! 
*/ DEVLINK_ATTR_UNSPEC, -- cgit v1.2.3 From f9e78932eac650cf1385244482b85e65ccaa87cf Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 5 May 2025 13:45:12 +0200 Subject: devlink: avoid param type value translations Assign DEVLINK_PARAM_TYPE_* enum values to DEVLINK_VAR_ATTR_TYPE_* to ensure the same values are used internally and in UAPI. Benefit from that by removing the value translations. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20250505114513.53370-4-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index b8783126c1ed..0091f23a40f7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -420,11 +420,11 @@ typedef u64 devlink_resource_occ_get_t(void *priv); #define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { - DEVLINK_PARAM_TYPE_U8, - DEVLINK_PARAM_TYPE_U16, - DEVLINK_PARAM_TYPE_U32, - DEVLINK_PARAM_TYPE_STRING, - DEVLINK_PARAM_TYPE_BOOL, + DEVLINK_PARAM_TYPE_U8 = DEVLINK_VAR_ATTR_TYPE_U8, + DEVLINK_PARAM_TYPE_U16 = DEVLINK_VAR_ATTR_TYPE_U16, + DEVLINK_PARAM_TYPE_U32 = DEVLINK_VAR_ATTR_TYPE_U32, + DEVLINK_PARAM_TYPE_STRING = DEVLINK_VAR_ATTR_TYPE_STRING, + DEVLINK_PARAM_TYPE_BOOL = DEVLINK_VAR_ATTR_TYPE_FLAG, }; union devlink_param_value { -- cgit v1.2.3 From a1ceb831417b8e23bd041d3e7021e235fa845128 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 Apr 2025 08:55:41 +0200 Subject: genirq/manage: Rework can_request_irq() Use the new guards to get and lock the interrupt descriptor and tidy up the code. Make the return value boolean to reflect it's meaning. No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250429065422.187250840@linutronix.de --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index dd5df1e2d032..df93b238b8bf 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -700,7 +700,7 @@ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); extern int noirqdebug_setup(char *str); /* Checks whether the interrupt can be requested by request_irq(): */ -extern int can_request_irq(unsigned int irq, unsigned long irqflags); +extern bool can_request_irq(unsigned int irq, unsigned long irqflags); /* Dummy irq-chip implementations: */ extern struct irq_chip no_irq_chip; -- cgit v1.2.3 From 66db876162155c1cec87359cd78c62aaafde9257 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 7 May 2025 11:21:22 +0200 Subject: bus: firewall: Fix missing static inline annotations for stubs Stubs in the header file for !CONFIG_STM32_FIREWALL case should be both static and inline, because they do not come with earlier declaration and should be inlined in every unit including the header. 
Cc: Patrice Chotard Cc: stable@vger.kernel.org Fixes: 5c9668cfc6d7 ("firewall: introduce stm32_firewall framework") Link: https://lore.kernel.org/r/20250507092121.95121-2-krzysztof.kozlowski@linaro.org Signed-off-by: Krzysztof Kozlowski --- include/linux/bus/stm32_firewall_device.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bus/stm32_firewall_device.h b/include/linux/bus/stm32_firewall_device.h index 5178b72bc920..eaa7a3f54450 100644 --- a/include/linux/bus/stm32_firewall_device.h +++ b/include/linux/bus/stm32_firewall_device.h @@ -114,27 +114,30 @@ void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 su #else /* CONFIG_STM32_FIREWALL */ -int stm32_firewall_get_firewall(struct device_node *np, struct stm32_firewall *firewall, - unsigned int nb_firewall) +static inline int stm32_firewall_get_firewall(struct device_node *np, + struct stm32_firewall *firewall, + unsigned int nb_firewall) { return -ENODEV; } -int stm32_firewall_grant_access(struct stm32_firewall *firewall) +static inline int stm32_firewall_grant_access(struct stm32_firewall *firewall) { return -ENODEV; } -void stm32_firewall_release_access(struct stm32_firewall *firewall) +static inline void stm32_firewall_release_access(struct stm32_firewall *firewall) { } -int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id) +static inline int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, + u32 subsystem_id) { return -ENODEV; } -void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id) +static inline void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, + u32 subsystem_id) { } -- cgit v1.2.3 From aefc11550ebd08eadee6d643792c9092de2e472f Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 20 Apr 2025 17:46:56 +0100 Subject: genirq: Remove unused remove_percpu_irq() remove_percpu_irq() has been unused since it was added in 2011 by commit 31d9d9b6d830 ("genirq: Add support for per-cpu dev_id interrupts") Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250420164656.112641-1-linux@treblig.org --- include/linux/irq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index df93b238b8bf..810e44ee297f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -597,7 +597,6 @@ enum { struct irqaction; extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); -extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); #ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE extern void irq_cpu_online(void); -- cgit v1.2.3 From d683a8561889c1813fe2ad6082769c91e3cb71b3 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Wed, 30 Apr 2025 16:27:09 +0000 Subject: ubsan: Remove regs from report_ubsan_failure() report_ubsan_failure() doesn't use argument regs, and soon it will be called from the hypervisor context were regs are not available. So, remove the unused argument. 
Signed-off-by: Mostafa Saleh Acked-by: Kees Cook Link: https://lore.kernel.org/r/20250430162713.1997569-3-smostafa@google.com Signed-off-by: Marc Zyngier --- include/linux/ubsan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h index d8219cbe09ff..c843816f5f68 100644 --- a/include/linux/ubsan.h +++ b/include/linux/ubsan.h @@ -3,9 +3,9 @@ #define _LINUX_UBSAN_H #ifdef CONFIG_UBSAN_TRAP -const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type); +const char *report_ubsan_failure(u32 check_type); #else -static inline const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) +static inline const char *report_ubsan_failure(u32 check_type) { return NULL; } -- cgit v1.2.3 From 61b38f7591fb434fce326c1d686a9793c7f418bc Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Wed, 30 Apr 2025 16:27:10 +0000 Subject: KVM: arm64: Introduce CONFIG_UBSAN_KVM_EL2 Add a new Kconfig CONFIG_UBSAN_KVM_EL2 for KVM which enables UBSAN for EL2 code (in protected/nvhe/hvhe) modes. This will re-use the same checks enabled for the kernel for the hypervisor. The only difference is that for EL2 it always emits a "brk" instead of implementing hooks as the hypervisor can't print reports. The KVM code will re-use the same code for the kernel "report_ubsan_failure()" so #ifdefs are changed to also have this code for CONFIG_UBSAN_KVM_EL2 Signed-off-by: Mostafa Saleh Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250430162713.1997569-4-smostafa@google.com Signed-off-by: Marc Zyngier --- include/linux/ubsan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h index c843816f5f68..3ab8d38aedb8 100644 --- a/include/linux/ubsan.h +++ b/include/linux/ubsan.h @@ -2,7 +2,7 @@ #ifndef _LINUX_UBSAN_H #define _LINUX_UBSAN_H -#ifdef CONFIG_UBSAN_TRAP +#if defined(CONFIG_UBSAN_TRAP) || defined(CONFIG_UBSAN_KVM_EL2) const char *report_ubsan_failure(u32 check_type); #else static inline const char *report_ubsan_failure(u32 check_type) -- cgit v1.2.3 From d75d38dc460452cc8bbca483dee65839e11c71fe Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Wed, 7 May 2025 17:46:15 +0800 Subject: ASoC: tas2781: Add a debugfs node for acoustic tuning "Acoustic Tuning" debugfs node is a bridge to the acoustic tuning tool which can tune the chips' acoustic effect. Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250507094616.210-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index eff011444cc8..07ea38fba03b 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -159,10 +159,33 @@ struct calidata { unsigned int cali_dat_sz_per_dev; }; +/* + * To enable CONFIG_SND_SOC_TAS2781_ACOUST_I2C will create a bridge to the + * acoustic tuning tool which can tune the chips' acoustic effect. Due to the + * whole directly exposing the registers, there exist some potential risks. So + * this define is invisible in Kconfig, anyone who wants to use acoustic tool + * have to edit the source manually. 
+ */ +#ifdef CONFIG_SND_SOC_TAS2781_ACOUST_I2C +#define TASDEV_DATA_PAYLOAD_SIZE 128 +struct acoustic_data { + unsigned char len; + unsigned char id; + unsigned char addr; + unsigned char book; + unsigned char page; + unsigned char reg; + unsigned char data[TASDEV_DATA_PAYLOAD_SIZE]; +}; +#endif + struct tasdevice_priv { struct tasdevice tasdevice[TASDEVICE_MAX_CHANNELS]; struct tasdevice_rca rcabin; struct calidata cali_data; +#ifdef CONFIG_SND_SOC_TAS2781_ACOUST_I2C + struct acoustic_data acou_data; +#endif struct tasdevice_fw *fmw; struct gpio_desc *speaker_id; struct gpio_desc *reset; -- cgit v1.2.3 From f31acff017b1d80e649f674450d3b64d3265848c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 7 May 2025 17:25:37 +0800 Subject: block: fix warning on 'make htmldocs' Fix the following warning when running 'make htmldocs': +WARNING: include/linux/blk-mq.h:532 struct member 'update_nr_hwq_lock' not described in 'blk_mq_tag_set' Fixes: 98e68f67020c ("block: prevent adding/deleting disk during updating nr_hw_queues") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20250507163220.00141d77@canb.auug.org.au/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250507092537.3009112-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ef84d53095a6..906bf330ffb5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -507,6 +507,9 @@ enum hctx_type { * request_queue.tag_set_list. * @srcu: Use as lock when type of the request queue is blocking * (BLK_MQ_F_BLOCKING). + * @update_nr_hwq_lock: + * Synchronize updating nr_hw_queues with add/del disk & + * switching elevator. */ struct blk_mq_tag_set { const struct blk_mq_ops *ops; -- cgit v1.2.3 From 850e210d5ad21b94b55b97d4d82b4cdeb0bb05df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 May 2025 14:04:25 +0200 Subject: block: add a bio_add_virt_nofail helper Add a helper to add a directly mapped kernel virtual address to a bio so that callers don't have to convert to pages or folios. For now only the _nofail variant is provided as that is what all the obvious callers want. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250507120451.4000627-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index cafc7c215de8..acca7464080c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -417,6 +417,8 @@ void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); +void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len); + int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); -- cgit v1.2.3 From 10b1e59cdadabff16fc78eb2ca4c341b1502293c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 May 2025 14:04:26 +0200 Subject: block: add a bdev_rw_virt helper Add a helper to perform synchronous I/O on a kernel direct map range. Currently this is implemented in various places in usually not very efficient ways, so provide a generic helper instead. 
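A hedged sketch of the intended usage (caller, buffer size and sector are invented for illustration): with a kmalloc()ed, direct-mapped buffer, synchronous I/O becomes a single call instead of open-coded bio setup plus submit_bio_wait():

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Example-only caller: read 4 KiB from sector 0 into a kmalloc()ed buffer. */
static int example_read_block(struct block_device *bdev, void *buf)
{
	/* buf must be directly mapped (kmalloc), not vmalloc memory. */
	return bdev_rw_virt(bdev, 0, buf, 4096, REQ_OP_READ);
}
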
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250507120451.4000627-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index acca7464080c..ad54e6af20dc 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -402,7 +402,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; -extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, blk_opf_t opf); extern void bio_uninit(struct bio *); @@ -419,6 +418,10 @@ void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len); +int submit_bio_wait(struct bio *bio); +int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, + size_t len, enum req_op op); + int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); -- cgit v1.2.3 From 75f88659e47dc570bdebddf77d7a3cd5f0845612 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 May 2025 14:04:27 +0200 Subject: block: add a bio_add_max_vecs helper Add a helper to check how many bio_vecs are needed to add a kernel virtual address range to a bio, accounting for the always contiguous direct mapping and vmalloc mappings that usually need a bio_vec per page sized chunk. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250507120451.4000627-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index ad54e6af20dc..128b1c6ca648 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -418,6 +418,21 @@ void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len); +/** + * bio_add_max_vecs - number of bio_vecs needed to add data to a bio + * @kaddr: kernel virtual address to add + * @len: length in bytes to add + * + * Calculate how many bio_vecs need to be allocated to add the kernel virtual + * address range in [@kaddr:@len] in the worse case. + */ +static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len) +{ + if (is_vmalloc_addr(kaddr)) + return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE); + return 1; +} + int submit_bio_wait(struct bio *bio); int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, size_t len, enum req_op op); -- cgit v1.2.3 From 8dd16f5e34693aa0aa6a4ed48427045008097e64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 May 2025 14:04:28 +0200 Subject: block: add a bio_add_vmalloc helpers Add a helper to add a vmalloc region to a bio, abstracting away the vmalloc addresses from the underlying pages and another one wrapping it for the simple case where all data fits into a single bio. 
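A hedged sketch of how the two helpers combine (device, sector and buffer are placeholders, not code from the series): bio_add_max_vecs() sizes the bio for the worst case, then bio_add_vmalloc() attaches the vmalloc region:

#include <linux/bio.h>

/* Example-only: write a vmalloc()ed buffer synchronously. */
static int example_write_vmalloc(struct block_device *bdev, sector_t sector,
				 void *vbuf, unsigned int len)
{
	struct bio *bio;
	int ret;

	bio = bio_alloc(bdev, bio_add_max_vecs(vbuf, len), REQ_OP_WRITE,
			GFP_KERNEL);
	if (!bio)
		return -ENOMEM;
	bio->bi_iter.bi_sector = sector;

	if (!bio_add_vmalloc(bio, vbuf, len)) {
		/* defensive: data did not fit in the single bio sized above */
		bio_put(bio);
		return -EIO;
	}

	ret = submit_bio_wait(bio);
	bio_put(bio);
	return ret;
}
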
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250507120451.4000627-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 128b1c6ca648..5d880903bcc5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -433,6 +433,9 @@ static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len) return 1; } +unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len); +bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len); + int submit_bio_wait(struct bio *bio); int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, size_t len, enum req_op op); -- cgit v1.2.3 From af78428ed3f3eebad7be9d0463251046e9582cf6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 May 2025 14:04:29 +0200 Subject: block: remove the q argument from blk_rq_map_kern Remove the q argument from blk_rq_map_kern and the internal helpers called by it as the queue can trivially be derived from the request. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20250507120451.4000627-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 906bf330ffb5..de8c85a03bb7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -1037,8 +1037,8 @@ int blk_rq_map_user_io(struct request *, struct rq_map_data *, int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); int blk_rq_unmap_user(struct bio *); -int blk_rq_map_kern(struct request_queue *, struct request *, void *, - unsigned int, gfp_t); +int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len, + gfp_t gfp); int blk_rq_append_bio(struct request *rq, struct bio *bio); void blk_execute_rq_nowait(struct request *rq, bool at_head); blk_status_t blk_execute_rq(struct request *rq, bool at_head); -- cgit v1.2.3 From b8c7bfb7a0f01521297d688ca5fe1482c81e8910 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 14 Apr 2025 14:30:56 -0400 Subject: irqdomain: Add IRQ_DOMAIN_FLAG_MSI_IMMUTABLE and irq_domain_is_msi_immutable() Add the flag IRQ_DOMAIN_FLAG_MSI_IMMUTABLE and the API function irq_domain_is_msi_immutable() to check if the MSI controller retains an immutable address/data pair during irq_set_affinity(). Ensure compatibility with MSI users like PCIe Endpoint Doorbell, which require the address/data pair to remain unchanged after setup. Use this function to verify if the MSI controller is immutable. 
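A hedged sketch of the consumer side (function name and error code invented for illustration): an endpoint doorbell setup path can bail out when the underlying MSI domain does not keep the address/data pair fixed:

#include <linux/irq.h>
#include <linux/irqdomain.h>

/* Example-only check before caching an MSI address/data pair as a doorbell. */
static int example_check_doorbell(unsigned int virq)
{
	struct irq_data *d = irq_get_irq_data(virq);

	if (!d || !irq_domain_is_msi_immutable(d->domain))
		return -EOPNOTSUPP;	/* pair may change on irq_set_affinity() */

	return 0;
}
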
Signed-off-by: Frank Li Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250414-ep-msi-v18-2-f69b49917464@nxp.com --- include/linux/irqdomain.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index bb7111105296..bccfff5bebe2 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -231,6 +231,9 @@ enum { /* Irq domain must destroy generic chips when removed */ IRQ_DOMAIN_FLAG_DESTROY_GC = (1 << 10), + /* Address and data pair is mutable when irq_set_affinity() */ + IRQ_DOMAIN_FLAG_MSI_IMMUTABLE = (1 << 11), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -691,6 +694,10 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE; } +static inline bool irq_domain_is_msi_immutable(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_IMMUTABLE; +} #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) -- cgit v1.2.3 From 1b4bb451f3adeb7e5fb86c09cd83609638964b68 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Thu, 3 Apr 2025 11:44:23 +0200 Subject: dt-bindings: clock: thead: Add TH1520 VO clock controller Add device tree bindings for the TH1520 Video Output (VO) subsystem clock controller. The VO sub-system manages clock gates for multimedia components including HDMI, MIPI, and GPU. Document the VIDEO_PLL requirements for the VO clock controller, which receives its input from the AP clock controller. The VIDEO_PLL is a Silicon Creations Sigma-Delta (integer) PLL typically running at 792 MHz with maximum FOUTVCO of 2376 MHz. This binding complements the existing AP sub-system clock controller which manages CPU, DPU, GMAC and TEE PLLs. 
Reviewed-by: Krzysztof Kozlowski Acked-by: Conor Dooley Signed-off-by: Michal Wilczynski Reviewed-by: Drew Fustini Signed-off-by: Drew Fustini --- include/dt-bindings/clock/thead,th1520-clk-ap.h | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/thead,th1520-clk-ap.h b/include/dt-bindings/clock/thead,th1520-clk-ap.h index a199784b3512..09a9aa7b3ab1 100644 --- a/include/dt-bindings/clock/thead,th1520-clk-ap.h +++ b/include/dt-bindings/clock/thead,th1520-clk-ap.h @@ -93,4 +93,38 @@ #define CLK_SRAM3 83 #define CLK_PLL_GMAC_100M 84 #define CLK_UART_SCLK 85 + +/* VO clocks */ +#define CLK_AXI4_VO_ACLK 0 +#define CLK_GPU_MEM 1 +#define CLK_GPU_CORE 2 +#define CLK_GPU_CFG_ACLK 3 +#define CLK_DPU_PIXELCLK0 4 +#define CLK_DPU_PIXELCLK1 5 +#define CLK_DPU_HCLK 6 +#define CLK_DPU_ACLK 7 +#define CLK_DPU_CCLK 8 +#define CLK_HDMI_SFR 9 +#define CLK_HDMI_PCLK 10 +#define CLK_HDMI_CEC 11 +#define CLK_MIPI_DSI0_PCLK 12 +#define CLK_MIPI_DSI1_PCLK 13 +#define CLK_MIPI_DSI0_CFG 14 +#define CLK_MIPI_DSI1_CFG 15 +#define CLK_MIPI_DSI0_REFCLK 16 +#define CLK_MIPI_DSI1_REFCLK 17 +#define CLK_HDMI_I2S 18 +#define CLK_X2H_DPU1_ACLK 19 +#define CLK_X2H_DPU_ACLK 20 +#define CLK_AXI4_VO_PCLK 21 +#define CLK_IOPMP_VOSYS_DPU_PCLK 22 +#define CLK_IOPMP_VOSYS_DPU1_PCLK 23 +#define CLK_IOPMP_VOSYS_GPU_PCLK 24 +#define CLK_IOPMP_DPU1_ACLK 25 +#define CLK_IOPMP_DPU_ACLK 26 +#define CLK_IOPMP_GPU_ACLK 27 +#define CLK_MIPIDSI0_PIXCLK 28 +#define CLK_MIPIDSI1_PIXCLK 29 +#define CLK_HDMI_PIXCLK 30 + #endif -- cgit v1.2.3 From f42c8556a0690802246dc588ae9c18184f71d8f5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 May 2025 22:34:31 +0200 Subject: cpufreq/sched: schedutil: Add helper for governor checks Add a helper for checking if schedutil is the current governor for a given cpufreq policy and use it in sched_is_eas_possible() to avoid accessing cpufreq policy internals directly from there. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Tested-by: Christian Loehle Reviewed-by: Dietmar Eggemann Link: https://patch.msgid.link/3365956.44csPzL39Z@rjwysocki.net --- include/linux/cpufreq.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7bb760031dd7..1d2c6c6d8952 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -650,6 +650,15 @@ module_exit(__governor##_exit) struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); +#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL +bool sugov_is_governor(struct cpufreq_policy *policy); +#else +static inline bool sugov_is_governor(struct cpufreq_policy *policy) +{ + return false; +} +#endif + static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy) { if (policy->max < policy->cur) -- cgit v1.2.3 From 4854649b1fb43968615e0374d9d59580093ac67f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 May 2025 22:37:15 +0200 Subject: cpufreq/sched: Move cpufreq-specific EAS checks to cpufreq Doing cpufreq-specific EAS checks that require accessing policy internals directly from sched_is_eas_possible() is a bit unfortunate, so introduce cpufreq_ready_for_eas() in cpufreq, move those checks into that new function and make sched_is_eas_possible() call it. While at it, address a possible race between the EAS governor check and governor change by doing the former under the policy rwsem. 
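A hedged sketch of the resulting call shape on the scheduler side (simplified, not the exact upstream body of sched_is_eas_possible()):

#include <linux/cpufreq.h>
#include <linux/cpumask.h>

/* Example-only: the scheduler asks cpufreq whether EAS can be enabled. */
static bool example_is_eas_possible(const struct cpumask *cpu_mask)
{
	/* ...asym-capacity and energy-model checks elided... */

	/*
	 * cpufreq_ready_for_eas() checks each policy under the policy
	 * rwsem and uses sugov_is_governor() to confirm schedutil.
	 */
	return cpufreq_ready_for_eas(cpu_mask);
}
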
Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Tested-by: Christian Loehle Reviewed-by: Dietmar Eggemann Link: https://patch.msgid.link/2317800.iZASKD2KPV@rjwysocki.net --- include/linux/cpufreq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1d2c6c6d8952..95f3807c8c55 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1237,6 +1237,8 @@ void cpufreq_generic_init(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int transition_latency); +bool cpufreq_ready_for_eas(const struct cpumask *cpu_mask); + static inline void cpufreq_register_em_with_opp(struct cpufreq_policy *policy) { dev_pm_opp_of_register_em(get_cpu_device(policy->cpu), -- cgit v1.2.3 From 6bceea7a1e076ef9d71b20d8dda2f7dc52bd34d2 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 18 Apr 2025 19:55:03 -0700 Subject: arch_topology: Relocate cpu_scale to topology.[h|c] arch_topology.c provides functionality to parse and scale CPU capacity. It also provides a corresponding sysfs interface. Some architectures parse and scale CPU capacity differently as per their own needs. On Intel processors, for instance, it is responsibility of the Intel P-state driver. Relocate the implementation of that interface to a common location in topology.c. Architectures can use the interface and populate it using their own mechanisms. An alternative approach would be to compile arch_topology.c even if not needed only to get this interface. This approach would create duplicated and conflicting functionality and data structures. Signed-off-by: Ricardo Neri Tested-by: Christian Loehle Link: https://patch.msgid.link/20250419025504.9760-2-ricardo.neri-calderon@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/arch_topology.h | 8 -------- include/linux/topology.h | 9 +++++++++ 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 2222e8b03ff4..d72d6e5aa200 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -14,14 +14,6 @@ int topology_update_cpu_topology(void); struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); -DECLARE_PER_CPU(unsigned long, cpu_scale); - -static inline unsigned long topology_get_cpu_scale(int cpu) -{ - return per_cpu(cpu_scale, cpu); -} - -void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); DECLARE_PER_CPU(unsigned long, capacity_freq_ref); diff --git a/include/linux/topology.h b/include/linux/topology.h index 24e715f0f6d2..cd6b4bdc9cfd 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -332,4 +332,13 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops) !IS_ERR_OR_NULL(mask); \ __hops++) +DECLARE_PER_CPU(unsigned long, cpu_scale); + +static inline unsigned long topology_get_cpu_scale(int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); + #endif /* _LINUX_TOPOLOGY_H */ -- cgit v1.2.3 From 5d894321c49e61379189b0ff605f316e39cbd1e9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 May 2025 14:18:21 -0700 Subject: fs: add atomic write unit max opt to statx XFS will be able to support large atomic writes (atomic write > 1x block) in future. This will be achieved by using different operating methods, depending on the size of the write. 
Specifically a new method of operation based in FS atomic extent remapping will be supported in addition to the current HW offload-based method. The FS method will generally be appreciably slower performing than the HW-offload method. However the FS method will be typically able to contribute to achieving a larger atomic write unit max limit. XFS will support a hybrid mode, where HW offload method will be used when possible, i.e. HW offload is used when the length of the write is supported, and for other times FS-based atomic writes will be used. As such, there is an atomic write length at which the user may experience appreciably slower performance. Advertise this limit in a new statx field, stx_atomic_write_unit_max_opt. When zero, it means that there is no such performance boundary. Masks STATX{_ATTR}_WRITE_ATOMIC can be used to get this new field. This is ok for older kernels which don't support this new field, as they would report 0 in this field (from zeroing in cp_statx()) already. Furthermore those older kernels don't support large atomic writes - apart from block fops, but there would be consistent performance there for atomic writes in range [unit min, unit max]. Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Acked-by: Darrick J. Wong Signed-off-by: John Garry --- include/linux/fs.h | 3 ++- include/linux/stat.h | 1 + include/uapi/linux/stat.h | 8 ++++++-- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..7b19d8f99aff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3475,7 +3475,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, - unsigned int unit_max); + unsigned int unit_max, + unsigned int unit_max_opt); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); diff --git a/include/linux/stat.h b/include/linux/stat.h index be7496a6a0dd..e3d00e7bb26d 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -57,6 +57,7 @@ struct kstat { u32 dio_read_offset_align; u32 atomic_write_unit_min; u32 atomic_write_unit_max; + u32 atomic_write_unit_max_opt; u32 atomic_write_segments_max; }; diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; -- cgit v1.2.3 From 5017b1b02640234b08ad38a046043f143670dea2 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 18 Mar 2025 20:41:42 -0500 Subject: ipmi: Add a note about the pretimeout callback You can't do IPMI calls from the callback, it's called with locks held. 
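A hedged sketch of what a user of the callback should do instead (symbols invented for illustration): defer any IPMI traffic to process context rather than issuing requests from the callback itself:

#include <linux/ipmi.h>
#include <linux/workqueue.h>

static struct work_struct pretimeout_work;	/* example-only */

/* INIT_WORK(&pretimeout_work, pretimeout_worker) is assumed done at init. */
static void pretimeout_worker(struct work_struct *work)
{
	/* It is safe to build and send IPMI requests from here. */
}

static void example_pretimeout(void *handler_data)
{
	/* No ipmi_request_*() calls here - the core holds locks. */
	schedule_work(&pretimeout_work);
}
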
Signed-off-by: Corey Minyard --- include/linux/ipmi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 2f74dd90c271..27cd5980bb27 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -93,7 +93,8 @@ struct ipmi_user_hndl { /* * Called when the interface detects a watchdog pre-timeout. If - * this is NULL, it will be ignored for the user. + * this is NULL, it will be ignored for the user. Note that you + * can't do any IPMI calls from here, it's called with locks held. */ void (*ipmi_watchdog_pretimeout)(void *handler_data); -- cgit v1.2.3 From 6f7f6605c9aeb472b5d4101b263b1fc5dc3cf623 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 22 Apr 2025 12:06:02 -0500 Subject: ipmi:msghandler: Export and fix panic messaging capability Don't have the other users that do things at panic time (the watchdog) do all this themselves, provide a function to do it. Also, with the new design where most stuff happens at thread context, a few things needed to be fixed to avoid doing locking in a panic context. Signed-off-by: Corey Minyard --- include/linux/ipmi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 27cd5980bb27..7da6602eab71 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -344,4 +344,14 @@ extern int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data); /* Helper function for computing the IPMB checksum of some data. */ unsigned char ipmb_checksum(unsigned char *data, int size); +/* + * For things that must send messages at panic time, like the IPMI watchdog + * driver that extends the reset time on a panic, use this to send messages + * from panic context. Note that this puts the driver into a mode that + * only works at panic time, so only use it then. + */ +void ipmi_panic_request_and_wait(struct ipmi_user *user, + struct ipmi_addr *addr, + struct kernel_ipmi_msg *msg); + #endif /* __LINUX_IPMI_H */ -- cgit v1.2.3 From 845b997761a40cd028d1a04a66b34f0871318f28 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 5 May 2025 02:10:37 +0100 Subject: ALSA: hda: Remove unused snd_hdac_stream_get_spbmaxfifo snd_hdac_stream_get_spbmaxfifo() was originally added in 2015 in commit ee8bc4df1b5a ("ALSA: hdac: Add support to enable SPIB for hdac ext stream") when it was originally called snd_hdac_ext_stream_set_spbmaxfifo, it was renamed snd_hdac_ext_stream_get_spbmaxfifo shortly after and was finally renamed to snd_hdac_stream_get_spbmaxfifo in 2022. But it was never used. Remove it. Signed-off-by: Dr. David Alan Gilbert Link: https://patch.msgid.link/20250505011037.340592-1-linux@treblig.org Signed-off-by: Takashi Iwai --- include/sound/hdaudio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index b098ceadbe74..6257152fa0ce 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -598,8 +598,6 @@ void snd_hdac_stream_spbcap_enable(struct hdac_bus *chip, bool enable, int index); int snd_hdac_stream_set_spib(struct hdac_bus *bus, struct hdac_stream *azx_dev, u32 value); -int snd_hdac_stream_get_spbmaxfifo(struct hdac_bus *bus, - struct hdac_stream *azx_dev); void snd_hdac_stream_drsm_enable(struct hdac_bus *bus, bool enable, int index); int snd_hdac_stream_wait_drsm(struct hdac_stream *azx_dev); -- cgit v1.2.3 From 5d51fdd122d7dcb1e1f7ef8ccd13d258bfd0b6b8 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Thu, 8 May 2025 01:02:25 +0100 Subject: ALSA: gus: Remove deadcode snd_gus_use_dec(), snd_gus_use_inc() and snd_gf1_print_voice_registers() last uses were removed in 2007 by commit e5723b41abe5 ("[ALSA] Remove sequencer instrument layer") Remove them. While there, remove big #if 0 blocks next to the code being deleted. Signed-off-by: Dr. David Alan Gilbert Link: https://patch.msgid.link/20250508000225.195766-1-linux@treblig.org Signed-off-by: Takashi Iwai --- include/sound/gus.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/sound/gus.h b/include/sound/gus.h index cd8da68cab92..bdd60ffe15c2 100644 --- a/include/sound/gus.h +++ b/include/sound/gus.h @@ -578,14 +578,8 @@ int snd_gf1_new_mixer(struct snd_gus_card * gus); int snd_gf1_pcm_new(struct snd_gus_card *gus, int pcm_dev, int control_index); -#ifdef CONFIG_SND_DEBUG -extern void snd_gf1_print_voice_registers(struct snd_gus_card * gus); -#endif - /* gus.c */ -int snd_gus_use_inc(struct snd_gus_card * gus); -void snd_gus_use_dec(struct snd_gus_card * gus); int snd_gus_create(struct snd_card *card, unsigned long port, int irq, int dma1, int dma2, -- cgit v1.2.3 From 59529bbe642de4eb2191a541d9b4bae7eb73862e Mon Sep 17 00:00:00 2001 From: Huang Yiwei Date: Wed, 7 May 2025 12:57:57 +0800 Subject: firmware: SDEI: Allow sdei initialization without ACPI_APEI_GHES SDEI usually initialize with the ACPI table, but on platforms where ACPI is not used, the SDEI feature can still be used to handle specific firmware calls or other customized purposes. Therefore, it is not necessary for ARM_SDE_INTERFACE to depend on ACPI_APEI_GHES. In commit dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in acpi_init()"), to make APEI ready earlier, sdei_init was moved into acpi_ghes_init instead of being a standalone initcall, adding ACPI_APEI_GHES dependency to ARM_SDE_INTERFACE. This restricts the flexibility and usability of SDEI. This patch corrects the dependency in Kconfig and splits sdei_init() into two separate functions: sdei_init() and acpi_sdei_init(). sdei_init() will be called by arch_initcall and will only initialize the platform driver, while acpi_sdei_init() will initialize the device from acpi_ghes_init() when ACPI is ready. This allows the initialization of SDEI without ACPI_APEI_GHES enabled. Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()") Cc: Shuai Xue Signed-off-by: Huang Yiwei Reviewed-by: Shuai Xue Reviewed-by: Gavin Shan Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20250507045757.2658795-1-quic_hyiwei@quicinc.com Signed-off-by: Will Deacon --- include/linux/arm_sdei.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 255701e1251b..f652a5028b59 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -46,12 +46,12 @@ int sdei_unregister_ghes(struct ghes *ghes); /* For use by arch code when CPU hotplug notifiers are not appropriate. 
*/ int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); -void __init sdei_init(void); +void __init acpi_sdei_init(void); void sdei_handler_abort(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } -static inline void sdei_init(void) { } +static inline void acpi_sdei_init(void) { } static inline void sdei_handler_abort(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ -- cgit v1.2.3 From 812bca7f7e73778a7022f862dc8c7c7d38b9a760 Mon Sep 17 00:00:00 2001 From: Xi Pardee Date: Fri, 25 Apr 2025 12:52:29 -0700 Subject: platform/x86:intel/vsec: Change return type of intel_vsec_register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the return type of intel_vsec_register() to int. The current implementation does not indicate whether the registration failed or not. Change it to return an error code if it fails or if the INTEL_VSEC config is not set. This is a preparation step to introduce a new SSRAM Telemetry driver that will be using this API. Signed-off-by: Xi Pardee Link: https://lore.kernel.org/r/20250425195237.493129-2-xi.pardee@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_vsec.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index b94beab64610..bc95821f1bfb 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -139,12 +139,13 @@ static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device } #if IS_ENABLED(CONFIG_INTEL_VSEC) -void intel_vsec_register(struct pci_dev *pdev, +int intel_vsec_register(struct pci_dev *pdev, struct intel_vsec_platform_info *info); #else -static inline void intel_vsec_register(struct pci_dev *pdev, +static inline int intel_vsec_register(struct pci_dev *pdev, struct intel_vsec_platform_info *info) { + return -ENODEV; } #endif #endif -- cgit v1.2.3 From feea7bd6b02d43a794e3f065650d89cf8d8e8e59 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Sun, 23 Mar 2025 15:34:21 +1300 Subject: platform/x86: asus-wmi: Refactor Ally suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust how the CSEE direct call hack is used. Months of testing, combined with help from ASUS to determine the actual cause of the suspend issues, have resulted in this refactoring, which immensely improves reliability for devices which do not have the following minimum MCU FW version: - ROG Ally X: 313 - ROG Ally 1: 319 For MCU FW versions that match the minimum or above, the CSEE hack is disabled and mcu_powersave is set to on by default, as there are no negatives beyond a slightly slower device reinitialization due to the MCU being powered off. As this is set only at module load time, it is still possible for the mcu_powersave sysfs attribute to change it at runtime if so desired. Signed-off-by: Luke D.
Jones Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250323023421.78012-3-luke@ljones.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/asus-wmi.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 783e2a336861..8a515179113d 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -157,9 +157,28 @@ #define ASUS_WMI_DSTS_MAX_BRIGTH_MASK 0x0000FF00 #define ASUS_WMI_DSTS_LIGHTBAR_MASK 0x0000000F +enum asus_ally_mcu_hack { + ASUS_WMI_ALLY_MCU_HACK_INIT, + ASUS_WMI_ALLY_MCU_HACK_ENABLED, + ASUS_WMI_ALLY_MCU_HACK_DISABLED, +}; + #if IS_REACHABLE(CONFIG_ASUS_WMI) +void set_ally_mcu_hack(enum asus_ally_mcu_hack status); +void set_ally_mcu_powersave(bool enabled); +int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, u32 *retval); int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval); #else +static inline void set_ally_mcu_hack(enum asus_ally_mcu_hack status) +{ +} +static inline void set_ally_mcu_powersave(bool enabled) +{ +} +static inline int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, u32 *retval) +{ + return -ENODEV; +} static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval) { -- cgit v1.2.3 From d6644d737bec473a38dbd44a71553cacd636a920 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 6 May 2025 09:35:30 -0700 Subject: platform/x86: ISST: Support SST-PP revision 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SST PP revision 2 added fabric 1 P0, P1 and Pm frequencies. Export them by using a new IOCTL ISST_IF_GET_PERF_LEVEL_FABRIC_INFO. This IOCTL requires platforms with SST PP revision 2 or higher. To accommodate potential future increases in fabric count and avoid ABI changes, support is extended for up to 8 fabrics. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20250506163531.1061185-3-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/uapi/linux/isst_if.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/isst_if.h b/include/uapi/linux/isst_if.h index 0df1a1c3caf4..8197a4800604 100644 --- a/include/uapi/linux/isst_if.h +++ b/include/uapi/linux/isst_if.h @@ -375,6 +375,30 @@ struct isst_perf_level_data_info { __u16 trl_freq_mhz[TRL_MAX_LEVELS][TRL_MAX_BUCKETS]; }; +#define MAX_FABRIC_COUNT 8 + +/** + * struct isst_perf_level_fabric_info - Structure to get SST-PP fabric details + * @socket_id: Socket/package id + * @power_domain_id: Power Domain id + * @level: SST-PP level for which caller wants to get information + * @max_fabrics: Count of fabrics in resonse + * @p0_fabric_freq_mhz: Fabric (Uncore) maximum frequency + * @p1_fabric_freq_mhz: Fabric (Uncore) TDP frequency + * @pm_fabric_freq_mhz: Fabric (Uncore) minimum frequency + * + * Structure used to get information on frequencies for fabrics. 
+ */ +struct isst_perf_level_fabric_info { + __u8 socket_id; + __u8 power_domain_id; + __u16 level; + __u16 max_fabrics; + __u16 p0_fabric_freq_mhz[MAX_FABRIC_COUNT]; + __u16 p1_fabric_freq_mhz[MAX_FABRIC_COUNT]; + __u16 pm_fabric_freq_mhz[MAX_FABRIC_COUNT]; +}; + /** * struct isst_perf_level_cpu_mask - Structure to get SST-PP level CPU mask * @socket_id: Socket/package id @@ -471,5 +495,7 @@ struct isst_turbo_freq_info { #define ISST_IF_GET_BASE_FREQ_INFO _IOR(ISST_IF_MAGIC, 14, struct isst_base_freq_info *) #define ISST_IF_GET_BASE_FREQ_CPU_MASK _IOR(ISST_IF_MAGIC, 15, struct isst_perf_level_cpu_mask *) #define ISST_IF_GET_TURBO_FREQ_INFO _IOR(ISST_IF_MAGIC, 16, struct isst_turbo_freq_info *) +#define ISST_IF_GET_PERF_LEVEL_FABRIC_INFO _IOR(ISST_IF_MAGIC, 17,\ + struct isst_perf_level_fabric_info *) #endif -- cgit v1.2.3 From 9950f94e485908fc42e7bb2533aff13b7e97a36c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 5 May 2025 16:25:58 +0100 Subject: platform/x86/sony-laptop: Remove unused sony laptop camera code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ba47652ba655 ("media: meye: remove this deprecated driver") removed the meye driver but left behind the code in sony-laptop.c which that driver used to call. Remove the sony_pic_camera_command() function, and the set of defines (SONY_PIC_COMMAND_*) in a header used for the interface and the static helpers it called. Cleanup remaining #defines. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250505152558.40526-1-linux@treblig.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/sony-laptop.h | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 include/linux/sony-laptop.h (limited to 'include') diff --git a/include/linux/sony-laptop.h b/include/linux/sony-laptop.h deleted file mode 100644 index 1e3c92feea6e..000000000000 --- a/include/linux/sony-laptop.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SONYLAPTOP_H_ -#define _SONYLAPTOP_H_ - -#include - -#ifdef __KERNEL__ - -/* used only for communication between v4l and sony-laptop */ - -#define SONY_PIC_COMMAND_GETCAMERA 1 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERA 2 -#define SONY_PIC_COMMAND_GETCAMERABRIGHTNESS 3 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERABRIGHTNESS 4 -#define SONY_PIC_COMMAND_GETCAMERACONTRAST 5 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERACONTRAST 6 -#define SONY_PIC_COMMAND_GETCAMERAHUE 7 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERAHUE 8 -#define SONY_PIC_COMMAND_GETCAMERACOLOR 9 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERACOLOR 10 -#define SONY_PIC_COMMAND_GETCAMERASHARPNESS 11 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERASHARPNESS 12 -#define SONY_PIC_COMMAND_GETCAMERAPICTURE 13 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERAPICTURE 14 -#define SONY_PIC_COMMAND_GETCAMERAAGC 15 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERAAGC 16 -#define SONY_PIC_COMMAND_GETCAMERADIRECTION 17 /* obsolete */ -#define SONY_PIC_COMMAND_GETCAMERAROMVERSION 18 /* obsolete */ -#define SONY_PIC_COMMAND_GETCAMERAREVISION 19 /* obsolete */ - -#if IS_ENABLED(CONFIG_SONY_LAPTOP) -int sony_pic_camera_command(int command, u8 value); -#else -static inline int sony_pic_camera_command(int command, u8 value) { return 0; } -#endif - -#endif /* __KERNEL__ */ - -#endif /* _SONYLAPTOP_H_ */ -- cgit v1.2.3 From 88cefd99ee265b18f851b911a75282146ecabc3d Mon Sep 17 
00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Apr 2025 15:38:30 -0400 Subject: ftrace: Show subops in enabled_functions The function graph infrastructure uses subops of the function tracer. These are not shown in enabled_functions. Add a "subops:" section to the enabled_functions line to show what functions are attached via subops. If the subops is from the function_graph infrastructure, then show the entry and return callbacks that are attached. Here's an example of the output: schedule_on_each_cpu (1) tramp: 0xffffffffc03ef000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 subops: {ent:trace_graph_entry+0x0/0x20 ret:trace_graph_return+0x0/0x150} Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250410153830.5d97f108@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fbabc3d848b3..fc939ca2ff66 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -328,6 +328,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * DIRECT - Used by the direct ftrace_ops helper for direct functions * (internal ftrace only, should not be used by others) * SUBOP - Is controlled by another op in field managed. + * GRAPH - Is a component of the fgraph_ops structure */ enum { FTRACE_OPS_FL_ENABLED = BIT(0), @@ -349,6 +350,7 @@ enum { FTRACE_OPS_FL_PERMANENT = BIT(16), FTRACE_OPS_FL_DIRECT = BIT(17), FTRACE_OPS_FL_SUBOP = BIT(18), + FTRACE_OPS_FL_GRAPH = BIT(19), }; #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS -- cgit v1.2.3 From c82b6357a5465a3222780ac5d3edcdfb02208cc3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 30 Apr 2025 15:07:03 -0400 Subject: Bluetooth: hci_event: Fix not using key encryption size when its known This fixes the regression introduced by 50c1241e6a8a ("Bluetooth: l2cap: Check encryption key size on incoming connection"), which added a check via l2cap_check_enc_key_size() that looks at hcon->enc_key_size; that field may not be initialized if HCI_OP_READ_ENC_KEY_SIZE is still pending. If the key encryption size is known, because it was previously read using HCI_OP_READ_ENC_KEY_SIZE, then store it as part of the link_key/smp_ltk structures so that the next time the encryption is changed their values are used as conn->enc_key_size, thus avoiding the race against HCI_OP_READ_ENC_KEY_SIZE. Now that the encryption key size is stored as part of the key information, the code also checks that there is no security downgrade if HCI_OP_READ_ENC_KEY_SIZE returns a value smaller than what was previously stored.
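The hci_event.c side of the change is not visible in this include-only diff; a rough sketch of how the new helper is meant to be used (example_encrypt_change() is a hypothetical name, not the actual caller):

#include <net/bluetooth/hci_core.h>

/* Sketch only: prefer the key size stored with the link key/LTK over
 * waiting for a racy HCI_OP_READ_ENC_KEY_SIZE round trip. */
static void example_encrypt_change(struct hci_conn *conn)
{
        u8 *key_enc_size = hci_conn_key_enc_size(conn);

        if (key_enc_size)
                conn->enc_key_size = *key_enc_size;
        /* otherwise fall back to issuing HCI_OP_READ_ENC_KEY_SIZE */
}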
Link: https://bugzilla.kernel.org/show_bug.cgi?id=220061 Link: https://bugzilla.kernel.org/show_bug.cgi?id=220063 Fixes: 522e9ed157e3 ("Bluetooth: l2cap: Check encryption key size on incoming connection") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 522d837a23fa..54bfeeaa0995 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1798,6 +1798,7 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); +u8 *hci_conn_key_enc_size(struct hci_conn *conn); struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, -- cgit v1.2.3 From 53eddae9af0c0b46f9c77a02d23c21c1aa824739 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 May 2025 20:47:32 +0200 Subject: platform/x86: int3472: Move common.h to public includes, symbols to INTEL_INT3472 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the common.h header file to include/linux/platform_data/x86/int3472.h and add a "INTEL_INT3472" kernel-symbol-namespace to the exported symbols. This is a preparation patch for exporting some more symbols for re-use in the atomisp driver. Signed-off-by: Hans de Goede Reviewed-by: Sakari Ailus Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250507184737.154747-2-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/int3472.h | 164 ++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 include/linux/platform_data/x86/int3472.h (limited to 'include') diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h new file mode 100644 index 000000000000..4cf02df6f753 --- /dev/null +++ b/include/linux/platform_data/x86/int3472.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel INT3472 ACPI camera sensor power-management support + * + * Author: Dan Scally + */ + +#ifndef __PLATFORM_DATA_X86_INT3472_H +#define __PLATFORM_DATA_X86_INT3472_H + +#include +#include +#include +#include +#include +#include + +/* FIXME drop this once the I2C_DEV_NAME_FORMAT macro has been added to include/linux/i2c.h */ +#ifndef I2C_DEV_NAME_FORMAT +#define I2C_DEV_NAME_FORMAT "i2c-%s" +#endif + +/* PMIC GPIO Types */ +#define INT3472_GPIO_TYPE_RESET 0x00 +#define INT3472_GPIO_TYPE_POWERDOWN 0x01 +#define INT3472_GPIO_TYPE_POWER_ENABLE 0x0b +#define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c +#define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d +#define INT3472_GPIO_TYPE_HANDSHAKE 0x12 + +#define INT3472_PDEV_MAX_NAME_LEN 23 +#define INT3472_MAX_SENSOR_GPIOS 3 +#define INT3472_MAX_REGULATORS 3 + +/* E.g. "avdd\0" */ +#define GPIO_SUPPLY_NAME_LENGTH 5 +/* 12 chars for acpi_dev_name() + "-", e.g. "ABCD1234:00-" */ +#define GPIO_REGULATOR_NAME_LENGTH (12 + GPIO_SUPPLY_NAME_LENGTH) +/* lower- and upper-case mapping */ +#define GPIO_REGULATOR_SUPPLY_MAP_COUNT 2 +/* + * Ensure the GPIO is driven low/high for at least 2 ms before changing. + * + * 2 ms has been chosen because it is the minimum time ovXXXX sensors need to + * have their reset line driven logical high to properly register a reset. 
+ */ +#define GPIO_REGULATOR_ENABLE_TIME (2 * USEC_PER_MSEC) +#define GPIO_REGULATOR_OFF_ON_DELAY (2 * USEC_PER_MSEC) + +#define INT3472_LED_MAX_NAME_LEN 32 + +#define CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET 86 + +#define INT3472_REGULATOR(_name, _ops, _enable_time, _off_on_delay) \ + (const struct regulator_desc) { \ + .name = _name, \ + .type = REGULATOR_VOLTAGE, \ + .ops = _ops, \ + .owner = THIS_MODULE, \ + .enable_time = _enable_time, \ + .off_on_delay = _off_on_delay, \ + } + +#define to_int3472_clk(hw) \ + container_of(hw, struct int3472_clock, clk_hw) + +#define to_int3472_device(clk) \ + container_of(clk, struct int3472_discrete_device, clock) + +struct acpi_device; +struct dmi_system_id; +struct i2c_client; +struct platform_device; + +struct int3472_cldb { + u8 version; + /* + * control logic type + * 0: UNKNOWN + * 1: DISCRETE(CRD-D) + * 2: PMIC TPS68470 + * 3: PMIC uP6641 + */ + u8 control_logic_type; + u8 control_logic_id; + u8 sensor_card_sku; + u8 reserved[10]; + u8 clock_source; + u8 reserved2[17]; +}; + +struct int3472_discrete_quirks { + /* For models where AVDD GPIO is shared between sensors */ + const char *avdd_second_sensor; +}; + +struct int3472_gpio_regulator { + /* SUPPLY_MAP_COUNT * 2 to make room for second sensor mappings */ + struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2]; + char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH]; + char regulator_name[GPIO_REGULATOR_NAME_LENGTH]; + struct regulator_dev *rdev; + struct regulator_desc rdesc; +}; + +struct int3472_discrete_device { + struct acpi_device *adev; + struct device *dev; + struct acpi_device *sensor; + const char *sensor_name; + + const struct int3472_sensor_config *sensor_config; + + struct int3472_gpio_regulator regulators[INT3472_MAX_REGULATORS]; + + struct int3472_clock { + struct clk *clk; + struct clk_hw clk_hw; + struct clk_lookup *cl; + struct gpio_desc *ena_gpio; + u32 frequency; + u8 imgclk_index; + } clock; + + struct int3472_pled { + struct led_classdev classdev; + struct led_lookup_data lookup; + char name[INT3472_LED_MAX_NAME_LEN]; + struct gpio_desc *gpio; + } pled; + + struct int3472_discrete_quirks quirks; + + unsigned int ngpios; /* how many GPIOs have we seen */ + unsigned int n_sensor_gpios; /* how many have we mapped to sensor */ + unsigned int n_regulator_gpios; /* how many have we mapped to a regulator */ + struct gpiod_lookup_table gpios; +}; + +extern const struct dmi_system_id skl_int3472_discrete_quirks[]; + +union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev, + char *id); +int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb); +int skl_int3472_get_sensor_adev_and_name(struct device *dev, + struct acpi_device **sensor_adev_ret, + const char **name_ret); + +int skl_int3472_register_gpio_clock(struct int3472_discrete_device *int3472, + struct gpio_desc *gpio); +int skl_int3472_register_dsm_clock(struct int3472_discrete_device *int3472); +void skl_int3472_unregister_clock(struct int3472_discrete_device *int3472); + +int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, + struct gpio_desc *gpio, + unsigned int enable_time, + const char *supply_name, + const char *second_sensor); +void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472); + +int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gpio_desc *gpio); +void skl_int3472_unregister_pled(struct int3472_discrete_device *int3472); + +#endif -- cgit v1.2.3 From 
1e5d088a52c207bcef6a43a6f6ffe162c514ed64 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 May 2025 20:47:33 +0200 Subject: platform/x86: int3472: Stop using devm_gpiod_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intent is to re-use the INT3472 code for parsing Intel camera sensor GPIOs and mapping them to the sensor for the atomisp driver, which currently has duplicate code. On atomisp devices there is no special INT3472 ACPI device, instead the Intel _DSM to get the GPIO type is part of the ACPI device for the sensor itself. To deal with this the mapping is done from ipu_bridge_init() instead of from a platform-device probe() function, there is no device to tie the lifetime of the gpiod_get() calls done by the INT3472 code to. Switch from devm_gpiod_get() to plain gpiod_get() + explicit gpiod_put() calls, to prepare for the code being re-used in the atomisp driver. Signed-off-by: Hans de Goede Reviewed-by: Sakari Ailus Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250507184737.154747-3-hdegoede@redhat.com Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/int3472.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 4cf02df6f753..0a835cc85c67 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -99,6 +99,7 @@ struct int3472_gpio_regulator { struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2]; char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH]; char regulator_name[GPIO_REGULATOR_NAME_LENGTH]; + struct gpio_desc *ena_gpio; struct regulator_dev *rdev; struct regulator_desc rdesc; }; -- cgit v1.2.3 From 1cfa1bb9b4033e51d3c3f5ed5bf55475e57cc686 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 May 2025 20:47:34 +0200 Subject: platform/x86: int3472: Export int3472_discrete_parse_crs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment the atomisp has duplicate code for parsing Intel camera sensor GPIOS and calling the special 79234640-9e10-4fea-a5c1-b5aa8b19756f _DSM to get the GPIO type and map it to the sensor. Export int3472_discrete_parse_crs() so that the atomisp driver can reuse the INT3472 code for this. 
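A hedged sketch of what such a consumer outside the INT3472 platform driver could look like once the function is exported; exactly which fields of struct int3472_discrete_device must be populated before the call depends on the rest of the series, so treat the setup below (and the example_map_sensor_gpios() name) as illustrative only:

#include <linux/acpi.h>
#include <linux/device.h>
#include <linux/platform_data/x86/int3472.h>

static int example_map_sensor_gpios(struct device *dev, struct acpi_device *adev)
{
        struct int3472_discrete_device *int3472;
        int ret;

        int3472 = devm_kzalloc(dev, sizeof(*int3472), GFP_KERNEL);
        if (!int3472)
                return -ENOMEM;

        int3472->dev = dev;
        int3472->adev = adev;                     /* device carrying the GPIO _CRS/_DSM */
        int3472->sensor = adev;                   /* on atomisp, the sensor device itself */
        int3472->sensor_name = acpi_dev_name(adev);

        ret = int3472_discrete_parse_crs(int3472);
        if (ret)
                int3472_discrete_cleanup(int3472);

        return ret;
}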
Signed-off-by: Hans de Goede Reviewed-by: Sakari Ailus Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250507184737.154747-4-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/int3472.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 0a835cc85c67..89410f0cb73a 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -147,6 +147,9 @@ int skl_int3472_get_sensor_adev_and_name(struct device *dev, struct acpi_device **sensor_adev_ret, const char **name_ret); +int int3472_discrete_parse_crs(struct int3472_discrete_device *int3472); +void int3472_discrete_cleanup(struct int3472_discrete_device *int3472); + int skl_int3472_register_gpio_clock(struct int3472_discrete_device *int3472, struct gpio_desc *gpio); int skl_int3472_register_dsm_clock(struct int3472_discrete_device *int3472); -- cgit v1.2.3 From 45adb05473aa4afd6ff19fd0c46c17a6294ae788 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 May 2025 20:47:35 +0200 Subject: platform/x86: int3472: Remove unused sensor_config struct member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sensor_config is not used anywhere and its struct int3472_sensor_config type also is not declared anywhere, so drop it. Signed-off-by: Hans de Goede Reviewed-by: Sakari Ailus Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250507184737.154747-5-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/int3472.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 89410f0cb73a..78276a11c48d 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -110,8 +110,6 @@ struct int3472_discrete_device { struct acpi_device *sensor; const char *sensor_name; - const struct int3472_sensor_config *sensor_config; - struct int3472_gpio_regulator regulators[INT3472_MAX_REGULATORS]; struct int3472_clock { -- cgit v1.2.3 From 402dd9f02ce447e2253953b5f84a2b62fed00889 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 May 2025 07:14:29 +0200 Subject: f2fs: remove wbc->for_reclaim handling Since commits 7ff0104a8052 ("f2fs: Remove f2fs_write_node_page()") and 3b47398d9861 ("f2fs: Remove f2fs_write_meta_page()"), f2fs can't be called from reclaim context any more. Remove all code keyed off the wbc->for_reclaim flag, which is now only set for writing out swap or shmem pages inside the swap code, but never passed to file systems.
Signed-off-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index eb3b2f1326b1..edbbd869078f 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1472,7 +1472,6 @@ TRACE_EVENT(f2fs_writepages, __field(char, for_kupdate) __field(char, for_background) __field(char, tagged_writepages) - __field(char, for_reclaim) __field(char, range_cyclic) __field(char, for_sync) ), @@ -1491,14 +1490,13 @@ TRACE_EVENT(f2fs_writepages, __entry->for_kupdate = wbc->for_kupdate; __entry->for_background = wbc->for_background; __entry->tagged_writepages = wbc->tagged_writepages; - __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; __entry->for_sync = wbc->for_sync; ), TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, " "skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, " - "kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u", + "kupdate %u background %u tagged %u cyclic %u sync %u", show_dev_ino(__entry), show_block_type(__entry->type), show_file_type(__entry->dir), @@ -1511,7 +1509,6 @@ TRACE_EVENT(f2fs_writepages, __entry->for_kupdate, __entry->for_background, __entry->tagged_writepages, - __entry->for_reclaim, __entry->range_cyclic, __entry->for_sync) ); -- cgit v1.2.3 From 190faecf64c54bb5f9d8f0ea2a6628078d0f2c83 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 22 Apr 2025 15:05:18 -0600 Subject: overflow: Add STACK_FLEX_ARRAY_SIZE() helper Add new STACK_FLEX_ARRAY_SIZE() helper to get the size of a flexible-array member defined using DEFINE_FLEX()/DEFINE_RAW_FLEX() at compile time. This is essentially the same as ARRAY_SIZE() but for on-stack flexible-array members. Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/83d53744e11c80eb3f03765238cbe648855f4168.1745355442.git.gustavoars@kernel.org Signed-off-by: Kees Cook --- include/linux/overflow.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 6ee67c20b575..f33d74dac06f 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -420,6 +420,8 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * flexible array member. * Use __struct_size(@name) to get compile-time size of it afterwards. * Use __member_size(@name->member) to get compile-time size of @name members. + * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of + * elements in array @member. */ #define DEFINE_RAW_FLEX(type, name, member, count) \ _DEFINE_FLEX(type, name, member, count, = {}) @@ -438,8 +440,21 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * flexible array member. * Use __struct_size(@NAME) to get compile-time size of it afterwards. * Use __member_size(@NAME->member) to get compile-time size of @NAME members. + * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of + * elements in array @member. */ #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) +/** + * STACK_FLEX_ARRAY_SIZE() - helper macro for DEFINE_FLEX() family. + * Returns the number of elements in @array. + * + * @name: Name for a variable defined in DEFINE_RAW_FLEX()/DEFINE_FLEX(). 
+ * @array: Name of the array member. + */ +#define STACK_FLEX_ARRAY_SIZE(name, array) \ + (__member_size((name)->array) / sizeof(*(name)->array) + \ + __must_be_array((name)->array)) + #endif /* __LINUX_OVERFLOW_H */ -- cgit v1.2.3 From 47e36ed7840661a9f7fb53554a1b04a5f8daffea Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 1 May 2025 18:44:43 -0600 Subject: overflow: Fix direct struct member initialization in _DEFINE_FLEX() Currently, to statically initialize the struct members of the `type` object created by _DEFINE_FLEX(), the internal `obj` member must be explicitly referenced at the call site. See: struct flex { int a; int b; struct foo flex_array[]; }; _DEFINE_FLEX(struct flex, instance, flex_array, FIXED_SIZE, = { .obj = { .a = 0, .b = 1, }, }); This leaks _DEFINE_FLEX() internal implementation details and make the helper harder to use and read. Fix this and allow for a more natural and intuitive C99 init-style: _DEFINE_FLEX(struct flex, instance, flex_array, FIXED_SIZE, = { .a = 0, .b = 1, }); Note that before these changes, the `initializer` argument was optional, but now it's required. Also, update "counter" member initialization in DEFINE_FLEX(). Fixes: 26dd68d293fd ("overflow: add DEFINE_FLEX() for on-stack allocs") Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/aBQVeyKfLOkO9Yss@kspp Signed-off-by: Kees Cook --- include/linux/overflow.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f33d74dac06f..7b7be27ca113 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -396,7 +396,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. - * @initializer: initializer expression (could be empty for no init). + * @initializer: Initializer expression (e.g., pass `= { }` at minimum). */ #define _DEFINE_FLEX(type, name, member, count, initializer...) \ _Static_assert(__builtin_constant_p(count), \ @@ -404,7 +404,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ - } name##_u initializer; \ + } name##_u = { .obj initializer }; \ type *name = (type *)&name##_u /** @@ -444,7 +444,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * elements in array @member. */ #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ - _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, }) /** * STACK_FLEX_ARRAY_SIZE() - helper macro for DEFINE_FLEX() family. -- cgit v1.2.3 From 0cecd37daef3d57e6656c0023978d5ec2d7409c1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 3 May 2025 11:46:18 -0700 Subject: gcc-plugins: Force full rebuild when plugins change There was no dependency between the plugins changing and the rest of the kernel being built. This could cause strange behaviors as instrumentation could vary between targets depending on when they were built. Generate a new header file, gcc-plugins.h, any time the GCC plugins change. Include the header file in compiler-version.h when its associated feature name, GCC_PLUGINS, is defined. This will be picked up by fixdep and force rebuilds where needed. 
Add a generic "touch" kbuild command, which will be used again in a following patch. Add a "normalize_path" string helper to make the "TOUCH" output less ugly. Link: https://lore.kernel.org/r/20250503184623.2572355-1-kees@kernel.org Tested-by: Nicolas Schier Reviewed-by: Nicolas Schier Signed-off-by: Kees Cook --- include/linux/compiler-version.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h index 573fa85b6c0c..5dba398a9412 100644 --- a/include/linux/compiler-version.h +++ b/include/linux/compiler-version.h @@ -12,3 +12,14 @@ * and add dependency on include/config/CC_VERSION_TEXT, which is touched * by Kconfig when the version string from the compiler changes. */ + +/* Additional tree-wide dependencies start here. */ + +/* + * If any of the GCC plugins change, we need to rebuild everything that + * was built with them, as they may have changed their behavior and those + * behaviors may need to be synchronized across all translation units. + */ +#ifdef GCC_PLUGINS +#include +#endif -- cgit v1.2.3 From 056000c471ea41db56de58f71b9fe727d6e68b9b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 3 May 2025 11:46:19 -0700 Subject: randstruct: Force full rebuild when seed changes While the randstruct GCC plugin was being rebuilt if the randstruct seed changed, Clang builds did not notice the change. This could result in differing struct layouts in a target depending on when it was built. Include the existing generated header file in compiler-version.h when its associated feature name, RANDSTRUCT, is defined. This will be picked up by fixdep and force rebuilds where needed. Link: https://lore.kernel.org/r/20250503184623.2572355-2-kees@kernel.org Reviewed-by: Nicolas Schier Tested-by: Nicolas Schier Signed-off-by: Kees Cook --- include/linux/compiler-version.h | 9 +++++++++ include/linux/vermagic.h | 1 - 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h index 5dba398a9412..4b9d39b37650 100644 --- a/include/linux/compiler-version.h +++ b/include/linux/compiler-version.h @@ -23,3 +23,12 @@ #ifdef GCC_PLUGINS #include #endif + +/* + * If the randstruct seed itself changes (whether for GCC plugins or + * Clang), the entire tree needs to be rebuilt since the randomization of + * structures may change between compilation units if not. + */ +#ifdef RANDSTRUCT +#include +#endif diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 939ceabcaf06..335c360d4f9b 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -33,7 +33,6 @@ #define MODULE_VERMAGIC_MODVERSIONS "" #endif #ifdef RANDSTRUCT -#include #define MODULE_RANDSTRUCT "RANDSTRUCT_" RANDSTRUCT_HASHED_SEED #else #define MODULE_RANDSTRUCT -- cgit v1.2.3 From 11bb1678e249e51cd748e8f91e5241b3ce71da3a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 3 May 2025 11:46:20 -0700 Subject: integer-wrap: Force full rebuild when .scl file changes Since the integer wrapping sanitizer's behavior depends on its associated .scl file, we must force a full rebuild if the file changes. If not, instrumentation may differ between targets based on when they were built. Generate a new header file, integer-wrap.h, any time the Clang .scl file changes. Include the header file in compiler-version.h when its associated feature name, INTEGER_WRAP, is defined. This will be picked up by fixdep and force rebuilds where needed. 
Acked-by: Justin Stitt Link: https://lore.kernel.org/r/20250503184623.2572355-3-kees@kernel.org Reviewed-by: Nicolas Schier Signed-off-by: Kees Cook --- include/linux/compiler-version.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h index 4b9d39b37650..ac1665a98a15 100644 --- a/include/linux/compiler-version.h +++ b/include/linux/compiler-version.h @@ -32,3 +32,13 @@ #ifdef RANDSTRUCT #include #endif + +/* + * If any external changes affect Clang's integer wrapping sanitizer + * behavior, a full rebuild is needed as the coverage for wrapping types + * may have changed, which may impact the expected behaviors that should + * not differ between compilation units. + */ +#ifdef INTEGER_WRAP +#include +#endif -- cgit v1.2.3 From e86e43907f945e7f2e48d1c5c9105801ca2b11b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 May 2025 19:53:30 +0200 Subject: timers: Rename init_timer_key() as timer_init_key() Move this API to the canonical timer_*() namespace. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250507175338.672442-3-mingo@kernel.org --- include/linux/timer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 10596d7c3a34..0c1c3aa723e5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -67,7 +67,7 @@ /* * LOCKDEP and DEBUG timer interfaces. */ -void init_timer_key(struct timer_list *timer, +void timer_init_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); @@ -83,7 +83,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, const char *name, struct lock_class_key *key) { - init_timer_key(timer, func, flags, name, key); + timer_init_key(timer, func, flags, name, key); } #endif @@ -91,7 +91,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define __init_timer(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_key((_timer), (_fn), (_flags), #_timer, &__key);\ + timer_init_key((_timer), (_fn), (_flags), #_timer, &__key);\ } while (0) #define __init_timer_on_stack(_timer, _fn, _flags) \ @@ -102,7 +102,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, } while (0) #else #define __init_timer(_timer, _fn, _flags) \ - init_timer_key((_timer), (_fn), (_flags), NULL, NULL) + timer_init_key((_timer), (_fn), (_flags), NULL, NULL) #define __init_timer_on_stack(_timer, _fn, _flags) \ init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL) #endif -- cgit v1.2.3 From 7879d10de3318af0b7893d1efc9d7654cd4ac1a6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 May 2025 19:53:31 +0200 Subject: timers: Rename init_timer_on_stack_key() as timer_init_key_on_stack() Move this API to the canonical timer_*() namespace. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250507175338.672442-4-mingo@kernel.org --- include/linux/timer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 0c1c3aa723e5..31127e8de010 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -72,12 +72,12 @@ void timer_init_key(struct timer_list *timer, const char *name, struct lock_class_key *key); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS -extern void init_timer_on_stack_key(struct timer_list *timer, +extern void timer_init_key_on_stack(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); #else -static inline void init_timer_on_stack_key(struct timer_list *timer, +static inline void timer_init_key_on_stack(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, @@ -97,14 +97,14 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define __init_timer_on_stack(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_on_stack_key((_timer), (_fn), (_flags), \ + timer_init_key_on_stack((_timer), (_fn), (_flags), \ #_timer, &__key); \ } while (0) #else #define __init_timer(_timer, _fn, _flags) \ timer_init_key((_timer), (_fn), (_flags), NULL, NULL) #define __init_timer_on_stack(_timer, _fn, _flags) \ - init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL) + timer_init_key_on_stack((_timer), (_fn), (_flags), NULL, NULL) #endif /** -- cgit v1.2.3 From 9505215b6b3233d38c5ee65cfd47b6a5a36adab9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 May 2025 19:53:32 +0200 Subject: timers: Rename __init_timer() as __timer_init() Move this API to the canonical __timer_*() namespace. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250507175338.672442-5-mingo@kernel.org --- include/linux/timer.h | 6 +++--- include/linux/workqueue.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 31127e8de010..11e1fac18f0e 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -88,7 +88,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer, #endif #ifdef CONFIG_LOCKDEP -#define __init_timer(_timer, _fn, _flags) \ +#define __timer_init(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ timer_init_key((_timer), (_fn), (_flags), #_timer, &__key);\ @@ -101,7 +101,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer, #_timer, &__key); \ } while (0) #else -#define __init_timer(_timer, _fn, _flags) \ +#define __timer_init(_timer, _fn, _flags) \ timer_init_key((_timer), (_fn), (_flags), NULL, NULL) #define __init_timer_on_stack(_timer, _fn, _flags) \ timer_init_key_on_stack((_timer), (_fn), (_flags), NULL, NULL) @@ -118,7 +118,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer, * be used and must be balanced with a call to destroy_timer_on_stack(). 
*/ #define timer_setup(timer, callback, flags) \ - __init_timer((timer), (callback), (flags)) + __timer_init((timer), (callback), (flags)) #define timer_setup_on_stack(timer, callback, flags) \ __init_timer_on_stack((timer), (callback), (flags)) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b0dc957c3e56..985f69f8fb99 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -316,7 +316,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ - __init_timer(&(_work)->timer, \ + __timer_init(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) -- cgit v1.2.3 From 9a716ac6eaaa73599921fa081c99b67bef589171 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 May 2025 19:53:33 +0200 Subject: timers: Rename __init_timer_on_stack() as __timer_init_on_stack() Move this API to the canonical __timer_*() namespace. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250507175338.672442-6-mingo@kernel.org --- include/linux/timer.h | 6 +++--- include/linux/workqueue.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 11e1fac18f0e..4e1237ff4b24 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -94,7 +94,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer, timer_init_key((_timer), (_fn), (_flags), #_timer, &__key);\ } while (0) -#define __init_timer_on_stack(_timer, _fn, _flags) \ +#define __timer_init_on_stack(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ timer_init_key_on_stack((_timer), (_fn), (_flags), \ @@ -103,7 +103,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer, #else #define __timer_init(_timer, _fn, _flags) \ timer_init_key((_timer), (_fn), (_flags), NULL, NULL) -#define __init_timer_on_stack(_timer, _fn, _flags) \ +#define __timer_init_on_stack(_timer, _fn, _flags) \ timer_init_key_on_stack((_timer), (_fn), (_flags), NULL, NULL) #endif @@ -121,7 +121,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer, __timer_init((timer), (callback), (flags)) #define timer_setup_on_stack(timer, callback, flags) \ - __init_timer_on_stack((timer), (callback), (flags)) + __timer_init_on_stack((timer), (callback), (flags)) #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void destroy_timer_on_stack(struct timer_list *timer); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 985f69f8fb99..c84da78878d9 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -324,7 +324,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \ do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ - __init_timer_on_stack(&(_work)->timer, \ + __timer_init_on_stack(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) -- cgit v1.2.3 From 220beffd36c26ac68e1c646f91b7ddf4f39039e6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 May 2025 19:53:34 +0200 Subject: timers: Rename NEXT_TIMER_MAX_DELTA as TIMER_NEXT_MAX_DELTA Move this macro to the canonical TIMER_* namespace. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250507175338.672442-7-mingo@kernel.org --- include/linux/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 4e1237ff4b24..68cf8e28bbe0 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -156,7 +156,7 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); * The jiffies value which is added to now, when there is no timer * in the timer wheel: */ -#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) +#define TIMER_NEXT_MAX_DELTA ((1UL << 30) - 1) extern void add_timer(struct timer_list *timer); extern void add_timer_local(struct timer_list *timer); -- cgit v1.2.3 From 751e6a394c2ecd08825d5ba527478e171b87144e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 May 2025 19:53:35 +0200 Subject: timers: Rename init_timers() as timers_init() Move this API to the canonical timers_*() namespace. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250507175338.672442-8-mingo@kernel.org --- include/linux/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 68cf8e28bbe0..153d07dad3cd 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -168,7 +168,7 @@ extern int timer_delete(struct timer_list *timer); extern int timer_shutdown_sync(struct timer_list *timer); extern int timer_shutdown(struct timer_list *timer); -extern void init_timers(void); +extern void timers_init(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); -- cgit v1.2.3 From 367ed4e35734d6e7bce1dbca426a5bf150d76905 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 May 2025 19:53:36 +0200 Subject: treewide, timers: Rename try_to_del_timer_sync() as timer_delete_sync_try() Move this API to the canonical timer_*() namespace. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250507175338.672442-9-mingo@kernel.org --- include/linux/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 153d07dad3cd..5b6ff90fcf81 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -162,7 +162,7 @@ extern void add_timer(struct timer_list *timer); extern void add_timer_local(struct timer_list *timer); extern void add_timer_global(struct timer_list *timer); -extern int try_to_del_timer_sync(struct timer_list *timer); +extern int timer_delete_sync_try(struct timer_list *timer); extern int timer_delete_sync(struct timer_list *timer); extern int timer_delete(struct timer_list *timer); extern int timer_shutdown_sync(struct timer_list *timer); -- cgit v1.2.3 From aad823aa3a7d675a8d0de478a04307f63e3725db Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 May 2025 19:53:37 +0200 Subject: treewide, timers: Rename destroy_timer_on_stack() as timer_destroy_on_stack() Move this API to the canonical timer_*() namespace. 
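Taken together, the rename series leaves the usual on-stack timer pattern looking roughly like this (sketch only; demo_timeout() and demo() are hypothetical, the API calls are the renamed ones from the patches above):

#include <linux/timer.h>
#include <linux/jiffies.h>

static void demo_timeout(struct timer_list *t)
{
        /* timer callback */
}

static void demo(void)
{
        struct timer_list t;

        timer_setup_on_stack(&t, demo_timeout, 0);      /* expands to __timer_init_on_stack() */
        mod_timer(&t, jiffies + HZ);

        /* ... */

        timer_shutdown_sync(&t);        /* timer_delete_sync_try() would fail instead of waiting */
        timer_destroy_on_stack(&t);     /* was destroy_timer_on_stack() */
}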
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250507175338.672442-10-mingo@kernel.org --- include/linux/timer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 5b6ff90fcf81..7b53043a2d25 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -115,7 +115,7 @@ static inline void timer_init_key_on_stack(struct timer_list *timer, * * Regular timer initialization should use either DEFINE_TIMER() above, * or timer_setup(). For timers on the stack, timer_setup_on_stack() must - * be used and must be balanced with a call to destroy_timer_on_stack(). + * be used and must be balanced with a call to timer_destroy_on_stack(). */ #define timer_setup(timer, callback, flags) \ __timer_init((timer), (callback), (flags)) @@ -124,9 +124,9 @@ static inline void timer_init_key_on_stack(struct timer_list *timer, __timer_init_on_stack((timer), (callback), (flags)) #ifdef CONFIG_DEBUG_OBJECTS_TIMERS -extern void destroy_timer_on_stack(struct timer_list *timer); +extern void timer_destroy_on_stack(struct timer_list *timer); #else -static inline void destroy_timer_on_stack(struct timer_list *timer) { } +static inline void timer_destroy_on_stack(struct timer_list *timer) { } #endif #define from_timer(var, callback_timer, timer_fieldname) \ -- cgit v1.2.3 From 8a023e86f3d999007f2687952afe78ef34a6aa91 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Tue, 6 May 2025 09:22:02 +0000 Subject: dt-bindings: clock: Add GRF clock definition for RK3528 These clocks are for SD/SDIO tuning purpose and come with registers in GRF syscon. Signed-off-by: Yao Zi Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250506092206.46143-2-ziyao@disroot.org Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rockchip,rk3528-cru.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rockchip,rk3528-cru.h b/include/dt-bindings/clock/rockchip,rk3528-cru.h index 55a448f5ed6d..0245a53fc334 100644 --- a/include/dt-bindings/clock/rockchip,rk3528-cru.h +++ b/include/dt-bindings/clock/rockchip,rk3528-cru.h @@ -414,6 +414,12 @@ #define MCLK_I2S2_2CH_SAI_SRC_PRE 402 #define MCLK_I2S3_8CH_SAI_SRC_PRE 403 #define MCLK_SDPDIF_SRC_PRE 404 +#define SCLK_SDMMC_DRV 405 +#define SCLK_SDMMC_SAMPLE 406 +#define SCLK_SDIO0_DRV 407 +#define SCLK_SDIO0_SAMPLE 408 +#define SCLK_SDIO1_DRV 409 +#define SCLK_SDIO1_SAMPLE 410 /* scmi-clocks indices */ #define SCMI_PCLK_KEYREADER 0 -- cgit v1.2.3 From f21923f3f410f84528b5e7bdcbe4afdc6f07010c Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 24 Apr 2025 09:13:54 +0100 Subject: dt-bindings: clock: renesas,r9a09g047-cpg: Add XSPI and GBETH PTP core clocks Add definitions for XSPI core clock and Gigabit Ethernet PTP reference core clocks in the R9A09G047 CPG DT bindings header file. The clk_spi is modelled as a fixed divider clock with parent clk_spix2 and factor two as both parent and child share same gating bit. 
Signed-off-by: Biju Das Acked-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250424081400.135028-2-biju.das.jz@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g047-cpg.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g047-cpg.h b/include/dt-bindings/clock/renesas,r9a09g047-cpg.h index 1d031bf6bf03..a27132f9a6c8 100644 --- a/include/dt-bindings/clock/renesas,r9a09g047-cpg.h +++ b/include/dt-bindings/clock/renesas,r9a09g047-cpg.h @@ -17,5 +17,8 @@ #define R9A09G047_CM33_CLK0 6 #define R9A09G047_CST_0_SWCLKTCK 7 #define R9A09G047_IOTOP_0_SHCLK 8 +#define R9A09G047_SPI_CLK_SPI 9 +#define R9A09G047_GBETH_0_CLK_PTP_REF_I 10 +#define R9A09G047_GBETH_1_CLK_PTP_REF_I 11 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G047_CPG_H__ */ -- cgit v1.2.3 From 6e06b641ca96c232e0b13f9b44b118742986bcd5 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sat, 3 May 2025 22:25:29 +0200 Subject: dt-bindings: clock: rk3036: add SCLK_USB480M clock-id Contrary to how it is implemented right now, the usb480m clock is a controllable mux that can switch between the 24MHz oscillator and the clock output of the usb2phy. Add the needed clock-id to allow setting this mux from DT. Acked-by: Conor Dooley Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250503202532.992033-2-heiko@sntech.de --- include/dt-bindings/clock/rk3036-cru.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rk3036-cru.h b/include/dt-bindings/clock/rk3036-cru.h index 99cc617e1e54..5cbc0e2b08ff 100644 --- a/include/dt-bindings/clock/rk3036-cru.h +++ b/include/dt-bindings/clock/rk3036-cru.h @@ -47,6 +47,7 @@ #define SCLK_MACREF 152 #define SCLK_MACPLL 153 #define SCLK_SFC 160 +#define SCLK_USB480M 161 /* aclk gates */ #define ACLK_DMAC2 194 -- cgit v1.2.3 From 01475aedfdfa33a5ee3219079426f5743367c624 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 28 Apr 2025 21:34:45 +0200 Subject: futex: Fix outdated comment in struct restart_block Since commit 2070887fdeac ("futex: fix restart in wait_requeue_pi"), futex_wait_requeue_pi() no longer uses restart_block. Update the comment accordingly. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250428193445.4571-1-namcao@linutronix.de --- include/linux/restart_block.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index 13f17676c5f4..7e50bbc94e47 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -26,7 +26,7 @@ struct restart_block { unsigned long arch_data; long (*fn)(struct restart_block *); union { - /* For futex_wait and futex_wait_requeue_pi */ + /* For futex_wait() */ struct { u32 __user *uaddr; u32 val; -- cgit v1.2.3 From f400565faa50737ac1d550d2c75128c0dad75765 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Apr 2025 18:11:27 +0200 Subject: perf: Remove too early and redundant CPU hotplug handling The CPU hotplug handlers are called twice: at prepare and online stage. Their role is to: 1) Enable/disable a CPU context. This is irrelevant and even buggy at the prepare stage because the CPU is still offline. On early secondary CPU up, creating an event attached to that CPU might silently fail because the CPU context is observed as online but the context installation's IPI failure is ignored. 
2) Update the scope cpumasks and re-migrate the events accordingly in the CPU down case. This is irrelevant at the prepare stage. 3) Remove the events attached to the context of the offlining CPU. It even uses an (unnecessary) IPI for it. This is also irrelevant at the prepare stage. Also none of the *_PREPARE and *_STARTING architecture perf related CPU hotplug callbacks rely on CPUHP_PERF_PREPARE. CPUHP_AP_PERF_ONLINE is enough and the right place to perform the work. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250424161128.29176-4-frederic@kernel.org --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 1987400000b4..df366ee15456 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -60,7 +60,6 @@ enum cpuhp_state { /* PREPARE section invoked on a control CPU */ CPUHP_OFFLINE = 0, CPUHP_CREATE_THREADS, - CPUHP_PERF_PREPARE, CPUHP_PERF_X86_PREPARE, CPUHP_PERF_X86_AMD_UNCORE_PREP, CPUHP_PERF_POWER, -- cgit v1.2.3 From 22c64f37e1d4e757b0073a72f1439c2c3509c5cb Mon Sep 17 00:00:00 2001 From: Mohan Kumar G Date: Mon, 5 May 2025 20:58:36 +0530 Subject: wifi: mac80211: Update MCS15 support in link_conf As per IEEE 802.11be-2024 - 9.4.2.321, EHT operation element contains MCS15 Disable subfield as the sixth bit, which is set when MCS15 support is not enabled. Get MCS15 support from EHT operation params and add it in link_conf so that driver can use this value to know if EHT-MCS 15 reception is enabled. Co-developed-by: Dhanavandhana Kannan Signed-off-by: Dhanavandhana Kannan Signed-off-by: Mohan Kumar G Link: https://patch.msgid.link/20250505152836.3266829-1-quic_mkumarg@quicinc.com [remove pointless !! for bool assignment] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + include/net/mac80211.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 17f917cb4540..420c7f9aa6ee 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2325,6 +2325,7 @@ struct ieee80211_eht_cap_elem { #define IEEE80211_EHT_OPER_EHT_DEF_PE_DURATION 0x04 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_LIMIT 0x08 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_EXP_MASK 0x30 +#define IEEE80211_EHT_OPER_MCS15_DISABLE 0x40 /** * struct ieee80211_eht_operation - eht operation element diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fdafc37d17cc..82617579d910 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -744,6 +744,7 @@ struct ieee80211_parsed_tpe { * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the * reception of an EHT TB PPDU on an RU that spans the entire PPDU * bandwidth + * @eht_disable_mcs15: disable EHT-MCS 15 reception capability. * @bss_param_ch_cnt: in BSS-mode, the BSS params change count. This * information is the latest known value. It can come from this link's * beacon or from a beacon sent by another link. @@ -852,6 +853,8 @@ struct ieee80211_bss_conf { bool eht_su_beamformee; bool eht_mu_beamformer; bool eht_80mhz_full_bw_ul_mumimo; + bool eht_disable_mcs15; + u8 bss_param_ch_cnt; u8 bss_param_ch_cnt_link_id; }; -- cgit v1.2.3 From 56a7b9f8b05981e929becb45a826558779bca6f6 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 13 Mar 2025 15:31:50 -0700 Subject: ratelimit: Create functions to handle ratelimit_state internals A number of ratelimit use cases do open-coded access to the ratelimit_state structure's ->missed field. This works, but is a bit messy and makes it more annoying to make changes to this field. Therefore, provide a ratelimit_state_inc_miss() function that increments the ->missed field, a ratelimit_state_get_miss() function that reads out the ->missed field, and a ratelimit_state_reset_miss() function that reads out that field, but that also resets its value to zero. These functions will replace client-code open-coded uses of ->missed. In addition, a new ratelimit_state_reset_interval() function encapsulates what was previously open-coded lock acquisition and direct field updates. [ paulmck: Apply kernel test robot feedback. ] Link: https://lore.kernel.org/all/fbe93a52-365e-47fe-93a4-44a44547d601@paulmck-laptop/ Link: https://lore.kernel.org/all/20250423115409.3425-1-spasswolf@web.de/ Signed-off-by: Paul E. McKenney Reviewed-by: Petr Mladek Cc: Andrew Morton Cc: Kuniyuki Iwashima Cc: Mateusz Guzik Cc: Steven Rostedt Cc: John Ogness Cc: Sergey Senozhatsky --- include/linux/ratelimit.h | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index b17e0cd0a30c..8400c5356c18 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -22,16 +22,46 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs) DEFAULT_RATELIMIT_BURST); } +static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs) +{ + rs->missed++; +} + +static inline int ratelimit_state_get_miss(struct ratelimit_state *rs) +{ + return rs->missed; +} + +static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs) +{ + int ret = rs->missed; + + rs->missed = 0; + return ret; +} + +static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&rs->lock, flags); + rs->interval = interval_init; + rs->begin = 0; + rs->printed = 0; + ratelimit_state_reset_miss(rs); + raw_spin_unlock_irqrestore(&rs->lock, flags); +} + static inline void ratelimit_state_exit(struct ratelimit_state *rs) { + int m; + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) return; - if (rs->missed) { - pr_warn("%s: %d output lines suppressed due to ratelimiting\n", - current->comm, rs->missed); - rs->missed = 0; - } + m = ratelimit_state_reset_miss(rs); + if (m) + pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m); } static inline void -- cgit v1.2.3 From 78bf44de47b3cec72ee8ddc14161d5b3212f25e0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Mar 2025 09:07:13 -0700 Subject: ratelimit: Convert the ->missed field to atomic_t The ratelimit_state structure's ->missed field is sometimes incremented locklessly, and it would be good to avoid lost counts. This is also needed to count the number of misses due to trylock failure. Therefore, convert the ratelimit_state structure's ->missed field to atomic_t. Link: https://lore.kernel.org/all/fbe93a52-365e-47fe-93a4-44a44547d601@paulmck-laptop/ Link: https://lore.kernel.org/all/20250423115409.3425-1-spasswolf@web.de/ Signed-off-by: Paul E. 
McKenney Reviewed-by: Petr Mladek Cc: Andrew Morton Cc: Kuniyuki Iwashima Cc: Mateusz Guzik Cc: Steven Rostedt Cc: John Ogness Cc: Sergey Senozhatsky --- include/linux/ratelimit.h | 9 +++------ include/linux/ratelimit_types.h | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 8400c5356c18..c78b92b3e5cd 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -24,20 +24,17 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs) static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs) { - rs->missed++; + atomic_inc(&rs->missed); } static inline int ratelimit_state_get_miss(struct ratelimit_state *rs) { - return rs->missed; + return atomic_read(&rs->missed); } static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs) { - int ret = rs->missed; - - rs->missed = 0; - return ret; + return atomic_xchg_relaxed(&rs->missed, 0); } static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init) diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index 765232ce0b5e..d21fe82b67f6 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -18,7 +18,7 @@ struct ratelimit_state { int interval; int burst; int printed; - int missed; + atomic_t missed; unsigned int flags; unsigned long begin; }; -- cgit v1.2.3 From e64a348dc148af41cec266b2143305a2d0228ee7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Mar 2025 15:32:54 -0700 Subject: ratelimit: Avoid jiffies=0 special case The ___ratelimit() function special-cases the jiffies-counter value of zero as "uninitialized". This works well on 64-bit systems, where the jiffies counter is not going to return to zero for more than half a billion years on systems with HZ=1000, but similar 32-bit systems take less than 50 days to wrap the jiffies counter. And although the consequences of wrapping the jiffies counter seem to be limited to minor confusion on the duration of the rate-limiting interval that happens to end at time zero, it is almost no work to avoid this confusion. Therefore, introduce a RATELIMIT_INITIALIZED bit to the ratelimit_state structure's ->flags field so that a ->begin value of zero is no longer special. Link: https://lore.kernel.org/all/fbe93a52-365e-47fe-93a4-44a44547d601@paulmck-laptop/ Link: https://lore.kernel.org/all/20250423115409.3425-1-spasswolf@web.de/ Signed-off-by: Paul E. 
McKenney Reviewed-by: Petr Mladek Cc: Andrew Morton Cc: Kuniyuki Iwashima Cc: Mateusz Guzik Cc: Steven Rostedt Cc: John Ogness Cc: Sergey Senozhatsky --- include/linux/ratelimit.h | 2 +- include/linux/ratelimit_types.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index c78b92b3e5cd..adfec24061d1 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -43,7 +43,7 @@ static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, in raw_spin_lock_irqsave(&rs->lock, flags); rs->interval = interval_init; - rs->begin = 0; + rs->flags &= ~RATELIMIT_INITIALIZED; rs->printed = 0; ratelimit_state_reset_miss(rs); raw_spin_unlock_irqrestore(&rs->lock, flags); diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index d21fe82b67f6..ef6711b6b229 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -11,6 +11,7 @@ /* issue num suppressed message on exit */ #define RATELIMIT_MSG_ON_RELEASE BIT(0) +#define RATELIMIT_INITIALIZED BIT(1) struct ratelimit_state { raw_spinlock_t lock; /* protect the state */ -- cgit v1.2.3 From cf8cfa8a9978bfe77f2648a04824a46d58258e68 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 9 Apr 2025 16:54:33 -0700 Subject: ratelimit: Reduce ___ratelimit() false-positive rate limiting Retain the locked design, but check rate-limiting even when the lock could not be acquired. Link: https://lore.kernel.org/all/Z_VRo63o2UsVoxLG@pathway.suse.cz/ Link: https://lore.kernel.org/all/fbe93a52-365e-47fe-93a4-44a44547d601@paulmck-laptop/ Link: https://lore.kernel.org/all/20250423115409.3425-1-spasswolf@web.de/ Signed-off-by: Petr Mladek Signed-off-by: Paul E. McKenney Cc: Petr Mladek Cc: Andrew Morton Cc: Kuniyuki Iwashima Cc: Mateusz Guzik Cc: Steven Rostedt Cc: John Ogness Cc: Sergey Senozhatsky --- include/linux/ratelimit.h | 2 +- include/linux/ratelimit_types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index adfec24061d1..7aaad158ee37 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -44,7 +44,7 @@ static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, in raw_spin_lock_irqsave(&rs->lock, flags); rs->interval = interval_init; rs->flags &= ~RATELIMIT_INITIALIZED; - rs->printed = 0; + atomic_set(&rs->rs_n_left, rs->burst); ratelimit_state_reset_miss(rs); raw_spin_unlock_irqrestore(&rs->lock, flags); } diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index ef6711b6b229..b19c4354540a 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -18,7 +18,7 @@ struct ratelimit_state { int interval; int burst; - int printed; + atomic_t rs_n_left; atomic_t missed; unsigned int flags; unsigned long begin; -- cgit v1.2.3 From f4358f17a62e7a42f72b02d8e8fbd6b760b69f01 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 9 May 2025 00:36:52 +0000 Subject: ASoC: soc.h: remove snd_soc_disconnect_sync() There is no snd_soc_disconnect_sync() implementation, and no one is using it. Let's remove it. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87jz6qpql7.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index e425394c3ef4..5d7b98f1dd71 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -469,8 +469,6 @@ static inline int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd) } #endif -void snd_soc_disconnect_sync(struct device *dev); - struct snd_soc_pcm_runtime *snd_soc_get_pcm_runtime(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); -- cgit v1.2.3 From 321f9db5563e1a6c89546b19bd031269aa3f17b8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 9 May 2025 00:36:42 +0000 Subject: ASoC: soc.h: remove unnecessary definitions We don't need these definitions. Remove them. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87ldr6pqlh.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 5d7b98f1dd71..8d113ee8c2bc 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -394,27 +394,20 @@ struct platform_device; #define SOC_ENUM_SINGLE_VIRT_DECL(name, xtexts) \ const struct soc_enum name = SOC_ENUM_SINGLE_VIRT(ARRAY_SIZE(xtexts), xtexts) -struct snd_jack; struct snd_soc_card; -struct snd_soc_pcm_stream; -struct snd_soc_ops; struct snd_soc_pcm_runtime; struct snd_soc_dai; struct snd_soc_dai_driver; struct snd_soc_dai_link; struct snd_soc_component; struct snd_soc_component_driver; -struct soc_enum; struct snd_soc_jack; -struct snd_soc_jack_zone; struct snd_soc_jack_pin; #include #include #include -struct snd_soc_jack_gpio; - enum snd_soc_pcm_subclass { SND_SOC_PCM_CLASS_PCM = 0, SND_SOC_PCM_CLASS_BE = 1, -- cgit v1.2.3 From b9e22b35d4598aefed642928ed2856a9900e5b37 Mon Sep 17 00:00:00 2001 From: Cedric Xing Date: Thu, 8 May 2025 19:07:39 -0700 Subject: tsm-mr: Add TVM Measurement Register support Introduce a new TSM Measurement helper library (tsm-mr) for TVM guest drivers to expose MRs (Measurement Registers) as sysfs attributes, with Crypto Agility support. Add the following new APIs (see include/linux/tsm-mr.h for details): - tsm_mr_create_attribute_group(): Take on input a `struct tsm_measurements` instance, which includes one `struct tsm_measurement_register` per MR with properties like `TSM_MR_F_READABLE` and `TSM_MR_F_WRITABLE`, to determine the supported operations and create the sysfs attributes accordingly. On success, return a `struct attribute_group` instance that will typically be included by the guest driver into `miscdevice.groups` before calling misc_register(). - tsm_mr_free_attribute_group(): Free the memory allocated to the attribute group returned by tsm_mr_create_attribute_group(). tsm_mr_create_attribute_group() creates one attribute for each MR, with names following this pattern: MRNAME[:HASH] - MRNAME - Placeholder for the MR name, as specified by `tsm_measurement_register.mr_name`. - :HASH - Optional suffix indicating the hash algorithm associated with this MR, as specified by `tsm_measurement_register.mr_hash`. Support Crypto Agility by allowing multiple definitions of the same MR (i.e., with the same `mr_name`) with distinct HASH algorithms. 
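As an example of that agility, a guest driver can describe the same register twice, once per hash. This is a hedged sketch: the register name, backing buffers and read-only flags are made up for illustration.

#include <linux/tsm-mr.h>
#include <crypto/sha2.h>

static u8 mrconfig_sha256[SHA256_DIGEST_SIZE];
static u8 mrconfig_sha384[SHA384_DIGEST_SIZE];

/* Two read-only definitions of one MR, distinguished by hash; per the
 * MRNAME[:HASH] pattern above they surface as "mrconfig:sha256" and
 * "mrconfig:sha384". */
static const struct tsm_measurement_register guest_mrs[] = {
        { TSM_MR_(mrconfig, SHA256), .mr_value = mrconfig_sha256 },
        { TSM_MR_(mrconfig, SHA384), .mr_value = mrconfig_sha384 },
};

The array is then wrapped in a struct tsm_measurements and passed to tsm_mr_create_attribute_group(), whose result the driver adds to miscdevice.groups as described above.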
NOTE: Crypto Agility, introduced in TPM 2.0, allows new hash algorithms to be introduced without breaking compatibility with applications using older algorithms. CC architectures may face the same challenge in the future, needing new hashes for security while retaining compatibility with older hashes, hence the need for Crypto Agility. Signed-off-by: Cedric Xing Reviewed-by: Dan Williams Acked-by: Dionna Amalie Glaze [djbw: fixup bin_attr const conflict] Link: https://patch.msgid.link/20250509020739.882913-1-dan.j.williams@intel.com Signed-off-by: Dan Williams --- include/linux/tsm-mr.h | 89 +++++++++++++++++++++++++++++++++++++++++++ include/trace/events/tsm_mr.h | 80 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 include/linux/tsm-mr.h create mode 100644 include/trace/events/tsm_mr.h (limited to 'include') diff --git a/include/linux/tsm-mr.h b/include/linux/tsm-mr.h new file mode 100644 index 000000000000..50a521f4ac97 --- /dev/null +++ b/include/linux/tsm-mr.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TSM_MR_H +#define __TSM_MR_H + +#include + +/** + * struct tsm_measurement_register - describes an architectural measurement + * register (MR) + * @mr_name: name of the MR + * @mr_value: buffer containing the current value of the MR + * @mr_size: size of the MR - typically the digest size of @mr_hash + * @mr_flags: bitwise OR of one or more flags, detailed below + * @mr_hash: optional hash identifier defined in include/uapi/linux/hash_info.h. + * + * A CC guest driver encloses an array of this structure in struct + * tsm_measurements to detail the measurement facility supported by the + * underlying CC hardware. + * + * @mr_name and @mr_value must stay valid until this structure is no longer in + * use. + * + * @mr_flags is the bitwise-OR of zero or more of the flags below. + * + * * %TSM_MR_F_READABLE - the sysfs attribute corresponding to this MR is readable. + * * %TSM_MR_F_WRITABLE - the sysfs attribute corresponding to this MR is writable. + * The semantics is typically to extend the MR but could vary depending on the + * architecture and the MR. + * * %TSM_MR_F_LIVE - this MR's value may differ from the last value written, so + * must be read back from the underlying CC hardware/firmware. + * * %TSM_MR_F_RTMR - bitwise-OR of %TSM_MR_F_LIVE and %TSM_MR_F_WRITABLE. + * * %TSM_MR_F_NOHASH - this MR does NOT have an associated hash algorithm. + * @mr_hash will be ignored when this flag is set. + */ +struct tsm_measurement_register { + const char *mr_name; + void *mr_value; + u32 mr_size; + u32 mr_flags; + enum hash_algo mr_hash; +}; + +#define TSM_MR_F_NOHASH 1 +#define TSM_MR_F_WRITABLE 2 +#define TSM_MR_F_READABLE 4 +#define TSM_MR_F_LIVE 8 +#define TSM_MR_F_RTMR (TSM_MR_F_LIVE | TSM_MR_F_WRITABLE) + +#define TSM_MR_(mr, hash) \ + .mr_name = #mr, .mr_size = hash##_DIGEST_SIZE, \ + .mr_hash = HASH_ALGO_##hash, .mr_flags = TSM_MR_F_READABLE + +/** + * struct tsm_measurements - defines the CC architecture specific measurement + * facility and methods for updating measurement registers (MRs) + * @mrs: Array of MR definitions. + * @nr_mrs: Number of elements in @mrs. + * @refresh: Callback function to load/sync all MRs from TVM hardware/firmware + * into the kernel cache. + * @write: Callback function to write to the MR specified by the parameter @mr. + * Typically, writing to an MR extends the input buffer to that MR. 
+ * + * The @refresh callback is invoked when an MR with %TSM_MR_F_LIVE set is being + * read and the cache is stale. It must reload all MRs with %TSM_MR_F_LIVE set. + * The function parameter @tm is a pointer pointing back to this structure. + * + * The @write callback is invoked whenever an MR is being written. It takes two + * additional parameters besides @tm: + * + * * @mr - points to the MR (an element of @tm->mrs) being written. + * * @data - contains the bytes to write and whose size is @mr->mr_size. + * + * Both @refresh and @write should return 0 on success and an appropriate error + * code on failure. + */ +struct tsm_measurements { + const struct tsm_measurement_register *mrs; + size_t nr_mrs; + int (*refresh)(const struct tsm_measurements *tm); + int (*write)(const struct tsm_measurements *tm, + const struct tsm_measurement_register *mr, const u8 *data); +}; + +const struct attribute_group * +tsm_mr_create_attribute_group(const struct tsm_measurements *tm); +void tsm_mr_free_attribute_group(const struct attribute_group *attr_grp); + +#endif diff --git a/include/trace/events/tsm_mr.h b/include/trace/events/tsm_mr.h new file mode 100644 index 000000000000..f40de4ad3e2d --- /dev/null +++ b/include/trace/events/tsm_mr.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tsm_mr + +#if !defined(_TRACE_TSM_MR_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TSM_MR_H + +#include +#include + +TRACE_EVENT(tsm_mr_read, + + TP_PROTO(const struct tsm_measurement_register *mr), + + TP_ARGS(mr), + + TP_STRUCT__entry( + __string(mr, mr->mr_name) + __string(hash, mr->mr_flags & TSM_MR_F_NOHASH ? + "data" : hash_algo_name[mr->mr_hash]) + __dynamic_array(u8, d, mr->mr_size) + ), + + TP_fast_assign( + __assign_str(mr); + __assign_str(hash); + memcpy(__get_dynamic_array(d), mr->mr_value, __get_dynamic_array_len(d)); + ), + + TP_printk("[%s] %s:%s", __get_str(mr), __get_str(hash), + __print_hex_str(__get_dynamic_array(d), __get_dynamic_array_len(d))) +); + +TRACE_EVENT(tsm_mr_refresh, + + TP_PROTO(const struct tsm_measurement_register *mr, int rc), + + TP_ARGS(mr, rc), + + TP_STRUCT__entry( + __string(mr, mr->mr_name) + __field(int, rc) + ), + + TP_fast_assign( + __assign_str(mr); + __entry->rc = rc; + ), + + TP_printk("[%s] %s:%d", __get_str(mr), + __entry->rc ? "failed" : "succeeded", __entry->rc) +); + +TRACE_EVENT(tsm_mr_write, + + TP_PROTO(const struct tsm_measurement_register *mr, const u8 *data), + + TP_ARGS(mr, data), + + TP_STRUCT__entry( + __string(mr, mr->mr_name) + __string(hash, mr->mr_flags & TSM_MR_F_NOHASH ? + "data" : hash_algo_name[mr->mr_hash]) + __dynamic_array(u8, d, mr->mr_size) + ), + + TP_fast_assign( + __assign_str(mr); + __assign_str(hash); + memcpy(__get_dynamic_array(d), data, __get_dynamic_array_len(d)); + ), + + TP_printk("[%s] %s:%s", __get_str(mr), __get_str(hash), + __print_hex_str(__get_dynamic_array(d), __get_dynamic_array_len(d))) +); + +#endif + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 4701073c3debd16d7f534f3eb808bd9b50601c0c Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 6 May 2025 16:07:22 +0800 Subject: net: enetc: add initial netc-lib driver to support NTMP Some NETC functionality is controlled using control messages sent to the hardware using BD ring interface with 32B descriptor similar to transmit BD ring used on ENETC. This BD ring interface is referred to as command BD ring. 
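A hedged sketch of the kind of request that travels over that BD ring, using the MAC address filter helpers this series adds; the entry id, station-interface bitmap and caller are assumptions for illustration.

#include <linux/etherdevice.h>
#include <linux/fsl/ntmp.h>

static int enetc_add_mac_filter(struct ntmp_user *user, u32 entry_id,
                                const u8 *mac)
{
        struct maft_entry_data maft = { };

        ether_addr_copy(maft.keye.mac_addr, mac);
        maft.cfge.si_bitmap = cpu_to_le16(BIT(0)); /* assumed: station interface 0 */

        return ntmp_maft_add_entry(user, entry_id, &maft);
}

Each such helper is turned into a 32B command descriptor and submitted over that BD ring interface.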
It is used to configure functionality where the underlying resources may be shared between different entities or being too large to configure using direct registers. Therefore, a messaging protocol called NETC Table Management Protocol (NTMP) is provided for exchanging configuration and management information between the software and the hardware using the command BD ring interface. For the management protocol of LS1028A has been retroactively named NTMP 1.0, and its implementation is in enetc_cbdr.c and enetc_qos.c. However, NTMP of i.MX95 has been upgraded to version 2.0, which is incompatible with LS1028A, because the message formats have been changed. Therefore, add the netc-lib driver to support NTMP 2.0 to operate various tables. Note that, only MAC address filter table and RSS table are supported at the moment. More tables will be supported in subsequent patches. It is worth mentioning that the purpose of the netc-lib driver is to provide some NTMP-based generic interfaces for ENETC and NETC Switch drivers. Currently, it only supports the configurations of some tables. Interfaces such as tc flower and debugfs will be added in the future. Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250506080735.3444381-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/fsl/ntmp.h | 121 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 include/linux/fsl/ntmp.h (limited to 'include') diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h new file mode 100644 index 000000000000..916dc4fe7de3 --- /dev/null +++ b/include/linux/fsl/ntmp.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2025 NXP */ +#ifndef __NETC_NTMP_H +#define __NETC_NTMP_H + +#include +#include + +struct maft_keye_data { + u8 mac_addr[ETH_ALEN]; + __le16 resv; +}; + +struct maft_cfge_data { + __le16 si_bitmap; + __le16 resv; +}; + +struct netc_cbdr_regs { + void __iomem *pir; + void __iomem *cir; + void __iomem *mr; + + void __iomem *bar0; + void __iomem *bar1; + void __iomem *lenr; +}; + +struct netc_tbl_vers { + u8 maft_ver; + u8 rsst_ver; +}; + +struct netc_cbdr { + struct device *dev; + struct netc_cbdr_regs regs; + + int bd_num; + int next_to_use; + int next_to_clean; + + int dma_size; + void *addr_base; + void *addr_base_align; + dma_addr_t dma_base; + dma_addr_t dma_base_align; + + /* Serialize the order of command BD ring */ + spinlock_t ring_lock; +}; + +struct ntmp_user { + int cbdr_num; /* number of control BD ring */ + struct device *dev; + struct netc_cbdr *ring; + struct netc_tbl_vers tbl; +}; + +struct maft_entry_data { + struct maft_keye_data keye; + struct maft_cfge_data cfge; +}; + +#if IS_ENABLED(CONFIG_NXP_NETC_LIB) +int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev, + const struct netc_cbdr_regs *regs); +void ntmp_free_cbdr(struct netc_cbdr *cbdr); + +/* NTMP APIs */ +int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft); +int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft); +int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id); +int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table, + int count); +int ntmp_rsst_query_entry(struct ntmp_user *user, + u32 *table, int count); +#else +static inline int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev, + const struct netc_cbdr_regs *regs) +{ + return 0; +} + +static inline void 
ntmp_free_cbdr(struct netc_cbdr *cbdr) +{ +} + +static inline int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft) +{ + return 0; +} + +static inline int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft) +{ + return 0; +} + +static inline int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id) +{ + return 0; +} + +static inline int ntmp_rsst_update_entry(struct ntmp_user *user, + const u32 *table, int count) +{ + return 0; +} + +static inline int ntmp_rsst_query_entry(struct ntmp_user *user, + u32 *table, int count) +{ + return 0; +} + +#endif + +#endif -- cgit v1.2.3 From 9cd5cc9da7ff650f1e9818dbfd54232c00ec47ed Mon Sep 17 00:00:00 2001 From: Arun R Murthy Date: Mon, 7 Apr 2025 11:13:45 +0530 Subject: drm/plane: Add new plane property IN_FORMATS_ASYNC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There exists a property IN_FORMATS which exposes the plane supported modifiers/formats to the user. In some platforms when asynchronous flip are used all of modifiers/formats mentioned in IN_FORMATS are not supported. This patch adds a new plane property IN_FORMATS_ASYNC to expose the async flip supported modifiers/formats so that user can use this information ahead and do flip with unsupported formats/modifiers. This will save flip failures. Add a new function pointer similar to format_mod_supported specifically for asynchronous flip. v2: Remove async variable from drm_plane (Ville) v3: Add new function pointer for async (Ville) v5: Typo corrected in commit message & some correction in the kernel documentation. (Chaitanya) v7: Place IN_FORMATS_ASYNC next to IN_FORMATS (Ville) v8: replace uint32_t with u32 and uint64_t with u64 (Chaitanya) Signed-off-by: Arun R Murthy Acked-by: Xaver Hugl Acked-by: Harry Wentland Reviewed-by: Chaitanya Kumar Borah Reviewed-by: Ville Syrjälä Tested-by: Naveen Kumar Signed-off-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250407-asyn-v13-1-b93ef83076c5@intel.com --- include/drm/drm_mode_config.h | 6 ++++++ include/drm/drm_plane.h | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 4b8f0370b79b..9e524b51a001 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -936,6 +936,12 @@ struct drm_mode_config { */ struct drm_property *modifiers_property; + /** + * @async_modifiers_property: Plane property to list support modifier/format + * combination for asynchronous flips. + */ + struct drm_property *async_modifiers_property; + /** * @size_hints_property: Plane SIZE_HINTS property. */ diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index dd718c62ac31..01479dd94e76 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -549,6 +549,23 @@ struct drm_plane_funcs { */ bool (*format_mod_supported)(struct drm_plane *plane, uint32_t format, uint64_t modifier); + /** + * @format_mod_supported_async: + * + * This optional hook is used for the DRM to determine if for + * asynchronous flip the given format/modifier combination is valid for + * the plane. This allows the DRM to generate the correct format + * bitmask (which formats apply to which modifier), and to validate + * modifiers at atomic_check time. + * + * Returns: + * + * True if the given modifier is valid for that format on the plane. + * False otherwise. 
+ */ + bool (*format_mod_supported_async)(struct drm_plane *plane, + u32 format, u64 modifier); + }; /** -- cgit v1.2.3 From a3aa115af25473c1309bc99cac6b2b6cd180fdd9 Mon Sep 17 00:00:00 2001 From: Keke Li Date: Sun, 27 Apr 2025 14:27:14 +0800 Subject: media: Add C3ISP_PARAMS and C3ISP_STATS meta formats C3ISP_PARAMS is the C3 ISP Parameters format. C3ISP_STATS is the C3 ISP Statistics format. Reviewed-by: Daniel Scally Reviewed-by: Jacopo Mondi Signed-off-by: Keke Li Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index ca7b3e8863ca..9e3b366d5fc7 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -868,6 +868,10 @@ struct v4l2_pix_format { #define V4L2_META_FMT_RK_ISP1_STAT_3A v4l2_fourcc('R', 'K', '1', 'S') /* Rockchip ISP1 3A Statistics */ #define V4L2_META_FMT_RK_ISP1_EXT_PARAMS v4l2_fourcc('R', 'K', '1', 'E') /* Rockchip ISP1 3a Extensible Parameters */ +/* Vendor specific - used for C3_ISP */ +#define V4L2_META_FMT_C3ISP_PARAMS v4l2_fourcc('C', '3', 'P', 'M') /* Amlogic C3 ISP Parameters */ +#define V4L2_META_FMT_C3ISP_STATS v4l2_fourcc('C', '3', 'S', 'T') /* Amlogic C3 ISP Statistics */ + /* Vendor specific - used for RaspberryPi PiSP */ #define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */ #define V4L2_META_FMT_RPI_FE_CFG v4l2_fourcc('R', 'P', 'F', 'C') /* PiSP FE configuration */ -- cgit v1.2.3 From 6d406187ebc092ebccf4fb5a1bc16b92c4bc109a Mon Sep 17 00:00:00 2001 From: Keke Li Date: Sun, 27 Apr 2025 14:27:15 +0800 Subject: media: uapi: Add stats info and parameters buffer for C3 ISP Add a header that describes the 3A statistics buffer and the parameters buffer for C3 ISP Reviewed-by: Jacopo Mondi Signed-off-by: Keke Li Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- include/uapi/linux/media/amlogic/c3-isp-config.h | 564 +++++++++++++++++++++++ 1 file changed, 564 insertions(+) create mode 100644 include/uapi/linux/media/amlogic/c3-isp-config.h (limited to 'include') diff --git a/include/uapi/linux/media/amlogic/c3-isp-config.h b/include/uapi/linux/media/amlogic/c3-isp-config.h new file mode 100644 index 000000000000..ed085ea62a57 --- /dev/null +++ b/include/uapi/linux/media/amlogic/c3-isp-config.h @@ -0,0 +1,564 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2024 Amlogic, Inc. All rights reserved + */ + +#ifndef _UAPI_C3_ISP_CONFIG_H_ +#define _UAPI_C3_ISP_CONFIG_H_ + +#include + +/* + * Frames are split into zones of almost equal width and height - a zone is a + * rectangular tile of a frame. The metering blocks within the ISP collect + * aggregated statistics per zone. 
+ */ +#define C3_ISP_AE_MAX_ZONES (17 * 15) +#define C3_ISP_AF_MAX_ZONES (17 * 15) +#define C3_ISP_AWB_MAX_ZONES (32 * 24) + +/* The maximum number of point on the diagonal of the frame for statistics */ +#define C3_ISP_AE_MAX_PT_NUM 18 +#define C3_ISP_AF_MAX_PT_NUM 18 +#define C3_ISP_AWB_MAX_PT_NUM 33 + +/** + * struct c3_isp_awb_zone_stats - AWB statistics of a zone + * + * AWB zone stats is aligned with 8 bytes + * + * @rg: the ratio of R / G in a zone + * @bg: the ratio of B / G in a zone + * @pixel_sum: the total number of pixels used in a zone + */ +struct c3_isp_awb_zone_stats { + __u16 rg; + __u16 bg; + __u32 pixel_sum; +}; + +/** + * struct c3_isp_awb_stats - Auto white balance statistics information. + * + * AWB statistical information of all zones. + * + * @stats: array of auto white balance statistics + */ +struct c3_isp_awb_stats { + struct c3_isp_awb_zone_stats stats[C3_ISP_AWB_MAX_ZONES]; +} __attribute__((aligned(16))); + +/** + * struct c3_isp_ae_zone_stats - AE statistics of a zone + * + * AE zone stats is aligned with 8 bytes. + * This is a 5-bin histogram and the total sum is normalized to 0xffff. + * So hist2 = 0xffff - (hist0 + hist1 + hist3 + hist4) + * + * @hist0: the global normalized pixel count for bin 0 + * @hist1: the global normalized pixel count for bin 1 + * @hist3: the global normalized pixel count for bin 3 + * @hist4: the global normalized pixel count for bin 4 + */ +struct c3_isp_ae_zone_stats { + __u16 hist0; + __u16 hist1; + __u16 hist3; + __u16 hist4; +}; + +/** + * struct c3_isp_ae_stats - Exposure statistics information + * + * AE statistical information consists of all blocks information and a 1024-bin + * histogram. + * + * @stats: array of auto exposure block statistics + * @reserved: undefined buffer space + * @hist: a 1024-bin histogram for the entire image + */ +struct c3_isp_ae_stats { + struct c3_isp_ae_zone_stats stats[C3_ISP_AE_MAX_ZONES]; + __u32 reserved[2]; + __u32 hist[1024]; +} __attribute__((aligned(16))); + +/** + * struct c3_isp_af_zone_stats - AF statistics of a zone + * + * AF zone stats is aligned with 8 bytes. + * The zonal accumulated contrast metrics are stored in floating point format + * with 16 bits mantissa and 5 or 6 bits exponent. Apart from contrast metrics + * we accumulate squared image and quartic image data over the zone. 
+ * + * @i2_mat: the mantissa of zonal squared image pixel sum + * @i4_mat: the mantissa of zonal quartic image pixel sum + * @e4_mat: the mantissa of zonal multi-directional quartic edge sum + * @e4_exp: the exponent of zonal multi-directional quartic edge sum + * @i2_exp: the exponent of zonal squared image pixel sum + * @i4_exp: the exponent of zonal quartic image pixel sum + */ +struct c3_isp_af_zone_stats { + __u16 i2_mat; + __u16 i4_mat; + __u16 e4_mat; + __u16 e4_exp : 5; + __u16 i2_exp : 5; + __u16 i4_exp : 6; +}; + +/** + * struct c3_isp_af_stats - Auto Focus statistics information + * + * AF statistical information of each zone + * + * @stats: array of auto focus block statistics + * @reserved: undefined buffer space + */ +struct c3_isp_af_stats { + struct c3_isp_af_zone_stats stats[C3_ISP_AF_MAX_ZONES]; + __u32 reserved[2]; +} __attribute__((aligned(16))); + +/** + * struct c3_isp_stats_info - V4L2_META_FMT_C3ISP_STATS + * + * Contains ISP statistics + * + * @awb: auto white balance stats + * @ae: auto exposure stats + * @af: auto focus stats + */ +struct c3_isp_stats_info { + struct c3_isp_awb_stats awb; + struct c3_isp_ae_stats ae; + struct c3_isp_af_stats af; +}; + +/** + * enum c3_isp_params_buffer_version - C3 ISP parameters block versioning + * + * @C3_ISP_PARAMS_BUFFER_V0: First version of C3 ISP parameters block + */ +enum c3_isp_params_buffer_version { + C3_ISP_PARAMS_BUFFER_V0, +}; + +/** + * enum c3_isp_params_block_type - Enumeration of C3 ISP parameter blocks + * + * Each block configures a specific processing block of the C3 ISP. + * The block type allows the driver to correctly interpret the parameters block + * data. + * + * @C3_ISP_PARAMS_BLOCK_AWB_GAINS: White balance gains + * @C3_ISP_PARAMS_BLOCK_AWB_CONFIG: AWB statistic format configuration for all + * blocks that control how stats are generated + * @C3_ISP_PARAMS_BLOCK_AE_CONFIG: AE statistic format configuration for all + * blocks that control how stats are generated + * @C3_ISP_PARAMS_BLOCK_AF_CONFIG: AF statistic format configuration for all + * blocks that control how stats are generated + * @C3_ISP_PARAMS_BLOCK_PST_GAMMA: post gamma parameters + * @C3_ISP_PARAMS_BLOCK_CCM: Color correction matrix parameters + * @C3_ISP_PARAMS_BLOCK_CSC: Color space conversion parameters + * @C3_ISP_PARAMS_BLOCK_BLC: Black level correction parameters + * @C3_ISP_PARAMS_BLOCK_SENTINEL: First non-valid block index + */ +enum c3_isp_params_block_type { + C3_ISP_PARAMS_BLOCK_AWB_GAINS, + C3_ISP_PARAMS_BLOCK_AWB_CONFIG, + C3_ISP_PARAMS_BLOCK_AE_CONFIG, + C3_ISP_PARAMS_BLOCK_AF_CONFIG, + C3_ISP_PARAMS_BLOCK_PST_GAMMA, + C3_ISP_PARAMS_BLOCK_CCM, + C3_ISP_PARAMS_BLOCK_CSC, + C3_ISP_PARAMS_BLOCK_BLC, + C3_ISP_PARAMS_BLOCK_SENTINEL +}; + +#define C3_ISP_PARAMS_BLOCK_FL_DISABLE (1U << 0) +#define C3_ISP_PARAMS_BLOCK_FL_ENABLE (1U << 1) + +/** + * struct c3_isp_params_block_header - C3 ISP parameter block header + * + * This structure represents the common part of all the ISP configuration + * blocks. Each parameters block shall embed an instance of this structure type + * as its first member, followed by the block-specific configuration data. The + * driver inspects this common header to discern the block type and its size and + * properly handle the block content by casting it to the correct block-specific + * type. + * + * The @type field is one of the values enumerated by + * :c:type:`c3_isp_params_block_type` and specifies how the data should be + * interpreted by the driver. 
The @size field specifies the size of the + * parameters block and is used by the driver for validation purposes. The + * @flags field is a bitmask of per-block flags C3_ISP_PARAMS_FL*. + * + * When userspace wants to disable an ISP block the + * C3_ISP_PARAMS_BLOCK_FL_DISABLED bit should be set in the @flags field. In + * this case userspace may optionally omit the remainder of the configuration + * block, which will be ignored by the driver. + * + * When a new configuration of an ISP block needs to be applied userspace + * shall fully populate the ISP block and omit setting the + * C3_ISP_PARAMS_BLOCK_FL_DISABLED bit in the @flags field. + * + * Userspace is responsible for correctly populating the parameters block header + * fields (@type, @flags and @size) and the block-specific parameters. + * + * For example: + * + * .. code-block:: c + * + * void populate_pst_gamma(struct c3_isp_params_block_header *block) { + * struct c3_isp_params_pst_gamma *gamma = + * (struct c3_isp_params_pst_gamma *)block; + * + * gamma->header.type = C3_ISP_PARAMS_BLOCK_PST_GAMMA; + * gamma->header.flags = C3_ISP_PARAMS_BLOCK_FL_ENABLE; + * gamma->header.size = sizeof(*gamma); + * + * for (unsigned int i = 0; i < 129; i++) + * gamma->pst_gamma_lut[i] = i; + * } + * + * @type: The parameters block type from :c:type:`c3_isp_params_block_type` + * @flags: A bitmask of block flags + * @size: Size (in bytes) of the parameters block, including this header + */ +struct c3_isp_params_block_header { + __u16 type; + __u16 flags; + __u32 size; +}; + +/** + * struct c3_isp_params_awb_gains - Gains for auto-white balance + * + * This struct allows users to configure the gains for white balance. + * There are four gain settings corresponding to each colour channel in + * the bayer domain. All of the gains are stored in Q4.8 format. + * + * header.type should be set to C3_ISP_PARAMS_BLOCK_AWB_GAINS + * from :c:type:`c3_isp_params_block_type` + * + * @header: The C3 ISP parameters block header + * @gr_gain: Multiplier for Gr channel (Q4.8 format) + * @r_gain: Multiplier for R channel (Q4.8 format) + * @b_gain: Multiplier for B channel (Q4.8 format) + * @gb_gain: Multiplier for Gb channel (Q4.8 format) + */ +struct c3_isp_params_awb_gains { + struct c3_isp_params_block_header header; + __u16 gr_gain; + __u16 r_gain; + __u16 b_gain; + __u16 gb_gain; +} __attribute__((aligned(8))); + +/** + * enum c3_isp_params_awb_tap_points - Tap points for the AWB statistics + * @C3_ISP_AWB_STATS_TAP_OFE: immediately after the optical frontend block + * @C3_ISP_AWB_STATS_TAP_GE: immediately after the green equal block + * @C3_ISP_AWB_STATS_TAP_BEFORE_WB: immediately before the white balance block + * @C3_ISP_AWB_STATS_TAP_AFTER_WB: immediately after the white balance block + */ +enum c3_isp_params_awb_tap_points { + C3_ISP_AWB_STATS_TAP_OFE = 0, + C3_ISP_AWB_STATS_TAP_GE, + C3_ISP_AWB_STATS_TAP_BEFORE_WB, + C3_ISP_AWB_STATS_TAP_AFTER_WB, +}; + +/** + * struct c3_isp_params_awb_config - Stats settings for auto-white balance + * + * This struct allows the configuration of the statistics generated for auto + * white balance. 
+ * + * header.type should be set to C3_ISP_PARAMS_BLOCK_AWB_CONFIG + * from :c:type:`c3_isp_params_block_type` + * + * @header: the C3 ISP parameters block header + * @tap_point: the tap point from enum c3_isp_params_awb_tap_point + * @satur_vald: AWB statistic over saturation control + * value: 0: disable, 1: enable + * @horiz_zones_num: active number of hotizontal zones [0..32] + * @vert_zones_num: active number of vertical zones [0..24] + * @rg_min: minimum R/G ratio (Q4.8 format) + * @rg_max: maximum R/G ratio (Q4.8 format) + * @bg_min: minimum B/G ratio (Q4.8 format) + * @bg_max: maximum B/G ratio (Q4.8 format) + * @rg_low: R/G ratio trim low (Q4.8 format) + * @rg_high: R/G ratio trim hight (Q4.8 format) + * @bg_low: B/G ratio trim low (Q4.8 format) + * @bg_high: B/G ratio trim high (Q4.8 format) + * @zone_weight: array of weights for AWB statistics zones [0..15] + * @horiz_coord: the horizontal coordinate of points on the diagonal [0..2888] + * @vert_coord: the vertical coordinate of points on the diagonal [0..2240] + */ +struct c3_isp_params_awb_config { + struct c3_isp_params_block_header header; + __u8 tap_point; + __u8 satur_vald; + __u8 horiz_zones_num; + __u8 vert_zones_num; + __u16 rg_min; + __u16 rg_max; + __u16 bg_min; + __u16 bg_max; + __u16 rg_low; + __u16 rg_high; + __u16 bg_low; + __u16 bg_high; + __u8 zone_weight[C3_ISP_AWB_MAX_ZONES]; + __u16 horiz_coord[C3_ISP_AWB_MAX_PT_NUM]; + __u16 vert_coord[C3_ISP_AWB_MAX_PT_NUM]; +} __attribute__((aligned(8))); + +/** + * enum c3_isp_params_ae_tap_points - Tap points for the AE statistics + * @C3_ISP_AE_STATS_TAP_GE: immediately after the green equal block + * @C3_ISP_AE_STATS_TAP_MLS: immediately after the mesh lens shading block + */ +enum c3_isp_params_ae_tap_points { + C3_ISP_AE_STATS_TAP_GE = 0, + C3_ISP_AE_STATS_TAP_MLS, +}; + +/** + * struct c3_isp_params_ae_config - Stats settings for auto-exposure + * + * This struct allows the configuration of the statistics generated for + * auto exposure. + * + * header.type should be set to C3_ISP_PARAMS_BLOCK_AE_CONFIG + * from :c:type:`c3_isp_params_block_type` + * + * @header: the C3 ISP parameters block header + * @horiz_zones_num: active number of horizontal zones [0..17] + * @vert_zones_num: active number of vertical zones [0..15] + * @tap_point: the tap point from enum c3_isp_params_ae_tap_point + * @zone_weight: array of weights for AE statistics zones [0..15] + * @horiz_coord: the horizontal coordinate of points on the diagonal [0..2888] + * @vert_coord: the vertical coordinate of points on the diagonal [0..2240] + * @reserved: applications must zero this array + */ +struct c3_isp_params_ae_config { + struct c3_isp_params_block_header header; + __u8 tap_point; + __u8 horiz_zones_num; + __u8 vert_zones_num; + __u8 zone_weight[C3_ISP_AE_MAX_ZONES]; + __u16 horiz_coord[C3_ISP_AE_MAX_PT_NUM]; + __u16 vert_coord[C3_ISP_AE_MAX_PT_NUM]; + __u16 reserved[3]; +} __attribute__((aligned(8))); + +/** + * enum c3_isp_params_af_tap_points - Tap points for the AF statistics + * @C3_ISP_AF_STATS_TAP_SNR: immediately after the spatial noise reduce block + * @C3_ISP_AF_STATS_TAP_DMS: immediately after the demosaic block + */ +enum c3_isp_params_af_tap_points { + C3_ISP_AF_STATS_TAP_SNR = 0, + C3_ISP_AF_STATS_TAP_DMS, +}; + +/** + * struct c3_isp_params_af_config - Stats settings for auto-focus + * + * This struct allows the configuration of the statistics generated for + * auto focus. 
+ * + * header.type should be set to C3_ISP_PARAMS_BLOCK_AF_CONFIG + * from :c:type:`c3_isp_params_block_type` + * + * @header: the C3 ISP parameters block header + * @tap_point: the tap point from enum c3_isp_params_af_tap_point + * @horiz_zones_num: active number of hotizontal zones [0..17] + * @vert_zones_num: active number of vertical zones [0..15] + * @reserved: applications must zero this array + * @horiz_coord: the horizontal coordinate of points on the diagonal [0..2888] + * @vert_coord: the vertical coordinate of points on the diagonal [0..2240] + */ +struct c3_isp_params_af_config { + struct c3_isp_params_block_header header; + __u8 tap_point; + __u8 horiz_zones_num; + __u8 vert_zones_num; + __u8 reserved[5]; + __u16 horiz_coord[C3_ISP_AF_MAX_PT_NUM]; + __u16 vert_coord[C3_ISP_AF_MAX_PT_NUM]; +} __attribute__((aligned(8))); + +/** + * struct c3_isp_params_pst_gamma - Post gamma configuration + * + * This struct allows the configuration of the look up table for + * post gamma. The gamma curve consists of 129 points, so need to + * set lut[129]. + * + * header.type should be set to C3_ISP_PARAMS_BLOCK_PST_GAMMA + * from :c:type:`c3_isp_params_block_type` + * + * @header: the C3 ISP parameters block header + * @lut: lookup table for P-Stitch gamma [0..1023] + * @reserved: applications must zero this array + */ +struct c3_isp_params_pst_gamma { + struct c3_isp_params_block_header header; + __u16 lut[129]; + __u16 reserved[3]; +} __attribute__((aligned(8))); + +/** + * struct c3_isp_params_ccm - ISP CCM configuration + * + * This struct allows the configuration of the matrix for + * color correction. The matrix consists of 3 x 3 points, + * so need to set matrix[3][3]. + * + * header.type should be set to C3_ISP_PARAMS_BLOCK_CCM + * from :c:type:`c3_isp_params_block_type` + * + * @header: the C3 ISP parameters block header + * @matrix: a 3 x 3 matrix used for color correction, + * the value of matrix[x][y] is orig_value x 256. [-4096..4095] + * @reserved: applications must zero this array + */ +struct c3_isp_params_ccm { + struct c3_isp_params_block_header header; + __s16 matrix[3][3]; + __u16 reserved[3]; +} __attribute__((aligned(8))); + +/** + * struct c3_isp_params_csc - ISP Color Space Conversion configuration + * + * This struct allows the configuration of the matrix for color space + * conversion. The matrix consists of 3 x 3 points, so need to set matrix[3][3]. + * + * header.type should be set to C3_ISP_PARAMS_BLOCK_CSC + * from :c:type:`c3_isp_params_block_type` + * + * @header: the C3 ISP parameters block header + * @matrix: a 3x3 matrix used for the color space conversion, + * the value of matrix[x][y] is orig_value x 256. [-4096..4095] + * @reserved: applications must zero this array + */ +struct c3_isp_params_csc { + struct c3_isp_params_block_header header; + __s16 matrix[3][3]; + __u16 reserved[3]; +} __attribute__((aligned(8))); + +/** + * struct c3_isp_params_blc - ISP Black Level Correction configuration + * + * This struct allows the configuration of the block level offset for each + * color channel. 
+ * + * header.type should be set to C3_ISP_PARAMS_BLOCK_BLC + * from :c:type:`c3_isp_params_block_type` + * + * @header: the C3 ISP parameters block header + * @gr_ofst: Gr blc offset (Q4.12 format) + * @r_ofst: R blc offset (Q4.12 format) + * @b_ofst: B blc offset (Q4.12 format) + * @gb_ofst: Gb blc offset(Q4.12 format) + */ +struct c3_isp_params_blc { + struct c3_isp_params_block_header header; + __u16 gr_ofst; + __u16 r_ofst; + __u16 b_ofst; + __u16 gb_ofst; +}; + +/** + * define C3_ISP_PARAMS_MAX_SIZE - Maximum size of all C3 ISP Parameters + * + * Though the parameters for the C3 ISP are passed as optional blocks, the + * driver still needs to know the absolute maximum size so that it can allocate + * a buffer sized appropriately to accommodate userspace attempting to set all + * possible parameters in a single frame. + */ +#define C3_ISP_PARAMS_MAX_SIZE \ + (sizeof(struct c3_isp_params_awb_gains) + \ + sizeof(struct c3_isp_params_awb_config) + \ + sizeof(struct c3_isp_params_ae_config) + \ + sizeof(struct c3_isp_params_af_config) + \ + sizeof(struct c3_isp_params_pst_gamma) + \ + sizeof(struct c3_isp_params_ccm) + \ + sizeof(struct c3_isp_params_csc) + \ + sizeof(struct c3_isp_params_blc)) + +/** + * struct c3_isp_params_cfg - C3 ISP configuration parameters + * + * This struct contains the configuration parameters of the C3 ISP + * algorithms, serialized by userspace into an opaque data buffer. Each + * configuration parameter block is represented by a block-specific structure + * which contains a :c:type:`c3_isp_param_block_header` entry as first + * member. Userspace populates the @data buffer with configuration parameters + * for the blocks that it intends to configure. As a consequence, the data + * buffer effective size changes according to the number of ISP blocks that + * userspace intends to configure. + * + * The parameters buffer is versioned by the @version field to allow modifying + * and extending its definition. Userspace should populate the @version field to + * inform the driver about the version it intends to use. The driver will parse + * and handle the @data buffer according to the data layout specific to the + * indicated revision and return an error if the desired revision is not + * supported. + * + * For each ISP block that userspace wants to configure, a block-specific + * structure is appended to the @data buffer, one after the other without gaps + * in between nor overlaps. Userspace shall populate the @total_size field with + * the effective size, in bytes, of the @data buffer. 
+ * + * The expected memory layout of the parameters buffer is:: + * + * +-------------------- struct c3_isp_params_cfg ---- ------------------+ + * | version = C3_ISP_PARAM_BUFFER_V0; | + * | data_size = sizeof(struct c3_isp_params_awb_gains) + | + * | sizeof(struct c3_isp_params_awb_config); | + * | +------------------------- data ---------------------------------+ | + * | | +------------ struct c3_isp_params_awb_gains) ------------------+ | + * | | | +--------- struct c3_isp_params_block_header header -----+ | | | + * | | | | type = C3_ISP_PARAMS_BLOCK_AWB_GAINS; | | | | + * | | | | flags = C3_ISP_PARAMS_BLOCK_FL_NONE; | | | | + * | | | | size = sizeof(struct c3_isp_params_awb_gains); | | | | + * | | | +---------------------------------------------------------+ | | | + * | | | gr_gain = ...; | | | + * | | | r_gain = ...; | | | + * | | | b_gain = ...; | | | + * | | | gb_gain = ...; | | | + * | | +------------------ struct c3_isp_params_awb_config ----------+ | | + * | | | +---------- struct c3_isp_param_block_header header ------+ | | | + * | | | | type = C3_ISP_PARAMS_BLOCK_AWB_CONFIG; | | | | + * | | | | flags = C3_ISP_PARAMS_BLOCK_FL_NONE; | | | | + * | | | | size = sizeof(struct c3_isp_params_awb_config) | | | | + * | | | +---------------------------------------------------------+ | | | + * | | | tap_point = ...; | | | + * | | | satur_vald = ...; | | | + * | | | horiz_zones_num = ...; | | | + * | | | vert_zones_num = ...; | | | + * | | +-------------------------------------------------------------+ | | + * | +-----------------------------------------------------------------+ | + * +---------------------------------------------------------------------+ + * + * @version: The C3 ISP parameters buffer version + * @data_size: The C3 ISP configuration data effective size, excluding this + * header + * @data: The C3 ISP configuration blocks data + */ +struct c3_isp_params_cfg { + __u32 version; + __u32 data_size; + __u8 data[C3_ISP_PARAMS_MAX_SIZE]; +}; + +#endif -- cgit v1.2.3 From 1af3331764b9356fadc4652af77bbbc97f3d7f78 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 29 Mar 2025 09:42:19 +0100 Subject: super: add filesystem freezing helpers for suspend and hibernate Allow the power subsystem to support filesystem freeze for suspend and hibernate. For some kernel subsystems it is paramount that they are guaranteed that they are the owner of the freeze to avoid any risk of deadlocks. This is the case for the power subsystem. Enable it to recognize whether it did actually freeze the filesystem. If userspace has 10 filesystems and suspend/hibernate manges to freeze 5 and then fails on the 6th for whatever odd reason (current or future) then power needs to undo the freeze of the first 5 filesystems. It can't just walk the list again because while it's unlikely that a new filesystem got added in the meantime it still cannot tell which filesystems the power subsystem actually managed to get a freeze reference count on that needs to be dropped during thaw. There's various ways out of this ugliness. For example, record the filesystems the power subsystem managed to freeze on a temporary list in the callbacks and then walk that list backwards during thaw to undo the freezing or make sure that the power subsystem just actually exclusively freezes things it can freeze and marking such filesystems as being owned by power for the duration of the suspend or resume cycle. I opted for the latter as that seemed the clean thing to do even if it means more code changes. 
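A minimal sketch of what that ownership means at the freeze_super()/thaw_super() level introduced here; the owner cookie and function names are placeholders, not the actual power-management code.

#include <linux/fs.h>

static char pm_freeze_cookie; /* assumed owner token for the power subsystem */

/* freeze one superblock exclusively on behalf of suspend/hibernate */
static int pm_freeze_sb(struct super_block *sb)
{
        return freeze_super(sb, FREEZE_HOLDER_KERNEL | FREEZE_EXCL,
                            &pm_freeze_cookie);
}

/* thaw it on resume; an exclusive freeze can only be undone by its owner */
static int pm_thaw_sb(struct super_block *sb)
{
        return thaw_super(sb, FREEZE_HOLDER_KERNEL | FREEZE_EXCL,
                          &pm_freeze_cookie);
}

Because the freeze is marked FREEZE_EXCL, such filesystems are unambiguously owned by power for the duration of the suspend or resume cycle.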
If hibernation races with filesystem freezing (e.g. DM reconfiguration), then hibernation need not freeze a filesystem because it's already frozen but userspace may thaw the filesystem before hibernation actually happens. If the race happens the other way around, DM reconfiguration may unexpectedly fail with EBUSY. So allow FREEZE_EXCL to nest with other holders. An exclusive freezer cannot be undone by any of the other concurrent freezers. Link: https://lore.kernel.org/r/20250329-work-freeze-v2-6-a47af37ecc3d@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96f8c8a794ea..7a3f821d2723 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1307,6 +1307,7 @@ struct sb_writers { unsigned short frozen; /* Is sb frozen? */ int freeze_kcount; /* How many kernel freeze requests? */ int freeze_ucount; /* How many userspace freeze requests? */ + const void *freeze_owner; /* Owner of the freeze */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -2269,6 +2270,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed + * @FREEZE_EXCL: a freeze that can only be undone by the owner * * Indicate who the owner of the freeze or thaw request is and whether * the freeze needs to be exclusive or can nest. @@ -2282,6 +2284,7 @@ enum freeze_holder { FREEZE_HOLDER_KERNEL = (1U << 0), FREEZE_HOLDER_USERSPACE = (1U << 1), FREEZE_MAY_NEST = (1U << 2), + FREEZE_EXCL = (1U << 3), }; struct super_operations { @@ -2295,9 +2298,9 @@ struct super_operations { void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - int (*freeze_super) (struct super_block *, enum freeze_holder who); + int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*freeze_fs) (struct super_block *); - int (*thaw_super) (struct super_block *, enum freeze_holder who); + int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); @@ -2705,8 +2708,10 @@ extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); -int freeze_super(struct super_block *super, enum freeze_holder who); -int thaw_super(struct super_block *super, enum freeze_holder who); +int freeze_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); +int thaw_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); @@ -3518,6 +3523,8 @@ extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); +void 
filesystems_freeze(void); +void filesystems_thaw(void); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); -- cgit v1.2.3 From 79fb8d8d93e41be4ebda8c9cc507e20297277231 Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Thu, 8 May 2025 14:49:30 -0400 Subject: include/cgroup: separate {get,put}_cgroup_ns no-op case When CONFIG_CGROUPS is not selected, {get,put}_cgroup_ns become no-ops and therefore it is not necessary to compile in the code for changing the reference count. When CONFIG_CGROUP is selected, there is no valid case where either of {get,put}_cgroup_ns() will be called with a NULL argument. Signed-off-by: Joel Savitz Link: https://lore.kernel.org/20250508184930.183040-3-jsavitz@redhat.com Signed-off-by: Christian Brauner --- include/linux/cgroup.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 28e999f2c642..8da19c3627b0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -785,6 +785,17 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); +static inline void get_cgroup_ns(struct cgroup_namespace *ns) +{ + refcount_inc(&ns->ns.count); +} + +static inline void put_cgroup_ns(struct cgroup_namespace *ns) +{ + if (refcount_dec_and_test(&ns->ns.count)) + free_cgroup_ns(ns); +} + #else /* !CONFIG_CGROUPS */ static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } @@ -795,19 +806,10 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, return old_ns; } -#endif /* !CONFIG_CGROUPS */ +static inline void get_cgroup_ns(struct cgroup_namespace *ns) { } +static inline void put_cgroup_ns(struct cgroup_namespace *ns) { } -static inline void get_cgroup_ns(struct cgroup_namespace *ns) -{ - if (ns) - refcount_inc(&ns->ns.count); -} - -static inline void put_cgroup_ns(struct cgroup_namespace *ns) -{ - if (ns && refcount_dec_and_test(&ns->ns.count)) - free_cgroup_ns(ns); -} +#endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS -- cgit v1.2.3 From 2d3cbfd6d54a2c39ce3244f33f85c595844bd7b8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2025 21:35:58 -0700 Subject: net_sched: Flush gso_skb list too during ->change() Previously, when reducing a qdisc's limit via the ->change() operation, only the main skb queue was trimmed, potentially leaving packets in the gso_skb list. This could result in NULL pointer dereference when we only check sch->limit against sch->q.qlen. This patch introduces a new helper, qdisc_dequeue_internal(), which ensures both the gso_skb list and the main queue are properly flushed when trimming excess packets. All relevant qdiscs (codel, fq, fq_codel, fq_pie, hhf, pie) are updated to use this helper in their ->change() routines. Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM") Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc") Fixes: d4b36210c2e6 ("net: pkt_sched: PIE AQM scheme") Reported-by: Will Reported-by: Savy Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d48c657191cd..1c05fed05f2b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1031,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) return skb; } +static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct) +{ + struct sk_buff *skb; + + skb = __skb_dequeue(&sch->gso_skb); + if (skb) { + sch->q.qlen--; + return skb; + } + if (direct) + return __qdisc_dequeue_head(&sch->q); + else + return sch->dequeue(sch); +} + static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) { struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); -- cgit v1.2.3 From 91e40668e70a08cf09c0e22023807b01d78d8b3a Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 22 Apr 2025 09:18:11 +0100 Subject: mm/page_table_check: Batch-check pmds/puds just like ptes Convert page_table_check_p[mu]d_set(...) to page_table_check_p[mu]ds_set(..., nr) to allow checking a contiguous set of pmds/puds in single batch. We retain page_table_check_p[mu]d_set(...) as macros that call new batch functions with nr=1 for compatibility. arm64 is about to reorganise its pte/pmd/pud helpers to reuse more code and to allow the implementation for huge_pte to more efficiently set ptes/pmds/puds in batches. We need these batch-helpers to make the refactoring possible. Reviewed-by: Anshuman Khandual Reviewed-by: Pasha Tatashin Reviewed-by: Catalin Marinas Signed-off-by: Ryan Roberts Tested-by: Luiz Capitulino Link: https://lore.kernel.org/r/20250422081822.1836315-4-ryan.roberts@arm.com Signed-off-by: Will Deacon --- include/linux/page_table_check.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h index 6722941c7cb8..289620d4aad3 100644 --- a/include/linux/page_table_check.h +++ b/include/linux/page_table_check.h @@ -19,8 +19,10 @@ void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd); void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud); void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr); -void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd); -void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud); +void __page_table_check_pmds_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd, + unsigned int nr); +void __page_table_check_puds_set(struct mm_struct *mm, pud_t *pudp, pud_t pud, + unsigned int nr); void __page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd); @@ -74,22 +76,22 @@ static inline void page_table_check_ptes_set(struct mm_struct *mm, __page_table_check_ptes_set(mm, ptep, pte, nr); } -static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, - pmd_t pmd) +static inline void page_table_check_pmds_set(struct mm_struct *mm, + pmd_t *pmdp, pmd_t pmd, unsigned int nr) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pmd_set(mm, pmdp, pmd); + __page_table_check_pmds_set(mm, pmdp, pmd, nr); } -static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, - pud_t pud) +static inline void page_table_check_puds_set(struct mm_struct *mm, + pud_t *pudp, pud_t pud, unsigned int nr) { if 
(static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pud_set(mm, pudp, pud); + __page_table_check_puds_set(mm, pudp, pud, nr); } static inline void page_table_check_pte_clear_range(struct mm_struct *mm, @@ -129,13 +131,13 @@ static inline void page_table_check_ptes_set(struct mm_struct *mm, { } -static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, - pmd_t pmd) +static inline void page_table_check_pmds_set(struct mm_struct *mm, + pmd_t *pmdp, pmd_t pmd, unsigned int nr) { } -static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, - pud_t pud) +static inline void page_table_check_puds_set(struct mm_struct *mm, + pud_t *pudp, pud_t pud, unsigned int nr) { } @@ -146,4 +148,8 @@ static inline void page_table_check_pte_clear_range(struct mm_struct *mm, { } #endif /* CONFIG_PAGE_TABLE_CHECK */ + +#define page_table_check_pmd_set(mm, pmdp, pmd) page_table_check_pmds_set(mm, pmdp, pmd, 1) +#define page_table_check_pud_set(mm, pudp, pud) page_table_check_puds_set(mm, pudp, pud, 1) + #endif /* __LINUX_PAGE_TABLE_CHECK_H */ -- cgit v1.2.3 From 2fba13371fe80b4d0d533a502e460ce0e936d024 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 22 Apr 2025 09:18:16 +0100 Subject: mm/vmalloc: Gracefully unmap huge ptes Commit f7ee1f13d606 ("mm/vmalloc: enable mapping of huge pages at pte level in vmap") added its support by reusing the set_huge_pte_at() API, which is otherwise only used for user mappings. But when unmapping those huge ptes, it continued to call ptep_get_and_clear(), which is a layering violation. To date, the only arch to implement this support is powerpc and it all happens to work ok for it. But arm64's implementation of ptep_get_and_clear() can not be safely used to clear a previous set_huge_pte_at(). So let's introduce a new arch opt-in function, arch_vmap_pte_range_unmap_size(), which can provide the size of a (present) pte. Then we can call huge_ptep_get_and_clear() to tear it down properly. Note that if vunmap_range() is called with a range that starts in the middle of a huge pte-mapped page, we must unmap the entire huge page so the behaviour is consistent with pmd and pud block mappings. In this case emit a warning just like we do for pmd/pud mappings. Reviewed-by: Anshuman Khandual Reviewed-by: Uladzislau Rezki (Sony) Reviewed-by: Catalin Marinas Signed-off-by: Ryan Roberts Tested-by: Luiz Capitulino Link: https://lore.kernel.org/r/20250422081822.1836315-9-ryan.roberts@arm.com Signed-off-by: Will Deacon --- include/linux/vmalloc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e3..16dd4cba64f2 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -113,6 +113,14 @@ static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, uns } #endif +#ifndef arch_vmap_pte_range_unmap_size +static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr, + pte_t *ptep) +{ + return PAGE_SIZE; +} +#endif + #ifndef arch_vmap_pte_supported_shift static inline int arch_vmap_pte_supported_shift(unsigned long size) { -- cgit v1.2.3 From 63de899cb6220357dea9d0f4e5aa459ff5193bb0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 9 May 2025 12:12:53 +0100 Subject: io_uring: count allocated requests Keep track of the number of requests a ring currently has allocated (and not freed), it'll be needed in the next patch.
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c8f8308294dc2a1cb8925d984d937d4fc14ab5d4.1746788718.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 7e23e993280e..73b289b48280 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -435,6 +435,7 @@ struct io_ring_ctx { /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; + unsigned nr_req_allocated; /* * Protection for resize vs mmap races - both the mmap and resize -- cgit v1.2.3 From bb71e40db1b7a7822f434d211b4a94bab9996cc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 9 May 2025 14:22:39 +0100 Subject: mfd: max77759: Add Maxim MAX77759 core driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Maxim MAX77759 is a companion PMIC for USB Type-C applications and includes Battery Charger, Fuel Gauge, temperature sensors, USB Type-C Port Controller (TCPC), NVMEM, and a GPIO expander. Fuel Gauge and TCPC have separate and independent I2C addresses, register maps, and interrupt lines and are therefore excluded from the MFD core device driver here. The GPIO and NVMEM interfaces are accessed via specific commands to the built-in microprocessor. This driver implements an API that client drivers can use for accessing those. Signed-off-by: André Draszik Acked-by: Peter Griffin Link: https://lore.kernel.org/r/20250509-max77759-mfd-v10-1-962ac15ee3ef@linaro.org Signed-off-by: Lee Jones --- include/linux/mfd/max77759.h | 165 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 include/linux/mfd/max77759.h (limited to 'include') diff --git a/include/linux/mfd/max77759.h b/include/linux/mfd/max77759.h new file mode 100644 index 000000000000..c6face34e385 --- /dev/null +++ b/include/linux/mfd/max77759.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2020 Google Inc. + * Copyright 2025 Linaro Ltd. 
+ * + * Maxim MAX77759 core driver + */ + +#ifndef __LINUX_MFD_MAX77759_H +#define __LINUX_MFD_MAX77759_H + +#include +#include +#include + +#define MAX77759_PMIC_REG_PMIC_ID 0x00 +#define MAX77759_PMIC_REG_PMIC_REVISION 0x01 +#define MAX77759_PMIC_REG_OTP_REVISION 0x02 +#define MAX77759_PMIC_REG_INTSRC 0x22 +#define MAX77759_PMIC_REG_INTSRCMASK 0x23 +#define MAX77759_PMIC_REG_INTSRC_MAXQ BIT(3) +#define MAX77759_PMIC_REG_INTSRC_TOPSYS BIT(1) +#define MAX77759_PMIC_REG_INTSRC_CHGR BIT(0) +#define MAX77759_PMIC_REG_TOPSYS_INT 0x24 +#define MAX77759_PMIC_REG_TOPSYS_INT_MASK 0x26 +#define MAX77759_PMIC_REG_TOPSYS_INT_TSHDN BIT(6) +#define MAX77759_PMIC_REG_TOPSYS_INT_SYSOVLO BIT(5) +#define MAX77759_PMIC_REG_TOPSYS_INT_SYSUVLO BIT(4) +#define MAX77759_PMIC_REG_TOPSYS_INT_FSHIP BIT(0) +#define MAX77759_PMIC_REG_I2C_CNFG 0x40 +#define MAX77759_PMIC_REG_SWRESET 0x50 +#define MAX77759_PMIC_REG_CONTROL_FG 0x51 + +#define MAX77759_MAXQ_REG_UIC_INT1 0x64 +#define MAX77759_MAXQ_REG_UIC_INT1_APCMDRESI BIT(7) +#define MAX77759_MAXQ_REG_UIC_INT1_SYSMSGI BIT(6) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIO6I BIT(1) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIO5I BIT(0) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, en) (((en) & 1) << (offs)) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIOxI_MASK(offs) \ + MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, ~0) +#define MAX77759_MAXQ_REG_UIC_INT2 0x65 +#define MAX77759_MAXQ_REG_UIC_INT3 0x66 +#define MAX77759_MAXQ_REG_UIC_INT4 0x67 +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS1 0x68 +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS2 0x69 +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS3 0x6a +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS4 0x6b +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS5 0x6c +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS6 0x6d +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS7 0x6f +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS8 0x6f +#define MAX77759_MAXQ_REG_UIC_INT1_M 0x70 +#define MAX77759_MAXQ_REG_UIC_INT2_M 0x71 +#define MAX77759_MAXQ_REG_UIC_INT3_M 0x72 +#define MAX77759_MAXQ_REG_UIC_INT4_M 0x73 +#define MAX77759_MAXQ_REG_AP_DATAOUT0 0x81 +#define MAX77759_MAXQ_REG_AP_DATAOUT32 0xa1 +#define MAX77759_MAXQ_REG_AP_DATAIN0 0xb1 +#define MAX77759_MAXQ_REG_UIC_SWRST 0xe0 + +#define MAX77759_CHGR_REG_CHG_INT 0xb0 +#define MAX77759_CHGR_REG_CHG_INT2 0xb1 +#define MAX77759_CHGR_REG_CHG_INT_MASK 0xb2 +#define MAX77759_CHGR_REG_CHG_INT2_MASK 0xb3 +#define MAX77759_CHGR_REG_CHG_INT_OK 0xb4 +#define MAX77759_CHGR_REG_CHG_DETAILS_00 0xb5 +#define MAX77759_CHGR_REG_CHG_DETAILS_01 0xb6 +#define MAX77759_CHGR_REG_CHG_DETAILS_02 0xb7 +#define MAX77759_CHGR_REG_CHG_DETAILS_03 0xb8 +#define MAX77759_CHGR_REG_CHG_CNFG_00 0xb9 +#define MAX77759_CHGR_REG_CHG_CNFG_01 0xba +#define MAX77759_CHGR_REG_CHG_CNFG_02 0xbb +#define MAX77759_CHGR_REG_CHG_CNFG_03 0xbc +#define MAX77759_CHGR_REG_CHG_CNFG_04 0xbd +#define MAX77759_CHGR_REG_CHG_CNFG_05 0xbe +#define MAX77759_CHGR_REG_CHG_CNFG_06 0xbf +#define MAX77759_CHGR_REG_CHG_CNFG_07 0xc0 +#define MAX77759_CHGR_REG_CHG_CNFG_08 0xc1 +#define MAX77759_CHGR_REG_CHG_CNFG_09 0xc2 +#define MAX77759_CHGR_REG_CHG_CNFG_10 0xc3 +#define MAX77759_CHGR_REG_CHG_CNFG_11 0xc4 +#define MAX77759_CHGR_REG_CHG_CNFG_12 0xc5 +#define MAX77759_CHGR_REG_CHG_CNFG_13 0xc6 +#define MAX77759_CHGR_REG_CHG_CNFG_14 0xc7 +#define MAX77759_CHGR_REG_CHG_CNFG_15 0xc8 +#define MAX77759_CHGR_REG_CHG_CNFG_16 0xc9 +#define MAX77759_CHGR_REG_CHG_CNFG_17 0xca +#define MAX77759_CHGR_REG_CHG_CNFG_18 0xcb +#define MAX77759_CHGR_REG_CHG_CNFG_19 0xcc + +/* MaxQ opcodes for max77759_maxq_command() */ +#define 
MAX77759_MAXQ_OPCODE_MAXLENGTH (MAX77759_MAXQ_REG_AP_DATAOUT32 - \ + MAX77759_MAXQ_REG_AP_DATAOUT0 + \ + 1) + +#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_READ 0x21 +#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_WRITE 0x22 +#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_READ 0x23 +#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_WRITE 0x24 +#define MAX77759_MAXQ_OPCODE_USER_SPACE_READ 0x81 +#define MAX77759_MAXQ_OPCODE_USER_SPACE_WRITE 0x82 + +/** + * struct max77759 - core max77759 internal data structure + * + * @regmap_top: Regmap for accessing TOP registers + * @maxq_lock: Lock for serializing access to MaxQ + * @regmap_maxq: Regmap for accessing MaxQ registers + * @cmd_done: Used to signal completion of a MaxQ command + * @regmap_charger: Regmap for accessing charger registers + * + * The MAX77759 comprises several sub-blocks, namely TOP, MaxQ, Charger, + * Fuel Gauge, and TCPCI. + */ +struct max77759 { + struct regmap *regmap_top; + + /* This protects MaxQ commands - only one can be active */ + struct mutex maxq_lock; + struct regmap *regmap_maxq; + struct completion cmd_done; + + struct regmap *regmap_charger; +}; + +/** + * struct max77759_maxq_command - structure containing the MaxQ command to + * send + * + * @length: The number of bytes to send. + * @cmd: The data to send. + */ +struct max77759_maxq_command { + u8 length; + u8 cmd[] __counted_by(length); +}; + +/** + * struct max77759_maxq_response - structure containing the MaxQ response + * + * @length: The number of bytes to receive. + * @rsp: The data received. Must have at least @length bytes space. + */ +struct max77759_maxq_response { + u8 length; + u8 rsp[] __counted_by(length); +}; + +/** + * max77759_maxq_command() - issue a MaxQ command and wait for the response + * and associated data + * + * @max77759: The core max77759 device handle. + * @cmd: The command to be sent. + * @rsp: Any response data associated with the command will be copied here; + * can be %NULL if the command has no response (other than ACK). + * + * Return: 0 on success, a negative error number otherwise. + */ +int max77759_maxq_command(struct max77759 *max77759, + const struct max77759_maxq_command *cmd, + struct max77759_maxq_response *rsp); + +#endif /* __LINUX_MFD_MAX77759_H */ -- cgit v1.2.3 From ee971630f20fd421fffcdc4543731ebcb54ed6d0 Mon Sep 17 00:00:00 2001 From: Feng Yang Date: Tue, 6 May 2025 14:14:33 +0800 Subject: bpf: Allow some trace helpers for all prog types if it works under NMI and doesn't use any context-dependent things, should be fine for any program type. The detailed discussion is in [1]. 
[1] https://lore.kernel.org/all/CAEf4Bza6gK3dsrTosk6k3oZgtHesNDSrDd8sdeQ-GiS6oJixQg@mail.gmail.com/ Suggested-by: Andrii Nakryiko Signed-off-by: Feng Yang Signed-off-by: Andrii Nakryiko Acked-by: Tejun Heo Link: https://lore.kernel.org/bpf/20250506061434.94277-2-yangfeng59949@163.com --- include/linux/bpf-cgroup.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 9de7adb68294..4847dcade917 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -427,8 +427,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, const struct bpf_func_proto * cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); -const struct bpf_func_proto * -cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); #else static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } @@ -465,12 +463,6 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } -static inline const struct bpf_func_proto * -cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) -{ - return NULL; -} - static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( -- cgit v1.2.3 From c24a65b6a27c78d8540409800886b6622ea86ebf Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Tue, 15 Apr 2025 21:15:49 -0500 Subject: iidc/ice/irdma: Update IDC to support multiple consumers In preparation of supporting more than a single core PCI driver for RDMA, move ice specific structs like qset_params, qos_info and qos_params from iidc_rdma.h to iidc_rdma_ice.h. Previously, the ice driver was just exporting its entire PF struct to the auxiliary driver, but since each core driver will have its own different PF struct, implement a universal struct that all core drivers can provide to the auxiliary driver through the probe call. 
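As a rough sketch of the resulting flow (function and variable names here are hypothetical; only struct iidc_rdma_core_dev_info and struct iidc_rdma_core_auxiliary_dev come from the header changes below): the core PCI driver describes itself in a cdev_info that the RDMA auxiliary driver recovers again in its probe callback:

    /* Core LAN PCI driver side (sketch): publish the shared device info. */
    static void core_fill_cdev_info(struct iidc_rdma_core_auxiliary_dev *iadev,
                                    struct pci_dev *pdev, void *drv_priv)
    {
        iadev->cdev_info->pdev = pdev;
        iadev->cdev_info->rdma_protocol = IIDC_RDMA_PROTOCOL_ROCEV2;
        iadev->cdev_info->iidc_priv = drv_priv;   /* driver-specific block */
        /* followed by auxiliary_device_init()/auxiliary_device_add() */
    }

    /* RDMA auxiliary driver side (sketch): recover the shared info in probe. */
    static int rdma_aux_probe(struct auxiliary_device *adev,
                              const struct auxiliary_device_id *id)
    {
        struct iidc_rdma_core_auxiliary_dev *iadev =
            container_of(adev, struct iidc_rdma_core_auxiliary_dev, adev);
        struct iidc_rdma_core_dev_info *cdev_info = iadev->cdev_info;

        /* use cdev_info->pdev, ->rdma_protocol and ->iidc_priv from here */
        return 0;
    }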
Reviewed-by: Przemek Kitszel Signed-off-by: Dave Ertman Co-developed-by: Mustafa Ismail Signed-off-by: Mustafa Ismail Co-developed-by: Shiraz Saleem Signed-off-by: Shiraz Saleem Co-developed-by: Tatyana Nikolova Signed-off-by: Tatyana Nikolova Signed-off-by: Tony Nguyen --- include/linux/net/intel/iidc_rdma.h | 67 +++++++++----------------------- include/linux/net/intel/iidc_rdma_ice.h | 69 ++++++++++++++++++++++++++++----- 2 files changed, 78 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h index 7f1910289534..8baad1082042 100644 --- a/include/linux/net/intel/iidc_rdma.h +++ b/include/linux/net/intel/iidc_rdma.h @@ -5,7 +5,6 @@ #define _IIDC_RDMA_H_ #include -#include #include #include #include @@ -17,14 +16,19 @@ enum iidc_rdma_event_type { IIDC_RDMA_EVENT_AFTER_MTU_CHANGE, IIDC_RDMA_EVENT_BEFORE_TC_CHANGE, IIDC_RDMA_EVENT_AFTER_TC_CHANGE, + IIDC_RDMA_EVENT_WARN_RESET, IIDC_RDMA_EVENT_CRIT_ERR, IIDC_RDMA_EVENT_NBITS /* must be last */ }; +struct iidc_rdma_event { + DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS); + u32 reg; +}; + enum iidc_rdma_reset_type { - IIDC_PFR, - IIDC_CORER, - IIDC_GLOBR, + IIDC_FUNC_RESET, + IIDC_DEV_RESET, }; enum iidc_rdma_protocol { @@ -32,53 +36,22 @@ enum iidc_rdma_protocol { IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1), }; -#define IIDC_MAX_USER_PRIORITY 8 -#define IIDC_DSCP_PFC_MODE 0x1 - -/* Struct to hold per RDMA Qset info */ -struct iidc_rdma_qset_params { - /* Qset TEID returned to the RDMA driver in - * ice_add_rdma_qset and used by RDMA driver - * for calls to ice_del_rdma_qset - */ - u32 teid; /* Qset TEID */ - u16 qs_handle; /* RDMA driver provides this */ - u16 vport_id; /* VSI index */ - u8 tc; /* TC branch the Qset should belong to */ -}; - -struct iidc_rdma_qos_info { - u64 tc_ctx; - u8 rel_bw; - u8 prio_type; - u8 egress_virt_up; - u8 ingress_virt_up; -}; - -/* Struct to pass QoS info */ -struct iidc_rdma_qos_params { - struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; - u8 up2tc[IIDC_MAX_USER_PRIORITY]; - u8 vport_relative_bw; - u8 vport_priority_type; - u8 num_tc; - u8 pfc_mode; - u8 dscp_map[DSCP_MAX]; -}; - -struct iidc_rdma_event { - DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS); - u32 reg; +/* Structure to be populated by core LAN PCI driver */ +struct iidc_rdma_core_dev_info { + struct pci_dev *pdev; /* PCI device of corresponding to main function */ + struct auxiliary_device *adev; + /* Current active RDMA protocol */ + enum iidc_rdma_protocol rdma_protocol; + void *iidc_priv; /* elements unique to each driver */ }; /* Structure representing auxiliary driver tailored information about the core * PCI dev, each auxiliary driver using the IIDC interface will have an * instance of this struct dedicated to it. */ - struct iidc_rdma_core_auxiliary_dev { struct auxiliary_device adev; - struct ice_pf *pf; + struct iidc_rdma_core_dev_info *cdev_info; }; /* structure representing the auxiliary driver. This struct is to be @@ -88,12 +61,8 @@ struct iidc_rdma_core_auxiliary_dev { */ struct iidc_rdma_core_auxiliary_drv { struct auxiliary_driver adrv; - /* This event_handler is meant to be a blocking call. For instance, - * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not - * return until the auxiliary driver is ready for the MTU change to - * happen. 
- */ - void (*event_handler)(struct ice_pf *pf, struct iidc_rdma_event *event); + void (*event_handler)(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_event *event); }; #endif /* _IIDC_RDMA_H_*/ diff --git a/include/linux/net/intel/iidc_rdma_ice.h b/include/linux/net/intel/iidc_rdma_ice.h index 78d10003d776..b40eed0e13fe 100644 --- a/include/linux/net/intel/iidc_rdma_ice.h +++ b/include/linux/net/intel/iidc_rdma_ice.h @@ -4,16 +4,67 @@ #ifndef _IIDC_RDMA_ICE_H_ #define _IIDC_RDMA_ICE_H_ -struct ice_pf; +#include -int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_rdma_request_reset(struct ice_pf *pf, +#define IIDC_MAX_USER_PRIORITY 8 +#define IIDC_DSCP_PFC_MODE 0x1 + +/** + * struct iidc_rdma_qset_params - Struct to hold per RDMA Qset info + * @teid: TEID of the Qset node + * @qs_handle: SW index of the Qset, RDMA provides this + * @vport_id: VSI index + * @tc: Traffic Class branch the QSet should belong to + */ +struct iidc_rdma_qset_params { + /* Qset TEID returned to the RDMA driver in + * ice_add_rdma_qset and used by RDMA driver + * for calls to ice_del_rdma_qset + */ + u32 teid; + u16 qs_handle; + u16 vport_id; + u8 tc; +}; + +struct iidc_rdma_qos_info { + u64 tc_ctx; + u8 rel_bw; + u8 prio_type; + u8 egress_virt_up; + u8 ingress_virt_up; +}; + +/* Struct to pass QoS info */ +struct iidc_rdma_qos_params { + struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; + u8 up2tc[IIDC_MAX_USER_PRIORITY]; + u8 vport_relative_bw; + u8 vport_priority_type; + u8 num_tc; + u8 pfc_mode; + u8 dscp_map[DSCP_MAX]; +}; + +struct iidc_rdma_priv_dev_info { + u8 pf_id; + u16 vport_id; + struct net_device *netdev; + struct iidc_rdma_qos_params qos_info; + u8 __iomem *hw_addr; +}; + +int ice_add_rdma_qset(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_qset_params *qset); +int ice_del_rdma_qset(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_qset_params *qset); +int ice_rdma_request_reset(struct iidc_rdma_core_dev_info *cdev, enum iidc_rdma_reset_type reset_type); -int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); -void ice_get_qos_params(struct ice_pf *pf, - struct iidc_rdma_qos_params *qos); -int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); -void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); +int ice_rdma_update_vsi_filter(struct iidc_rdma_core_dev_info *cdev, u16 vsi_id, + bool enable); +int ice_alloc_rdma_qvector(struct iidc_rdma_core_dev_info *cdev, + struct msix_entry *entry); +void ice_free_rdma_qvector(struct iidc_rdma_core_dev_info *cdev, + struct msix_entry *entry); #endif /* _IIDC_RDMA_ICE_H_*/ -- cgit v1.2.3 From 092a38565ed87cbda6fe7dd16c742df6f907483e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 5 May 2025 17:21:13 -0400 Subject: ring-buffer: Add ring_buffer_record_is_on_cpu() Add the function ring_buffer_record_is_on_cpu() that returns true if the ring buffer for a give CPU is writable and false otherwise. Also add tracer_tracing_is_on_cpu() to return if the ring buffer for a given CPU is writeable for a given trace_array. 
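A short usage sketch (the caller below is made up; only ring_buffer_record_is_on_cpu() itself is added by this patch): per-CPU writers can now check the writability of just their own buffer instead of the whole trace_buffer:

    /* Sketch: skip CPUs whose per-CPU ring buffer is not writable. */
    static void touch_buffer_on_cpu(struct trace_buffer *buffer, int cpu)
    {
        if (!ring_buffer_record_is_on_cpu(buffer, cpu))
            return;   /* recording is disabled on this CPU */

        /* ... reserve and commit an event for this CPU as usual ... */
    }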
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250505212236.059853898@goodmis.org Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 56e27263acf8..cd7f0ae26615 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -192,6 +192,7 @@ void ring_buffer_record_off(struct trace_buffer *buffer); void ring_buffer_record_on(struct trace_buffer *buffer); bool ring_buffer_record_is_on(struct trace_buffer *buffer); bool ring_buffer_record_is_set_on(struct trace_buffer *buffer); +bool ring_buffer_record_is_on_cpu(struct trace_buffer *buffer, int cpu); void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu); void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu); -- cgit v1.2.3 From 823153334042746604fdb416ea358a90940c1d83 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 May 2025 17:35:37 +0200 Subject: bpf: Add support to retrieve ref_ctr_offset for uprobe perf link Adding support to retrieve ref_ctr_offset for uprobe perf link, which got somehow omitted from the initial uprobe link info changes. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yafang Shao Link: https://lore.kernel.org/bpf/20250509153539.779599-2-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 71d5ac83cf5d..16e95398c91c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6724,6 +6724,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ -- cgit v1.2.3 From f4818881c47fd91fcb6d62373c57c7844e3de1c0 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 20:23:23 -0700 Subject: x86/its: Enable Indirect Target Selection mitigation Indirect Target Selection (ITS) is a bug in some pre-ADL Intel CPUs with eIBRS. It affects prediction of indirect branch and RETs in the lower half of cacheline. Due to ITS such branches may get wrongly predicted to a target of (direct or indirect) branch that is located in the upper half of the cacheline. Scope of impact =============== Guest/host isolation -------------------- When eIBRS is used for guest/host isolation, the indirect branches in the VMM may still be predicted with targets corresponding to branches in the guest. Intra-mode ---------- cBPF or other native gadgets can be used for intra-mode training and disclosure using ITS. User/kernel isolation --------------------- When eIBRS is enabled user/kernel isolation is not impacted. Indirect Branch Prediction Barrier (IBPB) ----------------------------------------- After an IBPB, indirect branches may be predicted with targets corresponding to direct branches which were executed prior to IBPB. This is mitigated by a microcode update. Add cmdline parameter indirect_target_selection=off|on|force to control the mitigation to relocate the affected branches to an ITS-safe thunk i.e. located in the upper half of cacheline. Also add the sysfs reporting. When retpoline mitigation is deployed, ITS safe-thunks are not needed, because retpoline sequence is already ITS-safe. 
Similarly, when call depth tracking (CDT) mitigation is deployed (retbleed=stuff), ITS safe return thunk is not used, as CDT prevents RSB-underflow. To not overcomplicate things, ITS mitigation is not supported with spectre-v2 lfence;jmp mitigation. Moreover, it is less practical to deploy lfence;jmp mitigation on ITS affected parts anyways. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e3049543008b..3aa955102b34 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -78,6 +78,8 @@ extern ssize_t cpu_show_gds(struct device *dev, extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From d6d1e3e6580ca35071ad474381f053cbf1fb6414 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Apr 2025 16:30:11 +0200 Subject: mm/execmem: Unify early execmem_cache behaviour Early kernel memory is RWX, only at the end of early boot (before SMP) do we mark things ROX. Have execmem_cache mirror this behaviour for early users. This avoids having to remember what code is execmem and what is not -- we can poke everything with impunity ;-) Also performance for not having to do endless text_poke_mm switches. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre --- include/linux/execmem.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 65655a5d1be2..089c71cc8ddf 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -53,7 +53,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) /** * execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -93,9 +93,15 @@ int execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); + +/* + * Called from mark_readonly(), where the system transitions to ROX. + */ +void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } +static inline void execmem_cache_make_ro(void) { } #endif /** -- cgit v1.2.3 From 872df34d7c51a79523820ea6a14860398c639b87 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Oct 2024 10:05:48 -0700 Subject: x86/its: Use dynamic thunks for indirect branches ITS mitigation moves the unsafe indirect branches to a safe thunk. This could degrade the prediction accuracy as the source address of indirect branches becomes same for different execution paths. To improve the predictions, and hence the performance, assign a separate thunk for each indirect callsite. This is also a defense-in-depth measure to avoid indirect branches aliasing with each other. 
As an example, 5000 dynamic thunks would utilize around 16 bits of the address space, thereby gaining entropy. For a BTB that uses 32 bits for indexing, dynamic thunks could provide better prediction accuracy over fixed thunks. Have ITS thunks be variable sized and use EXECMEM_MODULE_TEXT such that they are both more flexible (got to extend them later) and live in 2M TLBs, just like kernel code, avoiding undue TLB pressure. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre --- include/linux/execmem.h | 3 +++ include/linux/module.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 089c71cc8ddf..ca42d5e46ccc 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -4,6 +4,7 @@ #include #include +#include #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) @@ -176,6 +177,8 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); + #ifdef CONFIG_MMU /** * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory diff --git a/include/linux/module.h b/include/linux/module.h index b3329110d668..8050f77c3b64 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,6 +586,11 @@ struct module { atomic_t refcnt; #endif +#ifdef CONFIG_MITIGATION_ITS + int its_num_pages; + void **its_page_array; +#endif + #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; -- cgit v1.2.3 From 1b2900db0119c02e6445bb61ec3fba982d10cc8d Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 8 May 2025 13:30:34 +0300 Subject: ethtool: Block setting of symmetric RSS when non-symmetric rx-flow-hash is requested Symmetric RSS hash requires that: * No other fields besides IP src/dst and/or L4 src/dst are set * If src is set, dst must also be set This restriction was only enforced when RXNFC was configured after symmetric hash was enabled. In the opposite order of operations (RXNFC then symmetric enablement) the check was not performed. Perform the sanity check on set_rxfh as well, by iterating over all flow types hash fields and making sure they are all symmetric. Introduce a function that returns whether a flow type is hashable (not spec only) and needs to be iterated over. To make sure that no one forgets to update the list of hashable flow types when adding new flow types, a static assert is added to draw the developer's attention. The conversion of uapi #defines to enum is not ideal, but as Jakub mentioned [1], we have precedent for that. 
[1] https://lore.kernel.org/netdev/20250324073509.6571ade3@kernel.org/ Reviewed-by: Tariq Toukan Signed-off-by: Gal Pressman Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250508103034.885536-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 134 ++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 84833cca29fe..707c1844010c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2295,71 +2295,75 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define RXH_XFRM_SYM_OR_XOR (1 << 1) #define RXH_XFRM_NO_CHANGE 0xff -/* L2-L4 network traffic flow types */ -#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ -#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ -#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ -#define AH_ESP_V4_FLOW 0x04 /* hash only */ -#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */ -#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */ -#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */ -#define AH_ESP_V6_FLOW 0x08 /* hash only */ -#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ -#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ -#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */ -#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */ -#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ -#define IP_USER_FLOW IPV4_USER_FLOW -#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */ -#define IPV4_FLOW 0x10 /* hash only */ -#define IPV6_FLOW 0x11 /* hash only */ -#define ETHER_FLOW 0x12 /* spec only (ether_spec) */ - -/* Used for GTP-U IPv4 and IPv6. - * The format of GTP packets only includes - * elements such as TEID and GTP version. - * It is primarily intended for data communication of the UE. - */ -#define GTPU_V4_FLOW 0x13 /* hash only */ -#define GTPU_V6_FLOW 0x14 /* hash only */ - -/* Use for GTP-C IPv4 and v6. - * The format of these GTP packets does not include TEID. - * Primarily expected to be used for communication - * to create sessions for UE data communication, - * commonly referred to as CSR (Create Session Request). - */ -#define GTPC_V4_FLOW 0x15 /* hash only */ -#define GTPC_V6_FLOW 0x16 /* hash only */ - -/* Use for GTP-C IPv4 and v6. - * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. - * After session creation, it becomes this packet. - * This is mainly used for requests to realize UE handover. - */ -#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ -#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ - -/* Use for GTP-U and extended headers for the PSC (PDU Session Container). - * The format of these GTP packets includes TEID and QFI. - * In 5G communication using UPF (User Plane Function), - * data communication with this extended header is performed. - */ -#define GTPU_EH_V4_FLOW 0x19 /* hash only */ -#define GTPU_EH_V6_FLOW 0x1a /* hash only */ - -/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. - * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by - * UL/DL included in the PSC. - * There are differences in the data included based on Downlink/Uplink, - * and can be used to distinguish packets. - * The functions described so far are useful when you want to - * handle communication from the mobile network in UPF, PGW, etc. 
- */ -#define GTPU_UL_V4_FLOW 0x1b /* hash only */ -#define GTPU_UL_V6_FLOW 0x1c /* hash only */ -#define GTPU_DL_V4_FLOW 0x1d /* hash only */ -#define GTPU_DL_V6_FLOW 0x1e /* hash only */ +enum { + /* L2-L4 network traffic flow types */ + TCP_V4_FLOW = 0x01, /* hash or spec (tcp_ip4_spec) */ + UDP_V4_FLOW = 0x02, /* hash or spec (udp_ip4_spec) */ + SCTP_V4_FLOW = 0x03, /* hash or spec (sctp_ip4_spec) */ + AH_ESP_V4_FLOW = 0x04, /* hash only */ + TCP_V6_FLOW = 0x05, /* hash or spec (tcp_ip6_spec; nfc only) */ + UDP_V6_FLOW = 0x06, /* hash or spec (udp_ip6_spec; nfc only) */ + SCTP_V6_FLOW = 0x07, /* hash or spec (sctp_ip6_spec; nfc only) */ + AH_ESP_V6_FLOW = 0x08, /* hash only */ + AH_V4_FLOW = 0x09, /* hash or spec (ah_ip4_spec) */ + ESP_V4_FLOW = 0x0a, /* hash or spec (esp_ip4_spec) */ + AH_V6_FLOW = 0x0b, /* hash or spec (ah_ip6_spec; nfc only) */ + ESP_V6_FLOW = 0x0c, /* hash or spec (esp_ip6_spec; nfc only) */ + IPV4_USER_FLOW = 0x0d, /* spec only (usr_ip4_spec) */ + IP_USER_FLOW = IPV4_USER_FLOW, + IPV6_USER_FLOW = 0x0e, /* spec only (usr_ip6_spec; nfc only) */ + IPV4_FLOW = 0x10, /* hash only */ + IPV6_FLOW = 0x11, /* hash only */ + ETHER_FLOW = 0x12, /* spec only (ether_spec) */ + + /* Used for GTP-U IPv4 and IPv6. + * The format of GTP packets only includes + * elements such as TEID and GTP version. + * It is primarily intended for data communication of the UE. + */ + GTPU_V4_FLOW = 0x13, /* hash only */ + GTPU_V6_FLOW = 0x14, /* hash only */ + + /* Use for GTP-C IPv4 and v6. + * The format of these GTP packets does not include TEID. + * Primarily expected to be used for communication + * to create sessions for UE data communication, + * commonly referred to as CSR (Create Session Request). + */ + GTPC_V4_FLOW = 0x15, /* hash only */ + GTPC_V6_FLOW = 0x16, /* hash only */ + + /* Use for GTP-C IPv4 and v6. + * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. + * After session creation, it becomes this packet. + * This is mainly used for requests to realize UE handover. + */ + GTPC_TEID_V4_FLOW = 0x17, /* hash only */ + GTPC_TEID_V6_FLOW = 0x18, /* hash only */ + + /* Use for GTP-U and extended headers for the PSC (PDU Session Container). + * The format of these GTP packets includes TEID and QFI. + * In 5G communication using UPF (User Plane Function), + * data communication with this extended header is performed. + */ + GTPU_EH_V4_FLOW = 0x19, /* hash only */ + GTPU_EH_V6_FLOW = 0x1a, /* hash only */ + + /* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. + * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by + * UL/DL included in the PSC. + * There are differences in the data included based on Downlink/Uplink, + * and can be used to distinguish packets. + * The functions described so far are useful when you want to + * handle communication from the mobile network in UPF, PGW, etc. + */ + GTPU_UL_V4_FLOW = 0x1b, /* hash only */ + GTPU_UL_V6_FLOW = 0x1c, /* hash only */ + GTPU_DL_V4_FLOW = 0x1d, /* hash only */ + GTPU_DL_V6_FLOW = 0x1e, /* hash only */ + + __FLOW_TYPE_COUNT, +}; /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 -- cgit v1.2.3 From 6c14058edfd01cdc0d3018b9069643b0da7c3e80 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 8 May 2025 12:52:36 +0300 Subject: net: dsa: convert to ndo_hwtstamp_get() and ndo_hwtstamp_set() New timestamping API was introduced in commit 66f7223039c0 ("net: add NDOs for configuring hardware timestamping") from kernel v6.6. 
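Concretely, after the conversion a DSA driver's timestamping callbacks (see the dsa.h hunk below) take a struct kernel_hwtstamp_config and an extack directly instead of an ifreq; a hedged driver-side sketch, with the foo_* names and the saved-config helper being hypothetical:

    static int foo_port_hwtstamp_set(struct dsa_switch *ds, int port,
                                     struct kernel_hwtstamp_config *config,
                                     struct netlink_ext_ack *extack)
    {
        if (config->tx_type != HWTSTAMP_TX_OFF &&
            config->tx_type != HWTSTAMP_TX_ON) {
            NL_SET_ERR_MSG_MOD(extack, "TX timestamp mode not supported");
            return -ERANGE;
        }
        /* program the switch, then remember the accepted configuration */
        return 0;
    }

    static int foo_port_hwtstamp_get(struct dsa_switch *ds, int port,
                                     struct kernel_hwtstamp_config *config)
    {
        *config = foo_port_saved_config(ds, port);   /* hypothetical helper */
        return 0;
    }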
It is time to convert DSA to the new API, so that the ndo_eth_ioctl() path can be removed completely. Move the ds->ops->port_hwtstamp_get() and ds->ops->port_hwtstamp_set() calls from dsa_user_ioctl() to dsa_user_hwtstamp_get() and dsa_user_hwtstamp_set(). Due to the fact that the underlying ifreq type changes to kernel_hwtstamp_config, the drivers and the Ocelot switchdev front-end, all hooked up directly or indirectly, must also be converted all at once. The conversion also updates the comment from dsa_port_supports_hwtstamp(), which is no longer true because kernel_hwtstamp_config is kernel memory and does not need copy_to_user(). I've deliberated whether it is necessary to also update "err != -EOPNOTSUPP" to a more general "!err", but all drivers now either return 0 or -EOPNOTSUPP. The existing logic from the ocelot_ioctl() function, to avoid configuring timestamping if the PHY supports the operation, is obsoleted by more advanced core logic in dev_set_hwtstamp_phylib(). This is only a partial preparation for proper PHY timestamping support. None of these switch driver currently sets up PTP traps for PHY timestamping, so setting dev->see_all_hwtstamp_requests is not yet necessary and the conversion is relatively trivial. Signed-off-by: Vladimir Oltean Tested-by: Vladimir Oltean # felix, sja1105, mv88e6xxx Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250508095236.887789-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 5 +++-- include/soc/mscc/ocelot.h | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index a0a9481c52c2..55e2d97f247e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1131,9 +1131,10 @@ struct dsa_switch_ops { * PTP functionality */ int (*port_hwtstamp_get)(struct dsa_switch *ds, int port, - struct ifreq *ifr); + struct kernel_hwtstamp_config *config); int (*port_hwtstamp_set)(struct dsa_switch *ds, int port, - struct ifreq *ifr); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void (*port_txtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb); bool (*port_rxtstamp)(struct dsa_switch *ds, int port, diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6db7fc9dbaa4..48d6deb3efd7 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -1073,8 +1073,11 @@ int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged); int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid); -int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr); -int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr); +void ocelot_hwstamp_get(struct ocelot *ocelot, int port, + struct kernel_hwtstamp_config *cfg); +int ocelot_hwstamp_set(struct ocelot *ocelot, int port, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, struct sk_buff *skb, struct sk_buff **clone); -- cgit v1.2.3 From 20fb4ac9cda06527cf60c5ec7dda7c463c9c81be Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Wed, 7 May 2025 15:19:20 -0500 Subject: dt-bindings: clock: sun50i-h616-ccu: Add LVDS reset Add the required LVDS reset binding for the LCD TCON. 
Signed-off-by: Chris Morgan Signed-off-by: Ryan Walklin Reviewed-by: Andre Przywara Link: https://patch.msgid.link/20250507201943.330111-2-macroalpha82@gmail.com Signed-off-by: Chen-Yu Tsai --- include/dt-bindings/reset/sun50i-h616-ccu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/reset/sun50i-h616-ccu.h b/include/dt-bindings/reset/sun50i-h616-ccu.h index 81b1eba2a7f7..ba626f7015b5 100644 --- a/include/dt-bindings/reset/sun50i-h616-ccu.h +++ b/include/dt-bindings/reset/sun50i-h616-ccu.h @@ -69,5 +69,6 @@ #define RST_BUS_GPADC 60 #define RST_BUS_TCON_LCD0 61 #define RST_BUS_TCON_LCD1 62 +#define RST_BUS_LVDS 63 #endif /* _DT_BINDINGS_RESET_SUN50I_H616_H_ */ -- cgit v1.2.3 From f2987c5816bda01a8ffdd4eb5dfaaa2b41b67039 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 6 May 2025 20:48:59 +0800 Subject: block: export API to get the number of bdev inflight IO - rename part_in_{flight, flight_rw} to bdev_count_{inflight, inflight_rw} - export bdev_count_inflight, to fix a problem in mdraid that foreground IO can be starved by background sync IO in later patches Link: https://lore.kernel.org/linux-raid/20250506124903.2540268-6-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke --- include/linux/part_stat.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index c5e9cac0575e..eeeff2a04529 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -79,4 +79,6 @@ static inline void part_stat_set_all(struct block_device *part, int value) #define part_stat_local_read_cpu(part, field, cpu) \ local_read(&(part_stat_get_cpu(part, field, cpu))) +unsigned int bdev_count_inflight(struct block_device *part); + #endif /* _LINUX_PART_STAT_H */ -- cgit v1.2.3 From 752d0464b78a5b28682256ed7a057106119e1d1a Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 6 May 2025 20:49:03 +0800 Subject: md: clean up accounting for issued sync IO It's no longer used and can be removed, also remove the field 'gendisk->sync_io'. Link: https://lore.kernel.org/linux-raid/20250506124903.2540268-10-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 52b7a27c46e8..ad75dee3b213 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -182,7 +182,6 @@ struct gendisk { struct list_head slave_bdevs; #endif struct timer_rand_state *random; - atomic_t sync_io; /* RAID */ struct disk_events *ev; #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From b887afb9b2362b15c1ee5585df1fb8cf3a3384c6 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 24 Mar 2025 09:41:02 +0100 Subject: dt-bindings: clock: add SM6350 QCOM video clock bindings Add device tree bindings for video clock controller for SM6350 SoCs. 
Signed-off-by: Konrad Dybcio Co-developed-by: Luca Weiss Signed-off-by: Luca Weiss Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250324-sm6350-videocc-v2-2-cc22386433f4@fairphone.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm6350-videocc.h | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm6350-videocc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm6350-videocc.h b/include/dt-bindings/clock/qcom,sm6350-videocc.h new file mode 100644 index 000000000000..2af7f91fa023 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm6350-videocc.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + * Copyright (c) 2021, Konrad Dybcio + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_VIDEO_CC_SM6350_H +#define _DT_BINDINGS_CLK_QCOM_VIDEO_CC_SM6350_H + +/* VIDEO_CC clocks */ +#define VIDEO_PLL0 0 +#define VIDEO_PLL0_OUT_EVEN 1 +#define VIDEO_CC_IRIS_AHB_CLK 2 +#define VIDEO_CC_IRIS_CLK_SRC 3 +#define VIDEO_CC_MVS0_AXI_CLK 4 +#define VIDEO_CC_MVS0_CORE_CLK 5 +#define VIDEO_CC_MVSC_CORE_CLK 6 +#define VIDEO_CC_MVSC_CTL_AXI_CLK 7 +#define VIDEO_CC_SLEEP_CLK 8 +#define VIDEO_CC_SLEEP_CLK_SRC 9 +#define VIDEO_CC_VENUS_AHB_CLK 10 + +/* GDSCs */ +#define MVSC_GDSC 0 +#define MVS0_GDSC 1 + +#endif -- cgit v1.2.3 From 094ac8cff7858bee5fa4554f6ea66c964f8e160e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 10 May 2025 10:45:28 +0200 Subject: futex: Relax the rcu_assign_pointer() assignment of mm->futex_phash in futex_mm_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following commit added an rcu_assign_pointer() assignment to futex_mm_init() in : bd54df5ea7ca ("futex: Allow to resize the private local hash") Which breaks the build on older compilers (gcc-9, x86-64 defconfig): CC io_uring/futex.o In file included from ./arch/x86/include/generated/asm/rwonce.h:1, from ./include/linux/compiler.h:390, from ./include/linux/array_size.h:5, from ./include/linux/kernel.h:16, from io_uring/futex.c:2: ./include/linux/futex.h: In function 'futex_mm_init': ./include/linux/rcupdate.h:555:36: error: dereferencing pointer to incomplete type 'struct futex_private_hash' The problem is that this variant of rcu_assign_pointer() wants to know the full type of 'struct futex_private_hash', which type is local to futex.c: kernel/futex/core.c:struct futex_private_hash { There are a couple of mechanical solutions for this bug: - we can uninline futex_mm_init() and move it into futex/core.c - or we can share the structure definition with kernel/fork.c. But both of these solutions have disadvantages: the first one adds runtime overhead, while the second one dis-encapsulates private futex types. A third solution, implemented by this patch, is to just initialize mm->futex_phash with NULL like the patch below, it's not like this new MM's ->futex_phash can be observed externally until the task is inserted into the task list, which guarantees full store ordering. The relaxation of this initialization might also give a tiny speedup on certain platforms. 
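The reasoning is the usual RCU publication pattern (a generic illustration, not futex code; struct foo and publish_foo() are invented for the example): plain stores are fine while an object is still private, because the later publication provides the ordering -- rcu_assign_pointer() below, or in the futex case the new task becoming visible on the task list:

    struct foo { int val; void *priv; };
    static struct foo __rcu *global_foo;

    static int publish_foo(void)
    {
        struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

        if (!f)
            return -ENOMEM;
        f->val = 42;
        f->priv = NULL;                         /* plain store: f is still private */
        rcu_assign_pointer(global_foo, f);      /* ordered publication point */
        return 0;
    }

futex_mm_init() relies on the same argument, which is why the rcu_assign_pointer() there can be relaxed to a plain NULL assignment.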
Fixes: bd54df5ea7ca ("futex: Allow to resize the private local hash") Signed-off-by: Ingo Molnar Cc: André Almeida Cc: Darren Hart Cc: Davidlohr Bueso Cc: Juri Lelli Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Valentin Schneider Cc: Waiman Long Link: https://lore.kernel.org/r/aB8SI00EHBri23lB@gmail.com --- include/linux/futex.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index eccc99751bd9..168ffd5996b4 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -88,7 +88,14 @@ void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) { - rcu_assign_pointer(mm->futex_phash, NULL); + /* + * No need for rcu_assign_pointer() here, as we can rely on + * tasklist_lock write-ordering in copy_process(), before + * the task's MM becomes visible and the ->futex_phash + * becomes externally observable: + */ + mm->futex_phash = NULL; + mutex_init(&mm->futex_hash_lock); } -- cgit v1.2.3 From 1f93d877f09d987f08baedd50597aaaa72a37be4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 8 May 2025 21:12:07 +0300 Subject: ALSA/hda: intel-sdw-acpi: Correct sdw_intel_acpi_scan() function parameter The acpi_handle should be just a handle and not a pointer in sdw_intel_acpi_scan() parameter list. It is called with 'acpi_handle handle' as parameter and it is passing it to acpi_walk_namespace, which also expects acpi_handle and not acpi_handle* Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250508181207.22113-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- include/linux/soundwire/sdw_intel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 493d9de4e472..dc6ebaee3d18 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -365,7 +365,7 @@ struct sdw_intel_res { * on e.g. which machine driver to select (I2S mode, HDaudio or * SoundWire). */ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, +int sdw_intel_acpi_scan(acpi_handle parent_handle, struct sdw_intel_acpi_info *info); void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx); -- cgit v1.2.3 From f67c90d60f68cc02942895f0c594c5613ef43f64 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 9 May 2025 00:36:15 +0000 Subject: ALSA: isa/gus: remove snd_gf1_lfo_xxx() There is no snd_gf1_lfo_xxx() implementation, and no one is using it. Let's remove it. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87o6w2pqm8.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Takashi Iwai --- include/sound/gus.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/sound/gus.h b/include/sound/gus.h index bdd60ffe15c2..1c8fb6c93e50 100644 --- a/include/sound/gus.h +++ b/include/sound/gus.h @@ -513,22 +513,6 @@ struct _SND_IW_LFO_PROGRAM { unsigned short depth; }; -#if 0 -extern irqreturn_t snd_gf1_lfo_effect_interrupt(struct snd_gus_card * gus, snd_gf1_voice_t * voice); -#endif -extern void snd_gf1_lfo_init(struct snd_gus_card * gus); -extern void snd_gf1_lfo_done(struct snd_gus_card * gus); -extern void snd_gf1_lfo_program(struct snd_gus_card * gus, int voice, int lfo_type, struct _SND_IW_LFO_PROGRAM *program); -extern void snd_gf1_lfo_enable(struct snd_gus_card * gus, int voice, int lfo_type); -extern void snd_gf1_lfo_disable(struct snd_gus_card * gus, int voice, int lfo_type); -extern void snd_gf1_lfo_change_freq(struct snd_gus_card * gus, int voice, int lfo_type, int freq); -extern void snd_gf1_lfo_change_depth(struct snd_gus_card * gus, int voice, int lfo_type, int depth); -extern void snd_gf1_lfo_setup(struct snd_gus_card * gus, int voice, int lfo_type, int freq, int current_depth, int depth, int sweep, int shape); -extern void snd_gf1_lfo_shutdown(struct snd_gus_card * gus, int voice, int lfo_type); -#if 0 -extern void snd_gf1_lfo_command(struct snd_gus_card * gus, int voice, unsigned char *command); -#endif - /* gus_mem.c */ void snd_gf1_mem_lock(struct snd_gf1_mem * alloc, int xup); -- cgit v1.2.3 From f95e4b6367f7ce062078dfb4769d283c56703872 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 9 May 2025 00:36:31 +0000 Subject: ALSA: wavefront: remove snd_wavefront_xxx() There is no snd_wavefront_xxx() implementation, and no one is using it. Let's remove it. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87msbmpqls.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Takashi Iwai --- include/sound/snd_wavefront.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/sound/snd_wavefront.h b/include/sound/snd_wavefront.h index 27f7e8a477c2..30f508a56766 100644 --- a/include/sound/snd_wavefront.h +++ b/include/sound/snd_wavefront.h @@ -110,12 +110,8 @@ struct _snd_wavefront_card { }; extern void snd_wavefront_internal_interrupt (snd_wavefront_card_t *card); -extern int snd_wavefront_detect_irq (snd_wavefront_t *dev) ; -extern int snd_wavefront_check_irq (snd_wavefront_t *dev, int irq); -extern int snd_wavefront_restart (snd_wavefront_t *dev); extern int snd_wavefront_start (snd_wavefront_t *dev); extern int snd_wavefront_detect (snd_wavefront_card_t *card); -extern int snd_wavefront_config_midi (snd_wavefront_t *dev) ; extern int snd_wavefront_cmd (snd_wavefront_t *, int, unsigned char *, unsigned char *); -- cgit v1.2.3 From dd33993a9721ab1dae38bd37c9f665987d554239 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 11 May 2025 16:11:45 +0200 Subject: ALSA: ump: Fix a typo of snd_ump_stream_msg_device_info s/devince/device/ It's used only internally, so there are no behavior changes.
Fixes: 37e0e14128e0 ("ALSA: ump: Support UMP Endpoint and Function Block parsing") Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250511141147.10246-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump_msg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/ump_msg.h b/include/sound/ump_msg.h index 72f60ddfea75..9556b4755a1e 100644 --- a/include/sound/ump_msg.h +++ b/include/sound/ump_msg.h @@ -604,7 +604,7 @@ struct snd_ump_stream_msg_ep_info { } __packed; /* UMP Stream Message: Device Info Notification (128bit) */ -struct snd_ump_stream_msg_devince_info { +struct snd_ump_stream_msg_device_info { #ifdef __BIG_ENDIAN_BITFIELD /* 0 */ u32 type:4; @@ -754,7 +754,7 @@ struct snd_ump_stream_msg_fb_name { union snd_ump_stream_msg { struct snd_ump_stream_msg_ep_discovery ep_discovery; struct snd_ump_stream_msg_ep_info ep_info; - struct snd_ump_stream_msg_devince_info device_info; + struct snd_ump_stream_msg_device_info device_info; struct snd_ump_stream_msg_stream_cfg stream_cfg; struct snd_ump_stream_msg_fb_discovery fb_discovery; struct snd_ump_stream_msg_fb_info fb_info; -- cgit v1.2.3 From 85c5d9b9464b17eb2f5a00edd145f45a12ae31ba Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 8 May 2025 19:16:35 +0530 Subject: dt-bindings: arm: qcom,ids: add SoC ID for SM8750 Add the unique ID for Qualcomm SM8750 SoC. This value is used to differentiate the SoC across qcom targets. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Mukesh Ojha Reviewed-by: Melody Olvera Link: https://lore.kernel.org/r/20250508134635.1627031-2-mukesh.ojha@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index 1b3e0176dcb7..897b8135dc12 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -277,6 +277,7 @@ #define QCOM_ID_IPQ5302 595 #define QCOM_ID_QCS8550 603 #define QCOM_ID_QCM8550 604 +#define QCOM_ID_SM8750 618 #define QCOM_ID_IPQ5300 624 #define QCOM_ID_IPQ5321 650 #define QCOM_ID_IPQ5424 651 -- cgit v1.2.3 From 18c64378ad85ef00e70f196793ee8901a8aa2fa1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Apr 2025 10:22:14 -0400 Subject: sunrpc: add info about xprt queue times to svc_xprt_dequeue tracepoint I've been looking at a problem where we see increased RPC timeouts in clients when the nfs_layout_flexfiles dataserver_timeo value is tuned very low (6s). This is necessary to ensure quick failover to a different mirror if a server goes down, but it causes a lot more major RPC timeouts. Ultimately, though, the problem is server-side: the server sometimes doesn't respond to connection attempts. My theory is that the interrupt handler runs when a connection comes in, the xprt ends up being enqueued, but it takes a significant amount of time for the nfsd thread to pick it up. Currently, the svc_xprt_dequeue tracepoint displays "wakeup-us". This is the time between the wake_up() call and the thread dequeueing the xprt. If no thread was woken, or the thread ended up picking up a different xprt than intended, then this value won't tell us how long the xprt was waiting. Add a new xpt_qtime field to struct svc_xprt and set it in svc_xprt_enqueue(). When the dequeue tracepoint fires, also store the time that the xprt sat on the queue in total. Display it as "qtime-us".
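For illustration only, a minimal sketch of the enqueue side described above (kernel context assumed; the real svc_xprt_enqueue() in net/sunrpc/svc_xprt.c also handles the ready list and thread wake-up, which is elided here):

static void svc_xprt_enqueue_sketch(struct svc_xprt *xprt)
{
	/* Stamp the moment the transport is queued; reported later as qtime-us. */
	xprt->xpt_qtime = ktime_get();

	/* ... put the xprt on the ready list and wake an nfsd thread ... */
}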
Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 + include/trace/events/sunrpc.h | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 72be60952579..369a89aea186 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -53,6 +53,7 @@ struct svc_xprt { struct svc_xprt_class *xpt_class; const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; + ktime_t xpt_qtime; struct list_head xpt_list; struct lwq_node xpt_ready; unsigned long xpt_flags; diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5d331383047b..67db3f2953d5 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2040,19 +2040,20 @@ TRACE_EVENT(svc_xprt_dequeue, TP_STRUCT__entry( SVC_XPRT_ENDPOINT_FIELDS(rqst->rq_xprt) - __field(unsigned long, wakeup) + __field(unsigned long, qtime) ), TP_fast_assign( - SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt); + ktime_t ktime = ktime_get(); - __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), - rqst->rq_qtime)); + SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt); + __entry->wakeup = ktime_to_us(ktime_sub(ktime, rqst->rq_qtime)); + __entry->qtime = ktime_to_us(ktime_sub(ktime, rqst->rq_xprt->xpt_qtime)); ), - TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu", - SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu qtime-us=%lu", + SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup, __entry->qtime) ); DECLARE_EVENT_CLASS(svc_xprt_event, -- cgit v1.2.3 From adbdd746e8ba10088ad3efebee1ec278106a1a38 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:21 -0400 Subject: nfsd: add a tracepoint for nfsd_setattr Turn Sargun's internal kprobe based implementation of this into a normal static tracepoint. Also, remove the dprintk's that got added recently with the fix for zero-length ACLs. 
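As an illustration of how the new helper is meant to be consumed, here is a hypothetical tracepoint using it; the actual nfsd_setattr tracepoint lives in fs/nfsd/trace.h and is not part of this hunk, and the usual trace-header boilerplate is omitted:

TRACE_EVENT(nfsd_setattr_sketch,
	TP_PROTO(unsigned int ia_valid),
	TP_ARGS(ia_valid),
	TP_STRUCT__entry(
		__field(unsigned int, ia_valid)
	),
	TP_fast_assign(
		__entry->ia_valid = ia_valid;
	),
	TP_printk("ia_valid=%s", show_ia_valid_flags(__entry->ia_valid))
);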
Cc: Sargun Dillon Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/trace/misc/fs.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h index 738b97f22f36..0406ebe2a80a 100644 --- a/include/trace/misc/fs.h +++ b/include/trace/misc/fs.h @@ -120,3 +120,24 @@ { LOOKUP_BENEATH, "BENEATH" }, \ { LOOKUP_IN_ROOT, "IN_ROOT" }, \ { LOOKUP_CACHED, "CACHED" }) + +#define show_ia_valid_flags(flags) \ + __print_flags(flags, "|", \ + { ATTR_MODE, "MODE" }, \ + { ATTR_UID, "UID" }, \ + { ATTR_GID, "GID" }, \ + { ATTR_SIZE, "SIZE" }, \ + { ATTR_ATIME, "ATIME" }, \ + { ATTR_MTIME, "MTIME" }, \ + { ATTR_CTIME, "CTIME" }, \ + { ATTR_ATIME_SET, "ATIME_SET" }, \ + { ATTR_MTIME_SET, "MTIME_SET" }, \ + { ATTR_FORCE, "FORCE" }, \ + { ATTR_KILL_SUID, "KILL_SUID" }, \ + { ATTR_KILL_SGID, "KILL_SGID" }, \ + { ATTR_FILE, "FILE" }, \ + { ATTR_KILL_PRIV, "KILL_PRIV" }, \ + { ATTR_OPEN, "OPEN" }, \ + { ATTR_TIMES_SET, "TIMES_SET" }, \ + { ATTR_TOUCH, "TOUCH"}, \ + { ATTR_DELEG, "DELEG"}) -- cgit v1.2.3 From 0ae0227fa31dda5bfc6b5a0145952d46fe57408b Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Tue, 6 May 2025 03:30:34 +0800 Subject: mm/codetag: move tag retrieval back upfront in __free_pages() Commit 51ff4d7486f0 ("mm: avoid extra mem_alloc_profiling_enabled() checks") introduces a possible use-after-free scenario: when the page is non-compound, page[0] can be released by another thread right after put_page_testzero() fails in the current thread, and pgalloc_tag_sub_pages() then manipulates an invalid page while accounting for the remaining pages:

  [timeline]
  thread1: alloc_page, non-compound
  thread2: get_page, refcount inc
  thread1: in ___free_pages, put_page_testzero fails
  thread2: put_page, page released
  thread1: in ___free_pages, pgalloc_tag_sub_pages manipulates an invalid page

Restore __free_pages() to its previous behavior: retrieve the alloc tag beforehand.
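A simplified sketch of the corrected ordering, under the assumption that the accounting call takes the tag returned by pgalloc_tag_get(); the free helper below is a placeholder, and the real __free_pages() in mm/page_alloc.c also frees the tail pages one order at a time:

static void free_pages_sketch(struct page *page, unsigned int order)
{
	/* Look the tag up while we still hold a reference, so page[0] cannot go away. */
	struct alloc_tag *tag = pgalloc_tag_get(page);

	if (put_page_testzero(page))
		free_frozen_pages_sketch(page, order);		/* placeholder for the real free path */
	else if (!PageHead(page))
		pgalloc_tag_sub_pages(tag, (1 << order) - 1);	/* account the remaining pages */
}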
Link: https://lkml.kernel.org/r/20250505193034.91682-1-00107082@163.com Fixes: 51ff4d7486f0 ("mm: avoid extra mem_alloc_profiling_enabled() checks") Signed-off-by: David Wang <00107082@163.com> Acked-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Cc: Brendan Jackman Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index c74077977830..8a7f4f802c57 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -188,6 +188,13 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) return tag; } +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +{ + if (mem_alloc_profiling_enabled()) + return __pgalloc_tag_get(page); + return NULL; +} + void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); void pgalloc_tag_swap(struct folio *new, struct folio *old); @@ -199,6 +206,7 @@ static inline void clear_page_tag_ref(struct page *page) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } #endif /* CONFIG_MEM_ALLOC_PROFILING */ -- cgit v1.2.3 From cb5b13cd6c9237fe5ac978b22453eb3fa098a8d6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 19:16:56 +0100 Subject: mm: introduce a common definition of mk_pte() Most architectures simply call pfn_pte(). Centralise that as the normal definition and remove the definition of mk_pte() from the architectures which have either that exact definition or something similar. Link: https://lkml.kernel.org/r/20250402181709.2386022-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Geert Uytterhoeven # m68k Acked-by: David Hildenbrand Reviewed-by: Alexander Gordeev # s390 Cc: Zi Yan Cc: Andreas Larsson Cc: Anton Ivanov Cc: Dave Hansen Cc: "David S. Miller" Cc: Johannes Berg Cc: Muchun Song Cc: Richard Weinberger Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bf55206935c4..aa944eaad0ec 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2004,6 +2004,15 @@ static inline struct folio *pfn_folio(unsigned long pfn) return page_folio(pfn_to_page(pfn)); } +#ifndef mk_pte +#ifdef CONFIG_MMU +static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + return pfn_pte(page_to_pfn(page), pgprot); +} +#endif +#endif + static inline bool folio_has_pincount(const struct folio *folio) { if (IS_ENABLED(CONFIG_64BIT)) -- cgit v1.2.3 From 4ec492a628d897806bb6dc13b1c257c4e06eb1cf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 19:17:00 +0100 Subject: mm: make mk_pte() definition unconditional All architectures now use the common mk_pte() definition, so we can remove the condition. Link: https://lkml.kernel.org/r/20250402181709.2386022-7-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Zi Yan Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Anton Ivanov Cc: Dave Hansen Cc: "David S. 
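A usage sketch, not taken from any particular fault handler, showing the caller-side pattern the common definition serves:

static void map_page_sketch(struct vm_area_struct *vma, unsigned long addr,
			    pte_t *ptep, struct page *page)
{
	pte_t entry = mk_pte(page, vma->vm_page_prot);	/* pfn_pte(page_to_pfn(page), prot) */

	set_pte_at(vma->vm_mm, addr, ptep, entry);
}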
Miller" Cc: Geert Uytterhoeven Cc: Johannes Berg Cc: Muchun Song Cc: Richard Weinberger Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index aa944eaad0ec..3a55903d68e2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2004,14 +2004,12 @@ static inline struct folio *pfn_folio(unsigned long pfn) return page_folio(pfn_to_page(pfn)); } -#ifndef mk_pte #ifdef CONFIG_MMU static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) { return pfn_pte(page_to_pfn(page), pgprot); } #endif -#endif static inline bool folio_has_pincount(const struct folio *folio) { -- cgit v1.2.3 From deb8d4d28e4d05c4ecfc6e242c0a53d49e119224 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 19:17:01 +0100 Subject: mm: add folio_mk_pte() Remove a cast from folio to page in four callers of mk_pte(). Link: https://lkml.kernel.org/r/20250402181709.2386022-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Zi Yan Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Anton Ivanov Cc: Dave Hansen Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Johannes Berg Cc: Muchun Song Cc: Richard Weinberger Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3a55903d68e2..cbad8c663c4d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2009,6 +2009,21 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) { return pfn_pte(page_to_pfn(page), pgprot); } + +/** + * folio_mk_pte - Create a PTE for this folio + * @folio: The folio to create a PTE for + * @pgprot: The page protection bits to use + * + * Create a page table entry for the first page of this folio. + * This is suitable for passing to set_ptes(). + * + * Return: A page table entry suitable for mapping this folio. + */ +static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot) +{ + return pfn_pte(folio_pfn(folio), pgprot); +} #endif static inline bool folio_has_pincount(const struct folio *folio) -- cgit v1.2.3 From 7b7aa8a4adb62e3c3312d1c6086891014addc567 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 19:17:03 +0100 Subject: mm: remove mk_huge_pte() The only remaining user of mk_huge_pte() is the debug code, so remove the API and replace its use with pfn_pte() which lets us remove the conversion to a page first. We should always call arch_make_huge_pte() to turn this PTE into a huge PTE before operating on it with huge_pte_mkdirty() etc. Link: https://lkml.kernel.org/r/20250402181709.2386022-10-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Zi Yan Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Anton Ivanov Cc: Dave Hansen Cc: David Hildenbrand Cc: "David S. 
Miller" Cc: Geert Uytterhoeven Cc: Johannes Berg Cc: Muchun Song Cc: Richard Weinberger Cc: Signed-off-by: Andrew Morton --- include/asm-generic/hugetlb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 2afc95bf1655..3e0a8fe9b108 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -5,11 +5,6 @@ #include #include -static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) -{ - return mk_pte(page, pgprot); -} - static inline unsigned long huge_pte_write(pte_t pte) { return pte_write(pte); -- cgit v1.2.3 From e3981db444a0a18d350d9f92e3f2e8d489b54211 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 19:17:04 +0100 Subject: mm: add folio_mk_pmd() Removes five conversions from folio to page. Also removes both callers of mk_pmd() that aren't part of mk_huge_pmd(), getting us a step closer to removing the confusion between mk_pmd(), mk_huge_pmd() and pmd_mkhuge(). Link: https://lkml.kernel.org/r/20250402181709.2386022-11-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Anton Ivanov Cc: Dave Hansen Cc: David Hildenbrand Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Johannes Berg Cc: Muchun Song Cc: Richard Weinberger Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index cbad8c663c4d..733dd7100ca1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2024,7 +2024,24 @@ static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot) { return pfn_pte(folio_pfn(folio), pgprot); } + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/** + * folio_mk_pmd - Create a PMD for this folio + * @folio: The folio to create a PMD for + * @pgprot: The page protection bits to use + * + * Create a page table entry for the first page of this folio. + * This is suitable for passing to set_pmd_at(). + * + * Return: A page table entry suitable for mapping this folio. + */ +static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) +{ + return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot)); +} #endif +#endif /* CONFIG_MMU */ static inline bool folio_has_pincount(const struct folio *folio) { -- cgit v1.2.3 From 5071ea3d7b3d1e9660524374083a929a6885d78a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 19:17:05 +0100 Subject: arch: remove mk_pmd() There are now no callers of mk_huge_pmd() and mk_pmd(). Remove them. Link: https://lkml.kernel.org/r/20250402181709.2386022-12-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Zi Yan Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Anton Ivanov Cc: Dave Hansen Cc: David Hildenbrand Cc: "David S. 
Miller" Cc: Geert Uytterhoeven Cc: Johannes Berg Cc: Muchun Song Cc: Richard Weinberger Cc: Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e893d546a49f..f190998b2ebd 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -495,8 +495,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); void mm_put_huge_zero_folio(struct mm_struct *mm); -#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) - static inline bool thp_migration_supported(void) { return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); -- cgit v1.2.3 From c09b997342bcd1b3c2b63c6ff6fecf037a68beff Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 22:06:03 +0100 Subject: filemap: remove readahead_page() Patch series "Misc folio patches for 6.16". Remove a few APIs that we've converted everybody from using. I also found a few places that extract a page pointer from i_pages, which will be an invalid thing to do when we separate pages from folios. This patch (of 8): All filesystems have now been converted to call readahead_folio() so we can delete this wrapper. Link: https://lkml.kernel.org/r/20250402210612.2444135-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250402210612.2444135-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 26baa78f1ca7..cd4bd0f8e5f6 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1308,9 +1308,9 @@ static inline bool filemap_range_needs_writeback(struct address_space *mapping, * struct readahead_control - Describes a readahead request. * * A readahead request is for consecutive pages. Filesystems which - * implement the ->readahead method should call readahead_page() or - * readahead_page_batch() in a loop and attempt to start I/O against - * each page in the request. + * implement the ->readahead method should call readahead_folio() or + * __readahead_batch() in a loop and attempt to start reads into each + * folio in the request. * * Most of the fields in this struct are private and should be accessed * by the functions below. @@ -1415,22 +1415,6 @@ static inline struct folio *__readahead_folio(struct readahead_control *ractl) return folio; } -/** - * readahead_page - Get the next page to read. - * @ractl: The current readahead request. - * - * Context: The page is locked and has an elevated refcount. The caller - * should decreases the refcount once the page has been submitted for I/O - * and unlock the page once all I/O to that page has completed. - * Return: A pointer to the next page, or %NULL if we are done. - */ -static inline struct page *readahead_page(struct readahead_control *ractl) -{ - struct folio *folio = __readahead_folio(ractl); - - return &folio->page; -} - /** * readahead_folio - Get the next folio to read. * @ractl: The current readahead request. -- cgit v1.2.3 From a55139579082e4bc9ea9b04003cb9f78c781e08b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 22:06:04 +0100 Subject: mm: remove offset_in_thp() All callers have been converted to call offset_in_folio(). 
Link: https://lkml.kernel.org/r/20250402210612.2444135-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 733dd7100ca1..ae67d3a33792 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2445,7 +2445,6 @@ static inline void clear_page_pfmemalloc(struct page *page) extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) -#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) #define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) /* -- cgit v1.2.3 From 9c532d79082f9f2e58a45f2e92020dda65b7dedd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 22:06:07 +0100 Subject: filemap: remove find_subpage() All users of this function now call folio_file_page() instead. Delete it. Link: https://lkml.kernel.org/r/20250402210612.2444135-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cd4bd0f8e5f6..0ddd4bd8cdf8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -945,19 +945,6 @@ static inline bool folio_contains(struct folio *folio, pgoff_t index) return index - folio_index(folio) < folio_nr_pages(folio); } -/* - * Given the page we found in the page cache, return the page corresponding - * to this index in the file - */ -static inline struct page *find_subpage(struct page *head, pgoff_t index) -{ - /* HugeTLBfs wants the head page regardless */ - if (PageHuge(head)) - return head; - - return head + (index & (thp_nr_pages(head) - 1)); -} - unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_contig(struct address_space *mapping, -- cgit v1.2.3 From 8dfc8cbf7b07da172b3a4c8bea064e84fbfa5d56 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 22:06:08 +0100 Subject: filemap: convert __readahead_batch() to use a folio Extract folios from i_mapping, not pages. Removes a hidden call to compound_head(), a use of thp_nr_pages() and an unnecessary assertion that we didn't find a tail page in the page cache. 
Link: https://lkml.kernel.org/r/20250402210612.2444135-7-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0ddd4bd8cdf8..c5c9b3770d75 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1424,7 +1424,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, { unsigned int i = 0; XA_STATE(xas, &rac->mapping->i_pages, 0); - struct page *page; + struct folio *folio; BUG_ON(rac->_batch_count > rac->_nr_pages); rac->_nr_pages -= rac->_batch_count; @@ -1433,13 +1433,12 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, xas_set(&xas, rac->_index); rcu_read_lock(); - xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { - if (xas_retry(&xas, page)) + xas_for_each(&xas, folio, rac->_index + rac->_nr_pages - 1) { + if (xas_retry(&xas, folio)) continue; - VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(PageTail(page), page); - array[i++] = page; - rac->_batch_count += thp_nr_pages(page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + array[i++] = folio_page(folio, 0); + rac->_batch_count += folio_nr_pages(folio); if (i == array_sz) break; } -- cgit v1.2.3 From 41e422a898da69e903a846dfb2b0c0ff62abc3cd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 22:06:09 +0100 Subject: filemap: remove readahead_page_batch() This function has no more callers; delete it. Link: https://lkml.kernel.org/r/20250402210612.2444135-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c5c9b3770d75..af25fb640463 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1447,20 +1447,6 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, return i; } -/** - * readahead_page_batch - Get a batch of pages to read. - * @rac: The current readahead request. - * @array: An array of pointers to struct page. - * - * Context: The pages are locked and have an elevated refcount. The caller - * should decreases the refcount once the page has been submitted for I/O - * and unlock the page once all I/O to that page has completed. - * Return: The number of pages placed in the array. 0 indicates the request - * is complete. - */ -#define readahead_page_batch(rac, array) \ - __readahead_batch(rac, array, ARRAY_SIZE(array)) - /** * readahead_pos - The byte offset into the file of this readahead request. * @rac: The readahead request. -- cgit v1.2.3 From 2355153ea8185e7deaf9ce728716dadb707ef59f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 22:06:10 +0100 Subject: mm: delete thp_nr_pages() All callers now use folio_nr_pages(). Delete this wrapper. 
Link: https://lkml.kernel.org/r/20250402210612.2444135-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/mm.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ae67d3a33792..dcdb798184ef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2223,15 +2223,6 @@ static inline long compound_nr(struct page *page) return folio_large_nr_pages(folio); } -/** - * thp_nr_pages - The number of regular pages in this huge page. - * @page: The head page of a huge page. - */ -static inline long thp_nr_pages(struct page *page) -{ - return folio_nr_pages((struct folio *)page); -} - /** * folio_next - Move to the next physical folio. * @folio: The folio we're currently operating on. -- cgit v1.2.3 From 56e5a103a721d0ef139bba7ff3d3ada6c8217d5b Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Wed, 2 Apr 2025 13:44:16 -0700 Subject: zsmalloc: prefer the the original page's node for compressed data Currently, zsmalloc, zswap's and zram's backend memory allocator, does not enforce any policy for the allocation of memory for the compressed data, instead just adopting the memory policy of the task entering reclaim, or the default policy (prefer local node) if no such policy is specified. This can lead to several pathological behaviors in multi-node NUMA systems: 1. Systems with CXL-based memory tiering can encounter the following inversion with zswap/zram: the coldest pages demoted to the CXL tier can return to the high tier when they are reclaimed to compressed swap, creating memory pressure on the high tier. 2. Consider a direct reclaimer scanning nodes in order of allocation preference. If it ventures into remote nodes, the memory it compresses there should stay there. Trying to shift those contents over to the reclaiming thread's preferred node further *increases* its local pressure, and provoking more spills. The remote node is also the most likely to refault this data again. This undesirable behavior was pointed out by Johannes Weiner in [1]. 3. For zswap writeback, the zswap entries are organized in node-specific LRUs, based on the node placement of the original pages, allowing for targeted zswap writeback for specific nodes. However, the compressed data of a zswap entry can be placed on a different node from the LRU it is placed on. This means that reclaim targeted at one node might not free up memory used for zswap entries in that node, but instead reclaiming memory in a different node. All of these issues will be resolved if the compressed data go to the same node as the original page. This patch encourages this behavior by having zswap and zram pass the node of the original page to zsmalloc, and have zsmalloc prefer the specified node if we need to allocate new (zs)pages for the compressed data. Note that we are not strictly binding the allocation to the preferred node. We still allow the allocation to fall back to other nodes when the preferred node is full, or if we have zspages with slots available on a different node. This is OK, and still a strict improvement over the status quo: 1. On a system with demotion enabled, we will generally prefer demotions over compressed swapping, and only swap when pages have already gone to the lowest tier. This patch should achieve the desired effect for the most part. 2. 
If the preferred node is out of memory, letting the compressed data going to other nodes can be better than the alternative (OOMs, keeping cold memory unreclaimed, disk swapping, etc.). 3. If the allocation go to a separate node because we have a zspage with slots available, at least we're not creating extra immediate memory pressure (since the space is already allocated). 3. While there can be mixings, we generally reclaim pages in same-node batches, which encourage zspage grouping that is more likely to go to the right node. 4. A strict binding would require partitioning zsmalloc by node, which is more complicated, and more prone to regression, since it reduces the storage density of zsmalloc. We need to evaluate the tradeoff and benchmark carefully before adopting such an involved solution. [1]: https://lore.kernel.org/linux-mm/20250331165306.GC2110528@cmpxchg.org/ [senozhatsky@chromium.org: coding-style fixes] Link: https://lkml.kernel.org/r/mnvexa7kseswglcqbhlot4zg3b3la2ypv2rimdl5mh5glbmhvz@wi6bgqn47hge Link: https://lkml.kernel.org/r/20250402204416.3435994-1-nphamcs@gmail.com Signed-off-by: Nhat Pham Suggested-by: Gregory Price Acked-by: Dan Williams Reviewed-by: Chengming Zhou Acked-by: Sergey Senozhatsky [zram, zsmalloc] Acked-by: Johannes Weiner Acked-by: Yosry Ahmed [zswap/zsmalloc] Cc: "Huang, Ying" Cc: Joanthan Cameron Cc: Minchan Kim Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/zpool.h | 4 ++-- include/linux/zsmalloc.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 52f30e526607..369ef068fad8 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -22,7 +22,7 @@ const char *zpool_get_type(struct zpool *pool); void zpool_destroy_pool(struct zpool *pool); int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, - unsigned long *handle); + unsigned long *handle, const int nid); void zpool_free(struct zpool *pool, unsigned long handle); @@ -64,7 +64,7 @@ struct zpool_driver { void (*destroy)(void *pool); int (*malloc)(void *pool, size_t size, gfp_t gfp, - unsigned long *handle); + unsigned long *handle, const int nid); void (*free)(void *pool, unsigned long handle); void *(*obj_read_begin)(void *pool, unsigned long handle, diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index c26baf9fb331..13e9cc5490f7 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -26,7 +26,8 @@ struct zs_pool; struct zs_pool *zs_create_pool(const char *name); void zs_destroy_pool(struct zs_pool *pool); -unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags); +unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags, + const int nid); void zs_free(struct zs_pool *pool, unsigned long obj); size_t zs_huge_class_size(struct zs_pool *pool); -- cgit v1.2.3 From 737e9d021993c99377bb61e762c0d6945a37615b Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Mon, 27 Jan 2025 10:34:03 -0500 Subject: memory: implement memory_block_advise/probe_max_size Patch series "memory,x86,acpi: hotplug memory alignment advisement", v8. When physical address regions are not aligned to memory block size, the misaligned portion is lost (stranded capacity). Block size (min/max/selected) is architecture defined. Most architectures tend to use the minimum block size or some simplistic heurist. On x86, memory block size increases up to 2GB, and is otherwise fitted to the alignment of non-hotplug (i.e. not special purpose memory). 
CXL exposes its memory for management through the ACPI CEDT (CXL Early Detection Table) in a field called the CXL Fixed Memory Window. Per the CXL specification, this memory must be aligned to at least 256MB. When a CFMW aligns on a size less than the block size, this causes a loss of up to 2GB per CFMW on x86. It is not uncommon for CFMW to be allocated per-device - though this behavior is BIOS defined. This patch set provides 3 things: 1) implement advise/query functions in driverse/base/memory.c to report/query architecture agnostic hotplug block alignment advice. 2) update x86 memblock size logic to consider the hotplug advice 3) add code in acpi/numa/srat.c to report CFMW alignment advice The advisement interfaces are design to be called during arch_init code prior to allocator and smp_init. start_kernel will call these through setup_arch() (via acpi and mm/init_64.c on x86), which occurs prior to mm_core_init and smp_init - so no need for atomics. There's an attempt to signal callers to advise() that query has already occurred, but this is predicated on the notion that query actually occurs (which presently only happens on the x86 arch). This is to assist debugging future users. Otherwise, the advise() call has been marked __init to help static discovery of bad call times. Once query is called the first time, it will always return the same value. Interfaces return -EBUSY and 0 respectively on systems without hotplug. This patch (of 3): Hotplug memory sources may have opinions on what the memblock size should be - usually for alignment purposes. For example, CXL memory extents can be 256MB with a matching alignment. If this size/alignment is smaller than the block size, it can result in stranded capacity. Implement memory_block_advise_max_size for use prior to allocator init, for software to advise the system on the max block size. Implement memory_block_probe_max_size for use by arch init code to calculate the best block size. Use of advice is architecture defined. The probe value can never change after first probe. Calls to advise after probe will return -EBUSY to aid debugging. On systems without hotplug, always return -ENODEV and 0 respectively. Link: https://lkml.kernel.org/r/20250127153405.3379117-1-gourry@gourry.net Link: https://lkml.kernel.org/r/20250127153405.3379117-2-gourry@gourry.net Signed-off-by: Gregory Price Suggested-by: Ira Weiny Acked-by: David Hildenbrand Acked-by: Mike Rapoport (Microsoft) Acked-by: Dan Williams Tested-by: Fan Ni Reviewed-by: Ira Weiny Acked-by: Oscar Salvador Cc: Alison Schofield Cc: Andy Lutomirski Cc: Borislav Betkov Cc: Bruno Faccini Cc: Dave Hansen Cc: Dave Jiang Cc: Greg Kroah-Hartman Cc: Haibo Xu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joanthan Cameron Cc: Len Brown Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Robert Richter Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/linux/memory.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 12daa6ec7d09..5ec4e6d209b9 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -149,6 +149,14 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) { return 0; } +static inline int memory_block_advise_max_size(unsigned long size) +{ + return -ENODEV; +} +static inline unsigned long memory_block_advised_max_size(void) +{ + return 0; +} #else /* CONFIG_MEMORY_HOTPLUG */ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); @@ -181,6 +189,8 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, void memory_block_add_nid(struct memory_block *mem, int nid, enum meminit_context context); #endif /* CONFIG_NUMA */ +int memory_block_advise_max_size(unsigned long size); +unsigned long memory_block_advised_max_size(void); #endif /* CONFIG_MEMORY_HOTPLUG */ /* -- cgit v1.2.3 From b4c829fa4d56f3b566bbbb41c9a8ff0c83ae84c5 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 31 Mar 2025 19:10:25 -0700 Subject: mm/compaction: use folio in hugetlb pathway Use a folio in the hugetlb pathway during the compaction migrate-able pageblock scan. This removes a call to compound_head(). Link: https://lkml.kernel.org/r/20250401021025.637333-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Acked-by: Oscar Salvador Reviewed-by: Zi Yan Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8f3ac832ee7f..a57bed83c657 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -695,7 +695,7 @@ struct huge_bootmem_page { bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m); -int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); +int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list); int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); void wait_for_freed_hugetlb_folios(void); struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, @@ -1083,7 +1083,7 @@ static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, return NULL; } -static inline int isolate_or_dissolve_huge_page(struct page *page, +static inline int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list) { return -ENOMEM; -- cgit v1.2.3 From 2e976567233228ff928c2405f7e03ebb7fb7aa50 Mon Sep 17 00:00:00 2001 From: Ignacio Encinas Date: Mon, 31 Mar 2025 21:57:05 +0200 Subject: mm: annotate data race in update_hiwater_rss mm_struct.hiwater_rss can be accessed concurrently without proper synchronization as reported by KCSAN. This data race is benign as it only affects accounting information. Annotate it with data_race() to make KCSAN happy. 
Link: https://lkml.kernel.org/r/20250331-mm-maxrss-data-race-v2-1-cf958e6205bf@iencinas.com Signed-off-by: Ignacio Encinas Reported-by: syzbot+419c4b42acc36c420ad3@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67e3390c.050a0220.1ec46.0001.GAE@google.com/ Suggested-by: Lorenzo Stoakes Acked-by: Pedro Falcato Cc: Liam Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index dcdb798184ef..1690f21e7808 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -2796,7 +2797,7 @@ static inline void update_hiwater_rss(struct mm_struct *mm) { unsigned long _rss = get_mm_rss(mm); - if ((mm)->hiwater_rss < _rss) + if (data_race(mm->hiwater_rss) < _rss) (mm)->hiwater_rss = _rss; } -- cgit v1.2.3 From 17fc7b8f9bce5d3d61ef347dd8cfccb6365dcaa1 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 3 Mar 2025 13:20:09 +0200 Subject: syscall.h: add syscall_set_arguments() This function is going to be needed on all HAVE_ARCH_TRACEHOOK architectures to implement PTRACE_SET_SYSCALL_INFO API. This partially reverts commit 7962c2eddbfe ("arch: remove unused function syscall_set_arguments()") by reusing some of old syscall_set_arguments() implementations. [nathan@kernel.org: fix compile time fortify checks] Link: https://lkml.kernel.org/r/20250408213131.GA2872426@ax162 Link: https://lkml.kernel.org/r/20250303112009.GC24170@strace.io Signed-off-by: Dmitry V. Levin Signed-off-by: Nathan Chancellor Tested-by: Charlie Jenkins Reviewed-by: Charlie Jenkins Acked-by: Helge Deller # parisc Reviewed-by: Maciej W. Rozycki [mips] Cc: Alexander Gordeev Cc: Alexey Gladkov (Intel) Cc: Andreas Larsson Cc: anton ivanov Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Brian Cain Cc: Christian Borntraeger Cc: Christian Zankel Cc: Christophe Leroy Cc: Dave Hansen Cc: Davide Berardi Cc: David S. Miller Cc: Dinh Nguyen Cc: Eugene Syromiatnikov Cc: Eugene Syromyatnikov Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Jonas Bonn Cc: Madhavan Srinivasan Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Mike Frysinger Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Oleg Nesterov Cc: Renzo Davoi Cc: Richard Weinberger Cc: Rich Felker Cc: Russel King Cc: Shuah Khan Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: WANG Xuerui Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/asm-generic/syscall.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 182b039ce5fa..292b412f4e9a 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -117,6 +117,22 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args); +/** + * syscall_set_arguments - change system call parameter value + * @task: task of interest, must be in system call entry tracing + * @regs: task_pt_regs() of @task + * @args: array of argument values to store + * + * Changes 6 arguments to the system call. + * The first argument gets value @args[0], and so on. 
+ * + * It's only valid to call this when @task is stopped for tracing on + * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. + */ +void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, + const unsigned long *args); + /** * syscall_get_arch - return the AUDIT_ARCH for the current system call * @task: task of interest, must be blocked -- cgit v1.2.3 From cc6622730be77fa88acc4fb0942cd39e6fa5ca27 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 3 Mar 2025 13:20:20 +0200 Subject: syscall.h: introduce syscall_set_nr() Similar to syscall_set_arguments() that complements syscall_get_arguments(), introduce syscall_set_nr() that complements syscall_get_nr(). syscall_set_nr() is going to be needed along with syscall_set_arguments() on all HAVE_ARCH_TRACEHOOK architectures to implement PTRACE_SET_SYSCALL_INFO API. Link: https://lkml.kernel.org/r/20250303112020.GD24170@strace.io Signed-off-by: Dmitry V. Levin Tested-by: Charlie Jenkins Reviewed-by: Charlie Jenkins Acked-by: Helge Deller # parisc Reviewed-by: Maciej W. Rozycki # mips Cc: Alexander Gordeev Cc: Alexey Gladkov (Intel) Cc: Andreas Larsson Cc: anton ivanov Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Brian Cain Cc: Christian Borntraeger Cc: Christian Zankel Cc: Christophe Leroy Cc: Dave Hansen Cc: Davide Berardi Cc: David S. Miller Cc: Dinh Nguyen Cc: Eugene Syromiatnikov Cc: Eugene Syromyatnikov Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Jonas Bonn Cc: Madhavan Srinivasan Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Mike Frysinger Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Oleg Nesterov Cc: Renzo Davoi Cc: Richard Weinberger Cc: Rich Felker Cc: Russel King Cc: Shuah Khan Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: WANG Xuerui Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/asm-generic/syscall.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 292b412f4e9a..c5a3ad53beec 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -37,6 +37,20 @@ struct pt_regs; */ int syscall_get_nr(struct task_struct *task, struct pt_regs *regs); +/** + * syscall_set_nr - change the system call a task is executing + * @task: task of interest, must be blocked + * @regs: task_pt_regs() of @task + * @nr: system call number + * + * Changes the system call number @task is about to execute. + * + * It's only valid to call this when @task is stopped for tracing on + * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. + */ +void syscall_set_nr(struct task_struct *task, struct pt_regs *regs, int nr); + /** * syscall_rollback - roll back registers after an aborted system call * @task: task of interest, must be in system call exit tracing -- cgit v1.2.3 From 26bb32768fe6552de044f782a58b3272073fbfc0 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 3 Mar 2025 13:20:44 +0200 Subject: ptrace: introduce PTRACE_SET_SYSCALL_INFO request PTRACE_SET_SYSCALL_INFO is a generic ptrace API that complements PTRACE_GET_SYSCALL_INFO by letting the ptracer modify details of system calls the tracee is blocked in. 
This API allows ptracers to obtain and modify system call details in a straightforward and architecture-agnostic way, providing a consistent way of manipulating the system call number and arguments across architectures. As in case of PTRACE_GET_SYSCALL_INFO, PTRACE_SET_SYSCALL_INFO also does not aim to address numerous architecture-specific system call ABI peculiarities, like differences in the number of system call arguments for such system calls as pread64 and preadv. The current implementation supports changing only those bits of system call information that are used by strace system call tampering, namely, syscall number, syscall arguments, and syscall return value. Support of changing additional details returned by PTRACE_GET_SYSCALL_INFO, such as instruction pointer and stack pointer, could be added later if needed, by using struct ptrace_syscall_info.flags to specify the additional details that should be set. Currently, "flags" and "reserved" fields of struct ptrace_syscall_info must be initialized with zeroes; "arch", "instruction_pointer", and "stack_pointer" fields are currently ignored. PTRACE_SET_SYSCALL_INFO currently supports only PTRACE_SYSCALL_INFO_ENTRY, PTRACE_SYSCALL_INFO_EXIT, and PTRACE_SYSCALL_INFO_SECCOMP operations. Other operations could be added later if needed. Ideally, PTRACE_SET_SYSCALL_INFO should have been introduced along with PTRACE_GET_SYSCALL_INFO, but it didn't happen. The last straw that convinced me to implement PTRACE_SET_SYSCALL_INFO was apparent failure to provide an API of changing the first system call argument on riscv architecture. ptrace(2) man page: long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data); ... PTRACE_SET_SYSCALL_INFO Modify information about the system call that caused the stop. The "data" argument is a pointer to struct ptrace_syscall_info that specifies the system call information to be set. The "addr" argument should be set to sizeof(struct ptrace_syscall_info)). Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/ Link: https://lkml.kernel.org/r/20250303112044.GF24170@strace.io Signed-off-by: Dmitry V. Levin Reviewed-by: Alexey Gladkov Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Reviewed-by: Eugene Syromiatnikov Reviewed-by: Oleg Nesterov Cc: Alexander Gordeev Cc: Andreas Larsson Cc: anton ivanov Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Brian Cain Cc: Christian Borntraeger Cc: Christian Zankel Cc: Christophe Leroy Cc: Dave Hansen Cc: Davide Berardi Cc: David S. Miller Cc: Dinh Nguyen Cc: Eugene Syromyatnikov Cc: Geert Uytterhoeven Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Jonas Bonn Cc: Maciej W. 
Rozycki Cc: Madhavan Srinivasan Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Mike Frysinger Cc: Naveen N Rao Cc: Nicholas Piggin Cc: Renzo Davoi Cc: Richard Weinberger Cc: Rich Felker Cc: Russel King Cc: Shuah Khan Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: WANG Xuerui Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton --- include/uapi/linux/ptrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 72c038fc71d0..5f8ef6156752 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -74,6 +74,7 @@ struct seccomp_metadata { }; #define PTRACE_GET_SYSCALL_INFO 0x420e +#define PTRACE_SET_SYSCALL_INFO 0x4212 #define PTRACE_SYSCALL_INFO_NONE 0 #define PTRACE_SYSCALL_INFO_ENTRY 1 #define PTRACE_SYSCALL_INFO_EXIT 2 @@ -81,7 +82,8 @@ struct seccomp_metadata { struct ptrace_syscall_info { __u8 op; /* PTRACE_SYSCALL_INFO_* */ - __u8 pad[3]; + __u8 reserved; + __u16 flags; __u32 arch; __u64 instruction_pointer; __u64 stack_pointer; @@ -98,6 +100,7 @@ struct ptrace_syscall_info { __u64 nr; __u64 args[6]; __u32 ret_data; + __u32 reserved2; } seccomp; }; }; @@ -142,6 +145,8 @@ struct ptrace_sud_config { __u64 len; }; +/* 0x4212 is PTRACE_SET_SYSCALL_INFO */ + /* * These values are stored in task->ptrace_message * by ptrace_stop to describe the current syscall-stop. -- cgit v1.2.3 From a516403787e08119b70ce8bfff985272ef318a58 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 24 Mar 2025 06:53:26 +0000 Subject: fs/proc: extend the PAGEMAP_SCAN ioctl to report guard regions Patch series "fs/proc: extend the PAGEMAP_SCAN ioctl to report guard regions", v2. Introduce the PAGE_IS_GUARD flag in the PAGEMAP_SCAN ioctl to expose information about guard regions. This allows userspace tools, such as CRIU, to detect and handle guard regions. Currently, CRIU utilizes PAGEMAP_SCAN as a more efficient alternative to parsing /proc/pid/pagemap. Without this change, guard regions are incorrectly reported as swap-anon regions, leading CRIU to attempt dumping them and subsequently failing. The series includes updates to the documentation and selftests to reflect the new functionality. This patch (of 3): Introduce the PAGE_IS_GUARD flag in the PAGEMAP_SCAN ioctl to expose information about guard regions. This allows userspace tools, such as CRIU, to detect and handle guard regions. 
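A hedged userspace sketch of how a tool such as CRIU might query guard regions with the new bit; struct pm_scan_arg, struct page_region and the PAGEMAP_SCAN ioctl come from <linux/fs.h> as I understand that uapi, so verify the exact field names against your kernel headers:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

static int dump_guard_regions(unsigned long start, unsigned long end)
{
	struct page_region vec[32];
	struct pm_scan_arg arg;
	int fd, n, i;

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0)
		return -1;

	memset(&arg, 0, sizeof(arg));
	arg.size = sizeof(arg);
	arg.start = start;
	arg.end = end;
	arg.vec = (uintptr_t)vec;
	arg.vec_len = 32;
	arg.category_mask = PAGE_IS_GUARD;	/* only walk guard pages */
	arg.return_mask = PAGE_IS_GUARD;

	n = ioctl(fd, PAGEMAP_SCAN, &arg);	/* >= 0: number of regions filled */
	for (i = 0; i < n; i++)
		printf("guard region: %llx-%llx\n",
		       (unsigned long long)vec[i].start,
		       (unsigned long long)vec[i].end);
	close(fd);
	return n < 0 ? -1 : 0;
}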
Link: https://lkml.kernel.org/r/20250324065328.107678-1-avagin@google.com Link: https://lkml.kernel.org/r/20250324065328.107678-2-avagin@google.com Signed-off-by: Andrei Vagin Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index e762e1af650c..0098b0ce8ccb 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) #define PAGE_IS_SOFT_DIRTY (1 << 7) +#define PAGE_IS_GUARD (1 << 8) /* * struct page_region - Page region with flags -- cgit v1.2.3 From 979f3ef0f798d9b4fda4806d37fb1a264fc38566 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 21 Mar 2025 22:02:21 +1000 Subject: mm: fix parameter passed to page_mapcount_is_type() Patch series "Fix parameter passed to page_mapcount_is_type()", v2. Found by code inspection. There are two places where the parameter passed to page_mapcount_is_type() is (page->_mapcount), which is incorrect since it should be one more than the value, as explained in the comments to page_mapcount_is_type(): (a) page_has_type() in page-flags.h (b) __dump_folio() in mm/debug.c PATCH[1] fixes the parameter for (a) PATCH[2] fixes the parameter for (b) Note that the issue doesn't cause any visible impacts due to the safety gap introduced by PGTY_mapcount_underflow limit. So the tag 'Cc: stable@vger.kernel.org' isn't needed. This patch (of 2): As the comments of page_mapcount_is_type() indicate, the parameter passed to the function should be one more than page->_mapcount. However, page->_mapcount (equivalent to page->page_type) is passed to the function by commit 4ffca5a96678 ("mm: support only one page_type per page") page_type_has_type() is replaced by page_mapcount_is_type(), but the parameter isn't adjusted. Fix it by replacing page_mapcount_is_type() with page_type_has_type() in page_has_type(). Note that the issue doesn't cause any visible impacts due to the safety gap introduced by PGTY_mapcount_underflow limit. Link: https://lkml.kernel.org/r/20250321120222.1456770-1-gshan@redhat.com Link: https://lkml.kernel.org/r/20250321120222.1456770-2-gshan@redhat.com Fixes: 4ffca5a96678 ("mm: support only one page_type per page") Signed-off-by: Gavin Shan Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: gehao Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e6a21b62dcce..d3909cb1e576 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -982,7 +982,7 @@ static inline bool page_mapcount_is_type(unsigned int mapcount) static inline bool page_has_type(const struct page *page) { - return page_mapcount_is_type(data_race(page->page_type)); + return page_type_has_type(data_race(page->page_type)); } #define FOLIO_TYPE_OPS(lname, fname) \ -- cgit v1.2.3 From 8a5577428e8e586a107ee5f39eb4c86d32c971cd Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Mar 2025 12:37:12 +0100 Subject: kernel/events/uprobes: pass VMA to set_swbp(), set_orig_insn() and uprobe_write_opcode() We already have the VMA, no need to look it up using get_user_page_vma_remote(). We can now switch to get_user_pages_remote(). 
Link: https://lkml.kernel.org/r/20250321113713.204682-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: "Masami Hiramatsu (Google)" Cc: Matthew Wilcox (Oracle) Cc: Namhyung kim Cc: Russel King Cc: tongtiangen Signed-off-by: Andrew Morton --- include/linux/uprobes.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 2e46b69ff0a6..516217c39094 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -188,13 +188,13 @@ struct uprobes_state { }; extern void __init uprobes_init(void); -extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); +extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); +extern int set_orig_insn(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); extern bool is_swbp_insn(uprobe_opcode_t *insn); extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); -extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); +extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); -- cgit v1.2.3 From e064e7384f991c7df81999cad4ce30fed7ef7d88 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 7 Apr 2025 11:01:11 +0530 Subject: mm/ptdump: split note_page() into level specific callbacks Patch series "mm/ptdump: Drop assumption that pxd_val() is u64", v2. Last argument passed down in note_page() is u64 assuming pxd_val() returned value (all page table levels) is 64 bit - which might not be the case going ahead when D128 page tables is enabled on arm64 platform. Besides pxd_val() is very platform specific and its type should not be assumed in generic MM. A similar problem exists for effective_prot(), although it is restricted to x86 platform. This series splits note_page() and effective_prot() into individual page table level specific callbacks which accepts corresponding pxd_t page table entry as an argument instead and later on all subscribing platforms could derive pxd_val() from the table entries as required and proceed as before. Define ptdesc_t type which describes the basic page table descriptor layout on arm64 platform. Subsequently all level specific pxxval_t descriptors are derived from ptdesc_t thus establishing a common original format, which can also be appropriate for page table entries, masks and protection values etc which are used at all page table levels. This patch (of 3): Last argument passed down in note_page() is u64 assuming pxd_val() returned value (all page table levels) is 64 bit - which might not be the case going ahead when D128 page tables is enabled on arm64 platform. 
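A sketch of a dumper wired to the new hooks; the callback and member names follow the hunk below, while the bodies, the address range and the identifiers are illustrative only (a real dumper would also fill in the pmd/pud/p4d/pgd hooks):

static void sketch_note_pte(struct ptdump_state *st, unsigned long addr, pte_t pte)
{
	pr_info("pte at %#lx\n", addr);		/* decode the pte however the arch requires */
}

static void sketch_note_flush(struct ptdump_state *st)
{
	pr_info("walk complete\n");
}

static const struct ptdump_range sketch_range[] = {
	{ PAGE_OFFSET, ~0UL },			/* illustrative kernel address range */
	{ 0, 0 },
};

static struct ptdump_state sketch_state = {
	.note_page_pte   = sketch_note_pte,
	.note_page_flush = sketch_note_flush,
	.range           = sketch_range,
};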
Besides pxd_val() is very platform specific and its type should not be assumed in generic MM. Split note_page() into individual page table level specific callbacks which accepts corresponding pxd_t argument instead and then subscribing platforms just derive pxd_val() from the entries as required and proceed as earlier. Also add a note_page_flush() callback for flushing the last page table page that was being handled earlier via level = -1. Link: https://lkml.kernel.org/r/20250407053113.746295-1-anshuman.khandual@arm.com Link: https://lkml.kernel.org/r/20250407053113.746295-2-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Cc: Madhavan Srinivasan Cc: Nicholas Piggin Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Ard Biesheuvel Cc: Dave Hansen Cc: Mark Rutland Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/ptdump.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h index 8dbd51ea8626..1c1eb1fae199 100644 --- a/include/linux/ptdump.h +++ b/include/linux/ptdump.h @@ -11,9 +11,12 @@ struct ptdump_range { }; struct ptdump_state { - /* level is 0:PGD to 4:PTE, or -1 if unknown */ - void (*note_page)(struct ptdump_state *st, unsigned long addr, - int level, u64 val); + void (*note_page_pte)(struct ptdump_state *st, unsigned long addr, pte_t pte); + void (*note_page_pmd)(struct ptdump_state *st, unsigned long addr, pmd_t pmd); + void (*note_page_pud)(struct ptdump_state *st, unsigned long addr, pud_t pud); + void (*note_page_p4d)(struct ptdump_state *st, unsigned long addr, p4d_t p4d); + void (*note_page_pgd)(struct ptdump_state *st, unsigned long addr, pgd_t pgd); + void (*note_page_flush)(struct ptdump_state *st); void (*effective_prot)(struct ptdump_state *st, int level, u64 val); const struct ptdump_range *range; }; -- cgit v1.2.3 From 08978fc3b0d5709583f6dc2072a8607d0b527860 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 7 Apr 2025 11:01:12 +0530 Subject: mm/ptdump: split effective_prot() into level specific callbacks Last argument in effective_prot() is u64 assuming pxd_val() returned value (all page table levels) is 64 bit. pxd_val() is very platform specific and its type should not be assumed in generic MM. Split effective_prot() into individual page table level specific callbacks which accepts corresponding pxd_t argument instead and then the subscribing platform (only x86) just derive pxd_val() from the entries as required and proceed as earlier. 
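As an illustration, a subscribing platform's hooks might now look like the sketch below. The callback signatures follow the struct ptdump_state change in this patch; effective_prot_common() is a hypothetical shared helper, not part of the patch:

/* Each level-specific hook unpacks the typed entry with its own pxd_val(). */
static void sketch_effective_prot_pte(struct ptdump_state *st, pte_t pte)
{
	effective_prot_common(st, pte_val(pte));
}

static void sketch_effective_prot_pmd(struct ptdump_state *st, pmd_t pmd)
{
	effective_prot_common(st, pmd_val(pmd));
}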
Link: https://lkml.kernel.org/r/20250407053113.746295-3-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Dave Hansen Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Nicholas Piggin Cc: Ryan Roberts Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/ptdump.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h index 1c1eb1fae199..240bd3bff18d 100644 --- a/include/linux/ptdump.h +++ b/include/linux/ptdump.h @@ -17,7 +17,11 @@ struct ptdump_state { void (*note_page_p4d)(struct ptdump_state *st, unsigned long addr, p4d_t p4d); void (*note_page_pgd)(struct ptdump_state *st, unsigned long addr, pgd_t pgd); void (*note_page_flush)(struct ptdump_state *st); - void (*effective_prot)(struct ptdump_state *st, int level, u64 val); + void (*effective_prot_pte)(struct ptdump_state *st, pte_t pte); + void (*effective_prot_pmd)(struct ptdump_state *st, pmd_t pmd); + void (*effective_prot_pud)(struct ptdump_state *st, pud_t pud); + void (*effective_prot_p4d)(struct ptdump_state *st, p4d_t p4d); + void (*effective_prot_pgd)(struct ptdump_state *st, pgd_t pgd); const struct ptdump_range *range; }; -- cgit v1.2.3 From 4c97a17a252bf8396f7bd65efced00bf401a8c25 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Thu, 20 Mar 2025 11:22:19 +0100 Subject: xarray: make xa_alloc_cyclic() return 0 on all success cases Change xa_alloc_cyclic() to return 0 even on wrap-around. Do the same for xa_alloc_cyclic_irq() and xa_alloc_cyclic_bh(). This will prevent any future bug of treating return of 1 as an error: int ret = xa_alloc_cyclic(...) if (ret) // currently mishandles ret==1 goto failure; If there will be someone interested in when wrap-around occurs, there is still __xa_alloc_cyclic() that behaves as before. For now there is no such user. Link: https://lkml.kernel.org/r/20250320102219.8101-1-przemyslaw.kitszel@intel.com Signed-off-by: Przemek Kitszel Suggested-by: Matthew Wilcox Link: https://lore.kernel.org/netdev/Z9gUd-5t8b5NX2wE@casper.infradead.org Cc: Andriy Shevchenko Cc: Dave Hansen Cc: Michal Swiatkowski Cc: Przemek Kitszel Signed-off-by: Andrew Morton --- include/linux/xarray.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 78eede109b1a..be850174e802 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -965,10 +965,12 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * + * Note that callers interested in whether wrapping has occurred should + * use __xa_alloc_cyclic() instead. + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. - * Return: 0 if the allocation succeeded without wrapping. 1 if the - * allocation succeeded after wrapping, -ENOMEM if memory could not be + * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. 
*/ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, @@ -981,7 +983,7 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock(xa); - return err; + return err < 0 ? err : 0; } /** @@ -1002,10 +1004,12 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * + * Note that callers interested in whether wrapping has occurred should + * use __xa_alloc_cyclic() instead. + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. - * Return: 0 if the allocation succeeded without wrapping. 1 if the - * allocation succeeded after wrapping, -ENOMEM if memory could not be + * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, @@ -1018,7 +1022,7 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_bh(xa); - return err; + return err < 0 ? err : 0; } /** @@ -1039,10 +1043,12 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * + * Note that callers interested in whether wrapping has occurred should + * use __xa_alloc_cyclic() instead. + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. - * Return: 0 if the allocation succeeded without wrapping. 1 if the - * allocation succeeded after wrapping, -ENOMEM if memory could not be + * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, @@ -1055,7 +1061,7 @@ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_irq(xa); - return err; + return err < 0 ? err : 0; } /** -- cgit v1.2.3 From a40b3fa844b4c29276a228ce878bd427a3149d37 Mon Sep 17 00:00:00 2001 From: Liu Ye Date: Tue, 18 Mar 2025 14:32:26 +0800 Subject: fs/proc/page: refactor to reduce code duplication kpageflags_read() and kpagecgroup_read() are quite similar to kpagecount_read(). Refactor common code into a helper function to reduce code duplication. 
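The shared helper could take roughly the shape sketched below, with each of the files supplying its own per-PFN value callback. Names and the exact signature here are illustrative only, not taken from the patch:

/* Hypothetical common read loop for the /proc/kpage* files. */
static ssize_t kpage_read_sketch(char __user *buf, size_t count, loff_t *ppos,
				 u64 (*get_value)(unsigned long pfn))
{
	u64 __user *out = (u64 __user *)buf;
	unsigned long pfn = *ppos / sizeof(u64);
	ssize_t copied = 0;

	if ((*ppos | count) & (sizeof(u64) - 1))
		return -EINVAL;

	while (count > 0 && pfn < max_pfn) {	/* max_pfn: highest valid PFN */
		if (put_user(get_value(pfn), out))
			return copied ? copied : -EFAULT;
		pfn++;
		out++;
		copied += sizeof(u64);
		count -= sizeof(u64);
	}

	*ppos += copied;
	return copied;
}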
Link: https://lkml.kernel.org/r/20250318063226.223284-1-liuyerd@163.com Signed-off-by: Liu Ye Acked-by: David Hildenbrand Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Ran Xiaokai Cc: Roman Gushchin Cc: Shakeel Butt Cc: Svetly Todorov Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 53364526d877..5264d148bdd9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1793,6 +1793,10 @@ static inline void count_objcg_events(struct obj_cgroup *objcg, { } +static inline ino_t page_cgroup_ino(struct page *page) +{ + return 0; +} #endif /* CONFIG_MEMCG */ #if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP) -- cgit v1.2.3 From d82d3bf4115217bb3f43f9320bad5d68a35c278f Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Tue, 8 Apr 2025 10:52:11 +0100 Subject: mm: pass mm down to pagetable_{pte,pmd}_ctor Patch series "Always call constructor for kernel page tables", v2. There has been much confusion around exactly when page table constructors/destructors (pagetable_*_[cd]tor) are supposed to be called. They were initially introduced for user PTEs only (to support split page table locks), then at the PMD level for the same purpose. Accounting was added later on, starting at the PTE level and then moving to higher levels (PMD, PUD). Finally, with my earlier series "Account page tables at all levels" [1], the ctor/dtor is run for all levels, all the way to PGD. I thought this was the end of the story, and it hopefully is for user pgtables, but I was wrong for what concerns kernel pgtables. The current situation there makes very little sense: * At the PTE level, the ctor/dtor is not called (at least in the generic implementation). Specific helpers are used for kernel pgtables at this level (pte_{alloc,free}_kernel()) and those have never called the ctor/dtor, most likely because they were initially irrelevant in the kernel case. * At all other levels, the ctor/dtor is normally called. This is potentially wasteful at the PMD level (more on that later). This series aims to ensure that the ctor/dtor is always called for kernel pgtables, as it already is for user pgtables. Besides consistency, the main motivation is to guarantee that ctor/dtor hooks are systematically called; this makes it possible to insert hooks to protect page tables [2], for instance. There is however an extra challenge: split locks are not used for kernel pgtables, and it would therefore be wasteful to initialise them (ptlock_init()). It is worth clarifying exactly when split locks are used. They clearly are for user pgtables, but as illustrated in commit 61444cde9170 ("ARM: 8591/1: mm: use fully constructed struct pages for EFI pgd allocations"), they also are for special page tables like efi_mm. The one case where split locks are definitely unused is pgtables owned by init_mm; this is consistent with the behaviour of apply_to_pte_range(). The approach chosen in this series is therefore to pass the mm associated to the pgtables being constructed to pagetable_{pte,pmd}_ctor() (patch 1), and skip ptlock_init() if mm == &init_mm (patch 3 and 7). This makes it possible to call the PTE ctor/dtor from pte_{alloc,free}_kernel() without unintended consequences (patch 3). As a result the accounting functions are now called at all levels for kernel pgtables, and split locks are never initialised. 
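Concretely, the end state for the PTE-level constructor is roughly the sketch below, a simplified rendering of the pagetable_pte_ctor() change in the patches that follow: accounting always runs, while ptlock initialisation is skipped for init_mm.

static inline bool pagetable_pte_ctor_sketch(struct mm_struct *mm,
					     struct ptdesc *ptdesc)
{
	/* Split ptlocks are never used for init_mm, so don't allocate one. */
	if (mm != &init_mm && !ptlock_init(ptdesc))
		return false;
	__pagetable_ctor(ptdesc);	/* accounting/statistics, always done */
	return true;
}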
In configurations where ptlocks are dynamically allocated (32-bit, PREEMPT_RT, etc.) and ARCH_ENABLE_SPLIT_PMD_PTLOCK is selected, this series results in the removal of a kmem_cache allocation for every kernel PMD. Additionally, for certain architectures that do not use such as s390, the same optimisation occurs at the PTE level. === Things get more complicated when it comes to special pgtable allocators (patch 8-12). All architectures need such allocators to create initial kernel pgtables; we are not concerned with those as the ctor cannot be called so early in the boot sequence. However, those allocators may also be used later in the boot sequence or during normal operations. There are two main use-cases: 1. Mapping EFI memory: efi_mm (arm, arm64, riscv) 2. arch_add_memory(): init_mm The ctor is already explicitly run (at the PTE/PMD level) in the first case, as required for pgtables that are not associated with init_mm. However the same allocators may also be used for the second use-case (or others), and this is where it gets messy. Patch 1 calls the ctor with NULL as mm in those situations, as the actual mm isn't available. Practically this means that ptlocks will be unconditionally initialised. This is fine on arm - create_mapping_late() is only used for the EFI mapping. On arm64, __create_pgd_mapping() is also used by arch_add_memory(); patch 8/9/11 ensure that ctors are called at all levels with the appropriate mm. The situation is similar on riscv, but propagating the mm down to the ctor would require significant refactoring. Since they are already called unconditionally, this series leaves riscv no worse off - patch 10 adds comments to clarify the situation. From a cursory look at other architectures implementing arch_add_memory(), s390 and x86 may also need a similar treatment to add constructor calls. This is to be taken care of in a future version or as a follow-up. === The complications in those special pgtable allocators beg the question: does it really make sense to treat efi_mm and init_mm differently in e.g. apply_to_pte_range()? Maybe what we really need is a way to tell if an mm corresponds to user memory or not, and never use split locks for non-user mm's. Feedback and suggestions welcome! This patch (of 12): In preparation for calling constructors for all kernel page tables while eliding unnecessary ptlock initialisation, let's pass down the associated mm to the PTE/PMD level ctors. (These are the two levels where ptlocks are used.) In most cases the mm is already around at the point of calling the ctor so we simply pass it down. This is however not the case for special page table allocators: * arch/arm/mm/mmu.c * arch/arm64/mm/mmu.c * arch/riscv/mm/init.c In those cases, the page tables being allocated are either for standard kernel memory (init_mm) or special page directories, which may not be associated to any mm. For now let's pass NULL as mm; this will be refined where possible in future patches. No functional change in this patch. Link: https://lore.kernel.org/linux-mm/20250103184415.2744423-1-kevin.brodsky@arm.com/ [1] Link: https://lore.kernel.org/linux-hardening/20250203101839.1223008-1-kevin.brodsky@arm.com/ [2] Link: https://lkml.kernel.org/r/20250408095222.860601-1-kevin.brodsky@arm.com Link: https://lkml.kernel.org/r/20250408095222.860601-2-kevin.brodsky@arm.com Signed-off-by: Kevin Brodsky Reviewed-by: Alexander Gordeev [s390] Cc: Albert Ou Cc: Andreas Larsson Cc: Catalin Marinas Cc: David S. 
Miller Cc: Geert Uytterhoeven Cc: Kevin Brodsky Cc: Linus Waleij Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Mike Rapoport Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ryan Roberts Cc: Will Deacon Cc: Yang Shi Cc: Cc: Dave Hansen Signed-off-by: Andrew Morton --- include/asm-generic/pgalloc.h | 4 ++-- include/linux/mm.h | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 892ece4558a2..e164ca66f0f6 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -70,7 +70,7 @@ static inline pgtable_t __pte_alloc_one_noprof(struct mm_struct *mm, gfp_t gfp) ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; - if (!pagetable_pte_ctor(ptdesc)) { + if (!pagetable_pte_ctor(mm, ptdesc)) { pagetable_free(ptdesc); return NULL; } @@ -137,7 +137,7 @@ static inline pmd_t *pmd_alloc_one_noprof(struct mm_struct *mm, unsigned long ad ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; - if (!pagetable_pmd_ctor(ptdesc)) { + if (!pagetable_pmd_ctor(mm, ptdesc)) { pagetable_free(ptdesc); return NULL; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 1690f21e7808..fa22b17e337e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3147,7 +3147,8 @@ static inline void pagetable_dtor_free(struct ptdesc *ptdesc) pagetable_free(ptdesc); } -static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) +static inline bool pagetable_pte_ctor(struct mm_struct *mm, + struct ptdesc *ptdesc) { if (!ptlock_init(ptdesc)) return false; @@ -3253,7 +3254,8 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } -static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc) +static inline bool pagetable_pmd_ctor(struct mm_struct *mm, + struct ptdesc *ptdesc) { if (!pmd_ptlock_init(ptdesc)) return false; -- cgit v1.2.3 From 49f5996664201a3581ee5ea5949ee7d5fafb9d86 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Tue, 8 Apr 2025 10:52:13 +0100 Subject: mm: call ctor/dtor for kernel PTEs Since [1], constructors/destructors are expected to be called for all page table pages, at all levels and for both user and kernel pgtables. There is however one glaring exception: kernel PTEs are managed via separate helpers (pte_alloc_kernel/pte_free_kernel), which do not call the [cd]tor, at least not in the generic implementation. The most obvious reason for this anomaly is that init_mm is special-cased not to use split page table locks. As a result calling ptlock_init() for PTEs associated with init_mm would be wasteful, potentially resulting in dynamic memory allocation. However, pgtable [cd]tors perform other actions - currently related to accounting/statistics, and potentially more functionally significant in the future. Now that pagetable_pte_ctor() is passed the associated mm, we can make it skip the call to ptlock_init() for init_mm; this allows us to call the ctor from pte_alloc_one_kernel() too. This is matched by a call to the pgtable destructor in pte_free_kernel(); no special-casing is needed on that path, as ptlock_free() is already called unconditionally. (ptlock_free() is a no-op unless a ptlock was allocated for the given PTP.) This patch ensures that all architectures that rely on call the [cd]tor for kernel PTEs. pte_free_kernel() cannot be overridden so changing the generic implementation is sufficient. 
pte_alloc_one_kernel() can be overridden using __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL, and a few architectures implement it by calling the page allocator directly. We amend those so that they call the generic __pte_alloc_one_kernel() instead, if possible, ensuring that the ctor is called. A few architectures do not use ; those will be taken care of separately. [1] https://lore.kernel.org/linux-mm/20250103184415.2744423-1-kevin.brodsky@arm.com/ Link: https://lkml.kernel.org/r/20250408095222.860601-4-kevin.brodsky@arm.com Signed-off-by: Kevin Brodsky Reviewed-by: Alexander Gordeev # s390 Cc: Albert Ou Cc: Andreas Larsson Cc: Catalin Marinas Cc: David S. Miller Cc: Geert Uytterhoeven Cc: Linus Waleij Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Mike Rapoport Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ryan Roberts Cc: Will Deacon Cc: Cc: Yang Shi Cc: Dave Hansen Signed-off-by: Andrew Morton --- include/asm-generic/pgalloc.h | 7 ++++++- include/linux/mm.h | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index e164ca66f0f6..3c8ec3bfea44 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -23,6 +23,11 @@ static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm) if (!ptdesc) return NULL; + if (!pagetable_pte_ctor(mm, ptdesc)) { + pagetable_free(ptdesc); + return NULL; + } + return ptdesc_address(ptdesc); } #define __pte_alloc_one_kernel(...) alloc_hooks(__pte_alloc_one_kernel_noprof(__VA_ARGS__)) @@ -48,7 +53,7 @@ static inline pte_t *pte_alloc_one_kernel_noprof(struct mm_struct *mm) */ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - pagetable_free(virt_to_ptdesc(pte)); + pagetable_dtor_free(virt_to_ptdesc(pte)); } /** diff --git a/include/linux/mm.h b/include/linux/mm.h index fa22b17e337e..ce6832787f6d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3150,7 +3150,7 @@ static inline void pagetable_dtor_free(struct ptdesc *ptdesc) static inline bool pagetable_pte_ctor(struct mm_struct *mm, struct ptdesc *ptdesc) { - if (!ptlock_init(ptdesc)) + if (mm != &init_mm && !ptlock_init(ptdesc)) return false; __pagetable_ctor(ptdesc); return true; -- cgit v1.2.3 From 8240d8d3c5fb65fbdace7ae4db909b51f2253da7 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Tue, 8 Apr 2025 10:52:17 +0100 Subject: mm: skip ptlock_init() for kernel PMDs Split page table locks are not used for pgtables associated to init_mm, at any level. pte_alloc_kernel() does not call ptlock_init() as a result. There is however no separate alloc/free functions for kernel PMDs, and pmd_ptlock_init() is called unconditionally. When ALLOC_SPLIT_PTLOCKS is true (e.g. 32-bit architectures or if CONFIG_PREEMPT_RT is selected), this results in unnecessary dynamic memory allocation every time a kernel PMD is allocated. Now that pagetable_pmd_ctor() is passed the associated mm, we can easily remove this overhead by skipping pmd_ptlock_init() if the pgtable is associated to init_mm. No special-casing is needed on the dtor path, as ptlock_free() is already called unconditionally for all levels. (ptlock_free() is a no-op unless a ptlock was allocated for the given PTP.) Link: https://lkml.kernel.org/r/20250408095222.860601-8-kevin.brodsky@arm.com Signed-off-by: Kevin Brodsky Cc: Albert Ou Cc: Andreas Larsson Cc: Catalin Marinas Cc: David S. 
Miller Cc: Geert Uytterhoeven Cc: Linus Waleij Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Mike Rapoport Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ryan Roberts Cc: Will Deacon Cc: Cc: Yang Shi Cc: Dave Hansen Cc: Alexander Gordeev Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ce6832787f6d..5eb0d77c4438 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3257,7 +3257,7 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) static inline bool pagetable_pmd_ctor(struct mm_struct *mm, struct ptdesc *ptdesc) { - if (!pmd_ptlock_init(ptdesc)) + if (mm != &init_mm && !pmd_ptlock_init(ptdesc)) return false; ptdesc_pmd_pts_init(ptdesc); __pagetable_ctor(ptdesc); -- cgit v1.2.3 From ad88fc17d2dafe45e40de2af80207f4b2e3b1f71 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 10 Apr 2025 19:14:43 +0000 Subject: maple_tree: use vacant nodes to reduce worst case allocations In order to determine the store type for a maple tree operation, a walk of the tree is done through mas_wr_walk(). This function descends the tree until a spanning write is detected or we reach a leaf node. While descending, keep track of the height at which we encounter a node with available space. This is done by checking if mas->end is less than the number of slots a given node type can fit. Now that the height of the vacant node is tracked, we can use the difference between the height of the tree and the height of the vacant node to know how many levels we will have to propagate creating new nodes. Update mas_prealloc_calc() to consider the vacant height and reduce the number of worst-case allocations. Rebalancing and spanning stores are not supported and fall back to using the full height of the tree for allocations. Update preallocation testing assertions to take into account vacant height. Link: https://lkml.kernel.org/r/20250410191446.2474640-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Wei Yang Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index cbbcd18d4186..657adb33e61e 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -463,6 +463,7 @@ struct ma_wr_state { void __rcu **slots; /* mas->node->slots pointer */ void *entry; /* The entry to write */ void *content; /* The existing entry that is being overwritten */ + unsigned char vacant_height; /* Height of lowest node with free space */ }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) @@ -498,6 +499,7 @@ struct ma_wr_state { .mas = ma_state, \ .content = NULL, \ .entry = wr_entry, \ + .vacant_height = 0 \ } #define MA_TOPIARY(name, tree) \ -- cgit v1.2.3 From 271152a973cb01c135d29e91d1a05f51fbd88a9c Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 10 Apr 2025 19:14:45 +0000 Subject: maple_tree: add sufficient height In order to support rebalancing and spanning stores using less than the worst case number of nodes, we need to track more than just the vacant height. Using only vacant height to reduce the worst case maple node allocation count can lead to a shortcoming of nodes in the following scenarios. 
For rebalancing writes, when a leaf node becomes insufficient, it may be combined with a sibling into a single node. This means that the parent node which has entries for this children will lose one entry. If this parent node was just meeting the minimum entries, losing one entry will now cause this parent node to be insufficient. This leads to a cascading operation of rebalancing at different levels and can lead to more node allocations than simply using vacant height can return. For spanning writes, a similar situation occurs. At the location at which a spanning write is detected, the number of ancestor nodes may similarly need to rebalanced into a smaller number of nodes and the same cascading situation could occur. To use less than the full height of the tree for the number of allocations, we also need to track the height at which a non-leaf node cannot become insufficient. This means even if a rebalance occurs to a child of this node, it currently has enough entries that it can lose one without any further action. This field is stored in the maple write state as sufficient height. In mas_prealloc_calc() when figuring out how many nodes to allocate, we check if the vacant node is lower in the tree than a sufficient node (has a larger value). If it is, we cannot use the vacant height and must use the difference in the height and sufficient height as the basis for the number of nodes needed. An off by one bug was also discovered in mast_overflow() where it is using >= rather than >. This caused extra iterations of the mas_spanning_rebalance() loop and lead to unneeded allocations. A test is also added to check the number of allocations is correct. Link: https://lkml.kernel.org/r/20250410191446.2474640-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Wei Yang Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 657adb33e61e..9ef129038224 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -464,6 +464,7 @@ struct ma_wr_state { void *entry; /* The entry to write */ void *content; /* The existing entry that is being overwritten */ unsigned char vacant_height; /* Height of lowest node with free space */ + unsigned char sufficient_height;/* Height of lowest node with min sufficiency + 1 nodes */ }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) @@ -499,7 +500,8 @@ struct ma_wr_state { .mas = ma_state, \ .content = NULL, \ .entry = wr_entry, \ - .vacant_height = 0 \ + .vacant_height = 0, \ + .sufficient_height = 0 \ } #define MA_TOPIARY(name, tree) \ -- cgit v1.2.3 From 06340b927051bf71b59a9cd4cff3417247318251 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Wed, 16 Apr 2025 13:12:15 -0700 Subject: mm: convert free_page_and_swap_cache() to free_folio_and_swap_cache() free_page_and_swap_cache() takes a struct page pointer as input parameter, but it will immediately convert it to folio and all operations following within use folio instead of page. It makes more sense to pass in folio directly. Convert free_page_and_swap_cache() to free_folio_and_swap_cache() to consume folio directly. 
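A caller that still holds a struct page now converts explicitly at the call site, along the lines of this sketch (page_folio() is the standard page-to-folio conversion helper; the wrapper function is illustrative):

#include <linux/swap.h>

static void release_one_sketch(struct page *page)
{
	/* Explicit page -> folio conversion, visible at the call site. */
	free_folio_and_swap_cache(page_folio(page));
}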
Link: https://lkml.kernel.org/r/20250416201720.41678-1-nifan.cxl@gmail.com Signed-off-by: Fan Ni Acked-by: Davidlohr Bueso Acked-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Cc: Adam Manzanares Cc: "Aneesh Kumar K.V" Cc: Heiko Carstens Cc: Luis Chamberalin Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/swap.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index db46b25a65ae..4e4e27d3ce3d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -450,7 +450,7 @@ static inline unsigned long total_swapcache_pages(void) } void free_swap_cache(struct folio *folio); -void free_page_and_swap_cache(struct page *); +void free_folio_and_swap_cache(struct folio *folio); void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; @@ -520,10 +520,8 @@ static inline void put_swap_device(struct swap_info_struct *si) #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) -/* only sparc can not include linux/pagemap.h in this file - * so leave put_page and release_pages undeclared... */ -#define free_page_and_swap_cache(page) \ - put_page(page) +#define free_folio_and_swap_cache(folio) \ + folio_put(folio) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); -- cgit v1.2.3 From 75404e07663b1622948944cf31531fa87cb1785d Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 16 Apr 2025 11:38:36 +0100 Subject: mm: move mmap/vma locking logic into specific files Currently the VMA and mmap locking logic is entangled in two of the most overwrought files in mm - include/linux/mm.h and mm/memory.c. Separate this logic out so we can more easily make changes and create an appropriate MAINTAINERS entry that spans only the logic relating to locking. This should have no functional change. Care is taken to avoid dependency loops, we must regrettably keep release_fault_lock() and assert_fault_locked() in mm.h as a result due to the dependence on the vm_fault type. Additionally we must declare rcuwait_wake_up() manually to avoid a dependency cycle on linux/rcuwait.h. Additionally move the nommu implementatino of lock_mm_and_find_vma() to mmap_lock.c so everything lock-related is in one place. Link: https://lkml.kernel.org/r/bec6c8e29fa8de9267a811a10b1bdae355d67ed4.1744799282.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Suren Baghdasaryan Reviewed-by: Liam R. Howlett Reviewed-by: Vlastimil Babka Cc: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: "Paul E . 
McKenney" Cc: SeongJae Park Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mm.h | 231 +--------------------------------------------- include/linux/mmap_lock.h | 227 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+), 227 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5eb0d77c4438..9b701cfbef22 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -671,204 +671,11 @@ static inline void vma_numab_state_init(struct vm_area_struct *vma) {} static inline void vma_numab_state_free(struct vm_area_struct *vma) {} #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_PER_VMA_LOCK -static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - static struct lock_class_key lockdep_key; - - lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0); -#endif - if (reset_refcnt) - refcount_set(&vma->vm_refcnt, 0); - vma->vm_lock_seq = UINT_MAX; -} - -static inline bool is_vma_writer_only(int refcnt) -{ - /* - * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma - * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on - * a detached vma happens only in vma_mark_detached() and is a rare - * case, therefore most of the time there will be no unnecessary wakeup. - */ - return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1; -} - -static inline void vma_refcount_put(struct vm_area_struct *vma) -{ - /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ - struct mm_struct *mm = vma->vm_mm; - int oldcnt; - - rwsem_release(&vma->vmlock_dep_map, _RET_IP_); - if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) { - - if (is_vma_writer_only(oldcnt - 1)) - rcuwait_wake_up(&mm->vma_writer_wait); - } -} - -/* - * Try to read-lock a vma. The function is allowed to occasionally yield false - * locked result to avoid performance overhead, in which case we fall back to - * using mmap_lock. The function should never yield false unlocked result. - * False locked result is possible if mm_lock_seq overflows or if vma gets - * reused and attached to a different mm before we lock it. - * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got - * detached. - */ -static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, - struct vm_area_struct *vma) -{ - int oldcnt; - - /* - * Check before locking. A race might cause false locked result. - * We can use READ_ONCE() for the mm_lock_seq here, and don't need - * ACQUIRE semantics, because this is just a lockless check whose result - * we don't rely on for anything - the mm_lock_seq read against which we - * need ordering is below. - */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) - return NULL; - - /* - * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() - * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. - * Acquire fence is required here to avoid reordering against later - * vm_lock_seq check and checks inside lock_vma_under_rcu(). - */ - if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, - VMA_REF_LIMIT))) { - /* return EAGAIN if vma got detached from under us */ - return oldcnt ? NULL : ERR_PTR(-EAGAIN); - } - - rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); - /* - * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. 
- * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq - * modification invalidates all existing locks. - * - * We must use ACQUIRE semantics for the mm_lock_seq so that if we are - * racing with vma_end_write_all(), we only start reading from the VMA - * after it has been unlocked. - * This pairs with RELEASE semantics in vma_end_write_all(). - */ - if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { - vma_refcount_put(vma); - return NULL; - } - - return vma; -} - -/* - * Use only while holding mmap read lock which guarantees that locking will not - * fail (nobody can concurrently write-lock the vma). vma_start_read() should - * not be used in such cases because it might fail due to mm_lock_seq overflow. - * This functionality is used to obtain vma read lock and drop the mmap read lock. - */ -static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass) -{ - int oldcnt; - - mmap_assert_locked(vma->vm_mm); - if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, - VMA_REF_LIMIT))) - return false; - - rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); - return true; -} - -/* - * Use only while holding mmap read lock which guarantees that locking will not - * fail (nobody can concurrently write-lock the vma). vma_start_read() should - * not be used in such cases because it might fail due to mm_lock_seq overflow. - * This functionality is used to obtain vma read lock and drop the mmap read lock. - */ -static inline bool vma_start_read_locked(struct vm_area_struct *vma) -{ - return vma_start_read_locked_nested(vma, 0); -} - -static inline void vma_end_read(struct vm_area_struct *vma) -{ - vma_refcount_put(vma); -} - -/* WARNING! Can only be used if mmap_lock is expected to be write-locked */ -static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) -{ - mmap_assert_write_locked(vma->vm_mm); - - /* - * current task is holding mmap_write_lock, both vma->vm_lock_seq and - * mm->mm_lock_seq can't be concurrently modified. - */ - *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence; - return (vma->vm_lock_seq == *mm_lock_seq); -} - -void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq); - -/* - * Begin writing to a VMA. - * Exclude concurrent readers under the per-VMA lock until the currently - * write-locked mmap_lock is dropped or downgraded. - */ -static inline void vma_start_write(struct vm_area_struct *vma) -{ - unsigned int mm_lock_seq; - - if (__is_vma_write_locked(vma, &mm_lock_seq)) - return; - - __vma_start_write(vma, mm_lock_seq); -} - -static inline void vma_assert_write_locked(struct vm_area_struct *vma) -{ - unsigned int mm_lock_seq; - - VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); -} - -static inline void vma_assert_locked(struct vm_area_struct *vma) -{ - unsigned int mm_lock_seq; - - VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 && - !__is_vma_write_locked(vma, &mm_lock_seq), vma); -} - /* - * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these - * assertions should be made either under mmap_write_lock or when the object - * has been isolated under mmap_write_lock, ensuring no competing writers. + * These must be here rather than mmap_lock.h as dependent on vm_fault type, + * declared in this header. 
*/ -static inline void vma_assert_attached(struct vm_area_struct *vma) -{ - WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); -} - -static inline void vma_assert_detached(struct vm_area_struct *vma) -{ - WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); -} - -static inline void vma_mark_attached(struct vm_area_struct *vma) -{ - vma_assert_write_locked(vma); - vma_assert_detached(vma); - refcount_set_release(&vma->vm_refcnt, 1); -} - -void vma_mark_detached(struct vm_area_struct *vma); - +#ifdef CONFIG_PER_VMA_LOCK static inline void release_fault_lock(struct vm_fault *vmf) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) @@ -884,36 +691,7 @@ static inline void assert_fault_locked(struct vm_fault *vmf) else mmap_assert_locked(vmf->vma->vm_mm); } - -struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, - unsigned long address); - -#else /* CONFIG_PER_VMA_LOCK */ - -static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {} -static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, - struct vm_area_struct *vma) - { return NULL; } -static inline void vma_end_read(struct vm_area_struct *vma) {} -static inline void vma_start_write(struct vm_area_struct *vma) {} -static inline void vma_assert_write_locked(struct vm_area_struct *vma) - { mmap_assert_write_locked(vma->vm_mm); } -static inline void vma_assert_attached(struct vm_area_struct *vma) {} -static inline void vma_assert_detached(struct vm_area_struct *vma) {} -static inline void vma_mark_attached(struct vm_area_struct *vma) {} -static inline void vma_mark_detached(struct vm_area_struct *vma) {} - -static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, - unsigned long address) -{ - return NULL; -} - -static inline void vma_assert_locked(struct vm_area_struct *vma) -{ - mmap_assert_locked(vma->vm_mm); -} - +#else static inline void release_fault_lock(struct vm_fault *vmf) { mmap_read_unlock(vmf->vma->vm_mm); @@ -923,7 +701,6 @@ static inline void assert_fault_locked(struct vm_fault *vmf) { mmap_assert_locked(vmf->vma->vm_mm); } - #endif /* CONFIG_PER_VMA_LOCK */ extern const struct vm_operations_struct vma_dummy_vm_ops; diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 4706c6769902..7983b2efe9bf 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -1,6 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMAP_LOCK_H #define _LINUX_MMAP_LOCK_H +/* Avoid a dependency loop by declaring here. */ +extern int rcuwait_wake_up(struct rcuwait *w); + #include #include #include @@ -104,6 +108,206 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int return read_seqcount_retry(&mm->mm_lock_seq, seq); } +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + static struct lock_class_key lockdep_key; + + lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0); +#endif + if (reset_refcnt) + refcount_set(&vma->vm_refcnt, 0); + vma->vm_lock_seq = UINT_MAX; +} + +static inline bool is_vma_writer_only(int refcnt) +{ + /* + * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma + * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on + * a detached vma happens only in vma_mark_detached() and is a rare + * case, therefore most of the time there will be no unnecessary wakeup. 
+ */ + return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1; +} + +static inline void vma_refcount_put(struct vm_area_struct *vma) +{ + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *mm = vma->vm_mm; + int oldcnt; + + rwsem_release(&vma->vmlock_dep_map, _RET_IP_); + if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) { + + if (is_vma_writer_only(oldcnt - 1)) + rcuwait_wake_up(&mm->vma_writer_wait); + } +} + +/* + * Try to read-lock a vma. The function is allowed to occasionally yield false + * locked result to avoid performance overhead, in which case we fall back to + * using mmap_lock. The function should never yield false unlocked result. + * False locked result is possible if mm_lock_seq overflows or if vma gets + * reused and attached to a different mm before we lock it. + * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got + * detached. + */ +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, + struct vm_area_struct *vma) +{ + int oldcnt; + + /* + * Check before locking. A race might cause false locked result. + * We can use READ_ONCE() for the mm_lock_seq here, and don't need + * ACQUIRE semantics, because this is just a lockless check whose result + * we don't rely on for anything - the mm_lock_seq read against which we + * need ordering is below. + */ + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) + return NULL; + + /* + * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() + * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. + * Acquire fence is required here to avoid reordering against later + * vm_lock_seq check and checks inside lock_vma_under_rcu(). + */ + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, + VMA_REF_LIMIT))) { + /* return EAGAIN if vma got detached from under us */ + return oldcnt ? NULL : ERR_PTR(-EAGAIN); + } + + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + /* + * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. + * False unlocked result is impossible because we modify and check + * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq + * modification invalidates all existing locks. + * + * We must use ACQUIRE semantics for the mm_lock_seq so that if we are + * racing with vma_end_write_all(), we only start reading from the VMA + * after it has been unlocked. + * This pairs with RELEASE semantics in vma_end_write_all(). + */ + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { + vma_refcount_put(vma); + return NULL; + } + + return vma; +} + +/* + * Use only while holding mmap read lock which guarantees that locking will not + * fail (nobody can concurrently write-lock the vma). vma_start_read() should + * not be used in such cases because it might fail due to mm_lock_seq overflow. + * This functionality is used to obtain vma read lock and drop the mmap read lock. + */ +static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass) +{ + int oldcnt; + + mmap_assert_locked(vma->vm_mm); + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, + VMA_REF_LIMIT))) + return false; + + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + return true; +} + +/* + * Use only while holding mmap read lock which guarantees that locking will not + * fail (nobody can concurrently write-lock the vma). 
vma_start_read() should + * not be used in such cases because it might fail due to mm_lock_seq overflow. + * This functionality is used to obtain vma read lock and drop the mmap read lock. + */ +static inline bool vma_start_read_locked(struct vm_area_struct *vma) +{ + return vma_start_read_locked_nested(vma, 0); +} + +static inline void vma_end_read(struct vm_area_struct *vma) +{ + vma_refcount_put(vma); +} + +/* WARNING! Can only be used if mmap_lock is expected to be write-locked */ +static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) +{ + mmap_assert_write_locked(vma->vm_mm); + + /* + * current task is holding mmap_write_lock, both vma->vm_lock_seq and + * mm->mm_lock_seq can't be concurrently modified. + */ + *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence; + return (vma->vm_lock_seq == *mm_lock_seq); +} + +void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq); + +/* + * Begin writing to a VMA. + * Exclude concurrent readers under the per-VMA lock until the currently + * write-locked mmap_lock is dropped or downgraded. + */ +static inline void vma_start_write(struct vm_area_struct *vma) +{ + unsigned int mm_lock_seq; + + if (__is_vma_write_locked(vma, &mm_lock_seq)) + return; + + __vma_start_write(vma, mm_lock_seq); +} + +static inline void vma_assert_write_locked(struct vm_area_struct *vma) +{ + unsigned int mm_lock_seq; + + VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); +} + +static inline void vma_assert_locked(struct vm_area_struct *vma) +{ + unsigned int mm_lock_seq; + + VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 && + !__is_vma_write_locked(vma, &mm_lock_seq), vma); +} + +/* + * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these + * assertions should be made either under mmap_write_lock or when the object + * has been isolated under mmap_write_lock, ensuring no competing writers. 
+ */ +static inline void vma_assert_attached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); +} + +static inline void vma_assert_detached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); +} + +static inline void vma_mark_attached(struct vm_area_struct *vma) +{ + vma_assert_write_locked(vma); + vma_assert_detached(vma); + refcount_set_release(&vma->vm_refcnt, 1); +} + +void vma_mark_detached(struct vm_area_struct *vma); + +struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address); + #else /* CONFIG_PER_VMA_LOCK */ static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} @@ -119,6 +323,29 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int { return true; } +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {} +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, + struct vm_area_struct *vma) + { return NULL; } +static inline void vma_end_read(struct vm_area_struct *vma) {} +static inline void vma_start_write(struct vm_area_struct *vma) {} +static inline void vma_assert_write_locked(struct vm_area_struct *vma) + { mmap_assert_write_locked(vma->vm_mm); } +static inline void vma_assert_attached(struct vm_area_struct *vma) {} +static inline void vma_assert_detached(struct vm_area_struct *vma) {} +static inline void vma_mark_attached(struct vm_area_struct *vma) {} +static inline void vma_mark_detached(struct vm_area_struct *vma) {} + +static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address) +{ + return NULL; +} + +static inline void vma_assert_locked(struct vm_area_struct *vma) +{ + mmap_assert_locked(vma->vm_mm); +} #endif /* CONFIG_PER_VMA_LOCK */ -- cgit v1.2.3 From 3ca55ca225d7627e76fcfe2894740c4f615ff2e0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Apr 2025 11:09:25 -0700 Subject: exit: move and extend sched_process_exit() tracepoint It is useful to be able to access current->mm at task exit to, say, record a bunch of VMA information right before the task exits (e.g., for stack symbolization reasons when dealing with short-lived processes that exit in the middle of profiling session). Currently, trace_sched_process_exit() is triggered after exit_mm() which resets current->mm to NULL making this tracepoint unsuitable for inspecting and recording task's mm_struct-related data when tracing process lifetimes. There is a particularly suitable place, though, right after taskstats_exit() is called, but before we do exit_mm() and other exit_*() resource teardowns. taskstats performs a similar kind of accounting that some applications do with BPF, and so co-locating them seems like a good fit. So that's where trace_sched_process_exit() is moved with this patch. Also, existing trace_sched_process_exit() tracepoint is notoriously missing `group_dead` flag that is certainly useful in practice and some of our production applications have to work around this. So plumb `group_dead` through while at it, to have a richer and more complete tracepoint. Note that we can't use sched_process_template anymore, and so we use TRACE_EVENT()-based tracepoint definition. But all the field names and order, as well as assign and output logic remain intact. We just add one extra field at the end in backwards-compatible way. 
[andrii@kernel.org: document sched_process_exit and sched_process_template relation] Link: https://lkml.kernel.org/r/20250403174120.4087794-1-andrii@kernel.org Link: https://lkml.kernel.org/r/20250402180925.90914-1-andrii@kernel.org Signed-off-by: Andrii Nakryiko Acked-by: Steven Rostedt (Google) Acked-by: Oleg Nesterov Suggested-by: Ingo Molnar Cc: Alexander Potapenko Cc: Christian Brauner Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Michal Hocko Signed-off-by: Andrew Morton --- include/trace/events/sched.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8994e97d86c1..3bec9fb73a36 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -326,11 +326,37 @@ DEFINE_EVENT(sched_process_template, sched_process_free, TP_ARGS(p)); /* - * Tracepoint for a task exiting: + * Tracepoint for a task exiting. + * Note, it's a superset of sched_process_template and should be kept + * compatible as much as possible. sched_process_exits has an extra + * `group_dead` argument, so sched_process_template can't be used, + * unfortunately, just like sched_migrate_task above. */ -DEFINE_EVENT(sched_process_template, sched_process_exit, - TP_PROTO(struct task_struct *p), - TP_ARGS(p)); +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p, bool group_dead), + + TP_ARGS(p, group_dead), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( bool, group_dead ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ + __entry->group_dead = group_dead; + ), + + TP_printk("comm=%s pid=%d prio=%d group_dead=%s", + __entry->comm, __entry->pid, __entry->prio, + __entry->group_dead ? "true" : "false" + ) +); /* * Tracepoint for waiting on task to unschedule: -- cgit v1.2.3 From db80bd2cea1b7c2574578461c9de4c3d9ee7634a Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 3 Apr 2025 14:33:03 +0200 Subject: task_stack.h: remove obsolete __HAVE_ARCH_KSTACK_END check Remove __HAVE_ARCH_KSTACK_END as it has been obsolete since removal of metag architecture in v4.17. 
Link: https://lkml.kernel.org/r/20250403-kstack-end-v1-1-7798e71f34d1@linaro.org Signed-off-by: Pasha Tatashin Link: https://lore.kernel.org/20240311164638.2015063-2-pasha.tatashin@soleen.com Signed-off-by: Linus Walleij Signed-off-by: Andrew Morton --- include/linux/sched/task_stack.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index cffad65bdc6a..85c5a6392e02 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -106,7 +106,6 @@ static inline unsigned long stack_not_used(struct task_struct *p) #endif extern void set_task_stack_end_magic(struct task_struct *tsk); -#ifndef __HAVE_ARCH_KSTACK_END static inline int kstack_end(void *addr) { /* Reliable end of stack detection: @@ -114,6 +113,5 @@ static inline int kstack_end(void *addr) */ return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); } -#endif #endif /* _LINUX_SCHED_TASK_STACK_H */ -- cgit v1.2.3 From 4ef5211ee68113070bd42142f06347866675055e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 24 Mar 2025 12:50:24 +0200 Subject: kernel.h: move READ/WRITE definitions to Patch series "kernel.h: Move out a couple of macros and constants". kernel.h hosts a couple of macros and constants that may be better placed. Do that. Also add missing documentation. No functional changes intended. This patch (of 2): Headers shouldn't be forced to include just to gain these simple constants. Link: https://lkml.kernel.org/r/20250324105228.775784-1-andriy.shevchenko@linux.intel.com Link: https://lkml.kernel.org/r/20250324105228.775784-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Andy Shevchenko Cc: Alexandru Ardelean Signed-off-by: Andrew Morton --- include/linux/kernel.h | 4 ---- include/linux/types.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index be2e8c0a187e..01bb0fac3667 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -41,10 +41,6 @@ #define STACK_MAGIC 0xdeadbeef -/* generic data direction definitions */ -#define READ 0 -#define WRITE 1 - #define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) #define u64_to_user_ptr(x) ( \ diff --git a/include/linux/types.h b/include/linux/types.h index 49b79c8bb1a9..6dfdb8e8e4c3 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -136,6 +136,10 @@ typedef s64 ktime_t; typedef u64 sector_t; typedef u64 blkcnt_t; +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 + /* * The type of an index into the pagecache. */ -- cgit v1.2.3 From 029c896c4105b7d74fcd88d99c5fbab2558fee89 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Mar 2025 12:50:25 +0200 Subject: kernel.h: move PTR_IF() and u64_to_user_ptr() to util_macros.h While the natural choice of PTR_IF() is kconfig.h, the latter is too broad to include C code and actually the macro was moved out from there in the past. But kernel.h is neither a good choice for that. Move it to util_macros.h. Do the same for u64_to_user_ptr(). While moving, add necessary documentation. 
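A typical u64_to_user_ptr() call site looks like the sketch below; struct foo_args and the handler are hypothetical, while the conversion plus copy_to_user() is the real pattern the macro exists for:

struct foo_args {
	__u64 buf;	/* user pointer carried in a fixed-width field */
	__u32 len;
};

static long foo_copy_out_sketch(const struct foo_args *args, const void *data)
{
	void __user *dst = u64_to_user_ptr(args->buf);

	return copy_to_user(dst, data, args->len) ? -EFAULT : 0;
}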
Link: https://lkml.kernel.org/r/20250324105228.775784-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Alexandru Ardelean Cc: Ingo Molnar Signed-off-by: Andrew Morton --- include/linux/kernel.h | 10 +------ include/linux/util_macros.h | 66 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 01bb0fac3667..1cce1f6410a9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -41,15 +42,6 @@ #define STACK_MAGIC 0xdeadbeef -#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) - -#define u64_to_user_ptr(x) ( \ -{ \ - typecheck(u64, (x)); \ - (void __user *)(uintptr_t)(x); \ -} \ -) - struct completion; struct user; diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 3b570b765b75..7b64fb597f85 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -79,6 +79,72 @@ (__fc_i); \ }) +/** + * PTR_IF - evaluate to @ptr if @cond is true, or to NULL otherwise. + * @cond: A conditional, usually in a form of IS_ENABLED(CONFIG_FOO) + * @ptr: A pointer to assign if @cond is true. + * + * PTR_IF(IS_ENABLED(CONFIG_FOO), ptr) evaluates to @ptr if CONFIG_FOO is set + * to 'y' or 'm', or to NULL otherwise. The (ptr) argument must be a pointer. + * + * The macro can be very useful to help compiler dropping dead code. + * + * For instance, consider the following:: + * + * #ifdef CONFIG_FOO_SUSPEND + * static int foo_suspend(struct device *dev) + * { + * ... + * } + * #endif + * + * static struct pm_ops foo_ops = { + * #ifdef CONFIG_FOO_SUSPEND + * .suspend = foo_suspend, + * #endif + * }; + * + * While this works, the foo_suspend() macro is compiled conditionally, + * only when CONFIG_FOO_SUSPEND is set. This is problematic, as there could + * be a build bug in this function, we wouldn't have a way to know unless + * the configuration option is set. + * + * An alternative is to declare foo_suspend() always, but mark it + * as __maybe_unused. This works, but the __maybe_unused attribute + * is required to instruct the compiler that the function may not + * be referenced anywhere, and is safe to remove without making + * a fuss about it. This makes the programmer responsible for tagging + * the functions that can be garbage-collected. + * + * With the macro it is possible to write the following: + * + * static int foo_suspend(struct device *dev) + * { + * ... + * } + * + * static struct pm_ops foo_ops = { + * .suspend = PTR_IF(IS_ENABLED(CONFIG_FOO_SUSPEND), foo_suspend), + * }; + * + * The foo_suspend() function will now be automatically dropped by the + * compiler, and it does not require any specific attribute. + */ +#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) + +/** + * to_user_ptr - cast a pointer passed as u64 from user space to void __user * + * @x: The u64 value from user space, usually via IOCTL + * + * to_user_ptr() simply casts a pointer passed as u64 from user space to void + * __user * correctly. Using this lets us get rid of all the tiresome casts. + */ +#define u64_to_user_ptr(x) \ +({ \ + typecheck(u64, (x)); \ + (void __user *)(uintptr_t)(x); \ +}) + /** * is_insidevar - check if the @ptr points inside the @var memory range. * @ptr: the pointer to a memory address. 
-- cgit v1.2.3 From e711faaafbe54a884f33b53472434063d342f6d4 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Mon, 14 Apr 2025 22:59:43 +0800 Subject: hung_task: replace blocker_mutex with encoded blocker Patch series "hung_task: extend blocking task stacktrace dump to semaphore", v5. Inspired by mutex blocker tracking[1], this patch series extend the feature to not only dump the blocker task holding a mutex but also to support semaphores. Unlike mutexes, semaphores lack explicit ownership tracking, making it challenging to identify the root cause of hangs. To address this, we introduce a last_holder field to the semaphore structure, which is updated when a task successfully calls down() and cleared during up(). The assumption is that if a task is blocked on a semaphore, the holders must not have released it. While this does not guarantee that the last holder is one of the current blockers, it likely provides a practical hint for diagnosing semaphore-related stalls. With this change, the hung task detector can now show blocker task's info like below: [Tue Apr 8 12:19:07 2025] INFO: task cat:945 blocked for more than 120 seconds. [Tue Apr 8 12:19:07 2025] Tainted: G E 6.14.0-rc6+ #1 [Tue Apr 8 12:19:07 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Tue Apr 8 12:19:07 2025] task:cat state:D stack:0 pid:945 tgid:945 ppid:828 task_flags:0x400000 flags:0x00000000 [Tue Apr 8 12:19:07 2025] Call Trace: [Tue Apr 8 12:19:07 2025] [Tue Apr 8 12:19:07 2025] __schedule+0x491/0xbd0 [Tue Apr 8 12:19:07 2025] schedule+0x27/0xf0 [Tue Apr 8 12:19:07 2025] schedule_timeout+0xe3/0xf0 [Tue Apr 8 12:19:07 2025] ? __folio_mod_stat+0x2a/0x80 [Tue Apr 8 12:19:07 2025] ? set_ptes.constprop.0+0x27/0x90 [Tue Apr 8 12:19:07 2025] __down_common+0x155/0x280 [Tue Apr 8 12:19:07 2025] down+0x53/0x70 [Tue Apr 8 12:19:07 2025] read_dummy_semaphore+0x23/0x60 [Tue Apr 8 12:19:07 2025] full_proxy_read+0x5f/0xa0 [Tue Apr 8 12:19:07 2025] vfs_read+0xbc/0x350 [Tue Apr 8 12:19:07 2025] ? __count_memcg_events+0xa5/0x140 [Tue Apr 8 12:19:07 2025] ? count_memcg_events.constprop.0+0x1a/0x30 [Tue Apr 8 12:19:07 2025] ? handle_mm_fault+0x180/0x260 [Tue Apr 8 12:19:07 2025] ksys_read+0x66/0xe0 [Tue Apr 8 12:19:07 2025] do_syscall_64+0x51/0x120 [Tue Apr 8 12:19:07 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Tue Apr 8 12:19:07 2025] RIP: 0033:0x7f419478f46e [Tue Apr 8 12:19:07 2025] RSP: 002b:00007fff1c4d2668 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Tue Apr 8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f419478f46e [Tue Apr 8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f4194683000 RDI: 0000000000000003 [Tue Apr 8 12:19:07 2025] RBP: 00007f4194683000 R08: 00007f4194682010 R09: 0000000000000000 [Tue Apr 8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000 [Tue Apr 8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 [Tue Apr 8 12:19:07 2025] [Tue Apr 8 12:19:07 2025] INFO: task cat:945 blocked on a semaphore likely last held by task cat:938 [Tue Apr 8 12:19:07 2025] task:cat state:S stack:0 pid:938 tgid:938 ppid:584 task_flags:0x400000 flags:0x00000000 [Tue Apr 8 12:19:07 2025] Call Trace: [Tue Apr 8 12:19:07 2025] [Tue Apr 8 12:19:07 2025] __schedule+0x491/0xbd0 [Tue Apr 8 12:19:07 2025] ? _raw_spin_unlock_irqrestore+0xe/0x40 [Tue Apr 8 12:19:07 2025] schedule+0x27/0xf0 [Tue Apr 8 12:19:07 2025] schedule_timeout+0x77/0xf0 [Tue Apr 8 12:19:07 2025] ? 
__pfx_process_timeout+0x10/0x10 [Tue Apr 8 12:19:07 2025] msleep_interruptible+0x49/0x60 [Tue Apr 8 12:19:07 2025] read_dummy_semaphore+0x2d/0x60 [Tue Apr 8 12:19:07 2025] full_proxy_read+0x5f/0xa0 [Tue Apr 8 12:19:07 2025] vfs_read+0xbc/0x350 [Tue Apr 8 12:19:07 2025] ? __count_memcg_events+0xa5/0x140 [Tue Apr 8 12:19:07 2025] ? count_memcg_events.constprop.0+0x1a/0x30 [Tue Apr 8 12:19:07 2025] ? handle_mm_fault+0x180/0x260 [Tue Apr 8 12:19:07 2025] ksys_read+0x66/0xe0 [Tue Apr 8 12:19:07 2025] do_syscall_64+0x51/0x120 [Tue Apr 8 12:19:07 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Tue Apr 8 12:19:07 2025] RIP: 0033:0x7f7c584a646e [Tue Apr 8 12:19:07 2025] RSP: 002b:00007ffdba8ce158 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Tue Apr 8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f7c584a646e [Tue Apr 8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f7c5839a000 RDI: 0000000000000003 [Tue Apr 8 12:19:07 2025] RBP: 00007f7c5839a000 R08: 00007f7c58399010 R09: 0000000000000000 [Tue Apr 8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000 [Tue Apr 8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 [Tue Apr 8 12:19:07 2025] This patch (of 3): This patch replaces 'struct mutex *blocker_mutex' with 'unsigned long blocker', as only one blocker is active at a time. The blocker filed can store both the lock addrees and the lock type, with LSB used to encode the type as Masami suggested, making it easier to extend the feature to cover other types of locks. Also, once the lock type is determined, we can directly extract the address and cast it to a lock pointer ;) Link: https://lkml.kernel.org/r/20250414145945.84916-1-ioworker0@gmail.com Link: https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com [1] Link: https://lkml.kernel.org/r/20250414145945.84916-2-ioworker0@gmail.com Signed-off-by: Mingzhe Yang Signed-off-by: Lance Yang Reviewed-by: Masami Hiramatsu (Google) Suggested-by: Andrew Morton Suggested-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/hung_task.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 6 ++- 2 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 include/linux/hung_task.h (limited to 'include') diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h new file mode 100644 index 000000000000..1bc2b3244613 --- /dev/null +++ b/include/linux/hung_task.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Detect Hung Task: detecting tasks stuck in D state + * + * Copyright (C) 2025 Tongcheng Travel (www.ly.com) + * Author: Lance Yang + */ +#ifndef __LINUX_HUNG_TASK_H +#define __LINUX_HUNG_TASK_H + +#include +#include +#include + +/* + * @blocker: Combines lock address and blocking type. + * + * Since lock pointers are at least 4-byte aligned(32-bit) or 8-byte + * aligned(64-bit). This leaves the 2 least bits (LSBs) of the pointer + * always zero. So we can use these bits to encode the specific blocking + * type. 
+ * + * Type encoding: + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) + * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX) + * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM) + */ +#define BLOCKER_TYPE_MUTEX 0x00UL +#define BLOCKER_TYPE_SEM 0x01UL +#define BLOCKER_TYPE_RTMUTEX 0x02UL +#define BLOCKER_TYPE_RWSEM 0x03UL + +#define BLOCKER_TYPE_MASK 0x03UL + +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER +static inline void hung_task_set_blocker(void *lock, unsigned long type) +{ + unsigned long lock_ptr = (unsigned long)lock; + + WARN_ON_ONCE(!lock_ptr); + WARN_ON_ONCE(READ_ONCE(current->blocker)); + + /* + * If the lock pointer matches the BLOCKER_TYPE_MASK, return + * without writing anything. + */ + if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK)) + return; + + WRITE_ONCE(current->blocker, lock_ptr | type); +} + +static inline void hung_task_clear_blocker(void) +{ + WARN_ON_ONCE(!READ_ONCE(current->blocker)); + + WRITE_ONCE(current->blocker, 0UL); +} + +/* + * hung_task_get_blocker_type - Extracts blocker type from encoded blocker + * address. + * + * @blocker: Blocker pointer with encoded type (via LSB bits) + * + * Returns: BLOCKER_TYPE_MUTEX, BLOCKER_TYPE_SEM, etc. + */ +static inline unsigned long hung_task_get_blocker_type(unsigned long blocker) +{ + WARN_ON_ONCE(!blocker); + + return blocker & BLOCKER_TYPE_MASK; +} + +static inline void *hung_task_blocker_to_lock(unsigned long blocker) +{ + WARN_ON_ONCE(!blocker); + + return (void *)(blocker & ~BLOCKER_TYPE_MASK); +} +#else +static inline void hung_task_set_blocker(void *lock, unsigned long type) +{ +} +static inline void hung_task_clear_blocker(void) +{ +} +static inline unsigned long hung_task_get_blocker_type(unsigned long blocker) +{ + return 0UL; +} +static inline void *hung_task_blocker_to_lock(unsigned long blocker) +{ + return NULL; +} +#endif + +#endif /* __LINUX_HUNG_TASK_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index f96ac1982893..393d81978f40 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1240,7 +1240,11 @@ struct task_struct { #endif #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER - struct mutex *blocker_mutex; + /* + * Encoded lock address causing task block (lower 2 bits = type from + * ). Accessed via hung_task_*() helpers. + */ + unsigned long blocker; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -- cgit v1.2.3 From 194a9b9e843b4077033be70a07d191107972aa53 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Mon, 14 Apr 2025 22:59:44 +0800 Subject: hung_task: show the blocker task if the task is hung on semaphore Inspired by mutex blocker tracking[1], this patch makes a trade-off to balance the overhead and utility of the hung task detector. Unlike mutexes, semaphores lack explicit ownership tracking, making it challenging to identify the root cause of hangs. To address this, we introduce a last_holder field to the semaphore structure, which is updated when a task successfully calls down() and cleared during up(). The assumption is that if a task is blocked on a semaphore, the holders must not have released it. While this does not guarantee that the last holder is one of the current blockers, it likely provides a practical hint for diagnosing semaphore-related stalls. With this change, the hung task detector can now show blocker task's info like below: [Tue Apr 8 12:19:07 2025] INFO: task cat:945 blocked for more than 120 seconds. 
[Tue Apr 8 12:19:07 2025] Tainted: G E 6.14.0-rc6+ #1 [Tue Apr 8 12:19:07 2025] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [Tue Apr 8 12:19:07 2025] task:cat state:D stack:0 pid:945 tgid:945 ppid:828 task_flags:0x400000 flags:0x00000000 [Tue Apr 8 12:19:07 2025] Call Trace: [Tue Apr 8 12:19:07 2025] [Tue Apr 8 12:19:07 2025] __schedule+0x491/0xbd0 [Tue Apr 8 12:19:07 2025] schedule+0x27/0xf0 [Tue Apr 8 12:19:07 2025] schedule_timeout+0xe3/0xf0 [Tue Apr 8 12:19:07 2025] ? __folio_mod_stat+0x2a/0x80 [Tue Apr 8 12:19:07 2025] ? set_ptes.constprop.0+0x27/0x90 [Tue Apr 8 12:19:07 2025] __down_common+0x155/0x280 [Tue Apr 8 12:19:07 2025] down+0x53/0x70 [Tue Apr 8 12:19:07 2025] read_dummy_semaphore+0x23/0x60 [Tue Apr 8 12:19:07 2025] full_proxy_read+0x5f/0xa0 [Tue Apr 8 12:19:07 2025] vfs_read+0xbc/0x350 [Tue Apr 8 12:19:07 2025] ? __count_memcg_events+0xa5/0x140 [Tue Apr 8 12:19:07 2025] ? count_memcg_events.constprop.0+0x1a/0x30 [Tue Apr 8 12:19:07 2025] ? handle_mm_fault+0x180/0x260 [Tue Apr 8 12:19:07 2025] ksys_read+0x66/0xe0 [Tue Apr 8 12:19:07 2025] do_syscall_64+0x51/0x120 [Tue Apr 8 12:19:07 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Tue Apr 8 12:19:07 2025] RIP: 0033:0x7f419478f46e [Tue Apr 8 12:19:07 2025] RSP: 002b:00007fff1c4d2668 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Tue Apr 8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f419478f46e [Tue Apr 8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f4194683000 RDI: 0000000000000003 [Tue Apr 8 12:19:07 2025] RBP: 00007f4194683000 R08: 00007f4194682010 R09: 0000000000000000 [Tue Apr 8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000 [Tue Apr 8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 [Tue Apr 8 12:19:07 2025] [Tue Apr 8 12:19:07 2025] INFO: task cat:945 blocked on a semaphore likely last held by task cat:938 [Tue Apr 8 12:19:07 2025] task:cat state:S stack:0 pid:938 tgid:938 ppid:584 task_flags:0x400000 flags:0x00000000 [Tue Apr 8 12:19:07 2025] Call Trace: [Tue Apr 8 12:19:07 2025] [Tue Apr 8 12:19:07 2025] __schedule+0x491/0xbd0 [Tue Apr 8 12:19:07 2025] ? _raw_spin_unlock_irqrestore+0xe/0x40 [Tue Apr 8 12:19:07 2025] schedule+0x27/0xf0 [Tue Apr 8 12:19:07 2025] schedule_timeout+0x77/0xf0 [Tue Apr 8 12:19:07 2025] ? __pfx_process_timeout+0x10/0x10 [Tue Apr 8 12:19:07 2025] msleep_interruptible+0x49/0x60 [Tue Apr 8 12:19:07 2025] read_dummy_semaphore+0x2d/0x60 [Tue Apr 8 12:19:07 2025] full_proxy_read+0x5f/0xa0 [Tue Apr 8 12:19:07 2025] vfs_read+0xbc/0x350 [Tue Apr 8 12:19:07 2025] ? __count_memcg_events+0xa5/0x140 [Tue Apr 8 12:19:07 2025] ? count_memcg_events.constprop.0+0x1a/0x30 [Tue Apr 8 12:19:07 2025] ? 
handle_mm_fault+0x180/0x260 [Tue Apr 8 12:19:07 2025] ksys_read+0x66/0xe0 [Tue Apr 8 12:19:07 2025] do_syscall_64+0x51/0x120 [Tue Apr 8 12:19:07 2025] entry_SYSCALL_64_after_hwframe+0x76/0x7e [Tue Apr 8 12:19:07 2025] RIP: 0033:0x7f7c584a646e [Tue Apr 8 12:19:07 2025] RSP: 002b:00007ffdba8ce158 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [Tue Apr 8 12:19:07 2025] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f7c584a646e [Tue Apr 8 12:19:07 2025] RDX: 0000000000020000 RSI: 00007f7c5839a000 RDI: 0000000000000003 [Tue Apr 8 12:19:07 2025] RBP: 00007f7c5839a000 R08: 00007f7c58399010 R09: 0000000000000000 [Tue Apr 8 12:19:07 2025] R10: fffffffffffffbc5 R11: 0000000000000246 R12: 0000000000000000 [Tue Apr 8 12:19:07 2025] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 [Tue Apr 8 12:19:07 2025] [1] https://lore.kernel.org/all/174046694331.2194069.15472952050240807469.stgit@mhiramat.tok.corp.google.com Link: https://lkml.kernel.org/r/20250414145945.84916-3-ioworker0@gmail.com Signed-off-by: Mingzhe Yang Signed-off-by: Lance Yang Suggested-by: Andrew Morton Suggested-by: Masami Hiramatsu (Google) Reviewed-by: Masami Hiramatsu (Google) Cc: Anna Schumaker Cc: Boqun Feng Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Zi Li Signed-off-by: Andrew Morton --- include/linux/semaphore.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 04655faadc2d..89706157e622 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -16,13 +16,25 @@ struct semaphore { raw_spinlock_t lock; unsigned int count; struct list_head wait_list; + +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER + unsigned long last_holder; +#endif }; +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER +#define __LAST_HOLDER_SEMAPHORE_INITIALIZER \ + , .last_holder = 0UL +#else +#define __LAST_HOLDER_SEMAPHORE_INITIALIZER +#endif + #define __SEMAPHORE_INITIALIZER(name, n) \ { \ .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \ .count = n, \ - .wait_list = LIST_HEAD_INIT((name).wait_list), \ + .wait_list = LIST_HEAD_INIT((name).wait_list) \ + __LAST_HOLDER_SEMAPHORE_INITIALIZER \ } /* @@ -47,5 +59,6 @@ extern int __must_check down_killable(struct semaphore *sem); extern int __must_check down_trylock(struct semaphore *sem); extern int __must_check down_timeout(struct semaphore *sem, long jiffies); extern void up(struct semaphore *sem); +extern unsigned long sem_last_holder(struct semaphore *sem); #endif /* __LINUX_SEMAPHORE_H */ -- cgit v1.2.3 From 8d1d4b538bb12a858fa95a073c4aff31e79088ee Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 16 Apr 2025 10:06:13 -0600 Subject: scatterlist: inline sg_next() sg_next() is a short function called frequently in I/O paths. Define it in the header file so it can be inlined into its callers. 
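For context, callers walk the table either through for_each_sg() or with sg_next() directly, so every segment of every I/O goes through this helper. A minimal (made-up) walker that sums the segment lengths looks like this:

    static unsigned int sgl_total_len(struct scatterlist *sgl)
    {
        struct scatterlist *sg;
        unsigned int total = 0;

        /*
         * sg_next() follows chain entries and returns NULL after the
         * entry marked with SG_END.
         */
        for (sg = sgl; sg; sg = sg_next(sg))
            total += sg->length;

        return total;
    }

With the definition available in the header, loops like this can have the chain-following branch inlined instead of paying a function call per segment.
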
Link: https://lkml.kernel.org/r/20250416160615.3571958-1-csander@purestorage.com Signed-off-by: Caleb Sander Mateos Cc: Eric Biggers Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 138e2f1bd08f..0cdbfc42f153 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -94,6 +94,28 @@ static inline bool sg_is_last(struct scatterlist *sg) return __sg_flags(sg) & SG_END; } +/** + * sg_next - return the next scatterlist entry in a list + * @sg: The current sg entry + * + * Description: + * Usually the next entry will be @sg@ + 1, but if this sg element is part + * of a chained scatterlist, it could jump to the start of a new + * scatterlist array. + * + **/ +static inline struct scatterlist *sg_next(struct scatterlist *sg) +{ + if (sg_is_last(sg)) + return NULL; + + sg++; + if (unlikely(sg_is_chain(sg))) + sg = sg_chain_ptr(sg); + + return sg; +} + /** * sg_assign_page - Assign a given page to an SG entry * @sg: SG entry @@ -418,7 +440,6 @@ static inline void sg_init_marker(struct scatterlist *sgl, int sg_nents(struct scatterlist *sg); int sg_nents_for_len(struct scatterlist *sg, u64 len); -struct scatterlist *sg_next(struct scatterlist *); struct scatterlist *sg_last(struct scatterlist *s, unsigned int); void sg_init_table(struct scatterlist *, unsigned int); void sg_init_one(struct scatterlist *, const void *, unsigned int); -- cgit v1.2.3 From ba8182d44b4e557e2dc34e51a88105ce5b083372 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 19 Apr 2025 21:30:12 +0100 Subject: rapidio: remove unused functions rio_request_dma() and rio_dma_prep_slave_sg() were added in 2012 by commit e42d98ebe7d7 ("rapidio: add DMA engine support for RIO data transfers") but never used. rio_find_mport() last use was removed in 2013 by commit 9edbc30b434f ("rapidio: update enumerator registration mechanism") rio_unregister_scan() was added in 2013 by commit a11650e11093 ("rapidio: make enumeration/discovery configurable") but never used. Remove them. Link: https://lkml.kernel.org/r/20250419203012.429787-3-linux@treblig.org Signed-off-by: Dr. David Alan Gilbert Cc: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton --- include/linux/rio_drv.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index e49c32b0f394..dd8afe511242 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -391,13 +391,8 @@ struct rio_dev *rio_dev_get(struct rio_dev *); void rio_dev_put(struct rio_dev *); #ifdef CONFIG_RAPIDIO_DMA_ENGINE -extern struct dma_chan *rio_request_dma(struct rio_dev *rdev); extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport); extern void rio_release_dma(struct dma_chan *dchan); -extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg( - struct rio_dev *rdev, struct dma_chan *dchan, - struct rio_dma_data *data, - enum dma_transfer_direction direction, unsigned long flags); extern struct dma_async_tx_descriptor *rio_dma_prep_xfer( struct dma_chan *dchan, u16 destid, struct rio_dma_data *data, -- cgit v1.2.3 From 2a1c6158131f05c228001e1f73304535bc205444 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Sat, 19 Apr 2025 00:49:32 +0100 Subject: relay: remove unused relay_late_setup_files The last use of relay_late_setup_files() was removed in 2018 by commit 2b47733045aa ("drm/i915/guc: Merge log relay file and channel creation") Remove it and the helper it used. relay_late_setup_files() was used for eventually registering 'buffer only' channels. With it gone, delete the docs that explain how to do that. Which suggests it should be possible to lose the 'has_base_filename' flags. (Are there any other uses??) Link: https://lkml.kernel.org/r/20250418234932.490863-1-linux@treblig.org Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jens Axboe Cc: Al Viro Cc: Andriy Shevchenko Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/relay.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/relay.h b/include/linux/relay.h index 72b876dd5cb8..b3224111d074 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -159,9 +159,6 @@ struct rchan *relay_open(const char *base_filename, size_t n_subbufs, const struct rchan_callbacks *cb, void *private_data); -extern int relay_late_setup_files(struct rchan *chan, - const char *base_filename, - struct dentry *parent); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); extern void relay_subbufs_consumed(struct rchan *chan, -- cgit v1.2.3 From 2e27fa943b74c9fc5fdf93090508c3c722531c66 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Mon, 21 Apr 2025 15:49:10 +0800 Subject: treewide: fix typo "previlege" There are some spelling mistakes of 'previlege' in comments which should be 'privilege'. Fix them and add it to scripts/spelling.txt. The typo in arch/loongarch/kvm/main.c was corrected by a different patch [1] and is therefore not included in this submission. [1]. https://lore.kernel.org/all/20250420142208.2252280-1-wheatfox17@icloud.com/ Link: https://lkml.kernel.org/r/46AD404E411A4BAC+20250421074910.66988-1-wangyuli@uniontech.com Signed-off-by: WangYuli Signed-off-by: Andrew Morton --- include/linux/habanalabs/hl_boot_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h index d2a9fc96424b..af5fb4ad77eb 100644 --- a/include/linux/habanalabs/hl_boot_if.h +++ b/include/linux/habanalabs/hl_boot_if.h @@ -295,7 +295,7 @@ enum cpu_boot_dev_sts { * Initialized in: linux * * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from - * previleged entity. FW sets this status + * privileged entity. FW sets this status * bit for host. If this bit is set then * GIC can not be accessed from host. * Initialized in: linux -- cgit v1.2.3 From 9c7b53b21fb1df20f85e3822160b4687f98ff7b3 Mon Sep 17 00:00:00 2001 From: Marc Herbert Date: Thu, 24 Apr 2025 19:40:35 +0000 Subject: compiler_types.h: fix "unused variable" in __compiletime_assert() This refines commit c03567a8e8d5 ("include/linux/compiler.h: don't perform compiletime_assert with -O0") and restores #ifdef __OPTIMIZE__ symmetry by evaluating the 'condition' variable in both compile-time variants of __compiletimeassert(). As __OPTIMIZE__ is always true by default, this commit does not change anything by default. 
But it fixes warnings with _non-default_ CFLAGS like for instance this: make CFLAGS_tcp.o='-Og -U__OPTIMIZE__' from net/ipv4/tcp.c:273: include/net/sch_generic.h: In function `qdisc_cb_private_validate': include/net/sch_generic.h:511:30: error: unused variable `qcb' [-Werror=unused-variable] { struct qdisc_skb_cb *qcb; BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb)); ... } [akpm@linux-foundation.org: regularize comment layout, reflow comment] Link: https://lkml.kernel.org/r/20250424194048.652571-1-marc.herbert@linux.intel.com Signed-off-by: Marc Herbert Cc: Alexander Potapenko Cc: Changbin Du Cc: Jan Hendrik Farr Cc: Macro Elver Cc: Miguel Ojeda Cc: Tony Ambardar Cc: Uros Bizjak Cc: Kees Cook Signed-off-by: Andrew Morton --- include/linux/compiler_types.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 501cffddc2f4..1f2eee8331d3 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -525,6 +525,12 @@ struct ftrace_likely_data { sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) #ifdef __OPTIMIZE__ +/* + * #ifdef __OPTIMIZE__ is only a good approximation; for instance "make + * CFLAGS_foo.o=-Og" defines __OPTIMIZE__, does not elide the conditional code + * and can break compilation with wrong error message(s). Combine with + * -U__OPTIMIZE__ when needed. + */ # define __compiletime_assert(condition, msg, prefix, suffix) \ do { \ /* \ @@ -538,7 +544,7 @@ struct ftrace_likely_data { prefix ## suffix(); \ } while (0) #else -# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) +# define __compiletime_assert(condition, msg, prefix, suffix) ((void)(condition)) #endif #define _compiletime_assert(condition, msg, prefix, suffix) \ -- cgit v1.2.3 From f3def8270c67217efb0e23dcd54714f74de3b6b2 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sun, 27 Apr 2025 23:14:49 +0300 Subject: sort.h: hoist cmp_int() into generic header file Deduplicate the same functionality implemented in several places by moving the cmp_int() helper macro into linux/sort.h. The macro performs a three-way comparison of the arguments mostly useful in different sorting strategies and algorithms. Link: https://lkml.kernel.org/r/20250427201451.900730-1-pchelkin@ispras.ru Signed-off-by: Fedor Pchelkin Suggested-by: Darrick J. Wong Acked-by: Kent Overstreet Acked-by: Coly Li Cc: Al Viro Cc: Carlos Maiolino Cc: Christian Brauner Cc: Coly Li Cc: Jan Kara Signed-off-by: Andrew Morton --- include/linux/sort.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/sort.h b/include/linux/sort.h index 8e5603b10941..c01ef804a0eb 100644 --- a/include/linux/sort.h +++ b/include/linux/sort.h @@ -4,6 +4,16 @@ #include +/** + * cmp_int - perform a three-way comparison of the arguments + * @l: the left argument + * @r: the right argument + * + * Return: 1 if the left argument is greater than the right one; 0 if the + * arguments are equal; -1 if the left argument is less than the right one. 
+ */ +#define cmp_int(l, r) (((l) > (r)) - ((l) < (r))) + void sort_r(void *base, size_t num, size_t size, cmp_r_func_t cmp_func, swap_r_func_t swap_func, -- cgit v1.2.3 From c91d78622e16f628176d7248044106b03818af6d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Apr 2025 10:27:37 +0300 Subject: util_macros.h: fix the reference in kernel-doc In PTR_IF() description the text refers to the parameter as (ptr) while the kernel-doc format asks for @ptr. Fix this accordingly. Link: https://lkml.kernel.org/r/20250428072737.3265239-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Alexandru Ardelean Signed-off-by: Andrew Morton --- include/linux/util_macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 7b64fb597f85..8183ac610cc5 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -85,7 +85,7 @@ * @ptr: A pointer to assign if @cond is true. * * PTR_IF(IS_ENABLED(CONFIG_FOO), ptr) evaluates to @ptr if CONFIG_FOO is set - * to 'y' or 'm', or to NULL otherwise. The (ptr) argument must be a pointer. + * to 'y' or 'm', or to NULL otherwise. The @ptr argument must be a pointer. * * The macro can be very useful to help compiler dropping dead code. * -- cgit v1.2.3 From 479d26ee013c5388ad6855e512ab20d81e43750d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 1 May 2025 02:05:02 +0100 Subject: lib/oid_registry.c: remove unused sprint_OID sprint_OID() was added as part of 2012's commit 4f73175d0375 ("X.509: Add utility functions to render OIDs as strings") but it hasn't been used. Remove it. Note that there's also 'sprint_oid' (lower case) which is used in a lot of places; that's left as is except for fixing its case in a comment. Link: https://lkml.kernel.org/r/20250501010502.326472-1-linux@treblig.org Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Andrew Morton --- include/linux/oid_registry.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 6f9242259edc..6de479ebbe5d 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -151,6 +151,5 @@ enum OID { extern enum OID look_up_OID(const void *data, size_t datasize); extern int parse_OID(const void *data, size_t datasize, enum OID *oid); extern int sprint_oid(const void *, size_t, char *, size_t); -extern int sprint_OID(enum OID, char *, size_t); #endif /* _LINUX_OID_REGISTRY_H */ -- cgit v1.2.3 From cf80fdbc0a5512feabbc56280b2abbb51d4e5b4e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 2 May 2025 15:17:42 +0300 Subject: list: remove redundant 'extern' for function prototypes The 'extern' keyword is redundant for function prototypes. list.h never used them and new code in general is better without them. Link: https://lkml.kernel.org/r/20250502121742.3997529-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton --- include/linux/list.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 29a375889fb8..e7e28afd28f8 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -50,9 +50,9 @@ static inline void INIT_LIST_HEAD(struct list_head *list) * Performs the full set of list corruption checks before __list_add(). * On list corruption reports a warning, and returns false. 
*/ -extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, - struct list_head *prev, - struct list_head *next); +bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); /* * Performs list corruption checks before __list_add(). Returns false if a @@ -93,7 +93,7 @@ static __always_inline bool __list_add_valid(struct list_head *new, * Performs the full set of list corruption checks before __list_del_entry(). * On list corruption reports a warning, and returns false. */ -extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); +bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); /* * Performs list corruption checks before __list_del_entry(). Returns false if a -- cgit v1.2.3 From 98066f2f8901ccf72f3c5d6c391c8fff1cabd49d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 5 May 2025 11:18:21 -0700 Subject: crypto: lib/chacha - strongly type the ChaCha state The ChaCha state matrix is 16 32-bit words. Currently it is represented in the code as a raw u32 array, or even just a pointer to u32. This weak typing is error-prone. Instead, introduce struct chacha_state: struct chacha_state { u32 x[16]; }; Convert all ChaCha and HChaCha functions to use struct chacha_state. No functional changes. Signed-off-by: Eric Biggers Acked-by: Kent Overstreet Signed-off-by: Herbert Xu --- include/crypto/chacha.h | 65 ++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index 58129e18cc31..64fb270f2bfc 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -30,16 +30,23 @@ /* 192-bit nonce, then 64-bit stream position */ #define XCHACHA_IV_SIZE 32 -void chacha_block_generic(u32 *state, u8 *stream, int nrounds); -static inline void chacha20_block(u32 *state, u8 *stream) +struct chacha_state { + u32 x[CHACHA_STATE_WORDS]; +}; + +void chacha_block_generic(struct chacha_state *state, u8 *stream, int nrounds); +static inline void chacha20_block(struct chacha_state *state, u8 *stream) { chacha_block_generic(state, stream, 20); } -void hchacha_block_arch(const u32 *state, u32 *out, int nrounds); -void hchacha_block_generic(const u32 *state, u32 *out, int nrounds); +void hchacha_block_arch(const struct chacha_state *state, u32 *out, + int nrounds); +void hchacha_block_generic(const struct chacha_state *state, u32 *out, + int nrounds); -static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) +static inline void hchacha_block(const struct chacha_state *state, u32 *out, + int nrounds) { if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) hchacha_block_arch(state, out, nrounds); @@ -54,37 +61,39 @@ enum chacha_constants { /* expand 32-byte k */ CHACHA_CONSTANT_TE_K = 0x6b206574U }; -static inline void chacha_init_consts(u32 *state) +static inline void chacha_init_consts(struct chacha_state *state) { - state[0] = CHACHA_CONSTANT_EXPA; - state[1] = CHACHA_CONSTANT_ND_3; - state[2] = CHACHA_CONSTANT_2_BY; - state[3] = CHACHA_CONSTANT_TE_K; + state->x[0] = CHACHA_CONSTANT_EXPA; + state->x[1] = CHACHA_CONSTANT_ND_3; + state->x[2] = CHACHA_CONSTANT_2_BY; + state->x[3] = CHACHA_CONSTANT_TE_K; } -static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv) +static inline void chacha_init(struct chacha_state *state, + const u32 *key, const u8 *iv) { chacha_init_consts(state); - state[4] = 
key[0]; - state[5] = key[1]; - state[6] = key[2]; - state[7] = key[3]; - state[8] = key[4]; - state[9] = key[5]; - state[10] = key[6]; - state[11] = key[7]; - state[12] = get_unaligned_le32(iv + 0); - state[13] = get_unaligned_le32(iv + 4); - state[14] = get_unaligned_le32(iv + 8); - state[15] = get_unaligned_le32(iv + 12); + state->x[4] = key[0]; + state->x[5] = key[1]; + state->x[6] = key[2]; + state->x[7] = key[3]; + state->x[8] = key[4]; + state->x[9] = key[5]; + state->x[10] = key[6]; + state->x[11] = key[7]; + state->x[12] = get_unaligned_le32(iv + 0); + state->x[13] = get_unaligned_le32(iv + 4); + state->x[14] = get_unaligned_le32(iv + 8); + state->x[15] = get_unaligned_le32(iv + 12); } -void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, +void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds); -void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src, +void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds); -static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src, +static inline void chacha_crypt(struct chacha_state *state, + u8 *dst, const u8 *src, unsigned int bytes, int nrounds) { if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) @@ -93,8 +102,8 @@ static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src, chacha_crypt_generic(state, dst, src, bytes, nrounds); } -static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes) +static inline void chacha20_crypt(struct chacha_state *state, + u8 *dst, const u8 *src, unsigned int bytes) { chacha_crypt(state, dst, src, bytes, 20); } -- cgit v1.2.3 From 607c92141cdec6e472d80de813f5251685b9ddc1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 5 May 2025 11:18:23 -0700 Subject: crypto: lib/chacha - add strongly-typed state zeroization Now that the ChaCha state matrix is strongly-typed, add a helper function chacha_zeroize_state() which zeroizes it. Then convert all applicable callers to use it instead of direct memzero_explicit. No functional changes. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/chacha.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index 64fb270f2bfc..7c2e6c68919b 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -16,6 +16,7 @@ #define _CRYPTO_CHACHA_H #include +#include #include /* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ @@ -108,6 +109,11 @@ static inline void chacha20_crypt(struct chacha_state *state, chacha_crypt(state, dst, src, bytes, 20); } +static inline void chacha_zeroize_state(struct chacha_state *state) +{ + memzero_explicit(state, sizeof(*state)); +} + #if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA) bool chacha_is_arch_optimized(void); #else -- cgit v1.2.3 From bdc2a55687f123bd32aaefb81e11c7450a431eaf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 5 May 2025 11:18:24 -0700 Subject: crypto: lib/chacha - add array bounds to function prototypes Add explicit array bounds to the function prototypes for the parameters that didn't already get handled by the conversion to use chacha_state: - chacha_block_*(): Change 'u8 *out' or 'u8 *stream' to u8 out[CHACHA_BLOCK_SIZE]. - hchacha_block_*(): Change 'u32 *out' or 'u32 *stream' to u32 out[HCHACHA_OUT_WORDS]. - chacha_init(): Change 'const u32 *key' to 'const u32 key[CHACHA_KEY_WORDS]'. 
Change 'const u8 *iv' to 'const u8 iv[CHACHA_IV_SIZE]'. No functional changes. This just makes it clear when fixed-size arrays are expected. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/chacha.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index 7c2e6c68919b..91f6b4cf561c 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -26,7 +26,9 @@ #define CHACHA_BLOCK_SIZE 64 #define CHACHAPOLY_IV_SIZE 12 -#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) +#define CHACHA_KEY_WORDS 8 +#define CHACHA_STATE_WORDS 16 +#define HCHACHA_OUT_WORDS 8 /* 192-bit nonce, then 64-bit stream position */ #define XCHACHA_IV_SIZE 32 @@ -35,19 +37,21 @@ struct chacha_state { u32 x[CHACHA_STATE_WORDS]; }; -void chacha_block_generic(struct chacha_state *state, u8 *stream, int nrounds); -static inline void chacha20_block(struct chacha_state *state, u8 *stream) +void chacha_block_generic(struct chacha_state *state, + u8 out[CHACHA_BLOCK_SIZE], int nrounds); +static inline void chacha20_block(struct chacha_state *state, + u8 out[CHACHA_BLOCK_SIZE]) { - chacha_block_generic(state, stream, 20); + chacha_block_generic(state, out, 20); } -void hchacha_block_arch(const struct chacha_state *state, u32 *out, - int nrounds); -void hchacha_block_generic(const struct chacha_state *state, u32 *out, - int nrounds); +void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); +void hchacha_block_generic(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); -static inline void hchacha_block(const struct chacha_state *state, u32 *out, - int nrounds) +static inline void hchacha_block(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) { if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) hchacha_block_arch(state, out, nrounds); @@ -71,7 +75,8 @@ static inline void chacha_init_consts(struct chacha_state *state) } static inline void chacha_init(struct chacha_state *state, - const u32 *key, const u8 *iv) + const u32 key[CHACHA_KEY_WORDS], + const u8 iv[CHACHA_IV_SIZE]) { chacha_init_consts(state); state->x[4] = key[0]; -- cgit v1.2.3 From d8aeec147affcb6cb65060ed5fe095a2d0fe8a20 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 5 May 2025 12:10:41 -0700 Subject: crypto: geniv - use memcpy_sglist() instead of null skcipher For copying data between two scatterlists, just use memcpy_sglist() instead of the so-called "null skcipher". This is much simpler. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/internal/geniv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/crypto/internal/geniv.h b/include/crypto/internal/geniv.h index 7fd7126f593a..012f5fb22d43 100644 --- a/include/crypto/internal/geniv.h +++ b/include/crypto/internal/geniv.h @@ -15,7 +15,6 @@ struct aead_geniv_ctx { spinlock_t lock; struct crypto_aead *child; - struct crypto_sync_skcipher *sknull; u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); }; -- cgit v1.2.3 From c10f66b0acc4ec9db4d443779871132108e57f10 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 5 May 2025 12:10:43 -0700 Subject: crypto: null - remove the default null skcipher crypto_get_default_null_skcipher() and crypto_put_default_null_skcipher() are no longer used, so remove them. 
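For reference, a caller that previously took the default null skcipher just to copy bytes between scatterlists now needs a single call. A sketch of the replacement pattern, assuming memcpy_sglist() from the scatterwalk helpers and using the AEAD request fields as in the geniv conversion above:

    /*
     * Copy the associated data from the source to the destination
     * scatterlist, the way the geniv code now does it; there is no
     * null tfm left to look up, refcount or put.
     */
    static void copy_assoc_data(struct aead_request *req)
    {
        memcpy_sglist(req->dst, req->src, req->assoclen);
    }
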
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/null.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/crypto/null.h b/include/crypto/null.h index 0ef577cc00e3..1c66abf9de3b 100644 --- a/include/crypto/null.h +++ b/include/crypto/null.h @@ -9,7 +9,4 @@ #define NULL_DIGEST_SIZE 0 #define NULL_IV_SIZE 0 -struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void); -void crypto_put_default_null_skcipher(void); - #endif -- cgit v1.2.3 From 698de822780fbae79b11e5d749863c1aa64a0a55 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 5 May 2025 13:33:42 -0700 Subject: crypto: testmgr - make it easier to enable the full set of tests Currently the full set of crypto self-tests requires CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y. This is problematic in two ways. First, developers regularly overlook this option. Second, the description of the tests as "extra" sometimes gives the impression that it is not required that all algorithms pass these tests. Given that the main use case for the crypto self-tests is for developers, make enabling CONFIG_CRYPTO_SELFTESTS=y just enable the full set of crypto self-tests by default. The slow tests can still be disabled by adding the command-line parameter cryptomgr.noextratests=1, soon to be renamed to cryptomgr.noslowtests=1. The only known use case for doing this is for people trying to use the crypto self-tests to satisfy the FIPS 140-3 pre-operational self-testing requirements when the kernel is being validated as a FIPS 140-3 cryptographic module. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/internal/simd.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index f56049bd1660..7e7f1ac3b7fd 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -44,11 +44,9 @@ void simd_unregister_aeads(struct aead_alg *algs, int count, * * This delegates to may_use_simd(), except that this also returns false if SIMD * in crypto code has been temporarily disabled on this CPU by the crypto - * self-tests, in order to test the no-SIMD fallback code. This override is - * currently limited to configurations where the extra self-tests are enabled, - * because it might be a bit too invasive to be part of the regular self-tests. + * self-tests, in order to test the no-SIMD fallback code. */ -#ifdef CONFIG_CRYPTO_MANAGER_EXTRA_TESTS +#ifdef CONFIG_CRYPTO_SELFTESTS DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test); #define crypto_simd_usable() \ (may_use_simd() && !this_cpu_read(crypto_simd_disabled_for_test)) -- cgit v1.2.3 From 285e871884ff3dc31c0c2c1a87f0018481bc8471 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 28 Apr 2025 12:22:16 +0300 Subject: mm/hmm: let users to tag specific PFN with DMA mapped bit Introduce new sticky flag (HMM_PFN_DMA_MAPPED), which isn't overwritten by HMM range fault. Such flag allows users to tag specific PFNs with information if this specific PFN was already DMA mapped. 
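A rough sketch of how a driver can use the sticky bit after each hmm_range_fault() pass, mapping only the entries that are not tagged yet. The function name and the caller-owned dma_addrs[] array are made up, and the notifier retry loop and error unwinding are omitted:

    static int map_new_pages(struct device *dev, struct hmm_range *range,
                             dma_addr_t *dma_addrs)
    {
        unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
        unsigned long i;

        for (i = 0; i < npages; i++) {
            unsigned long *hmm_pfn = &range->hmm_pfns[i];
            struct page *page;

            if (!(*hmm_pfn & HMM_PFN_VALID))
                continue;
            /* sticky bit survived hmm_range_fault(): already mapped */
            if (*hmm_pfn & HMM_PFN_DMA_MAPPED)
                continue;

            page = hmm_pfn_to_page(*hmm_pfn);
            dma_addrs[i] = dma_map_page(dev, page, 0, PAGE_SIZE,
                                        DMA_BIDIRECTIONAL);
            if (dma_mapping_error(dev, dma_addrs[i]))
                return -EFAULT;

            *hmm_pfn |= HMM_PFN_DMA_MAPPED;
        }
        return 0;
    }

The next patch wraps exactly this kind of bookkeeping behind a generic helper.
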
Tested-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/linux/hmm.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 126a36571667..a43e56f273a1 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -23,6 +23,8 @@ struct mmu_interval_notifier; * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID) * HMM_PFN_ERROR - accessing the pfn is impossible and the device should * fail. ie poisoned memory, special pages, no vma, etc + * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation + * to mark that page is already DMA mapped * * On input: * 0 - Return the current state of the page, do not fault it. @@ -36,13 +38,19 @@ enum hmm_pfn_flags { HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1), HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2), HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3), - HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8), + /* + * Sticky flags, carried from input to output, + * don't forget to update HMM_PFN_INOUT_FLAGS + */ + HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4), + + HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 9), /* Input flags */ HMM_PFN_REQ_FAULT = HMM_PFN_VALID, HMM_PFN_REQ_WRITE = HMM_PFN_WRITE, - HMM_PFN_FLAGS = 0xFFUL << HMM_PFN_ORDER_SHIFT, + HMM_PFN_FLAGS = ~((1UL << HMM_PFN_ORDER_SHIFT) - 1), }; /* @@ -57,6 +65,14 @@ static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn) return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS); } +/* + * hmm_pfn_to_phys() - return physical address pointed to by a device entry + */ +static inline phys_addr_t hmm_pfn_to_phys(unsigned long hmm_pfn) +{ + return __pfn_to_phys(hmm_pfn & ~HMM_PFN_FLAGS); +} + /* * hmm_pfn_to_map_order() - return the CPU mapping size order * -- cgit v1.2.3 From 8cad47130566123b2c70ef2aa53be02ef1aee5e5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 28 Apr 2025 12:22:17 +0300 Subject: mm/hmm: provide generic DMA managing logic HMM callers use PFN list to populate range while calling to hmm_range_fault(), the conversion from PFN to DMA address is done by the callers with help of another DMA list. However, it is wasteful on any modern platform and by doing the right logic, that DMA list can be avoided. Provide generic logic to manage these lists and gave an interface to map/unmap PFNs to DMA addresses, without requiring from the callers to be an experts in DMA core API. 
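A possible calling sequence for the new interface, kept deliberately minimal: mmap locking, the notifier retry loop and the p2pdma handling are omitted, and it is an assumption here that dma_entry_size is the number of bytes covered per entry and that hmm_dma_map_pfn() records the returned address in map->dma_list[]:

    static int demo_map_range(struct device *dev, struct hmm_range *range,
                              struct hmm_dma_map *map, size_t npages)
    {
        struct pci_p2pdma_map_state p2pdma = {};
        size_t i;
        int ret;

        /* one PFN slot and one DMA slot per PAGE_SIZE of the range */
        ret = hmm_dma_map_alloc(dev, map, npages, PAGE_SIZE);
        if (ret)
            return ret;

        range->hmm_pfns = map->pfn_list;    /* filled by hmm_range_fault() */
        ret = hmm_range_fault(range);
        if (ret)
            goto err_free;

        for (i = 0; i < npages; i++) {
            dma_addr_t dma = hmm_dma_map_pfn(dev, map, i, &p2pdma);

            if (dma_mapping_error(dev, dma)) {
                ret = -EFAULT;
                goto err_unmap;
            }
            /* dma is the address that gets programmed into the device */
        }
        return 0;

    err_unmap:
        while (i--)
            hmm_dma_unmap_pfn(dev, map, i);
    err_free:
        hmm_dma_map_free(dev, map);
        return ret;
    }
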
Tested-by: Jens Axboe Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/linux/hmm-dma.h | 33 +++++++++++++++++++++++++++++++++ include/linux/hmm.h | 6 +++++- 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 include/linux/hmm-dma.h (limited to 'include') diff --git a/include/linux/hmm-dma.h b/include/linux/hmm-dma.h new file mode 100644 index 000000000000..f58b9fc71999 --- /dev/null +++ b/include/linux/hmm-dma.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ +#ifndef LINUX_HMM_DMA_H +#define LINUX_HMM_DMA_H + +#include + +struct dma_iova_state; +struct pci_p2pdma_map_state; + +/* + * struct hmm_dma_map - array of PFNs and DMA addresses + * + * @state: DMA IOVA state + * @pfns: array of PFNs + * @dma_list: array of DMA addresses + * @dma_entry_size: size of each DMA entry in the array + */ +struct hmm_dma_map { + struct dma_iova_state state; + unsigned long *pfn_list; + dma_addr_t *dma_list; + size_t dma_entry_size; +}; + +int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map, + size_t nr_entries, size_t dma_entry_size); +void hmm_dma_map_free(struct device *dev, struct hmm_dma_map *map); +dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map, + size_t idx, + struct pci_p2pdma_map_state *p2pdma_state); +bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx); +#endif /* LINUX_HMM_DMA_H */ diff --git a/include/linux/hmm.h b/include/linux/hmm.h index a43e56f273a1..db75ffc949a7 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -23,6 +23,8 @@ struct mmu_interval_notifier; * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID) * HMM_PFN_ERROR - accessing the pfn is impossible and the device should * fail. ie poisoned memory, special pages, no vma, etc + * HMM_PFN_P2PDMA - P2P page + * HMM_PFN_P2PDMA_BUS - Bus mapped P2P transfer * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation * to mark that page is already DMA mapped * @@ -43,8 +45,10 @@ enum hmm_pfn_flags { * don't forget to update HMM_PFN_INOUT_FLAGS */ HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4), + HMM_PFN_P2PDMA = 1UL << (BITS_PER_LONG - 5), + HMM_PFN_P2PDMA_BUS = 1UL << (BITS_PER_LONG - 6), - HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 9), + HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 11), /* Input flags */ HMM_PFN_REQ_FAULT = HMM_PFN_VALID, -- cgit v1.2.3 From eedd5b1276e76d6b260a7a77a149ef5155aa76f0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 28 Apr 2025 12:22:18 +0300 Subject: RDMA/umem: Store ODP access mask information in PFN As a preparation to remove dma_list, store access mask in PFN pointer and not in dma_addr_t. Tested-by: Jens Axboe Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/rdma/ib_umem_odp.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 0844c1d05ac6..a345c26a745d 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -8,6 +8,7 @@ #include #include +#include struct ib_umem_odp { struct ib_umem umem; @@ -67,19 +68,6 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) umem_odp->page_shift; } -/* - * The lower 2 bits of the DMA address signal the R/W permissions for - * the entry. To upgrade the permissions, provide the appropriate - * bitmask to the map_dma_pages function. 
- * - * Be aware that upgrading a mapped address might result in change of - * the DMA address for the page. - */ -#define ODP_READ_ALLOWED_BIT (1<<0ULL) -#define ODP_WRITE_ALLOWED_BIT (1<<1ULL) - -#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) - #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_umem_odp * -- cgit v1.2.3 From 1efe8c0670d6a6883faa09c9abc746c741f5664a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 28 Apr 2025 12:22:19 +0300 Subject: RDMA/core: Convert UMEM ODP DMA mapping to caching IOVA and page linkage Reuse newly added DMA API to cache IOVA and only link/unlink pages in fast path for UMEM ODP flow. Tested-by: Jens Axboe Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/rdma/ib_umem_odp.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index a345c26a745d..2a24bf791c10 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -8,24 +8,17 @@ #include #include -#include +#include struct ib_umem_odp { struct ib_umem umem; struct mmu_interval_notifier notifier; struct pid *tgid; - /* An array of the pfns included in the on-demand paging umem. */ - unsigned long *pfn_list; + struct hmm_dma_map map; /* - * An array with DMA addresses mapped for pfns in pfn_list. - * The lower two bits designate access permissions. - * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT. - */ - dma_addr_t *dma_list; - /* - * The umem_mutex protects the page_list and dma_list fields of an ODP + * The umem_mutex protects the page_list field of an ODP * umem, allowing only a single thread to map/unmap pages. The mutex * also protects access to the mmu notifier counters. */ -- cgit v1.2.3 From f4856c20c137a73d73e448caa3964098024248bf Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 29 Apr 2025 02:36:03 +0200 Subject: power: supply: core: Add additional health status values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some batteries can signal when an internal fuse was blown. In such a case POWER_SUPPLY_HEALTH_DEAD is too vague for userspace applications to perform meaningful diagnostics. Additionally some batteries can also signal when some of their internal cells are imbalanced. In such a case returning POWER_SUPPLY_HEALTH_UNSPEC_FAILURE is again too vague for userspace applications to perform meaningful diagnostics. Add new health status values for both cases. Signed-off-by: Armin Wolf Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250429003606.303870-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/power_supply.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index cbec930430a7..03786c8c2efe 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -71,6 +71,8 @@ enum { POWER_SUPPLY_HEALTH_COOL, POWER_SUPPLY_HEALTH_HOT, POWER_SUPPLY_HEALTH_NO_BATTERY, + POWER_SUPPLY_HEALTH_BLOWN_FUSE, + POWER_SUPPLY_HEALTH_CELL_IMBALANCE, }; enum { -- cgit v1.2.3 From ced82fce77e93315239f54caebbc88e263078e31 Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Wed, 7 May 2025 08:59:02 -0700 Subject: net: mana: Probe rdma device in mana driver Initialize gdma device for rdma inside mana module. For each gdma device, initialize an auxiliary ib device. 
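A sketch of the intended call order; the surrounding code is assumed rather than quoted, only the prototypes and the gdma_context members come from this series:

    /* after the ethernet function has probed, bring up the RDMA side too */
    err = mana_probe(&gc->mana, false);
    if (err)
        return err;

    err = mana_rdma_probe(&gc->mana_ib);
    if (err) {
        mana_remove(&gc->mana, false);
        return err;
    }
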
Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1746633545-17653-2-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Long Li Signed-off-by: Leon Romanovsky --- include/net/mana/mana.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 0f78065de8fe..5857efc885a6 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -488,6 +488,9 @@ int mana_detach(struct net_device *ndev, bool from_close); int mana_probe(struct gdma_dev *gd, bool resuming); void mana_remove(struct gdma_dev *gd, bool suspending); +int mana_rdma_probe(struct gdma_dev *gd); +void mana_rdma_remove(struct gdma_dev *gd); + void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev); int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, u32 flags); -- cgit v1.2.3 From 505cc26bcae00699bacaee66cd50ede7a9cc89cb Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 7 May 2025 08:59:05 -0700 Subject: net: mana: Add support for auxiliary device servicing events Handle soc servicing events which require the rdma auxiliary device resources to be cleaned up during a suspend, and re-initialized during a resume. Signed-off-by: Shiraz Saleem Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1746633545-17653-5-git-send-email-kotaranov@linux.microsoft.com Signed-off-by: Leon Romanovsky --- include/net/mana/gdma.h | 19 +++++++++++++++++++ include/net/mana/hw_channel.h | 9 +++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index ffa9820f14ba..3ce56a816425 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -60,6 +60,7 @@ enum gdma_eqe_type { GDMA_EQE_HWC_INIT_DONE = 131, GDMA_EQE_HWC_SOC_RECONFIG = 132, GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, + GDMA_EQE_HWC_SOC_SERVICE = 134, GDMA_EQE_RNIC_QP_FATAL = 176, }; @@ -70,6 +71,18 @@ enum { GDMA_DEVICE_MANA_IB = 3, }; +enum gdma_service_type { + GDMA_SERVICE_TYPE_NONE = 0, + GDMA_SERVICE_TYPE_RDMA_SUSPEND = 1, + GDMA_SERVICE_TYPE_RDMA_RESUME = 2, +}; + +struct mana_service_work { + struct work_struct work; + struct gdma_dev *gdma_dev; + enum gdma_service_type event; +}; + struct gdma_resource { /* Protect the bitmap */ spinlock_t lock; @@ -224,6 +237,8 @@ struct gdma_dev { void *driver_data; struct auxiliary_device *adev; + bool is_suspended; + bool rdma_teardown; }; /* MANA_PAGE_SIZE is the DMA unit */ @@ -409,6 +424,8 @@ struct gdma_context { struct gdma_dev mana_ib; u64 pf_cap_flags1; + + struct workqueue_struct *service_wq; }; static inline bool mana_gd_is_mana(struct gdma_dev *gd) @@ -891,4 +908,6 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle); void mana_register_debugfs(void); void mana_unregister_debugfs(void); +int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event); + #endif /* _GDMA_H */ diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h index 158b125692c2..83cf93338eb3 100644 --- a/include/net/mana/hw_channel.h +++ b/include/net/mana/hw_channel.h @@ -49,6 +49,15 @@ union hwc_init_type_data { }; }; /* HW DATA */ +union hwc_init_soc_service_type { + u32 as_uint32; + + struct { + u32 value : 28; + u32 type : 4; + }; +}; /* HW DATA */ + struct hwc_rx_oob { u32 type : 6; u32 eom : 1; -- cgit v1.2.3 From 144d6dfc7482455eabf8e8caa974a6e8d9572705 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 12 May 2025 02:33:16 +0000 Subject: ASoC: soc-core: 
merge snd_soc_unregister_component() and snd_soc_unregister_component_by_driver() We have below 2 functions, but these are very similar (A) snd_soc_unregister_component_by_driver() (B) snd_soc_unregister_component() (A) void snd_soc_unregister_component_by_driver(...) { ... (a) mutex_lock(&client_mutex); ^ (X) component = snd_soc_lookup_component_nolocked(dev, component_driver->name); | if (!component) ^^^^^^^^^^^^^^^^^^^^^^ | goto out; (b) | snd_soc_del_component_unlocked(component); v out: (c) mutex_unlock(&client_mutex); } (B) void snd_soc_unregister_component_by_driver(...) { (a) mutex_lock(&client_mutex); ^ while (1) { | (X) struct snd_soc_component *component = snd_soc_lookup_component_nolocked(dev, NULL); | ^^^^ (b) if (!component) | break; | | snd_soc_del_component_unlocked(component); v } (c) mutex_unlock(&client_mutex); } Both are calling lock (a), find component and remove it (b), and unlock (c). The big diff is whether use driver name for lookup() or not (X). Merge these into snd_soc_unregister_component_by_driver() (B), and snd_soc_unregister_component_by_driver() (A) can be macro. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87h61qy2vn.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 8d113ee8c2bc..1fffef311c41 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -444,7 +444,7 @@ int snd_soc_register_component(struct device *dev, int devm_snd_soc_register_component(struct device *dev, const struct snd_soc_component_driver *component_driver, struct snd_soc_dai_driver *dai_drv, int num_dai); -void snd_soc_unregister_component(struct device *dev); +#define snd_soc_unregister_component(dev) snd_soc_unregister_component_by_driver(dev, NULL) void snd_soc_unregister_component_by_driver(struct device *dev, const struct snd_soc_component_driver *component_driver); struct snd_soc_component *snd_soc_lookup_component_nolocked(struct device *dev, -- cgit v1.2.3 From 6157e62b07d9331cc1d4d9d525dab33d45b0e83c Mon Sep 17 00:00:00 2001 From: Paul Geurts Date: Mon, 5 May 2025 13:59:36 +0200 Subject: regulator: pca9450: Add restart handler When restarting a CPU powered by the PCA9450 power management IC, it is beneficial to use the PCA9450 to power cycle the CPU and all its connected peripherals to start up in a known state. The PCA9450 features a cold start procedure initiated by an I2C command. Add a restart handler so that the PCA9450 is used to restart the CPU. The restart handler sends command 0x14 to the SW_RST register, initiating a cold reset (Power recycle all regulators except LDO1, LDO2 and CLK_32K_OUT) As the PCA9450 is a PMIC specific for the i.MX8M family CPU, the restart handler priority is set just slightly higher than imx2_wdt and the PSCI restart handler. This makes sure this restart handler takes precedence. 
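On the driver side the handler boils down to a priority-130 sys-off callback that writes the reset command over I2C. A sketch follows; the struct pca9450/regmap plumbing and the spelling of the SW_RST register index macro are assumptions, only SW_RST_COMMAND and PCA9450_RESTART_HANDLER_PRIORITY come from this header:

    static int pca9450_restart_handler(struct sys_off_data *data)
    {
        struct pca9450 *pca9450 = data->cb_data;

        /*
         * 0x14: cold reset, power-cycle all regulators except
         * LDO1, LDO2 and CLK_32K_OUT.
         */
        regmap_write(pca9450->regmap, PCA9450_REG_SWRST, SW_RST_COMMAND);

        return NOTIFY_DONE;
    }

    /* in probe() */
    ret = devm_register_sys_off_handler(dev, SYS_OFF_MODE_RESTART,
                                        PCA9450_RESTART_HANDLER_PRIORITY,
                                        pca9450_restart_handler, pca9450);
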
Signed-off-by: Paul Geurts Link: https://patch.msgid.link/20250505115936.1946891-1-paul.geurts@prodrive-technologies.com Signed-off-by: Mark Brown --- include/linux/regulator/pca9450.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index b427b5873de1..85b4fecc10d8 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -35,6 +35,8 @@ enum { PCA9450_DVS_LEVEL_MAX, }; +#define PCA9450_RESTART_HANDLER_PRIORITY 130 + #define PCA9450_BUCK1_VOLTAGE_NUM 0x80 #define PCA9450_BUCK2_VOLTAGE_NUM 0x80 #define PCA9450_BUCK3_VOLTAGE_NUM 0x80 @@ -235,4 +237,7 @@ enum { #define I2C_LT_ON_RUN 0x02 #define I2C_LT_FORCE_ENABLE 0x03 +/* PCA9450_REG_SW_RST command */ +#define SW_RST_COMMAND 0x14 + #endif /* __LINUX_REG_PCA9450_H__ */ -- cgit v1.2.3 From f61c39494901d729d5c21ba92040e9164ea957be Mon Sep 17 00:00:00 2001 From: Alexey Neyman Date: Fri, 25 Apr 2025 21:22:35 +0200 Subject: ACPICA: Interpret SIDP structures in DMAR ACPICA commit af51f730e0bccf789686cea68e116d5f0b27aacb Added in revision 3.4 of the VT-d spec. To support SIDP, part of the previously reserved field in the device scope structure was used to create a 1-byte "Flags" field. Link: https://github.com/acpica/acpica/commit/af51f730 Signed-off-by: Alexey Neyman Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2239745.irdbgypaU6@rjwysocki.net --- include/acpi/actbl1.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 329454c303b9..e0cbda95fec8 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -819,7 +819,8 @@ enum acpi_dmar_type { ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, ACPI_DMAR_TYPE_NAMESPACE = 4, ACPI_DMAR_TYPE_SATC = 5, - ACPI_DMAR_TYPE_RESERVED = 6 /* 6 and greater are reserved */ + ACPI_DMAR_TYPE_SIDP = 6, + ACPI_DMAR_TYPE_RESERVED = 7 /* 7 and greater are reserved */ }; /* DMAR Device Scope structure */ @@ -827,7 +828,8 @@ enum acpi_dmar_type { struct acpi_dmar_device_scope { u8 entry_type; u8 length; - u16 reserved; + u8 flags; + u8 reserved; u8 enumeration_id; u8 bus; }; @@ -923,6 +925,15 @@ struct acpi_dmar_satc { u8 reserved; u16 segment; }; + +/* 6: so_c Integrated Device Property Reporting Structure */ + +struct acpi_dmar_sidp { + struct acpi_dmar_header header; + u16 reserved; + u16 segment; +}; + /******************************************************************************* * * DRTM - Dynamic Root of Trust for Measurement table -- cgit v1.2.3 From 67f9d690c94ad97f03e055e1444f5471b2d44e4c Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 25 Apr 2025 21:23:17 +0200 Subject: ACPICA: infrastructure: Add new header and ACPI_DMT_BUF26 types ACPICA commit 52840d3826bd7e183fcb555e044e190aea0b5021 New MRRM tables can have subtables that are larger than 255 bytes. Add a new header typedef that uses u16 for Length. Could be backported to acpi_aspt_header, struct acpi_dmar_header, struct acpi_nfit_header, struct acpi_prmt_module_header, struct acpi_prmt_module_info. Will be used for upcoming ERDT table. MRRM table has a 26-byte reserved section in header. Add ACPI_DMT_BUF26 to describe this in struct acpi_dmtable_info. Link: https://github.com/acpica/acpica/commit/52840d38 Signed-off-by: Tony Luck Signed-off-by: Rafael J. 
Wysocki Link: https://patch.msgid.link/3005638.e9J7NaK4W3@rjwysocki.net --- include/acpi/actbl1.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index e0cbda95fec8..b403b0b60bf6 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -155,6 +155,13 @@ struct acpi_aspt_acpi_mbox_regs { u64 reserved2[2]; }; +/* Larger subtable header (when Length can exceed 255) */ + +struct acpi_subtable_header_16 { + u16 type; + u16 length; +}; + /******************************************************************************* * * ASF - Alert Standard Format table (Signature "ASF!") -- cgit v1.2.3 From ec0ed62b076048528c8951ba6691b43a005f1021 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 25 Apr 2025 21:24:02 +0200 Subject: ACPICA: actbl2.h: MRRM: Add typedef and other definitions ACPICA commit 04fd53b2647b9f6f98cfca551383689cb3b59362 The MRRM table describes association between physical address ranges and "region numbers". Structure defined in the Intel Resource Director Technology (RDT) Architecture specification downloadable from www.intel.com/sdm Link: https://github.com/acpica/acpica/commit/04fd53b2 Signed-off-by: Tony Luck Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3372188.44csPzL39Z@rjwysocki.net --- include/acpi/actbl2.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index a3008bae6f48..4fd5e8b2e199 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -37,6 +37,7 @@ #define ACPI_SIG_MCHI "MCHI" /* Management Controller Host Interface table */ #define ACPI_SIG_MPAM "MPAM" /* Memory System Resource Partitioning and Monitoring Table */ #define ACPI_SIG_MPST "MPST" /* Memory Power State Table */ +#define ACPI_SIG_MRRM "MRRM" /* Memory Range and Region Mapping table */ #define ACPI_SIG_MSDM "MSDM" /* Microsoft Data Management Table */ #define ACPI_SIG_NFIT "NFIT" /* NVDIMM Firmware Interface Table */ #define ACPI_SIG_NHLT "NHLT" /* Non HD Audio Link Table */ @@ -1736,6 +1737,45 @@ struct acpi_msct_proximity { u64 memory_capacity; /* In bytes */ }; +/******************************************************************************* + * + * MRRM - Memory Range and Region Mapping (MRRM) table + * + ******************************************************************************/ + +struct acpi_table_mrrm { + struct acpi_table_header header; /* Common ACPI table header */ + u8 max_mem_region; /* Max Memory Regions supported */ + u8 flags; /* Region assignment type */ + u8 reserved[26]; + u8 memory_range_entry[]; +}; + +/* Flags */ +#define ACPI_MRRM_FLAGS_REGION_ASSIGNMENT_OS (1<<0) + +/******************************************************************************* + * + * Memory Range entry - Memory Range entry in MRRM table + * + ******************************************************************************/ + +struct acpi_table_mrrm_mem_range_entry { + ACPI_SUBTABLE_HEADER_16 header; + u32 reserved0; /* Reserved */ + u64 addr_base; /* Base addr of the mem range */ + u64 addr_len; /* Length of the mem range */ + u16 region_id_flags; /* Valid local or remote Region-ID */ + u8 local_region_id; /* Platform-assigned static local Region-ID */ + u8 remote_region_id; /* Platform-assigned static remote Region-ID */ + u32 reserved1; /* Reserved */ + /* Region-ID Programming Registers[] */ +}; + +/* Values for region_id_flags above */ +#define ACPI_MRRM_VALID_REGION_ID_FLAGS_LOCAL 
(1<<0) +#define ACPI_MRRM_VALID_REGION_ID_FLAGS_REMOTE (1<<1) + /******************************************************************************* * * MSDM - Microsoft Data Management table -- cgit v1.2.3 From ced63370237ae86d6d51fa58cc7bc2e3b7f6457c Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Fri, 25 Apr 2025 21:24:41 +0200 Subject: ACPICA: actbl2: Add definitions for RIMT ACPICA commit 73c32bc89cad64ab19c1231a202361e917e6823c RISC-V IO Mapping Table (RIMT) is a new static table defined for RISC-V to communicate IOMMU information to the OS. The specification for RIMT is available at [1]. Add structure definitions for RIMT. Link: https://github.com/riscv-non-isa/riscv-acpi-rimt [1] Link: https://github.com/acpica/acpica/commit/73c32bc8 Signed-off-by: Sunil V L Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/10665648.nUPlyArG6x@rjwysocki.net --- include/acpi/actbl2.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 4fd5e8b2e199..0ae9d9915d58 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -51,6 +51,7 @@ #define ACPI_SIG_RAS2 "RAS2" /* RAS2 Feature table */ #define ACPI_SIG_RGRT "RGRT" /* Regulatory Graphics Resource Table */ #define ACPI_SIG_RHCT "RHCT" /* RISC-V Hart Capabilities Table */ +#define ACPI_SIG_RIMT "RIMT" /* RISC-V IO Mapping Table */ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ #define ACPI_SIG_SDEI "SDEI" /* Software Delegated Exception Interface Table */ #define ACPI_SIG_SDEV "SDEV" /* Secure Devices table */ @@ -3042,6 +3043,88 @@ struct acpi_rhct_hart_info { u32 uid; /* ACPI processor UID */ }; +/******************************************************************************* + * + * RIMT - RISC-V IO Remapping Table + * + * https://github.com/riscv-non-isa/riscv-acpi-rimt + * + ******************************************************************************/ + +struct acpi_table_rimt { + struct acpi_table_header header; /* Common ACPI table header */ + u32 num_nodes; /* Number of RIMT Nodes */ + u32 node_offset; /* Offset to RIMT Node Array */ + u32 reserved; +}; + +struct acpi_rimt_node { + u8 type; + u8 revision; + u16 length; + u16 reserved; + u16 id; + char node_data[]; +}; + +enum acpi_rimt_node_type { + ACPI_RIMT_NODE_TYPE_IOMMU = 0x0, + ACPI_RIMT_NODE_TYPE_PCIE_ROOT_COMPLEX = 0x1, + ACPI_RIMT_NODE_TYPE_PLAT_DEVICE = 0x2, +}; + +struct acpi_rimt_iommu { + u8 hardware_id[8]; /* Hardware ID */ + u64 base_address; /* Base Address */ + u32 flags; /* Flags */ + u32 proximity_domain; /* Proximity Domain */ + u16 pcie_segment_number; /* PCIe Segment number */ + u16 pcie_bdf; /* PCIe B/D/F */ + u16 num_interrupt_wires; /* Number of interrupt wires */ + u16 interrupt_wire_offset; /* Interrupt wire array offset */ + u64 interrupt_wire[]; /* Interrupt wire array */ +}; + +/* IOMMU Node Flags */ +#define ACPI_RIMT_IOMMU_FLAGS_PCIE (1) +#define ACPI_RIMT_IOMMU_FLAGS_PXM_VALID (1 << 1) + +/* Interrupt Wire Structure */ +struct acpi_rimt_iommu_wire_gsi { + u32 irq_num; /* Interrupt Number */ + u32 flags; /* Flags */ +}; + +/* Interrupt Wire Flags */ +#define ACPI_RIMT_GSI_LEVEL_TRIGGERRED (1) +#define ACPI_RIMT_GSI_ACTIVE_HIGH (1 << 1) + +struct acpi_rimt_id_mapping { + u32 source_id_base; /* Source ID Base */ + u32 num_ids; /* Number of IDs */ + u32 dest_id_base; /* Destination Device ID Base */ + u32 dest_offset; /* Destination IOMMU Offset */ + u32 flags; /* Flags */ +}; + +struct 
acpi_rimt_pcie_rc { + u32 flags; /* Flags */ + u16 reserved; /* Reserved */ + u16 pcie_segment_number; /* PCIe Segment number */ + u16 id_mapping_offset; /* ID mapping array offset */ + u16 num_id_mappings; /* Number of ID mappings */ +}; + +/* PCIe Root Complex Node Flags */ +#define ACPI_RIMT_PCIE_ATS_SUPPORTED (1) +#define ACPI_RIMT_PCIE_PRI_SUPPORTED (1 << 1) + +struct acpi_rimt_platform_device { + u16 id_mapping_offset; /* ID Mapping array offset */ + u16 num_id_mappings; /* Number of ID mappings */ + char device_name[]; /* Device Object Name */ +}; + /******************************************************************************* * * SBST - Smart Battery Specification Table -- cgit v1.2.3 From 6d788e6848ce25e57afc7c1edaf7a925edf940d4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 25 Apr 2025 21:25:23 +0200 Subject: ACPICA: MRRM: Some cleanups ACPICA commit 022e2e4169841f429dbda677a4780830bf4c2177 1) Added source specification to MRRM table comment in actbl2.h 2) Shorten typedef from ACPI_TABLE_MRRM_MEM_RANGE_ENTRY to struct acpi_mrrm_mem_range_entry 3) Add new typedefs to source/tools/acpisrc/astable.c 4) Fix cut and paste errors in acpi_dm_table_info_mrrm0[] definition 5) Fix indent and source code style errors in actbl2.h 6) The base/length fields in the memory range structure are system memory addresses, not "MMIO". Update the acpi_dm_table_info_mrrm0[] strings. 7) Add main/sub table comments to acpi_dm_dump_mrrm() and dt_compile_mrrm() Link: https://github.com/acpica/acpica/commit/022e2e41 Signed-off-by: Tony Luck Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2018955.PYKUYFuaPT@rjwysocki.net --- include/acpi/actbl2.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 0ae9d9915d58..274b3b85b6d7 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1741,6 +1741,8 @@ struct acpi_msct_proximity { /******************************************************************************* * * MRRM - Memory Range and Region Mapping (MRRM) table + * Conforms to "Intel Resource Director Technology Architecture Specification" + * Version 1.1, January 2025 * ******************************************************************************/ @@ -1756,13 +1758,13 @@ struct acpi_table_mrrm { #define ACPI_MRRM_FLAGS_REGION_ASSIGNMENT_OS (1<<0) /******************************************************************************* - * - * Memory Range entry - Memory Range entry in MRRM table - * - ******************************************************************************/ + * + * Memory Range entry - Memory Range entry in MRRM table + * + ******************************************************************************/ -struct acpi_table_mrrm_mem_range_entry { - ACPI_SUBTABLE_HEADER_16 header; +struct acpi_mrrm_mem_range_entry { + struct acpi_subtable_header_16 header; u32 reserved0; /* Reserved */ u64 addr_base; /* Base addr of the mem range */ u64 addr_len; /* Length of the mem range */ -- cgit v1.2.3 From 5833942db741bb321a8557511969c04481086c81 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 25 Apr 2025 21:26:44 +0200 Subject: ACPICA: infrastructure: Add new DMT_BUF types and shorten a long name ACPICA commit b8713f71b4023a0396fe61503bbbf5226e5eed1b Some ERDT subtables have 11 and 24 byte reserved fields. Add the ACPI_DMT_BUF11 and ACPI_DMT_BUF24 types to describe these reserved fields in struct acpi_dmtable_info structures. 
Shorten the ACPI_SUBTABLE_HEADER_16 name to ACPI_SUBTBL_HDR Link: https://github.com/acpica/acpica/commit/b8713f71 Signed-off-by: Tony Luck Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3643286.iIbC2pHGDl@rjwysocki.net --- include/acpi/actbl1.h | 2 +- include/acpi/actbl2.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index b403b0b60bf6..6d2cc181e3a4 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -157,7 +157,7 @@ struct acpi_aspt_acpi_mbox_regs { /* Larger subtable header (when Length can exceed 255) */ -struct acpi_subtable_header_16 { +struct acpi_subtbl_hdr_16 { u16 type; u16 length; }; diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 274b3b85b6d7..2c07b097ef9f 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1764,7 +1764,7 @@ struct acpi_table_mrrm { ******************************************************************************/ struct acpi_mrrm_mem_range_entry { - struct acpi_subtable_header_16 header; + struct acpi_subtbl_hdr_16 header; u32 reserved0; /* Reserved */ u64 addr_base; /* Base addr of the mem range */ u64 addr_len; /* Length of the mem range */ -- cgit v1.2.3 From e91e596431490f8f55572e4cd43dd51a82f00033 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 25 Apr 2025 21:27:24 +0200 Subject: ACPICA: actbl2.h: ERDT: Add typedef and other definitions ACPICA commit dddd9270531d74af523afa68515d8aae6a18bbe0 The ERDT table (and its many subtables) enumerate capabilities and methods for Intel Resource Director Technology to monitor and control L3 cache allocation and memory bandwidth by CPU cores and IO devices. Structure defined in the Intel Resource Director Technology (RDT) Architecture specification downloadable from www.intel.com/sdm Link: https://github.com/acpica/acpica/commit/dddd9270 Signed-off-by: Tony Luck Signed-off-by: Rafael J. 
Wysocki Link: https://patch.msgid.link/3296755.5fSG56mABF@rjwysocki.net --- include/acpi/actbl2.h | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 2c07b097ef9f..ccbf543bceeb 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -29,6 +29,7 @@ #define ACPI_SIG_BDAT "BDAT" /* BIOS Data ACPI Table */ #define ACPI_SIG_CCEL "CCEL" /* CC Event Log Table */ #define ACPI_SIG_CDAT "CDAT" /* Coherent Device Attribute Table */ +#define ACPI_SIG_ERDT "ERDT" /* Enhanced Resource Director Technology */ #define ACPI_SIG_IORT "IORT" /* IO Remapping Table */ #define ACPI_SIG_IVRS "IVRS" /* I/O Virtualization Reporting Structure */ #define ACPI_SIG_LPIT "LPIT" /* Low Power Idle Table */ @@ -450,6 +451,195 @@ struct acpi_table_ccel { u64 log_area_start_address; }; +/******************************************************************************* + * + * ERDT - Enhanced Resource Director Technology (ERDT) table + * + * Conforms to "Intel Resource Director Technology Architecture Specification" + * Version 1.1, January 2025 + * + ******************************************************************************/ + +struct acpi_table_erdt { + struct acpi_table_header header; /* Common ACPI table header */ + u32 max_clos; /* Maximum classes of service */ + u8 reserved[24]; + u8 erdt_substructures[]; +}; + +/* Values for subtable type in struct acpi_subtbl_hdr_16 */ + +enum acpi_erdt_type { + ACPI_ERDT_TYPE_RMDD = 0, + ACPI_ERDT_TYPE_CACD = 1, + ACPI_ERDT_TYPE_DACD = 2, + ACPI_ERDT_TYPE_CMRC = 3, + ACPI_ERDT_TYPE_MMRC = 4, + ACPI_ERDT_TYPE_MARC = 5, + ACPI_ERDT_TYPE_CARC = 6, + ACPI_ERDT_TYPE_CMRD = 7, + ACPI_ERDT_TYPE_IBRD = 8, + ACPI_ERDT_TYPE_IBAD = 9, + ACPI_ERDT_TYPE_CARD = 10, + ACPI_ERDT_TYPE_RESERVED = 11 /* 11 and above are reserved */ +}; + +/* + * ERDT Subtables, correspond to Type in struct acpi_subtbl_hdr_16 + */ + +/* 0: RMDD - Resource Management Domain Description */ + +struct acpi_erdt_rmdd { + struct acpi_subtbl_hdr_16 header; + u16 flags; + u16 IO_l3_slices; /* Number of slices in IO cache */ + u8 IO_l3_sets; /* Number of sets in IO cache */ + u8 IO_l3_ways; /* Number of ways in IO cache */ + u64 reserved; + u16 domain_id; /* Unique domain ID */ + u32 max_rmid; /* Maximun RMID supported */ + u64 creg_base; /* Control Register Base Address */ + u16 creg_size; /* Control Register Size (4K pages) */ + u8 rmdd_structs[]; +}; + +/* 1: CACD - CPU Agent Collection Description */ + +struct acpi_erdt_cacd { + struct acpi_subtbl_hdr_16 header; + u16 reserved; + u16 domain_id; /* Unique domain ID */ + u32 X2APICIDS[]; +}; + +/* 2: DACD - Device Agent Collection Description */ + +struct acpi_erdt_dacd { + struct acpi_subtbl_hdr_16 header; + u16 reserved; + u16 domain_id; /* Unique domain ID */ + u8 dev_paths[]; +}; + +struct acpi_erdt_dacd_dev_paths { + struct acpi_subtable_header header; + u16 segment; + u8 reserved; + u8 start_bus; + u8 path[]; +}; + +/* 3: CMRC - Cache Monitoring Registers for CPU Agents */ + +struct acpi_erdt_cmrc { + struct acpi_subtbl_hdr_16 header; + u32 reserved1; + u32 flags; + u8 index_fn; + u8 reserved2[11]; + u64 cmt_reg_base; + u32 cmt_reg_size; + u16 clump_size; + u16 clump_stride; + u64 up_scale; +}; + +/* 4: MMRC - Memory-bandwidth Monitoring Registers for CPU Agents */ + +struct acpi_erdt_mmrc { + struct acpi_subtbl_hdr_16 header; + u32 reserved1; + u32 flags; + u8 index_fn; + u8 reserved2[11]; + u64 reg_base; + u32 reg_size; + u8 
counter_width; + u64 up_scale; + u8 reserved3[7]; + u32 corr_factor_list_len; + u32 corr_factor_list[]; +}; + +/* 5: MARC - Memory-bandwidth Allocation Registers for CPU Agents */ + +struct acpi_erdt_marc { + struct acpi_subtbl_hdr_16 header; + u16 reserved1; + u16 flags; + u8 index_fn; + u8 reserved2[7]; + u64 reg_base_opt; + u64 reg_base_min; + u64 reg_base_max; + u32 mba_reg_size; + u32 mba_ctrl_range; +}; + +/* 6: CARC - Cache Allocation Registers for CPU Agents */ + +struct acpi_erdt_carc { + struct acpi_subtbl_hdr_16 header; +}; + +/* 7: CMRD - Cache Monitoring Registers for Device Agents */ + +struct acpi_erdt_cmrd { + struct acpi_subtbl_hdr_16 header; + u32 reserved1; + u32 flags; + u8 index_fn; + u8 reserved2[11]; + u64 reg_base; + u32 reg_size; + u16 cmt_reg_off; + u16 cmt_clump_size; + u64 up_scale; +}; + +/* 8: IBRD - Cache Monitoring Registers for Device Agents */ + +struct acpi_erdt_ibrd { + struct acpi_subtbl_hdr_16 header; + u32 reserved1; + u32 flags; + u8 index_fn; + u8 reserved2[11]; + u64 reg_base; + u32 reg_size; + u16 total_bw_offset; + u16 Iomiss_bw_offset; + u16 total_bw_clump; + u16 Iomiss_bw_clump; + u8 reserved3[7]; + u8 counter_width; + u64 up_scale; + u32 corr_factor_list_len; + u32 corr_factor_list[]; +}; + +/* 9: IBAD - IO bandwidth Allocation Registers for device agents */ + +struct acpi_erdt_ibad { + struct acpi_subtbl_hdr_16 header; +}; + +/* 10: CARD - IO bandwidth Allocation Registers for Device Agents */ + +struct acpi_erdt_card { + struct acpi_subtbl_hdr_16 header; + u32 reserved1; + u32 flags; + u32 contention_mask; + u8 index_fn; + u8 reserved2[7]; + u64 reg_base; + u32 reg_size; + u16 cat_reg_offset; + u16 cat_reg_block_size; +}; + /******************************************************************************* * * IORT - IO Remapping Table -- cgit v1.2.3 From 6da5e6f3028d46e4fee7849e85eda681939c630b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 21:27:58 +0200 Subject: ACPICA: Introduce ACPI_NONSTRING ACPICA commit 878823ca20f1987cba0c9d4c1056be0d117ea4fe In order to distinguish character arrays from C Strings (i.e. strings with a terminating NUL character), add support for the "nonstring" attribute provided by GCC. (A better name might be "ACPI_NONCSTRING", but that's the attribute name, so stick to the existing naming convention.) GCC 15's -Wunterminated-string-initialization will warn about truncation of the NUL byte for string initializers unless the destination is marked with "nonstring". Prepare for applying this attribute to the project. Link: https://github.com/acpica/acpica/commit/878823ca Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/1841930.VLH7GnMWUR@rjwysocki.net Signed-off-by: Kees Cook [ rjw: Pick up the tag from Kees ] Signed-off-by: Rafael J. 
Wysocki --- include/acpi/actypes.h | 4 ++++ include/acpi/platform/acgcc.h | 8 ++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 80767e8bf3ad..f7b3c4a4b7e7 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -1327,4 +1327,8 @@ typedef enum { #define ACPI_FLEX_ARRAY(TYPE, NAME) TYPE NAME[0] #endif +#ifndef ACPI_NONSTRING +#define ACPI_NONSTRING /* No terminating NUL character */ +#endif + #endif /* __ACTYPES_H__ */ diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index 04b4bf620517..68e9379623e6 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -72,4 +72,12 @@ TYPE NAME[]; \ } +/* + * Explicitly mark strings that lack a terminating NUL character so + * that ACPICA can be built with -Wunterminated-string-initialization. + */ +#if __has_attribute(__nonstring__) +#define ACPI_NONSTRING __attribute__((__nonstring__)) +#endif + #endif /* __ACGCC_H__ */ -- cgit v1.2.3 From d46825d2e505caa52d6f9e614575c364fd2190c6 Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Fri, 25 Apr 2025 21:29:12 +0200 Subject: ACPICA: actbl2.h: ACPI 6.5: RAS2: Rename structure and field names of the RAS2 table ACPICA commit 2c8a38f747de9d977491a494faf0dfaf799b777b Rename the structure and field names of the RAS2 table to shorten them and avoid long lines in the ACPI RAS2 drivers. 1. struct acpi_ras2_shared_memory to struct acpi_ras2_shmem 2. In struct acpi_ras2_shared_memory: fields, - set_capabilities[16] to set_caps[16] - num_parameter_blocks to num_param_blks - set_capabilities_status to set_caps_status 3. struct acpi_ras2_patrol_scrub_parameter to struct acpi_ras2_patrol_scrub_param 4. In struct acpi_ras2_patrol_scrub_parameter: fields, - patrol_scrub_command to command - requested_address_range to req_addr_range - actual_address_range to actl_addr_range Link: https://github.com/acpica/acpica/commit/2c8a38f7 Signed-off-by: Shiju Jose Signed-off-by: Rafael J. 
Wysocki Link: https://patch.msgid.link/1942053.CQOukoFCf9@rjwysocki.net --- include/acpi/actbl2.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index ccbf543bceeb..3c2c0730165d 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -3035,15 +3035,15 @@ struct acpi_ras2_pcc_desc { /* RAS2 Platform Communication Channel Shared Memory Region */ -struct acpi_ras2_shared_memory { +struct acpi_ras2_shmem { u32 signature; u16 command; u16 status; u16 version; u8 features[16]; - u8 set_capabilities[16]; - u16 num_parameter_blocks; - u32 set_capabilities_status; + u8 set_caps[16]; + u16 num_param_blks; + u32 set_caps_status; }; /* RAS2 Parameter Block Structure for PATROL_SCRUB */ @@ -3056,11 +3056,11 @@ struct acpi_ras2_parameter_block { /* RAS2 Parameter Block Structure for PATROL_SCRUB */ -struct acpi_ras2_patrol_scrub_parameter { +struct acpi_ras2_patrol_scrub_param { struct acpi_ras2_parameter_block header; - u16 patrol_scrub_command; - u64 requested_address_range[2]; - u64 actual_address_range[2]; + u16 command; + u64 req_addr_range[2]; + u64 actl_addr_range[2]; u32 flags; u32 scrub_params_out; u32 scrub_params_in; -- cgit v1.2.3 From 64b9dfd0776e9c38d733094859a09f13282ce6f8 Mon Sep 17 00:00:00 2001 From: Ahmed Salem Date: Fri, 25 Apr 2025 21:30:27 +0200 Subject: ACPICA: Avoid sequence overread in call to strncmp() ACPICA commit 8b83a8d88dfec59ea147fad35fc6deea8859c58c ap_get_table_length() checks if tables are valid by calling ap_is_valid_header(). The latter then calls ACPI_VALIDATE_RSDP_SIG(Table->Signature). ap_is_valid_header() accepts struct acpi_table_header as an argument, so the signature size is always fixed to 4 bytes. The problem is when the string comparison is between ACPI-defined table signature and ACPI_SIG_RSDP. Common ACPI table header specifies the Signature field to be 4 bytes long[1], with the exception of the RSDP structure whose signature is 8 bytes long "RSD PTR " (including the trailing blank character)[2]. Calling strncmp(sig, rsdp_sig, 8) would then result in a sequence overread[3] as sig would be smaller (4 bytes) than the specified bound (8 bytes). As a workaround, pass the bound conditionally based on the size of the signature being passed. Link: https://uefi.org/specs/ACPI/6.5_A/05_ACPI_Software_Programming_Model.html#system-description-table-header [1] Link: https://uefi.org/specs/ACPI/6.5_A/05_ACPI_Software_Programming_Model.html#root-system-description-pointer-rsdp-structure [2] Link: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wstringop-overread [3] Link: https://github.com/acpica/acpica/commit/8b83a8d8 Signed-off-by: Ahmed Salem Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2248233.Mh6RI2rZIc@rjwysocki.net --- include/acpi/actypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index f7b3c4a4b7e7..5b9f9a612548 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -527,7 +527,7 @@ typedef u64 acpi_integer; /* Support for the special RSDP signature (8 characters) */ -#define ACPI_VALIDATE_RSDP_SIG(a) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8)) +#define ACPI_VALIDATE_RSDP_SIG(a) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, (sizeof(a) < 8) ? 
ACPI_NAMESEG_SIZE : 8)) #define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) /* Support for OEMx signature (x can be any character) */ -- cgit v1.2.3 From 70662db73d5455ebc8a1da29973fa70237b18cd2 Mon Sep 17 00:00:00 2001 From: Ahmed Salem Date: Fri, 25 Apr 2025 21:31:05 +0200 Subject: ACPICA: Apply ACPI_NONSTRING in more places ACPICA commit 1035a3d453f7dd49a235a59ee84ebda9d2d2f41b Add ACPI_NONSTRING for destination char arrays without a terminating NUL character. This is a follow-up to commit 35ad99236f3a ("ACPICA: Apply ACPI_NONSTRING") where not all instances received the same treatment, in preparation for replacing strncpy() calls with memcpy() Link: https://github.com/acpica/acpica/commit/1035a3d4 Signed-off-by: Ahmed Salem Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3833065.MHq7AAxBmi@rjwysocki.net --- include/acpi/actbl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 2fc89704be17..74cc61e3ab09 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -66,12 +66,12 @@ ******************************************************************************/ struct acpi_table_header { - char signature[ACPI_NAMESEG_SIZE] __nonstring; /* ASCII table signature */ + char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; /* ASCII table signature */ u32 length; /* Length of table in bytes, including this header */ u8 revision; /* ACPI Specification minor version number */ u8 checksum; /* To make sum of entire table == 0 */ - char oem_id[ACPI_OEM_ID_SIZE]; /* ASCII OEM identification */ - char oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; /* ASCII OEM table identification */ + char oem_id[ACPI_OEM_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM identification */ + char oem_table_id[ACPI_OEM_TABLE_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM table identification */ u32 oem_revision; /* OEM revision number */ char asl_compiler_id[ACPI_NAMESEG_SIZE]; /* ASCII ASL compiler vendor ID */ u32 asl_compiler_revision; /* ASL compiler version */ -- cgit v1.2.3 From ebf27765421c9238b7835d32a95e4a7fb8db26a4 Mon Sep 17 00:00:00 2001 From: Ahmed Salem Date: Fri, 25 Apr 2025 21:32:12 +0200 Subject: ACPICA: Replace strncpy() with memcpy() ACPICA commit 83019b471e1902151e67c588014ba2d09fa099a3 strncpy() is deprecated for NUL-terminated destination buffers[1]. Use memcpy() for length-bounded destinations. Link: https://github.com/KSPP/linux/issues/90 [1] Link: https://github.com/acpica/acpica/commit/83019b47 Signed-off-by: Ahmed Salem Signed-off-by: Rafael J. 
Wysocki Link: https://patch.msgid.link/1910878.atdPhlSkOF@rjwysocki.net --- include/acpi/actypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 5b9f9a612548..e90ca342d7de 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -522,7 +522,7 @@ typedef u64 acpi_integer; #define ACPI_COPY_NAMESEG(dest,src) (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src))) #else #define ACPI_COMPARE_NAMESEG(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAMESEG_SIZE)) -#define ACPI_COPY_NAMESEG(dest,src) (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAMESEG_SIZE)) +#define ACPI_COPY_NAMESEG(dest,src) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAMESEG_SIZE)) #endif /* Support for the special RSDP signature (8 characters) */ -- cgit v1.2.3 From 403e7508c46981db9a01249c6e97852a2697de04 Mon Sep 17 00:00:00 2001 From: Saket Dumbre Date: Fri, 25 Apr 2025 21:33:58 +0200 Subject: ACPICA: Logfile: Changes for version 20250404 ACPICA commit 52de9040740c562a35244de19d21c0313964c874 Link: https://github.com/acpica/acpica/commit/52de9040 Signed-off-by: Saket Dumbre Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2251077.Icojqenx9y@rjwysocki.net --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index adf315e25d8d..eacf37b4cd20 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20241212 +#define ACPI_CA_VERSION 0x20250404 #include #include -- cgit v1.2.3 From 0e9fd691a7026580708a5f22f18fca0e58b61899 Mon Sep 17 00:00:00 2001 From: Saket Dumbre Date: Thu, 3 Apr 2025 22:21:13 -0700 Subject: ACPICA: Update copyright year ACPICA commit 45253be18b3f37d46cd0072aa3f8a0a21a70e0a4 Changes needed by acpisrc to update copyright year when building for release. Link: https://github.com/acpica/acpica/commit/45253be1 Signed-off-by: Saket Dumbre Signed-off-by: Rafael J. Wysocki --- include/acpi/acbuffer.h | 2 +- include/acpi/acconfig.h | 2 +- include/acpi/acexcep.h | 2 +- include/acpi/acnames.h | 2 +- include/acpi/acoutput.h | 2 +- include/acpi/acpi.h | 2 +- include/acpi/acpiosxf.h | 2 +- include/acpi/acpixf.h | 2 +- include/acpi/acrestyp.h | 2 +- include/acpi/actbl.h | 2 +- include/acpi/actbl1.h | 2 +- include/acpi/actbl2.h | 2 +- include/acpi/actbl3.h | 2 +- include/acpi/actypes.h | 2 +- include/acpi/acuuid.h | 2 +- include/acpi/platform/acenv.h | 2 +- include/acpi/platform/acenvex.h | 2 +- include/acpi/platform/acgcc.h | 2 +- include/acpi/platform/acgccex.h | 2 +- include/acpi/platform/aclinux.h | 2 +- include/acpi/platform/aclinuxex.h | 2 +- include/acpi/platform/aczephyr.h | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/acpi/acbuffer.h b/include/acpi/acbuffer.h index 252b235dce5a..cbc9aeabcd99 100644 --- a/include/acpi/acbuffer.h +++ b/include/acpi/acbuffer.h @@ -3,7 +3,7 @@ * * Name: acbuffer.h - Support for buffers returned by ACPI predefined names * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. 
* *****************************************************************************/ diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index 2da5f4a6e814..521d4bfa6ef0 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -3,7 +3,7 @@ * * Name: acconfig.h - Global configuration constants * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index c5ecd0a0170c..53c98f5fe3c3 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -3,7 +3,7 @@ * * Name: acexcep.h - Exception codes returned by the ACPI subsystem * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h index 76aa6aa346ba..cb6a4dcc4e8e 100644 --- a/include/acpi/acnames.h +++ b/include/acpi/acnames.h @@ -3,7 +3,7 @@ * * Name: acnames.h - Global names and strings * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index 5e0346142f98..3584f33e352c 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -3,7 +3,7 @@ * * Name: acoutput.h -- debug output * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/acpi.h b/include/acpi/acpi.h index 8b4a497c1300..92bf80937e5f 100644 --- a/include/acpi/acpi.h +++ b/include/acpi/acpi.h @@ -3,7 +3,7 @@ * * Name: acpi.h - Master public include file used to interface to ACPICA * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index 914c029f64c9..65c5737b6286 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -5,7 +5,7 @@ * interfaces must be implemented by OSL to interface the * ACPI components to the host operating system. * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index eacf37b4cd20..b49396aa4058 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -3,7 +3,7 @@ * * Name: acpixf.h - External interfaces to the ACPI subsystem * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h index efef208b0324..842f932e2c2b 100644 --- a/include/acpi/acrestyp.h +++ b/include/acpi/acrestyp.h @@ -3,7 +3,7 @@ * * Name: acrestyp.h - Defines, types, and structures for resource descriptors * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. 
* *****************************************************************************/ diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 74cc61e3ab09..243097a3da63 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -3,7 +3,7 @@ * * Name: actbl.h - Basic ACPI Table Definitions * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 6d2cc181e3a4..99fd1588ff38 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -3,7 +3,7 @@ * * Name: actbl1.h - Additional ACPI table definitions * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 3c2c0730165d..048f5f47f8b8 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -3,7 +3,7 @@ * * Name: actbl2.h - ACPI Table Definitions * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h index f9b459b75e98..79d3aa5a4bad 100644 --- a/include/acpi/actbl3.h +++ b/include/acpi/actbl3.h @@ -3,7 +3,7 @@ * * Name: actbl3.h - ACPI Table Definitions * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index e90ca342d7de..8fe893d776dd 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -3,7 +3,7 @@ * * Name: actypes.h - Common data types for the entire ACPI subsystem * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/acuuid.h b/include/acpi/acuuid.h index 52a84523bfac..25dd3e998727 100644 --- a/include/acpi/acuuid.h +++ b/include/acpi/acuuid.h @@ -3,7 +3,7 @@ * * Name: acuuid.h - ACPI-related UUID/GUID definitions * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index 3f31df09a9d6..a11fa83955f8 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -3,7 +3,7 @@ * * Name: acenv.h - Host and compiler configuration * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h index 7e67e3503f7b..8ffc4e1c87cf 100644 --- a/include/acpi/platform/acenvex.h +++ b/include/acpi/platform/acenvex.h @@ -3,7 +3,7 @@ * * Name: acenvex.h - Extra host and compiler configuration * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index 68e9379623e6..8e4cf2f6b383 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -3,7 +3,7 @@ * * Name: acgcc.h - GCC specific defines, etc. 
* - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/platform/acgccex.h b/include/acpi/platform/acgccex.h index 7c9f10e9633a..4a3c019a4d03 100644 --- a/include/acpi/platform/acgccex.h +++ b/include/acpi/platform/acgccex.h @@ -3,7 +3,7 @@ * * Name: acgccex.h - Extra GCC specific defines, etc. * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index f3249b7df5cb..edbbc9061d1e 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -3,7 +3,7 @@ * * Name: aclinux.h - OS specific defines, etc. for Linux * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h index eeff40295b4b..73265650f46b 100644 --- a/include/acpi/platform/aclinuxex.h +++ b/include/acpi/platform/aclinuxex.h @@ -3,7 +3,7 @@ * * Name: aclinuxex.h - Extra OS specific defines, etc. for Linux * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/include/acpi/platform/aczephyr.h b/include/acpi/platform/aczephyr.h index 703db4dc740d..03d9a4a39c80 100644 --- a/include/acpi/platform/aczephyr.h +++ b/include/acpi/platform/aczephyr.h @@ -3,7 +3,7 @@ * * Module Name: aczephyr.h - OS specific defines, etc. * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ -- cgit v1.2.3 From b9020bdb9f76b58117f9902db3047693fd12b11b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 5 May 2025 10:38:17 -0700 Subject: ACPI: MRRM: Minimal parse of ACPI MRRM table The resctrl file system code needs to know how many region tags are supported. Parse the ACPI MRRM table and save the max_mem_region value. Provide a function for resctrl to collect that value. Signed-off-by: Tony Luck Link: https://patch.msgid.link/20250505173819.419271-2-tony.luck@intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3f2e93ed9730..c409f4cecb09 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -772,6 +772,10 @@ int acpi_get_local_u64_address(acpi_handle handle, u64 *addr); int acpi_get_local_address(acpi_handle handle, u32 *addr); const char *acpi_get_subsystem_id(acpi_handle handle); +#ifdef CONFIG_ACPI_MRRM +int acpi_mrrm_max_mem_region(void); +#endif + #else /* !CONFIG_ACPI */ #define acpi_disabled 1 @@ -1092,6 +1096,11 @@ static inline acpi_handle acpi_get_processor_handle(int cpu) return NULL; } +static inline int acpi_mrrm_max_mem_region(void) +{ + return -ENOENT; +} + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HMAT -- cgit v1.2.3 From 8fb7aee05591fd4d3dca1460448a59e95fa821c3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 9 May 2025 12:12:54 +0100 Subject: io_uring: drain based on allocates reqs Don't rely on CQ sequence numbers for draining, as it has become messy and needs cq_extra adjustments. 
Instead, base it on the number of allocated requests and only allow flushing when all requests are in the drain list. As a result, cq_extra is gone, no overhead for its accounting in aux cqe posting, less bloating as it was inlined before, and it's in general simpler than trying to track where we should bump it and where it should be put back like in cases of overflow. Also, it'll likely help with cleaning and unifying some of the CQ posting helpers. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/46ece1e34320b046c06fee2498d6b4cd12a700f2.1746788718.git.asml.silence@gmail.com Link: https://lore.kernel.org/r/24497b04b004bceada496033d3c9d09ff8e81ae9.1746944903.git.asml.silence@gmail.com [axboe: fold in fix from link2] Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 73b289b48280..00dbd7cd0e7d 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -341,7 +341,6 @@ struct io_ring_ctx { unsigned cached_cq_tail; unsigned cq_entries; struct io_ev_fd __rcu *io_ev_fd; - unsigned cq_extra; void *cq_wait_arg; size_t cq_wait_size; @@ -417,6 +416,7 @@ struct io_ring_ctx { struct callback_head poll_wq_task_work; struct list_head defer_list; + unsigned nr_drained; struct io_alloc_cache msg_cache; spinlock_t msg_lock; -- cgit v1.2.3 From cdeaeb9dd762d7711241a62459dfb730b2cd0281 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 24 Apr 2025 18:02:50 +0200 Subject: drm: nova-drm: add initial driver skeleton Add the initial nova-drm driver skeleton. nova-drm is connected to nova-core through the auxiliary bus and implements the DRM parts of the nova driver stack. For now, it implements the fundamental DRM abstractions, i.e. creates a DRM device and registers it, exposing three sample IOCTLs. DRM_IOCTL_NOVA_GETPARAM - provides the PCI bar size from the bar that maps the GPU's VRAM from nova-core DRM_IOCTL_NOVA_GEM_CREATE - creates a new dummy DRM GEM object and returns a handle DRM_IOCTL_NOVA_GEM_INFO - provides metadata for the DRM GEM object behind a given handle I implemented a small userspace test suite [1] that utilizes this interface. Link: https://gitlab.freedesktop.org/dakr/drm-test [1] Reviewed-by: Maxime Ripard Acked-by: Dave Airlie Link: https://lore.kernel.org/r/20250424160452.8070-3-dakr@kernel.org [ Kconfig: depend on DRM=y rather than just DRM. - Danilo ] Signed-off-by: Danilo Krummrich --- include/uapi/drm/nova_drm.h | 101 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 include/uapi/drm/nova_drm.h (limited to 'include') diff --git a/include/uapi/drm/nova_drm.h b/include/uapi/drm/nova_drm.h new file mode 100644 index 000000000000..3ca90ed9d2bb --- /dev/null +++ b/include/uapi/drm/nova_drm.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __NOVA_DRM_H__ +#define __NOVA_DRM_H__ + +#include "drm.h" + +/* DISCLAIMER: Do not use, this is not a stable uAPI. + * + * This uAPI serves only testing purposes as long as this driver is still in + * development. It is required to implement and test infrastructure which is + * upstreamed in the context of this driver. See also [1]. + * + * [1] https://lore.kernel.org/dri-devel/Zfsj0_tb-0-tNrJy@cassiopeiae/T/#u + */ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * NOVA_GETPARAM_VRAM_BAR_SIZE + * + * Query the VRAM BAR size in bytes.
+ */ +#define NOVA_GETPARAM_VRAM_BAR_SIZE 0x1 + +/** + * struct drm_nova_getparam - query GPU and driver metadata + */ +struct drm_nova_getparam { + /** + * @param: The identifier of the parameter to query. + */ + __u64 param; + + /** + * @value: The value for the specified parameter. + */ + __u64 value; +}; + +/** + * struct drm_nova_gem_create - create a new DRM GEM object + */ +struct drm_nova_gem_create { + /** + * @handle: The handle of the new DRM GEM object. + */ + __u32 handle; + + /** + * @pad: 32 bit padding, should be 0. + */ + __u32 pad; + + /** + * @size: The size of the new DRM GEM object. + */ + __u64 size; +}; + +/** + * struct drm_nova_gem_info - query DRM GEM object metadata + */ +struct drm_nova_gem_info { + /** + * @handle: The handle of the DRM GEM object to query. + */ + __u32 handle; + + /** + * @pad: 32 bit padding, should be 0. + */ + __u32 pad; + + /** + * @size: The size of the DRM GEM obejct. + */ + __u64 size; +}; + +#define DRM_NOVA_GETPARAM 0x00 +#define DRM_NOVA_GEM_CREATE 0x01 +#define DRM_NOVA_GEM_INFO 0x02 + +/* Note: this is an enum so that it can be resolved by Rust bindgen. */ +enum { + DRM_IOCTL_NOVA_GETPARAM = DRM_IOWR(DRM_COMMAND_BASE + DRM_NOVA_GETPARAM, + struct drm_nova_getparam), + DRM_IOCTL_NOVA_GEM_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_NOVA_GEM_CREATE, + struct drm_nova_gem_create), + DRM_IOCTL_NOVA_GEM_INFO = DRM_IOWR(DRM_COMMAND_BASE + DRM_NOVA_GEM_INFO, + struct drm_nova_gem_info), +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* __NOVA_DRM_H__ */ -- cgit v1.2.3 From 2c04e58e30ce858cc2be531298312c67c7d55fc3 Mon Sep 17 00:00:00 2001 From: Melody Olvera Date: Mon, 12 May 2025 13:54:43 -0700 Subject: soc: qcom: llcc-qcom: Add support for SM8750 Add system cache table and configs for SM8750 SoCs. Signed-off-by: Melody Olvera Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250512-sm8750_llcc_master-v5-3-d78dca6282a5@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/llcc-qcom.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 8e5d78fb4847..7a69210a250c 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -24,6 +24,7 @@ #define LLCC_CMPTDMA 15 #define LLCC_DISP 16 #define LLCC_VIDFW 17 +#define LLCC_CAMFW 18 #define LLCC_MDMHPFX 20 #define LLCC_MDMPNG 21 #define LLCC_AUDHW 22 @@ -67,6 +68,13 @@ #define LLCC_EVCS_LEFT 67 #define LLCC_EVCS_RIGHT 68 #define LLCC_SPAD 69 +#define LLCC_VIDDEC 70 +#define LLCC_CAMOFE 71 +#define LLCC_CAMRTIP 72 +#define LLCC_CAMSRTIP 73 +#define LLCC_CAMRTRF 74 +#define LLCC_CAMSRTRF 75 +#define LLCC_CPUSSMPAM 89 /** * struct llcc_slice_desc - Cache slice descriptor -- cgit v1.2.3 From d060b6aab031b6113f78cd3d1585115f13386eec Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Mon, 12 May 2025 21:53:46 +0100 Subject: helpers: make few bpf helpers public Make bpf_dynptr_slice_rdwr, bpf_dynptr_check_off_len and __bpf_dynptr_write available outside of helpers.c by adding their prototypes into include/linux/bpf.h. The bpf_dynptr_check_off_len() implementation is moved to the header and made explicitly inline, as such a small function should typically be inlined. These functions are going to be used from bpf_trace.c in the next patch of this series.
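[Editor's note] As a hedged usage sketch (not from this patch), a caller such as the bpf_trace.c user mentioned above could combine the newly exported helpers as below. The function name write_into_dynptr is made up for illustration; the helper prototypes match the ones added to include/linux/bpf.h in the diff that follows.

#include <linux/bpf.h>

/* Illustrative helper: bounds-check the range, then write through the dynptr. */
static int write_into_dynptr(const struct bpf_dynptr_kern *dst, u32 offset,
			     void *src, u32 len)
{
	int err;

	/* Returns -E2BIG when offset/len does not fit inside the dynptr. */
	err = bpf_dynptr_check_off_len(dst, offset, len);
	if (err)
		return err;

	/* Plain write, no flags. */
	return __bpf_dynptr_write(dst, offset, src, len, 0);
}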
Acked-by: Andrii Nakryiko Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20250512205348.191079-2-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3f0cc89c0622..83c56f40842b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1349,6 +1349,20 @@ u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, + void *src, u32 len, u64 flags); +void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, + void *buffer__opt, u32 buffer__szk); + +static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +{ + u32 size = __bpf_dynptr_size(ptr); + + if (len > size || offset > size - len) + return -E2BIG; + + return 0; +} #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, -- cgit v1.2.3 From a96876057b9e44f60d936f8e4887543555b0593c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 May 2025 14:27:51 -0700 Subject: netlink: fix policy dump for int with validation callback Recent devlink change added validation of an integer value via NLA_POLICY_VALIDATE_FN, for sparse enums. Handle this in policy dump. We can't extract any info out of the callback, so report only the type. Fixes: 429ac6211494 ("devlink: define enum for attr types of dynamic attributes") Reported-by: syzbot+01eb26848144516e7f0a@syzkaller.appspotmail.com Link: https://patch.msgid.link/20250509212751.1905149-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 82e07e272290..90a560dc167a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -321,7 +321,13 @@ enum nla_policy_validation { * All other Unused - but note that it's a union * * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: + * NLA_U8, NLA_U16, + * NLA_U32, NLA_U64, + * NLA_S8, NLA_S16, + * NLA_S32, NLA_S64, + * NLA_MSECS, * NLA_BINARY Validation function called for the attribute. + * * All other Unused - but note that it's a union * * Example: -- cgit v1.2.3 From cd6856d38881f8137d795b64ff5587c0232a0c34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 May 2025 08:06:37 +0200 Subject: scsi: sd: Remove the stream_status member from scsi_stream_status_header Having a variable length array at the end of scsi_stream_status_header only causes problems. Remove it and switch sd_is_perm_stream(), which is the only place that currently uses it, to use the scsi_stream_status directly following it in the local buf structure. Besides being a much better data structure design, this also avoids a -Wflex-array-member-not-at-end warning. Reported-by: Gustavo A. R. Silva Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250505060640.3398500-1-hch@lst.de Reviewed-by: John Garry Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- include/scsi/scsi_proto.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index aeca37816506..f64385cde5b9 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -346,10 +346,9 @@ static_assert(sizeof(struct scsi_stream_status) == 8); /* GET STREAM STATUS parameter data */ struct scsi_stream_status_header { - __be32 len; /* length in bytes of stream_status[] array. */ + __be32 len; /* length in bytes of following payload */ u16 reserved; __be16 number_of_open_streams; - DECLARE_FLEX_ARRAY(struct scsi_stream_status, stream_status); }; static_assert(sizeof(struct scsi_stream_status_header) == 8); -- cgit v1.2.3 From e8007fad5457ea547ca63bb011fdb03213571c7e Mon Sep 17 00:00:00 2001 From: Steve Siwinski Date: Thu, 8 May 2025 16:01:22 -0400 Subject: scsi: sd_zbc: block: Respect bio vector limits for REPORT ZONES buffer The REPORT ZONES buffer size is currently limited by the HBA's maximum segment count to ensure the buffer can be mapped. However, the block layer further limits the number of iovec entries to 1024 when allocating a bio. To avoid allocation of buffers too large to be mapped, further restrict the maximum buffer size to BIO_MAX_INLINE_VECS. Replace the UIO_MAXIOV symbolic name with the more contextually appropriate BIO_MAX_INLINE_VECS. Fixes: b091ac616846 ("sd_zbc: Fix report zones buffer allocation") Cc: stable@vger.kernel.org Signed-off-by: Steve Siwinski Link: https://lore.kernel.org/r/20250508200122.243129-1-ssiwinski@atto.com Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index cafc7c215de8..b786ec5bcc81 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -11,6 +11,7 @@ #include #define BIO_MAX_VECS 256U +#define BIO_MAX_INLINE_VECS UIO_MAXIOV struct queue_limits; -- cgit v1.2.3 From 0e1c773b501f33437d87b72c7d26080361e224b1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 20 Apr 2025 12:40:24 -0700 Subject: mm/damon/core: introduce damos quota goal metrics for memory node utilization Patch series "mm/damon: auto-tune DAMOS for NUMA setups including tiered memory". Utilizing DAMON for memory tiering usually requires manual tuning and/or tedious controls. Let it self-tune hotness and coldness thresholds for promotion and demotion, aiming at high utilization of the high memory tiers, by introducing new DAMOS quota goal metrics representing the used and the free memory ratios of specific NUMA nodes. And introduce a sample DAMON module that demonstrates how the new feature can be used for memory tiering use cases. Backgrounds =========== A type of tiered memory system exposes the memory tiers as NUMA nodes. A straightforward page placement strategy for such systems is placing access-hot and cold pages on upper and lower tiers, respectively, pursuing higher utilization of upper tiers. Since access temperature can be dynamic, periodically finding and migrating hot pages and cold pages to proper tiers (promoting and demoting) is also required. The Linux kernel provides several features for such dynamic and transparent page placement. Page Faults and LRU ------------------- One widely known way is using NUMA balancing in tiering mode (a.k.a NUMAB-2) and reclaim-based demotion features.
In the setup, NUMAB-2 finds hot pages using access check-purpose page faults (a.k.a. prot_none) and promotes those inside each process' context, until there are no more pages to promote, or the upper tier is filled up and memory pressure happens. In the latter case, LRU-based reclaim logic wakes up as a response to the memory pressure and demotes cold pages to lower tiers in asynchronous (kswapd) and/or synchronous ways (direct reclaim). DAMON ----- Yet another available solution is using DAMOS with migrate_hot and migrate_cold DAMOS actions for promotions and demotions, respectively. To make it optimal, users need to specify aggressiveness and access temperature thresholds for promotions and demotions in a good balance that results in high utilization of upper tiers. The number of parameters is not small, and optimum parameter values depend on characteristics of the underlying hardware and the workload. As a result, it often requires manual, time consuming and repetitive tuning of the DAMOS schemes for given workload and system combinations. Self-tuned DAMON-based Memory Tiering ===================================== To solve such manual tuning problems, DAMOS provides aim-oriented feedback-driven quotas self-tuning. Using the feature, we design a self-tuned DAMON-based memory tiering for general multi-tier memory systems. For each memory tier node, if it has a lower tier, run a DAMOS scheme that demotes cold pages of the node, auto-tuning the aggressiveness aiming for an amount of free space on the node. The free space is for keeping the headroom that avoids significant memory pressure during upper tier memory usage spikes, and for promoting hot pages from the lower tier. For each memory tier node, if it has an upper tier, run a DAMOS scheme that promotes hot pages of the current node to the upper tier, auto-tuning the aggressiveness aiming for a high utilization ratio of the upper tier. The target ratio is to ensure higher tiers are utilized as much as possible. It should match the headroom for the demotion scheme, but have a slight overlap, to ensure promotion and demotion are not entirely stopped. The aim-oriented aggressiveness auto-tuning of DAMOS is already available. Hence, to implement such a tiering solution, only new quota goal metrics for the utilization and free space ratio of a specific NUMA node need to be developed. Discussions =========== The design raises the discussion points below. Expected Behaviors ------------------ The system will let the upper tier memory node accommodate as much hot data as possible. If the total amount of data is less than the top tier memory's promotion/demotion target utilization, all data will simply be placed on the top tier. The promotion scheme will do nothing since there is no data to promote. The demotion scheme will also do nothing since the free space ratio of the top tier is higher than the goal. Only if the amount of data is larger than the top tier's utilization ratio will the demotion scheme demote cold pages and ensure the headroom free space. Since the promotion and demotion schemes for a single node have a small overlap at their target utilization and free space goals, promotions and demotions will continue working with a moderate aggressiveness level. It will keep data placed according to access hotness under dynamic access patterns, while minimizing the migration overhead. In any case, each node will keep headroom free space, and upper tiers will be utilized as much as possible.
Ease of Use ----------- Users still need to set the target utilization and free space ratio, but it will be easier to set. We argue 99.7 % utilization and 0.5 % free space ratios can be good default values. They can be easily adjusted based on the desired headroom size of a given use case. Users are also still required to set the minimum coldness and hotness thresholds. Together with monitoring intervals auto-tuning[2], DAMON will always show a meaningful amount of hot and cold memory. And DAMOS quota's prioritization mechanism will make good decisions as long as the source information is sufficiently rich. Hence, users can very naively set the minimum criteria. We believe that any observed access, or no observed access, within the last aggregation interval is enough for the minimum hot and cold region criteria. General Tiered Memory Setup Applicability ----------------------------------------- The design can be applied to any number of tiers having any performance characteristics, as long as they can be hierarchical. Hence, applying the system to different tiered memory systems will be straightforward. Note that this assumes the single CPU NUMA node case only. Because today's DAMON is not aware of which CPU made each access, applying this on systems having multiple CPU NUMA nodes can be complicated. We are planning to extend DAMON for the use case, but that's out of the scope of this patch series. How To Use ---------- Users can implement the auto-tuned DAMON-based memory tiering using the DAMON sysfs interface. It can be easily done using a DAMON user-space tool such as damo. The evaluation results section below shows an example DAMON user-space tool command for that. For wider and simpler deployment, having a kernel module that sets up and runs the DAMOS schemes via the DAMON kernel API can be useful. The module can enable the memory tiering at boot time via a kernel command line parameter or at run time with a single command. This patch series implements a sample DAMON kernel module that shows how such a module can be implemented. Comparison To Page Faults and LRU-based Approaches -------------------------------------------------- The existing page faults based promotion (NUMAB-2) does hot page detection and migration in the process context. When there are many pages to promote, it can block the progress of the application's real work. DAMOS works in an asynchronous worker thread, so it doesn't block the real work. NUMAB-2 doesn't provide a way to control the aggressiveness of promotion other than the maximum amount of pages to promote per given time window. If hot pages are found, promotions can happen at the upper-bound speed, regardless of the upper tier's memory pressure. If the maximum speed is not well set for the given workload, it can result in slow promotion or unnecessary memory pressure. Self-tuned DAMON-based memory tiering alleviates the problem by adjusting the speed based on the current utilization of the upper tier. LRU-based demotion can be triggered in both asynchronous (kswapd) and synchronous (direct reclaim) ways. Other than the way of finding cold pages, asynchronous LRU-based demotion and DAMON-based demotion have no big difference. DAMON-based demotion can achieve a better balance with DAMON-based promotion, though. LRU-based demotion can do better than DAMON-based demotion when the tier is under significant memory pressure. It would be wise to use DAMON-based demotion as the proactive and primary mechanism, while utilizing LRU-based demotion together as a fast backup solution.
Evaluation ========== In short, under a setup that requires fast and frequent promotions, self-tuned DAMON-based memory tiering's hot pages promotion improves performance by about 4.42 %. We believe this shows self-tuned DAMON-based promotion's effectiveness. Meanwhile, NUMAB-2's hot pages promotion degrades the performance by about 7.34 %. We suspect the degradation is mostly due to NUMAB-2's synchronous nature that can block the application's progress, which highlights the advantage of the DAMON-based solution's asynchronous nature. Note that the test was done with the RFC version of this patch series. We did not run it again since this patch series had no meaningful change after the RFC, while the test takes a pretty long time. Setup ----- Hardware. Use a machine equipped with a 250 GiB DRAM memory tier and a 50 GiB CXL memory tier. The tiers are exposed as NUMA nodes 0 and 1, respectively. Kernel. Use Linux kernel v6.13, modified as follows. Add all DAMON patches available on the mm tree as of 2025-03-15, and this patch series. Also modify it to ignore mempolicy() system calls, to avoid bad effects from the application's optimizations that assume traditional NUMA systems. Workload. Use a modified version of the Taobench benchmark[3] that is available in the DCPerf benchmark suite. It represents an in-memory caching workload. We set its 'memsize', 'warmup_time', and 'test_time' parameters to 340 GiB, 2,500 seconds and 1,440 seconds. The parameters are chosen to ensure the workload uses more than the DRAM memory tier. Its RSS under these parameters grows to 270 GiB within the warmup time. It turned out the workload has a very static access pattern. Only about 13 % of the RSS is frequently accessed from beginning to end. Hence promotion shows no meaningful performance difference regardless of different designs and implementations. We therefore modify the kernel to periodically demote up to 10 GiB of hot pages and promote up to 10 GiB of cold pages once per minute. The intention is to simulate periodic access pattern changes. The hotness and coldness thresholds are very naively set so that it is more like a random access pattern change rather than a strict hot/cold page exchange. This is why we call the workload "modified". It is implemented as two DAMOS schemes, each running on an asynchronous thread. It can be reproduced with the DAMON user-space tool as below. # ./damo start \ --ops paddr --numa_node 0 --monitoring_intervals 10s 200s 200s \ --damos_action migrate_hot 1 \ --damos_quota_interval 60s --damos_quota_space 10G \ --ops paddr --numa_node 1 --monitoring_intervals 10s 200s 200s \ --damos_action migrate_cold 0 \ --damos_quota_interval 60s --damos_quota_space 10G \ --nr_schemes 1 1 --nr_targets 1 1 --nr_ctxs 1 1 System configurations. Use the following system configurations. - Baseline. No memory tiering features are turned on. - Numab_tiering. On the baseline, enable NUMAB-2 and reclaim-based demotion. In detail, the following commands are executed: echo 2 > /proc/sys/kernel/numa_balancing; echo 1 > /sys/kernel/mm/numa/demotion_enabled; echo 7 > /proc/sys/vm/zone_reclaim_mode - DAMON_tiering. On the baseline, utilize the self-tuned DAMON-based memory tiering implementation via the DAMON user-space tool. It utilizes two kernel threads, namely a promotion thread and a demotion thread. The demotion thread monitors the access pattern of the DRAM node using DAMON with auto-tuned monitoring intervals aiming at a 4% DAMON-observed access ratio, and demotes the coldest pages at up to 200 MiB per second, aiming at 0.5% free space on the DRAM node.
The promotion thread monitors the CXL node using the same intervals auto-tuning, and promotes hot pages in the same way but aiming for 99.7% utilization of the DRAM node. Because DAMON provides only best-effort accuracy, add young page DAMOS filters to allow only young pages when promoting and to reject all young pages when demoting. It can be reproduced with the DAMON user-space tool as below. # ./damo start \ --numa_node 0 --monitoring_intervals_goal 4% 3 5ms 10s \ --damos_action migrate_cold 1 --damos_access_rate 0% 0% \ --damos_apply_interval 1s \ --damos_quota_interval 1s --damos_quota_space 200MB \ --damos_quota_goal node_mem_free_bp 0.5% 0 \ --damos_filter reject young \ --numa_node 1 --monitoring_intervals_goal 4% 3 5ms 10s \ --damos_action migrate_hot 0 --damos_access_rate 5% max \ --damos_apply_interval 1s \ --damos_quota_interval 1s --damos_quota_space 200MB \ --damos_quota_goal node_mem_used_bp 99.7% 0 \ --damos_filter allow young \ --damos_nr_quota_goals 1 1 --damos_nr_filters 1 1 \ --nr_targets 1 1 --nr_schemes 1 1 --nr_ctxs 1 1 Measurement Results ------------------- On each system configuration, run the modified version of Taobench and collect 'score'. 'score' is a metric calculated and provided by Taobench to represent the performance of the run on the system. To handle measurement errors, repeat the measurement five times. The results are as below.

Config         Score   Stdev   (%)     Normalized
Baseline       1.6165  0.0319  1.9764  1.0000
Numab_tiering  1.4976  0.0452  3.0209  0.9264
DAMON_tiering  1.6881  0.0249  1.4767  1.0443

The 'Config' column shows the system config of the measurement. The 'Score' column shows the average 'score' of the five runs on the system config. The 'Stdev' column shows the standard deviation of the five measurements of the scores. The '(%)' column shows the 'Stdev' to 'Score' ratio as a percentage. Finally, the 'Normalized' column shows the averaged score values of the configs normalized to that of 'Baseline'. The periodic hot page demotion and cold page promotion conducted to simulate dynamic access pattern changes started from the beginning of the workload. It resulted in the DRAM tier utilization always being under the watermark, and hence no real demotion happened in any test run. This means the above results show no difference between LRU-based and DAMON-based demotions. Only the difference between NUMAB-2 and DAMON-based promotions is represented in the results. The Numab_tiering config degraded the performance by about 7.36 %. We suspect this happened because NUMAB-2's synchronous promotion was blocking Taobench's real work progress. The DAMON_tiering config improved the performance by about 4.43 %. We believe this shows the effectiveness of DAMON-based promotion, which didn't block Taobench's real work progress due to its asynchronous nature. This also means DAMON's monitoring results are accurate enough to provide a visible amount of improvement. Evaluation Limitations ---------------------- As mentioned above, this evaluation shows only a comparison of promotion mechanisms. DAMON-based tiering is recommended to be used together with reclaim-based demotion as a faster backup under significant memory pressure, though. From some perspectives, the modified version of Taobench may seem to distort the picture too much. It would be better to evaluate with a more realistic workload, or more finely tuned micro benchmarks. Patch Sequence ============== The first patch (patch 1) implements two new quota goal metrics in the core layer and exposes them to the DAMON core kernel API.
The second and third ones (patches 2 and 3) further link it to DAMON sysfs interface. Three following patches (patches 4-6) document the new feature and sysfs file on design, usage, and ABI documents. The final one (patch 7) implements a working version of a self-tuned DAMON-based memory tiering solution in an incomplete but easy to understand form as a kernel module under samples/damon/ directory. References ========== [1] https://lore.kernel.org/20231112195602.61525-1-sj@kernel.org/ [2] https://lore.kernel.org/20250303221726.484227-1-sj@kernel.org [3] https://github.com/facebookresearch/DCPerf/blob/main/packages/tao_bench/README.md This patch (of 7): Used and free space ratios for specific NUMA nodes can be useful inputs for NUMA-specific DAMOS schemes' aggressiveness self-tuning feedback loop. Implement DAMOS quota goal metrics for such self-tuned schemes. Link: https://lkml.kernel.org/r/20250420194030.75838-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250420194030.75838-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Yunjeong Mun Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/damon.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 47e36e6ea203..a4011726cb3b 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -145,6 +145,8 @@ enum damos_action { * * @DAMOS_QUOTA_USER_INPUT: User-input value. * @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us. + * @DAMOS_QUOTA_NODE_MEM_USED_BP: MemUsed ratio of a node. + * @DAMOS_QUOTA_NODE_MEM_FREE_BP: MemFree ratio of a node. * @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics. * * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported. @@ -152,6 +154,8 @@ enum damos_action { enum damos_quota_goal_metric { DAMOS_QUOTA_USER_INPUT, DAMOS_QUOTA_SOME_MEM_PSI_US, + DAMOS_QUOTA_NODE_MEM_USED_BP, + DAMOS_QUOTA_NODE_MEM_FREE_BP, NR_DAMOS_QUOTA_GOAL_METRICS, }; @@ -161,6 +165,7 @@ enum damos_quota_goal_metric { * @target_value: Target value of @metric to achieve with the tuning. * @current_value: Current value of @metric. * @last_psi_total: Last measured total PSI + * @nid: Node id. * @list: List head for siblings. * * Data structure for getting the current score of the quota tuning goal. The @@ -179,6 +184,7 @@ struct damos_quota_goal { /* metric-dependent fields */ union { u64 last_psi_total; + int nid; }; struct list_head list; }; -- cgit v1.2.3 From 786d5cc2b92ac331d0654452c6c3cea611772e09 Mon Sep 17 00:00:00 2001 From: "Christoph Lameter (Ampere)" Date: Mon, 21 Apr 2025 13:58:06 -0700 Subject: Update Christoph's Email address and make it consistent Use cl@gentwo.org throughout and remove the old email addresses. Link: https://lkml.kernel.org/r/8b962f57-4d98-cbb0-cd82-b6ba456733e8@gentwo.org Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 0aeb0e276a3e..c16cdeaa505e 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -375,7 +375,7 @@ do { \ } while (0) /* - * this_cpu operations (C) 2008-2013 Christoph Lameter + * this_cpu operations (C) 2008-2013 Christoph Lameter * * Optimized manipulation for memory allocated through the per cpu * allocator or for addresses of per cpu variables. 
-- cgit v1.2.3 From 8adce0857769d596c5b000d118119e3bb1d63a32 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Thu, 24 Apr 2025 16:28:05 -0400 Subject: cpuset: rename cpuset_node_allowed to cpuset_current_node_allowed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "vmscan: enforce mems_effective during demotion", v5. Change reclaim to respect cpuset.mems_effective during demotion when possible. Presently, reclaim explicitly ignores cpuset.mems_effective when demoting, which may cause the cpuset settings to be violated. Implement cpuset_node_allowed() to check the cpuset.mems_effective associated with the mem_cgroup of the lruvec being scanned. This only applies to cgroup/cpuset v2, as cpuset exists in a different hierarchy than mem_cgroup in v1. This requires renaming the existing cpuset_node_allowed() to be cpuset_current_node_allowed() - which is more descriptive anyway - to implement the new cpuset_node_allowed() which takes a target cgroup. This patch (of 2): Rename cpuset_node_allowed to reflect that the function checks the current task's cpuset.mems. This allows us to make a new cpuset_node_allowed function that checks a target cgroup's cpuset.mems. Link: https://lkml.kernel.org/r/20250424202806.52632-1-gourry@gourry.net Link: https://lkml.kernel.org/r/20250424202806.52632-2-gourry@gourry.net Signed-off-by: Gregory Price Acked-by: Waiman Long Acked-by: Tejun Heo Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/cpuset.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 5466c96a33db..d6a4fe5c3b6e 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -82,11 +82,11 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p); void cpuset_init_current_mems_allowed(void); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); -extern bool cpuset_node_allowed(int node, gfp_t gfp_mask); +extern bool cpuset_current_node_allowed(int node, gfp_t gfp_mask); static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { - return cpuset_node_allowed(zone_to_nid(z), gfp_mask); + return cpuset_current_node_allowed(zone_to_nid(z), gfp_mask); } static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) -- cgit v1.2.3 From 7d709f49babc28907b0ac60228f522d2e6216add Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Thu, 24 Apr 2025 16:28:06 -0400 Subject: vmscan,cgroup: apply mems_effective to reclaim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible for a reclaimer to cause demotions of an lruvec belonging to a cgroup with cpuset.mems set to exclude some nodes. Attempt to apply this limitation based on the lruvec's memcg and prevent demotion. Notably, this may still allow demotion of shared libraries or any memory first instantiated in another cgroup. This means cpusets still cannot guarantee complete isolation when demotion is enabled, and the docs have been updated to reflect this. This is useful for isolating workloads on a multi-tenant system from certain classes of memory more consistently - with the noted exceptions.
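A minimal sketch of how the reclaim-side check might look is below; the mm/vmscan.c change itself is outside this 'include'-limited view, so everything here other than mem_cgroup_node_allowed() and lruvec_memcg() is illustrative only:

/* illustrative only: ask the lruvec's memcg whether demoting to target_nid
 * is allowed by the cgroup's effective cpuset.mems */
static bool demotion_node_allowed(struct lruvec *lruvec, int target_nid)
{
	struct mem_cgroup *memcg = lruvec_memcg(lruvec);

	return mem_cgroup_node_allowed(memcg, target_nid);
}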
Note on locking: The cgroup_get_e_css reference protects the css->effective_mems, and calls of this interface would be subject to the same race conditions associated with a non-atomic access to cs->effective_mems. So while this interface cannot make strong guarantees of correctness, it can therefore avoid taking a global or rcu_read_lock for performance. Link: https://lkml.kernel.org/r/20250424202806.52632-3-gourry@gourry.net Signed-off-by: Gregory Price Suggested-by: Shakeel Butt Suggested-by: Waiman Long Acked-by: Tejun Heo Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Reviewed-by: Waiman Long Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/cpuset.h | 5 +++++ include/linux/memcontrol.h | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index d6a4fe5c3b6e..2ddb256187b5 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -173,6 +173,7 @@ static inline void set_mems_allowed(nodemask_t nodemask) task_unlock(current); } +extern bool cpuset_node_allowed(struct cgroup *cgroup, int nid); #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } @@ -293,6 +294,10 @@ static inline bool read_mems_allowed_retry(unsigned int seq) return false; } +static inline bool cpuset_node_allowed(struct cgroup *cgroup, int nid) +{ + return true; +} #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5264d148bdd9..ac1b003ee5b8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1736,6 +1736,8 @@ static inline void count_objcg_events(struct obj_cgroup *objcg, rcu_read_unlock(); } +bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid); + #else static inline bool mem_cgroup_kmem_disabled(void) { @@ -1797,6 +1799,11 @@ static inline ino_t page_cgroup_ino(struct page *page) { return 0; } + +static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid) +{ + return true; +} #endif /* CONFIG_MEMCG */ #if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP) -- cgit v1.2.3 From b960818d51b3c8275d0f80c5fc4156eb2ff2fde6 Mon Sep 17 00:00:00 2001 From: Gavin Guo Date: Fri, 25 Apr 2025 18:38:59 +0800 Subject: mm/huge_memory: remove useless folio pointers passing Since the previous commit "mm/huge_memory: Adjust try_to_migrate_one() and split_huge_pmd_locked()" has simplified the logic by leveraging the folio verification in page_vma_mapped_walk(), this patch removes the unnecessary folio pointers passing. 
Link: https://lkml.kernel.org/r/20250425103859.825879-3-gavinguo@igalia.com Link: https://lore.kernel.org/all/98d1d195-7821-4627-b518-83103ade56c0@redhat.com/ Link: https://lore.kernel.org/all/91599a3c-e69e-4d79-bac5-5013c96203d7@redhat.com/ Signed-off-by: Gavin Guo Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Baolin Wang Cc: Florent Revest Cc: Gavin Shan Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f190998b2ebd..2f190c90192d 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -395,7 +395,7 @@ static inline int split_huge_page(struct page *page) void deferred_split_folio(struct folio *folio, bool partially_mapped); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address, bool freeze, struct folio *folio); + unsigned long address, bool freeze); #define split_huge_pmd(__vma, __pmd, __address) \ do { \ @@ -403,12 +403,11 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ - false, NULL); \ + false); \ } while (0) - void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, - bool freeze, struct folio *folio); + bool freeze); void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); @@ -501,7 +500,7 @@ static inline bool thp_migration_supported(void) } void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, bool freeze, struct folio *folio); + pmd_t *pmd, bool freeze); bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp, struct folio *folio); @@ -576,12 +575,12 @@ static inline void deferred_split_folio(struct folio *folio, bool partially_mapp do { } while (0) static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address, bool freeze, struct folio *folio) {} + unsigned long address, bool freeze) {} static inline void split_huge_pmd_address(struct vm_area_struct *vma, - unsigned long address, bool freeze, struct folio *folio) {} + unsigned long address, bool freeze) {} static inline void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, - bool freeze, struct folio *folio) {} + bool freeze) {} static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp, -- cgit v1.2.3 From bc9817bb7a21f64fbca2c4b83811d943036ec870 Mon Sep 17 00:00:00 2001 From: Huan Yang Date: Fri, 25 Apr 2025 11:19:23 +0800 Subject: mm/memcg: move mem_cgroup_init() ahead of cgroup_init() Patch series "Use kmem_cache for memcg alloc", v3. (willy tldr: "you've gone from allocating 8 objects per 32KiB to allocating 13 objects per 32KiB, a 62% improvement in memory consumption" [1]) The mem_cgroup_alloc function creates mem_cgroup struct and it's associated structures including mem_cgroup_per_node. 
Through detailed analysis on our test machine (Arm64, 16GB RAM, 6.6 kernel, 1 NUMA node, memcgv2 with nokmem,nosocket,cgroup_disable=pressure), we can observe the memory allocation for these structures using the following shell commands: # Enable tracing echo 1 > /sys/kernel/tracing/events/kmem/kmalloc/enable echo 1 > /sys/kernel/tracing/tracing_on cat /sys/kernel/tracing/trace_pipe | grep kmalloc | grep mem_cgroup # Trigger allocation if cgroup subtree do not enable memcg echo +memory > /sys/fs/cgroup/cgroup.subtree_control Ftrace Output: # mem_cgroup struct allocation sh-6312 [000] ..... 58015.698365: kmalloc: call_site=mem_cgroup_css_alloc+0xd8/0x5b4 ptr=000000003e4c3799 bytes_req=2312 bytes_alloc=4096 gfp_flags=GFP_KERNEL|__GFP_ZERO node=-1 accounted=false # mem_cgroup_per_node allocation sh-6312 [000] ..... 58015.698389: kmalloc: call_site=mem_cgroup_css_alloc+0x1d8/0x5b4 ptr=00000000d798700c bytes_req=2896 bytes_alloc=4096 gfp_flags=GFP_KERNEL|__GFP_ZERO node=0 accounted=false Key Observations: 1. Both structures use kmalloc with requested sizes between 2KB-4KB 2. Allocation alignment forces 4KB slab usage due to pre-defined sizes (64B, 128B,..., 2KB, 4KB, 8KB) 3. Memory waste per memcg instance: Base struct: 4096 - 2312 = 1784 bytes Per-node struct: 4096 - 2896 = 1200 bytes Total waste: 2984 bytes (1-node system) NUMA scaling: (1200 + 8) * nr_node_ids bytes So, it's a little waste. This patchset introduces dedicated kmem_cache: Patch2 - mem_cgroup kmem_cache - memcg_cachep Patch3 - mem_cgroup_per_node kmem_cache - memcg_pn_cachep The benefits of this change can be observed with the following tracing commands: # Enable tracing echo 1 > /sys/kernel/tracing/events/kmem/kmem_cache_alloc/enable echo 1 > /sys/kernel/tracing/tracing_on cat /sys/kernel/tracing/trace_pipe | grep kmem_cache_alloc | grep mem_cgroup # In another terminal: echo +memory > /sys/fs/cgroup/cgroup.subtree_control The output might now look like this: # mem_cgroup struct allocation sh-9827 [000] ..... 289.513598: kmem_cache_alloc: call_site=mem_cgroup_css_alloc+0xbc/0x5d4 ptr=00000000695c1806 bytes_req=2312 bytes_alloc=2368 gfp_flags=GFP_KERNEL|__GFP_ZERO node=-1 accounted=false # mem_cgroup_per_node allocation sh-9827 [000] ..... 289.513602: kmem_cache_alloc: call_site=mem_cgroup_css_alloc+0x1b8/0x5d4 ptr=000000002989e63a bytes_req=2896 bytes_alloc=2944 gfp_flags=GFP_KERNEL|__GFP_ZERO node=0 accounted=false This indicates that the `mem_cgroup` struct now requests 2312 bytes and is allocated 2368 bytes, while `mem_cgroup_per_node` requests 2896 bytes and is allocated 2944 bytes. The slight increase in allocated size is due to `SLAB_HWCACHE_ALIGN` in the `kmem_cache`. Without `SLAB_HWCACHE_ALIGN`, the allocation might appear as: # mem_cgroup struct allocation sh-9269 [003] ..... 80.396366: kmem_cache_alloc: call_site=mem_cgroup_css_alloc+0xbc/0x5d4 ptr=000000005b12b475 bytes_req=2312 bytes_alloc=2312 gfp_flags=GFP_KERNEL|__GFP_ZERO node=-1 accounted=false # mem_cgroup_per_node allocation sh-9269 [003] ..... 80.396411: kmem_cache_alloc: call_site=mem_cgroup_css_alloc+0x1b8/0x5d4 ptr=00000000f347adc6 bytes_req=2896 bytes_alloc=2896 gfp_flags=GFP_KERNEL|__GFP_ZERO node=0 accounted=false While the `bytes_alloc` now matches the `bytes_req`, this patchset defaults to using `SLAB_HWCACHE_ALIGN` as it is generally considered more beneficial for performance. Please let me know if there are any issues or if I've misunderstood anything. 
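As a rough sketch of the direction (the dedicated caches are added in patches 2 and 3, which are not part of this 'include'-limited view, so the snippet below is illustrative rather than the actual change):

/* illustrative: a dedicated, hardware-cacheline-aligned cache avoids
 * rounding the ~2.3 KiB mem_cgroup allocation up to a 4 KiB slab */
static struct kmem_cache *memcg_cachep;

memcg_cachep = kmem_cache_create("mem_cgroup", sizeof(struct mem_cgroup),
				 0, SLAB_HWCACHE_ALIGN, NULL);
/* ... later, at css_alloc time ... */
memcg = kmem_cache_zalloc(memcg_cachep, GFP_KERNEL);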
This patchset also moves mem_cgroup_init() ahead of cgroup_init(), because cgroup_init() allocates root_mem_cgroup while initcalls only run after cgroup_init(); so if the kmem_caches were not prepared by then, we would need to test for NULL before using them. This patch (of 3): When cgroup_init() creates root_mem_cgroup through the css_alloc callback, some critical resources might not be fully initialized, forcing later operations to perform conditional checks for resource availability. This patch moves mem_cgroup_init() to address the init order: it is invoked before cgroup_init(), so, compared to subsys_initcall, it can be used to prepare some key resources before root_mem_cgroup is allocated. Link: https://lkml.kernel.org/r/aAsRCj-niMMTtmK8@casper.infradead.org [1] Link: https://lkml.kernel.org/r/20250425031935.76411-1-link@vivo.com Link: https://lkml.kernel.org/r/20250425031935.76411-2-link@vivo.com Signed-off-by: Huan Yang Suggested-by: Shakeel Butt Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Francesco Valla Cc: guoweikang Cc: Huang Shijie Cc: KP Singh Cc: Michal Hocko Cc: Muchun Song Cc: "Paul E . McKenney" Cc: Petr Mladek Cc: Rasmus Villemoes Cc: Raul E Rangel Cc: Roman Gushchin Cc: "Uladzislau Rezki (Sony)" Cc: Vlastimil Babka Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ac1b003ee5b8..9ed75f82b858 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1057,6 +1057,7 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm) return id; } +extern int mem_cgroup_init(void); #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 @@ -1472,6 +1473,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm) { return 0; } + +static inline int mem_cgroup_init(void) { return 0; } #endif /* CONFIG_MEMCG */ /* -- cgit v1.2.3 From 8d88b0769e256c238f80615f08fc5b7aebc29439 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Wed, 2 Apr 2025 20:56:13 +0000 Subject: mm/hugetlb: use separate nodemask for bootmem allocations Hugetlb boot allocation has used online nodes for allocation since commit de55996d7188 ("mm/hugetlb: use online nodes for bootmem allocation"). This was needed to be able to do the allocations earlier in boot, before N_MEMORY was set. This might lead to a different distribution of gigantic hugepages across NUMA nodes if there are memoryless nodes in the system. What happens is that the memoryless nodes are tried, but then the memblock allocation fails and falls back, which usually means that the node that has the highest physical address available will be used (top-down allocation). While this will end up getting the same number of hugetlb pages, they might not be distributed the same way. The fallback for each memoryless node might not end up coming from the same node as the successful round-robin allocation from N_MEMORY nodes. While administrators that rely on having a specific number of hugepages per node should use the hugepages=N:X syntax, it's better not to change the old behavior for the plain hugepages=N case. To do this, construct a nodemask for hugetlb bootmem purposes only, containing nodes that have memory. Then use that for round-robin bootmem allocations. This saves some cycles, and the added advantage here is that hugetlb_cma can use it too, avoiding the older issue of pointless attempts to create a CMA area for memoryless nodes (which will also cause the per-node CMA area size to be too small).
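A minimal sketch of what populating such a nodemask early in boot could look like; the actual mm/hugetlb.c implementation is not part of this 'include'-limited diff, so the memblock-based iteration below is an assumption:

nodemask_t hugetlb_bootmem_nodes;

void __init hugetlb_bootmem_set_nodes(void)
{
	int i, nid;
	unsigned long start_pfn, end_pfn;

	nodes_clear(hugetlb_bootmem_nodes);
	/* mark every node for which memblock knows about memory */
	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
		node_set(nid, hugetlb_bootmem_nodes);
}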
Link: https://lkml.kernel.org/r/20250402205613.3086864-1-fvdl@google.com Fixes: de55996d7188 ("mm/hugetlb: use online nodes for bootmem allocation") Signed-off-by: Frank van der Linden Reviewed-by: Oscar Salvador Reviewed-by: Luiz Capitulino Cc: David Hildenbrand Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a57bed83c657..23ebf49c5d6a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -14,6 +14,7 @@ #include #include #include +#include struct ctl_table; struct user_struct; @@ -176,6 +177,8 @@ extern struct list_head huge_boot_pages[MAX_NUMNODES]; void hugetlb_bootmem_alloc(void); bool hugetlb_bootmem_allocated(void); +extern nodemask_t hugetlb_bootmem_nodes; +void hugetlb_bootmem_set_nodes(void); /* arch callbacks */ -- cgit v1.2.3 From 68a1436bde00b40327efe27926076b53088775a3 Mon Sep 17 00:00:00 2001 From: Zhongkun He Date: Mon, 21 Apr 2025 17:13:28 +0800 Subject: mm: add swappiness=max arg to memory.reclaim for only anon reclaim Patch series "add max arg to swappiness in memory.reclaim and lru_gen", v4. This patchset adds the max arg to swappiness in memory.reclaim and lru_gen for anon-only proactive memory reclaim. With commit <68cd9050d871> ("mm: add swappiness= arg to memory.reclaim") we can submit an additional swappiness= argument to memory.reclaim. It is very useful because we can dynamically adjust the reclamation ratio based on the anonymous folios and file folios of each cgroup. For example, when swappiness is set to 0, we only reclaim from file folios. But we cannot reclaim memory just from anon folios. This patchset introduces a new macro, SWAPPINESS_ANON_ONLY, defined as MAX_SWAPPINESS + 1, to represent the max arg semantics. It specifically indicates that reclamation should occur only from anonymous pages. Patch 1 adds the swappiness=max arg to memory.reclaim, suggested-by: Yosry Ahmed. Patch 2 adds more comments for cache_trim_mode from Johannes Weiner in [1]. Patch 3 adds the max arg to lru_gen for proactive memory reclaim in MGLRU. The MGLRU already supports reclaiming exclusively from anonymous pages. This patch formalizes that behavior by introducing a max parameter to represent the corresponding semantics. Patch 4 uses SWAPPINESS_ANON_ONLY in MGLRU. Using SWAPPINESS_ANON_ONLY instead of MAX_SWAPPINESS + 1 to indicate reclaiming only from anonymous pages makes the code more readable and explicit. Here is the previous discussion: https://lore.kernel.org/all/20250314033350.1156370-1-hezhongkun.hzk@bytedance.com/ https://lore.kernel.org/all/20250312094337.2296278-1-hezhongkun.hzk@bytedance.com/ https://lore.kernel.org/all/20250318135330.3358345-1-hezhongkun.hzk@bytedance.com/ This patch (of 4): With commit <68cd9050d871> ("mm: add swappiness= arg to memory.reclaim") we can submit an additional swappiness= argument to memory.reclaim. It is very useful because we can dynamically adjust the reclamation ratio based on the anonymous folios and file folios of each cgroup. For example, when swappiness is set to 0, we only reclaim from file folios. However, we have also encountered a new issue: when swappiness is set to MAX_SWAPPINESS, it may still only reclaim file folios. So, we hope to add a new arg 'swappiness=max' in memory.reclaim where proactive memory reclaim only reclaims from anonymous folios when swappiness is set to max. The swappiness semantics from a user perspective remain unchanged.
For example, something like this: echo "2M swappiness=max" > /sys/fs/cgroup/memory.reclaim will perform reclaim on the rootcg with a swappiness setting of 'max' (a new mode) regardless of the file folios. Users have a more comprehensive view of the application's memory distribution because there are many metrics available. For example, if we find that a certain cgroup has a large number of inactive anon folios, we can reclaim only those and skip file folios, because with the zram/zswap, the IO tradeoff that cache_trim_mode or other file first logic is making doesn't hold - file refaults will cause IO, whereas anon decompression will not. With this patch, the swappiness argument of memory.reclaim has a new mode 'max', means reclaiming just from anonymous folios both in traditional LRU and MGLRU. Link: https://lkml.kernel.org/r/cover.1745225696.git.hezhongkun.hzk@bytedance.com Link: https://lore.kernel.org/all/20250314141833.GA1316033@cmpxchg.org/ [1] Link: https://lkml.kernel.org/r/519e12b9b1f8c31a01e228c8b4b91a2419684f77.1745225696.git.hezhongkun.hzk@bytedance.com Signed-off-by: Zhongkun He Suggested-by: Yosry Ahmed Acked-by: Muchun Song Cc: Johannes Weiner Cc: Michal Hocko Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/swap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 4e4e27d3ce3d..bc0e1c275fc0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -414,6 +414,10 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #define MEMCG_RECLAIM_PROACTIVE (1 << 2) #define MIN_SWAPPINESS 0 #define MAX_SWAPPINESS 200 + +/* Just recliam from anon folios in proactive memory reclaim */ +#define SWAPPINESS_ANON_ONLY (MAX_SWAPPINESS + 1) + extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, -- cgit v1.2.3 From a3365bdca2200e24d815b19382d3f05deb8567ab Mon Sep 17 00:00:00 2001 From: Cheng-Han Wu Date: Sun, 27 Apr 2025 22:50:04 +0800 Subject: mm: remove unused macro INIT_PASID The macro INIT_PASID was originally used by mm_init_pasid. However, since commit a6cbd44093ef ("kernel/fork: Initialize mm's PASID"), mm_init_pasid has been removed. Therefore, INIT_PASID is no longer needed and is removed. Link: https://lkml.kernel.org/r/20250427145004.13049-1-hank20010209@gmail.com Signed-off-by: Cheng-Han Wu Reviewed-by: Anshuman Khandual Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 56d07edd01f9..e76bade9ebb1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -28,7 +28,6 @@ #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) -#define INIT_PASID 0 struct address_space; struct mem_cgroup; -- cgit v1.2.3 From 50dbe531291abfa4513c1283d66fad420c1fd299 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Thu, 24 Apr 2025 17:16:51 -0700 Subject: khugepaged: pass folio instead of head page to trace events The trace functions trace_mm_collapse_huge_page_isolate() and trace_mm_khugepaged_scan_pmd() each have a single user, which always passes in the head page of a folio. Refactor both functions to take a folio directly. 
Link: https://lkml.kernel.org/r/20250425002425.533698-1-nifan.cxl@gmail.com Signed-off-by: Fan Ni Reviewed-by: Nico Pache Reviewed-by: Davidlohr Bueso Reviewed-by: Baolin Wang Reviewed-by: Yang Shi Acked-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Cc: Adam Manzanares Cc: Luis Chamberalin Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/trace/events/huge_memory.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 9d5c00b0285c..2305df6cb485 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -55,10 +55,10 @@ SCAN_STATUS TRACE_EVENT(mm_khugepaged_scan_pmd, - TP_PROTO(struct mm_struct *mm, struct page *page, bool writable, + TP_PROTO(struct mm_struct *mm, struct folio *folio, bool writable, int referenced, int none_or_zero, int status, int unmapped), - TP_ARGS(mm, page, writable, referenced, none_or_zero, status, unmapped), + TP_ARGS(mm, folio, writable, referenced, none_or_zero, status, unmapped), TP_STRUCT__entry( __field(struct mm_struct *, mm) @@ -72,7 +72,7 @@ TRACE_EVENT(mm_khugepaged_scan_pmd, TP_fast_assign( __entry->mm = mm; - __entry->pfn = page ? page_to_pfn(page) : -1; + __entry->pfn = folio ? folio_pfn(folio) : -1; __entry->writable = writable; __entry->referenced = referenced; __entry->none_or_zero = none_or_zero; @@ -116,10 +116,10 @@ TRACE_EVENT(mm_collapse_huge_page, TRACE_EVENT(mm_collapse_huge_page_isolate, - TP_PROTO(struct page *page, int none_or_zero, + TP_PROTO(struct folio *folio, int none_or_zero, int referenced, bool writable, int status), - TP_ARGS(page, none_or_zero, referenced, writable, status), + TP_ARGS(folio, none_or_zero, referenced, writable, status), TP_STRUCT__entry( __field(unsigned long, pfn) @@ -130,7 +130,7 @@ TRACE_EVENT(mm_collapse_huge_page_isolate, ), TP_fast_assign( - __entry->pfn = page ? page_to_pfn(page) : -1; + __entry->pfn = folio ? folio_pfn(folio) : -1; __entry->none_or_zero = none_or_zero; __entry->referenced = referenced; __entry->writable = writable; -- cgit v1.2.3 From 4c78cc596bb8d39532f059e0198eeabf370c50f5 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Fri, 9 May 2025 00:46:19 -0700 Subject: memblock: add MEMBLOCK_RSRV_KERN flag Patch series "kexec: introduce Kexec HandOver (KHO)", v8. Kexec today considers itself purely a boot loader: When we enter the new kernel, any state the previous kernel left behind is irrelevant and the new kernel reinitializes the system. However, there are use cases where this mode of operation is not what we actually want. In virtualization hosts for example, we want to use kexec to update the host kernel while virtual machine memory stays untouched. When we add device assignment to the mix, we also need to ensure that IOMMU and VFIO states are untouched. If we add PCIe peer to peer DMA, we need to do the same for the PCI subsystem. If we want to kexec while an SEV-SNP enabled virtual machine is running, we need to preserve the VM context pages and physical memory. See "pkernfs: Persisting guest memory and kernel/device state safely across kexec" Linux Plumbers Conference 2023 presentation for details: https://lpc.events/event/17/contributions/1485/ To start us on the journey to support all the use cases above, this patch implements basic infrastructure to allow hand over of kernel state across kexec (Kexec HandOver, aka KHO). 
As a really simple example target, we use memblock's reserve_mem. With this patchset applied, memory that was reserved using "reserve_mem" command line options remains intact after kexec and is guaranteed to reside at the same physical address. == Alternatives == There are alternative approaches to (parts of) the problems above: * Memory Pools [1] - preallocated persistent memory region + allocator * PRMEM [2] - resizable persistent memory regions with fixed metadata pointer on the kernel command line + allocator * Pkernfs [3] - preallocated file system for in-kernel data with fixed address location on the kernel command line * PKRAM [4] - handover of user space pages using a fixed metadata page specified via command line All of the approaches above fundamentally have the same problem: They require the administrator to explicitly carve out a physical memory location because they have no mechanism outside of the kernel command line to pass data (including memory reservations) between kexec'ing kernels. KHO provides that base foundation. We will determine later whether we still need any of the approaches above for fast bulk memory handover of, for example, IOMMU page tables. But IMHO they would all be users of KHO, with KHO providing the foundational primitive to pass metadata and bulk memory reservations as well as provide easy versioning for data. == Overview == We introduce a metadata file that the kernels pass between each other. How they pass it is architecture specific. The file's format is a Flattened Device Tree (fdt) which has a generator and parser already included in Linux. KHO is enabled on the kernel command line by `kho=on`. When the root user enables KHO through /sys/kernel/debug/kho/out/finalize, the kernel invokes callbacks to every KHO user to register preserved memory regions, which contain drivers' states. When the actual kexec happens, the fdt is part of the image set that we boot into. In addition, we keep "scratch regions" available for kexec: physically contiguous memory regions that are guaranteed to not have any memory that KHO would preserve. The new kernel bootstraps itself using the scratch regions and sets all handed over memory as in use. When drivers that support KHO initialize, they introspect the fdt, restore preserved memory regions, and retrieve their states stored in the preserved memory. == Limitations == Currently KHO is only implemented for file based kexec. The kernel interfaces in the patch set are already in place to support user space kexec as well, but it is not yet implemented inside kexec tools. == How to Use == To use the code, please boot the kernel with the "kho=on" command line parameter. KHO will automatically create scratch regions. If you want to set the scratch size explicitly you can use the "kho_scratch=" command line parameter. For instance, "kho_scratch=16M,512M,256M" will reserve a 16 MiB low memory scratch area, a 512 MiB global scratch region, and 256 MiB per NUMA node scratch regions on boot. Make sure to have a reserved memory range requested with the reserve_mem command line option, for example, "reserve_mem=64m:4k:n1". Then before you invoke file based "kexec -l", finalize the KHO FDT: # echo 1 > /sys/kernel/debug/kho/out/finalize You can preview the generated FDT using `dtc`, # dtc /sys/kernel/debug/kho/out/fdt # dtc /sys/kernel/debug/kho/out/sub_fdts/memblock `dtc` is available on Ubuntu via `sudo apt-get install device-tree-compiler`.
Now kexec into the new kernel, # kexec -l Image --initrd=initrd -s # kexec -e (The order of KHO finalization and "kexec -l" does not matter.) The new kernel will boot up and contain the previous kernel's reserve_mem contents at the same physical address as the first kernel. You can also review the FDT passed from the old kernel, # dtc /sys/kernel/debug/kho/in/fdt # dtc /sys/kernel/debug/kho/in/sub_fdts/memblock This patch (of 17): To denote areas that were reserved for kernel use either directly with memblock_reserve_kern() or via memblock allocations. Link: https://lore.kernel.org/lkml/20250424083258.2228122-1-changyuanl@google.com/ Link: https://lore.kernel.org/lkml/aAeaJ2iqkrv_ffhT@kernel.org/ Link: https://lore.kernel.org/lkml/35c58191-f774-40cf-8d66-d1e2aaf11a62@intel.com/ Link: https://lore.kernel.org/lkml/20250424093302.3894961-1-arnd@kernel.org/ Link: https://lkml.kernel.org/r/20250509074635.3187114-1-changyuanl@google.com Link: https://lkml.kernel.org/r/20250509074635.3187114-2-changyuanl@google.com Signed-off-by: Mike Rapoport (Microsoft) Co-developed-by: Changyuan Lyu Signed-off-by: Changyuan Lyu Cc: Alexander Graf Cc: Andy Lutomirski Cc: Anthony Yznaga Cc: Arnd Bergmann Cc: Ashish Kalra Cc: Ben Herrenschmidt Cc: Borislav Betkov Cc: Catalin Marinas Cc: David Woodhouse Cc: Eric Biederman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Gowans Cc: Jonathan Corbet Cc: Krzysztof Kozlowski Cc: Marc Rutland Cc: Paolo Bonzini Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Pratyush Yadav Cc: Rob Herring Cc: Saravana Kannan Cc: Stanislav Kinsburskii Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Thomas Lendacky Cc: Will Deacon Cc: Dave Hansen Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- include/linux/memblock.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index ef5a1ecc6e59..6c00fbc08513 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -42,6 +42,9 @@ extern unsigned long long max_possible_pfn; * kernel resource tree. * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are * not initialized (only for reserved regions). + * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use, + * either explictitly with memblock_reserve_kern() or via memblock + * allocation APIs. All memblock allocations set this flag. 
*/ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ @@ -50,6 +53,7 @@ enum memblock_flags { MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ + MEMBLOCK_RSRV_KERN = 0x20, /* memory reserved for kernel use */ }; /** @@ -116,7 +120,19 @@ int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid, int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_phys_free(phys_addr_t base, phys_addr_t size); -int memblock_reserve(phys_addr_t base, phys_addr_t size); +int __memblock_reserve(phys_addr_t base, phys_addr_t size, int nid, + enum memblock_flags flags); + +static __always_inline int memblock_reserve(phys_addr_t base, phys_addr_t size) +{ + return __memblock_reserve(base, size, NUMA_NO_NODE, 0); +} + +static __always_inline int memblock_reserve_kern(phys_addr_t base, phys_addr_t size) +{ + return __memblock_reserve(base, size, NUMA_NO_NODE, MEMBLOCK_RSRV_KERN); +} + #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif @@ -476,6 +492,7 @@ static inline __init_memblock bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); +phys_addr_t memblock_reserved_kern_size(phys_addr_t limit, int nid); unsigned long memblock_estimated_nr_free_pages(void); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); -- cgit v1.2.3 From d59f43b5748092557d34244e29a618221a250501 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 9 May 2025 00:46:20 -0700 Subject: memblock: add support for scratch memory With KHO (Kexec HandOver), we need a way to ensure that the new kernel does not allocate memory on top of any memory regions that the previous kernel was handing over. But to know where those are, we need to include them in the memblock.reserved array which may not be big enough to hold all ranges that need to be persisted across kexec. To resize the array, we need to allocate memory. That brings us into a catch-22 situation. The solution to that is to limit memblock allocations to the scratch regions: safe regions to operate in the case when there is memory that should remain intact across kexec. KHO provides several "scratch regions" as part of its metadata. These scratch regions are contiguous memory blocks that are known not to contain any memory that should be persisted across kexec. These regions should be large enough to accommodate all memblock allocations done by the kexeced kernel. We introduce a new memblock_set_kho_scratch_only() function that allows KHO to indicate that any memblock allocation must happen from the scratch regions. Later, we may want to perform another KHO kexec. For that, we reuse the same scratch regions. To ensure that no eventually handed over data gets allocated inside a scratch region, we flip the semantics of the scratch region with memblock_clear_kho_scratch_only(): After that call, no allocations may happen from scratch memblock regions. We will lift that restriction in the next patch.
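A minimal sketch of the intended usage pattern during early boot of a KHO-enabled kernel; the ordering and the surrounding code are assumptions, only the two helpers come from this patch:

/* illustrative: while only scratch memory is known to be safe, restrict
 * memblock allocations to MEMBLOCK_KHO_SCRATCH regions */
memblock_set_kho_scratch_only();

/* ... reserve every range handed over by the previous kernel ... */

/* afterwards, allow normal allocations again and forbid allocating from
 * scratch regions, so they stay free for a future KHO kexec */
memblock_clear_kho_scratch_only();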
Link: https://lkml.kernel.org/r/20250509074635.3187114-3-changyuanl@google.com Signed-off-by: Alexander Graf Co-developed-by: Mike Rapoport (Microsoft) Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Changyuan Lyu Cc: Andy Lutomirski Cc: Anthony Yznaga Cc: Arnd Bergmann Cc: Ashish Kalra Cc: Ben Herrenschmidt Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David Woodhouse Cc: Eric Biederman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Gowans Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Krzysztof Kozlowski Cc: Marc Rutland Cc: Paolo Bonzini Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Pratyush Yadav Cc: Rob Herring Cc: Saravana Kannan Cc: Stanislav Kinsburskii Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Thomas Lendacky Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/memblock.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 6c00fbc08513..993937a6b962 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -45,6 +45,11 @@ extern unsigned long long max_possible_pfn; * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use, * either explictitly with memblock_reserve_kern() or via memblock * allocation APIs. All memblock allocations set this flag. + * @MEMBLOCK_KHO_SCRATCH: memory region that kexec can pass to the next + * kernel in handover mode. During early boot, we do not know about all + * memory reservations yet, so we get scratch memory from the previous + * kernel that we know is good to use. It is the only memory that + * allocations may happen from in this phase. */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ @@ -54,6 +59,7 @@ enum memblock_flags { MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ MEMBLOCK_RSRV_KERN = 0x20, /* memory reserved for kernel use */ + MEMBLOCK_KHO_SCRATCH = 0x40, /* scratch memory for kexec handover */ }; /** @@ -148,6 +154,8 @@ int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); +int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size); +int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size); void memblock_free(void *ptr, size_t size); void reset_all_zones_managed_pages(void); @@ -291,6 +299,11 @@ static inline bool memblock_is_driver_managed(struct memblock_region *m) return m->flags & MEMBLOCK_DRIVER_MANAGED; } +static inline bool memblock_is_kho_scratch(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_KHO_SCRATCH; +} + int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, @@ -619,5 +632,12 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) { } static inline void memtest_report_meminfo(struct seq_file *m) { } #endif +#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH +void memblock_set_kho_scratch_only(void); +void memblock_clear_kho_scratch_only(void); +#else +static inline void memblock_set_kho_scratch_only(void) { } +static inline void memblock_clear_kho_scratch_only(void) { } +#endif #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From b8a8f96a6dce527ad316184ff1e20f238ed413d8 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" 
Date: Fri, 9 May 2025 00:46:21 -0700 Subject: memblock: introduce memmap_init_kho_scratch() With deferred initialization of struct page it will be necessary to initialize memory map for KHO scratch regions early. Add memmap_init_kho_scratch() method that will allow such initialization in upcoming patches. Link: https://lkml.kernel.org/r/20250509074635.3187114-4-changyuanl@google.com Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Changyuan Lyu Cc: Alexander Graf Cc: Andy Lutomirski Cc: Anthony Yznaga Cc: Arnd Bergmann Cc: Ashish Kalra Cc: Ben Herrenschmidt Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David Woodhouse Cc: Eric Biederman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Gowans Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Krzysztof Kozlowski Cc: Marc Rutland Cc: Paolo Bonzini Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Pratyush Yadav Cc: Rob Herring Cc: Saravana Kannan Cc: Stanislav Kinsburskii Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Thomas Lendacky Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/memblock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 993937a6b962..bb19a2534224 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -635,9 +635,11 @@ static inline void memtest_report_meminfo(struct seq_file *m) { } #ifdef CONFIG_MEMBLOCK_KHO_SCRATCH void memblock_set_kho_scratch_only(void); void memblock_clear_kho_scratch_only(void); +void memmap_init_kho_scratch_pages(void); #else static inline void memblock_set_kho_scratch_only(void) { } static inline void memblock_clear_kho_scratch_only(void) { } +static inline void memmap_init_kho_scratch_pages(void) {} #endif #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 3dc92c311498c4d307cfdd0c6c3ac9355b50f683 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 9 May 2025 00:46:22 -0700 Subject: kexec: add Kexec HandOver (KHO) generation helpers Add the infrastructure to generate Kexec HandOver metadata. Kexec HandOver is a mechanism that allows Linux to preserve state - arbitrary properties as well as memory locations - across kexec. It does so using 2 concepts: 1) KHO FDT - Every KHO kexec carries a KHO specific flattened device tree blob that describes preserved memory regions. Device drivers can register to KHO to serialize and preserve their states before kexec. 2) Scratch Regions - CMA regions that we allocate in the first kernel. CMA gives us the guarantee that no handover pages land in those regions, because handover pages must be at a static physical memory location. We use these regions as the place to load future kexec images so that they won't collide with any handover data. Link: https://lkml.kernel.org/r/20250509074635.3187114-5-changyuanl@google.com Signed-off-by: Alexander Graf Co-developed-by: Mike Rapoport (Microsoft) Signed-off-by: Mike Rapoport (Microsoft) Co-developed-by: Pratyush Yadav Signed-off-by: Pratyush Yadav Co-developed-by: Changyuan Lyu Signed-off-by: Changyuan Lyu Cc: Andy Lutomirski Cc: Anthony Yznaga Cc: Arnd Bergmann Cc: Ashish Kalra Cc: Ben Herrenschmidt Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David Woodhouse Cc: Eric Biederman Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: James Gowans Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Krzysztof Kozlowski Cc: Marc Rutland Cc: Paolo Bonzini Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Rob Herring Cc: Saravana Kannan Cc: Stanislav Kinsburskii Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Thomas Lendacky Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 include/linux/kexec_handover.h (limited to 'include') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h new file mode 100644 index 000000000000..2e19004776f6 --- /dev/null +++ b/include/linux/kexec_handover.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_KEXEC_HANDOVER_H +#define LINUX_KEXEC_HANDOVER_H + +#include +#include + +struct kho_scratch { + phys_addr_t addr; + phys_addr_t size; +}; + +/* KHO Notifier index */ +enum kho_event { + KEXEC_KHO_FINALIZE = 0, + KEXEC_KHO_ABORT = 1, +}; + +struct notifier_block; + +struct kho_serialization; + +#ifdef CONFIG_KEXEC_HANDOVER +bool kho_is_enabled(void); + +int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); + +int register_kho_notifier(struct notifier_block *nb); +int unregister_kho_notifier(struct notifier_block *nb); + +void kho_memory_init(void); +#else +static inline bool kho_is_enabled(void) +{ + return false; +} + +static inline int kho_add_subtree(struct kho_serialization *ser, + const char *name, void *fdt) +{ + return -EOPNOTSUPP; +} + +static inline int register_kho_notifier(struct notifier_block *nb) +{ + return -EOPNOTSUPP; +} + +static inline int unregister_kho_notifier(struct notifier_block *nb) +{ + return -EOPNOTSUPP; +} + +static inline void kho_memory_init(void) +{ +} +#endif /* CONFIG_KEXEC_HANDOVER */ + +#endif /* LINUX_KEXEC_HANDOVER_H */ -- cgit v1.2.3 From c609c144b0e8dbc19712ff8c8a0929be38afe58d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 9 May 2025 00:46:23 -0700 Subject: kexec: add KHO parsing support When we have a KHO kexec, we get an FDT blob and scratch region to populate the state of the system. Provide helper functions that allow architecture code to easily handle memory reservations based on them and give device drivers visibility into the KHO FDT and memory reservations so they can recover their own state. Include a fix from Arnd Bergmann https://lore.kernel.org/lkml/20250424093302.3894961-1-arnd@kernel.org/. Link: https://lkml.kernel.org/r/20250509074635.3187114-6-changyuanl@google.com Signed-off-by: Alexander Graf Signed-off-by: Arnd Bergmann Co-developed-by: Mike Rapoport (Microsoft) Signed-off-by: Mike Rapoport (Microsoft) Co-developed-by: Changyuan Lyu Signed-off-by: Changyuan Lyu Cc: Andy Lutomirski Cc: Anthony Yznaga Cc: Ashish Kalra Cc: Ben Herrenschmidt Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David Woodhouse Cc: Eric Biederman Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: James Gowans Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Krzysztof Kozlowski Cc: Marc Rutland Cc: Paolo Bonzini Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Pratyush Yadav Cc: Rob Herring Cc: Saravana Kannan Cc: Stanislav Kinsburskii Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Thomas Lendacky Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 2e19004776f6..02dcfc8c427e 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -24,11 +24,15 @@ struct kho_serialization; bool kho_is_enabled(void); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); +int kho_retrieve_subtree(const char *name, phys_addr_t *phys); int register_kho_notifier(struct notifier_block *nb); int unregister_kho_notifier(struct notifier_block *nb); void kho_memory_init(void); + +void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys, + u64 scratch_len); #else static inline bool kho_is_enabled(void) { @@ -41,6 +45,11 @@ static inline int kho_add_subtree(struct kho_serialization *ser, return -EOPNOTSUPP; } +static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys) +{ + return -EOPNOTSUPP; +} + static inline int register_kho_notifier(struct notifier_block *nb) { return -EOPNOTSUPP; @@ -54,6 +63,11 @@ static inline int unregister_kho_notifier(struct notifier_block *nb) static inline void kho_memory_init(void) { } + +static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, + phys_addr_t scratch_phys, u64 scratch_len) +{ +} #endif /* CONFIG_KEXEC_HANDOVER */ #endif /* LINUX_KEXEC_HANDOVER_H */ -- cgit v1.2.3 From fc33e4b44b2717feba2f6f07ce7943a96499c9ec Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Fri, 9 May 2025 00:46:24 -0700 Subject: kexec: enable KHO support for memory preservation Introduce APIs allowing KHO users to preserve memory across kexec and get access to that memory after boot of the kexeced kernel kho_preserve_folio() - record a folio to be preserved over kexec kho_restore_folio() - recreates the folio from the preserved memory kho_preserve_phys() - record physically contiguous range to be preserved over kexec. The memory preservations are tracked by two levels of xarrays to manage chunks of per-order 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a 1TB x86 system would fit inside a single 512 byte bitmap. For order 0 allocations each bitmap will cover 16M of address space. Thus, for 16G of memory at most 512K of bitmap memory will be needed for order 0. At serialization time all bitmaps are recorded in a linked list of pages for the next kernel to process and the physical address of the list is recorded in KHO FDT. The next kernel then processes that list, reserves the memory ranges and later, when a user requests a folio or a physical range, KHO restores corresponding memory map entries. Link: https://lkml.kernel.org/r/20250509074635.3187114-7-changyuanl@google.com Suggested-by: Jason Gunthorpe Signed-off-by: Mike Rapoport (Microsoft) Co-developed-by: Changyuan Lyu Signed-off-by: Changyuan Lyu Cc: Alexander Graf Cc: Andy Lutomirski Cc: Anthony Yznaga Cc: Arnd Bergmann Cc: Ashish Kalra Cc: Ben Herrenschmidt Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David Woodhouse Cc: Eric Biederman Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: James Gowans Cc: Jonathan Corbet Cc: Krzysztof Kozlowski Cc: Marc Rutland Cc: Paolo Bonzini Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Pratyush Yadav Cc: Rob Herring Cc: Saravana Kannan Cc: Stanislav Kinsburskii Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Thomas Lendacky Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 02dcfc8c427e..348844cffb13 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -16,13 +16,34 @@ enum kho_event { KEXEC_KHO_ABORT = 1, }; +struct folio; struct notifier_block; +#define DECLARE_KHOSER_PTR(name, type) \ + union { \ + phys_addr_t phys; \ + type ptr; \ + } name +#define KHOSER_STORE_PTR(dest, val) \ + ({ \ + typeof(val) v = val; \ + typecheck(typeof((dest).ptr), v); \ + (dest).phys = virt_to_phys(v); \ + }) +#define KHOSER_LOAD_PTR(src) \ + ({ \ + typeof(src) s = src; \ + (typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \ + }) + struct kho_serialization; #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); +int kho_preserve_folio(struct folio *folio); +int kho_preserve_phys(phys_addr_t phys, size_t size); +struct folio *kho_restore_folio(phys_addr_t phys); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); int kho_retrieve_subtree(const char *name, phys_addr_t *phys); @@ -39,6 +60,21 @@ static inline bool kho_is_enabled(void) return false; } +static inline int kho_preserve_folio(struct folio *folio) +{ + return -EOPNOTSUPP; +} + +static inline int kho_preserve_phys(phys_addr_t phys, size_t size) +{ + return -EOPNOTSUPP; +} + +static inline struct folio *kho_restore_folio(phys_addr_t phys) +{ + return NULL; +} + static inline int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt) { -- cgit v1.2.3 From 3bdecc3c93f9f68d11ed54971dde169b6ead9d78 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 9 May 2025 00:46:25 -0700 Subject: kexec: add KHO support to kexec file loads Kexec has 2 modes: A user space driven mode and a kernel driven mode. For the kernel driven mode, kernel code determines the physical addresses of all target buffers that the payload gets copied into. With KHO, we can only safely copy payloads into the "scratch area". Teach the kexec file loader about it, so it only allocates for that area. In addition, enlighten it with support to ask the KHO subsystem for its respective payloads to copy into target memory. Also teach the KHO subsystem how to fill the images for file loads. Link: https://lkml.kernel.org/r/20250509074635.3187114-8-changyuanl@google.com Signed-off-by: Alexander Graf Co-developed-by: Mike Rapoport (Microsoft) Signed-off-by: Mike Rapoport (Microsoft) Co-developed-by: Changyuan Lyu Signed-off-by: Changyuan Lyu Cc: Andy Lutomirski Cc: Anthony Yznaga Cc: Arnd Bergmann Cc: Ashish Kalra Cc: Ben Herrenschmidt Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David Woodhouse Cc: Eric Biederman Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: James Gowans Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Krzysztof Kozlowski Cc: Marc Rutland Cc: Paolo Bonzini Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Pratyush Yadav Cc: Rob Herring Cc: Saravana Kannan Cc: Stanislav Kinsburskii Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Thomas Lendacky Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/kexec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c8971861521a..075255de8154 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -371,6 +371,11 @@ struct kimage { size_t ima_buffer_size; #endif + struct { + struct kexec_segment *scratch; + phys_addr_t fdt; + } kho; + /* Core ELF header buffer */ void *elf_headers; unsigned long elf_headers_sz; -- cgit v1.2.3 From f88ce2c84a341f44a7d00bc10868714bc4751f7e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Apr 2025 14:33:37 +0100 Subject: mm: introduce for_each_valid_pfn() and use it from reserve_bootmem_region() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: Introduce for_each_valid_pfn()", v4. There are cases where a naïve loop over a PFN range, calling pfn_valid() on each one, is horribly inefficient. Ruihan Li reported the case where memmap_init() iterates all the way from zero to a potentially large value of ARCH_PFN_OFFSET, and we at Amazon found the reserve_bootmem_region() one as it affects hypervisor live update. Others are more cosmetic. By introducing a for_each_valid_pfn() helper it can optimise away a lot of pointless calls to pfn_valid(), skipping immediately to the next valid PFN and also skipping *all* checks within a valid (sub)region according to the granularity of the memory model in use. This patch (of 7) Especially since commit 9092d4f7a1f8 ("memblock: update initialization of reserved pages"), the reserve_bootmem_region() function can spend a significant amount of time iterating over every 4KiB PFN in a range, calling pfn_valid() on each one, and ultimately doing absolutely nothing. On a platform used for virtualization, with large NOMAP regions that eventually get used for guest RAM, this leads to a significant increase in steal time experienced during kexec for a live update. Introduce for_each_valid_pfn() and use it from reserve_bootmem_region(). This implementation is precisely the same naïve loop that the functio used to have, but subsequent commits will provide optimised versions for FLATMEM and SPARSEMEM, and this version will remain for those architectures which provide their own pfn_valid() implementation, until/unless they also provide a matching for_each_valid_pfn(). 
Link: https://lkml.kernel.org/r/20250423133821.789413-1-dwmw2@infradead.org Link: https://lkml.kernel.org/r/20250423133821.789413-2-dwmw2@infradead.org Signed-off-by: David Woodhouse Reviewed-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Marc Rutland Cc: Marc Zyngier Cc: Ruihan Li Cc: Will Deacon Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6ccec1bf2896..230a29c2ed1a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2177,6 +2177,16 @@ void sparse_init(void); #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ +/* + * Fallback case for when the architecture provides its own pfn_valid() but + * not a corresponding for_each_valid_pfn(). + */ +#ifndef for_each_valid_pfn +#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \ + for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++) \ + if (pfn_valid(_pfn)) +#endif + #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ -- cgit v1.2.3 From 928930c2e0a8e1d9252aedb1fa9be83c3669dfd7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Apr 2025 14:33:38 +0100 Subject: mm: implement for_each_valid_pfn() for CONFIG_FLATMEM In the FLATMEM case, the default pfn_valid() just checks that the PFN is within the range [ ARCH_PFN_OFFSET .. ARCH_PFN_OFFSET + max_mapnr ). The for_each_valid_pfn() function can therefore be a simple for() loop using those as min/max respectively. Link: https://lkml.kernel.org/r/20250423133821.789413-3-dwmw2@infradead.org Signed-off-by: David Woodhouse Reviewed-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Marc Rutland Cc: Marc Zyngier Cc: Ruihan Li Cc: Will Deacon Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/asm-generic/memory_model.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index a3b5029aebbd..74d0077cc5fa 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -30,7 +30,15 @@ static inline int pfn_valid(unsigned long pfn) return pfn >= pfn_offset && (pfn - pfn_offset) < max_mapnr; } #define pfn_valid pfn_valid -#endif + +#ifndef for_each_valid_pfn +#define for_each_valid_pfn(pfn, start_pfn, end_pfn) \ + for ((pfn) = max_t(unsigned long, (start_pfn), ARCH_PFN_OFFSET); \ + (pfn) < min_t(unsigned long, (end_pfn), \ + ARCH_PFN_OFFSET + max_mapnr); \ + (pfn)++) +#endif /* for_each_valid_pfn */ +#endif /* valid_pfn */ #elif defined(CONFIG_SPARSEMEM_VMEMMAP) -- cgit v1.2.3 From 037926316c9ded1194927ee56b862e3a1450aaf3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Apr 2025 14:33:39 +0100 Subject: mm: implement for_each_valid_pfn() for CONFIG_SPARSEMEM Implement for_each_valid_pfn() based on two helper functions. The first_valid_pfn() function largely mirrors pfn_valid(), calling into a pfn_section_first_valid() helper which is trivial for the !VMEMMAP case, and in the VMEMMAP case will skip to the next subsection as needed. 
Since next_valid_pfn() knows that its argument *is* a valid PFN, it doesn't need to do any checking at all while iterating over the low bits within a (sub)section mask; the whole (sub)section is either present or not. Note that the VMEMMAP version of pfn_section_first_valid() may return a value *higher* than end_pfn when skipping to the next subsection, and first_valid_pfn() happily returns that higher value. This is fine. [dwmw2@infradead.org: fix next_valid_pfn() for sparsemem] Link: https://lkml.kernel.org/r/c15100fcf6781a60b852c4dbb43bdc98a678fcf0.camel@infradead.org Link: https://lkml.kernel.org/r/20250423133821.789413-4-dwmw2@infradead.org Signed-off-by: David Woodhouse Reviewed-by: Mike Rapoport (Microsoft) Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: David Hildenbrand Cc: Marc Rutland Cc: Marc Zyngier Cc: Ruihan Li Cc: Will Deacon Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 230a29c2ed1a..b19a98c20de8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2075,11 +2075,37 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) return usage ? test_bit(idx, usage->subsection_map) : 0; } + +static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn) +{ + struct mem_section_usage *usage = READ_ONCE(ms->usage); + int idx = subsection_map_index(*pfn); + unsigned long bit; + + if (!usage) + return false; + + if (test_bit(idx, usage->subsection_map)) + return true; + + /* Find the next subsection that exists */ + bit = find_next_bit(usage->subsection_map, SUBSECTIONS_PER_SECTION, idx); + if (bit == SUBSECTIONS_PER_SECTION) + return false; + + *pfn = (*pfn & PAGE_SECTION_MASK) + (bit * PAGES_PER_SUBSECTION); + return true; +} #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { return 1; } + +static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn) +{ + return true; +} #endif void sparse_init_early_section(int nid, struct page *map, unsigned long pnum, @@ -2128,6 +2154,58 @@ static inline int pfn_valid(unsigned long pfn) return ret; } + +/* Returns end_pfn or higher if no valid PFN remaining in range */ +static inline unsigned long first_valid_pfn(unsigned long pfn, unsigned long end_pfn) +{ + unsigned long nr = pfn_to_section_nr(pfn); + + rcu_read_lock_sched(); + + while (nr <= __highest_present_section_nr && pfn < end_pfn) { + struct mem_section *ms = __pfn_to_section(pfn); + + if (valid_section(ms) && + (early_section(ms) || pfn_section_first_valid(ms, &pfn))) { + rcu_read_unlock_sched(); + return pfn; + } + + /* Nothing left in this section? Skip to next section */ + nr++; + pfn = section_nr_to_pfn(nr); + } + + rcu_read_unlock_sched(); + return end_pfn; +} + +static inline unsigned long next_valid_pfn(unsigned long pfn, unsigned long end_pfn) +{ + pfn++; + + if (pfn >= end_pfn) + return end_pfn; + + /* + * Either every PFN within the section (or subsection for VMEMMAP) is + * valid, or none of them are. So there's no point repeating the check + * for every PFN; only call first_valid_pfn() again when crossing a + * (sub)section boundary (i.e. !(pfn & ~PAGE_{SUB,}SECTION_MASK)). + */ + if (pfn & ~(IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) ? 
+ PAGE_SUBSECTION_MASK : PAGE_SECTION_MASK)) + return pfn; + + return first_valid_pfn(pfn, end_pfn); +} + + +#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \ + for ((_pfn) = first_valid_pfn((_start_pfn), (_end_pfn)); \ + (_pfn) < (_end_pfn); \ + (_pfn) = next_valid_pfn((_pfn), (_end_pfn))) + #endif static inline int pfn_in_present_section(unsigned long pfn) -- cgit v1.2.3 From 4428a35f91f0f0e31d874038b3091e1c5a461f34 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Thu, 24 Apr 2025 23:56:06 +0800 Subject: mm/rmap: inline folio_test_large_maybe_mapped_shared() into callers To prevent the function from being used when CONFIG_MM_ID is disabled, we intend to inline it into its few callers, which also would help maintain the expected code placement. Link: https://lkml.kernel.org/r/20250424155606.57488-1-lance.yang@linux.dev Signed-off-by: Lance Yang Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Mingzhe Yang Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- include/linux/page-flags.h | 4 ---- include/linux/rmap.h | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9b701cfbef22..21dd110b6655 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2111,7 +2111,7 @@ static inline bool folio_maybe_mapped_shared(struct folio *folio) */ if (mapcount <= 1) return false; - return folio_test_large_maybe_mapped_shared(folio); + return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids); } #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index d3909cb1e576..37b11f15dbd9 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1230,10 +1230,6 @@ static inline int folio_has_private(const struct folio *folio) return !!(folio->flags & PAGE_FLAGS_PRIVATE); } -static inline bool folio_test_large_maybe_mapped_shared(const struct folio *folio) -{ - return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids); -} #undef PF_ANY #undef PF_HEAD #undef PF_NO_TAIL diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 6b82b618846e..c4f4903b1088 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -223,7 +223,7 @@ static inline void __folio_large_mapcount_sanity_checks(const struct folio *foli VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY && folio->_mm_id_mapcount[1] < 0); VM_WARN_ON_ONCE(!folio_mapped(folio) && - folio_test_large_maybe_mapped_shared(folio)); + test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids)); } static __always_inline void folio_set_large_mapcount(struct folio *folio, -- cgit v1.2.3 From 3fc567e4c0b71d6c59ba26c5d6e54cf3c490dd3a Mon Sep 17 00:00:00 2001 From: Libo Chen Date: Wed, 23 Apr 2025 19:45:23 -0700 Subject: sched/numa: add tracepoint that tracks the skipping of numa balancing due to cpuset memory pinning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike sched_skip_vma_numa tracepoint which tracks skipped VMAs, this tracks the task subjected to cpuset.mems pinning and prints out its allowed memory node mask. 
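A sketch of the intended call site (hedged: the real hook sits in the NUMA balancing code and its exact condition may differ; only the tracepoint signature comes from the patch below, and 'p' is a placeholder task pointer):

    /* Skip NUMA balancing when cpuset.mems pins the task to a single node. */
    if (cpusets_enabled() && nodes_weight(p->mems_allowed) == 1) {
            trace_sched_skip_cpuset_numa(p, &p->mems_allowed);
            return;
    }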
Link: https://lkml.kernel.org/r/20250424024523.2298272-3-libo.chen@oracle.com Signed-off-by: Libo Chen Cc: "Chen, Tim C" Cc: Chen Yu Cc: Chris Hyser Cc: Daniel Jordan Cc: Ingo Molnar Cc: Juri Lelli Cc: K Prateek Nayak Cc: Lorenzo Stoakes Cc: Madadi Vineeth Reddy Cc: Mel Gorman Cc: Michal Koutný Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Srikanth Aithal Cc: Steven Rostedt Cc: Tejun Heo Cc: Venkat Rao Bagalkote Cc: Vincent Guittot Signed-off-by: Andrew Morton --- include/trace/events/sched.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8994e97d86c1..ff3990318aec 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -745,6 +745,39 @@ TRACE_EVENT(sched_skip_vma_numa, __entry->vm_end, __print_symbolic(__entry->reason, NUMAB_SKIP_REASON)) ); + +TRACE_EVENT(sched_skip_cpuset_numa, + + TP_PROTO(struct task_struct *tsk, nodemask_t *mem_allowed_ptr), + + TP_ARGS(tsk, mem_allowed_ptr), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( pid_t, tgid ) + __field( pid_t, ngid ) + __array( unsigned long, mem_allowed, BITS_TO_LONGS(MAX_NUMNODES)) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = task_pid_nr(tsk); + __entry->tgid = task_tgid_nr(tsk); + __entry->ngid = task_numa_group_id(tsk); + BUILD_BUG_ON(sizeof(nodemask_t) != \ + BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long)); + memcpy(__entry->mem_allowed, mem_allowed_ptr->bits, + sizeof(__entry->mem_allowed)); + ), + + TP_printk("comm=%s pid=%d tgid=%d ngid=%d mem_nodes_allowed=%*pbl", + __entry->comm, + __entry->pid, + __entry->tgid, + __entry->ngid, + MAX_NUMNODES, __entry->mem_allowed) +); #endif /* CONFIG_NUMA_BALANCING */ /* -- cgit v1.2.3 From 60309008e1e2b2d4bff3c2475b0c74faf395f787 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Apr 2025 10:27:54 +0300 Subject: util_macros.h: make the header more resilient Add missing header inclusions. Link: https://lkml.kernel.org/r/20250428072754.3265274-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton --- include/linux/util_macros.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 3b570b765b75..76ca2b83c13e 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -2,7 +2,10 @@ #ifndef _LINUX_HELPER_MACROS_H_ #define _LINUX_HELPER_MACROS_H_ +#include #include +#include +#include /** * for_each_if - helper for handling conditionals in various for_each macros -- cgit v1.2.3 From 86ebd50224c0734d965843260d0dc057a9431c61 Mon Sep 17 00:00:00 2001 From: Shivank Garg Date: Wed, 30 Apr 2025 10:01:51 +0000 Subject: mm: add folio_expected_ref_count() for reference count calculation Patch series " JFS: Implement migrate_folio for jfs_metapage_aops" v5. This patchset addresses a warning that occurs during memory compaction due to JFS's missing migrate_folio operation. The warning was introduced by commit 7ee3647243e5 ("migrate: Remove call to ->writepage") which added explicit warnings when filesystem don't implement migrate_folio. 
The syzbot reported following [1]: jfs_metapage_aops does not implement migrate_folio WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 fallback_migrate_folio mm/migrate.c:953 [inline] WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 move_to_new_folio+0x70e/0x840 mm/migrate.c:1007 Modules linked in: CPU: 1 UID: 0 PID: 5861 Comm: syz-executor280 Not tainted 6.15.0-rc1-next-20250411-syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 RIP: 0010:fallback_migrate_folio mm/migrate.c:953 [inline] RIP: 0010:move_to_new_folio+0x70e/0x840 mm/migrate.c:1007 To fix this issue, this series implement metapage_migrate_folio() for JFS which handles both single and multiple metapages per page configurations. While most filesystems leverage existing migration implementations like filemap_migrate_folio(), buffer_migrate_folio_norefs() or buffer_migrate_folio() (which internally used folio_expected_refs()), JFS's metapage architecture requires special handling of its private data during migration. To support this, this series introduce the folio_expected_ref_count(), which calculates external references to a folio from page/swap cache, private data, and page table mappings. This standardized implementation replaces the previous ad-hoc folio_expected_refs() function and enables JFS to accurately determine whether a folio has unexpected references before attempting migration. Implement folio_expected_ref_count() to calculate expected folio reference counts from: - Page/swap cache (1 per page) - Private data (1) - Page table mappings (1 per map) While originally needed for page migration operations, this improved implementation standardizes reference counting by consolidating all refcount contributors into a single, reusable function that can benefit any subsystem needing to detect unexpected references to folios. The folio_expected_ref_count() returns the sum of these external references without including any reference the caller itself might hold. Callers comparing against the actual folio_ref_count() must account for their own references separately. Link: https://syzkaller.appspot.com/bug?extid=8bb6fd945af4e0ad9299 [1] Link: https://lkml.kernel.org/r/20250430100150.279751-1-shivankg@amd.com Link: https://lkml.kernel.org/r/20250430100150.279751-2-shivankg@amd.com Signed-off-by: David Hildenbrand Signed-off-by: Shivank Garg Suggested-by: Matthew Wilcox Co-developed-by: David Hildenbrand Cc: Alistair Popple Cc: Dave Kleikamp Cc: Donet Tom Cc: Jane Chu Cc: Kefeng Wang Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 21dd110b6655..1d1953e37baa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2114,6 +2114,61 @@ static inline bool folio_maybe_mapped_shared(struct folio *folio) return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids); } +/** + * folio_expected_ref_count - calculate the expected folio refcount + * @folio: the folio + * + * Calculate the expected folio refcount, taking references from the pagecache, + * swapcache, PG_private and page table mappings into account. Useful in + * combination with folio_ref_count() to detect unexpected references (e.g., + * GUP or other temporary references). + * + * Does currently not consider references from the LRU cache. 
If the folio + * was isolated from the LRU (which is the case during migration or split), + * the LRU cache does not apply. + * + * Calling this function on an unmapped folio -- !folio_mapped() -- that is + * locked will return a stable result. + * + * Calling this function on a mapped folio will not result in a stable result, + * because nothing stops additional page table mappings from coming (e.g., + * fork()) or going (e.g., munmap()). + * + * Calling this function without the folio lock will also not result in a + * stable result: for example, the folio might get dropped from the swapcache + * concurrently. + * + * However, even when called without the folio lock or on a mapped folio, + * this function can be used to detect unexpected references early (for example, + * if it makes sense to even lock the folio and unmap it). + * + * The caller must add any reference (e.g., from folio_try_get()) it might be + * holding itself to the result. + * + * Returns the expected folio refcount. + */ +static inline int folio_expected_ref_count(const struct folio *folio) +{ + const int order = folio_order(folio); + int ref_count = 0; + + if (WARN_ON_ONCE(folio_test_slab(folio))) + return 0; + + if (folio_test_anon(folio)) { + /* One reference per page from the swapcache. */ + ref_count += folio_test_swapcache(folio) << order; + } else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) { + /* One reference per page from the pagecache. */ + ref_count += !!folio->mapping << order; + /* One reference from PG_private. */ + ref_count += folio_test_private(folio); + } + + /* One reference per page table mapping. */ + return ref_count + folio_mapcount(folio); +} + #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE static inline int arch_make_folio_accessible(struct folio *folio) { -- cgit v1.2.3 From 6c36ac1e124f1be97cf0485a220865fce5a2020d Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 28 Apr 2025 16:28:14 +0100 Subject: mm: establish mm/vma_exec.c for shared exec/mm VMA functionality Patch series "move all VMA allocation, freeing and duplication logic to mm", v3. Currently VMA allocation, freeing and duplication exist in kernel/fork.c, which is a violation of separation of concerns, and leaves these functions exposed to the rest of the kernel when they are in fact internal implementation details. Resolve this by moving this logic to mm, and making it internal to vma.c, vma.h. This also allows us, in future, to provide userland testing around this functionality. We additionally abstract dup_mmap() to mm, being careful to ensure kernel/fork.c acceses this via the mm internal header so it is not exposed elsewhere in the kernel. As part of this change, also abstract initial stack allocation performed in __bprm_mm_init() out of fs code into mm via the create_init_stack_vma(), as this code uses vm_area_alloc() and vm_area_free(). In order to do so sensibly, we introduce a new mm/vma_exec.c file, which contains the code that is shared by mm and exec. This file is added to both memory mapping and exec sections in MAINTAINERS so both sets of maintainers can maintain oversight. As part of this change, we also move relocate_vma_down() to mm/vma_exec.c so all shared mm/exec functionality is kept in one place. We add code shared between nommu and mmu-enabled configurations in order to share VMA allocation, freeing and duplication code correctly while also keeping these functions available in userland VMA testing. This is achieved by adding a mm/vma_init.c file which is also compiled by the userland tests. 
This patch (of 4): There is functionality that overlaps the exec and memory mapping subsystems. While it properly belongs in mm, it is important that exec maintainers maintain oversight of this functionality correctly. We can establish both goals by adding a new mm/vma_exec.c file which contains these 'glue' functions, and have fs/exec.c import them. As a part of this change, to ensure that proper oversight is achieved, add the file to both the MEMORY MAPPING and EXEC & BINFMT API, ELF sections. scripts/get_maintainer.pl can correctly handle files in multiple entries and this neatly handles the cross-over. [akpm@linux-foundation.org: fix comment typo] Link: https://lkml.kernel.org/r/80f0d0c6-0b68-47f9-ab78-0ab7f74677fc@lucifer.local Link: https://lkml.kernel.org/r/cover.1745853549.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/91f2cee8f17d65214a9d83abb7011aa15f1ea690.1745853549.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Reviewed-by: Pedro Falcato Reviewed-by: David Hildenbrand Reviewed-by: Kees Cook Reviewed-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1d1953e37baa..43748c8f3454 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3278,7 +3278,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void exit_mmap(struct mm_struct *); -int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, bool write); -- cgit v1.2.3 From 69eadd6a05409ca3725cabf8d60ccf6c8f87e193 Mon Sep 17 00:00:00 2001 From: Guilherme Giacomo Simoes Date: Mon, 28 Apr 2025 17:14:09 -0300 Subject: mm: page-flags-layout.h: change the KASAN_TAG_WIDTH for HW_TAGS KASAN_TAG_WIDTH is 8 bits for both (HW_TAGS and SW_TAGS), but for HW_TAGS the KASAN_TAG_WIDTH can be 4 bits bits because due to the design of the MTE the memory words for storing metadata only need 4 bits. Change the preprocessor define KASAN_TAG_WIDTH for check if SW_TAGS is define, so KASAN_TAG_WIDTH should be 8 bits, but if HW_TAGS is define, so KASAN_TAG_WIDTH should be 4 bits to save a few flags bits. 
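Illustrative only (this define is not part of the patch and the name is invented): the field only has to cover every possible tag value, and arm64 MTE tags are 4 bits wide, so 16 values fit in the narrower width.

    /* 0xff with SW_TAGS, 0xf with HW_TAGS; enough for all 16 MTE tag values. */
    #define EXAMPLE_KASAN_TAG_MASK	((1UL << KASAN_TAG_WIDTH) - 1)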
Link: https://lkml.kernel.org/r/20250428201409.5482-1-trintaeoitogc@gmail.com Signed-off-by: Guilherme Giacomo Simoes Suggested-by: Andrey Konovalov Reviewed-by: Andrey Konovalov Cc: Pasha Tatashin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/page-flags-layout.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 4f5c9e979bb9..760006b1c480 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -72,8 +72,10 @@ #define NODE_NOT_IN_PAGE_FLAGS 1 #endif -#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) +#if defined(CONFIG_KASAN_SW_TAGS) #define KASAN_TAG_WIDTH 8 +#elif defined(CONFIG_KASAN_HW_TAGS) +#define KASAN_TAG_WIDTH 4 #else #define KASAN_TAG_WIDTH 0 #endif -- cgit v1.2.3 From 2b80f633c360ce3c56a7071782eae70852c8f344 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 1 May 2025 02:10:50 +0800 Subject: filemap: do not use folio_contains for swap cache folios Currently, none of the folio_contains callers should encounter swap cache folios. For fs/ callers, swap cache folios are never part of their workflow. For filemap and truncate, folio_contains is only used for sanity checks to verify the folio index matches the expected lookup / invalidation target. The swap cache does not utilize filemap or truncate helpers in ways that would trigger these checks, as it mostly implements its own cache management. Shmem won't trigger these sanity checks either unless thing went wrong, as it would directly trigger a BUG because swap cache index are unrelated and almost never matches shmem index. Shmem have to handle mixed values of folios, shadows, and swap entries, so it has its own way of handling the mapping. While some filemap helpers works for swap cache space, the swap cache is different from the page cache in many ways. So this particular helper will unlikely to work in a helpful way for swap cache folios. So make it explicit here that folio_contains should not be used for swap cache folios. This helps to avoid misuse, make swap cache less exposed and remove the folio_index usage here. [akpm@linux-foundation.org: s/VM_WARN_ON_FOLIO/VM_WARN_ON_ONCE_FOLIO/, per Kairui] Link: https://lkml.kernel.org/r/20250430181052.55698-5-ryncsn@gmail.com Signed-off-by: Kairui Song Acked-by: David Hildenbrand Cc: Chao Yu Cc: Chris Li Cc: Chris Mason Cc: Christian Brauner Cc: David Sterba Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Jaegeuk Kim Cc: Joanne Koong Cc: Johannes Weiner Cc: Josef Bacik Cc: Matthew Wilcox (Oracle) Cc: Miklos Szeredi Cc: Nhat Pham Cc: Qu Wenruo Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index af25fb640463..b2c2ff8de046 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -935,14 +935,14 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) * @folio: The folio. * @index: The page index within the file. * - * Context: The caller should have the page locked in order to prevent - * (eg) shmem from moving the page between the page cache and swap cache - * and changing its index in the middle of the operation. + * Context: The caller should have the folio locked and ensure + * e.g., shmem did not move this folio to the swap cache. * Return: true or false. 
*/ static inline bool folio_contains(struct folio *folio, pgoff_t index) { - return index - folio_index(folio) < folio_nr_pages(folio); + VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio); + return index - folio->index < folio_nr_pages(folio); } unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, -- cgit v1.2.3 From 7d0f0f06153116bb737eef76d47406639e161613 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 1 May 2025 02:10:51 +0800 Subject: mm: move folio_index to mm/swap.h and remove no longer needed helper There are no remaining users of folio_index() outside the mm subsystem. Move it to mm/swap.h to co-locate it with swap_cache_index(), eliminating a forward declaration, and a function call overhead. Also remove the helper that was used to fix circular header dependency issue. Link: https://lkml.kernel.org/r/20250430181052.55698-6-ryncsn@gmail.com Signed-off-by: Kairui Song Acked-by: David Hildenbrand Cc: Chao Yu Cc: Chris Li Cc: Chris Mason Cc: Christian Brauner Cc: David Sterba Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Jaegeuk Kim Cc: Joanne Koong Cc: Johannes Weiner Cc: Josef Bacik Cc: Matthew Wilcox (Oracle) Cc: Miklos Szeredi Cc: Nhat Pham Cc: Qu Wenruo Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b2c2ff8de046..5d66786867eb 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -884,26 +884,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } -extern pgoff_t __folio_swap_cache_index(struct folio *folio); - -/** - * folio_index - File index of a folio. - * @folio: The folio. - * - * For a folio which is either in the page cache or the swap cache, - * return its index within the address_space it belongs to. If you know - * the page is definitely in the page cache, you can look at the folio's - * index directly. - * - * Return: The index (offset in units of pages) of a folio in its file. - */ -static inline pgoff_t folio_index(struct folio *folio) -{ - if (unlikely(folio_test_swapcache(folio))) - return __folio_swap_cache_index(folio); - return folio->index; -} - /** * folio_next_index - Get the index of the next folio. * @folio: The current folio. -- cgit v1.2.3 From dd309bfc68efc4522b4d33a95e92afff249e103b Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 1 May 2025 02:10:52 +0800 Subject: mm, swap: remove no longer used swap mapping helper This helper existed to fix the circular header dependency issue but it is no longer used since commit 0d40cfe63a2f ("fs: remove folio_file_mapping()"), remove it. 
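Referring back to the folio_contains() change above, a usage sketch (the caller shown here is hypothetical, not taken from these patches): callers are expected to hold the folio lock and to never pass swap cache folios, for example when revalidating a page cache lookup.

    folio_lock(folio);
    /* Revalidate: the folio must still belong to 'mapping' and cover 'index'. */
    if (unlikely(folio->mapping != mapping || !folio_contains(folio, index))) {
            folio_unlock(folio);
            folio_put(folio);
            goto repeat;        /* 'repeat' is a label in the hypothetical caller */
    }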
Link: https://lkml.kernel.org/r/20250430181052.55698-7-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Chao Yu Cc: Chris Li Cc: Chris Mason Cc: Christian Brauner Cc: David Sterba Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Jaegeuk Kim Cc: Joanne Koong Cc: Johannes Weiner Cc: Josef Bacik Cc: Miklos Szeredi Cc: Nhat Pham Cc: Qu Wenruo Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 5d66786867eb..d2ced9920992 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -533,7 +533,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) } struct address_space *folio_mapping(struct folio *); -struct address_space *swapcache_mapping(struct folio *); /** * folio_flush_mapping - Find the file mapping this folio belongs to. -- cgit v1.2.3 From 0aa7b390fc40a871267a2328bbbefca8b37ad307 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 24 Apr 2025 16:25:25 +0300 Subject: mtd: core: always create master device Create master device without partition when CONFIG_MTD_PARTITIONED_MASTER flag is unset. This streamlines device tree and allows to anchor runtime power management on master device in all cases. Signed-off-by: Alexander Usyskin Signed-off-by: Miquel Raynal --- include/linux/mtd/partitions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index b74a539ec581..5daf80df9e89 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -108,7 +108,7 @@ extern void deregister_mtd_parser(struct mtd_part_parser *parser); deregister_mtd_parser) int mtd_add_partition(struct mtd_info *master, const char *name, - long long offset, long long length); + long long offset, long long length, struct mtd_info **part); int mtd_del_partition(struct mtd_info *master, int partno); uint64_t mtd_get_device_size(const struct mtd_info *mtd); -- cgit v1.2.3 From 598995027b9181ada81789bf01fb8ef30d93c9dc Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Tue, 6 May 2025 21:57:31 +0100 Subject: soc: samsung: exynos-pmu: enable CPU hotplug support for gs101 Some additional register writes are required when hotplugging CPUs on gs101, without these the system hangs when hotplugging. Specifically a CPU_INFORM register needs to be programmed with a hint value which is used by the EL3 firmware (el3mon) and the pmu-intr-gen registers need to be programmed. With this patch applied, and corresponding DT update CPU hotplug now works as expected. e.g. echo 0 > /sys/devices/system/cpu/cpu6/online echo 1 > /sys/devices/system/cpu/cpu6/online Note: to maintain compatibility with older DTs that didn't specify pmu-intr-gen phandle only a warning is issued if the syscon can't be obtained. 
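A rough sketch of the sequence the driver performs with the new register definitions below (hedged: the hint value, the regmap handles and the exact bit written to the interrupt-generation block are placeholders, not taken from the patch):

    /* Program the EL3 hint for this core, then enable its pmu-intr-gen bit. */
    regmap_write(pmu_alive, GS101_CPU_INFORM(cpu), hint);
    regmap_write(pmu_intr_gen, GS101_GRP2_INTR_BID_ENABLE, BIT(cpu));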
Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250506-contrib-pg-cpu-hotplug-suspend2ram-fixes-v1-v4-5-9f64a2657316@linaro.org [krzk: few blank line and white-space alignment fixes from checkpatch] Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/exynos-regs-pmu.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index ce1a3790d6fb..0d5a17ea8fb8 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -658,9 +658,20 @@ #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8) /* For Tensor GS101 */ +/* PMU ALIVE */ #define GS101_SYSIP_DAT0 (0x810) +#define GS101_CPU0_INFORM (0x860) +#define GS101_CPU_INFORM(cpu) \ + (GS101_CPU0_INFORM + (cpu*4)) #define GS101_SYSTEM_CONFIGURATION (0x3A00) #define GS101_PHY_CTRL_USB20 (0x3EB0) #define GS101_PHY_CTRL_USBDP (0x3EB4) +/* PMU INTR GEN */ +#define GS101_GRP1_INTR_BID_UPEND (0x0108) +#define GS101_GRP1_INTR_BID_CLEAR (0x010c) +#define GS101_GRP2_INTR_BID_ENABLE (0x0200) +#define GS101_GRP2_INTR_BID_UPEND (0x0208) +#define GS101_GRP2_INTR_BID_CLEAR (0x020c) + #endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */ -- cgit v1.2.3 From 8c619eb21b8e87ae95877e9cca9fcb0e3115776e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sun, 4 May 2025 10:14:34 +0200 Subject: net: phy: micrel: remove KSZ9477 EEE quirks now handled by phylink The KSZ9477 PHY driver contained workarounds for broken EEE capability advertisements by manually masking supported EEE modes and forcibly disabling EEE if MICREL_NO_EEE was set. With proper MAC-side EEE handling implemented via phylink, these quirks are no longer necessary. Remove MICREL_NO_EEE handling and the use of ksz9477_get_features(). This simplifies the PHY driver and avoids duplicated EEE management logic. Signed-off-by: Oleksij Rempel Cc: stable@vger.kernel.org # v6.14+ Link: https://patch.msgid.link/20250504081434.424489-3-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni --- include/linux/micrel_phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 591bf5b5e8dc..9af01bdd86d2 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -44,7 +44,6 @@ #define MICREL_PHY_50MHZ_CLK BIT(0) #define MICREL_PHY_FXEN BIT(1) #define MICREL_KSZ8_P1_ERRATA BIT(2) -#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC -- cgit v1.2.3 From 03e96b8c11d140fb4ead0b30c2d6e1a294b501ef Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 8 May 2025 00:48:21 +0000 Subject: netmem: add niov->type attribute to distinguish different net_iov types Later patches in the series adds TX net_iovs where there is no pp associated, so we can't rely on niov->pp->mp_ops to tell what is the type of the net_iov. Add a type enum to the net_iov which tells us the net_iov type. 
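With the enum added in the diff below, code that previously had to inspect niov->pp->mp_ops can simply test the type field; a minimal sketch (the helper name is invented here):

    /* Classify a net_iov without touching its (possibly absent) page_pool. */
    static inline bool net_iov_is_dmabuf(const struct net_iov *niov)
    {
            return niov->type == NET_IOV_DMABUF;
    }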
Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20250508004830.4100853-2-almasrymina@google.com Signed-off-by: Paolo Abeni --- include/net/netmem.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netmem.h b/include/net/netmem.h index c61d5b21e7b4..973fdbcfef38 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -20,8 +20,17 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers); */ #define NET_IOV 0x01UL +enum net_iov_type { + NET_IOV_DMABUF, + NET_IOV_IOURING, + + /* Force size to unsigned long to make the NET_IOV_ASSERTS below pass. + */ + NET_IOV_MAX = ULONG_MAX +}; + struct net_iov { - unsigned long __unused_padding; + enum net_iov_type type; unsigned long pp_magic; struct page_pool *pp; struct net_iov_area *owner; -- cgit v1.2.3 From e9f3d61db5cb29b3f17f0dc40c3ec2cda2ee93e5 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 8 May 2025 00:48:22 +0000 Subject: net: add get_netmem/put_netmem support Currently net_iovs support only pp ref counts, and do not support a page ref equivalent. This is fine for the RX path as net_iovs are used exclusively with the pp and only pp refcounting is needed there. The TX path however does not use pp ref counts, thus, support for get_page/put_page equivalent is needed for netmem. Support get_netmem/put_netmem. Check the type of the netmem before passing it to page or net_iov specific code to obtain a page ref equivalent. For dmabuf net_iovs, we obtain a ref on the underlying binding. This ensures the entire binding doesn't disappear until all the net_iovs have been put_netmem'ed. We do not need to track the refcount of individual dmabuf net_iovs as we don't allocate/free them from a pool similar to what the buddy allocator does for pages. This code is written to be extensible by other net_iov implementers. 
get_netmem/put_netmem will check the type of the netmem and route it to the correct helper: pages -> [get|put]_page() dmabuf net_iovs -> net_devmem_[get|put]_net_iov() new net_iovs -> new helpers Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250508004830.4100853-3-almasrymina@google.com Signed-off-by: Paolo Abeni --- include/linux/skbuff_ref.h | 4 ++-- include/net/netmem.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 0f3c58007488..9e49372ef1a0 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -17,7 +17,7 @@ */ static inline void __skb_frag_ref(skb_frag_t *frag) { - get_page(skb_frag_page(frag)); + get_netmem(skb_frag_netmem(frag)); } /** @@ -40,7 +40,7 @@ static inline void skb_page_unref(netmem_ref netmem, bool recycle) if (recycle && napi_pp_put_page(netmem)) return; #endif - put_page(netmem_to_page(netmem)); + put_netmem(netmem); } /** diff --git a/include/net/netmem.h b/include/net/netmem.h index 973fdbcfef38..ecb6b29c93f6 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -273,4 +273,7 @@ static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) return __netmem_clear_lsb(netmem)->dma_addr; } +void get_netmem(netmem_ref netmem); +void put_netmem(netmem_ref netmem); + #endif /* _NET_NETMEM_H */ -- cgit v1.2.3 From 8802087d20c0e1c26c4b4fe30e22264bf8285e51 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 8 May 2025 00:48:23 +0000 Subject: net: devmem: TCP tx netlink api Add bind-tx netlink call to attach dmabuf for TX; queue is not required, only ifindex and dmabuf fd for attachment. Signed-off-by: Stanislav Fomichev Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20250508004830.4100853-4-almasrymina@google.com Signed-off-by: Paolo Abeni --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7600bf62dbdf..7eb9571786b8 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -219,6 +219,7 @@ enum { NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, + NETDEV_CMD_BIND_TX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From bd61848900bff597764238f3a8ec67c815cd316e Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 8 May 2025 00:48:24 +0000 Subject: net: devmem: Implement TX path Augment dmabuf binding to be able to handle TX. Additional to all the RX binding, we also create tx_vec needed for the TX path. Provide API for sendmsg to be able to send dmabufs bound to this device: - Provide a new dmabuf_tx_cmsg which includes the dmabuf to send from. - MSG_ZEROCOPY with SCM_DEVMEM_DMABUF cmsg indicates send from dma-buf. Devmem is uncopyable, so piggyback off the existing MSG_ZEROCOPY implementation, while disabling instances where MSG_ZEROCOPY falls back to copying. We additionally pipe the binding down to the new zerocopy_fill_skb_from_devmem which fills a TX skb with net_iov netmems instead of the traditional page netmems. We also special case skb_frag_dma_map to return the dma-address of these dmabuf net_iovs instead of attempting to map pages. The TX path may release the dmabuf in a context where we cannot wait. This happens when the user unbinds a TX dmabuf while there are still references to its netmems in the TX path. 
In that case, the netmems will be put_netmem'd from a context where we can't unmap the dmabuf, Resolve this by making __net_devmem_dmabuf_binding_free schedule_work'd. Based on work by Stanislav Fomichev . A lot of the meat of the implementation came from devmem TCP RFC v1[1], which included the TX path, but Stan did all the rebasing on top of netmem/net_iov. Cc: Stanislav Fomichev Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250508004830.4100853-5-almasrymina@google.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 17 +++++++++++++---- include/net/sock.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f3e72be6f634..c7397b17bb08 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1707,13 +1707,16 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops; struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, - struct ubuf_info *uarg); + struct ubuf_info *uarg, bool devmem); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); +struct net_devmem_dmabuf_binding; + int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, - size_t length); + size_t length, + struct net_devmem_dmabuf_binding *binding); int zerocopy_fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length); @@ -1721,12 +1724,14 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb, static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { - return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len); + return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len, + NULL); } int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, - struct ubuf_info *uarg); + struct ubuf_info *uarg, + struct net_devmem_dmabuf_binding *binding); /* Internal */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) @@ -3697,6 +3702,10 @@ static inline dma_addr_t __skb_frag_dma_map(struct device *dev, size_t offset, size_t size, enum dma_data_direction dir) { + if (skb_frag_is_net_iov(frag)) { + return netmem_to_net_iov(frag->netmem)->dma_addr + offset + + frag->offset; + } return dma_map_page(dev, skb_frag_page(frag), skb_frag_off(frag) + offset, size, dir); } diff --git a/include/net/sock.h b/include/net/sock.h index f0fabb9fd28a..3e15d7105ad2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1851,6 +1851,7 @@ struct sockcm_cookie { u32 tsflags; u32 ts_opt_id; u32 priority; + u32 dmabuf_id; }; static inline void sockcm_init(struct sockcm_cookie *sockc, -- cgit v1.2.3 From 383faec0fd64b9bff15eb5f700f023ec35520a96 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 8 May 2025 00:48:26 +0000 Subject: net: enable driver support for netmem TX Drivers need to make sure not to pass netmem dma-addrs to the dma-mapping API in order to support netmem TX. Add helpers and netmem_dma_*() helpers that enables special handling of netmem dma-addrs that drivers can use. Document in netmem.rst what drivers need to do to support netmem TX. 
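A hedged sketch of the driver-side pattern these helpers enable (the tx_buf structure and its field names are invented; only the two netmem_dma_*() helpers come from the diff below): record a zero unmap address for net_iov frags at map time, and the unmap helper then quietly skips them on completion.

    /* Map path: net_iov dma-addrs must never reach dma_unmap_page() later. */
    netmem_dma_unmap_addr_set(skb_frag_netmem(frag), tx_buf, dma_addr, mapping);

    /* Completion path: a zero address is silently ignored. */
    netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(tx_buf, dma_addr),
                                dma_unmap_len(tx_buf, len),
                                DMA_TO_DEVICE, 0);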
Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250508004830.4100853-7-almasrymina@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 ++ include/net/netmem.h | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 773167508c82..32a1e41636a9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1772,6 +1772,7 @@ enum netdev_reg_state { * @lltx: device supports lockless Tx. Deprecated for real HW * drivers. Mainly used by logical interfaces, such as * bonding and tunnels + * @netmem_tx: device support netmem_tx. * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name @@ -2087,6 +2088,7 @@ struct net_device { struct_group(priv_flags_fast, unsigned long priv_flags:32; unsigned long lltx:1; + unsigned long netmem_tx:1; ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; diff --git a/include/net/netmem.h b/include/net/netmem.h index ecb6b29c93f6..386164fb9c18 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -8,6 +8,7 @@ #ifndef _NET_NETMEM_H #define _NET_NETMEM_H +#include #include #include @@ -276,4 +277,23 @@ static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) void get_netmem(netmem_ref netmem); void put_netmem(netmem_ref netmem); +#define netmem_dma_unmap_addr_set(NETMEM, PTR, ADDR_NAME, VAL) \ + do { \ + if (!netmem_is_net_iov(NETMEM)) \ + dma_unmap_addr_set(PTR, ADDR_NAME, VAL); \ + else \ + dma_unmap_addr_set(PTR, ADDR_NAME, 0); \ + } while (0) + +static inline void netmem_dma_unmap_page_attrs(struct device *dev, + dma_addr_t addr, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + if (!addr) + return; + + dma_unmap_page_attrs(dev, addr, size, dir, attrs); +} + #endif /* _NET_NETMEM_H */ -- cgit v1.2.3 From 4f8ceb0302b36c5f78bcc8d0e7cfa2372fba134c Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 29 Apr 2025 14:51:28 +0200 Subject: mfd: stm32-lptimer: Add support for stm32mp25 Add support for STM32MP25 SoC. A new hardware configuration register (HWCFGR2) has been added, to gather number of capture/compare channels, autonomous mode and input capture capability. The full feature set is implemented in LPTIM1/2/3/4. LPTIM5 supports a smaller set of features. This can now be read from HWCFGR registers. Add new registers to the stm32-lptimer.h: CCMR1, CCR2, HWCFGR1/2 and VERR. Update the stm32_lptimer data struct so signal the number of capture/compare channels to the child devices. Also Remove some unused bit masks (CMPOK_ARROK / CMPOKCF_ARROKCF). 
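A sketch of how the parent MFD driver can now discover the channel count for its child devices, using the definitions added to the header below (probe plumbing and error handling omitted; 'ddata' stands for the driver state that embeds the struct stm32_lptimer fields, and FIELD_GET() comes from <linux/bitfield.h>):

    u32 cfgr2;

    /* Read the capture/compare channel count straight from the hardware. */
    regmap_read(ddata->regmap, STM32_LPTIM_HWCFGR2, &cfgr2);
    ddata->num_cc_chans = FIELD_GET(STM32_LPTIM_HWCFGR2_CHAN_NUM, cfgr2);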
Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20250429125133.1574167-3-fabrice.gasnier@foss.st.com Signed-off-by: Lee Jones --- include/linux/mfd/stm32-lptimer.h | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h index 06d3f11dc3c9..a592c8dc716d 100644 --- a/include/linux/mfd/stm32-lptimer.h +++ b/include/linux/mfd/stm32-lptimer.h @@ -17,20 +17,30 @@ #define STM32_LPTIM_IER 0x08 /* Interrupt Enable Reg */ #define STM32_LPTIM_CFGR 0x0C /* Configuration Reg */ #define STM32_LPTIM_CR 0x10 /* Control Reg */ -#define STM32_LPTIM_CMP 0x14 /* Compare Reg */ +#define STM32_LPTIM_CMP 0x14 /* Compare Reg (MP25 CCR1) */ #define STM32_LPTIM_ARR 0x18 /* Autoreload Reg */ #define STM32_LPTIM_CNT 0x1C /* Counter Reg */ +#define STM32_LPTIM_CCMR1 0x2C /* Capture/Compare Mode MP25 */ +#define STM32_LPTIM_CCR2 0x34 /* Compare Reg2 MP25 */ + +#define STM32_LPTIM_HWCFGR2 0x3EC /* Hardware configuration register 2 - MP25 */ +#define STM32_LPTIM_HWCFGR1 0x3F0 /* Hardware configuration register 1 - MP15 */ +#define STM32_LPTIM_VERR 0x3F4 /* Version identification register - MP15 */ /* STM32_LPTIM_ISR - bit fields */ +#define STM32_LPTIM_DIEROK_ARROK (BIT(24) | BIT(4)) /* MP25 */ +#define STM32_LPTIM_CMP2_ARROK (BIT(19) | BIT(4)) #define STM32_LPTIM_CMPOK_ARROK GENMASK(4, 3) #define STM32_LPTIM_ARROK BIT(4) #define STM32_LPTIM_CMPOK BIT(3) /* STM32_LPTIM_ICR - bit fields */ -#define STM32_LPTIM_ARRMCF BIT(1) +#define STM32_LPTIM_DIEROKCF_ARROKCF (BIT(24) | BIT(4)) /* MP25 */ +#define STM32_LPTIM_CMP2OKCF_ARROKCF (BIT(19) | BIT(4)) #define STM32_LPTIM_CMPOKCF_ARROKCF GENMASK(4, 3) +#define STM32_LPTIM_ARRMCF BIT(1) -/* STM32_LPTIM_IER - bit flieds */ +/* STM32_LPTIM_IER - bit fields */ #define STM32_LPTIM_ARRMIE BIT(1) /* STM32_LPTIM_CR - bit fields */ @@ -53,16 +63,37 @@ /* STM32_LPTIM_ARR */ #define STM32_LPTIM_MAX_ARR 0xFFFF +/* STM32_LPTIM_CCMR1 */ +#define STM32_LPTIM_CC2P GENMASK(19, 18) +#define STM32_LPTIM_CC2E BIT(17) +#define STM32_LPTIM_CC2SEL BIT(16) +#define STM32_LPTIM_CC1P GENMASK(3, 2) +#define STM32_LPTIM_CC1E BIT(1) +#define STM32_LPTIM_CC1SEL BIT(0) + +/* STM32_LPTIM_HWCFGR1 */ +#define STM32_LPTIM_HWCFGR1_ENCODER BIT(16) + +/* STM32_LPTIM_HWCFGR2 */ +#define STM32_LPTIM_HWCFGR2_CHAN_NUM GENMASK(3, 0) + +/* STM32_LPTIM_VERR */ +#define STM32_LPTIM_VERR_23 0x23 /* STM32MP25 */ + /** * struct stm32_lptimer - STM32 Low-Power Timer data assigned by parent device * @clk: clock reference for this instance * @regmap: register map reference for this instance * @has_encoder: indicates this Low-Power Timer supports encoder mode + * @num_cc_chans: indicates the number of capture/compare channels + * @version: indicates the major and minor revision of the controller */ struct stm32_lptimer { struct clk *clk; struct regmap *regmap; bool has_encoder; + unsigned int num_cc_chans; + u32 version; }; #endif -- cgit v1.2.3 From 34a364ff04e960a4d47f558acf7fbafcc3085c1f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 9 May 2025 15:02:27 +0200 Subject: PM: sleep: Introduce pm_suspend_in_progress() Introduce pm_suspend_in_progress() to be used for checking if a system- wide suspend or resume transition is in progress, instead of comparing pm_suspend_target_state directly to PM_SUSPEND_ON, and use it where applicable. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Rodrigo Vivi Acked-by: Rodrigo Vivi Reviewed-by: Raag Jadav Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/2020901.PYKUYFuaPT@rjwysocki.net --- include/linux/suspend.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index da6ebca3ff77..52ea108f9451 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -298,6 +298,11 @@ static inline void s2idle_set_ops(const struct platform_s2idle_ops *ops) {} static inline void s2idle_wake(void) {} #endif /* !CONFIG_SUSPEND */ +static inline bool pm_suspend_in_progress(void) +{ + return pm_suspend_target_state != PM_SUSPEND_ON; +} + /* struct pbe is used for creating lists of pages that should be restored * atomically during the resume from disk, because the page frames they have * occupied before the suspend are in use. -- cgit v1.2.3 From 4a6b1cf0d4c02d6da2976c6314c264d20672937e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 May 2025 22:41:21 +0200 Subject: PM: EM: Introduce em_adjust_cpu_capacity() Add a function for updating the Energy Model for a CPU after its capacity has changed, which subsequently will be used by the intel_pstate driver. An EM_PERF_DOMAIN_ARTIFICIAL check is added to em_recalc_and_update() to prevent it from calling em_compute_costs() for an "artificial" perf domain with a NULL cb parameter which would cause it to crash. Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba Tested-by: Christian Loehle Reviewed-by: Dietmar Eggemann Link: https://patch.msgid.link/3637203.iIbC2pHGDl@rjwysocki.net --- include/linux/energy_model.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index d8eabbf86a5b..7fa1eb3cc823 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -179,6 +179,7 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int em_dev_update_chip_binning(struct device *dev); int em_update_performance_limits(struct em_perf_domain *pd, unsigned long freq_min_khz, unsigned long freq_max_khz); +void em_adjust_cpu_capacity(unsigned int cpu); void em_rebuild_sched_domains(void); /** @@ -403,6 +404,7 @@ int em_update_performance_limits(struct em_perf_domain *pd, { return -EINVAL; } +static inline void em_adjust_cpu_capacity(unsigned int cpu) {} static inline void em_rebuild_sched_domains(void) {} #endif -- cgit v1.2.3 From 1df57411a658fd8b411323f8dd0d67e789b9c777 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 3 Apr 2025 16:51:20 +0800 Subject: drm/amd: add definition for new memory type Support new version of HBM. 
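Purely as an illustration for userspace consumers (not part of this patch): the new value is reported in the vram_type field of struct drm_amdgpu_info_device, as returned by the AMDGPU_INFO ioctl with the AMDGPU_INFO_DEV_INFO query, so a consumer-side check might look like:

        /* dev_info assumed filled by the AMDGPU_INFO_DEV_INFO query */
        bool hbm = dev_info.vram_type == AMDGPU_VRAM_TYPE_HBM ||
                   dev_info.vram_type == AMDGPU_VRAM_TYPE_HBM3E;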
Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1fd96474e64c..45c4fa13499c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1368,6 +1368,7 @@ struct drm_amdgpu_info_vbios { #define AMDGPU_VRAM_TYPE_DDR5 10 #define AMDGPU_VRAM_TYPE_LPDDR4 11 #define AMDGPU_VRAM_TYPE_LPDDR5 12 +#define AMDGPU_VRAM_TYPE_HBM3E 13 struct drm_amdgpu_info_device { /** PCI Device ID */ -- cgit v1.2.3 From a98e892c694284256296465a78d11072e2c5419f Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 11 Feb 2025 14:39:05 +0100 Subject: HID: core: Add functions for HID drivers to react on first open and last close call Adds a new function to the hid_driver struct that is called when the userspace starts using the device, and another one that is called when userspace stop using the device. With this a hid driver can implement special suspend handling for devices currently not in use. Signed-off-by: Werner Sembach Link: https://patch.msgid.link/20250211133950.422232-1-wse@tuxedocomputers.com Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index a1305210b2fd..568a9d8c749b 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -795,6 +795,8 @@ struct hid_usage_id { * @suspend: invoked on suspend (NULL means nop) * @resume: invoked on resume if device was not reset (NULL means nop) * @reset_resume: invoked on resume if device was reset (NULL means nop) + * @on_hid_hw_open: invoked when hid core opens first instance (NULL means nop) + * @on_hid_hw_close: invoked when hid core closes last instance (NULL means nop) * * probe should return -errno on error, or 0 on success. During probe, * input will not be passed to raw_event unless hid_device_io_start is @@ -850,6 +852,8 @@ struct hid_driver { int (*suspend)(struct hid_device *hdev, pm_message_t message); int (*resume)(struct hid_device *hdev); int (*reset_resume)(struct hid_device *hdev); + void (*on_hid_hw_open)(struct hid_device *hdev); + void (*on_hid_hw_close)(struct hid_device *hdev); /* private: */ struct device_driver driver; -- cgit v1.2.3 From 6c58d2791d6046727d87db50a5e46644f195dcf9 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 10 Apr 2025 17:24:16 +0800 Subject: tick/nohz: Remove unused tick_nohz_full_add_cpus_to() This function isn't used anywhere. Remove it. 
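Returning briefly to the HID ->on_hid_hw_open()/->on_hid_hw_close() hooks introduced in the previous patch: a driver that wants to power down an idle device could wire them up roughly as follows (the driver, data struct and low-power helper names are made up for the sketch):

static void example_hid_hw_open(struct hid_device *hdev)
{
        struct example_data *data = hid_get_drvdata(hdev);

        /* first userspace user: make sure the device is fully powered */
        example_set_low_power(data, false);
}

static void example_hid_hw_close(struct hid_device *hdev)
{
        struct example_data *data = hid_get_drvdata(hdev);

        /* last userspace user went away: allow a deeper low-power state */
        example_set_low_power(data, true);
}

static struct hid_driver example_hid_driver = {
        .name            = "example-hid",
        /* .id_table, .probe, .raw_event etc. omitted */
        .on_hid_hw_open  = example_hid_hw_open,
        .on_hid_hw_close = example_hid_hw_close,
};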
Signed-off-by: Alex Shi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250410092423.9831-1-alexs@kernel.org --- include/linux/tick.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index b8ddc8e631a3..ac76ae9fa36d 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -195,12 +195,6 @@ static inline bool tick_nohz_full_enabled(void) __ret; \ }) -static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) -{ - if (tick_nohz_full_enabled()) - cpumask_or(mask, mask, tick_nohz_full_mask); -} - extern void tick_nohz_dep_set(enum tick_dep_bits bit); extern void tick_nohz_dep_clear(enum tick_dep_bits bit); extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); @@ -281,7 +275,6 @@ extern void __init tick_nohz_full_setup(cpumask_var_t cpumask); #else static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } -static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } -- cgit v1.2.3 From 895ee6a22e3195b7c1fee140c842bdeedb89ed33 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Fri, 9 May 2025 12:20:08 -0400 Subject: topology: make for_each_node_with_cpus() O(N) for_each_node_with_cpus() calls nr_cpus_node() at every iteration, which makes it O(N^2). Kernel tracks such nodes with N_CPU record in node_states array. Switching to it makes for_each_node_with_cpus() O(N). Andrea: Now we can include also offline nodes with CPUs assigned (assuming it's possible). If checking the online state is required, the user must use node_online() within the loop. CC: Andrea Righi CC:Tejun Heo Signed-off-by: Yury Norov [NVIDIA] --- include/linux/nodemask.h | 1 + include/linux/topology.h | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 0aa6b63aa747..f08ae71585fa 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -522,6 +522,7 @@ static __always_inline int node_random(const nodemask_t *maskp) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) +#define for_each_node_with_cpus(node) for_each_node_state(node, N_CPU) /* * For nodemask scratch area. diff --git a/include/linux/topology.h b/include/linux/topology.h index 24e715f0f6d2..ffee6b4a071a 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -39,10 +40,6 @@ #define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node)) #endif -#define for_each_node_with_cpus(node) \ - for_each_online_node(node) \ - if (nr_cpus_node(node)) - int arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ -- cgit v1.2.3 From c4da7bf54b1f76e7c5c8cc6d1c4db8b19af67c5d Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Tue, 6 May 2025 10:09:31 +0800 Subject: blk-throttle: Introduce flag "BIO_TG_BPS_THROTTLED" Subsequent patches will split the single queue into separate bps and iops queues. To prevent IO that has already passed through the bps queue at a single tg level from being counted toward bps wait time again, we introduce "BIO_TG_BPS_THROTTLED" flag. 
Since throttle and QoS operate at different levels, we reuse the value as "BIO_QOS_THROTTLED". We set this flag when charge bps and clear it when charge iops, as the bio will move to the upper-level tg or be dispatched. This patch does not involve functional changes. Signed-off-by: Zizhi Wo Reviewed-by: Yu Kuai Signed-off-by: Zizhi Wo Link: https://lore.kernel.org/r/20250506020935.655574-5-wozizhi@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index f38425338c3f..3d1577f07c1c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -296,6 +296,14 @@ enum { * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + /* + * This bio has completed bps throttling at the single tg granularity, + * which is different from BIO_BPS_THROTTLED. When the bio is enqueued + * into the sq->queued of the upper tg, or is about to be dispatched, + * this flag needs to be cleared. Since blk-throttle and rq_qos are not + * on the same hierarchical level, reuse the value. + */ + BIO_TG_BPS_THROTTLED = BIO_QOS_THROTTLED, BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */ -- cgit v1.2.3 From f5c0ecf196aaf78777f1606f1e0392c5e57c4530 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 9 May 2025 15:03:35 +0200 Subject: PM: sleep: Introduce pm_sleep_transition_in_progress() The "suspend in progress" check in device_wakeup_enable() does not cover hibernation, but arguably it should do that, so introduce pm_sleep_transition_in_progress() covering transitions during both system suspend and hibernation to use in there and use it also in pm_debug_messages_should_print(). Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/7820474.EvYhyI6sBW@rjwysocki.net [ rjw: Move the new function definition under CONFIG_PM_SLEEP ] Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 52ea108f9451..b1c76c8f2c82 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -475,6 +475,8 @@ extern void pm_print_active_wakeup_sources(void); extern unsigned int lock_system_sleep(void); extern void unlock_system_sleep(unsigned int); +extern bool pm_sleep_transition_in_progress(void); + #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) @@ -503,6 +505,8 @@ static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline unsigned int lock_system_sleep(void) { return 0; } static inline void unlock_system_sleep(unsigned int flags) {} +static inline bool pm_sleep_transition_in_progress(void) { return false; } + #endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_SLEEP_DEBUG -- cgit v1.2.3 From c84bf6dd2b836b49bb2662668ff1692350d28236 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 9 May 2025 13:13:34 +0100 Subject: mm: introduce new .mmap_prepare() file callback Patch series "eliminate mmap() retry merge, add .mmap_prepare hook", v2. During the mmap() of a file-backed mapping, we invoke the underlying driver file's mmap() callback in order to perform driver/file system initialisation of the underlying VMA. 
This has been a source of issues in the past, including a significant security concern relating to unwinding of error state discovered by Jann Horn, as fixed in commit 5de195060b2e ("mm: resolve faulty mmap_region() error path behaviour") which performed the recent, significant, rework of mmap() as a whole.

However, we have had a fly in the ointment remain - drivers have a great deal of freedom in the .mmap() hook to manipulate VMA state (as well as page table state). This can be problematic, as we can no longer reason sensibly about VMA state once the call is complete (the ability to do - anything - here does rather interfere with that). In addition, callers may choose to do odd or unusual things which might interfere with subsequent steps in the mmap() process, and it may do so and then raise an error, requiring very careful unwinding of state about which we can make no assumptions.

Rather than providing such an open-ended interface, this series provides an alternative, far more restrictive one - we expose a whitelist of fields which can be adjusted by the driver, along with immutable state upon which the driver can make such decisions:

struct vm_area_desc {
	/* Immutable state. */
	struct mm_struct *mm;
	unsigned long start;
	unsigned long end;

	/* Mutable fields. Populated with initial state. */
	pgoff_t pgoff;
	struct file *file;
	vm_flags_t vm_flags;
	pgprot_t page_prot;

	/* Write-only fields. */
	const struct vm_operations_struct *vm_ops;
	void *private_data;
};

The mmap logic then updates the state used to either merge with a VMA or establish a new VMA based upon this logic. This is achieved via the new file hook .mmap_prepare(), which is, importantly, invoked very early on in the mmap() process. If an error arises, we can very simply abort the operation with very little unwinding of state required.

The existing logic contains another, related, peccadillo - since the .mmap() callback might do anything, it may also cause a previously unmergeable VMA to become mergeable with adjacent VMAs. Right now the logic will retry a merge like this only if the driver changes VMA flags, and changes them in such a way that a merge might succeed (that is, the flags are not 'special', that is do not contain any of the flags specified in VM_SPECIAL). This has also been the source of a great deal of pain - it's hard to reason about an .mmap() callback that might do - anything - but it's also hard to reason about setting up a VMA and writing to the maple tree, only to do it again utilising a great deal of shared state.

Since .mmap_prepare() sets fields before the first merge is even attempted, the use of this callback obviates the need for this retry merge logic. A driver may only specify .mmap_prepare() or the deprecated .mmap() callback. In future we may add further callbacks beyond .mmap_prepare() to facilitate all use cases as we convert drivers.

In researching this change, I examined every .mmap() callback, and discovered only a very few that set VMA state in such a way that a. the VMA flags changed and b. this would be mergeable. In the majority of cases, it turns out that drivers are mapping kernel memory and thus ultimately set VM_PFNMAP, VM_MIXEDMAP, or other unmergeable VM_SPECIAL flags.
Of those that remain I identified a number of cases which are only applicable in DAX, setting the VM_HUGEPAGE flag:

* dax_mmap()
* erofs_file_mmap()
* ext4_file_mmap()
* xfs_file_mmap()

For this remerge to not occur and to impact users, each of these cases would require a user to mmap() files using DAX, in parts, immediately adjacent to one another. This is a very unlikely use case and so it does not appear to be worthwhile to adjust this functionality accordingly. We can, however, very quickly do so if needed by simply adding an .mmap_prepare() callback to these as required.

There are two further non-DAX cases I identified:

* orangefs_file_mmap() - Clears VM_RAND_READ if set, replacing with VM_SEQ_READ.
* usb_stream_hwdep_mmap() - Sets VM_DONTDUMP.

Both of these cases again seem very unlikely to be mmap()'d immediately adjacent to one another in a fashion that would result in a merge.

Finally, we are left with a viable case:

* secretmem_mmap() - Sets VM_LOCKED, VM_DONTDUMP.

This is viable enough that the mm selftests trigger the logic as a matter of course. Therefore, this series replaces the .secretmem_mmap() hook with .secret_mmap_prepare().

This patch (of 3): Provide a means by which drivers can specify which fields of those permitted to be changed should be altered prior to mmap()'ing a range (which may either result from a merge or from mapping an entirely new VMA).

Doing so is substantially safer than the existing .mmap() callback, which provides unrestricted access to the part-constructed VMA and permits drivers and file systems to do 'creative' things which make it hard to reason about the state of the VMA after the function returns.

The existing .mmap() callback's freedom has caused a great deal of issues, especially in error handling, as unwinding the mmap() state has proven to be non-trivial and caused significant issues in the past, for instance those addressed in commit 5de195060b2e ("mm: resolve faulty mmap_region() error path behaviour"). It also necessitates a second attempt at merge once the .mmap() callback has completed, which has caused issues in the past, is awkward, adds overhead and is difficult to reason about.

The .mmap_prepare() callback eliminates this requirement, as we can update fields prior to even attempting the first merge. It is safer, as we heavily restrict what can actually be modified, and being invoked very early in the mmap() process, error handling can be performed safely with very little unwinding of state required.

The .mmap_prepare() and deprecated .mmap() callbacks are mutually exclusive, so we permit only one to be invoked at a time.

Update vma userland test stubs to account for changes.
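To make the shape of the new hook concrete, a converted driver might look roughly like the sketch below. EXAMPLE_BUF_SIZE, example_vm_ops and the fops name are hypothetical; only the whitelisted fields of struct vm_area_desc are touched:

static int example_mmap_prepare(struct vm_area_desc *desc)
{
        /* validate against the immutable state */
        if (desc->end - desc->start > EXAMPLE_BUF_SIZE)
                return -EINVAL;

        /* mutable fields, pre-populated with the initial values */
        desc->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
        desc->page_prot = pgprot_noncached(desc->page_prot);

        /* write-only fields, installed in the resulting VMA */
        desc->vm_ops = &example_vm_ops;
        desc->private_data = desc->file->private_data;

        return 0;
}

static const struct file_operations example_fops = {
        .owner        = THIS_MODULE,
        /* note: .mmap and .mmap_prepare are mutually exclusive */
        .mmap_prepare = example_mmap_prepare,
};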
Link: https://lkml.kernel.org/r/cover.1746792520.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/adb36a7c4affd7393b2fc4b54cc5cfe211e41f71.1746792520.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: David Hildenbrand Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/fs.h | 25 +++++++++++++++++++++++++ include/linux/mm_types.h | 24 ++++++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..e2721a1ff13d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2169,6 +2169,7 @@ struct file_operations { int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, unsigned int poll_flags); + int (*mmap_prepare)(struct vm_area_desc *); } __randomize_layout; /* Supports async buffered reads */ @@ -2238,11 +2239,35 @@ struct inode_operations { struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; +/* Did the driver provide valid mmap hook configuration? */ +static inline bool file_has_valid_mmap_hooks(struct file *file) +{ + bool has_mmap = file->f_op->mmap; + bool has_mmap_prepare = file->f_op->mmap_prepare; + + /* Hooks are mutually exclusive. */ + if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) + return false; + if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare)) + return false; + + return true; +} + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { + if (WARN_ON_ONCE(file->f_op->mmap_prepare)) + return -EINVAL; + return file->f_op->mmap(file, vma); } +static inline int __call_mmap_prepare(struct file *file, + struct vm_area_desc *desc) +{ + return file->f_op->mmap_prepare(desc); +} + extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e76bade9ebb1..15808cad2bc1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -763,6 +763,30 @@ struct vma_numab_state { int prev_scan_seq; }; +/* + * Describes a VMA that is about to be mmap()'ed. Drivers may choose to + * manipulate mutable fields which will cause those fields to be updated in the + * resultant VMA. + * + * Helper functions are not required for manipulating any field. + */ +struct vm_area_desc { + /* Immutable state. */ + struct mm_struct *mm; + unsigned long start; + unsigned long end; + + /* Mutable fields. Populated with initial state. */ + pgoff_t pgoff; + struct file *file; + vm_flags_t vm_flags; + pgprot_t page_prot; + + /* Write-only fields. */ + const struct vm_operations_struct *vm_ops; + void *private_data; +}; + /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory -- cgit v1.2.3 From 0cad6736f4b9bbe8129f367ed5818fa4aef6c2b5 Mon Sep 17 00:00:00 2001 From: Feng Lee <379943137@qq.com> Date: Fri, 9 May 2025 14:32:30 +0800 Subject: mm: remove obsolete pgd_offset_gate() Remove pgd_offset_gate() completely and simply make the single caller use pgd_offset(). 
It appears that the gate area resides in the kernel-mapped segment exclusively on IA64. Therefore, removing pgd_offset_k is safe since IA64 is now obsolete. Link: https://lkml.kernel.org/r/tencent_503130C3CD56569191396268CF4D12F09A06@qq.com Signed-off-by: Feng Lee <379943137@qq.com> Reviewed-by: Barry Song Acked-by: David Hildenbrand Cc: Anshuman Khandual Cc: bibo mao Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: John Hubbard Cc: Lance Yang Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index b50447ef1c92..f1e890b60460 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1164,10 +1164,6 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) } #endif -#ifndef __HAVE_ARCH_PGD_OFFSET_GATE -#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) -#endif - #ifndef __HAVE_ARCH_MOVE_PTE #define move_pte(pte, old_addr, new_addr) (pte) #endif -- cgit v1.2.3 From dc75c3ced10c611f524e9e444303a0249ce32e43 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 12 May 2025 22:20:59 +0200 Subject: net: phy: remove stub for mdiobus_register_board_info The functionality of mdiobus_register_board_info() typically isn't optional for the caller. Therefore remove the stub. Note: Currently we have only one caller of mdiobus_register_board_info(), in a DSA/PHYLINK context. Therefore CONFIG_MDIO_DEVICE is selected anyway. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/410a2222-c4e8-45b0-9091-d49674caeb00@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index d62d292024bc..7c29d346d4b3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2071,17 +2071,8 @@ struct mdio_board_info { const void *platform_data; }; -#if IS_ENABLED(CONFIG_MDIO_DEVICE) int mdiobus_register_board_info(const struct mdio_board_info *info, unsigned int n); -#else -static inline int mdiobus_register_board_info(const struct mdio_board_info *i, - unsigned int n) -{ - return 0; -} -#endif - /** * phy_module_driver() - Helper macro for registering PHY drivers -- cgit v1.2.3 From bc049387b41f41bee61e8cc338a5e99ca9798a09 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 13 May 2025 07:28:12 -0700 Subject: bpf: Add support for __prog argument suffix to pass in prog->aux Instead of hardcoding the list of kfuncs that need prog->aux passed to them with a combination of fixup_kfunc_call adjustment + __ign suffix, combine both in __prog suffix, which ignores the argument passed in, and fixes it up to the prog->aux. This allows kfuncs to have the prog->aux passed into them without having to touch the verifier. Cc: Tejun Heo Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250513142812.1021591-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9734544b6957..cedd66867ecf 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -591,6 +591,7 @@ struct bpf_insn_aux_data { * bpf_fastcall pattern. 
*/ u8 fastcall_spills_num:3; + u8 arg_prog:4; /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ -- cgit v1.2.3 From 3937f6db6e932c560a0f9ee2cd2a4fdcc314dadf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 12 May 2025 19:21:15 -0700 Subject: lib/crc16: unexport crc16_table and crc16_byte() Now that neither crc16_table nor crc16_byte() is used outside lib/crc16.c, fold them into lib/crc16.c. Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250513022115.39109-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc16.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/crc16.h b/include/linux/crc16.h index 9fa74529b317..b861d969b161 100644 --- a/include/linux/crc16.h +++ b/include/linux/crc16.h @@ -15,14 +15,7 @@ #include -extern u16 const crc16_table[256]; - -extern u16 crc16(u16 crc, const u8 *buffer, size_t len); - -static inline u16 crc16_byte(u16 crc, const u8 data) -{ - return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; -} +u16 crc16(u16 crc, const u8 *p, size_t len); #endif /* __CRC16_H */ -- cgit v1.2.3 From 439d47608bb3e5f7b5f5eb55c568576b60731c4d Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 12 May 2025 10:44:00 +0200 Subject: ata: libata: Print if port is external on boot Commit affccb16c117 ("ata: ahci: print the lpm policy on boot") added a lpm-pol print during boot, which shows the LPM policy used by each port. While the LPM policy is usually determined by the Kconfig CONFIG_SATA_MOBILE_LPM_POLICY, the Kconfig value is overridden e.g. if firmware has marked the port as hotplug capable / external. Commit f97106b10d9a ("ata: ahci: Add debug print for external port") did add a debug print to show if LPM was disabled because firmware has marked the port as external, however, because devices having broken LPM (even though they claim to support it) is more common than one would have hoped, print "ext" during boot if firmware has marked the port is external. This will make it easier to debug certain LPM issues, e.g. if firmware has enabled/marked only some of the ports as hotplug capable / external. Before (port marked as external by firmware): ata1: SATA max UDMA/133 abar m4096@0xfebd3000 port 0xfebd3100 irq 57 lpm-pol 0 After (port marked as external by firmware): ata1: SATA max UDMA/133 abar m4096@0xfebd3000 port 0xfebd3100 irq 57 lpm-pol 0 ext Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 98affa1d9136..31be45fd47a6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1618,6 +1618,8 @@ static inline void ata_port_desc_misc(struct ata_port *ap, int irq) { ata_port_desc(ap, "irq %d", irq); ata_port_desc(ap, "lpm-pol %d", ap->target_lpm_policy); + if (ap->pflags & ATA_PFLAG_EXTERNAL) + ata_port_desc(ap, "ext"); } static inline bool ata_tag_internal(unsigned int tag) -- cgit v1.2.3 From 6a09ae8281986d5fb5ce0115135c05a5afb8718e Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Wed, 7 May 2025 09:51:31 +0200 Subject: leds: flash: Add support for flash/strobe duration Add support for the new V4L2_CID_FLASH_DURATION control to the LEDs driver. 
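A flash LED driver opting in simply provides the new op, and consumers then set the value through led_set_flash_duration(). The register name, unit conversion and driver struct below are invented for this sketch:

static int example_duration_set(struct led_classdev_flash *fled_cdev, u32 duration)
{
        struct example_led *led =
                container_of(fled_cdev, struct example_led, fled_cdev);

        /* duration is in microseconds; scale to hypothetical register units */
        return regmap_write(led->regmap, EXAMPLE_REG_FLASH_DUR,
                            duration / EXAMPLE_DUR_STEP_US);
}

static const struct led_flash_ops example_flash_ops = {
        /* .flash_brightness_set, .strobe_set, .timeout_set, ... omitted */
        .duration_set = example_duration_set,
};

A consumer such as a V4L2 flash wrapper would then call led_set_flash_duration(fled_cdev, 1000) to request a 1 ms strobe.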
Signed-off-by: Richard Leitner Link: https://lore.kernel.org/r/20250507-ov9282-flash-strobe-v4-2-72b299c1b7c9@linux.dev Signed-off-by: Lee Jones --- include/linux/led-class-flash.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 36df927ec4b7..21ec856c36bc 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -45,6 +45,8 @@ struct led_flash_ops { int (*timeout_set)(struct led_classdev_flash *fled_cdev, u32 timeout); /* get the flash LED fault */ int (*fault_get)(struct led_classdev_flash *fled_cdev, u32 *fault); + /* set flash duration */ + int (*duration_set)(struct led_classdev_flash *fled_cdev, u32 duration); }; /* @@ -75,6 +77,9 @@ struct led_classdev_flash { /* flash timeout value in microseconds along with its constraints */ struct led_flash_setting timeout; + /* flash timeout value in microseconds along with its constraints */ + struct led_flash_setting duration; + /* LED Flash class sysfs groups */ const struct attribute_group *sysfs_groups[LED_FLASH_SYSFS_GROUPS_SIZE]; }; @@ -209,4 +214,15 @@ int led_set_flash_timeout(struct led_classdev_flash *fled_cdev, u32 timeout); */ int led_get_flash_fault(struct led_classdev_flash *fled_cdev, u32 *fault); +/** + * led_set_flash_duration - set flash LED duration + * @fled_cdev: the flash LED to set + * @timeout: the flash duration to set it to + * + * Set the flash strobe duration. + * + * Returns: 0 on success or negative error value on failure + */ +int led_set_flash_duration(struct led_classdev_flash *fled_cdev, u32 duration); + #endif /* __LINUX_FLASH_LEDS_H_INCLUDED */ -- cgit v1.2.3 From ecab3c40fa49a2073c4c916ebff9496a6b5db7bd Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Sun, 27 Apr 2025 09:43:23 +0200 Subject: dt-bindings: clock: stm32h7: rename USART{7,8}_CK to UART{7,8}_CK As stated in the reference manual RM0433, the STM32H743 MCU has USART1/2/3/6, UART4/5/7/8, and LPUART1. The patches make all the clock macros for the serial ports consistent with the documentation. Signed-off-by: Dario Binacchi Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250427074404.3278732-5-dario.binacchi@amarulasolutions.com Signed-off-by: Alexandre Torgue --- include/dt-bindings/clock/stm32h7-clks.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/stm32h7-clks.h b/include/dt-bindings/clock/stm32h7-clks.h index 6637272b3242..330b39c2c303 100644 --- a/include/dt-bindings/clock/stm32h7-clks.h +++ b/include/dt-bindings/clock/stm32h7-clks.h @@ -126,8 +126,8 @@ #define ADC3_CK 128 #define DSI_CK 129 #define LTDC_CK 130 -#define USART8_CK 131 -#define USART7_CK 132 +#define UART8_CK 131 +#define UART7_CK 132 #define HDMICEC_CK 133 #define I2C3_CK 134 #define I2C2_CK 135 -- cgit v1.2.3 From 823a036f049f5ea4afc55a2bc2e7f80a1acdef4a Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Tue, 13 May 2025 13:39:23 +0000 Subject: ASoC: cs35l56: Log tuning unique identifiers during firmware load The cs35l56 smart amplifier has some informational firmware controls that are populated by a tuning bin file to unique values - logging these during firmware load identifies the specific configuration being used on that device instance. 
Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/47762a5f1ce2b178ad863c6698296aea09b72e10.1747142267.git.simont@opensource.cirrus.com Acked-by: Takashi Iwai Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 5d653a3491d0..bbc1cba40cd9 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -332,6 +332,7 @@ void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_ds int cs35l56_get_calibration(struct cs35l56_base *cs35l56_base); int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base, bool *fw_missing, unsigned int *fw_version); +void cs35l56_log_tuning(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_dsp); int cs35l56_hw_init(struct cs35l56_base *cs35l56_base); int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base); int cs35l56_get_bclk_freq_id(unsigned int freq); -- cgit v1.2.3 From 28026cf2dd84d961a62123b1fa941dc3c2c4a132 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 May 2025 17:31:40 +0100 Subject: genirq/msi: Add .msi_teardown() callback as the reverse of .msi_prepare() While the MSI ops do have a .msi_prepare() callback that is responsible for setting up the relevant (usually per-device) allocation, there is no callback reversing this setup. For this purpose, add .msi_teardown() callback. In order to avoid breaking the ITS driver that suffers from related issues, do not call the callback just yet. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250513163144.2215824-2-maz@kernel.org --- include/linux/msi.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 8c0ec9fc05a3..63c23003ec9b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -423,6 +423,7 @@ struct msi_domain_info; * @msi_init: Domain specific init function for MSI interrupts * @msi_free: Domain specific function to free a MSI interrupts * @msi_prepare: Prepare the allocation of the interrupts in the domain + * @msi_teardown: Reverse the effects of @msi_prepare * @prepare_desc: Optional function to prepare the allocated MSI descriptor * in the domain * @set_desc: Set the msi descriptor for an interrupt @@ -438,8 +439,9 @@ struct msi_domain_info; * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. * - * @msi_check, @msi_prepare, @prepare_desc and @set_desc are callbacks used by the - * msi_domain_alloc/free_irqs*() variants. + * @msi_check, @msi_prepare, @msi_teardown, @prepare_desc and + * @set_desc are callbacks used by the msi_domain_alloc/free_irqs*() + * variants. * * @domain_alloc_irqs, @domain_free_irqs can be used to override the * default allocation/free functions (__msi_domain_alloc/free_irqs). 
This @@ -461,6 +463,8 @@ struct msi_domain_ops { int (*msi_prepare)(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); + void (*msi_teardown)(struct irq_domain *domain, + msi_alloc_info_t *arg); void (*prepare_desc)(struct irq_domain *domain, msi_alloc_info_t *arg, struct msi_desc *desc); void (*set_desc)(msi_alloc_info_t *arg, @@ -489,6 +493,7 @@ struct msi_domain_ops { * @handler: Optional: associated interrupt flow handler * @handler_data: Optional: associated interrupt flow handler data * @handler_name: Optional: associated interrupt flow handler name + * @alloc_data: Optional: associated interrupt allocation data * @data: Optional: domain specific data */ struct msi_domain_info { @@ -501,6 +506,7 @@ struct msi_domain_info { irq_flow_handler_t handler; void *handler_data; const char *handler_name; + msi_alloc_info_t *alloc_data; void *data; }; -- cgit v1.2.3 From 1396e89e09f00deb816e5f4a176d71d554922292 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 May 2025 17:31:42 +0100 Subject: genirq/msi: Move prepare() call to per-device allocation The current device MSI infrastructure is subtly broken, as it will issue an .msi_prepare() callback into the MSI controller driver every time it needs to allocate an MSI. That's pretty wrong, as the contract (or unwarranted assumption, depending who you ask) between the MSI controller and the core code is that .msi_prepare() is called exactly once per device. This leads to some subtle breakage in some MSI controller drivers, as it gives the impression that there are multiple endpoints sharing a bus identifier (RID in PCI parlance, DID for GICv3+). It implies that whatever allocation the ITS driver (for example) has done on behalf of these devices cannot be undone, as there is no way to track the shared state. This is particularly bad for wire-MSI devices, for which .msi_prepare() is called for each input line. To address this issue, move the call to .msi_prepare() to take place at the point of irq domain allocation, which is the only place that makes sense. The msi_alloc_info_t structure is made part of the msi_domain_template, so that its life-cycle is that of the domain as well. Finally, the msi_info::alloc_data field is made to point at this allocation tracking structure, ensuring that it is carried around the block. This is all pretty straightforward, except for the non-device-MSI leftovers, which still have to call .msi_prepare() at the old spot. One day... Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250513163144.2215824-4-maz@kernel.org --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 63c23003ec9b..f4b94ccaf0c1 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -516,12 +516,14 @@ struct msi_domain_info { * @chip: Interrupt chip for this domain * @ops: MSI domain ops * @info: MSI domain info data + * @alloc_info: MSI domain allocation data (architecture specific) */ struct msi_domain_template { char name[48]; struct irq_chip chip; struct msi_domain_ops ops; struct msi_domain_info info; + msi_alloc_info_t alloc_info; }; /* -- cgit v1.2.3 From cc52a697f87e8b2d88298827aca3f81398385572 Mon Sep 17 00:00:00 2001 From: Ivaylo Ivanov Date: Sun, 4 May 2025 17:45:27 +0300 Subject: phy: exynos5-usbdrd: support Exynos USBDRD 3.2 4nm controller Add support for the Exynos USB 3.2 DRD 4nm controller. It's used in recent 4nm SoCs like Exynos2200 and Exynos2400. 
This device consists of 3 underlying and independent phys: SEC link control phy, Synopsys eUSB 2.0 and Synopsys USBDP/SS combophy. Unlike older device designs, where the internal phy blocks were all IP of Samsung, Synopsys phys are present. This means that the link controller is now mapped differently to account for missing bits and registers. The Synopsys phys also have separate register bases. As there are non-SEC PHYs present now, it doesn't make much sense to implement them in this driver. They are expected to be configured by external drivers, so pass phandles to them. USBDRD3.2 link controller set up is still required beforehand. This commit adds the necessary changes for USB HS to work. USB SS and DisplayPort are out of scope in this commit and will be introduced in the future. Signed-off-by: Ivaylo Ivanov Link: https://lore.kernel.org/r/20250504144527.1723980-11-ivo.ivanov.ivanov1@gmail.com Signed-off-by: Vinod Koul --- include/linux/soc/samsung/exynos-regs-pmu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index cde299a85384..e921e7fe9be7 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -187,6 +187,9 @@ /* Only for S5Pv210 */ #define S5PV210_EINT_WAKEUP_MASK 0xC004 +/* Only for Exynos2200 */ +#define EXYNOS2200_PHY_CTRL_USB20 0x72C + /* Only for Exynos4210 */ #define S5P_CMU_CLKSTOP_LCD1_LOWPWR 0x1154 #define S5P_CMU_RESET_LCD1_LOWPWR 0x1174 -- cgit v1.2.3 From 5fa96c83b81e50833274f3b450ee9a8c0b2172bc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 1 Apr 2025 19:07:03 +0100 Subject: coresight: Introduce pause and resume APIs for source Introduce APIs for pausing and resuming trace source and export as GPL symbols. Signed-off-by: Leo Yan Reviewed-by: Mike Leach Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250401180708.385396-3-leo.yan@arm.com --- include/linux/coresight.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 8abdd8b5c791..4ac65c68bbf4 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -398,6 +398,8 @@ struct coresight_ops_link { * is associated to. * @enable: enables tracing for a source. * @disable: disables tracing for a source. + * @resume_perf: resumes tracing for a source in perf session. + * @pause_perf: pauses tracing for a source in perf session. */ struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); @@ -405,6 +407,8 @@ struct coresight_ops_source { enum cs_mode mode, struct coresight_path *path); void (*disable)(struct coresight_device *csdev, struct perf_event *event); + int (*resume_perf)(struct coresight_device *csdev); + void (*pause_perf)(struct coresight_device *csdev); }; /** -- cgit v1.2.3 From 676e8cf70cb0533e1118e29898c9a9c33ae3a10f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 May 2025 13:36:59 +0200 Subject: sched,livepatch: Untangle cond_resched() and live-patching With the goal of deprecating / removing VOLUNTARY preempt, live-patch needs to stop relying on cond_resched() to make forward progress. Instead, rely on schedule() with TASK_FREEZABLE set. Just like live-patching, the freezer needs to be able to stop tasks in a safe / known state. 
[bigeasy: use likely() in __klp_sched_try_switch() and update comments] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Petr Mladek Tested-by: Petr Mladek Tested-by: Miroslav Benes Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20250509113659.wkP_HJ5z@linutronix.de --- include/linux/livepatch_sched.h | 14 +++++--------- include/linux/sched.h | 6 ------ 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h index 013794fb5da0..065c185f2763 100644 --- a/include/linux/livepatch_sched.h +++ b/include/linux/livepatch_sched.h @@ -3,27 +3,23 @@ #define _LINUX_LIVEPATCH_SCHED_H_ #include -#include +#include #ifdef CONFIG_LIVEPATCH void __klp_sched_try_switch(void); -#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key); -static __always_inline void klp_sched_try_switch(void) +static __always_inline void klp_sched_try_switch(struct task_struct *curr) { - if (static_branch_unlikely(&klp_sched_try_switch_key)) + if (static_branch_unlikely(&klp_sched_try_switch_key) && + READ_ONCE(curr->__state) & TASK_FREEZABLE) __klp_sched_try_switch(); } -#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ - #else /* !CONFIG_LIVEPATCH */ -static inline void klp_sched_try_switch(void) {} -static inline void __klp_sched_try_switch(void) {} +static inline void klp_sched_try_switch(struct task_struct *curr) {} #endif /* CONFIG_LIVEPATCH */ #endif /* _LINUX_LIVEPATCH_SCHED_H_ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index f96ac1982893..b98195991031 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -2089,9 +2088,6 @@ extern int __cond_resched(void); #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -void sched_dynamic_klp_enable(void); -void sched_dynamic_klp_disable(void); - DECLARE_STATIC_CALL(cond_resched, __cond_resched); static __always_inline int _cond_resched(void) @@ -2112,7 +2108,6 @@ static __always_inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return __cond_resched(); } @@ -2122,7 +2117,6 @@ static inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return 0; } -- cgit v1.2.3 From aab12022b076f0b385b7a9a78e1161bd2df5d1e3 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 29 Apr 2025 11:18:08 +0100 Subject: soundwire: bus: Add internal slave ID and use for IRQs Currently the SoundWire IRQ code uses the dev_num to create an IRQ mapping for each slave. However, there is an issue there, the dev_num is only allocated when the slave enumerates on the bus and enumeration may happen before or after probe of the slave driver. In the case enumeration happens after probe of the slave driver then the IRQ mapping will use dev_num before it is set. This could cause multiple slaves to use zero as their IRQ mapping. It is very desirable to have the IRQ mapped before the slave probe is called, so drivers can do resource allocation in probe as normal. To solve these issues add an internal ID created for each slave when it is probed and use that for mapping the IRQ. 
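The allocation itself is ordinary IDA usage; roughly (the helper name is hypothetical), at slave probe time and before any IRQ mapping is created:

static int example_slave_alloc_index(struct sdw_bus *bus, struct sdw_slave *slave)
{
        int id;

        /* independent of the bus-assigned dev_num, so usable at probe */
        id = ida_alloc_max(&bus->slave_ida, SDW_FW_MAX_DEVICES - 1, GFP_KERNEL);
        if (id < 0)
                return id;

        slave->index = id;
        return 0;
}

/* and on remove: ida_free(&bus->slave_ida, slave->index); */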
Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20250429101808.348462-3-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 2362f621d94c..0832776262ac 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,7 @@ struct sdw_slave; #define SDW_FRAME_CTRL_BITS 48 #define SDW_MAX_DEVICES 11 +#define SDW_FW_MAX_DEVICES 16 #define SDW_MAX_PORTS 15 #define SDW_VALID_PORT_RANGE(n) ((n) < SDW_MAX_PORTS && (n) >= 1) @@ -630,6 +632,7 @@ struct sdw_slave_ops { * struct sdw_slave - SoundWire Slave * @id: MIPI device ID * @dev: Linux device + * @index: internal ID for this slave * @irq: IRQ number * @status: Status reported by the Slave * @bus: Bus handle @@ -661,6 +664,7 @@ struct sdw_slave_ops { struct sdw_slave { struct sdw_slave_id id; struct device dev; + int index; int irq; enum sdw_slave_status status; struct sdw_bus *bus; @@ -968,6 +972,7 @@ struct sdw_stream_runtime { * @md: Master device * @bus_lock_key: bus lock key associated to @bus_lock * @bus_lock: bus lock + * @slave_ida: IDA for allocating internal slave IDs * @slaves: list of Slaves on this bus * @msg_lock_key: message lock key associated to @msg_lock * @msg_lock: message lock @@ -1010,6 +1015,7 @@ struct sdw_bus { struct sdw_master_device *md; struct lock_class_key bus_lock_key; struct mutex bus_lock; + struct ida slave_ida; struct list_head slaves; struct lock_class_key msg_lock_key; struct mutex msg_lock; -- cgit v1.2.3 From e1f3f5be9e8e730290ebe6d490383af4d77f0f38 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 30 Apr 2025 15:47:13 +0800 Subject: soundwire: intel: Add awareness of ACE3+ microphone privacy ACE3 introduced microphone privacy and along this feature it adds a new register in vendor specific SHIM to control and status reporting. The control of mic privacy via the SHIM register is only to enable the interrupt generation via soundwire, but not handled by the soundwire code as the mic privacy is not a feature of the soundwire IP. On the other hand, printing the register value brings value for debugging, so add a new flag to allow this conditionally. Signed-off-by: Peter Ujfalusi Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20250430074714.94000-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_intel.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 493d9de4e472..2af1d8174c50 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -189,6 +189,9 @@ #define SDW_SHIM3_INTEL_VS_ACTMCTL_DOAISE2 BIT(14) #define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDE BIT(15) +/* ACE3+ Mic privacy control and status register */ +#define SDW_SHIM2_INTEL_VS_PVCCS 0x10 + /** * struct sdw_intel_stream_params_data: configuration passed during * the @params_stream callback, e.g. for interaction with DSP @@ -331,6 +334,7 @@ struct sdw_intel_ctx { * @shim_base: sdw shim base. * @alh_base: sdw alh base. 
* @ext: extended HDaudio link support + * @mic_privacy: ACE version supports microphone privacy * @hbus: hdac_bus pointer, needed for power management * @eml_lock: mutex protecting shared registers in the HDaudio multi-link * space @@ -349,6 +353,7 @@ struct sdw_intel_res { u32 shim_base; u32 alh_base; bool ext; + bool mic_privacy; struct hdac_bus *hbus; struct mutex *eml_lock; }; -- cgit v1.2.3 From 9002b75aa8e6f034ffbd1c1ccac46927a1cf0f12 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Wed, 23 Apr 2025 15:34:19 +0100 Subject: irqchip/renesas-rzv2h: Add rzv2h_icu_register_dma_req() On the Renesas RZ/V2H(P) family of SoCs, DMAC IPs are connected to the Interrupt Control Unit (ICU). For DMA transfers, a request number must be registered with the ICU, which means that the DMAC driver has to be able to instruct the ICU driver with the registration of such id. Export rzv2h_icu_register_dma_req() so that the DMAC driver can register the DMAC request number. Signed-off-by: Fabrizio Castro Reviewed-by: Thomas Gleixner Reviewed-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250423143422.3747702-4-fabrizio.castro.jz@renesas.com Signed-off-by: Vinod Koul --- include/linux/irqchip/irq-renesas-rzv2h.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/irqchip/irq-renesas-rzv2h.h (limited to 'include') diff --git a/include/linux/irqchip/irq-renesas-rzv2h.h b/include/linux/irqchip/irq-renesas-rzv2h.h new file mode 100644 index 000000000000..618a60d2eac0 --- /dev/null +++ b/include/linux/irqchip/irq-renesas-rzv2h.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Renesas RZ/V2H(P) Interrupt Control Unit (ICU) + * + * Copyright (C) 2025 Renesas Electronics Corporation. + */ + +#ifndef __LINUX_IRQ_RENESAS_RZV2H +#define __LINUX_IRQ_RENESAS_RZV2H + +#include + +#define RZV2H_ICU_DMAC_REQ_NO_DEFAULT 0x3ff + +#ifdef CONFIG_RENESAS_RZV2H_ICU +void rzv2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index, u8 dmac_channel, + u16 req_no); +#else +static inline void rzv2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index, + u8 dmac_channel, u16 req_no) { } +#endif + +#endif /* __LINUX_IRQ_RENESAS_RZV2H */ -- cgit v1.2.3 From 9510b38dc0ba358c93cbf5ee7c28820afb85937b Mon Sep 17 00:00:00 2001 From: Erick Shepherd Date: Mon, 31 Mar 2025 17:13:37 -0500 Subject: mmc: Add quirk to disable DDR50 tuning Adds the MMC_QUIRK_NO_UHS_DDR50_TUNING quirk and updates mmc_execute_tuning() to return 0 if that quirk is set. This fixes an issue on certain Swissbit SD cards that do not support DDR50 tuning where tuning requests caused I/O errors to be thrown. 
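The effect in core code amounts to a check of this sort in the tuning path (a sketch of the idea, not the exact hunk from the corresponding core change):

static bool example_skip_ddr50_tuning(struct mmc_card *card)
{
        return card && (card->quirks & MMC_QUIRK_NO_UHS_DDR50_TUNING) &&
               card->host->ios.timing == MMC_TIMING_UHS_DDR50;
}

/* in mmc_execute_tuning(): */
if (example_skip_ddr50_tuning(card))
        return 0;       /* pretend tuning succeeded, never issue it */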
Signed-off-by: Erick Shepherd Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250331221337.1414534-1-erick.shepherd@ni.com Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 526fce581657..ddcdf23d731c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -329,6 +329,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ #define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ +#define MMC_QUIRK_NO_UHS_DDR50_TUNING (1<<18) /* Disable DDR50 tuning */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ -- cgit v1.2.3 From da012e1eb5372be518e2667c1afa00996bb076e6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 1 Apr 2025 11:58:46 +0200 Subject: mmc: rename mmc_can_gpio_cd() to mmc_host_can_gpio_cd() mmc_can_* functions sometimes relate to the card and sometimes to the host. Make it obvious by renaming this function to include 'host'. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250401095847.29271-11-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- include/linux/mmc/slot-gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 1ed7b0d1e4f9..4f6a46c636ec 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -24,7 +24,7 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config); int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on); void mmc_gpiod_request_cd_irq(struct mmc_host *host); -bool mmc_can_gpio_cd(struct mmc_host *host); +bool mmc_host_can_gpio_cd(struct mmc_host *host); bool mmc_can_gpio_ro(struct mmc_host *host); #endif -- cgit v1.2.3 From 2e1a26ed6b3864576720364c793e4abf0681fcf5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 1 Apr 2025 11:58:47 +0200 Subject: mmc: rename mmc_can_gpio_ro() to mmc_host_can_gpio_ro() mmc_can_* functions sometimes relate to the card and sometimes to the host. Make it obvious by renaming this function to include 'host'. 
Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250401095847.29271-12-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- include/linux/mmc/slot-gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 4f6a46c636ec..23ac5696fa38 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -25,6 +25,6 @@ int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config); int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on); void mmc_gpiod_request_cd_irq(struct mmc_host *host); bool mmc_host_can_gpio_cd(struct mmc_host *host); -bool mmc_can_gpio_ro(struct mmc_host *host); +bool mmc_host_can_gpio_ro(struct mmc_host *host); #endif -- cgit v1.2.3 From ac01fa73f5309a35eff83be61442a8891159b487 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 10 May 2025 16:37:30 -0400 Subject: tracepoint: Have tracepoints created with DECLARE_TRACE() have _tp suffix Most tracepoints in the kernel are created with TRACE_EVENT(). The TRACE_EVENT() macro (and DECLARE_EVENT_CLASS() and DEFINE_EVENT() where in reality, TRACE_EVENT() is just a helper macro that calls those other two macros), will create not only a tracepoint (the function trace_() used in the kernel), it also exposes the tracepoint to user space along with defining what fields will be saved by that tracepoint. There are a few places that tracepoints are created in the kernel that are not exposed to userspace via tracefs. They can only be accessed from code within the kernel. These tracepoints are created with DEFINE_TRACE() Most of these tracepoints end with "_tp". This is useful as when the developer sees that, they know that the tracepoint is for in-kernel only (meaning it can only be accessed inside the kernel, either directly by the kernel or indirectly via modules and BPF programs) and is not exposed to user space. Instead of making this only a process to add "_tp", enforce it by making the DECLARE_TRACE() append the "_tp" suffix to the tracepoint. This requires adding DECLARE_TRACE_EVENT() macros for the TRACE_EVENT() macro to use that keeps the original name. 
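To illustrate the net effect on a call site (a sketch using one of the sched tracepoints converted below):

	/* Declared without the suffix ... */
	DECLARE_TRACE(sched_entry,
		TP_PROTO(bool preempt, unsigned long ip),
		TP_ARGS(preempt, ip));

	/* ... but DECLARE_TRACE() now appends "_tp", so the in-kernel
	 * call keeps its existing name: */
	trace_sched_entry_tp(preempt, ip);

	/* TRACE_EVENT()-based events go through the new DECLARE_TRACE_EVENT()
	 * and keep their unsuffixed call, e.g.: */
	trace_sched_migrate_task(p, dest_cpu);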
Link: https://lore.kernel.org/all/20250418083351.20a60e64@gandalf.local.home/ Cc: netdev Cc: Jiri Olsa Cc: Peter Zijlstra Cc: David Ahern Cc: Juri Lelli Cc: Breno Leitao Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Gabriele Monaco Cc: Masami Hiramatsu Link: https://lore.kernel.org/20250510163730.092fad5b@gandalf.local.home Acked-by: Mathieu Desnoyers Acked-by: Andrii Nakryiko Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 38 ++++++++++++++++++++++++++------------ include/trace/bpf_probe.h | 8 ++++---- include/trace/define_trace.h | 17 ++++++++++++++++- include/trace/events/sched.h | 30 +++++++++++++++--------------- include/trace/events/tcp.h | 2 +- 5 files changed, 62 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a351763e6965..826ce3f8e1f8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -464,16 +464,30 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #endif #define DECLARE_TRACE(name, proto, args) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + __DECLARE_TRACE(name##_tp, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + __DECLARE_TRACE(name##_tp, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_SYSCALL(name, proto, args) \ + __DECLARE_TRACE_SYSCALL(name##_tp, PARAMS(proto), PARAMS(args), \ + PARAMS(void *__data, proto)) + +#define DECLARE_TRACE_EVENT(name, proto, args) \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()), \ + PARAMS(void *__data, proto)) + +#define DECLARE_TRACE_EVENT_CONDITION(name, proto, args, cond) \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ + PARAMS(void *__data, proto)) + +#define DECLARE_TRACE_EVENT_SYSCALL(name, proto, args) \ __DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args), \ PARAMS(void *__data, proto)) @@ -591,32 +605,32 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_CONDITION(template, name, proto, \ args, cond) \ - DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ assign, print, reg, unreg) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, \ + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FN_COND(name, 
proto, args, cond, struct, \ assign, print, reg, unreg) \ - DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_CONDITION(name, proto, args, cond, \ struct, assign, print) \ - DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, \ print, reg, unreg) \ - DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT_SYSCALL(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FLAGS(event, flag) diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 183fa2aa2935..9391d54d3f12 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -119,14 +119,14 @@ static inline void bpf_test_buffer_##call(void) \ #undef DECLARE_TRACE #define DECLARE_TRACE(call, proto, args) \ - __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \ - __DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), 0) + __BPF_DECLARE_TRACE(call##_tp, PARAMS(proto), PARAMS(args)) \ + __DEFINE_EVENT(call##_tp, call##_tp, PARAMS(proto), PARAMS(args), 0) #undef DECLARE_TRACE_WRITABLE #define DECLARE_TRACE_WRITABLE(call, proto, args, size) \ __CHECK_WRITABLE_BUF_SIZE(call, PARAMS(proto), PARAMS(args), size) \ - __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \ - __DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), size) + __BPF_DECLARE_TRACE(call##_tp, PARAMS(proto), PARAMS(args)) \ + __DEFINE_EVENT(call##_tp, call##_tp, PARAMS(proto), PARAMS(args), size) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index ed52d0506c69..b2ba5a80583f 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -74,10 +74,18 @@ #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ - DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) + DEFINE_TRACE(name##_tp, PARAMS(proto), PARAMS(args)) #undef DECLARE_TRACE_CONDITION #define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ + DEFINE_TRACE(name##_tp, PARAMS(proto), PARAMS(args)) + +#undef DECLARE_TRACE_EVENT +#define DECLARE_TRACE_EVENT(name, proto, args) \ + DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) + +#undef DECLARE_TRACE_EVENT_CONDITION +#define DECLARE_TRACE_EVENT_CONDITION(name, proto, args, cond) \ DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) /* If requested, create helpers for calling these tracepoints from Rust. */ @@ -115,6 +123,11 @@ #undef DECLARE_TRACE_CONDITION #define DECLARE_TRACE_CONDITION(name, proto, args, cond) +#undef DECLARE_TRACE_EVENT +#define DECLARE_TRACE_EVENT(name, proto, args) +#undef DECLARE_TRACE_EVENT_CONDITION +#define DECLARE_TRACE_EVENT_CONDITION(name, proto, args, cond) + #ifdef TRACEPOINTS_ENABLED #include #include @@ -136,6 +149,8 @@ #undef TRACE_HEADER_MULTI_READ #undef DECLARE_TRACE #undef DECLARE_TRACE_CONDITION +#undef DECLARE_TRACE_EVENT +#undef DECLARE_TRACE_EVENT_CONDITION /* Only undef what we defined in this file */ #ifdef UNDEF_TRACE_INCLUDE_FILE diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8994e97d86c1..152fc8b37aa5 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -773,64 +773,64 @@ TRACE_EVENT(sched_wake_idle_without_ipi, * * Postfixed with _tp to make them easily identifiable in the code. 
*/ -DECLARE_TRACE(pelt_cfs_tp, +DECLARE_TRACE(pelt_cfs, TP_PROTO(struct cfs_rq *cfs_rq), TP_ARGS(cfs_rq)); -DECLARE_TRACE(pelt_rt_tp, +DECLARE_TRACE(pelt_rt, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(pelt_dl_tp, +DECLARE_TRACE(pelt_dl, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(pelt_hw_tp, +DECLARE_TRACE(pelt_hw, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(pelt_irq_tp, +DECLARE_TRACE(pelt_irq, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(pelt_se_tp, +DECLARE_TRACE(pelt_se, TP_PROTO(struct sched_entity *se), TP_ARGS(se)); -DECLARE_TRACE(sched_cpu_capacity_tp, +DECLARE_TRACE(sched_cpu_capacity, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(sched_overutilized_tp, +DECLARE_TRACE(sched_overutilized, TP_PROTO(struct root_domain *rd, bool overutilized), TP_ARGS(rd, overutilized)); -DECLARE_TRACE(sched_util_est_cfs_tp, +DECLARE_TRACE(sched_util_est_cfs, TP_PROTO(struct cfs_rq *cfs_rq), TP_ARGS(cfs_rq)); -DECLARE_TRACE(sched_util_est_se_tp, +DECLARE_TRACE(sched_util_est_se, TP_PROTO(struct sched_entity *se), TP_ARGS(se)); -DECLARE_TRACE(sched_update_nr_running_tp, +DECLARE_TRACE(sched_update_nr_running, TP_PROTO(struct rq *rq, int change), TP_ARGS(rq, change)); -DECLARE_TRACE(sched_compute_energy_tp, +DECLARE_TRACE(sched_compute_energy, TP_PROTO(struct task_struct *p, int dst_cpu, unsigned long energy, unsigned long max_util, unsigned long busy_time), TP_ARGS(p, dst_cpu, energy, max_util, busy_time)); -DECLARE_TRACE(sched_entry_tp, +DECLARE_TRACE(sched_entry, TP_PROTO(bool preempt, unsigned long ip), TP_ARGS(preempt, ip)); -DECLARE_TRACE(sched_exit_tp, +DECLARE_TRACE(sched_exit, TP_PROTO(bool is_switch, unsigned long ip), TP_ARGS(is_switch, ip)); -DECLARE_TRACE_CONDITION(sched_set_state_tp, +DECLARE_TRACE_CONDITION(sched_set_state, TP_PROTO(struct task_struct *tsk, int state), TP_ARGS(tsk, state), TP_CONDITION(!!(tsk->__state) != !!state)); diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 1a40c41ff8c3..4f9fa1b5b89b 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -259,7 +259,7 @@ TRACE_EVENT(tcp_retransmit_synack, __entry->saddr_v6, __entry->daddr_v6) ); -DECLARE_TRACE(tcp_cwnd_reduction_tp, +DECLARE_TRACE(tcp_cwnd_reduction, TP_PROTO(const struct sock *sk, int newly_acked_sacked, int newly_lost, int flag), TP_ARGS(sk, newly_acked_sacked, newly_lost, flag) -- cgit v1.2.3 From 155fd6c3e2f02efdc71a9b62888942efc217aff0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 7 May 2025 13:34:58 -0400 Subject: tracing/sched: Use __string() instead of fixed lengths for task->comm The sched_switch and sched_waking events hardcoded the length of the comm it recorded because these events were created before the dynamic strings were implemented. Unfortunately, several other events copied this method. As the size of the comm may change in the future, make the string dynamic. The dynamic string requires a 4 byte meta data to hold the size and offset of the string. The amount stored in the ring buffer will then be the strlen(comm) + 5 (for the \n), and aligned to 4 bytes if there's no other strings. This means that a task comm can have up to 10 characters before it requires another 4 bytes in the ring buffer. Most tasks are usually less than that, so this should not be a problem, and it also allows the name to be extended over the TASK_COMM_LEN [1] Note, sched_switch and the sched_waking trace events still hardcode the length, as there is tooling that still requires that. 
An effort to update the tooling will be made to allow this to change in the future. [1] https://lore.kernel.org/all/20250507110444.963779-1-bhupesh@igalia.com/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Bhupesh Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Juri Lelli Link: https://lore.kernel.org/20250507133458.51bafd95@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- include/trace/events/sched.h | 94 ++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 152fc8b37aa5..fadc7592372b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -20,16 +20,16 @@ TRACE_EVENT(sched_kthread_stop, TP_ARGS(t), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) + __string( comm, t->comm ) + __field( pid_t, pid ) ), TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = t->pid; ), - TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __get_str(comm), __entry->pid) ); /* @@ -276,15 +276,15 @@ TRACE_EVENT(sched_migrate_task, TP_ARGS(p, dest_cpu), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) + __string( comm, p->comm ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) ), TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->orig_cpu = task_cpu(p); @@ -292,7 +292,7 @@ TRACE_EVENT(sched_migrate_task, ), TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", - __entry->comm, __entry->pid, __entry->prio, + __get_str(comm), __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); @@ -303,19 +303,19 @@ DECLARE_EVENT_CLASS(sched_process_template, TP_ARGS(p), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) + __string( comm, p->comm ) + __field( pid_t, pid ) + __field( int, prio ) ), TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", - __entry->comm, __entry->pid, __entry->prio) + __get_str(comm), __entry->pid, __entry->prio) ); /* @@ -349,19 +349,19 @@ TRACE_EVENT(sched_process_wait, TP_ARGS(pid), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) + __string( comm, current->comm ) __field( pid_t, pid ) __field( int, prio ) ), TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = pid_nr(pid); __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", - __entry->comm, __entry->pid, __entry->prio) + __get_str(comm), __entry->pid, __entry->prio) ); /* @@ -374,22 +374,22 @@ TRACE_EVENT(sched_process_fork, TP_ARGS(parent, child), TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) + __string( parent_comm, parent->comm ) + __field( pid_t, parent_pid ) + __string( child_comm, child->comm ) + __field( pid_t, child_pid ) ), TP_fast_assign( - memcpy(__entry->parent_comm, 
parent->comm, TASK_COMM_LEN); + __assign_str(parent_comm); __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __assign_str(child_comm); __entry->child_pid = child->pid; ), TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) + __get_str(parent_comm), __entry->parent_pid, + __get_str(child_comm), __entry->child_pid) ); /* @@ -473,19 +473,19 @@ DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template, TP_ARGS(__perf_task(tsk), __perf_count(delay)), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) + __string( comm, tsk->comm ) + __field( pid_t, pid ) + __field( u64, delay ) ), TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = tsk->pid; __entry->delay = delay; ), TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, + __get_str(comm), __entry->pid, (unsigned long long)__entry->delay) ); @@ -531,19 +531,19 @@ DECLARE_EVENT_CLASS(sched_stat_runtime, TP_ARGS(tsk, __perf_count(runtime)), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, runtime ) + __string( comm, tsk->comm ) + __field( pid_t, pid ) + __field( u64, runtime ) ), TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = tsk->pid; __entry->runtime = runtime; ), TP_printk("comm=%s pid=%d runtime=%Lu [ns]", - __entry->comm, __entry->pid, + __get_str(comm), __entry->pid, (unsigned long long)__entry->runtime) ); @@ -562,14 +562,14 @@ TRACE_EVENT(sched_pi_setprio, TP_ARGS(tsk, pi_task), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, oldprio ) - __field( int, newprio ) + __string( comm, tsk->comm ) + __field( pid_t, pid ) + __field( int, oldprio ) + __field( int, newprio ) ), TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; __entry->newprio = pi_task ? @@ -579,7 +579,7 @@ TRACE_EVENT(sched_pi_setprio, ), TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", - __entry->comm, __entry->pid, + __get_str(comm), __entry->pid, __entry->oldprio, __entry->newprio) ); @@ -589,16 +589,16 @@ TRACE_EVENT(sched_process_hang, TP_ARGS(tsk), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) + __string( comm, tsk->comm ) + __field( pid_t, pid ) ), TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = tsk->pid; ), - TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __get_str(comm), __entry->pid) ); #endif /* CONFIG_DETECT_HUNG_TASK */ -- cgit v1.2.3 From 7bd68ce21d39150d80806233965326359f517b78 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Mon, 12 May 2025 06:54:55 -0700 Subject: drm/gpusvm: Introduce devmem_only flag for allocation This commit adds a new flag, devmem_only, to the drm_gpusvm structure. The purpose of this flag is to ensure that the get_pages function allocates memory exclusively from the device's memory. If the allocation from device memory fails, the function will return an -EFAULT error. Required for shared CPU and GPU atomics on certain devices. 
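A hedged usage sketch (the commented call is illustrative of the get_pages path, not lifted from a driver):

	struct drm_gpusvm_ctx ctx = {
		.devmem_possible = true,
		.devmem_only	 = true,	/* no system-memory fallback */
	};

	/* err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
	 * With devmem_only set, failure to place the range in device
	 * memory returns -EFAULT instead of silently using system RAM. */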
v3: - s/vram_only/devmem_only/ Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250512135500.1405019-2-matthew.brost@intel.com (cherry picked from commit 8a9b978ebd47df9e0694c34748c2d6fa0c31eb4d) Signed-off-by: Lucas De Marchi --- include/drm/drm_gpusvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index df120b4d1f83..9fd25fc880a4 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -286,6 +286,7 @@ struct drm_gpusvm { * @in_notifier: entering from a MMU notifier * @read_only: operating on read-only memory * @devmem_possible: possible to use device memory + * @devmem_only: use only device memory * * Context that is DRM GPUSVM is operating in (i.e. user arguments). */ @@ -294,6 +295,7 @@ struct drm_gpusvm_ctx { unsigned int in_notifier :1; unsigned int read_only :1; unsigned int devmem_possible :1; + unsigned int devmem_only :1; }; int drm_gpusvm_init(struct drm_gpusvm *gpusvm, -- cgit v1.2.3 From 794f5493f518916380578f14d999de92b930b609 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 12 May 2025 06:54:56 -0700 Subject: drm/xe: Strict migration policy for atomic SVM faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixing GPU and CPU atomics does not work unless a strict migration policy of GPU atomics must be device memory. Enforce a policy of must be in VRAM with a retry loop of 3 attempts, if retry loop fails abort fault. Removing always_migrate_to_vram modparam as we now have real migration policy. v2: - Only retry migration on atomics - Drop alway migrate modparam v3: - Only set vram_only on DGFX (Himal) - Bail on get_pages failure if vram_only and retry count exceeded (Himal) - s/vram_only/devmem_only - Update xe_svm_range_is_valid to accept devmem_only argument v4: - Fix logic bug get_pages failure v5: - Fix commit message (Himal) - Mention removing always_migrate_to_vram in commit message (Lucas) - Fix xe_svm_range_is_valid to check for devmem pages - Bail on devmem_only && !migrate_devmem (Thomas) v6: - Add READ_ONCE barriers for opportunistic checks (Thomas) - Pair READ_ONCE with WRITE_ONCE (Thomas) v7: - Adjust comments (Thomas) Fixes: 2f118c949160 ("drm/xe: Add SVM VRAM migration") Cc: stable@vger.kernel.org Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Matthew Brost Acked-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20250512135500.1405019-3-matthew.brost@intel.com (cherry picked from commit a9ac0fa455b050d03e3032501368048fb284d318) Signed-off-by: Lucas De Marchi --- include/drm/drm_gpusvm.h | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 9fd25fc880a4..653d48dbe1c1 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -185,6 +185,31 @@ struct drm_gpusvm_notifier { } flags; }; +/** + * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags + * + * @migrate_devmem: Flag indicating whether the range can be migrated to device memory + * @unmapped: Flag indicating if the range has been unmapped + * @partial_unmap: Flag indicating if the range has been partially unmapped + * @has_devmem_pages: Flag indicating if the range 
has devmem pages + * @has_dma_mapping: Flag indicating if the range has a DMA mapping + * @__flags: Flags for range in u16 form (used for READ_ONCE) + */ +struct drm_gpusvm_range_flags { + union { + struct { + /* All flags below must be set upon creation */ + u16 migrate_devmem : 1; + /* All flags below must be set / cleared under notifier lock */ + u16 unmapped : 1; + u16 partial_unmap : 1; + u16 has_devmem_pages : 1; + u16 has_dma_mapping : 1; + }; + u16 __flags; + }; +}; + /** * struct drm_gpusvm_range - Structure representing a GPU SVM range * @@ -198,11 +223,6 @@ struct drm_gpusvm_notifier { * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping. * Note this is assuming only one drm_pagemap per range is allowed. * @flags: Flags for range - * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory - * @flags.unmapped: Flag indicating if the range has been unmapped - * @flags.partial_unmap: Flag indicating if the range has been partially unmapped - * @flags.has_devmem_pages: Flag indicating if the range has devmem pages - * @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping * * This structure represents a GPU SVM range used for tracking memory ranges * mapped in a DRM device. @@ -216,15 +236,7 @@ struct drm_gpusvm_range { unsigned long notifier_seq; struct drm_pagemap_device_addr *dma_addr; struct drm_pagemap *dpagemap; - struct { - /* All flags below must be set upon creation */ - u16 migrate_devmem : 1; - /* All flags below must be set / cleared under notifier lock */ - u16 unmapped : 1; - u16 partial_unmap : 1; - u16 has_devmem_pages : 1; - u16 has_dma_mapping : 1; - } flags; + struct drm_gpusvm_range_flags flags; }; /** -- cgit v1.2.3 From df8c37810b5a4a5c8bff1f880f59163c800a4985 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 12 May 2025 06:54:57 -0700 Subject: drm/gpusvm: Add timeslicing support to GPU SVM Add timeslicing support to GPU SVM which will guarantee the GPU a minimum execution time on piece of physical memory before migration back to CPU. Intended to implement strict migration policies which require memory to be in a certain placement for correct execution. Required for shared CPU and GPU atomics on certain devices. Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250512135500.1405019-4-matthew.brost@intel.com (cherry picked from commit 8dc1812b5b3a42311d28eb385eed88e2053ad3cb) Signed-off-by: Lucas De Marchi --- include/drm/drm_gpusvm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 653d48dbe1c1..eaf704d3d05e 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops { * @ops: Pointer to the operations structure for GPU SVM device memory * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to. * @size: Size of device memory allocation + * @timeslice_expiration: Timeslice expiration in jiffies */ struct drm_gpusvm_devmem { struct device *dev; @@ -97,6 +98,7 @@ struct drm_gpusvm_devmem { const struct drm_gpusvm_devmem_ops *ops; struct drm_pagemap *dpagemap; size_t size; + u64 timeslice_expiration; }; /** @@ -295,6 +297,8 @@ struct drm_gpusvm { * @check_pages_threshold: Check CPU pages for present if chunk is less than or * equal to threshold. 
If not present, reduce chunk * size. + * @timeslice_ms: The timeslice MS which in minimum time a piece of memory + * remains with either exclusive GPU or CPU access. * @in_notifier: entering from a MMU notifier * @read_only: operating on read-only memory * @devmem_possible: possible to use device memory @@ -304,6 +308,7 @@ struct drm_gpusvm { */ struct drm_gpusvm_ctx { unsigned long check_pages_threshold; + unsigned long timeslice_ms; unsigned int in_notifier :1; unsigned int read_only :1; unsigned int devmem_possible :1; -- cgit v1.2.3 From 289c99bec7eed918ab37c62cbb29a2e3f58fb1fb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 13 May 2025 22:24:09 -0700 Subject: lib/crc32: add SPDX license identifier lib/crc32.c and include/linux/crc32.h got missed by the bulk SPDX conversion because of the nonstandard explanation of the license. However, crc32.c clearly states that it's licensed under the GNU General Public License, Version 2. And the comment in crc32.h clearly indicates that it's meant to have the same license as crc32.c. Therefore, apply SPDX-License-Identifier: GPL-2.0-only to both files. Reviewed-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250514052409.194822-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc32.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 69c2e8bb3782..569dc13f139f 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -1,7 +1,4 @@ -/* - * crc32.h - * See linux/lib/crc32.c for license and changes - */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_CRC32_H #define _LINUX_CRC32_H -- cgit v1.2.3 From 539fbab37881e32ba6a708a100de6db19e1e7e7d Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 7 Apr 2025 15:28:05 +0300 Subject: tpm: Mask TPM RC in tpm2_start_auth_session() tpm2_start_auth_session() does not mask TPM RC correctly from the callers: [ 28.766528] tpm tpm0: A TPM error (2307) occurred start auth session Process TPM RCs inside tpm2_start_auth_session(), and map them to POSIX error codes. Cc: stable@vger.kernel.org # v6.10+ Fixes: 699e3efd6c64 ("tpm: Add HMAC session start and end functions") Reported-by: Herbert Xu Closes: https://lore.kernel.org/linux-integrity/Z_NgdRHuTKP6JK--@gondor.apana.org.au/ Reviewed-by: Stefano Garzarella Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 6c3125300c00..9ac9768cc8f7 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -257,6 +257,7 @@ enum tpm2_return_codes { TPM2_RC_TESTING = 0x090A, /* RC_WARN */ TPM2_RC_REFERENCE_H0 = 0x0910, TPM2_RC_RETRY = 0x0922, + TPM2_RC_SESSION_MEMORY = 0x0903, }; enum tpm2_command_codes { @@ -437,6 +438,24 @@ static inline u32 tpm2_rc_value(u32 rc) return (rc & BIT(7)) ? rc & 0xbf : rc; } +/* + * Convert a return value from tpm_transmit_cmd() to POSIX error code. 
+ */ +static inline ssize_t tpm_ret_to_err(ssize_t ret) +{ + if (ret < 0) + return ret; + + switch (tpm2_rc_value(ret)) { + case TPM2_RC_SUCCESS: + return 0; + case TPM2_RC_SESSION_MEMORY: + return -ENOMEM; + default: + return -EFAULT; + } +} + #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); -- cgit v1.2.3 From 2f661f71fda1fc0c42b7746ca5b7da529eb6b5be Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 4 Apr 2025 10:23:14 +0200 Subject: tpm: tis: Double the timeout B to 4s With some Infineon chips the timeouts in tpm_tis_send_data (both B and C) can reach up to about 2250 ms. Timeout C is retried since commit de9e33df7762 ("tpm, tpm_tis: Workaround failed command reception on Infineon devices") Timeout B still needs to be extended. The problem is most commonly encountered with context related operation such as load context/save context. These are issued directly by the kernel, and there is no retry logic for them. When a filesystem is set up to use the TPM for unlocking the boot fails, and restarting the userspace service is ineffective. This is likely because ignoring a load context/save context result puts the real TPM state and the TPM state expected by the kernel out of sync. Chips known to be affected: tpm_tis IFX1522:00: 2.0 TPM (device-id 0x1D, rev-id 54) Description: SLB9672 Firmware Revision: 15.22 tpm_tis MSFT0101:00: 2.0 TPM (device-id 0x1B, rev-id 22) Firmware Revision: 7.83 tpm_tis MSFT0101:00: 2.0 TPM (device-id 0x1A, rev-id 16) Firmware Revision: 5.63 Link: https://lore.kernel.org/linux-integrity/Z5pI07m0Muapyu9w@kitsune.suse.cz/ Signed-off-by: Michal Suchanek Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 9ac9768cc8f7..a3d8305e88a5 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -224,7 +224,7 @@ enum tpm2_const { enum tpm2_timeouts { TPM2_TIMEOUT_A = 750, - TPM2_TIMEOUT_B = 2000, + TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, TPM2_DURATION_SHORT = 20, -- cgit v1.2.3 From 45a442fe369e6c4e0b4aa9f63b31c3f2f9e2090e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 12 May 2025 17:06:04 -0700 Subject: Drivers: hv: vmbus: Remove vmbus_sendpacket_pagebuffer() With the netvsc driver changed to use vmbus_sendpacket_mpb_desc() instead of vmbus_sendpacket_pagebuffer(), the latter has no remaining callers. Remove it. 
Cc: # 6.1.x Signed-off-by: Michael Kelley Link: https://patch.msgid.link/20250513000604.1396-6-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- include/linux/hyperv.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 675959fb97ba..ea806cef4a0d 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1161,13 +1161,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, -- cgit v1.2.3 From 8d9117009dd690f647a66912f429c96335069907 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 13 May 2025 13:23:31 +0200 Subject: fuse: don't allow signals to interrupt getdents copying When getting the directory contents, the entries are first fetched to a kernel buffer, then they are copied to userspace with dir_emit(). This second phase is non-blocking as long as the userspace buffer is not paged out, making it interruptible makes zero sense. Overload d_type as flags, since it only uses 4 bits from 32. Reviewed-by: Bernd Schubert Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/20250513112335.1473177-1-mszeredi@redhat.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index cf7208500cee..c8b5af17060e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2073,6 +2073,9 @@ struct dir_context { loff_t pos; }; +/* If OR-ed with d_type, pending signals are not checked */ +#define FILLDIR_FLAG_NOINTR 0x1000 + /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. -- cgit v1.2.3 From 132833405e61463d47d6badff1b8080b09b5808e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 5 May 2025 19:54:39 +0530 Subject: PCI: Add debugfs support for exposing PTM context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Precision Time Management (PTM) mechanism defined in PCIe spec r6.0, sec 6.21 allows precise coordination of timing information across multiple components in a PCIe hierarchy with independent local time clocks. PCI core already supports enabling PTM in the root port and endpoint devices through PTM Extended Capability registers. But the PTM context supported by the PTM capable components such as Root Complex (RC) and Endpoint (EP) controllers were not exposed as of now. Part of the reason is that the spec doesn't define how the context information is exposed to the software and left it to the vendor implementation. So there is no standardized way to get access to the context information and each vendor have defined their own way. This commit adds debugfs support to expose the PTM context to userspace from both PCIe RC and EP controllers. Since the context information is exposed in a vendor specific way, the debugfs interface allows the controller drivers to implement callbacks for each attribute, to be called by the generic PTM driver. 
The Controller drivers are expected to call pcie_ptm_create_debugfs() to create the debugfs attributes for the PTM context and call pcie_ptm_destroy_debugfs() to destroy them. The drivers should also populate the relevant callbacks in the 'struct pcie_ptm_ops' structure based on the controller implementation. Below PTM context are exposed through debugfs: PCIe RC ======= 1. PTM Local clock 2. PTM T2 timestamp 3. PTM T3 timestamp 4. PTM Context valid PCIe EP ======= 1. PTM Local clock 2. PTM T1 timestamp 3. PTM T4 timestamp 4. PTM Master clock 5. PTM Context update Signed-off-by: Manivannan Sadhasivam [kwilczynski: fix overflow issue reported by Dan Carpenter from https://lore.kernel.org/linux-pci/b41c1754-c6b7-4805-9f14-7c643d6c5304@suswa.mountain] Signed-off-by: Krzysztof Wilczyński Link: https://patch.msgid.link/20250505-pcie-ptm-v4-1-02d26d51400b@linaro.org --- include/linux/pci.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..01a29076604f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1856,6 +1856,39 @@ static inline bool pci_aer_available(void) { return false; } bool pci_ats_disabled(void); +#define PCIE_PTM_CONTEXT_UPDATE_AUTO 0 +#define PCIE_PTM_CONTEXT_UPDATE_MANUAL 1 + +struct pcie_ptm_ops { + int (*check_capability)(void *drvdata); + int (*context_update_write)(void *drvdata, u8 mode); + int (*context_update_read)(void *drvdata, u8 *mode); + int (*context_valid_write)(void *drvdata, bool valid); + int (*context_valid_read)(void *drvdata, bool *valid); + int (*local_clock_read)(void *drvdata, u64 *clock); + int (*master_clock_read)(void *drvdata, u64 *clock); + int (*t1_read)(void *drvdata, u64 *clock); + int (*t2_read)(void *drvdata, u64 *clock); + int (*t3_read)(void *drvdata, u64 *clock); + int (*t4_read)(void *drvdata, u64 *clock); + + bool (*context_update_visible)(void *drvdata); + bool (*context_valid_visible)(void *drvdata); + bool (*local_clock_visible)(void *drvdata); + bool (*master_clock_visible)(void *drvdata); + bool (*t1_visible)(void *drvdata); + bool (*t2_visible)(void *drvdata); + bool (*t3_visible)(void *drvdata); + bool (*t4_visible)(void *drvdata); +}; + +struct pci_ptm_debugfs { + struct dentry *debugfs; + const struct pcie_ptm_ops *ops; + struct mutex lock; + void *pdata; +}; + #ifdef CONFIG_PCIE_PTM int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); void pci_disable_ptm(struct pci_dev *dev); @@ -1868,6 +1901,18 @@ static inline bool pcie_ptm_enabled(struct pci_dev *dev) { return false; } #endif +#if IS_ENABLED(CONFIG_DEBUG_FS) && IS_ENABLED(CONFIG_PCIE_PTM) +struct pci_ptm_debugfs *pcie_ptm_create_debugfs(struct device *dev, void *pdata, + const struct pcie_ptm_ops *ops); +void pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs); +#else +static inline struct pci_ptm_debugfs +*pcie_ptm_create_debugfs(struct device *dev, void *pdata, + const struct pcie_ptm_ops *ops) { return NULL; } +static inline void +pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs) { } +#endif + void pci_cfg_access_lock(struct pci_dev *dev); bool pci_cfg_access_trylock(struct pci_dev *dev); void pci_cfg_access_unlock(struct pci_dev *dev); -- cgit v1.2.3 From e0410e956b97e8b50b2aa7b02ba70e5f09b31ebe Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 13 May 2025 17:10:08 +0200 Subject: readdir: supply dir_context.count as readdir buffer size hint This is a preparation for large readdir buffers in fuse. 
Simply setting the fuse buffer size to the userspace buffer size should work, the record sizes are similar (fuse's is slightly larger than libc's, so no overflow should ever happen). Signed-off-by: Miklos Szeredi Signed-off-by: Jaco Kroon Link: https://lore.kernel.org/20250513151012.1476536-1-mszeredi@redhat.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c8b5af17060e..ddd54a664916 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2071,6 +2071,13 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, struct dir_context { filldir_t actor; loff_t pos; + /* + * Filesystems MUST NOT MODIFY count, but may use as a hint: + * 0 unknown + * > 0 space in buffer (assume at least one entry) + * INT_MAX unlimited + */ + int count; }; /* If OR-ed with d_type, pending signals are not checked */ -- cgit v1.2.3 From d8c5507cd140d9471472ece673e70250b957c595 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 13 May 2025 17:03:24 +0200 Subject: include/linux/fs.h: add inode_lock_killable() Prepare for making inode operations killable while they're waiting for the lock. Signed-off-by: Max Kellermann Link: https://lore.kernel.org/20250513150327.1373061-1-max.kellermann@ionos.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index ddd54a664916..82c6093b8582 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -867,6 +867,11 @@ static inline void inode_lock(struct inode *inode) down_write(&inode->i_rwsem); } +static inline __must_check int inode_lock_killable(struct inode *inode) +{ + return down_write_killable(&inode->i_rwsem); +} + static inline void inode_unlock(struct inode *inode) { up_write(&inode->i_rwsem); @@ -877,6 +882,11 @@ static inline void inode_lock_shared(struct inode *inode) down_read(&inode->i_rwsem); } +static inline __must_check int inode_lock_shared_killable(struct inode *inode) +{ + return down_read_killable(&inode->i_rwsem); +} + static inline void inode_unlock_shared(struct inode *inode) { up_read(&inode->i_rwsem); -- cgit v1.2.3 From c16608005ccb99fbde3a4cd96eab28e16f148abf Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 13 May 2025 11:19:22 +0300 Subject: net: Look for bonding slaves in the bond's network namespace Update the for_each_netdev_in_bond_rcu macro to iterate through network devices in the bond's network namespace instead of always using init_net. This change is safe because: 1. **Bond-Slave Namespace Relationship**: A bond device and its slaves must reside in the same network namespace. The bond device's namespace is established at creation time and cannot change. 2. **Slave Movement Implications**: Any attempt to move a slave device to a different namespace automatically removes it from the bond, as per kernel networking stack rules. This maintains the invariant that slaves must exist in the same namespace as their bond. This change is part of an effort to enable Link Aggregation (LAG) to work properly inside custom network namespaces. Previously, the macro would only find slave devices in the initial network namespace, preventing proper bonding functionality in custom namespaces. 
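A short sketch of a typical user of the macro (the per-slave handler is hypothetical):

	struct net_device *slave;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(bond_dev, slave)
		count_slave(slave);	/* hypothetical per-slave handler */
	rcu_read_unlock();

With this patch the iteration walks dev_net_rcu(bond_dev) instead of init_net, so the loop also sees slaves of bonds that live in a custom network namespace.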
Signed-off-by: Shay Drory Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250513081922.525716-1-mbloch@nvidia.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 32a1e41636a9..9e3a2d8452d6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3268,7 +3268,7 @@ int call_netdevice_notifiers_info(unsigned long val, #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_in_bond_rcu(bond, slave) \ - for_each_netdev_rcu(&init_net, slave) \ + for_each_netdev_rcu(dev_net_rcu(bond), slave) \ if (netdev_master_upper_dev_get_rcu(slave) == (bond)) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) -- cgit v1.2.3 From 5c41f6010ef8139bf19c63419498fd1d38d3ed2b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 28 Mar 2025 12:26:59 +0000 Subject: btrfs: remove EXTENT_UPTODATE io tree flag The EXTENT_UPTODATE io tree flag is now used only to mark ranges in the fs_info->excluded_extents as used by super blocks and not available for extent allocation (to prevent adding those ranges as free space in the in memory space caches). As we can use any flag for that purpose, and we are using EXTENT_DIRTY for the pinned extents io tree for example, remove the EXTENT_UPTODATE flag and use instead EXTENT_DIRTY for the excluded extents io tree. Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 3efc00cc1bcd..13bd5f36a468 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -143,7 +143,6 @@ FLUSH_STATES #define EXTENT_FLAGS \ { EXTENT_DIRTY, "DIRTY"}, \ - { EXTENT_UPTODATE, "UPTODATE"}, \ { EXTENT_LOCKED, "LOCKED"}, \ { EXTENT_NEW, "NEW"}, \ { EXTENT_DELALLOC, "DELALLOC"}, \ -- cgit v1.2.3 From 0f987c099d22c3b8c7d94fd13f957792e46f79c9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 3 Apr 2025 16:23:41 +0100 Subject: btrfs: tracepoints: use btrfs_root_id() to get the id of a root Instead of open coding btrfs_root_id() to get the ID of a root, use the helper in the trace points, which also makes the code less verbose. 
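For context, btrfs_root_id() is essentially a one-line accessor (sketch of the existing helper), so the conversion below is purely a readability cleanup:

	static inline u64 btrfs_root_id(const struct btrfs_root *root)
	{
		return root->root_key.objectid;
	}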
Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 50 ++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 13bd5f36a468..5f3aabae642a 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -223,8 +223,7 @@ DECLARE_EVENT_CLASS(btrfs__inode, __entry->generation = BTRFS_I(inode)->generation; __entry->last_trans = BTRFS_I(inode)->last_trans; __entry->logged_trans = BTRFS_I(inode)->logged_trans; - __entry->root_objectid = - BTRFS_I(inode)->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); ), TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%llu blocks=%llu " @@ -296,7 +295,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->ino = btrfs_ino(inode); __entry->start = map->start; __entry->len = map->len; @@ -375,7 +374,7 @@ DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular, ), TP_fast_assign_btrfs(bi->root->fs_info, - __entry->root_obj = bi->root->root_key.objectid; + __entry->root_obj = btrfs_root_id(bi->root); __entry->ino = btrfs_ino(bi); __entry->isize = bi->vfs_inode.i_size; __entry->disk_isize = bi->disk_i_size; @@ -426,7 +425,7 @@ DECLARE_EVENT_CLASS( TP_fast_assign_btrfs( bi->root->fs_info, - __entry->root_obj = bi->root->root_key.objectid; + __entry->root_obj = btrfs_root_id(bi->root); __entry->ino = btrfs_ino(bi); __entry->isize = bi->vfs_inode.i_size; __entry->disk_isize = bi->disk_i_size; @@ -526,7 +525,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent, __entry->flags = ordered->flags; __entry->compress_type = ordered->compress_type; __entry->refs = refcount_read(&ordered->refs); - __entry->root_objectid = inode->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(inode->root); __entry->truncated_len = ordered->truncated_len; ), @@ -663,7 +662,7 @@ TRACE_EVENT(btrfs_finish_ordered_extent, __entry->start = start; __entry->len = len; __entry->uptodate = uptodate; - __entry->root_objectid = inode->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(inode->root); ), TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu uptodate=%d", @@ -704,8 +703,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage, __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; __entry->writeback_index = inode->i_mapping->writeback_index; - __entry->root_objectid = - BTRFS_I(inode)->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); ), TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu " @@ -749,7 +747,7 @@ TRACE_EVENT(btrfs_writepage_end_io_hook, __entry->start = start; __entry->end = end; __entry->uptodate = uptodate; - __entry->root_objectid = inode->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(inode->root); ), TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu end=%llu uptodate=%d", @@ -779,8 +777,7 @@ TRACE_EVENT(btrfs_sync_file, __entry->ino = btrfs_ino(BTRFS_I(inode)); __entry->parent = btrfs_ino(BTRFS_I(d_inode(dentry->d_parent))); __entry->datasync = datasync; - __entry->root_objectid = - BTRFS_I(inode)->root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); ), TP_printk_btrfs("root=%llu(%s) ino=%llu parent=%llu datasync=%d", @@ 
-1051,7 +1048,7 @@ DECLARE_EVENT_CLASS(btrfs__chunk, __entry->sub_stripes = map->sub_stripes; __entry->offset = offset; __entry->size = size; - __entry->root_objectid = fs_info->chunk_root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(fs_info->chunk_root); ), TP_printk_btrfs("root=%llu(%s) offset=%llu size=%llu " @@ -1096,7 +1093,7 @@ TRACE_EVENT(btrfs_cow_block, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->buf_start = buf->start; __entry->refs = atomic_read(&buf->refs); __entry->cow_start = cow->start; @@ -1254,7 +1251,7 @@ TRACE_EVENT(find_free_extent, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->num_bytes = ffe_ctl->num_bytes; __entry->empty_size = ffe_ctl->empty_size; __entry->flags = ffe_ctl->flags; @@ -1283,7 +1280,7 @@ TRACE_EVENT(find_free_extent_search_loop, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->num_bytes = ffe_ctl->num_bytes; __entry->empty_size = ffe_ctl->empty_size; __entry->flags = ffe_ctl->flags; @@ -1317,7 +1314,7 @@ TRACE_EVENT(find_free_extent_have_block_group, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->num_bytes = ffe_ctl->num_bytes; __entry->empty_size = ffe_ctl->empty_size; __entry->flags = ffe_ctl->flags; @@ -1671,8 +1668,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data, ), TP_fast_assign_btrfs(btrfs_sb(inode->i_sb), - __entry->rootid = - BTRFS_I(inode)->root->root_key.objectid; + __entry->rootid = btrfs_root_id(BTRFS_I(inode)->root); __entry->ino = btrfs_ino(BTRFS_I(inode)); __entry->start = start; __entry->len = len; @@ -1865,7 +1861,7 @@ TRACE_EVENT(qgroup_meta_reserve, ), TP_fast_assign_btrfs(root->fs_info, - __entry->refroot = root->root_key.objectid; + __entry->refroot = btrfs_root_id(root); __entry->diff = diff; __entry->type = type; ), @@ -1887,7 +1883,7 @@ TRACE_EVENT(qgroup_meta_convert, ), TP_fast_assign_btrfs(root->fs_info, - __entry->refroot = root->root_key.objectid; + __entry->refroot = btrfs_root_id(root); __entry->diff = diff; ), @@ -1911,7 +1907,7 @@ TRACE_EVENT(qgroup_meta_free_all_pertrans, ), TP_fast_assign_btrfs(root->fs_info, - __entry->refroot = root->root_key.objectid; + __entry->refroot = btrfs_root_id(root); spin_lock(&root->qgroup_meta_rsv_lock); __entry->diff = -(s64)root->qgroup_meta_rsv_pertrans; spin_unlock(&root->qgroup_meta_rsv_lock); @@ -1993,7 +1989,7 @@ TRACE_EVENT(btrfs_inode_mod_outstanding_extents, ), TP_fast_assign_btrfs(root->fs_info, - __entry->root_objectid = root->root_key.objectid; + __entry->root_objectid = btrfs_root_id(root); __entry->ino = ino; __entry->mod = mod; __entry->outstanding = outstanding; @@ -2078,7 +2074,7 @@ TRACE_EVENT(btrfs_set_extent_bit, __entry->owner = tree->owner; __entry->ino = inode ? btrfs_ino(inode) : 0; - __entry->rootid = inode ? inode->root->root_key.objectid : 0; + __entry->rootid = inode ? btrfs_root_id(inode->root) : 0; __entry->start = start; __entry->len = len; __entry->set_bits = set_bits; @@ -2111,7 +2107,7 @@ TRACE_EVENT(btrfs_clear_extent_bit, __entry->owner = tree->owner; __entry->ino = inode ? btrfs_ino(inode) : 0; - __entry->rootid = inode ? inode->root->root_key.objectid : 0; + __entry->rootid = inode ? 
btrfs_root_id(inode->root) : 0; __entry->start = start; __entry->len = len; __entry->clear_bits = clear_bits; @@ -2145,7 +2141,7 @@ TRACE_EVENT(btrfs_convert_extent_bit, __entry->owner = tree->owner; __entry->ino = inode ? btrfs_ino(inode) : 0; - __entry->rootid = inode ? inode->root->root_key.objectid : 0; + __entry->rootid = inode ? btrfs_root_id(inode->root) : 0; __entry->start = start; __entry->len = len; __entry->set_bits = set_bits; @@ -2620,7 +2616,7 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em, TP_fast_assign_btrfs(inode->root->fs_info, __entry->ino = btrfs_ino(inode); - __entry->root_id = inode->root->root_key.objectid; + __entry->root_id = btrfs_root_id(inode->root); __entry->start = em->start; __entry->len = em->len; __entry->flags = em->flags; -- cgit v1.2.3 From 41708a4c23500ebd024d66adaafe0e6022df29a9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 7 Apr 2025 17:52:22 +0100 Subject: btrfs: add btrfs prefix to trace events for extent state alloc and free These trace events don't have the 'btrfs_' prefix in their name, unlike the other trace events from extent-io-tree.c. So add the prefix to make them consistent and follow coding style conventions too. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 5f3aabae642a..60f9a6ca54f8 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1476,7 +1476,7 @@ TRACE_EVENT(btrfs_setup_cluster, ); struct extent_state; -TRACE_EVENT(alloc_extent_state, +TRACE_EVENT(btrfs_alloc_extent_state, TP_PROTO(const struct extent_state *state, gfp_t mask, unsigned long IP), @@ -1499,7 +1499,7 @@ TRACE_EVENT(alloc_extent_state, show_gfp_flags(__entry->mask), __entry->ip) ); -TRACE_EVENT(free_extent_state, +TRACE_EVENT(btrfs_free_extent_state, TP_PROTO(const struct extent_state *state, unsigned long IP), -- cgit v1.2.3 From 02c340c2781177242386ac5f31cc498a6929ed44 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 4 Apr 2025 11:09:07 +0100 Subject: btrfs: rename the functions to get inode and fs_info from an extent io tree These functions are exported so they should have a 'btrfs_' prefix by convention, to make it clear they are btrfs specific and to avoid collisions with functions from elsewhere in the kernel. So add a 'btrfs_' prefix to their name to make it clear they are from btrfs. Also remove the 'const' suffix from extent_io_tree_to_inode_const() since there's no non-const variant anymore and makes the naming consistent with extent_io_tree_to_fs_info() (no 'const' suffix and returns a const pointer). 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 60f9a6ca54f8..8edbc0379a01 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2069,8 +2069,8 @@ TRACE_EVENT(btrfs_set_extent_bit, __field( unsigned, set_bits) ), - TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree), - const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree); + TP_fast_assign_btrfs(btrfs_extent_io_tree_to_fs_info(tree), + const struct btrfs_inode *inode = btrfs_extent_io_tree_to_inode(tree); __entry->owner = tree->owner; __entry->ino = inode ? btrfs_ino(inode) : 0; @@ -2102,8 +2102,8 @@ TRACE_EVENT(btrfs_clear_extent_bit, __field( unsigned, clear_bits) ), - TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree), - const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree); + TP_fast_assign_btrfs(btrfs_extent_io_tree_to_fs_info(tree), + const struct btrfs_inode *inode = btrfs_extent_io_tree_to_inode(tree); __entry->owner = tree->owner; __entry->ino = inode ? btrfs_ino(inode) : 0; @@ -2136,8 +2136,8 @@ TRACE_EVENT(btrfs_convert_extent_bit, __field( unsigned, clear_bits) ), - TP_fast_assign_btrfs(extent_io_tree_to_fs_info(tree), - const struct btrfs_inode *inode = extent_io_tree_to_inode_const(tree); + TP_fast_assign_btrfs(btrfs_extent_io_tree_to_fs_info(tree), + const struct btrfs_inode *inode = btrfs_extent_io_tree_to_inode(tree); __entry->owner = tree->owner; __entry->ino = inode ? btrfs_ino(inode) : 0; -- cgit v1.2.3 From 81eb6ce8b55a0f6757e9f87702bd6437ea834c65 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 8 Apr 2025 12:01:06 +0100 Subject: btrfs: tracepoints: add btrfs prefix to names where it's missing Most of our tracepoints have the 'btrfs_' prefix in their names but a few of them are missing, making it inconsistent. So add the prefix to the ones that are missing it, creating consistency, making it clear for users these are btrfs tracepoints and eventually avoid name collisions with other tracepoints defined by other kernel subsystems. 
Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 8edbc0379a01..690541892f17 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1236,7 +1236,7 @@ DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free, TP_ARGS(fs_info, start, len) ); -TRACE_EVENT(find_free_extent, +TRACE_EVENT(btrfs_find_free_extent, TP_PROTO(const struct btrfs_root *root, const struct find_free_extent_ctl *ffe_ctl), @@ -1264,7 +1264,7 @@ TRACE_EVENT(find_free_extent, BTRFS_GROUP_FLAGS)) ); -TRACE_EVENT(find_free_extent_search_loop, +TRACE_EVENT(btrfs_find_free_extent_search_loop, TP_PROTO(const struct btrfs_root *root, const struct find_free_extent_ctl *ffe_ctl), @@ -1294,7 +1294,7 @@ TRACE_EVENT(find_free_extent_search_loop, __entry->loop) ); -TRACE_EVENT(find_free_extent_have_block_group, +TRACE_EVENT(btrfs_find_free_extent_have_block_group, TP_PROTO(const struct btrfs_root *root, const struct find_free_extent_ctl *ffe_ctl, @@ -1739,7 +1739,7 @@ DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent, TP_ARGS(fs_info, rec, bytenr) ); -TRACE_EVENT(qgroup_num_dirty_extents, +TRACE_EVENT(btrfs_qgroup_num_dirty_extents, TP_PROTO(const struct btrfs_fs_info *fs_info, u64 transid, u64 num_dirty_extents), @@ -1793,7 +1793,7 @@ TRACE_EVENT(btrfs_qgroup_account_extent, __entry->nr_new_roots) ); -TRACE_EVENT(qgroup_update_counters, +TRACE_EVENT(btrfs_qgroup_update_counters, TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup, @@ -1822,7 +1822,7 @@ TRACE_EVENT(qgroup_update_counters, __entry->cur_old_count, __entry->cur_new_count) ); -TRACE_EVENT(qgroup_update_reserve, +TRACE_EVENT(btrfs_qgroup_update_reserve, TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup, s64 diff, int type), @@ -1848,7 +1848,7 @@ TRACE_EVENT(qgroup_update_reserve, __entry->cur_reserved, __entry->diff) ); -TRACE_EVENT(qgroup_meta_reserve, +TRACE_EVENT(btrfs_qgroup_meta_reserve, TP_PROTO(const struct btrfs_root *root, s64 diff, int type), @@ -1871,7 +1871,7 @@ TRACE_EVENT(qgroup_meta_reserve, __print_symbolic(__entry->type, QGROUP_RSV_TYPES), __entry->diff) ); -TRACE_EVENT(qgroup_meta_convert, +TRACE_EVENT(btrfs_qgroup_meta_convert, TP_PROTO(const struct btrfs_root *root, s64 diff), @@ -1894,7 +1894,7 @@ TRACE_EVENT(qgroup_meta_convert, __entry->diff) ); -TRACE_EVENT(qgroup_meta_free_all_pertrans, +TRACE_EVENT(btrfs_qgroup_meta_free_all_pertrans, TP_PROTO(struct btrfs_root *root), -- cgit v1.2.3 From cff6df108b39b0433aea16c35ee806ffca662bee Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 8 Apr 2025 12:08:49 +0100 Subject: btrfs: tracepoints: remove no longer used tracepoints for eb locking There are several tracepoints for extent buffer locks that are not used anymore: * btrfs_tree_read_unlock_blocking * btrfs_set_lock_blocking_read * btrfs_set_lock_blocking_write * btrfs_tree_read_lock_atomic These stopped being used after we switched extent buffer locks from a custom implementation to rw semaphores in commit 196d59ab9ccc ("btrfs: switch extent buffer tree lock to rw_semaphore"). 
Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 690541892f17..bebc252db865 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2336,11 +2336,7 @@ DEFINE_EVENT(btrfs_locking_events, name, \ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_unlock); DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock); -DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking); -DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read); -DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write); DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock); -DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic); DECLARE_EVENT_CLASS(btrfs__space_info_update, -- cgit v1.2.3 From b9eef3391de028fdd88fd7a2f81a4834fc98c9ac Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 May 2025 11:27:26 +0200 Subject: xdp: Use nested-BH locking for system_page_pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit system_page_pool is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Make a struct with a page_pool member (original system_page_pool) and a local_lock_t and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Cc: Andrew Lunn Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jesper Dangaard Brouer Cc: John Fastabend Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250512092736.229935-6-bigeasy@linutronix.de Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e3a2d8452d6..73a97cf1bbce 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3503,7 +3503,12 @@ struct softnet_data { }; DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); -DECLARE_PER_CPU(struct page_pool *, system_page_pool); + +struct page_pool_bh { + struct page_pool *pool; + local_lock_t bh_lock; +}; +DECLARE_PER_CPU(struct page_pool_bh, system_page_pool); #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) -- cgit v1.2.3 From 7fe70c06a182a140be9996b02256d907e114479a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 May 2025 11:27:31 +0200 Subject: net/sched: act_mirred: Move the recursion counter struct netdev_xmit mirred_nest_level is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Move mirred_nest_level to struct netdev_xmit as u8, provide wrappers. 
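For illustration only, the wrappers mentioned above live in net/sched/act_mirred.c and are not part of this include/ hunk; the helper names below are made up, and the sketch assumes the !PREEMPT_RT layout in which struct netdev_xmit is embedded in the per-CPU softnet_data:

	/* Hypothetical sketch, not the actual act_mirred code. */
	static u8 mirred_nest_level_inc_return(void)
	{
		return __this_cpu_inc_return(softnet_data.xmit.sched_mirred_nest);
	}

	static void mirred_nest_level_dec(void)
	{
		__this_cpu_dec(softnet_data.xmit.sched_mirred_nest);
	}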
Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Juri Lelli Link: https://patch.msgid.link/20250512092736.229935-11-bigeasy@linutronix.de Signed-off-by: Paolo Abeni --- include/linux/netdevice_xmit.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h index 38325e070296..848735b3a7c0 100644 --- a/include/linux/netdevice_xmit.h +++ b/include/linux/netdevice_xmit.h @@ -8,6 +8,9 @@ struct netdev_xmit { #ifdef CONFIG_NET_EGRESS u8 skip_txqueue; #endif +#if IS_ENABLED(CONFIG_NET_ACT_MIRRED) + u8 sched_mirred_nest; +#endif }; #endif -- cgit v1.2.3 From ce45dc4bb22e96b59a07e19b67e915d99dd5281b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 25 Apr 2025 11:24:22 +0200 Subject: PCI: Limit visibility of match_driver flag to PCI core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 58d9a38f6fac ("PCI: Skip attaching driver in device_add()"), PCI enumeration is split into two steps: In the first step, all devices are published in sysfs with device_add(). In the second step, drivers are bound to the devices with device_attach(). To delay driver binding until the second step, a "bool match_driver" in struct pci_dev is used. Instead of a bool, use a bit in the "unsigned long priv_flags" to shrink struct pci_dev a little and prevent use of the bool outside the PCI core (as has happened with commit cbbc00be2ce3 ("iommu/amd: Prevent binding other PCI drivers to IOMMU PCI devices")). Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Link: https://patch.msgid.link/d22a9e5b81d6bd8dd1837607d6156679b3b1199c.1745572340.git.lukas@wunner.de --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index d26e6611bd00..053d9dd494d4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -423,8 +423,6 @@ struct pci_dev { struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ struct resource driver_exclusive_resource; /* driver exclusive resource ranges */ - bool match_driver; /* Skip attaching driver */ - unsigned int transparent:1; /* Subtractive decode bridge */ unsigned int io_window:1; /* Bridge has I/O window */ unsigned int pref_window:1; /* Bridge has pref mem window */ -- cgit v1.2.3 From 241b9b584d50e01f7eb50c0555aa570502bcc8c5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 May 2025 13:50:33 -0700 Subject: dm-zone: Use bdev_*() helper functions where applicable Improve code readability by using bdev_is_zone_aligned() and bdev_offset_from_zone_start() where applicable. No functionality has been changed. This patch is a reworked version of a patch from Pankaj Raghav. See also https://lore.kernel.org/linux-block/20220923173618.6899-11-p.raghav@samsung.com/. Cc: Damien Le Moal Cc: Pankaj Raghav Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e39c45bc0a97..edc941ec8fdd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1419,6 +1419,13 @@ static inline bool bdev_is_zone_start(struct block_device *bdev, return bdev_offset_from_zone_start(bdev, sector) == 0; } +/* Check whether @sector is a multiple of the zone size. 
*/ +static inline bool bdev_is_zone_aligned(struct block_device *bdev, + sector_t sector) +{ + return bdev_is_zone_start(bdev, sector); +} + /** * bdev_zone_is_seq - check if a sector belongs to a sequential write zone * @bdev: block device to check -- cgit v1.2.3 From 18049c8cff9cc89daadc4df6975f7d9069638926 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Thu, 8 May 2025 16:26:42 -0700 Subject: perf/aux: Allocate non-contiguous AUX pages by default perf always allocates contiguous AUX pages based on aux_watermark. However, this contiguous allocation doesn't benefit all PMUs. For instance, ARM SPE and TRBE operate with virtual pages, and Coresight ETR allocates a separate buffer. For these PMUs, allocating contiguous AUX pages unnecessarily exacerbates memory fragmentation. This fragmentation can prevent their use on long-running devices. This patch modifies the perf driver to be memory-friendly by default, by allocating non-contiguous AUX pages. For PMUs requiring contiguous pages (Intel BTS and some Intel PT), the existing PERF_PMU_CAP_AUX_NO_SG capability can be used. For PMUs that don't require but can benefit from contiguous pages (some Intel PT), a new capability, PERF_PMU_CAP_AUX_PREFER_LARGE, is added to maintain their existing behavior. Signed-off-by: Yabin Cui Signed-off-by: Ingo Molnar Reviewed-by: James Clark Reviewed-by: Anshuman Khandual Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20250508232642.148767-1-yabinc@google.com --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 947ad12dfdbe..a96c00e2ceca 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -303,6 +303,7 @@ struct perf_event_pmu_context; #define PERF_PMU_CAP_AUX_OUTPUT 0x0080 #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 #define PERF_PMU_CAP_AUX_PAUSE 0x0200 +#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400 /** * pmu::scope -- cgit v1.2.3 From 189572bf4e005431051319def435ac4b7e4489ca Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Thu, 15 May 2025 16:58:31 +0000 Subject: cpumask: Relax cpumask_any_but() Similarly to other cpumask search functions, accept -1, and consider it as 'any CPU' hint. This helps users to avoid coding special cases. Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: James Morse Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Tested-by: James Morse Tested-by: Tony Luck Tested-by: Fenghua Yu Link: https://lore.kernel.org/20250515165855.31452-2-james.morse@arm.com --- include/linux/cpumask.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f9a868384083..a3ee875df508 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -413,14 +413,18 @@ unsigned int cpumask_next_wrap(int n, const struct cpumask *src) * @cpu: the cpu to ignore. * * Often used to find any cpu but smp_processor_id() in a mask. + * If @cpu == -1, the function is equivalent to cpumask_any(). * Return: >= nr_cpu_ids if no cpus set. */ static __always_inline -unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) +unsigned int cpumask_any_but(const struct cpumask *mask, int cpu) { unsigned int i; - cpumask_check(cpu); + /* -1 is a legal arg here. 
*/ + if (cpu != -1) + cpumask_check(cpu); + for_each_cpu(i, mask) if (i != cpu) break; @@ -433,16 +437,20 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) * @mask2: the second input cpumask * @cpu: the cpu to ignore * + * If @cpu == -1, the function is equivalent to cpumask_any_and(). * Returns >= nr_cpu_ids if no cpus set. */ static __always_inline unsigned int cpumask_any_and_but(const struct cpumask *mask1, const struct cpumask *mask2, - unsigned int cpu) + int cpu) { unsigned int i; - cpumask_check(cpu); + /* -1 is a legal arg here. */ + if (cpu != -1) + cpumask_check(cpu); + i = cpumask_first_and(mask1, mask2); if (i != cpu) return i; -- cgit v1.2.3 From 13f0a02bf4c1c5888c736cedef9ca50de666adb3 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Thu, 15 May 2025 16:58:32 +0000 Subject: find: Add find_first_andnot_bit() The function helps to implement cpumask_andnot() APIs. Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: James Morse Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Tested-by: James Morse Tested-by: Tony Luck Tested-by: Fenghua Yu Link: https://lore.kernel.org/20250515165855.31452-3-james.morse@arm.com --- include/linux/find.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/find.h b/include/linux/find.h index 68685714bc18..5a2c267ea7f9 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -29,6 +29,8 @@ unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsign unsigned long n); extern unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size); +unsigned long _find_first_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size); unsigned long _find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); @@ -347,6 +349,29 @@ unsigned long find_first_and_bit(const unsigned long *addr1, } #endif +/** + * find_first_andnot_bit - find the first bit set in 1st memory region and unset in 2nd + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the first set bit + * If no bits are set, returns >= @size. + */ +static __always_inline +unsigned long find_first_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_andnot_bit(addr1, addr2, size); +} + /** * find_first_and_and_bit - find the first set bit in 3 memory regions * @addr1: The first address to base the search on -- cgit v1.2.3 From 5da703ef4e4a82b2f9a00f2b3a1eae5657e68a92 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Thu, 15 May 2025 16:58:33 +0000 Subject: cpumask: Add cpumask_{first,next}_andnot() API With the lack of the functions, client code has to abuse less efficient cpumask_nth(). 
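As a usage illustration (not taken from the patch; busy_mask and do_light_work() are made-up names), the new helpers let a caller pick or walk the CPUs present in one mask but absent from another directly, instead of counting bits and calling cpumask_nth():

	unsigned int cpu;

	/* Pick one CPU that is online but not marked busy. */
	cpu = cpumask_first_andnot(cpu_online_mask, busy_mask);
	if (cpu >= nr_cpu_ids)
		return -EBUSY;	/* every online CPU is busy */

	/* Or visit each such CPU in turn. */
	for (cpu = cpumask_first_andnot(cpu_online_mask, busy_mask);
	     cpu < nr_cpu_ids;
	     cpu = cpumask_next_andnot(cpu, cpu_online_mask, busy_mask))
		do_light_work(cpu);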
Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: James Morse Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: James Morse Tested-by: Tony Luck Link: https://lore.kernel.org/20250515165855.31452-4-james.morse@arm.com --- include/linux/cpumask.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index a3ee875df508..6a569c7534db 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -178,6 +178,19 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } +/** + * cpumask_first_andnot - return the first cpu from *srcp1 & ~*srcp2 + * @srcp1: the first input + * @srcp2: the second input + * + * Return: >= nr_cpu_ids if no such cpu found. + */ +static __always_inline +unsigned int cpumask_first_andnot(const struct cpumask *srcp1, const struct cpumask *srcp2) +{ + return find_first_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); +} + /** * cpumask_first_and_and - return the first cpu from *srcp1 & *srcp2 & *srcp3 * @srcp1: the first input @@ -284,6 +297,25 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, small_cpumask_bits, n + 1); } +/** + * cpumask_next_andnot - get the next cpu in *src1p & ~*src2p + * @n: the cpu prior to the place to search (i.e. return will be > @n) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer + * + * Return: >= nr_cpu_ids if no further cpus set in both. + */ +static __always_inline +unsigned int cpumask_next_andnot(int n, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_andnot_bit(cpumask_bits(src1p), cpumask_bits(src2p), + small_cpumask_bits, n + 1); +} + /** * cpumask_next_and_wrap - get the next cpu in *src1p & *src2p, starting from * @n+1. If nothing found, wrap around and start from @@ -458,6 +490,33 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1, return cpumask_next_and(cpu, mask1, mask2); } +/** + * cpumask_any_andnot_but - pick an arbitrary cpu from *mask1 & ~*mask2, but not this one. + * @mask1: the first input cpumask + * @mask2: the second input cpumask + * @cpu: the cpu to ignore + * + * If @cpu == -1, the function returns the first matching cpu. + * Returns >= nr_cpu_ids if no cpus set. + */ +static __always_inline +unsigned int cpumask_any_andnot_but(const struct cpumask *mask1, + const struct cpumask *mask2, + int cpu) +{ + unsigned int i; + + /* -1 is a legal arg here. */ + if (cpu != -1) + cpumask_check(cpu); + + i = cpumask_first_andnot(mask1, mask2); + if (i != cpu) + return i; + + return cpumask_next_andnot(cpu, mask1, mask2); +} + /** * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer -- cgit v1.2.3 From c1269d3d12b88151ee4c109624b5022d53a11738 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 May 2025 19:39:09 +0000 Subject: tcp: add tcp_rcvbuf_grow() tracepoint Provide a new tracepoint to better understand tcp_rcv_space_adjust() (currently broken) behavior. Call it only when tcp_rcv_space_adjust() has a chance to make a change. 
I chose to leave trace_tcp_rcv_space_adjust() as is, because commit 6163849d289b ("net: introduce a new tracepoint for tcp_rcv_space_adjust") intent was to get it called after each data delivery to user space. Tested: Pair of hosts in the same rack. Ideally, sk->sk_rcvbuf should be kept small. echo "4096 131072 33554432" >/proc/sys/net/ipv4/tcp_rmem ./netserver perf record -C10 -e tcp:tcp_rcvbuf_grow sleep 30 Trace for a TS enabled TCP flow (with standard ms granularity) perf script // We can see that sk_rcvbuf is growing very fast to tcp_mem[2] 260.500397: tcp:tcp_rcvbuf_grow: time=291 rtt_us=274 copied=110592 inq=0 space=41080 ooo=0 scaling_ratio=230 rcvbuf=131072 ... 260.501333: tcp:tcp_rcvbuf_grow: time=555 rtt_us=364 copied=333824 inq=0 space=110592 ooo=0 scaling_ratio=230 rcvbuf=1399144 ... 260.501664: tcp:tcp_rcvbuf_grow: time=331 rtt_us=330 copied=798720 inq=0 space=333824 ooo=0 scaling_ratio=230 rcvbuf=4110551 ... 260.502003: tcp:tcp_rcvbuf_grow: time=340 rtt_us=330 copied=1040384 inq=49152 space=798720 ooo=0 scaling_ratio=230 rcvbuf=7006410 ... 260.502483: tcp:tcp_rcvbuf_grow: time=479 rtt_us=330 copied=2658304 inq=49152 space=1040384 ooo=0 scaling_ratio=230 rcvbuf=7006410 ... 260.502899: tcp:tcp_rcvbuf_grow: time=416 rtt_us=413 copied=4026368 inq=147456 space=2658304 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.504233: tcp:tcp_rcvbuf_grow: time=493 rtt_us=487 copied=4800512 inq=196608 space=4026368 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.504792: tcp:tcp_rcvbuf_grow: time=559 rtt_us=551 copied=5672960 inq=49152 space=4800512 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.506614: tcp:tcp_rcvbuf_grow: time=610 rtt_us=607 copied=6688768 inq=180224 space=5672960 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.507280: tcp:tcp_rcvbuf_grow: time=666 rtt_us=656 copied=6868992 inq=49152 space=6688768 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.507979: tcp:tcp_rcvbuf_grow: time=699 rtt_us=699 copied=7000064 inq=0 space=6868992 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.508681: tcp:tcp_rcvbuf_grow: time=703 rtt_us=699 copied=7208960 inq=0 space=7000064 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.509426: tcp:tcp_rcvbuf_grow: time=744 rtt_us=737 copied=7569408 inq=0 space=7208960 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.510213: tcp:tcp_rcvbuf_grow: time=787 rtt_us=770 copied=7880704 inq=49152 space=7569408 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.511013: tcp:tcp_rcvbuf_grow: time=801 rtt_us=798 copied=8339456 inq=0 space=7880704 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.511860: tcp:tcp_rcvbuf_grow: time=847 rtt_us=824 copied=8601600 inq=49152 space=8339456 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.512710: tcp:tcp_rcvbuf_grow: time=850 rtt_us=846 copied=8814592 inq=65536 space=8601600 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.514428: tcp:tcp_rcvbuf_grow: time=871 rtt_us=865 copied=8855552 inq=49152 space=8814592 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.515333: tcp:tcp_rcvbuf_grow: time=905 rtt_us=882 copied=9228288 inq=49152 space=8855552 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.516237: tcp:tcp_rcvbuf_grow: time=905 rtt_us=896 copied=9371648 inq=49152 space=9228288 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.517149: tcp:tcp_rcvbuf_grow: time=911 rtt_us=909 copied=9543680 inq=49152 space=9371648 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.518070: tcp:tcp_rcvbuf_grow: time=921 rtt_us=921 copied=9793536 inq=0 space=9543680 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 
260.520895: tcp:tcp_rcvbuf_grow: time=948 rtt_us=947 copied=10203136 inq=114688 space=9793536 ooo=0 scaling_ratio=230 rcvbuf=24622616 ... 260.521853: tcp:tcp_rcvbuf_grow: time=959 rtt_us=954 copied=10293248 inq=57344 space=10203136 ooo=0 scaling_ratio=230 rcvbuf=24691992 ... 260.522818: tcp:tcp_rcvbuf_grow: time=964 rtt_us=959 copied=10330112 inq=0 space=10293248 ooo=0 scaling_ratio=230 rcvbuf=24691992 ... 260.524760: tcp:tcp_rcvbuf_grow: time=979 rtt_us=969 copied=10633216 inq=49152 space=10330112 ooo=0 scaling_ratio=230 rcvbuf=24691992 ... 260.526709: tcp:tcp_rcvbuf_grow: time=975 rtt_us=973 copied=12013568 inq=163840 space=10633216 ooo=0 scaling_ratio=230 rcvbuf=25136755 ... 260.527694: tcp:tcp_rcvbuf_grow: time=985 rtt_us=976 copied=12025856 inq=32768 space=12013568 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 260.530655: tcp:tcp_rcvbuf_grow: time=991 rtt_us=986 copied=12050432 inq=98304 space=12025856 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 260.533626: tcp:tcp_rcvbuf_grow: time=993 rtt_us=989 copied=12124160 inq=0 space=12050432 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 260.538606: tcp:tcp_rcvbuf_grow: time=1000 rtt_us=994 copied=12222464 inq=49152 space=12124160 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 260.545605: tcp:tcp_rcvbuf_grow: time=1005 rtt_us=998 copied=12263424 inq=81920 space=12222464 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 260.553626: tcp:tcp_rcvbuf_grow: time=1005 rtt_us=999 copied=12320768 inq=12288 space=12263424 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 260.589749: tcp:tcp_rcvbuf_grow: time=1001 rtt_us=1000 copied=12398592 inq=16384 space=12320768 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 260.806577: tcp:tcp_rcvbuf_grow: time=1010 rtt_us=1000 copied=12402688 inq=32768 space=12398592 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 261.002386: tcp:tcp_rcvbuf_grow: time=1002 rtt_us=1000 copied=12419072 inq=98304 space=12402688 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 261.803432: tcp:tcp_rcvbuf_grow: time=1013 rtt_us=1000 copied=12468224 inq=49152 space=12419072 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 261.829533: tcp:tcp_rcvbuf_grow: time=1004 rtt_us=1000 copied=12615680 inq=0 space=12468224 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... 265.505435: tcp:tcp_rcvbuf_grow: time=1007 rtt_us=1000 copied=12632064 inq=32768 space=12615680 ooo=0 scaling_ratio=230 rcvbuf=33554432 ... We also see rtt_us going gradually to 1000 usec, causing massive overshoot. Trace for a usec TS enabled TCP flow (us granularity) perf script // We can see that sk_rcvbuf is growing to a smaller value, thanks to tight rtt_us values. 1509.273955: tcp:tcp_rcvbuf_grow: time=396 rtt_us=377 copied=110592 inq=0 space=41080 ooo=0 scaling_ratio=230 rcvbuf=131072 ... 1509.274366: tcp:tcp_rcvbuf_grow: time=412 rtt_us=365 copied=129024 inq=0 space=110592 ooo=0 scaling_ratio=230 rcvbuf=1399144 ... 1509.274738: tcp:tcp_rcvbuf_grow: time=372 rtt_us=355 copied=194560 inq=0 space=129024 ooo=0 scaling_ratio=230 rcvbuf=1399144 ... 1509.275020: tcp:tcp_rcvbuf_grow: time=282 rtt_us=257 copied=401408 inq=0 space=194560 ooo=0 scaling_ratio=230 rcvbuf=1399144 ... 1509.275190: tcp:tcp_rcvbuf_grow: time=170 rtt_us=144 copied=741376 inq=229376 space=401408 ooo=0 scaling_ratio=230 rcvbuf=3021625 ... 1509.275300: tcp:tcp_rcvbuf_grow: time=110 rtt_us=110 copied=1146880 inq=65536 space=741376 ooo=0 scaling_ratio=230 rcvbuf=4642390 ... 1509.275449: tcp:tcp_rcvbuf_grow: time=149 rtt_us=106 copied=1310720 inq=737280 space=1146880 ooo=0 scaling_ratio=230 rcvbuf=5498637 ... 
1509.275560: tcp:tcp_rcvbuf_grow: time=111 rtt_us=107 copied=1388544 inq=430080 space=1310720 ooo=0 scaling_ratio=230 rcvbuf=5498637 ... 1509.275674: tcp:tcp_rcvbuf_grow: time=114 rtt_us=113 copied=1495040 inq=421888 space=1388544 ooo=0 scaling_ratio=230 rcvbuf=5498637 ... 1509.275800: tcp:tcp_rcvbuf_grow: time=126 rtt_us=126 copied=1572864 inq=77824 space=1495040 ooo=0 scaling_ratio=230 rcvbuf=5498637 ... 1509.275968: tcp:tcp_rcvbuf_grow: time=168 rtt_us=161 copied=1863680 inq=172032 space=1572864 ooo=0 scaling_ratio=230 rcvbuf=5498637 ... 1509.276129: tcp:tcp_rcvbuf_grow: time=161 rtt_us=161 copied=1941504 inq=204800 space=1863680 ooo=0 scaling_ratio=230 rcvbuf=5782790 ... 1509.276288: tcp:tcp_rcvbuf_grow: time=159 rtt_us=158 copied=1990656 inq=131072 space=1941504 ooo=0 scaling_ratio=230 rcvbuf=5782790 ... 1509.276900: tcp:tcp_rcvbuf_grow: time=228 rtt_us=226 copied=2883584 inq=266240 space=1990656 ooo=0 scaling_ratio=230 rcvbuf=5782790 ... 1509.277819: tcp:tcp_rcvbuf_grow: time=242 rtt_us=236 copied=3022848 inq=0 space=2883584 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.278072: tcp:tcp_rcvbuf_grow: time=253 rtt_us=247 copied=3055616 inq=49152 space=3022848 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.279560: tcp:tcp_rcvbuf_grow: time=268 rtt_us=264 copied=3133440 inq=180224 space=3055616 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.279833: tcp:tcp_rcvbuf_grow: time=274 rtt_us=270 copied=3424256 inq=0 space=3133440 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.282187: tcp:tcp_rcvbuf_grow: time=277 rtt_us=273 copied=3465216 inq=180224 space=3424256 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.284685: tcp:tcp_rcvbuf_grow: time=292 rtt_us=292 copied=3481600 inq=147456 space=3465216 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.284983: tcp:tcp_rcvbuf_grow: time=297 rtt_us=295 copied=3702784 inq=45056 space=3481600 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.285596: tcp:tcp_rcvbuf_grow: time=311 rtt_us=310 copied=3723264 inq=40960 space=3702784 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.285909: tcp:tcp_rcvbuf_grow: time=313 rtt_us=304 copied=3846144 inq=196608 space=3723264 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.291654: tcp:tcp_rcvbuf_grow: time=322 rtt_us=311 copied=3960832 inq=49152 space=3846144 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.291986: tcp:tcp_rcvbuf_grow: time=333 rtt_us=330 copied=4075520 inq=360448 space=3960832 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.292319: tcp:tcp_rcvbuf_grow: time=332 rtt_us=332 copied=4079616 inq=65536 space=4075520 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.292666: tcp:tcp_rcvbuf_grow: time=348 rtt_us=347 copied=4177920 inq=212992 space=4079616 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.293015: tcp:tcp_rcvbuf_grow: time=349 rtt_us=345 copied=4276224 inq=262144 space=4177920 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.293371: tcp:tcp_rcvbuf_grow: time=356 rtt_us=346 copied=4415488 inq=49152 space=4276224 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 1509.515798: tcp:tcp_rcvbuf_grow: time=424 rtt_us=411 copied=4833280 inq=81920 space=4415488 ooo=0 scaling_ratio=230 rcvbuf=12316197 ... 
Signed-off-by: Eric Dumazet Reviewed-by: Wei Wang Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20250513193919.1089692-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 53e878fa14d1..006c2116c8f6 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -213,6 +213,79 @@ DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, TP_ARGS(sk) ); +TRACE_EVENT(tcp_rcvbuf_grow, + + TP_PROTO(struct sock *sk, int time), + + TP_ARGS(sk, time), + + TP_STRUCT__entry( + __field(int, time) + __field(__u32, rtt_us) + __field(__u32, copied) + __field(__u32, inq) + __field(__u32, space) + __field(__u32, ooo_space) + __field(__u32, rcvbuf) + __field(__u8, scaling_ratio) + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + __array(__u8, saddr, 4) + __array(__u8, daddr, 4) + __array(__u8, saddr_v6, 16) + __array(__u8, daddr_v6, 16) + __field(const void *, skaddr) + __field(__u64, sock_cookie) + ), + + TP_fast_assign( + struct inet_sock *inet = inet_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); + __be32 *p32; + + __entry->time = time; + __entry->rtt_us = tp->rcv_rtt_est.rtt_us >> 3; + __entry->copied = tp->copied_seq - tp->rcvq_space.seq; + __entry->inq = tp->rcv_nxt - tp->copied_seq; + __entry->space = tp->rcvq_space.space; + __entry->ooo_space = RB_EMPTY_ROOT(&tp->out_of_order_queue) ? 0 : + TCP_SKB_CB(tp->ooo_last_skb)->end_seq - + tp->rcv_nxt; + + __entry->rcvbuf = sk->sk_rcvbuf; + __entry->scaling_ratio = tp->scaling_ratio; + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; + + p32 = (__be32 *) __entry->saddr; + *p32 = inet->inet_saddr; + + p32 = (__be32 *) __entry->daddr; + *p32 = inet->inet_daddr; + + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); + + __entry->skaddr = sk; + __entry->sock_cookie = sock_gen_cookie(sk); + ), + + TP_printk("time=%u rtt_us=%u copied=%u inq=%u space=%u ooo=%u scaling_ratio=%u rcvbuf=%u " + "family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 " + "saddrv6=%pI6c daddrv6=%pI6c skaddr=%p sock_cookie=%llx", + __entry->time, __entry->rtt_us, __entry->copied, + __entry->inq, __entry->space, __entry->ooo_space, + __entry->scaling_ratio, __entry->rcvbuf, + show_family_name(__entry->family), + __entry->sport, __entry->dport, + __entry->saddr, __entry->daddr, + __entry->saddr_v6, __entry->daddr_v6, + __entry->skaddr, + __entry->sock_cookie) +); + TRACE_EVENT(tcp_retransmit_synack, TP_PROTO(const struct sock *sk, const struct request_sock *req), -- cgit v1.2.3 From ea33537d82921e71f852ea2ed985acc562125efe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 May 2025 19:39:12 +0000 Subject: tcp: add receive queue awareness in tcp_rcv_space_adjust() If the application cannot drain a TCP socket queue fast enough, tcp_rcv_space_adjust() can overestimate tp->rcvq_space.space. Then sk->sk_rcvbuf can grow and hit tcp_rmem[2] for no good reason. Fix this by taking into account the number of available bytes. Keeping sk->sk_rcvbuf at the right size allows better cache efficiency.
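To make "receive queue awareness" concrete, here is a rough sketch reusing the same quantities the tcp_rcvbuf_grow tracepoint above records (copied and inq); it is only an illustration of the idea, not the literal tcp_rcv_space_adjust() change, which lands in net/ipv4/tcp_input.c and is outside this include/ diff:

	/* Sketch only -- not the actual net/ipv4/tcp_input.c change. */
	int copied = tp->copied_seq - tp->rcvq_space.seq;	/* bytes read by the app since the last adjust */
	int inq = tp->rcv_nxt - tp->copied_seq;			/* bytes still queued, not yet read */

	/* Credit only data the application actually drained, so a slow
	 * reader no longer inflates rcvq_space.space and sk->sk_rcvbuf. */
	if (inq > 0)
		copied -= min(copied, inq);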
Signed-off-by: Eric Dumazet Reviewed-by: Wei Wang Link: https://patch.msgid.link/20250513193919.1089692-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a8af71623ba7..29f59d50dc73 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -340,7 +340,7 @@ struct tcp_sock { } rcv_rtt_est; /* Receiver queue space */ struct { - u32 space; + int space; u32 seq; u64 time; } rcvq_space; -- cgit v1.2.3 From 1119e5519dcdb7b3527f5d85accf9c7aa02b2b28 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 13 May 2025 15:17:52 -0700 Subject: net: sched: uapi: add more sanely named duplicate defines The TCA_FLOWER_KEY_CFM enum has a UNSPEC and MAX with _OPT in the name, but the real attributes don't. Add a MAX that more reasonably matches the attrs. The PAD in TCA_TAPRIO is the only attr which doesn't have _ATTR in it, perhaps signifying that it's not a real attr? If so interesting idea in abstract but it makes codegen painful. Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250513221752.843102-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 1 + include/uapi/linux/pkt_sched.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 490821364165..28d94b11d1aa 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -697,6 +697,7 @@ enum { }; #define TCA_FLOWER_KEY_CFM_OPT_MAX (__TCA_FLOWER_KEY_CFM_OPT_MAX - 1) +#define TCA_FLOWER_KEY_CFM_MAX (__TCA_FLOWER_KEY_CFM_OPT_MAX - 1) #define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */ diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 9ea874395717..3e41349f3fa2 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1182,6 +1182,7 @@ enum { TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */ TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */ TCA_TAPRIO_PAD, + TCA_TAPRIO_ATTR_PAD = TCA_TAPRIO_PAD, TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */ -- cgit v1.2.3 From bc740420d7ae7878696d5ad9f3dbcb11e2599b2e Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 15 May 2025 16:58:39 +0000 Subject: x86/resctrl: Drop __init/__exit on assorted symbols Because ARM's MPAM controls are probed using MMIO, resctrl can't be initialised until enough CPUs are online to have determined the system-wide supported num_closid. Arm64 also supports 'late onlined secondaries', where only a subset of CPUs are online during boot. These two combine to mean the MPAM driver may not be able to initialise resctrl until user-space has brought 'enough' CPUs online. To allow MPAM to initialise resctrl after __init text has been free'd, remove all the __init markings from resctrl. The existing __exit markings cause these functions to be removed by the linker as it has never been possible to build resctrl as a module. MPAM has an error interrupt which causes the driver to reset and disable itself. Remove the __exit markings to allow the MPAM driver to tear down resctrl when an error occurs. 
Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Tested-by: Tony Luck Link: https://lore.kernel.org/20250515165855.31452-10-james.morse@arm.com --- include/linux/resctrl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 880351ca3dfc..b8f8240050b4 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -358,7 +358,7 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); -__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); +bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); /** * resctrl_arch_mon_event_config_write() - Write the config for an event. @@ -514,7 +514,7 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -int __init resctrl_init(void); -void __exit resctrl_exit(void); +int resctrl_init(void); +void resctrl_exit(void); #endif /* _RESCTRL_H */ -- cgit v1.2.3 From eff042ddf4b9587fa7394d502b93e06ec6015177 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:50 -0400 Subject: sunrpc: Add a helper to derive maxpages from sv_max_mesg This page count is to be used to allocate various arrays of pages and bio_vecs, replacing the fixed RPCSVC_MAXPAGES value. The documenting comment is somewhat stale -- of course NFSv4 COMPOUND procedures may have multiple payloads. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 74658cca0f38..749f8e53e8a6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -159,6 +159,23 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); #define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ + 2 + 1) +/** + * svc_serv_maxpages - maximum count of pages needed for one RPC message + * @serv: RPC service context + * + * Returns a count of pages or vectors that can hold the maximum + * size RPC message for @serv. + * + * Each request/reply pair can have at most one "payload", plus two + * pages, one for the request, and one for the reply. + * nfsd_splice_actor() might need an extra page when a READ payload + * is not page-aligned. + */ +static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv) +{ + return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1; +} + /* * The context of a single thread, including the request currently being * processed. -- cgit v1.2.3 From ed603bcf4feac05df937bf78bc7feb75f988a971 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:52 -0400 Subject: sunrpc: Replace the rq_pages array with dynamically-allocated memory As a step towards making NFSD's maximum rsize and wsize variable at run-time, replace the fixed-size rq_pages[] array in struct svc_rqst with a chunk of dynamically-allocated memory.
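A minimal sketch of how such an allocation might look at thread-setup time, using the svc_serv_maxpages() helper introduced above (illustrative only; the real change is in net/sunrpc/svc.c, which is not part of this include/ diff):

	/* Sketch only -- not the actual svc_rqst_alloc() change. */
	rqstp->rq_maxpages = svc_serv_maxpages(serv);
	rqstp->rq_pages = kcalloc(rqstp->rq_maxpages, sizeof(struct page *),
				  GFP_KERNEL);
	if (!rqstp->rq_pages)
		goto out_enomem;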
On a system with 8-byte pointers and 4KB pages, pahole reports that the rq_pages[] array is 2080 bytes. This patch replaces that with a single 8-byte pointer field. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 749f8e53e8a6..57be69539888 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -205,7 +205,8 @@ struct svc_rqst { struct xdr_stream rq_res_stream; struct page *rq_scratch_page; struct xdr_buf rq_res; - struct page *rq_pages[RPCSVC_MAXPAGES + 1]; + unsigned long rq_maxpages; /* num of entries in rq_pages */ + struct page * *rq_pages; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ struct page * *rq_page_end; /* one past the last page */ -- cgit v1.2.3 From 59cf7346542babdaae99e72365174eab4fa276ac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:54 -0400 Subject: sunrpc: Replace the rq_bvec array with dynamically-allocated memory As a step towards making NFSD's maximum rsize and wsize variable at run-time, replace the fixed-size rq_bvec[] array in struct svc_rqst with a chunk of dynamically-allocated memory. The rq_bvec[] array contains enough bio_vecs to handle each page in a maximum size RPC message. On a system with 8-byte pointers and 4KB pages, pahole reports that the rq_bvec[] array is 4144 bytes. This patch replaces that array with a single 8-byte pointer field. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 57be69539888..538bea716c6b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -213,7 +213,7 @@ struct svc_rqst { struct folio_batch rq_fbatch; struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ - struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; + struct bio_vec *rq_bvec; __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ -- cgit v1.2.3 From f2e597353d50f05a600dddbf84a362839cf1c224 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 May 2025 13:39:23 -0400 Subject: NFSD: De-duplicate the svc_fill_write_vector() call sites All three call sites do the same thing. I'm struggling with this a bit, however. struct xdr_buf is an XDR layer object and unmarshaling a WRITE payload is clearly a task intended to be done by the proc and xdr functions, not by VFS. This feels vaguely like a layering violation. 
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 538bea716c6b..dec636345ee2 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -471,7 +471,7 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct xdr_buf *payload); + const struct xdr_buf *payload); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); -- cgit v1.2.3 From b406c6b78198f123e08061c45d56803459785975 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 8 May 2025 11:56:40 -0400 Subject: SUNRPC: Remove svc_fill_write_vector() Clean up: This API is no longer used. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dec636345ee2..510ee1927977 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -470,8 +470,6 @@ const char * svc_proc_name(const struct svc_rqst *rqstp); int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); -unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - const struct xdr_buf *payload); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); -- cgit v1.2.3 From 1259560b988c959abf9b84da0b9e56c2863b6837 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 May 2025 20:15:08 -0400 Subject: SUNRPC: Remove svc_rqst :: rq_vec Clean up: This array is no longer used. On a system with 8-byte pointers and 4KB pages, pahole reports that the rq_vec[] array accounts for 4144 bytes. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 510ee1927977..6540af4b9bdb 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -212,7 +212,6 @@ struct svc_rqst { struct page * *rq_page_end; /* one past the last page */ struct folio_batch rq_fbatch; - struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ struct bio_vec *rq_bvec; __be32 rq_xid; /* transmission id */ -- cgit v1.2.3 From f4126823c1fd677d9811a0252830c6c73ddb7e99 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:55 -0400 Subject: sunrpc: Adjust size of socket's receive page array dynamically As a step towards making NFSD's maximum rsize and wsize variable at run-time, make sk_pages a flexible array. 
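For illustration (the allocation itself happens in net/sunrpc/svcsock.c and is not part of this hunk), a __counted_by() flexible array like sk_pages is typically allocated with struct_size() and has its bound member set immediately afterwards:

	/* Sketch only -- not the actual svc_setup_socket() change. */
	unsigned long pages = svc_serv_maxpages(serv);
	struct svc_sock *svsk;

	svsk = kzalloc(struct_size(svsk, sk_pages, pages), GFP_KERNEL);
	if (!svsk)
		return ERR_PTR(-ENOMEM);
	svsk->sk_maxpages = pages;	/* must match the __counted_by() bound */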
Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svcsock.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index bf45d9e8492a..963bbe251e52 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -40,7 +40,9 @@ struct svc_sock { struct completion sk_handshake_done; - struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ + /* received data */ + unsigned long sk_maxpages; + struct page * sk_pages[] __counted_by(sk_maxpages); }; static inline u32 svc_sock_reclen(struct svc_sock *svsk) -- cgit v1.2.3 From 81381d1a90dea186ce4585d9bd3cdc40b50e9c48 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:56 -0400 Subject: svcrdma: Adjust the number of entries in svc_rdma_recv_ctxt::rc_pages Allow allocation of more entries in the rc_pages[] array when the maximum size of an RPC message is increased. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 619fc0bd837a..1016f2feddc4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -202,7 +202,8 @@ struct svc_rdma_recv_ctxt { struct svc_rdma_pcl rc_reply_pcl; unsigned int rc_page_count; - struct page *rc_pages[RPCSVC_MAXPAGES]; + unsigned long rc_maxpages; + struct page *rc_pages[] __counted_by(rc_maxpages); }; /* -- cgit v1.2.3 From 56ab43f50d708e155f013ef60cc1b4fe39bed42e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:57 -0400 Subject: svcrdma: Adjust the number of entries in svc_rdma_send_ctxt::sc_pages Allow allocation of more entries in the sc_pages[] array when the maximum size of an RPC message is increased. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 1016f2feddc4..22704c2e5b9b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -245,7 +245,8 @@ struct svc_rdma_send_ctxt { void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; - struct page *sc_pages[RPCSVC_MAXPAGES]; + unsigned long sc_maxpages; + struct page **sc_pages; struct ib_sge sc_sges[]; }; -- cgit v1.2.3 From 0af165bb903cdc005066494cc931076dafd76894 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:58 -0400 Subject: sunrpc: Remove the RPCSVC_MAXPAGES macro It is no longer used. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6540af4b9bdb..d57df042e24a 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -150,14 +150,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); * list. xdr_buf.tail points to the end of the first page. * This assumes that the non-page part of an rpc reply will fit * in a page - NFSd ensures this. lockd also has no trouble. - * - * Each request/reply pair can have at most one "payload", plus two pages, - * one for the request, and one for the reply. 
- * We using ->sendfile to return read data, we might need one extra page - * if the request is not page-aligned. So add another '1'. */ -#define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ - + 2 + 1) /** * svc_serv_maxpages - maximum count of pages needed for one RPC message -- cgit v1.2.3 From 1e7dbad6d1fe4b35ccd2aa8fea9496d837af4430 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:37:02 -0400 Subject: SUNRPC: Bump the maximum payload size for the server Increase the maximum server-side RPC payload to 4MB. The default remains at 1MB. An API to adjust the operational maximum was added in 2006 by commit 596bbe53eb3a ("[PATCH] knfsd: Allow max size of NFSd payload to be configured"). To adjust the operational maximum using this API, shut down the NFS server. Then echo a new value into: /proc/fs/nfsd/max_block_size And restart the NFS server. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d57df042e24a..48666b83fe68 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -119,14 +119,14 @@ void svc_destroy(struct svc_serv **svcp); * Linux limit; someone who cares more about NFS/UDP performance * can test a larger number. * - * For TCP transports we have more freedom. A size of 1MB is - * chosen to match the client limit. Other OSes are known to - * have larger limits, but those numbers are probably beyond - * the point of diminishing returns. + * For non-UDP transports we have more freedom. A size of 4MB is + * chosen to accommodate clients that support larger I/O sizes. */ -#define RPCSVC_MAXPAYLOAD (1*1024*1024u) -#define RPCSVC_MAXPAYLOAD_TCP RPCSVC_MAXPAYLOAD -#define RPCSVC_MAXPAYLOAD_UDP (32*1024u) +enum { + RPCSVC_MAXPAYLOAD = 4 * 1024 * 1024, + RPCSVC_MAXPAYLOAD_TCP = RPCSVC_MAXPAYLOAD, + RPCSVC_MAXPAYLOAD_UDP = 32 * 1024, +}; extern u32 svc_max_payload(const struct svc_rqst *rqstp); -- cgit v1.2.3 From 4eb56b0761e75034dd35067a81da4c280c178262 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 14 May 2025 20:08:20 +0800 Subject: erofs: refine readahead tracepoint - trace_erofs_readpages => trace_erofs_readahead; - Rename a redundant statement `nrpages = readahead_count(rac);`; - Move the tracepoint to the beginning of z_erofs_readahead(). Signed-off-by: Gao Xiang Reviewed-by: Hongbo Li Link: https://lore.kernel.org/r/20250514120820.2739288-1-hsiangkao@linux.alibaba.com Signed-off-by: Gao Xiang --- include/trace/events/erofs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index c69c7b1e41d1..a5f4b9234f46 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -113,7 +113,7 @@ TRACE_EVENT(erofs_read_folio, __entry->raw) ); -TRACE_EVENT(erofs_readpages, +TRACE_EVENT(erofs_readahead, TP_PROTO(struct inode *inode, pgoff_t start, unsigned int nrpage, bool raw), -- cgit v1.2.3 From 93a81ca0657758b607c3f4ba889ae806be9beb73 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 16 May 2025 10:08:16 +0200 Subject: ALSA: pcm: Fix race of buffer access at PCM OSS layer The PCM OSS layer tries to clear the buffer with the silence data at initialization (or reconfiguration) of a stream with the explicit call of snd_pcm_format_set_silence() with runtime->dma_area. 
But this may lead to a UAF because the accessed runtime->dma_area might be freed concurrently, as it's performed outside the PCM ops. For avoiding it, move the code into the PCM core and perform it inside the buffer access lock, so that it won't be changed during the operation. Reported-by: syzbot+32d4647f551007595173@syzkaller.appspotmail.com Closes: https://lore.kernel.org/68164d8e.050a0220.11da1b.0019.GAE@google.com Cc: Link: https://patch.msgid.link/20250516080817.20068-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 8becb4504887..8582d22f3818 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -1404,6 +1404,8 @@ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_s #define snd_pcm_lib_mmap_iomem NULL #endif +void snd_pcm_runtime_buffer_set_silence(struct snd_pcm_runtime *runtime); + /** * snd_pcm_limit_isa_dma_size - Get the max size fitting with ISA DMA transfer * @dma: DMA number -- cgit v1.2.3 From 06aa9378df017ea7482b1bfdcd750104c8b3c407 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 12 May 2025 12:50:59 +0200 Subject: x86/tracing, x86/mm: Move page fault tracepoints to generic Page fault tracepoints are interesting for other architectures as well. Move them to be generic. Signed-off-by: Nam Cao Signed-off-by: Ingo Molnar Cc: Dave Hansen Cc: Gabriele Monaco Cc: H. Peter Anvin Cc: John Ogness Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: linux-trace-kernel@vger.kernel.org Link: https://lore.kernel.org/r/89c2f284adf9b4c933f0e65811c50cef900a5a95.1747046848.git.namcao@linutronix.de --- include/trace/events/exceptions.h | 43 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 include/trace/events/exceptions.h (limited to 'include') diff --git a/include/trace/events/exceptions.h b/include/trace/events/exceptions.h new file mode 100644 index 000000000000..a631f8de8917 --- /dev/null +++ b/include/trace/events/exceptions.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM exceptions + +#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_FAULT_H + +#include + +DECLARE_EVENT_CLASS(exceptions, + + TP_PROTO(unsigned long address, struct pt_regs *regs, + unsigned long error_code), + + TP_ARGS(address, regs, error_code), + + TP_STRUCT__entry( + __field( unsigned long, address ) + __field( unsigned long, ip ) + __field( unsigned long, error_code ) + ), + + TP_fast_assign( + __entry->address = address; + __entry->ip = instruction_pointer(regs); + __entry->error_code = error_code; + ), + + TP_printk("address=%ps ip=%ps error_code=0x%lx", + (void *)__entry->address, (void *)__entry->ip, + __entry->error_code) ); + +DEFINE_EVENT(exceptions, page_fault_user, + TP_PROTO(unsigned long address, struct pt_regs *regs, unsigned long error_code), + TP_ARGS(address, regs, error_code)); +DEFINE_EVENT(exceptions, page_fault_kernel, + TP_PROTO(unsigned long address, struct pt_regs *regs, unsigned long error_code), + TP_ARGS(address, regs, error_code)); + +#endif /* _TRACE_PAGE_FAULT_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From d4fb6b8e4640adeff9673ae398525be7382d3197 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 15 May 2025 16:58:41 +0000 Subject: x86/resctrl: Add end-marker to the resctrl_event_id enum The resctrl_event_id enum gives names to the 
counter event numbers on x86. These are used directly by resctrl. To allow the MPAM driver to keep an array of these the size of the enum needs to be known. Add a 'num_events' enum entry which can be used to size an array. This is added to the enum to reduce conflicts with another series, which in turn requires get_arch_mbm_state() to have a default case. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Babu Moger Tested-by: Shaopeng Tan Tested-by: Tony Luck Link: https://lore.kernel.org/20250515165855.31452-12-james.morse@arm.com --- include/linux/resctrl_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index f26450b3326b..69bf740130ac 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -49,6 +49,9 @@ enum resctrl_event_id { QOS_L3_OCCUP_EVENT_ID = 0x01, QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, + + /* Must be the last */ + QOS_NUM_EVENTS, }; #endif /* __LINUX_RESCTRL_TYPES_H */ -- cgit v1.2.3 From bff70402d6d67843fe319338e4c56e1cba13fbd8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 15 May 2025 16:58:45 +0000 Subject: fs/resctrl: Add boiler plate for external resctrl code Add Makefile and Kconfig for fs/resctrl. Add ARCH_HAS_CPU_RESCTRL for the common parts of the resctrl interface and make X86_CPU_RESCTRL select this. Adding an include of asm/resctrl.h to linux/resctrl.h allows the /fs/resctrl files to switch over to using this header instead. Co-developed-by: Dave Martin Signed-off-by: Dave Martin Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Tested-by: Fenghua Yu Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Tested-by: Tony Luck Link: https://lore.kernel.org/20250515165855.31452-16-james.morse@arm.com --- include/linux/resctrl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b8f8240050b4..5c7c8bf2c47f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -8,6 +8,10 @@ #include #include +#ifdef CONFIG_ARCH_HAS_CPU_RESCTRL +#include +#endif + /* CLOSID, RMID value used by the default control group */ #define RESCTRL_RESERVED_CLOSID 0 #define RESCTRL_RESERVED_RMID 0 -- cgit v1.2.3 From 3d95a49b365e06d1b4c4e5a426fdc70449a334f3 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 15 May 2025 16:58:46 +0000 Subject: x86/resctrl: Move the filesystem bits to headers visible to fs/resctrl Once the filesystem parts of resctrl move to fs/resctrl, it cannot rely on definitions in x86's internal.h. Move definitions in internal.h that need to be shared between the filesystem and architecture code to header files that fs/resctrl can include. Doing this separately means the filesystem code only moves between files of the same name, instead of having these changes mixed in too. 
Co-developed-by: Dave Martin Signed-off-by: Dave Martin Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shaopeng Tan Reviewed-by: Tony Luck Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Tested-by: Fenghua Yu Tested-by: Carl Worth # arm64 Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Amit Singh Tomar # arm64 Tested-by: Shanker Donthineni # arm64 Tested-by: Babu Moger Tested-by: Tony Luck Link: https://lore.kernel.org/20250515165855.31452-17-james.morse@arm.com --- include/linux/resctrl.h | 3 +++ include/linux/resctrl_types.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 5c7c8bf2c47f..b9d1f2916e9c 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -403,6 +403,9 @@ static inline u32 resctrl_get_config_index(u32 closid, } } +bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index 69bf740130ac..a66e7936943e 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -7,6 +7,9 @@ #ifndef __LINUX_RESCTRL_TYPES_H #define __LINUX_RESCTRL_TYPES_H +#define MAX_MBA_BW 100u +#define MBM_OVERFLOW_INTERVAL 1000 + /* Reads to Local DRAM Memory */ #define READS_TO_LOCAL_MEM BIT(0) -- cgit v1.2.3 From 7bdb619c7f9f0a7264b2d69ad0472bf2c785e52a Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 15 May 2025 16:58:47 +0000 Subject: x86/resctrl: Move enum resctrl_event_id to resctrl.h resctrl_types.h contains common types and constants to enable architectures to use these types in their definitions within asm/resctrl.h. enum resctrl_event_id was placed in resctrl_types.h for resctrl_arch_get_cdp_enabled() and resctrl_arch_set_cdp_enabled(), but these two functions are no longer inlined by any architecture. Move enum resctrl_event_id to resctrl.h. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Babu Moger Tested-by: Shaopeng Tan Tested-by: Tony Luck Link: https://lore.kernel.org/20250515165855.31452-18-james.morse@arm.com --- include/linux/resctrl.h | 10 ++++++++++ include/linux/resctrl_types.h | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b9d1f2916e9c..5ef972cbf56b 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -48,6 +48,16 @@ int proc_resctrl_show(struct seq_file *m, for_each_rdt_resource((r)) \ if ((r)->mon_capable) +enum resctrl_res_level { + RDT_RESOURCE_L3, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. 
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index a66e7936943e..a25fb9c4070d 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -34,16 +34,6 @@ /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) -enum resctrl_res_level { - RDT_RESOURCE_L3, - RDT_RESOURCE_L2, - RDT_RESOURCE_MBA, - RDT_RESOURCE_SMBA, - - /* Must be the last */ - RDT_NUM_RESOURCES, -}; - /* * Event IDs, the values match those used to program IA32_QM_EVTSEL before * reading IA32_QM_CTR on RDT systems. -- cgit v1.2.3 From 279f225951e3c77a1708d77f557b4cc7bdbd76ed Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 15 May 2025 16:58:49 +0000 Subject: x86/resctrl: Move pseudo lock prototypes to include/linux/resctrl.h The resctrl pseudo-lock feature allows an architecture to allocate data into particular cache portions, which are then treated as reserved to avoid that data ever being evicted. Setting this up is deeply architecture specific as it involves disabling prefetchers etc. It is not possible to support this kind of feature on arm64. Risc-V is assumed to be the same. The prototypes for the architecture code were added to x86's asm/resctrl.h, with other architectures able to provide stubs for their architecture. This forces other architectures to provide identical stubs. Move the prototypes and stubs to linux/resctrl.h, and switch between them using the existing Kconfig symbol. Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Babu Moger Tested-by: Shaopeng Tan Tested-by: Tony Luck Link: https://lore.kernel.org/20250515165855.31452-20-james.morse@arm.com --- include/linux/resctrl.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 5ef972cbf56b..9ba771f2ddea 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -534,4 +534,17 @@ extern unsigned int resctrl_rmid_realloc_limit; int resctrl_init(void); void resctrl_exit(void); +#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK +u64 resctrl_arch_get_prefetch_disable_bits(void); +int resctrl_arch_pseudo_lock_fn(void *_plr); +int resctrl_arch_measure_cycles_lat_fn(void *_plr); +int resctrl_arch_measure_l2_residency(void *_plr); +int resctrl_arch_measure_l3_residency(void *_plr); +#else +static inline u64 resctrl_arch_get_prefetch_disable_bits(void) { return 0; } +static inline int resctrl_arch_pseudo_lock_fn(void *_plr) { return 0; } +static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; } +static inline int resctrl_arch_measure_l2_residency(void *_plr) { return 0; } +static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; } +#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ #endif /* _RESCTRL_H */ -- cgit v1.2.3 From e847a847aea5728dfcd13b558b9d82b79f9a85c7 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:04 +0100 Subject: irqdomain: Drop of_node_to_fwnode() All uses of of_node_to_fwnode() in non-irqdomain code were changed to "officially" defined of_fwnode_handle(). Therefore, the former can be dropped along with the last uses in the irqdomain code. Due to merge logistics the inline cannot be dropped immediately. Move it to a deprecated section, which will be removed during the merge window. 
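As an aside for readers following the conversion, here is a minimal sketch of what the replacement looks like at a call site; the function and variable names are illustrative and not taken from the patch:

#include <linux/irqdomain.h>
#include <linux/of.h>

/* Look up an irq domain from an OF node using the preferred helper. */
static struct irq_domain *example_find_parent_domain(struct device_node *np)
{
	/* Was: irq_find_matching_fwnode(of_node_to_fwnode(np), DOMAIN_BUS_WIRED) */
	return irq_find_matching_fwnode(of_fwnode_handle(np), DOMAIN_BUS_WIRED);
}

Both helpers evaluate to &np->fwnode (or NULL for a NULL node), so the substitution is purely mechanical.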
[ tglx: Handle merge logistics ] Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-12-jirislaby@kernel.org --- include/linux/irqdomain.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index df7e9278c8ac..91ed86feac07 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -358,11 +358,6 @@ int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity); -static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) -{ - return node ? &node->fwnode : NULL; -} - extern const struct fwnode_operations irqchip_fwnode_ops; static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode) @@ -387,7 +382,7 @@ struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, static inline struct irq_domain *irq_find_matching_host(struct device_node *node, enum irq_domain_bus_token bus_token) { - return irq_find_matching_fwnode(of_node_to_fwnode(node), bus_token); + return irq_find_matching_fwnode(of_fwnode_handle(node), bus_token); } static inline struct irq_domain *irq_find_host(struct device_node *node) @@ -407,7 +402,7 @@ static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_no const struct irq_domain_ops *ops, void *host_data) { - return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data); + return irq_domain_create_simple(of_fwnode_handle(of_node), size, first_irq, ops, host_data); } /** @@ -423,7 +418,7 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no void *host_data) { struct irq_domain_info info = { - .fwnode = of_node_to_fwnode(of_node), + .fwnode = of_fwnode_handle(of_node), .size = size, .hwirq_max = size, .ops = ops, @@ -442,7 +437,7 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod void *host_data) { struct irq_domain_info info = { - .fwnode = of_node_to_fwnode(of_node), + .fwnode = of_fwnode_handle(of_node), .hwirq_max = max_irq, .direct_max = max_irq, .ops = ops, @@ -462,7 +457,7 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node void *host_data) { struct irq_domain_info info = { - .fwnode = of_node_to_fwnode(of_node), + .fwnode = of_fwnode_handle(of_node), .hwirq_max = ~0U, .ops = ops, .host_data = host_data, @@ -611,7 +606,7 @@ static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *par void *host_data) { return irq_domain_create_hierarchy(parent, flags, size, - of_node_to_fwnode(node), + of_fwnode_handle(node), ops, host_data); } @@ -755,6 +750,12 @@ static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsig } #endif +/* Deprecated functions. Will be removed in the merge window */ +static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) +{ + return node ? &node->fwnode : NULL; +} + #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } static inline struct irq_domain *irq_find_matching_fwnode( -- cgit v1.2.3 From c7131b12080ad9c74eadd4f62386c8642168bec7 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:05 +0100 Subject: irqdomain: Make irq_domain_create_hierarchy() an inline There is no reason to export the function as an extra symbol. 
It is simple enough and is just a wrapper to already exported functions. Therefore, switch the exported function to an inline. Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-13-jirislaby@kernel.org --- include/linux/irqdomain.h | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 91ed86feac07..6e9a5ec2986f 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -591,12 +591,45 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, void *handler_data, const char *handler_name); void irq_domain_reset_irq_data(struct irq_data *irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, - unsigned int flags, - unsigned int size, - struct fwnode_handle *fwnode, - const struct irq_domain_ops *ops, - void *host_data); +/** + * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy + * @parent: Parent irq domain to associate with the new domain + * @flags: Irq domain flags associated to the domain + * @size: Size of the domain. See below + * @fwnode: Optional fwnode of the interrupt controller + * @ops: Pointer to the interrupt domain callbacks + * @host_data: Controller private data pointer + * + * If @size is 0 a tree domain is created, otherwise a linear domain. + * + * If successful the parent is associated to the new domain and the + * domain flags are set. + * Returns pointer to IRQ domain, or NULL on failure. + */ +static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, + unsigned int flags, + unsigned int size, + struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain_info info = { + .fwnode = fwnode, + .size = size, + .hwirq_max = size, + .ops = ops, + .host_data = host_data, + .domain_flags = flags, + .parent = parent, + }; + struct irq_domain *d; + + if (!info.size) + info.hwirq_max = ~0U; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; +} static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, unsigned int flags, -- cgit v1.2.3 From 9cf19f061ccc98b63367b16551c290016b7d6b13 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:10 +0100 Subject: gpio: Switch to irq_domain_create_*() irq_domain_add_*() interfaces are going away as being obsolete now. Switch to the preferred irq_domain_create_*() ones. Those differ in the node parameter: They take more generic struct fwnode_handle instead of struct device_node. Therefore, of_fwnode_handle() is added around the original parameter. Note some of the users can likely use dev->fwnode directly instead of indirect of_fwnode_handle(dev->of_node). But dev->fwnode is not guaranteed to be set for all, so this has to be investigated on case to case basis (by people who can actually test with the HW). 
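To illustrate the mechanical conversion described above, a hedged sketch of a typical call site follows; the names and the interrupt count are placeholders and do not come from any specific GPIO driver:

#include <linux/irqdomain.h>
#include <linux/of.h>

static struct irq_domain *example_create_domain(struct device_node *np,
						unsigned int ngpios,
						const struct irq_domain_ops *ops,
						void *host_data)
{
	/* Was: irq_domain_add_linear(np, ngpios, ops, host_data); */
	return irq_domain_create_linear(of_fwnode_handle(np), ngpios,
					ops, host_data);
}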
[ tglx: Fix up subject prefix ] Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-18-jirislaby@kernel.org --- include/linux/gpio/driver.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4c0294a9104d..b53233051bee 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -287,8 +287,9 @@ struct gpio_irq_chip { /** * @first: * - * Required for static IRQ allocation. If set, irq_domain_add_simple() - * will allocate and map all IRQs during initialization. + * Required for static IRQ allocation. If set, + * irq_domain_create_simple() will allocate and map all IRQs + * during initialization. */ unsigned int first; -- cgit v1.2.3 From 813da4f379e789c428d2e0a44730f852e090d47d Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:32 +0100 Subject: powerpc: Switch irq_domain_add_nomap() to use fwnode All irq_domain_add_*() functions are going away. PowerPC is the only user of irq_domain_add_nomap() and there is no irq_domain_create_nomap() complement. Therefore, to align with the rest of the kernel, rename irq_domain_add_nomap() to irq_domain_create_nomap() and accept a fwnode_handle instead of a device_node. [ tglx: Fix up subject prefix ] Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-40-jirislaby@kernel.org --- include/linux/irqdomain.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 6e9a5ec2986f..f3c79f908a28 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -431,13 +431,13 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no } #ifdef CONFIG_IRQ_DOMAIN_NOMAP -static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, +static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *fwnode, unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain_info info = { - .fwnode = of_fwnode_handle(of_node), + .fwnode = fwnode, .hwirq_max = max_irq, .direct_max = max_irq, .ops = ops, -- cgit v1.2.3 From 42b8b16fe56c206fde7fbae0116769d0addef4b7 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:33 +0100 Subject: irqdomain: Drop irq_domain_add_*() functions Most irq_domain_add_*() functions are unused now, so drop them. The remaining ones are moved to the deprecated section and will be removed during the merge window after the patches in various trees have been merged. Note: The Chinese docs are touched but unfinished. I cannot parse those. 
[ tglx: Remove the leftover in irq-domain.rst and handle merge logistics ] Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-41-jirislaby@kernel.org --- include/linux/irqdomain.h | 102 ++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index f3c79f908a28..712c662de981 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -338,12 +338,6 @@ struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int first_irq, const struct irq_domain_ops *ops, void *host_data); -struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - irq_hw_number_t first_hwirq, - const struct irq_domain_ops *ops, - void *host_data); struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, @@ -396,40 +390,6 @@ static inline struct irq_domain *irq_find_host(struct device_node *node) return d; } -static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - const struct irq_domain_ops *ops, - void *host_data) -{ - return irq_domain_create_simple(of_fwnode_handle(of_node), size, first_irq, ops, host_data); -} - -/** - * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. - * @of_node: pointer to interrupt controller's device tree node. - * @size: Number of interrupts in the domain. - * @ops: map/unmap domain callbacks - * @host_data: Controller private data pointer - */ -static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, - unsigned int size, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain_info info = { - .fwnode = of_fwnode_handle(of_node), - .size = size, - .hwirq_max = size, - .ops = ops, - .host_data = host_data, - }; - struct irq_domain *d; - - d = irq_domain_instantiate(&info); - return IS_ERR(d) ? NULL : d; -} - #ifdef CONFIG_IRQ_DOMAIN_NOMAP static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *fwnode, unsigned int max_irq, @@ -452,22 +412,6 @@ static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *f unsigned int irq_create_direct_mapping(struct irq_domain *domain); #endif -static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain_info info = { - .fwnode = of_fwnode_handle(of_node), - .hwirq_max = ~0U, - .ops = ops, - .host_data = host_data, - }; - struct irq_domain *d; - - d = irq_domain_instantiate(&info); - return IS_ERR(d) ? NULL : d; -} - static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, unsigned int size, const struct irq_domain_ops *ops, @@ -631,18 +575,6 @@ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain * return IS_ERR(d) ? 
NULL : d; } -static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, - unsigned int flags, - unsigned int size, - struct device_node *node, - const struct irq_domain_ops *ops, - void *host_data) -{ - return irq_domain_create_hierarchy(parent, flags, size, - of_fwnode_handle(node), - ops, host_data); -} - int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, @@ -789,6 +721,40 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) return node ? &node->fwnode : NULL; } +static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain_info info = { + .fwnode = of_fwnode_handle(of_node), + .hwirq_max = ~0U, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; +} + +static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain_info info = { + .fwnode = of_fwnode_handle(of_node), + .size = size, + .hwirq_max = size, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; +} + #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } static inline struct irq_domain *irq_find_matching_fwnode( -- cgit v1.2.3 From 14ebb11ba895c9223d9a453a17df2fd81410c96c Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:42 +0100 Subject: irqdomain: Drop irq_linear_revmap() irq_linear_revmap() is deprecated and unused now. So remove it. Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-50-jirislaby@kernel.org --- include/linux/irqdomain.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 712c662de981..c8e55cd1e352 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -492,12 +492,6 @@ static inline unsigned int irq_find_mapping(struct irq_domain *domain, return 0; } -static inline unsigned int irq_linear_revmap(struct irq_domain *domain, - irq_hw_number_t hwirq) -{ - return irq_find_mapping(domain, hwirq); -} - extern const struct irq_domain_ops irq_domain_simple_ops; /* stock xlate functions */ -- cgit v1.2.3 From 18e743e911020eb74cc4eaf549c18ed12a5acb9a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:43 +0100 Subject: irqdomain: Use irq_domain_instantiate()'s return value as initializers This makes the code more compact. Note that irq_domain_create_hierarchy()'s handling of size is now part of info's initializer (using the ternary operator). That makes more sense while reading it. 
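For readers unfamiliar with the shorthand, the "size ? : ~0U" initializer relies on the GNU "elvis" extension: a ?: b evaluates to a when a is non-zero and to b otherwise, without evaluating a twice. A trivial sketch of the equivalence:

/* Same result as the removed "if (!info.size) info.hwirq_max = ~0U;" */
static inline unsigned int example_hwirq_max(unsigned int size)
{
	return size ? : ~0U;	/* equivalent to: size ? size : ~0U */
}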
Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-51-jirislaby@kernel.org --- include/linux/irqdomain.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index c8e55cd1e352..1e3858438e69 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -403,9 +403,8 @@ static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *f .ops = ops, .host_data = host_data, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } @@ -424,9 +423,8 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle * .ops = ops, .host_data = host_data, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } @@ -440,9 +438,8 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw .ops = ops, .host_data = host_data, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } @@ -554,18 +551,14 @@ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain * struct irq_domain_info info = { .fwnode = fwnode, .size = size, - .hwirq_max = size, + .hwirq_max = size ? : ~0U, .ops = ops, .host_data = host_data, .domain_flags = flags, .parent = parent, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - if (!info.size) - info.hwirq_max = ~0U; - - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } -- cgit v1.2.3 From 66cbf17fe67156608421e717975c415a85c08466 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:44 +0100 Subject: irqdomain: Make struct irq_domain_info variables const This is just expressing it explicitly as irq_domain_instantiate() takes const already. No functional change. 
Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-52-jirislaby@kernel.org --- include/linux/irqdomain.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 1e3858438e69..66a26dfd254b 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -396,7 +396,7 @@ static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *f const struct irq_domain_ops *ops, void *host_data) { - struct irq_domain_info info = { + const struct irq_domain_info info = { .fwnode = fwnode, .hwirq_max = max_irq, .direct_max = max_irq, @@ -416,7 +416,7 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle * const struct irq_domain_ops *ops, void *host_data) { - struct irq_domain_info info = { + const struct irq_domain_info info = { .fwnode = fwnode, .size = size, .hwirq_max = size, @@ -432,7 +432,7 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw const struct irq_domain_ops *ops, void *host_data) { - struct irq_domain_info info = { + const struct irq_domain_info info = { .fwnode = fwnode, .hwirq_max = ~0, .ops = ops, @@ -548,7 +548,7 @@ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain * const struct irq_domain_ops *ops, void *host_data) { - struct irq_domain_info info = { + const struct irq_domain_info info = { .fwnode = fwnode, .size = size, .hwirq_max = size ? : ~0U, -- cgit v1.2.3 From 2272a78b3f4a7a1621f00ac6e9c9232d12b7ff01 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:45 +0100 Subject: irqdomain: Improve kernel-docs of functions Many of irqdomain.h's functions are referenced in Documentation/ but are not properly documented. Therefore, document these. And use "Returns:" tag consistently, so that it is properly generated in the resulting docs. Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-53-jirislaby@kernel.org --- include/linux/irqdomain.h | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 66a26dfd254b..a70e2ba0b91f 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -411,6 +411,15 @@ static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *f unsigned int irq_create_direct_mapping(struct irq_domain *domain); #endif +/** + * irq_domain_create_linear - Allocate and register a linear revmap irq_domain. + * @fwnode: pointer to interrupt controller's FW node. + * @size: Number of interrupts in the domain. + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer + * + * Returns: Newly created irq_domain + */ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, unsigned int size, const struct irq_domain_ops *ops, @@ -457,6 +466,18 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); void irq_dispose_mapping(unsigned int virq); +/** + * irq_create_mapping - Map a hardware interrupt into linux irq space + * @domain: domain owning this hardware interrupt or NULL for default domain + * @hwirq: hardware irq number in that domain space + * + * Only one mapping per hardware interrupt is permitted. 
+ * + * If the sense/trigger is to be specified, set_irq_type() should be called + * on the number returned from that call. + * + * Returns: Linux irq number or 0 on error + */ static inline unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { @@ -467,6 +488,13 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, unsigned int *irq); +/** + * irq_resolve_mapping - Find a linux irq from a hw irq number. + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space + * + * Returns: Interrupt descriptor + */ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { @@ -477,6 +505,8 @@ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, * irq_find_mapping() - Find a linux irq from a hw irq number. * @domain: domain owning this hardware interrupt * @hwirq: hardware irq number in that domain space + * + * Returns: Linux irq number or 0 if not found */ static inline unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) @@ -539,7 +569,8 @@ void irq_domain_reset_irq_data(struct irq_data *irq_data); * * If successful the parent is associated to the new domain and the * domain flags are set. - * Returns pointer to IRQ domain, or NULL on failure. + * + * Returns: A pointer to IRQ domain, or %NULL on failure. */ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, unsigned int flags, @@ -570,6 +601,15 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); int irq_domain_activate_irq(struct irq_data *irq_data, bool early); void irq_domain_deactivate_irq(struct irq_data *irq_data); +/** + * irq_domain_alloc_irqs - Allocate IRQs from domain + * @domain: domain to allocate from + * @nr_irqs: number of IRQs to allocate + * @node: NUMA node id for memory allocation + * @arg: domain specific argument + * + * See __irq_domain_alloc_irqs()' documentation. + */ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { -- cgit v1.2.3 From a4efe303e50e3222591ab6ec5eb0ea92b7260d96 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:29:49 +0100 Subject: Documentation: irqdomain: Update it The irqdomain documentaion became obsolete over time. Update and extend it a bit with respect to the current code and HW. Most notably the doubled documentation of irq_domain (from .rst and .h) was unified and let only in .rst. A reference link was added to .h. Furthermore: * Add some 'struct' keywords, so that the respective structs are hyperlinked * :c:member: use where appropriate to mark a member of a struct * Rephrase some wording to improve readability/understanding Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-57-jirislaby@kernel.org --- include/linux/irqdomain.h | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a70e2ba0b91f..1a1786dc19d2 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -1,30 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * irq_domain - IRQ translation domains + * irq_domain - IRQ Translation Domains * - * Translation infrastructure between hw and linux irq numbers. 
This is - * helpful for interrupt controllers to implement mapping between hardware - * irq numbers and the Linux irq number space. - * - * irq_domains also have hooks for translating device tree or other - * firmware interrupt representations into a hardware irq number that - * can be mapped back to a Linux irq number without any extra platform - * support code. - * - * Interrupt controller "domain" data structure. This could be defined as a - * irq domain controller. That is, it handles the mapping between hardware - * and virtual interrupt numbers for a given interrupt domain. The domain - * structure is generally created by the PIC code for a given PIC instance - * (though a domain can cover more than one PIC if they have a flat number - * model). It's the domain callbacks that are responsible for setting the - * irq_chip on a given irq_desc after it's been mapped. - * - * The host code and data structures use a fwnode_handle pointer to - * identify the domain. In some cases, and in order to preserve source - * code compatibility, this fwnode pointer is "upgraded" to a DT - * device_node. For those firmware infrastructures that do not provide - * a unique identifier for an interrupt controller, the irq_domain - * code offers a fwnode allocator. + * See Documentation/core-api/irq/irq-domain.rst for the details. */ #ifndef _LINUX_IRQDOMAIN_H -- cgit v1.2.3 From 38c1e73fdeb37962324a3265ef95618dfa4552ab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 6 May 2025 14:22:59 +0200 Subject: irqdomain: Consolidate coding style Now that the file has been thrown through the mincer, finish the job and consolidate the coding style. Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 257 ++++++++++++++++++++-------------------------- 1 file changed, 113 insertions(+), 144 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 1a1786dc19d2..2f6e4c9dd743 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -39,9 +39,9 @@ struct msi_parent_ops; * pass a device-specific description of an interrupt. */ struct irq_fwspec { - struct fwnode_handle *fwnode; - int param_count; - u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; + struct fwnode_handle *fwnode; + int param_count; + u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; }; /* Conversion function from of_phandle_args fields to fwspec */ @@ -50,26 +50,26 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, /** * struct irq_domain_ops - Methods for irq_domain objects - * @match: Match an interrupt controller device node to a domain, returns - * 1 on a match - * @select: Match an interrupt controller fw specification. It is more generic - * than @match as it receives a complete struct irq_fwspec. Therefore, - * @select is preferred if provided. Returns 1 on a match. - * @map: Create or update a mapping between a virtual irq number and a hw - * irq number. This is called only once for a given mapping. - * @unmap: Dispose of such a mapping - * @xlate: Given a device tree node and interrupt specifier, decode - * the hardware irq number and linux irq type value. - * @alloc: Allocate @nr_irqs interrupts starting from @virq. - * @free: Free @nr_irqs interrupts starting from @virq. - * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only - * reserve the vector. If unset, assign the vector (called from - * request_irq()). - * @deactivate: Disarm one interrupt (@irqd). 
- * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and - * linux irq type value (@out_type). This is a generalised @xlate - * (over struct irq_fwspec) and is preferred if provided. - * @debug_show: For domains to show specific data for an interrupt in debugfs. + * @match: Match an interrupt controller device node to a domain, returns + * 1 on a match + * @select: Match an interrupt controller fw specification. It is more generic + * than @match as it receives a complete struct irq_fwspec. Therefore, + * @select is preferred if provided. Returns 1 on a match. + * @map: Create or update a mapping between a virtual irq number and a hw + * irq number. This is called only once for a given mapping. + * @unmap: Dispose of such a mapping + * @xlate: Given a device tree node and interrupt specifier, decode + * the hardware irq number and linux irq type value. + * @alloc: Allocate @nr_irqs interrupts starting from @virq. + * @free: Free @nr_irqs interrupts starting from @virq. + * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only + * reserve the vector. If unset, assign the vector (called from + * request_irq()). + * @deactivate: Disarm one interrupt (@irqd). + * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and + * linux irq type value (@out_type). This is a generalised @xlate + * (over struct irq_fwspec) and is preferred if provided. + * @debug_show: For domains to show specific data for an interrupt in debugfs. * * Functions below are provided by the driver and called whenever a new mapping * is created or an old mapping is disposed. The driver can then proceed to @@ -77,29 +77,29 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, * to setup the irq_desc when returning from map(). 
*/ struct irq_domain_ops { - int (*match)(struct irq_domain *d, struct device_node *node, - enum irq_domain_bus_token bus_token); - int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec, - enum irq_domain_bus_token bus_token); - int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); - void (*unmap)(struct irq_domain *d, unsigned int virq); - int (*xlate)(struct irq_domain *d, struct device_node *node, - const u32 *intspec, unsigned int intsize, - unsigned long *out_hwirq, unsigned int *out_type); + int (*match)(struct irq_domain *d, struct device_node *node, + enum irq_domain_bus_token bus_token); + int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token); + int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); + void (*unmap)(struct irq_domain *d, unsigned int virq); + int (*xlate)(struct irq_domain *d, struct device_node *node, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /* extended V2 interfaces to support hierarchy irq_domains */ - int (*alloc)(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs, void *arg); - void (*free)(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs); - int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); - void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); - int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, - unsigned long *out_hwirq, unsigned int *out_type); + int (*alloc)(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs, void *arg); + void (*free)(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); + void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); + int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS - void (*debug_show)(struct seq_file *m, struct irq_domain *d, - struct irq_data *irqd, int ind); + void (*debug_show)(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind); #endif }; @@ -222,8 +222,7 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) return to_of_node(d->fwnode); } -static inline void irq_domain_set_pm_device(struct irq_domain *d, - struct device *dev) +static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev) { if (d) d->pm_dev = dev; @@ -239,14 +238,12 @@ enum { IRQCHIP_FWNODE_NAMED_ID, }; -static inline -struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) +static inline struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL); } -static inline -struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) +static inline struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name, NULL); @@ -311,23 +308,17 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); struct irq_domain *devm_irq_domain_instantiate(struct device *dev, const struct irq_domain_info *info); -struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, - unsigned int size, +struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int 
size, unsigned int first_irq, - const struct irq_domain_ops *ops, - void *host_data); -struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, - unsigned int size, - unsigned int first_irq, - irq_hw_number_t first_hwirq, - const struct irq_domain_ops *ops, - void *host_data); + const struct irq_domain_ops *ops, void *host_data); +struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size, + unsigned int first_irq, irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); void irq_set_default_domain(struct irq_domain *domain); struct irq_domain *irq_get_default_domain(void); -int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, - irq_hw_number_t hwirq, int node, +int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity); extern const struct fwnode_operations irqchip_fwnode_ops; @@ -337,12 +328,10 @@ static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode) return fwnode && fwnode->ops == &irqchip_fwnode_ops; } -void irq_domain_update_bus_token(struct irq_domain *domain, - enum irq_domain_bus_token bus_token); +void irq_domain_update_bus_token(struct irq_domain *domain, enum irq_domain_bus_token bus_token); -static inline -struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, - enum irq_domain_bus_token bus_token) +static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token) { struct irq_fwspec fwspec = { .fwnode = fwnode, @@ -370,9 +359,9 @@ static inline struct irq_domain *irq_find_host(struct device_node *node) #ifdef CONFIG_IRQ_DOMAIN_NOMAP static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *fwnode, - unsigned int max_irq, - const struct irq_domain_ops *ops, - void *host_data) + unsigned int max_irq, + const struct irq_domain_ops *ops, + void *host_data) { const struct irq_domain_info info = { .fwnode = fwnode, @@ -391,17 +380,17 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain); /** * irq_domain_create_linear - Allocate and register a linear revmap irq_domain. - * @fwnode: pointer to interrupt controller's FW node. - * @size: Number of interrupts in the domain. - * @ops: map/unmap domain callbacks - * @host_data: Controller private data pointer + * @fwnode: pointer to interrupt controller's FW node. + * @size: Number of interrupts in the domain. 
+ * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer * * Returns: Newly created irq_domain */ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, - unsigned int size, - const struct irq_domain_ops *ops, - void *host_data) + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) { const struct irq_domain_info info = { .fwnode = fwnode, @@ -416,8 +405,8 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle * } static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode, - const struct irq_domain_ops *ops, - void *host_data) + const struct irq_domain_ops *ops, + void *host_data) { const struct irq_domain_info info = { .fwnode = fwnode, @@ -432,22 +421,19 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw void irq_domain_remove(struct irq_domain *domain); -int irq_domain_associate(struct irq_domain *domain, unsigned int irq, - irq_hw_number_t hwirq); -void irq_domain_associate_many(struct irq_domain *domain, - unsigned int irq_base, +int irq_domain_associate(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq); +void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -unsigned int irq_create_mapping_affinity(struct irq_domain *domain, - irq_hw_number_t hwirq, +unsigned int irq_create_mapping_affinity(struct irq_domain *domain, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); void irq_dispose_mapping(unsigned int virq); /** * irq_create_mapping - Map a hardware interrupt into linux irq space - * @domain: domain owning this hardware interrupt or NULL for default domain - * @hwirq: hardware irq number in that domain space + * @domain: domain owning this hardware interrupt or NULL for default domain + * @hwirq: hardware irq number in that domain space * * Only one mapping per hardware interrupt is permitted. * @@ -456,8 +442,7 @@ void irq_dispose_mapping(unsigned int virq); * * Returns: Linux irq number or 0 on error */ -static inline unsigned int irq_create_mapping(struct irq_domain *domain, - irq_hw_number_t hwirq) +static inline unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { return irq_create_mapping_affinity(domain, hwirq, NULL); } @@ -468,8 +453,8 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, /** * irq_resolve_mapping - Find a linux irq from a hw irq number. - * @domain: domain owning this hardware interrupt - * @hwirq: hardware irq number in that domain space + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space * * Returns: Interrupt descriptor */ @@ -481,8 +466,8 @@ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, /** * irq_find_mapping() - Find a linux irq from a hw irq number. 
- * @domain: domain owning this hardware interrupt - * @hwirq: hardware irq number in that domain space + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space * * Returns: Linux irq number or 0 if not found */ @@ -501,14 +486,14 @@ extern const struct irq_domain_ops irq_domain_simple_ops; /* stock xlate functions */ int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type); + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type); + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type); + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); @@ -525,12 +510,9 @@ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); /* V2 interfaces to support hierarchy IRQ domains. */ -struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, - unsigned int virq); -void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, - const struct irq_chip *chip, - void *chip_data, irq_flow_handler_t handler, +struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); +void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); void irq_domain_reset_irq_data(struct irq_data *irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY @@ -551,11 +533,10 @@ void irq_domain_reset_irq_data(struct irq_data *irq_data); * Returns: A pointer to IRQ domain, or %NULL on failure. */ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, - unsigned int flags, - unsigned int size, - struct fwnode_handle *fwnode, - const struct irq_domain_ops *ops, - void *host_data) + unsigned int flags, unsigned int size, + struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data) { const struct irq_domain_info info = { .fwnode = fwnode, @@ -571,9 +552,8 @@ static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain * return IS_ERR(d) ? NULL : d; } -int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, - unsigned int nr_irqs, int node, void *arg, - bool realloc, +int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, + int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity); void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); int irq_domain_activate_irq(struct irq_data *irq_data, bool early); @@ -588,37 +568,29 @@ void irq_domain_deactivate_irq(struct irq_data *irq_data); * * See __irq_domain_alloc_irqs()' documentation. 
*/ -static inline int irq_domain_alloc_irqs(struct irq_domain *domain, - unsigned int nr_irqs, int node, void *arg) +static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, + int node, void *arg) { - return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, - NULL); + return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, NULL); } -int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, - unsigned int virq, - irq_hw_number_t hwirq, - const struct irq_chip *chip, +int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data); -void irq_domain_free_irqs_common(struct irq_domain *domain, - unsigned int virq, +void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); -void irq_domain_free_irqs_top(struct irq_domain *domain, - unsigned int virq, unsigned int nr_irqs); +void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg); int irq_domain_pop_irq(struct irq_domain *domain, int virq); -int irq_domain_alloc_irqs_parent(struct irq_domain *domain, - unsigned int irq_base, +int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg); -void irq_domain_free_irqs_parent(struct irq_domain *domain, - unsigned int irq_base, +void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs); -int irq_domain_disconnect_hierarchy(struct irq_domain *domain, - unsigned int virq); +int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq); static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { @@ -627,8 +599,7 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) static inline bool irq_domain_is_ipi(struct irq_domain *domain) { - return domain->flags & - (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); + return domain->flags & (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); } static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) @@ -657,14 +628,13 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) } #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -static inline int irq_domain_alloc_irqs(struct irq_domain *domain, - unsigned int nr_irqs, int node, void *arg) +static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, + int node, void *arg) { return -1; } -static inline void irq_domain_free_irqs(unsigned int virq, - unsigned int nr_irqs) { } +static inline void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { } static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { @@ -704,8 +674,7 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_MSI_IRQ -int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, - unsigned int type); +int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, unsigned int type); void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq); #else static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, @@ -727,8 +696,8 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) } static inline struct irq_domain 
*irq_domain_add_tree(struct device_node *of_node, - const struct irq_domain_ops *ops, - void *host_data) + const struct irq_domain_ops *ops, + void *host_data) { struct irq_domain_info info = { .fwnode = of_fwnode_handle(of_node), @@ -743,9 +712,9 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node } static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, - unsigned int size, - const struct irq_domain_ops *ops, - void *host_data) + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) { struct irq_domain_info info = { .fwnode = of_fwnode_handle(of_node), @@ -762,8 +731,8 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } -static inline struct irq_domain *irq_find_matching_fwnode( - struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) +static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token) { return NULL; } -- cgit v1.2.3 From e51b27438a10391fdc94dd2046d9ffa9c2679c74 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 May 2025 18:28:11 +0100 Subject: irqchip: Make irq-msi-lib.h globally available Move irq-msi-lib.h into include/linux/irqchip, making it available to compilation units outside of drivers/irqchip. This requires some churn in drivers to fetch it from the new location, generated using this script: git grep -l -w \"irq-msi-lib.h\" | \ xargs sed -i -e 's:"irq-msi-lib.h":\:' Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250513172819.2216709-2-maz@kernel.org --- include/linux/irqchip/irq-msi-lib.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/irqchip/irq-msi-lib.h (limited to 'include') diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h new file mode 100644 index 000000000000..dd8d1d138544 --- /dev/null +++ b/include/linux/irqchip/irq-msi-lib.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2022 Linutronix GmbH +// Copyright (C) 2022 Intel + +#ifndef _IRQCHIP_IRQ_MSI_LIB_H +#define _IRQCHIP_IRQ_MSI_LIB_H + +#include +#include +#include + +#ifdef CONFIG_PCI_MSI +#define MATCH_PCI_MSI BIT(DOMAIN_BUS_PCI_MSI) +#else +#define MATCH_PCI_MSI (0) +#endif + +#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) + +int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token); + +bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, + struct msi_domain_info *info); + +#endif /* _IRQCHIP_IRQ_MSI_LIB_H */ -- cgit v1.2.3 From e4d001b54f78769ba1a1404c2801ae95e19fd893 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 May 2025 18:28:12 +0100 Subject: genirq/msi: Add helper for creating MSI-parent irq domains Creating an irq domain that serves as an MSI parent requires a substantial amount of esoteric boiler-plate code, some of which is often provided twice (such as the bus token). To make things a bit simpler for the unsuspecting MSI tinkerer, provide a helper that does it for them, and serves as documentation of what needs to be provided. 
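A hedged sketch of how an irqchip driver might call the new helper (whose declaration is added below); the fwnode, ops, size and msi_parent_ops values are placeholders rather than code from an existing driver:

#include <linux/irqdomain.h>
#include <linux/msi.h>

static struct irq_domain *
example_create_msi_parent(struct fwnode_handle *fwnode,
			  struct irq_domain *parent,
			  const struct irq_domain_ops *ops,
			  const struct msi_parent_ops *pops)
{
	struct irq_domain_info info = {
		.fwnode		= fwnode,
		.ops		= ops,
		.size		= 64,	/* arbitrary example size */
		.hwirq_max	= 64,
		.parent		= parent,
	};

	/* One call instead of the open-coded MSI-parent boiler plate. */
	return msi_create_parent_irq_domain(&info, pops);
}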
Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250513172819.2216709-3-maz@kernel.org --- include/linux/msi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index f4b94ccaf0c1..6863540f4b71 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -636,6 +636,10 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); +struct irq_domain_info; +struct irq_domain *msi_create_parent_irq_domain(struct irq_domain_info *info, + const struct msi_parent_ops *msi_parent_ops); + bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, const struct msi_domain_template *template, unsigned int hwsize, void *domain_data, -- cgit v1.2.3 From dcd21b609d4abc7303f8683bce4f35d78d7d6830 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 Apr 2025 18:21:06 -0400 Subject: NFS: Avoid flushing data while holding directory locks in nfs_rename() The Linux client assumes that all filehandles are non-volatile for renames within the same directory (otherwise sillyrename cannot work). However, the existence of the Linux 'subtree_check' export option has meant that nfs_rename() has always assumed it needs to flush writes before attempting to rename. Since NFSv4 does allow the client to query whether or not the server exhibits this behaviour, and since knfsd does actually set the appropriate flag when 'subtree_check' is enabled on an export, it should be OK to optimise away the write flushing behaviour in the cases where it is clearly not needed. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton --- include/linux/nfs_fs_sb.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 71319637a84e..ee03f3cef30c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -213,6 +213,15 @@ struct nfs_server { char *fscache_uniq; /* Uniquifier (or NULL) */ #endif + /* The following #defines numerically match the NFSv4 equivalents */ +#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1) +#define NFS_FH_VOLATILE_ANY (0x2) +#define NFS_FH_VOL_MIGRATION (0x4) +#define NFS_FH_VOL_RENAME (0x8) +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME) + u32 fh_expire_type; /* V4 bitmask representing file + handle volatility type for + this filesystem */ u32 pnfs_blksize; /* layout_blksize attr */ #if IS_ENABLED(CONFIG_NFS_V4) u32 attr_bitmask[3];/* V4 bitmask representing the set @@ -236,9 +245,6 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ - u32 fh_expire_type; /* V4 bitmask representing file - handle volatility type for - this filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; void *pnfs_ld_data; /* per mount point data */ -- cgit v1.2.3 From 9cd5ef0b8c04c46a15c8f5d002f02ea0d0477790 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 May 2025 10:03:54 +0000 Subject: net: rfs: add sock_rps_delete_flow() helper RFS can exhibit lower performance for workloads using short-lived flows and a small set of 4-tuple. This is often the case for load-testers, using a pair of hosts, if the server has a single listener port. 
Typical use case : Server : tcp_crr -T128 -F1000 -6 -U -l30 -R 14250 Client : tcp_crr -T128 -F1000 -6 -U -l30 -c -H server | grep local_throughput This is because RFS global hash table contains stale information, when the same RSS key is recycled for another socket and another cpu. Make sure to undo the changes and go back to initial state when a flow is disconnected. Performance of the above test is increased by 22 %, going from 372604 transactions per second to 457773. Signed-off-by: Eric Dumazet Reported-by: Octavian Purdila Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20250515100354.3339920-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/rps.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/net/rps.h b/include/net/rps.h index 507f4aa5d39b..d8ab3a08bcc4 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -123,6 +123,30 @@ static inline void sock_rps_record_flow(const struct sock *sk) #endif } +static inline void sock_rps_delete_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + struct rps_sock_flow_table *table; + u32 hash, index; + + if (!static_branch_unlikely(&rfs_needed)) + return; + + hash = READ_ONCE(sk->sk_rxhash); + if (!hash) + return; + + rcu_read_lock(); + table = rcu_dereference(net_hotdata.rps_sock_flow_table); + if (table) { + index = hash & table->mask; + if (READ_ONCE(table->ents[index]) != RPS_NO_CPU) + WRITE_ONCE(table->ents[index], RPS_NO_CPU); + } + rcu_read_unlock(); +#endif +} + static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd) { #ifdef CONFIG_RPS -- cgit v1.2.3 From 7b151e4efdde7cc7cfaae66e497d12487a70c6e9 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 14 May 2025 20:54:29 +0200 Subject: net: phy: fixed_phy: remove fixed_phy_register_with_gpiod Since its introduction 6 yrs ago this functions has never had a user. So remove it. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/ccbeef28-65ae-4e28-b1db-816c44338dee@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy_fixed.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 1acafd86ab13..3392c09b5d24 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -13,7 +13,6 @@ struct fixed_phy_status { }; struct device_node; -struct gpio_desc; struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) @@ -24,11 +23,6 @@ extern struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, struct device_node *np); -extern struct phy_device * -fixed_phy_register_with_gpiod(unsigned int irq, - struct fixed_phy_status *status, - struct gpio_desc *gpiod); - extern void fixed_phy_unregister(struct phy_device *phydev); extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, @@ -46,14 +40,6 @@ static inline struct phy_device *fixed_phy_register(unsigned int irq, return ERR_PTR(-ENODEV); } -static inline struct phy_device * -fixed_phy_register_with_gpiod(unsigned int irq, - struct fixed_phy_status *status, - struct gpio_desc *gpiod) -{ - return ERR_PTR(-ENODEV); -} - static inline void fixed_phy_unregister(struct phy_device *phydev) { } -- cgit v1.2.3 From c46286fdd6aa1d0e33c245bcffe9ff2428a777bd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 May 2025 18:49:26 +0200 Subject: mr: consolidate the ipmr_can_free_table() checks. 
Guoyu Yin reported a splat in the ipmr netns cleanup path: WARNING: CPU: 2 PID: 14564 at net/ipv4/ipmr.c:440 ipmr_free_table net/ipv4/ipmr.c:440 [inline] WARNING: CPU: 2 PID: 14564 at net/ipv4/ipmr.c:440 ipmr_rules_exit+0x135/0x1c0 net/ipv4/ipmr.c:361 Modules linked in: CPU: 2 UID: 0 PID: 14564 Comm: syz.4.838 Not tainted 6.14.0 #1 Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:ipmr_free_table net/ipv4/ipmr.c:440 [inline] RIP: 0010:ipmr_rules_exit+0x135/0x1c0 net/ipv4/ipmr.c:361 Code: ff df 48 c1 ea 03 80 3c 02 00 75 7d 48 c7 83 60 05 00 00 00 00 00 00 5b 5d 41 5c 41 5d 41 5e e9 71 67 7f 00 e8 4c 2d 8a fd 90 <0f> 0b 90 eb 93 e8 41 2d 8a fd 0f b6 2d 80 54 ea 01 31 ff 89 ee e8 RSP: 0018:ffff888109547c58 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff888108c12dc0 RCX: ffffffff83e09868 RDX: ffff8881022b3300 RSI: ffffffff83e098d4 RDI: 0000000000000005 RBP: ffff888104288000 R08: 0000000000000000 R09: ffffed10211825c9 R10: 0000000000000001 R11: ffff88801816c4a0 R12: 0000000000000001 R13: ffff888108c13320 R14: ffff888108c12dc0 R15: fffffbfff0b74058 FS: 00007f84f39316c0(0000) GS:ffff88811b100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f84f3930f98 CR3: 0000000113b56000 CR4: 0000000000350ef0 Call Trace: ipmr_net_exit_batch+0x50/0x90 net/ipv4/ipmr.c:3160 ops_exit_list+0x10c/0x160 net/core/net_namespace.c:177 setup_net+0x47d/0x8e0 net/core/net_namespace.c:394 copy_net_ns+0x25d/0x410 net/core/net_namespace.c:516 create_new_namespaces+0x3f6/0xaf0 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc3/0x180 kernel/nsproxy.c:228 ksys_unshare+0x78d/0x9a0 kernel/fork.c:3342 __do_sys_unshare kernel/fork.c:3413 [inline] __se_sys_unshare kernel/fork.c:3411 [inline] __x64_sys_unshare+0x31/0x40 kernel/fork.c:3411 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xa6/0x1a0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f84f532cc29 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f84f3931038 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 00007f84f5615fa0 RCX: 00007f84f532cc29 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000040000400 RBP: 00007f84f53fba18 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f84f5615fa0 R15: 00007fff51c5f328 The running kernel has CONFIG_IP_MROUTE_MULTIPLE_TABLES disabled, and the sanity check for such build is still too loose. Address the issue consolidating the relevant sanity check in a single helper regardless of the kernel configuration. Also share it between the ipv4 and ipv6 code. 
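With the consolidated helper (added in the hunk below), the free paths can keep a single, configuration-independent sanity check; roughly (sketch only, the read_pnet() access and surrounding teardown are assumed, not taken from this patch):

	static void ipmr_free_table(struct mr_table *mrt)
	{
		struct net *net = read_pnet(&mrt->net);

		/* Sketch: the table may only be torn down once the netns is
		 * exiting or never finished initializing. */
		WARN_ON_ONCE(!mr_can_free_table(net));
		/* actual teardown of VIFs and cache entries follows */
	}
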
Reported-by: Guoyu Yin Fixes: 50b94204446e ("ipmr: tune the ipmr_can_free_table() checks.") Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/372dc261e1bf12742276e1b984fc5a071b7fc5a8.1747321903.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/mroute_base.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 58a2401e4b55..0075f6e5c3da 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -262,6 +262,11 @@ struct mr_table { int mroute_reg_vif_num; }; +static inline bool mr_can_free_table(struct net *net) +{ + return !check_net(net) || !net_initialized(net); +} + #ifdef CONFIG_IP_MROUTE_COMMON void vif_device_init(struct vif_device *v, struct net_device *dev, -- cgit v1.2.3 From c80bdb1c55719cd6308d648a7920272a3be09e34 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 16 May 2025 13:16:44 -0600 Subject: io_uring: pass in struct io_big_cqe to io_alloc_ocqe() Rather than pass extra1/extra2 separately, just pass in the (now) named io_big_cqe struct instead. The callers that don't use/support CQE32 will now just pass a single NULL, rather than two seperate mystery zero values. Move the clearing of the big_cqe elements into io_alloc_ocqe() as well, so it can get moved out of the generic code. Reviewed-by: Caleb Sander Mateos Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 00dbd7cd0e7d..2922635986f5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -710,7 +710,7 @@ struct io_kiocb { const struct cred *creds; struct io_wq_work work; - struct { + struct io_big_cqe { u64 extra1; u64 extra2; } big_cqe; -- cgit v1.2.3 From f0ddbb1eed1898286d2bd99fd6ab64ca9700d267 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 4 Mar 2025 01:56:03 +1000 Subject: drm/dp: add option to disable zero sized address only transactions. Some older NVIDIA and some newer NVIDIA hardware/firmware seems to have issues with address only transactions (firmware rejects them). Add an option to the core drm dp to avoid address only transactions, This just puts the MOT flag removal on the last message of the transfer and avoids the start of transfer transaction. This with the flag set in nouveau, allows eDP probing on GB203 device. Signed-off-by: Dave Airlie Reviewed-by: Ben Skeggs Reviewed-by: Timur Tabi Tested-by: Timur Tabi Signed-off-by: Dave Airlie --- include/drm/display/drm_dp_helper.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 7b19192c7031..e4ca35143ff9 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -518,6 +518,11 @@ struct drm_dp_aux { * @powered_down: If true then the remote endpoint is powered down. */ bool powered_down; + + /** + * @no_zero_sized: If the hw can't use zero sized transfers (NVIDIA) + */ + bool no_zero_sized; }; int drm_dp_dpcd_probe(struct drm_dp_aux *aux, unsigned int offset); -- cgit v1.2.3 From cd5a4d53069ccd7cd48d201c981d5a8a99c93fc4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 15 May 2025 13:54:33 +0800 Subject: crypto: hash - Move core export and import into internel/hash.h The core export and import functions are targeted at implementors so move them into internal/hash.h. 
Signed-off-by: Herbert Xu --- include/crypto/hash.h | 48 ------------------------------------------ include/crypto/internal/hash.h | 48 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 1760662ad70a..9fc9daaaaab4 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -506,18 +506,6 @@ int crypto_ahash_digest(struct ahash_request *req); */ int crypto_ahash_export(struct ahash_request *req, void *out); -/** - * crypto_ahash_export_core() - extract core state for message digest - * @req: reference to the ahash_request handle whose state is exported - * @out: output buffer of sufficient size that can hold the hash state - * - * Export the hash state without the partial block buffer. - * - * Context: Softirq or process context. - * Return: 0 if the export creation was successful; < 0 if an error occurred - */ -int crypto_ahash_export_core(struct ahash_request *req, void *out); - /** * crypto_ahash_import() - import message digest state * @req: reference to ahash_request handle the state is imported into @@ -531,18 +519,6 @@ int crypto_ahash_export_core(struct ahash_request *req, void *out); */ int crypto_ahash_import(struct ahash_request *req, const void *in); -/** - * crypto_ahash_import_core() - import core state - * @req: reference to ahash_request handle the state is imported into - * @in: buffer holding the state - * - * Import the hash state without the partial block buffer. - * - * Context: Softirq or process context. - * Return: 0 if the import was successful; < 0 if an error occurred - */ -int crypto_ahash_import_core(struct ahash_request *req, const void *in); - /** * crypto_ahash_init() - (re)initialize message digest handle * @req: ahash_request handle that already is initialized with all necessary @@ -933,18 +909,6 @@ int crypto_hash_digest(struct crypto_ahash *tfm, const u8 *data, */ int crypto_shash_export(struct shash_desc *desc, void *out); -/** - * crypto_shash_export_core() - extract core state for message digest - * @desc: reference to the operational state handle whose state is exported - * @out: output buffer of sufficient size that can hold the hash state - * - * Export the hash state without the partial block buffer. - * - * Context: Softirq or process context. - * Return: 0 if the export creation was successful; < 0 if an error occurred - */ -int crypto_shash_export_core(struct shash_desc *desc, void *out); - /** * crypto_shash_import() - import operational state * @desc: reference to the operational state handle the state imported into @@ -959,18 +923,6 @@ int crypto_shash_export_core(struct shash_desc *desc, void *out); */ int crypto_shash_import(struct shash_desc *desc, const void *in); -/** - * crypto_shash_import_core() - import core state - * @desc: reference to the operational state handle the state imported into - * @in: buffer holding the state - * - * Import the hash state without the partial block buffer. - * - * Context: Softirq or process context. 
- * Return: 0 if the import was successful; < 0 if an error occurred - */ -int crypto_shash_import_core(struct shash_desc *desc, const void *in); - /** * crypto_shash_init() - (re)initialize message digest * @desc: operational state handle that is already filled diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f2bbdb74e11a..ef5ea75ac5c8 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -305,5 +305,53 @@ static inline unsigned int crypto_shash_coresize(struct crypto_shash *tfm) #define HASH_REQUEST_ZERO(name) \ memzero_explicit(__##name##_req, sizeof(__##name##_req)) +/** + * crypto_ahash_export_core() - extract core state for message digest + * @req: reference to the ahash_request handle whose state is exported + * @out: output buffer of sufficient size that can hold the hash state + * + * Export the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the export creation was successful; < 0 if an error occurred + */ +int crypto_ahash_export_core(struct ahash_request *req, void *out); + +/** + * crypto_ahash_import_core() - import core state + * @req: reference to ahash_request handle the state is imported into + * @in: buffer holding the state + * + * Import the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the import was successful; < 0 if an error occurred + */ +int crypto_ahash_import_core(struct ahash_request *req, const void *in); + +/** + * crypto_shash_export_core() - extract core state for message digest + * @desc: reference to the operational state handle whose state is exported + * @out: output buffer of sufficient size that can hold the hash state + * + * Export the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the export creation was successful; < 0 if an error occurred + */ +int crypto_shash_export_core(struct shash_desc *desc, void *out); + +/** + * crypto_shash_import_core() - import core state + * @desc: reference to the operational state handle the state imported into + * @in: buffer holding the state + * + * Import the hash state without the partial block buffer. + * + * Context: Softirq or process context. + * Return: 0 if the import was successful; < 0 if an error occurred + */ +int crypto_shash_import_core(struct shash_desc *desc, const void *in); + #endif /* _CRYPTO_INTERNAL_HASH_H */ -- cgit v1.2.3 From c6a12f394c488cf6a7ca35c1ad51e0e88897de2e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 15 May 2025 13:54:35 +0800 Subject: crypto: hash - Add export_core and import_core hooks Add export_core and import_core hooks. These are intended to be used by algorithms which are wrappers around block-only algorithms, but are not themselves block-only, e.g., hmac. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 10 ++++++++++ include/crypto/internal/hash.h | 3 +++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 9fc9daaaaab4..bf177cf9be10 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -129,6 +129,10 @@ struct ahash_request { * data so the transformation can continue from this point onward. No * data processing happens at this point. Driver must not use * req->result. + * @export_core: Export partial state without partial block. Only defined + * for algorithms that are not block-only. 
+ * @import_core: Import partial state without partial block. Only defined + * for algorithms that are not block-only. * @init_tfm: Initialize the cryptographic transformation object. * This function is called only once at the instantiation * time, right after the transformation context was @@ -151,6 +155,8 @@ struct ahash_alg { int (*digest)(struct ahash_request *req); int (*export)(struct ahash_request *req, void *out); int (*import)(struct ahash_request *req, const void *in); + int (*export_core)(struct ahash_request *req, void *out); + int (*import_core)(struct ahash_request *req, const void *in); int (*setkey)(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); int (*init_tfm)(struct crypto_ahash *tfm); @@ -200,6 +206,8 @@ struct shash_desc { * @digest: see struct ahash_alg * @export: see struct ahash_alg * @import: see struct ahash_alg + * @export_core: see struct ahash_alg + * @import_core: see struct ahash_alg * @setkey: see struct ahash_alg * @init_tfm: Initialize the cryptographic transformation object. * This function is called only once at the instantiation @@ -230,6 +238,8 @@ struct shash_alg { unsigned int len, u8 *out); int (*export)(struct shash_desc *desc, void *out); int (*import)(struct shash_desc *desc, const void *in); + int (*export_core)(struct shash_desc *desc, void *out); + int (*import_core)(struct shash_desc *desc, const void *in); int (*setkey)(struct crypto_shash *tfm, const u8 *key, unsigned int keylen); int (*init_tfm)(struct crypto_shash *tfm); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index ef5ea75ac5c8..e9de2bc34a10 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -20,6 +20,9 @@ /* Set this bit if finup can deal with multiple blocks. */ #define CRYPTO_AHASH_ALG_FINUP_MAX 0x04000000 +/* This bit is set by the Crypto API if export_core is not supported. */ +#define CRYPTO_AHASH_ALG_NO_EXPORT_CORE 0x08000000 + #define HASH_FBREQ_ON_STACK(name, req) \ char __##name##_req[sizeof(struct ahash_request) + \ MAX_SYNC_HASH_REQSIZE] CRYPTO_MINALIGN_ATTR; \ -- cgit v1.2.3 From 9d7a0ab1c75365158b28a5f7fa516061ea40b1b8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 15 May 2025 13:54:37 +0800 Subject: crypto: ahash - Handle partial blocks in API Provide an option to handle the partial blocks in the ahash API. Almost every hash algorithm has a block size and are only able to hash partial blocks on finalisation. As a first step disable virtual address support for algorithms with state sizes larger than HASH_MAX_STATESIZE. This is OK as virtual addresses are currently only used on synchronous fallbacks. This means ahash_do_req_chain only needs to handle synchronous fallbacks, removing the complexities of saving the request state. Also move the saved request state into the ahash_request object as nesting is no longer possible. Add a scatterlist to ahash_request to store the partial block. 
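The user-visible ahash API is unchanged; as a hedged sketch (synchronous tfm assumed, error handling omitted, buffer name illustrative), a caller can keep feeding non-block-aligned updates and the API core, rather than each driver, carries the trailing partial block:

	struct crypto_ahash *tfm = crypto_alloc_ahash("sha256", 0, 0);
	struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
	struct scatterlist sg;
	u8 digest[32];

	sg_init_one(&sg, data, 100);		/* 100 is not a multiple of the 64-byte block */
	ahash_request_set_callback(req, 0, NULL, NULL);
	ahash_request_set_crypt(req, &sg, digest, 100);
	crypto_ahash_init(req);
	crypto_ahash_update(req);	/* the core may hold back the partial tail */
	crypto_ahash_final(req);	/* now just finup() with nbytes == 0 */
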
Signed-off-by: Herbert Xu --- include/crypto/hash.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index bf177cf9be10..05ee817a3180 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -8,8 +8,8 @@ #ifndef _CRYPTO_HASH_H #define _CRYPTO_HASH_H -#include #include +#include #include #include @@ -65,6 +65,10 @@ struct ahash_request { }; u8 *result; + struct scatterlist sg_head[2]; + crypto_completion_t saved_complete; + void *saved_data; + void *__ctx[] CRYPTO_MINALIGN_ATTR; }; @@ -488,7 +492,11 @@ int crypto_ahash_finup(struct ahash_request *req); * -EBUSY if queue is full and request should be resubmitted later; * other < 0 if an error occurred */ -int crypto_ahash_final(struct ahash_request *req); +static inline int crypto_ahash_final(struct ahash_request *req) +{ + req->nbytes = 0; + return crypto_ahash_finup(req); +} /** * crypto_ahash_digest() - calculate message digest for a buffer -- cgit v1.2.3 From 91b6ff579dda7660a9d11778f9dc4dd0a879de22 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 15 May 2025 13:54:47 +0800 Subject: crypto: algapi - Add driver template support to crypto_inst_setname Add support to crypto_inst_setname for having a driver template name that differs from the algorithm template name. Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 423e57eca351..188eface0a11 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -146,8 +146,16 @@ void *crypto_spawn_tfm2(struct crypto_spawn *spawn); struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb); int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret); const char *crypto_attr_alg_name(struct rtattr *rta); -int crypto_inst_setname(struct crypto_instance *inst, const char *name, - struct crypto_alg *alg); +int __crypto_inst_setname(struct crypto_instance *inst, const char *name, + const char *driver, struct crypto_alg *alg); + +#define crypto_inst_setname(inst, name, ...) \ + CONCATENATE(crypto_inst_setname_, COUNT_ARGS(__VA_ARGS__))( \ + inst, name, ##__VA_ARGS__) +#define crypto_inst_setname_1(inst, name, alg) \ + __crypto_inst_setname(inst, name, name, alg) +#define crypto_inst_setname_2(inst, name, driver, alg) \ + __crypto_inst_setname(inst, name, driver, alg) void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen); int crypto_enqueue_request(struct crypto_queue *queue, -- cgit v1.2.3 From c3103416d5217655d707d9417aaf66f184e3d72f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 15 May 2025 13:54:51 +0800 Subject: crypto: hmac - Add ahash support Add ahash support to hmac so that drivers that can't do hmac in hardware do not have to implement duplicate copies of hmac. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 3 ++- include/crypto/internal/hash.h | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 05ee817a3180..6f6b9de12cd3 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -185,7 +185,8 @@ struct shash_desc { * containing a 'struct s390_sha_ctx'. 
*/ #define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360) -#define MAX_SYNC_HASH_REQSIZE HASH_MAX_DESCSIZE +#define MAX_SYNC_HASH_REQSIZE (sizeof(struct ahash_request) + \ + HASH_MAX_DESCSIZE) #define SHASH_DESC_ON_STACK(shash, ctx) \ char __##shash##_desc[sizeof(struct shash_desc) + HASH_MAX_DESCSIZE] \ diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index e9de2bc34a10..0f85c543f80b 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -67,6 +67,7 @@ int crypto_register_ahashes(struct ahash_alg *algs, int count); void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); +void ahash_free_singlespawn_instance(struct ahash_instance *inst); int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen); @@ -76,12 +77,20 @@ static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) return alg->setkey != shash_no_setkey; } +bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); + static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg) { return crypto_shash_alg_has_setkey(alg) && !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } +static inline bool crypto_hash_alg_needs_key(struct hash_alg_common *alg) +{ + return crypto_hash_alg_has_setkey(alg) && + !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); +} + int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); -- cgit v1.2.3 From 18c438b228558e05ede7dccf947a6547516fc0c7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 15 May 2025 13:54:56 +0800 Subject: crypto: testmgr - Add hash export format testing Ensure that the hash state can be exported to and imported from the generic algorithm. Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 0f85c543f80b..f052afa6e7b0 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -91,6 +91,12 @@ static inline bool crypto_hash_alg_needs_key(struct hash_alg_common *alg) !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } +static inline bool crypto_hash_no_export_core(struct crypto_ahash *tfm) +{ + return crypto_hash_alg_common(tfm)->base.cra_flags & + CRYPTO_AHASH_ALG_NO_EXPORT_CORE; +} + int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); -- cgit v1.2.3 From a7484c80e5ca1ae0c397bb8003bc588f0dcf43f4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 May 2025 11:34:59 +0100 Subject: KVM: arm64: Allow userspace to request KVM_ARM_VCPU_EL2* Since we're (almost) feature complete, let's allow userspace to request KVM_ARM_VCPU_EL2* by bumping KVM_VCPU_MAX_FEATURES up. We also now advertise the features to userspace with new capabilities. It's going to be great... 
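From userspace this would look roughly like the sketch below (ioctl flow only; the exact vCPU feature bit name comes from the uapi header and is assumed here):

	/* Sketch: probe the new capability, then request an EL2-capable vCPU.
	 * KVM_ARM_VCPU_HAS_EL2 is the assumed name of the matching feature bit. */
	struct kvm_vcpu_init init;

	ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init);
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_EL2) > 0)
		init.features[0] |= 1U << KVM_ARM_VCPU_HAS_EL2;
	ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
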
Reviewed-by: Oliver Upton Reviewed-by: Joey Gouly Reviewed-by: Ganapatrao Kulkarni Link: https://lore.kernel.org/r/20250514103501.2225951-17-maz@kernel.org Signed-off-by: Marc Zyngier --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b6ae8ad8934b..c9d4a908976e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -930,6 +930,8 @@ struct kvm_enable_cap { #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From e4dca67b2463e6abe775876c9cb049ea5b1c8e0d Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Sun, 18 May 2025 21:24:51 +0800 Subject: ASoC: tas2781: Header file cleanup and Move the macro definitions to fwlib Drop the I2C in one comment, for these registers are also used in SPI driver; Move the macro definition of TASDEVICE_CMD_XXX from tas2781.h to tas2781_fmwlib.c, because the macros are only referenced in only fwlib. Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250518132451.707-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 07ea38fba03b..2b3acd2fdff0 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -47,7 +47,7 @@ #define TASDEVICE_REG_SWRESET TASDEVICE_REG(0x0, 0x0, 0x01) #define TASDEVICE_REG_SWRESET_RESET BIT(0) -/* I2C Checksum */ +/* Checksum */ #define TASDEVICE_CHECKSUM_REG TASDEVICE_REG(0x0, 0x0, 0x7e) /* XM_340 */ @@ -103,11 +103,6 @@ #define TAS2781_RUNTIME_RE_REG_TF TASDEVICE_REG(0x64, 0x62, 0x48) #define TAS2781_RUNTIME_RE_REG TASDEVICE_REG(0x64, 0x63, 0x44) -#define TASDEVICE_CMD_SING_W 0x1 -#define TASDEVICE_CMD_BURST 0x2 -#define TASDEVICE_CMD_DELAY 0x3 -#define TASDEVICE_CMD_FIELD_W 0x4 - enum audio_device { TAS2563, TAS2781, -- cgit v1.2.3 From c451e2da54bce183fbb270ec01ab3ca725ddf943 Mon Sep 17 00:00:00 2001 From: Sumanth Gavini Date: Sun, 18 May 2025 01:57:32 -0700 Subject: regulator: max8952: Correct Samsung "Electronics" spelling in copyright headers Fix the misspelling of 'Electronics' in max8952 driver copyright headers. Signed-off-by: Sumanth Gavini Link: https://patch.msgid.link/20250518085734.88890-7-sumanth.gavini@yahoo.com Signed-off-by: Mark Brown --- include/linux/regulator/max8952.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/regulator/max8952.h b/include/linux/regulator/max8952.h index 8712c091abf0..61dcd8e00a2f 100644 --- a/include/linux/regulator/max8952.h +++ b/include/linux/regulator/max8952.h @@ -2,7 +2,7 @@ /* * max8952.h - Voltage regulation for the Maxim 8952 * - * Copyright (C) 2010 Samsung Electrnoics + * Copyright (C) 2010 Samsung Electronics * MyungJoo Ham */ -- cgit v1.2.3 From ec23a899d96f9ee3389abe6c516d09cae2fde5e3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 May 2025 15:32:24 +0200 Subject: spi: sh-msiof: Move register definitions to Move the MSIOF register and register bit definitions from the MSIOF SPI driver to the existing header file , so they can be shared with the MSIOF I2S driver. 
Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/066d1086973eb309006258484e9fe8138807e565.1747401908.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- include/linux/spi/sh_msiof.h | 125 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) (limited to 'include') diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h index f950d280461b..9fbef3fd4056 100644 --- a/include/linux/spi/sh_msiof.h +++ b/include/linux/spi/sh_msiof.h @@ -2,6 +2,131 @@ #ifndef __SPI_SH_MSIOF_H__ #define __SPI_SH_MSIOF_H__ +#include +#include + +#define SITMDR1 0x00 /* Transmit Mode Register 1 */ +#define SITMDR2 0x04 /* Transmit Mode Register 2 */ +#define SITMDR3 0x08 /* Transmit Mode Register 3 */ +#define SIRMDR1 0x10 /* Receive Mode Register 1 */ +#define SIRMDR2 0x14 /* Receive Mode Register 2 */ +#define SIRMDR3 0x18 /* Receive Mode Register 3 */ +#define SITSCR 0x20 /* Transmit Clock Select Register */ +#define SIRSCR 0x22 /* Receive Clock Select Register (SH, A1, APE6) */ +#define SICTR 0x28 /* Control Register */ +#define SIFCTR 0x30 /* FIFO Control Register */ +#define SISTR 0x40 /* Status Register */ +#define SIIER 0x44 /* Interrupt Enable Register */ +#define SITDR1 0x48 /* Transmit Control Data Register 1 (SH, A1) */ +#define SITDR2 0x4c /* Transmit Control Data Register 2 (SH, A1) */ +#define SITFDR 0x50 /* Transmit FIFO Data Register */ +#define SIRDR1 0x58 /* Receive Control Data Register 1 (SH, A1) */ +#define SIRDR2 0x5c /* Receive Control Data Register 2 (SH, A1) */ +#define SIRFDR 0x60 /* Receive FIFO Data Register */ + +/* SITMDR1 and SIRMDR1 */ +#define SIMDR1_TRMD BIT(31) /* Transfer Mode (1 = Master mode) */ +#define SIMDR1_SYNCMD GENMASK(29, 28) /* SYNC Mode */ +#define SIMDR1_SYNCMD_PULSE 0U /* Frame start sync pulse */ +#define SIMDR1_SYNCMD_SPI 2U /* Level mode/SPI */ +#define SIMDR1_SYNCMD_LR 3U /* L/R mode */ +#define SIMDR1_SYNCAC BIT(25) /* Sync Polarity (1 = Active-low) */ +#define SIMDR1_BITLSB BIT(24) /* MSB/LSB First (1 = LSB first) */ +#define SIMDR1_DTDL GENMASK(22, 20) /* Data Pin Bit Delay for MSIOF_SYNC */ +#define SIMDR1_SYNCDL GENMASK(18, 16) /* Frame Sync Signal Timing Delay */ +#define SIMDR1_FLD GENMASK(3, 2) /* Frame Sync Signal Interval (0-3) */ +#define SIMDR1_XXSTP BIT(0) /* Transmission/Reception Stop on FIFO */ +/* SITMDR1 */ +#define SITMDR1_PCON BIT(30) /* Transfer Signal Connection */ +#define SITMDR1_SYNCCH GENMASK(27, 26) /* Sync Signal Channel Select */ + /* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */ + +/* SITMDR2 and SIRMDR2 */ +#define SIMDR2_GRP GENMASK(31, 30) /* Group Count */ +#define SIMDR2_BITLEN1 GENMASK(28, 24) /* Data Size (8-32 bits) */ +#define SIMDR2_WDLEN1 GENMASK(23, 16) /* Word Count (1-64/256 (SH, A1))) */ +#define SIMDR2_GRPMASK GENMASK(3, 0) /* Group Output Mask 1-4 (SH, A1) */ + +/* SITMDR3 and SIRMDR3 */ +#define SIMDR3_BITLEN2 GENMASK(28, 24) /* Data Size (8-32 bits) */ +#define SIMDR3_WDLEN2 GENMASK(23, 16) /* Word Count (1-64/256 (SH, A1))) */ + +/* SITSCR and SIRSCR */ +#define SISCR_BRPS GENMASK(12, 8) /* Prescaler Setting (1-32) */ +#define SISCR_BRDV GENMASK(2, 0) /* Baud Rate Generator's Division Ratio */ + +/* SICTR */ +#define SICTR_TSCKIZ GENMASK(31, 30) /* Transmit Clock I/O Polarity Select */ +#define SICTR_TSCKIZ_SCK BIT(31) /* Disable SCK when TX disabled */ +#define SICTR_TSCKIZ_POL BIT(30) /* Transmit Clock Polarity */ +#define SICTR_RSCKIZ GENMASK(29, 28) /* Receive Clock Polarity Select */ +#define SICTR_RSCKIZ_SCK BIT(29) /* Must match CTR_TSCKIZ_SCK */ 
+#define SICTR_RSCKIZ_POL BIT(28) /* Receive Clock Polarity */ +#define SICTR_TEDG BIT(27) /* Transmit Timing (1 = falling edge) */ +#define SICTR_REDG BIT(26) /* Receive Timing (1 = falling edge) */ +#define SICTR_TXDIZ GENMASK(23, 22) /* Pin Output When TX is Disabled */ +#define SICTR_TXDIZ_LOW 0U /* 0 */ +#define SICTR_TXDIZ_HIGH 1U /* 1 */ +#define SICTR_TXDIZ_HIZ 2U /* High-impedance */ +#define SICTR_TSCKE BIT(15) /* Transmit Serial Clock Output Enable */ +#define SICTR_TFSE BIT(14) /* Transmit Frame Sync Signal Output Enable */ +#define SICTR_TXE BIT(9) /* Transmit Enable */ +#define SICTR_RXE BIT(8) /* Receive Enable */ +#define SICTR_TXRST BIT(1) /* Transmit Reset */ +#define SICTR_RXRST BIT(0) /* Receive Reset */ + +/* SIFCTR */ +#define SIFCTR_TFWM GENMASK(31, 29) /* Transmit FIFO Watermark */ +#define SIFCTR_TFWM_64 0U /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 1U /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 2U /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 3U /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 4U /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 5U /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 6U /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 7U /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFUA GENMASK(28, 20) /* Transmit FIFO Usable Area */ +#define SIFCTR_RFWM GENMASK(15, 13) /* Receive FIFO Watermark */ +#define SIFCTR_RFWM_1 0U /* Transfer Request when 1 valid stages */ +#define SIFCTR_RFWM_4 1U /* Transfer Request when 4 valid stages */ +#define SIFCTR_RFWM_8 2U /* Transfer Request when 8 valid stages */ +#define SIFCTR_RFWM_16 3U /* Transfer Request when 16 valid stages */ +#define SIFCTR_RFWM_32 4U /* Transfer Request when 32 valid stages */ +#define SIFCTR_RFWM_64 5U /* Transfer Request when 64 valid stages */ +#define SIFCTR_RFWM_128 6U /* Transfer Request when 128 valid stages */ +#define SIFCTR_RFWM_256 7U /* Transfer Request when 256 valid stages */ +#define SIFCTR_RFUA GENMASK(12, 4) /* Receive FIFO Usable Area (0x40 = full) */ + +/* SISTR */ +#define SISTR_TFEMP BIT(29) /* Transmit FIFO Empty */ +#define SISTR_TDREQ BIT(28) /* Transmit Data Transfer Request */ +#define SISTR_TEOF BIT(23) /* Frame Transmission End */ +#define SISTR_TFSERR BIT(21) /* Transmit Frame Synchronization Error */ +#define SISTR_TFOVF BIT(20) /* Transmit FIFO Overflow */ +#define SISTR_TFUDF BIT(19) /* Transmit FIFO Underflow */ +#define SISTR_RFFUL BIT(13) /* Receive FIFO Full */ +#define SISTR_RDREQ BIT(12) /* Receive Data Transfer Request */ +#define SISTR_REOF BIT(7) /* Frame Reception End */ +#define SISTR_RFSERR BIT(5) /* Receive Frame Synchronization Error */ +#define SISTR_RFUDF BIT(4) /* Receive FIFO Underflow */ +#define SISTR_RFOVF BIT(3) /* Receive FIFO Overflow */ + +/* SIIER */ +#define SIIER_TDMAE BIT(31) /* Transmit Data DMA Transfer Req. Enable */ +#define SIIER_TFEMPE BIT(29) /* Transmit FIFO Empty Enable */ +#define SIIER_TDREQE BIT(28) /* Transmit Data Transfer Request Enable */ +#define SIIER_TEOFE BIT(23) /* Frame Transmission End Enable */ +#define SIIER_TFSERRE BIT(21) /* Transmit Frame Sync Error Enable */ +#define SIIER_TFOVFE BIT(20) /* Transmit FIFO Overflow Enable */ +#define SIIER_TFUDFE BIT(19) /* Transmit FIFO Underflow Enable */ +#define SIIER_RDMAE BIT(15) /* Receive Data DMA Transfer Req. 
Enable */ +#define SIIER_RFFULE BIT(13) /* Receive FIFO Full Enable */ +#define SIIER_RDREQE BIT(12) /* Receive Data Transfer Request Enable */ +#define SIIER_REOFE BIT(7) /* Frame Reception End Enable */ +#define SIIER_RFSERRE BIT(5) /* Receive Frame Sync Error Enable */ +#define SIIER_RFUDFE BIT(4) /* Receive FIFO Underflow Enable */ +#define SIIER_RFOVFE BIT(3) /* Receive FIFO Overflow Enable */ + enum { MSIOF_SPI_HOST, MSIOF_SPI_TARGET, -- cgit v1.2.3 From 08d6ee6d8a10aef958c2af16bb121070290ed589 Mon Sep 17 00:00:00 2001 From: Nikhil Jha Date: Wed, 19 Mar 2025 13:02:39 -0400 Subject: sunrpc: implement rfc2203 rpcsec_gss seqnum cache This implements a sequence number cache of the last three (right now hardcoded) sent sequence numbers for a given XID, as suggested by the RFC. From RFC2203 5.3.3.1: "Note that the sequence number algorithm requires that the client increment the sequence number even if it is retrying a request with the same RPC transaction identifier. It is not infrequent for clients to get into a situation where they send two or more attempts and a slow server sends the reply for the first attempt. With RPCSEC_GSS, each request and reply will have a unique sequence number. If the client wishes to improve turn around time on the RPC call, it can cache the RPCSEC_GSS sequence number of each request it sends. Then when it receives a response with a matching RPC transaction identifier, it can compute the checksum of each sequence number in the cache to try to match the checksum in the reply's verifier." Signed-off-by: Nikhil Jha Acked-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 17 ++++++++++++++++- include/trace/events/rpcgss.h | 4 ++-- include/trace/events/sunrpc.h | 2 +- 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 81b952649d35..f46d1fb8f71a 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -30,6 +30,8 @@ #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) +#define RPC_GSS_SEQNO_ARRAY_SIZE 3U + enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, RPC_DISPLAY_PORT, @@ -66,7 +68,8 @@ struct rpc_rqst { struct rpc_cred * rq_cred; /* Bound cred */ __be32 rq_xid; /* request XID */ int rq_cong; /* has incremented xprt->cong */ - u32 rq_seqno; /* gss seq no. used on req. */ + u32 rq_seqnos[RPC_GSS_SEQNO_ARRAY_SIZE]; /* past gss req seq nos. 
*/ + unsigned int rq_seqno_count; /* number of entries in rq_seqnos */ int rq_enc_pages_num; struct page **rq_enc_pages; /* scratch pages for use by gss privacy code */ @@ -119,6 +122,18 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len +static inline int xprt_rqst_add_seqno(struct rpc_rqst *req, u32 seqno) +{ + if (likely(req->rq_seqno_count < RPC_GSS_SEQNO_ARRAY_SIZE)) + req->rq_seqno_count++; + + /* Shift array to make room for the newest element at the beginning */ + memmove(&req->rq_seqnos[1], &req->rq_seqnos[0], + (RPC_GSS_SEQNO_ARRAY_SIZE - 1) * sizeof(req->rq_seqnos[0])); + req->rq_seqnos[0] = seqno; + return 0; +} + /* RPC transport layer security policies */ enum xprtsec_policies { RPC_XPRTSEC_NONE = 0, diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index b0b6300a0cab..8aeae06cf434 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -409,7 +409,7 @@ TRACE_EVENT(rpcgss_seqno, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __entry->xid = be32_to_cpu(rqst->rq_xid); - __entry->seqno = rqst->rq_seqno; + __entry->seqno = *rqst->rq_seqnos; ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x seqno=%u", @@ -440,7 +440,7 @@ TRACE_EVENT(rpcgss_need_reencode, __entry->client_id = task->tk_client->cl_clid; __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); __entry->seq_xmit = seq_xmit; - __entry->seqno = task->tk_rqstp->rq_seqno; + __entry->seqno = *task->tk_rqstp->rq_seqnos; __entry->ret = ret; ), diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5d331383047b..751af7b024f9 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1100,7 +1100,7 @@ TRACE_EVENT(xprt_transmit, __entry->client_id = rqst->rq_task->tk_client ? rqst->rq_task->tk_client->cl_clid : -1; __entry->xid = be32_to_cpu(rqst->rq_xid); - __entry->seqno = rqst->rq_seqno; + __entry->seqno = *rqst->rq_seqnos; __entry->status = status; ), -- cgit v1.2.3 From e5296637a322b840d772f88f4d58b4bc72b29058 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 8 Apr 2025 09:43:15 -0400 Subject: nfs: add a refcount tracker for struct net as held by the nfs_client These are long-held references to the netns, so make sure the refcount tracker is aware of them. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ee03f3cef30c..e02f4c4e9cc4 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -125,6 +125,7 @@ struct nfs_client { */ char cl_ipaddr[48]; struct net *cl_net; + netns_tracker cl_ns_tracker; struct list_head pending_cb_stateids; struct rcu_head rcu; -- cgit v1.2.3 From 1cb0f56d96185cb20e63e191fc291191823e6f52 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 19 May 2025 15:43:57 +0200 Subject: bpf: WARN_ONCE on verifier bugs Throughout the verifier's logic, there are multiple checks for inconsistent states that should never happen and would indicate a verifier bug. These bugs are typically logged in the verifier logs and sometimes preceded by a WARN_ONCE. This patch reworks these checks to consistently emit a verifier log AND a warning when CONFIG_DEBUG_KERNEL is enabled. The consistent use of WARN_ONCE should help fuzzers (ex. syzkaller) expose any situation where they are actually able to reach one of those buggy verifier states. 
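The helpers added below, verifier_bug() and verifier_bug_if(), would then be used along these lines (condition and message are made up for illustration):

	/* Sketch: an internal consistency check inside the verifier. */
	if (verifier_bug_if(regno >= MAX_BPF_REG, env,
			    "invalid register number %d", regno))
		return -EFAULT;
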
Acked-by: Andrii Nakryiko Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/aCs1nYvNNMq8dAWP@mail.gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ include/linux/bpf_verifier.h | 11 +++++++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 83c56f40842b..5b25d278409b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -346,6 +346,12 @@ static inline const char *btf_field_type_name(enum btf_field_type type) } } +#if IS_ENABLED(CONFIG_DEBUG_KERNEL) +#define BPF_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) +#else +#define BPF_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) +#endif + static inline u32 btf_field_type_size(enum btf_field_type type) { switch (type) { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index cedd66867ecf..78c97e12ea4e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -839,6 +839,17 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, u32 insn_off, const char *prefix_fmt, ...); +#define verifier_bug_if(cond, env, fmt, args...) \ + ({ \ + bool __cond = (cond); \ + if (unlikely(__cond)) { \ + BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \ + bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \ + } \ + (__cond); \ + }) +#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args) + static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; -- cgit v1.2.3 From 541a4219bd66bef56d93dbd306dc64a4d70ae99e Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Wed, 14 May 2025 17:19:33 -0700 Subject: cgroup: compare css to cgroup::self in helper for distingushing css Adjust the implementation of css_is_cgroup() so that it compares the given css to cgroup::self. Rename the function to css_is_self() in order to reflect that. Change the existing css->ss NULL check to a warning in the true branch. Finally, adjust call sites to use the new function name. Signed-off-by: JP Kobryn Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1f5b0a4a3356..989c08b09691 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -347,9 +347,15 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css) return css->flags & CSS_DYING; } -static inline bool css_is_cgroup(struct cgroup_subsys_state *css) +static inline bool css_is_self(struct cgroup_subsys_state *css) { - return css->ss == NULL; + if (css == &css->cgroup->self) { + /* cgroup::self should not have subsystem association */ + WARN_ON(css->ss != NULL); + return true; + } + + return false; } static inline void cgroup_get(struct cgroup *cgrp) -- cgit v1.2.3 From 4d6d35d3417dcab14a9b1b9771c3cd62e8ed442b Mon Sep 17 00:00:00 2001 From: "Yo-Jung (Leo) Lin" Date: Wed, 30 Apr 2025 19:26:16 +0800 Subject: i2c: smbus: introduce Write Disable-aware SPD instantiating functions Some SMBus controllers may restrict writes to addresses where SPD sensors may reside. This may lead to some SPD sensors not functioning correctly, and might need extra handling. Introduce new SPD-instantiating functions that are aware of this, and use them instead. 
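A hedged sketch of how an SMBus controller driver would choose between the two new variants at probe time ("spd_writes_blocked" stands in for whatever the hardware actually reports):

	/* Sketch: pick the write-disabled variant when the controller is known
	 * to reject writes to SPD addresses. */
	if (spd_writes_blocked)
		i2c_register_spd_write_disable(adap);
	else
		i2c_register_spd_write_enable(adap);
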
Signed-off-by: Yo-Jung Lin (Leo) Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250430-for-upstream-i801-spd5118-no-instantiate-v2-1-2f54d91ae2c7@canonical.com Signed-off-by: Andi Shyti --- include/linux/i2c-smbus.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index ced1c6ead52a..dc1bd2ab4c13 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -44,9 +44,11 @@ static inline void i2c_free_slave_host_notify_device(struct i2c_client *client) #endif #if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_DMI) -void i2c_register_spd(struct i2c_adapter *adap); +void i2c_register_spd_write_disable(struct i2c_adapter *adap); +void i2c_register_spd_write_enable(struct i2c_adapter *adap); #else -static inline void i2c_register_spd(struct i2c_adapter *adap) { } +static inline void i2c_register_spd_write_disable(struct i2c_adapter *adap) { } +static inline void i2c_register_spd_write_enable(struct i2c_adapter *adap) { } #endif #endif /* _LINUX_I2C_SMBUS_H */ -- cgit v1.2.3 From 5da3bfa029d6809e192d112f39fca4dbe0137aaf Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Wed, 14 May 2025 17:19:34 -0700 Subject: cgroup: use separate rstat trees for each subsystem Different subsystems may call cgroup_rstat_updated() within the same cgroup, resulting in a tree of pending updates from multiple subsystems. When one of these subsystems is flushed via cgroup_rstat_flushed(), all other subsystems with pending updates on the tree will also be flushed. Change the paradigm of having a single rstat tree for all subsystems to having separate trees for each subsystem. This separation allows for subsystems to perform flushes without the side effects of other subsystems. As an example, flushing the cpu stats will no longer cause the memory stats to be flushed and vice versa. In order to achieve subsystem-specific trees, change the tree node type from cgroup to cgroup_subsys_state pointer. Then remove those pointers from the cgroup and instead place them on the css. Finally, change update/flush functions to make use of the different node type (css). These changes allow a specific subsystem to be associated with an update or flush. Separate rstat trees will now exist for each unique subsystem. Since updating/flushing will now be done at the subsystem level, there is no longer a need to keep track of updated css nodes at the cgroup level. The list management of these nodes done within the cgroup (rstat_css_list and related) has been removed accordingly. Conditional guards for checking validity of a given css were placed within css_rstat_updated/flush() to prevent undefined behavior occuring from kfunc usage in bpf programs. Guards were also placed within css_rstat_init/exit() in order to help consolidate calls to them. At call sites for all four functions, the existing guards were removed. 
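In other words (a sketch, not code from this series), a controller now updates and flushes through its own css rather than the cgroup, so only that subsystem's tree is walked ("memcg" is illustrative):

	css_rstat_updated(&memcg->css, cpu);	/* update side: mark only this css's tree */
	css_rstat_flush(&memcg->css);		/* read side: flush only memory stats */
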
Signed-off-by: JP Kobryn Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 46 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index e58bfb880111..17ecaae9c5f8 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -169,6 +169,8 @@ struct cgroup_subsys_state { /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; + struct css_rstat_cpu __percpu *rstat_cpu; + /* * siblings list anchored at the parent's ->children * @@ -177,9 +179,6 @@ struct cgroup_subsys_state { struct list_head sibling; struct list_head children; - /* flush target list anchored at cgrp->rstat_css_list */ - struct list_head rstat_css_node; - /* * PI: Subsys-unique ID. 0 is unused and root is always 1. The * matching css can be looked up using css_from_id(). @@ -219,6 +218,13 @@ struct cgroup_subsys_state { * Protected by cgroup_mutex. */ int nr_descendants; + + /* + * A singly-linked list of css structures to be rstat flushed. + * This is a scratch field to be used exclusively by + * css_rstat_flush() and protected by cgroup_rstat_lock. + */ + struct cgroup_subsys_state *rstat_flush_next; }; /* @@ -329,10 +335,10 @@ struct cgroup_base_stat { /* * rstat - cgroup scalable recursive statistics. Accounting is done - * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the + * per-cpu in css_rstat_cpu which is then lazily propagated up the * hierarchy on reads. * - * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are + * When a stat gets updated, the css_rstat_cpu and its ancestors are * linked into the updated tree. On the following read, propagation only * considers and consumes the updated tree. This makes reading O(the * number of descendants which have been active since last read) instead of @@ -346,20 +352,20 @@ struct cgroup_base_stat { * This struct hosts both the fields which implement the above - * updated_children and updated_next. */ -struct cgroup_rstat_cpu { +struct css_rstat_cpu { /* * Child cgroups with stat updates on this cpu since the last read * are linked on the parent's ->updated_children through - * ->updated_next. + * ->updated_next. updated_children is terminated by its container css. * - * In addition to being more compact, singly-linked list pointing - * to the cgroup makes it unnecessary for each per-cpu struct to - * point back to the associated cgroup. + * In addition to being more compact, singly-linked list pointing to + * the css makes it unnecessary for each per-cpu struct to point back + * to the associated css. * * Protected by per-cpu cgroup_rstat_cpu_lock. */ - struct cgroup *updated_children; /* terminated by self cgroup */ - struct cgroup *updated_next; /* NULL iff not on the list */ + struct cgroup_subsys_state *updated_children; + struct cgroup_subsys_state *updated_next; /* NULL if not on the list */ }; /* @@ -521,25 +527,15 @@ struct cgroup { struct cgroup *dom_cgrp; struct cgroup *old_dom_cgrp; /* used while enabling threaded */ - /* per-cpu recursive resource statistics */ - struct cgroup_rstat_cpu __percpu *rstat_cpu; struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu; - struct list_head rstat_css_list; /* - * Add padding to separate the read mostly rstat_cpu and - * rstat_css_list into a different cacheline from the following - * rstat_flush_next and *bstat fields which can have frequent updates. 
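The selection routine could look roughly like the sketch below (the real helper may differ in detail; rstat_base_lock is the renamed base-stat lock, rstat_ss_lock the new per-subsystem lock):

	/* Sketch: base-stat lock for cgroup::self, per-subsystem lock otherwise. */
	static spinlock_t *ss_rstat_lock(struct cgroup_subsys *ss)
	{
		if (!ss)
			return &rstat_base_lock;	/* base stats (cgroup::self) */
		return &ss->rstat_ss_lock;		/* subsystem stats */
	}
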
+ * Add padding to keep the read mostly rstat per-cpu pointer on a + * different cacheline than the following *bstat fields which can have + * frequent updates. */ CACHELINE_PADDING(_pad_); - /* - * A singly-linked list of cgroup structures to be rstat flushed. - * This is a scratch field to be used exclusively by - * css_rstat_flush_locked() and protected by cgroup_rstat_lock. - */ - struct cgroup *rstat_flush_next; - /* cgroup basic resource statistics */ struct cgroup_base_stat last_bstat; struct cgroup_base_stat bstat; -- cgit v1.2.3 From 748922dcfabdd655d25fb6dd09a60e694a3d35e6 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Wed, 14 May 2025 17:19:35 -0700 Subject: cgroup: use subsystem-specific rstat locks to avoid contention It is possible to eliminate contention between subsystems when updating/flushing stats by using subsystem-specific locks. Let the existing rstat locks be dedicated to the cgroup base stats and rename them to reflect that. Add similar locks to the cgroup_subsys struct for use with individual subsystems. Lock initialization is done in the new function ss_rstat_init(ss) which replaces cgroup_rstat_boot(void). If NULL is passed to this function, the global base stat locks will be initialized. Otherwise, the subsystem locks will be initialized. Change the existing lock helper functions to accept a reference to a css. Then within these functions, conditionally select the appropriate locks based on the subsystem affiliation of the given css. Add helper functions for this selection routine to avoid repeated code. Signed-off-by: JP Kobryn Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 10 ++++++++-- include/trace/events/cgroup.h | 12 ++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 17ecaae9c5f8..5b8127d29dc5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -222,7 +222,10 @@ struct cgroup_subsys_state { /* * A singly-linked list of css structures to be rstat flushed. * This is a scratch field to be used exclusively by - * css_rstat_flush() and protected by cgroup_rstat_lock. + * css_rstat_flush(). + * + * Protected by rstat_base_lock when css is cgroup::self. + * Protected by css->ss->rstat_ss_lock otherwise. */ struct cgroup_subsys_state *rstat_flush_next; }; @@ -362,7 +365,7 @@ struct css_rstat_cpu { * the css makes it unnecessary for each per-cpu struct to point back * to the associated css. * - * Protected by per-cpu cgroup_rstat_cpu_lock. + * Protected by per-cpu css->ss->rstat_ss_cpu_lock. */ struct cgroup_subsys_state *updated_children; struct cgroup_subsys_state *updated_next; /* NULL if not on the list */ @@ -792,6 +795,9 @@ struct cgroup_subsys { * specifies the mask of subsystems that this one depends on. 
*/ unsigned int depends_on; + + spinlock_t rstat_ss_lock; + raw_spinlock_t __percpu *rstat_ss_cpu_lock; }; extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h index af2755bda6eb..7d332387be6c 100644 --- a/include/trace/events/cgroup.h +++ b/include/trace/events/cgroup.h @@ -231,7 +231,11 @@ DECLARE_EVENT_CLASS(cgroup_rstat, __entry->cpu, __entry->contended) ); -/* Related to global: cgroup_rstat_lock */ +/* + * Related to locks: + * global rstat_base_lock for base stats + * cgroup_subsys::rstat_ss_lock for subsystem stats + */ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_lock_contended, TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), @@ -253,7 +257,11 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock, TP_ARGS(cgrp, cpu, contended) ); -/* Related to per CPU: cgroup_rstat_cpu_lock */ +/* + * Related to per CPU locks: + * global rstat_base_cpu_lock for base stats + * cgroup_subsys::rstat_ss_cpu_lock for subsystem stats + */ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended, TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), -- cgit v1.2.3 From f3921fb7fdc23dd946b0c082f5fedca9ce75d506 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Wed, 14 May 2025 17:19:37 -0700 Subject: cgroup: document the rstat per-cpu initialization The calls to css_rstat_init() occur at different places depending on the context. Document the conditions that determine which point of initialization is used. Signed-off-by: JP Kobryn Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 5b8127d29dc5..e61687d5e496 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -169,6 +169,21 @@ struct cgroup_subsys_state { /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; + /* + * Depending on the context, this field is initialized + * via css_rstat_init() at different places: + * + * when css is associated with cgroup::self + * when css->cgroup is the root cgroup + * performed in cgroup_init() + * when css->cgroup is not the root cgroup + * performed in cgroup_create() + * when css is associated with a subsystem + * when css->cgroup is the root cgroup + * performed in cgroup_init_subsys() in the non-early path + * when css->cgroup is not the root cgroup + * performed in css_create() + */ struct css_rstat_cpu __percpu *rstat_cpu; /* @@ -530,6 +545,15 @@ struct cgroup { struct cgroup *dom_cgrp; struct cgroup *old_dom_cgrp; /* used while enabling threaded */ + /* + * Depending on the context, this field is initialized via + * css_rstat_init() at different places: + * + * when cgroup is the root cgroup + * performed in cgroup_setup_root() + * otherwise + * performed in cgroup_create() + */ struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu; /* -- cgit v1.2.3 From 22f2dc03553fb3f407fbd5e6b5b4f9c747927cb2 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 19 May 2025 11:08:51 +0300 Subject: PCI: Add Intel Wildcat Lake audio Device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Wildcat Lake (WCL) audio Device ID. 
Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Bard Liao Reviewed-by: Guennadi Liakhovetski Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Acked-by: Krzysztof Wilczyński Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250519080855.16977-2-peter.ujfalusi@linux.intel.com --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2e28182c3af0..f7ceefce6d3d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3049,6 +3049,7 @@ #define PCI_DEVICE_ID_INTEL_HDA_DG1 0x490d #define PCI_DEVICE_ID_INTEL_HDA_EHL_0 0x4b55 #define PCI_DEVICE_ID_INTEL_HDA_EHL_3 0x4b58 +#define PCI_DEVICE_ID_INTEL_HDA_WCL 0x4d28 #define PCI_DEVICE_ID_INTEL_HDA_JSL_N 0x4dc8 #define PCI_DEVICE_ID_INTEL_HDA_DG2_0 0x4f90 #define PCI_DEVICE_ID_INTEL_HDA_DG2_1 0x4f91 -- cgit v1.2.3 From 58f5fbeb367ff6f30a2448b2cad70f70b2de4b06 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 16 May 2025 21:28:03 +0200 Subject: fanotify: support watching filesystems and mounts inside userns An unprivileged user is allowed to create an fanotify group and add inode marks, but not filesystem, mntns and mount marks. Add limited support for setting up filesystem, mntns and mount marks by an unprivileged user under the following conditions: 1. User has CAP_SYS_ADMIN in the user ns where the group was created 2.a. User has CAP_SYS_ADMIN in the user ns where the sb was created OR (in case setting up a mntns mark) 2.b. User has CAP_SYS_ADMIN in the user ns associated with the mntns Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250516192803.838659-3-amir73il@gmail.com --- include/linux/fanotify.h | 5 ++--- include/linux/fsnotify_backend.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 3c817dc6292e..879cff5eccd4 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -38,8 +38,7 @@ FAN_REPORT_PIDFD | \ FAN_REPORT_FD_ERROR | \ FAN_UNLIMITED_QUEUE | \ - FAN_UNLIMITED_MARKS | \ - FAN_REPORT_MNT) + FAN_UNLIMITED_MARKS) /* * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. @@ -48,7 +47,7 @@ * so one of the flags for reporting file handles is required. */ #define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \ - FANOTIFY_FID_BITS | \ + FANOTIFY_FID_BITS | FAN_REPORT_MNT | \ FAN_CLOEXEC | FAN_NONBLOCK) #define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6cd8d1d28b8b..7dd22db06317 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -250,6 +250,7 @@ struct fsnotify_group { * full */ struct mem_cgroup *memcg; /* memcg to charge allocations */ + struct user_namespace *user_ns; /* user ns where group was created */ /* groups can define private fields here or use the void *private */ union { -- cgit v1.2.3 From a462903fa22541f212134fba81084315ad843e6e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 May 2025 13:59:27 +0200 Subject: net: netlink: reduce extack cookie size Seems like the extack cookie hasn't found any users outside of wireless, which always uses nl_set_extack_cookie_u64(). Thus, allocating 20 bytes for it is pointless, reduce that to 8 bytes, and add a BUILD_BUG_ON() to ensure it's enough (obviously it is, for a u64, but in case it changes again.) 
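For reference, the one remaining usage pattern looks roughly like this (the handler is made up; only nl_set_extack_cookie_u64() is from the header):

static int example_doit(struct sk_buff *skb, struct genl_info *info)
{
	u64 cookie = 42;	/* e.g. an object id handed back to userspace */

	nl_set_extack_cookie_u64(info->extack, cookie);
	return 0;
}

Should the cookie buffer ever shrink below sizeof(u64) again, the new BUILD_BUG_ON() catches it at compile time.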
Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20250516115927.38209-2-johannes@sipsolutions.net Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index c3ae84a77e16..882e9c1b6c1d 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -63,7 +63,7 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) } /* this can be increased when necessary - don't expose to userland */ -#define NETLINK_MAX_COOKIE_LEN 20 +#define NETLINK_MAX_COOKIE_LEN 8 #define NETLINK_MAX_FMTMSG_LEN 80 /** @@ -212,6 +212,7 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, { if (!extack) return; + BUILD_BUG_ON(sizeof(extack->cookie) < sizeof(cookie)); memcpy(extack->cookie, &cookie, sizeof(cookie)); extack->cookie_len = sizeof(cookie); } -- cgit v1.2.3 From 84b21e61ebd64931d865ce3df49d930db8c9e2cd Mon Sep 17 00:00:00 2001 From: Gur Stavi Date: Sun, 18 May 2025 13:00:54 +0300 Subject: queue_api: reduce risk of name collision over txq Rename local variable in macros from txq to _txq. When macro parameter get_desc is expended it is likely to have a txq token that refers to a different txq variable at the caller's site. Signed-off-by: Gur Stavi Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/95b60d218f004308486d92ed17c8cc6f28bac09d.1747559621.git.gur.stavi@huawei.com Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 069ff35a72de..ba2eaf39089b 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -288,27 +288,27 @@ netdev_txq_completed_mb(struct netdev_queue *dev_queue, #define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs) \ ({ \ - struct netdev_queue *txq; \ + struct netdev_queue *_txq; \ \ - txq = netdev_get_tx_queue(dev, idx); \ - netif_txq_try_stop(txq, get_desc, start_thrs); \ + _txq = netdev_get_tx_queue(dev, idx); \ + netif_txq_try_stop(_txq, get_desc, start_thrs); \ }) #define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \ ({ \ - struct netdev_queue *txq; \ + struct netdev_queue *_txq; \ \ - txq = netdev_get_tx_queue(dev, idx); \ - netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \ + _txq = netdev_get_tx_queue(dev, idx); \ + netif_txq_maybe_stop(_txq, get_desc, stop_thrs, start_thrs); \ }) #define netif_subqueue_completed_wake(dev, idx, pkts, bytes, \ get_desc, start_thrs) \ ({ \ - struct netdev_queue *txq; \ + struct netdev_queue *_txq; \ \ - txq = netdev_get_tx_queue(dev, idx); \ - netif_txq_completed_wake(txq, pkts, bytes, \ + _txq = netdev_get_tx_queue(dev, idx); \ + netif_txq_completed_wake(_txq, pkts, bytes, \ get_desc, start_thrs); \ }) -- cgit v1.2.3 From 1c9a93bf1d01729c54e9acbaa538db81f16563b2 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 25 Apr 2025 20:06:34 -0600 Subject: dmapool: add NUMA affinity support Introduce dma_pool_create_node(), like dma_pool_create() but taking an additional NUMA node argument. Allocate struct dma_pool on the desired node, and store the node on dma_pool for allocating struct dma_page. Make dma_pool_create() an alias for dma_pool_create_node() with node set to NUMA_NO_NODE. 
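As a usage sketch (names made up), a driver that wants its pool memory on the device's node can now do:

	struct dma_pool *pool;
	dma_addr_t dma;
	void *vaddr;

	pool = dma_pool_create_node("example-pool", dev, 256, 256, 0,
				    dev_to_node(dev));
	if (!pool)
		return -ENOMEM;

	vaddr = dma_pool_alloc(pool, GFP_KERNEL, &dma);

Existing dma_pool_create() callers are unaffected since it now simply passes NUMA_NO_NODE.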
Signed-off-by: Keith Busch Signed-off-by: Caleb Sander Mateos Reviewed-by: Jens Axboe Reviewed-by: John Garry Reviewed-by: Sagi Grimberg Reviewed-by: Kanchan Joshi Signed-off-by: Christoph Hellwig --- include/linux/dmapool.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index f632ecfb4238..06c4de602b2f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -11,6 +11,7 @@ #ifndef LINUX_DMAPOOL_H #define LINUX_DMAPOOL_H +#include #include #include @@ -18,8 +19,8 @@ struct device; #ifdef CONFIG_HAS_DMA -struct dma_pool *dma_pool_create(const char *name, struct device *dev, - size_t size, size_t align, size_t allocation); +struct dma_pool *dma_pool_create_node(const char *name, struct device *dev, + size_t size, size_t align, size_t boundary, int node); void dma_pool_destroy(struct dma_pool *pool); @@ -35,9 +36,12 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev, void dmam_pool_destroy(struct dma_pool *pool); #else /* !CONFIG_HAS_DMA */ -static inline struct dma_pool *dma_pool_create(const char *name, - struct device *dev, size_t size, size_t align, size_t allocation) -{ return NULL; } +static inline struct dma_pool *dma_pool_create_node(const char *name, + struct device *dev, size_t size, size_t align, size_t boundary, + int node) +{ + return NULL; +} static inline void dma_pool_destroy(struct dma_pool *pool) { } static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { return NULL; } @@ -49,6 +53,13 @@ static inline struct dma_pool *dmam_pool_create(const char *name, static inline void dmam_pool_destroy(struct dma_pool *pool) { } #endif /* !CONFIG_HAS_DMA */ +static inline struct dma_pool *dma_pool_create(const char *name, + struct device *dev, size_t size, size_t align, size_t boundary) +{ + return dma_pool_create_node(name, dev, size, align, boundary, + NUMA_NO_NODE); +} + static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { -- cgit v1.2.3 From 2695b3c7fe4fa06a377ea0d66e3fe5fdb60310f0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 11 May 2025 11:28:36 -0700 Subject: MIPS: bcm63xx: nvram: avoid inefficient use of crc32_le_combine() bcm963xx_nvram_checksum() was using crc32_le_combine() to update a CRC with four zero bytes. However, this is about 5x slower than just CRC'ing four zero bytes in the normal way. Just do that instead. (We could instead make crc32_le_combine() faster on short lengths. But all its callers do just fine without it, so I'd like to just remove it.) 
Signed-off-by: Eric Biggers Signed-off-by: Thomas Bogendoerfer --- include/linux/bcm963xx_nvram.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/bcm963xx_nvram.h b/include/linux/bcm963xx_nvram.h index c8c7f01159fe..48830bf18042 100644 --- a/include/linux/bcm963xx_nvram.h +++ b/include/linux/bcm963xx_nvram.h @@ -81,25 +81,21 @@ static int __maybe_unused bcm963xx_nvram_checksum( const struct bcm963xx_nvram *nvram, u32 *expected_out, u32 *actual_out) { + const u32 zero = 0; u32 expected, actual; size_t len; if (nvram->version <= 4) { expected = nvram->checksum_v4; - len = BCM963XX_NVRAM_V4_SIZE - sizeof(u32); + len = BCM963XX_NVRAM_V4_SIZE; } else { expected = nvram->checksum_v5; - len = BCM963XX_NVRAM_V5_SIZE - sizeof(u32); + len = BCM963XX_NVRAM_V5_SIZE; } - /* - * Calculate the CRC32 value for the nvram with a checksum value - * of 0 without modifying or copying the nvram by combining: - * - The CRC32 of the nvram without the checksum value - * - The CRC32 of a zero checksum value (which is also 0) - */ - actual = crc32_le_combine( - crc32_le(~0, (u8 *)nvram, len), 0, sizeof(u32)); + /* Calculate the CRC32 of the nvram with the checksum field set to 0. */ + actual = crc32_le(~0, nvram, len - sizeof(u32)); + actual = crc32_le(actual, &zero, sizeof(u32)); if (expected_out) *expected_out = expected; -- cgit v1.2.3 From 52b97d43b7c52da481758931a111a091da5e4802 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 16 May 2025 14:10:05 +0100 Subject: ASoC: SDCA: Fix minor typo Fix minor typo SDAC -> SDCA. Fixes: 42b144cb6a2d ("ASoC: SDCA: Add SDCA Control parsing") Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250516131011.221310-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_function.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 253654568a41..e7165e4f15c4 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -125,7 +125,7 @@ struct sdca_init_write { * macros. * * Short hand to specific a Control type statically for example: - * SDAC_CTL_TYPE_S(IT, MIC_BIAS). + * SDCA_CTL_TYPE_S(IT, MIC_BIAS). */ #define SDCA_CTL_TYPE_S(ent, sel) SDCA_CTL_TYPE(SDCA_ENTITY_TYPE_##ent, \ SDCA_CTL_##ent##_##sel) -- cgit v1.2.3 From 737379e5062e26c59fe069a8bdad2ee363d52686 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 16 May 2025 14:10:08 +0100 Subject: ASoC: dapm: Add component level pin switches The core currently supports pin switches for source/sink widgets, but only at the card level. SDCA components specify the fabric at the level of the individual components, to support this add helpers to allow component level pin switches. 
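Assuming the component-level helpers keep the same kcontrol layout as the existing card-level SOC_DAPM_PIN_SWITCH() controls, a component might wire one up roughly like this (sketch only; the pin-name convention in .private_value is an assumption):

static const struct snd_kcontrol_new example_component_controls[] = {
	{
		.iface		= SNDRV_CTL_ELEM_IFACE_MIXER,
		.name		= "Headphone Switch",
		.info		= snd_soc_dapm_info_pin_switch,
		.get		= snd_soc_dapm_get_component_pin_switch,
		.put		= snd_soc_dapm_put_component_pin_switch,
		.private_value	= (unsigned long)"Headphone",
	},
};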
Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250516131011.221310-5-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index af802ef536e7..400584474bc8 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -445,6 +445,10 @@ int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *uncontrol); int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *uncontrol); +int snd_soc_dapm_get_component_pin_switch(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *uncontrol); +int snd_soc_dapm_put_component_pin_switch(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *uncontrol); int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm, const struct snd_soc_dapm_widget *widget, unsigned int num); struct snd_soc_dapm_widget *snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, -- cgit v1.2.3 From 2c8b3a8e6aa877583e5dc2f669a8adc896a771d4 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 16 May 2025 14:10:09 +0100 Subject: ASoC: SDCA: Create DAPM widgets and routes from DisCo Use the previously parsed DisCo information from ACPI to create DAPM widgets and routes representing a SDCA Function. For the most part SDCA maps well to the DAPM abstractions. The primary point of interest is the SDCA Power Domain Entities (PDEs), which actually control the power status of the device. Whilst these PDEs are the primary widgets the other parts of the SDCA graph are added to maintain a consistency with the hardware abstract, and allow routing to take effect. As for the PDEs themselves the code currently only handle PS0 and PS3 (basically on and off), the two intermediate power states are not commonly used and don't map well to ASoC/DAPM. Other minor points of slightly complexity include, the Group Entities (GEs) these set the value of several other controls, typically Selector Units (SUs) for enabling a cetain jack configuration. Multiple SUs being controlled by a GE are easily modelled creating a single control and sharing it among the controlled muxes. SDCA also has a slight habit of having fully connected paths, relying more on activating the PDEs to enable functionality. This doesn't map quite so perfectly to DAPM which considers the path a reason to power the PDE. Whilst in the current specification Mixer Units are defined as fixed-function, in DAPM we create a virtual control for each input (which defaults to connected). This allows paths to be connected/disconnected, providing a more ASoC style approach to managing the power. PIN_SWITCHs will also be added for non-dataport terminal entities in a later patch along with the other ALSA controls, providing greater flexibility in power management. A top level helper sdca_asoc_populate_component() is exported that counts and allocates everything, however, the intermediate counting and population functions are also exported. This will allow end drivers to do allocation and add custom handling, which is probably fairly likely for the early SDCA devices. Clock muxes are currently not fully supported, so some future work will also be required there. 
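As a rough usage sketch (driver names made up, and note that later patches in this series extend the signature), an SDCA function driver could hand its parsed DisCo data to the top-level helper like this:

static int example_function_probe(struct device *dev,
				  struct sdca_function_data *function)
{
	struct snd_soc_component_driver *drv;

	drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
	if (!drv)
		return -ENOMEM;

	return sdca_asoc_populate_component(dev, function, drv);
}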
Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250516131011.221310-6-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_asoc.h | 30 ++++++++++++++++++++++++++++++ include/sound/sdca_function.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 include/sound/sdca_asoc.h (limited to 'include') diff --git a/include/sound/sdca_asoc.h b/include/sound/sdca_asoc.h new file mode 100644 index 000000000000..414d461b6fc4 --- /dev/null +++ b/include/sound/sdca_asoc.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * The MIPI SDCA specification is available for public downloads at + * https://www.mipi.org/mipi-sdca-v1-0-download + * + * Copyright (C) 2025 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#ifndef __SDCA_ASOC_H__ +#define __SDCA_ASOC_H__ + +struct device; +struct sdca_function_data; +struct snd_soc_component_driver; +struct snd_soc_dapm_route; +struct snd_soc_dapm_widget; + +int sdca_asoc_count_component(struct device *dev, struct sdca_function_data *function, + int *num_widgets, int *num_routes); + +int sdca_asoc_populate_dapm(struct device *dev, struct sdca_function_data *function, + struct snd_soc_dapm_widget *widgets, + struct snd_soc_dapm_route *routes); + +int sdca_asoc_populate_component(struct device *dev, + struct sdca_function_data *function, + struct snd_soc_component_driver *component_drv); + +#endif // __SDCA_ASOC_H__ diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index e7165e4f15c4..008e23882de8 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -257,6 +257,14 @@ enum sdca_pde_controls { SDCA_CTL_PDE_ACTUAL_PS = 0x10, }; +/** + * enum sdca_requested_ps_range - Column definitions for Requested PS + */ +enum sdca_requested_ps_range { + SDCA_REQUESTED_PS_STATE = 0, + SDCA_REQUESTED_PS_NCOLS = 1, +}; + /** * enum sdca_ge_controls - SDCA Controls for Group Unit * @@ -268,6 +276,15 @@ enum sdca_ge_controls { SDCA_CTL_GE_DETECTED_MODE = 0x02, }; +/** + * enum sdca_selected_mode_range - Column definitions for Selected Mode + */ +enum sdca_selected_mode_range { + SDCA_SELECTED_MODE_INDEX = 0, + SDCA_SELECTED_MODE_TERM_TYPE = 1, + SDCA_SELECTED_MODE_NCOLS = 2, +}; + /** * enum sdca_spe_controls - SDCA Controls for Security & Privacy Unit * @@ -773,6 +790,25 @@ enum sdca_terminal_type { SDCA_TERM_TYPE_PRIVACY_INDICATORS = 0x747, }; +#define SDCA_TERM_TYPE_LINEIN_STEREO_NAME "LineIn Stereo" +#define SDCA_TERM_TYPE_LINEIN_FRONT_LR_NAME "LineIn Front-LR" +#define SDCA_TERM_TYPE_LINEIN_CENTER_LFE_NAME "LineIn Center-LFE" +#define SDCA_TERM_TYPE_LINEIN_SURROUND_LR_NAME "LineIn Surround-LR" +#define SDCA_TERM_TYPE_LINEIN_REAR_LR_NAME "LineIn Rear-LR" +#define SDCA_TERM_TYPE_LINEOUT_STEREO_NAME "LineOut Stereo" +#define SDCA_TERM_TYPE_LINEOUT_FRONT_LR_NAME "LineOut Front-LR" +#define SDCA_TERM_TYPE_LINEOUT_CENTER_LFE_NAME "LineOut Center-LFE" +#define SDCA_TERM_TYPE_LINEOUT_SURROUND_LR_NAME "LineOut Surround-LR" +#define SDCA_TERM_TYPE_LINEOUT_REAR_LR_NAME "LineOut Rear-LR" +#define SDCA_TERM_TYPE_MIC_JACK_NAME "Microphone" +#define SDCA_TERM_TYPE_STEREO_JACK_NAME "Speaker Stereo" +#define SDCA_TERM_TYPE_FRONT_LR_JACK_NAME "Speaker Front-LR" +#define SDCA_TERM_TYPE_CENTER_LFE_JACK_NAME "Speaker Center-LFE" +#define SDCA_TERM_TYPE_SURROUND_LR_JACK_NAME "Speaker Surround-LR" +#define SDCA_TERM_TYPE_REAR_LR_JACK_NAME "Speaker Rear-LR" +#define 
SDCA_TERM_TYPE_HEADPHONE_JACK_NAME "Headphone" +#define SDCA_TERM_TYPE_HEADSET_JACK_NAME "Headset" + /** * enum sdca_connector_type - SDCA Connector Types * -- cgit v1.2.3 From c3ca24e3fcb656c6afe702b69e6e9efcc3ff1128 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 16 May 2025 14:10:10 +0100 Subject: ASoC: SDCA: Create ALSA controls from DisCo Use the previously parsed DisCo information from ACPI to create the ALSA controls required by an SDCA Function. This maps all User and Application level SDCA Controls to ALSA controls. Typically controls marked with those access levels are just volumes and mutes. SDCA defines volume controls as an integer in 1/256ths of a dB and then provides a mechanism to specify what values are valid (range templates). Currently only a simple case of a single linear volume range with a power of 2 step size is supported. This allows the code to expose the volume control using a simple shift. This will need expanded in the future, to support more complex ranges and probably also some additional control types but this should be sufficient to for a first pass. For non-dataport terminal widgets also add a pin switch to allow that endpoint to be turned on/off. Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250516131011.221310-7-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_asoc.h | 6 +++++- include/sound/sdca_function.h | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/sdca_asoc.h b/include/sound/sdca_asoc.h index 414d461b6fc4..d19e7e969283 100644 --- a/include/sound/sdca_asoc.h +++ b/include/sound/sdca_asoc.h @@ -12,16 +12,20 @@ struct device; struct sdca_function_data; +struct snd_kcontrol_new; struct snd_soc_component_driver; struct snd_soc_dapm_route; struct snd_soc_dapm_widget; int sdca_asoc_count_component(struct device *dev, struct sdca_function_data *function, - int *num_widgets, int *num_routes); + int *num_widgets, int *num_routes, int *num_controls); int sdca_asoc_populate_dapm(struct device *dev, struct sdca_function_data *function, struct snd_soc_dapm_widget *widgets, struct snd_soc_dapm_route *routes); +int sdca_asoc_populate_controls(struct device *dev, + struct sdca_function_data *function, + struct snd_kcontrol_new *kctl); int sdca_asoc_populate_component(struct device *dev, struct sdca_function_data *function, diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 008e23882de8..2ecf0c3a5e03 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -206,6 +206,16 @@ enum sdca_fu_controls { SDCA_CTL_FU_LATENCY = 0x10, }; +/** + * enum sdca_volume_range - Column definitions for Q7.8dB volumes/gains + */ +enum sdca_volume_range { + SDCA_VOLUME_LINEAR_MIN = 0, + SDCA_VOLUME_LINEAR_MAX = 1, + SDCA_VOLUME_LINEAR_STEP = 2, + SDCA_VOLUME_LINEAR_NCOLS = 3, +}; + /** * enum sdca_xu_controls - SDCA Controls for Extension Unit * -- cgit v1.2.3 From 108f878d435437f649da32dea9f0d4f684eba3ba Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 16 May 2025 14:10:11 +0100 Subject: ASoC: SDCA: Create DAI drivers from DisCo Use the previously parsed DisCo information from ACPI to create the DAI drivers required to connect an SDCA Function into an ASoC soundcard. Create DAI driver structures and populate the supported sample rates and sample widths into them based on the Input/Output Terminal and any attach Clock Source entities. 
More complex relationships with channels etc. will be added later as constraints as part of the DAI startup. Signed-off-by: Charles Keepax Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250516131011.221310-8-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca_asoc.h | 12 ++++++++++-- include/sound/sdca_function.h | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/sdca_asoc.h b/include/sound/sdca_asoc.h index d19e7e969283..9121531f0826 100644 --- a/include/sound/sdca_asoc.h +++ b/include/sound/sdca_asoc.h @@ -14,11 +14,14 @@ struct device; struct sdca_function_data; struct snd_kcontrol_new; struct snd_soc_component_driver; +struct snd_soc_dai_driver; +struct snd_soc_dai_ops; struct snd_soc_dapm_route; struct snd_soc_dapm_widget; int sdca_asoc_count_component(struct device *dev, struct sdca_function_data *function, - int *num_widgets, int *num_routes, int *num_controls); + int *num_widgets, int *num_routes, int *num_controls, + int *num_dais); int sdca_asoc_populate_dapm(struct device *dev, struct sdca_function_data *function, struct snd_soc_dapm_widget *widgets, @@ -26,9 +29,14 @@ int sdca_asoc_populate_dapm(struct device *dev, struct sdca_function_data *funct int sdca_asoc_populate_controls(struct device *dev, struct sdca_function_data *function, struct snd_kcontrol_new *kctl); +int sdca_asoc_populate_dais(struct device *dev, struct sdca_function_data *function, + struct snd_soc_dai_driver *dais, + const struct snd_soc_dai_ops *ops); int sdca_asoc_populate_component(struct device *dev, struct sdca_function_data *function, - struct snd_soc_component_driver *component_drv); + struct snd_soc_component_driver *component_drv, + struct snd_soc_dai_driver **dai_drv, int *num_dai_drv, + const struct snd_soc_dai_ops *ops); #endif // __SDCA_ASOC_H__ diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 2ecf0c3a5e03..eaedb54a8322 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -168,6 +168,20 @@ enum sdca_ot_controls { SDCA_CTL_OT_NDAI_PACKETTYPE = 0x17, }; +/** + * enum sdca_usage_range - Column definitions for Usage + */ +enum sdca_usage_range { + SDCA_USAGE_NUMBER = 0, + SDCA_USAGE_CBN = 1, + SDCA_USAGE_SAMPLE_RATE = 2, + SDCA_USAGE_SAMPLE_WIDTH = 3, + SDCA_USAGE_FULL_SCALE = 4, + SDCA_USAGE_NOISE_FLOOR = 5, + SDCA_USAGE_TAG = 6, + SDCA_USAGE_NCOLS = 7, +}; + /** * enum sdca_mu_controls - SDCA Controls for Mixer Unit * @@ -246,6 +260,15 @@ enum sdca_cs_controls { SDCA_CTL_CS_SAMPLERATEINDEX = 0x10, }; +/** + * enum sdca_samplerateindex_range - Column definitions for SampleRateIndex + */ +enum sdca_samplerateindex_range { + SDCA_SAMPLERATEINDEX_INDEX = 0, + SDCA_SAMPLERATEINDEX_RATE = 1, + SDCA_SAMPLERATEINDEX_NCOLS = 2, +}; + /** * enum sdca_cx_controls - SDCA Controls for Clock Selector * -- cgit v1.2.3 From 31be641d74267d98317ef5a2b90e6200511cabb3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 15 May 2025 10:11:54 +0200 Subject: net: phy: make mdio consumer / device layer a separate module After having factored out the provider part from mdio_bus.c, we can make the mdio consumer / device layer a separate module. This also allows to remove Kconfig symbol MDIO_DEVICE. The module init / exit functions from mdio_bus.c no longer have to be called from phy_device.c. The link order defined in drivers/net/phy/Makefile ensures that init / exit functions are called in the right order. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/dba6b156-5748-44ce-b5e2-e8dc2fcee5a7@gmail.com Signed-off-by: Paolo Abeni --- include/linux/phy.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7c29d346d4b3..92a88b5ce356 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2033,9 +2033,6 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); -int __init mdio_bus_init(void); -void mdio_bus_exit(void); - int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data); int phy_ethtool_get_sset_count(struct phy_device *phydev); int phy_ethtool_get_stats(struct phy_device *phydev, -- cgit v1.2.3 From 3318f7b5cefbff96b1bb49584ac38d2c9997a830 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 19 May 2025 10:51:28 -0700 Subject: iommu/io-pgtable-arm: Add quirk to quiet WARN_ON() In situations where mapping/unmapping sequence can be controlled by userspace, attempting to map over a region that has not yet been unmapped is an error. But not something that should spam dmesg. Now that there is a quirk, we can also drop the selftest_running flag, and use the quirk instead for selftests. Acked-by: Robin Murphy Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20250519175348.11924-6-robdclark@gmail.com [will: Rename quirk to IO_PGTABLE_QUIRK_NO_WARN per Robin's suggestion] Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index bba2a51c87d2..138fbd89b1e6 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -88,6 +88,13 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits + * + * IO_PGTABLE_QUIRK_NO_WARN: Do not WARN_ON() on conflicting + * mappings, but silently return -EEXISTS. Normally an attempt + * to map over an existing mapping would indicate some sort of + * kernel bug, which would justify the WARN_ON(). But for GPU + * drivers, this could be under control of userspace. Which + * deserves an error return, but not to spam dmesg. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -97,6 +104,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) #define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8) + #define IO_PGTABLE_QUIRK_NO_WARN BIT(9) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- cgit v1.2.3 From d6bf294773a472fe4694b92714af482f5c6aa4c6 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Mon, 12 May 2025 14:33:15 +0800 Subject: ext4/jbd2: convert jbd2_journal_blocks_per_page() to support large folio jbd2_journal_blocks_per_page() returns the number of blocks in a single page. Rename it to jbd2_journal_blocks_per_folio() and make it returns the number of blocks in the largest folio, preparing for the calculation of journal credits blocks when allocating blocks within a large folio in the writeback path. 
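The distinction matters for credit accounting; a sketch of the underlying arithmetic (illustrative only, not the kernel implementation):

static inline unsigned int example_blocks_per_folio(unsigned int folio_order,
						    unsigned int blkbits)
{
	return 1U << (folio_order + PAGE_SHIFT - blkbits);
}

So e.g. an order-9 (2 MiB) folio with 4 KiB blocks spans 512 blocks, where a single page spans only one.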
Signed-off-by: Zhang Yi Link: https://patch.msgid.link/20250512063319.3539411-5-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 023e8abdb99a..ebbcdab474d5 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1723,7 +1723,7 @@ static inline int tid_geq(tid_t x, tid_t y) return (difference >= 0); } -extern int jbd2_journal_blocks_per_page(struct inode *inode); +extern int jbd2_journal_blocks_per_folio(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) -- cgit v1.2.3 From 76005718cf8bfdb6b0f818ea75ca6a4b3bee34cd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 12 May 2025 22:38:08 -0700 Subject: jbd2: remove journal_t argument from jbd2_chksum() Since jbd2_chksum() no longer uses its journal_t argument, remove it. Signed-off-by: Eric Biggers Reviewed-by: Baokun Li Link: https://patch.msgid.link/20250513053809.699974-4-ebiggers@kernel.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ebbcdab474d5..43b9297fe8a7 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1766,8 +1766,7 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) #define BJ_Reserved 4 /* Buffer is reserved for access by journal */ #define BJ_Types 5 -static inline u32 jbd2_chksum(journal_t *journal, u32 crc, - const void *address, unsigned int length) +static inline u32 jbd2_chksum(u32 crc, const void *address, unsigned int length) { return crc32c(crc, address, length); } -- cgit v1.2.3 From 99c1e4eb6a3fbbec27c7c70e5fce15cdc1422893 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 20 May 2025 12:54:33 +0800 Subject: ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG Add UBLK_F_AUTO_BUF_REG for supporting to register buffer automatically to local io_uring context with provided buffer index. Add UAPI structure `struct ublk_auto_buf_reg` for holding user parameter to register request buffer automatically, one 'flags' field is defined, and there is still 32bit available for future extension, such as, adding one io_ring FD field for registering buffer to external io_uring. `struct ublk_auto_buf_reg` is populated from ublk uring_cmd's sqe->addr, and all existing ublk commands are data-less, so it is just fine to reuse sqe->addr for this purpose. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250520045455.515691-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 64 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index be5c6c6b16e0..f6f516b1223b 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -219,6 +219,29 @@ */ #define UBLK_F_UPDATE_SIZE (1ULL << 10) +/* + * request buffer is registered automatically to uring_cmd's io_uring + * context before delivering this io command to ublk server, meantime + * it is un-registered automatically when completing this io command. 
+ * + * For using this feature: + * + * - ublk server has to create sparse buffer table + * + * - ublk server passes auto buf register data via uring_cmd's sqe->addr, + * `struct ublk_auto_buf_reg` is populated from sqe->addr, please see + * the definition of ublk_sqe_addr_to_auto_buf_reg() + * + * - pass buffer index from `ublk_auto_buf_reg.index` + * + * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed + * + * This way avoids extra cost from two uring_cmd, but also simplifies backend + * implementation, such as, the dependency on IO_REGISTER_IO_BUF and + * IO_UNREGISTER_IO_BUF becomes not necessary. + */ +#define UBLK_F_AUTO_BUF_REG (1ULL << 11) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 @@ -339,6 +362,47 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) return iod->op_flags >> 8; } +struct ublk_auto_buf_reg { + /* index for registering the delivered request buffer */ + __u16 index; + __u16 reserved0; + + /* + * io_ring FD can be passed via the reserve field in future for + * supporting to register io buffer to external io_uring + */ + __u32 reserved1; +}; + +/* + * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via + * uring_cmd's sqe->addr: + * + * - bit0 ~ bit15: buffer index + * - bit24 ~ bit31: reserved0 + * - bit32 ~ bit63: reserved1 + */ +static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( + __u64 sqe_addr) +{ + struct ublk_auto_buf_reg reg = { + .index = sqe_addr & 0xffff, + .reserved0 = (sqe_addr >> 16) & 0xffff, + .reserved1 = sqe_addr >> 32, + }; + + return reg; +} + +static inline __u64 +ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf) +{ + __u64 addr = buf->index | (__u64)buf->reserved0 << 16 | + (__u64)buf->reserved1 << 32; + + return addr; +} + /* issued to ublk driver via /dev/ublkcN */ struct ublksrv_io_cmd { __u16 q_id; -- cgit v1.2.3 From 53f427e7944b4f288866cc4a69835086e0958c6a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 20 May 2025 12:54:34 +0800 Subject: ublk: support UBLK_AUTO_BUF_REG_FALLBACK For UBLK_F_AUTO_BUF_REG, buffer is registered to uring_cmd context automatically with the provided buffer index. User may provide one wrong buffer index, or the specified buffer is registered by application already. Add UBLK_AUTO_BUF_REG_FALLBACK for supporting to auto buffer registering fallback by completing the uring_cmd and telling ublk server the register failure via UBLK_AUTO_BUF_REG_FALLBACK, then ublk server still can register the buffer from userspace. So we can provide reliable way for supporting auto buffer register. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250520045455.515691-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index f6f516b1223b..c4b9942697fc 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -236,9 +236,16 @@ * * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed * + * - pass flags from `ublk_auto_buf_reg.flags` if needed + * * This way avoids extra cost from two uring_cmd, but also simplifies backend * implementation, such as, the dependency on IO_REGISTER_IO_BUF and * IO_UNREGISTER_IO_BUF becomes not necessary. 
+ * + * If wrong data or flags are provided, both IO_FETCH_REQ and + * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request + * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued + * successfully */ #define UBLK_F_AUTO_BUF_REG (1ULL << 11) @@ -328,6 +335,17 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_FUA (1U << 13) #define UBLK_IO_F_NOUNMAP (1U << 15) #define UBLK_IO_F_SWAP (1U << 16) +/* + * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only. + * + * This flag is set if auto buffer register is failed & ublk server passes + * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer + * manually for handling the delivered IO command if this flag is observed + * + * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is + * passed in. + */ +#define UBLK_IO_F_NEED_REG_BUF (1U << 17) /* * io cmd is described by this structure, and stored in share memory, indexed @@ -362,10 +380,23 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) return iod->op_flags >> 8; } +/* + * If this flag is set, fallback by completing the uring_cmd and setting + * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure; + * otherwise the client ublk request is failed silently + * + * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF + * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set, + * ublk server needs to register io buffer manually for handling IO command. + */ +#define UBLK_AUTO_BUF_REG_FALLBACK (1 << 0) +#define UBLK_AUTO_BUF_REG_F_MASK UBLK_AUTO_BUF_REG_FALLBACK + struct ublk_auto_buf_reg { /* index for registering the delivered request buffer */ __u16 index; - __u16 reserved0; + __u8 flags; + __u8 reserved0; /* * io_ring FD can be passed via the reserve field in future for @@ -379,6 +410,7 @@ struct ublk_auto_buf_reg { * uring_cmd's sqe->addr: * * - bit0 ~ bit15: buffer index + * - bit16 ~ bit23: flags * - bit24 ~ bit31: reserved0 * - bit32 ~ bit63: reserved1 */ @@ -387,7 +419,8 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( { struct ublk_auto_buf_reg reg = { .index = sqe_addr & 0xffff, - .reserved0 = (sqe_addr >> 16) & 0xffff, + .flags = (sqe_addr >> 16) & 0xff, + .reserved0 = (sqe_addr >> 24) & 0xff, .reserved1 = sqe_addr >> 32, }; @@ -397,7 +430,7 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( static inline __u64 ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf) { - __u64 addr = buf->index | (__u64)buf->reserved0 << 16 | + __u64 addr = buf->index | (__u64)buf->flags << 16 | (__u64)buf->reserved0 << 24 | (__u64)buf->reserved1 << 32; return addr; -- cgit v1.2.3 From 7e6f4a0a7512eca93cecff3bcb37f0ed164949a2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 19 May 2025 13:13:14 +0200 Subject: i2c: remove 'of_node' member from i2c_boardinfo There is no user of this member anymore. We can remove it. 
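New code wanting to attach a DT node to a board info should go through @fwnode, e.g. (sketch only; device name and address are made up):

static int example_register(struct i2c_adapter *adap, struct device_node *np)
{
	struct i2c_board_info info = {
		I2C_BOARD_INFO("example-dev", 0x50),
		.fwnode = of_fwnode_handle(np),
	};

	return PTR_ERR_OR_ZERO(i2c_new_client_device(adap, &info));
}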
Reviewed-by: Andi Shyti Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index cc1437f29823..20fd41b51d5c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -405,7 +405,6 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } * @addr: stored in i2c_client.addr * @dev_name: Overrides the default - dev_name if set * @platform_data: stored in i2c_client.dev.platform_data - * @of_node: **DEPRECATED** - use @fwnode for this * @fwnode: device node supplied by the platform firmware * @swnode: software node for the device * @resources: resources associated with the device @@ -429,7 +428,6 @@ struct i2c_board_info { unsigned short addr; const char *dev_name; void *platform_data; - struct device_node *of_node; struct fwnode_handle *fwnode; const struct software_node *swnode; const struct resource *resources; -- cgit v1.2.3 From 4c2bd7913f52b1e5c978edf56cdef39c30a1f603 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 17 May 2025 13:08:10 -0700 Subject: net: let lockdep compare instance locks AFAIU always returning -1 from lockdep's compare function basically disables checking of dependencies between given locks. Try to be a little more precise about what guarantees that instance locks won't deadlock. Right now we only nest them under protection of rtnl_lock. Mostly in unregister_netdevice_many() and dev_close_many(). Acked-by: Stanislav Fomichev Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250517200810.466531-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_lock.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index 2a753813f849..c345afecd4c5 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -99,16 +99,15 @@ static inline void netdev_unlock_ops_compat(struct net_device *dev) static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) { - /* Only lower devices currently grab the instance lock, so no - * real ordering issues can occur. In the near future, only - * hardware devices will grab instance lock which also does not - * involve any ordering. Suppress lockdep ordering warnings - * until (if) we start grabbing instance lock on pure SW - * devices (bond/team/veth/etc). - */ if (a == b) return 0; - return -1; + + /* Allow locking multiple devices only under rtnl_lock, + * the exact order doesn't matter. + * Note that upper devices don't lock their ops, so nesting + * mostly happens in batched device removal for now. + */ + return lockdep_rtnl_is_held() ? -1 : 1; } #define netdev_lockdep_set_classes(dev) \ -- cgit v1.2.3 From 3f1716ee0f6c63795e6d225e3f5ec3825cd2bd57 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 17 May 2025 22:34:32 +0200 Subject: net: phy: fixed_phy: remove irq argument from fixed_phy_add All callers pass PHY_POLL, therefore remove irq argument from fixed_phy_add(). 
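A caller sketch with the new signature (board-code names made up):

static int example_add_fixed_link(void)
{
	static struct fixed_phy_status status = {
		.link	= 1,
		.speed	= SPEED_1000,
		.duplex	= DUPLEX_FULL,
	};

	/* previously: fixed_phy_add(PHY_POLL, 0, &status); */
	return fixed_phy_add(0, &status);
}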
Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Acked-by: Greg Ungerer Link: https://patch.msgid.link/b3b9b3bc-c310-4a54-b376-c909c83575de@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy_fixed.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 3392c09b5d24..316bb4deda37 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -17,8 +17,7 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -extern int fixed_phy_add(unsigned int irq, int phy_id, - struct fixed_phy_status *status); +int fixed_phy_add(int phy_id, struct fixed_phy_status *status); extern struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, struct device_node *np); @@ -28,7 +27,7 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); #else -static inline int fixed_phy_add(unsigned int irq, int phy_id, +static inline int fixed_phy_add(int phy_id, struct fixed_phy_status *status) { return -ENODEV; -- cgit v1.2.3 From d23b4af5df3900fb0b4e1a05cb8119dd1c395519 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 17 May 2025 22:35:56 +0200 Subject: net: phy: fixed_phy: remove irq argument from fixed_phy_register All callers pass PHY_POLL, therefore remove irq argument from fixed_phy_register(). Note: I keep the irq argument in fixed_phy_add_gpiod() for now, for the case that somebody may want to use a GPIO interrupt in the future, by e.g. adding a call to fwnode_irq_get() to fixed_phy_get_gpiod(). Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/31cdb232-a5e9-4997-a285-cb9a7d208124@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy_fixed.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 316bb4deda37..634149a73c2a 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -18,9 +18,8 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); int fixed_phy_add(int phy_id, struct fixed_phy_status *status); -extern struct phy_device *fixed_phy_register(unsigned int irq, - struct fixed_phy_status *status, - struct device_node *np); +struct phy_device *fixed_phy_register(struct fixed_phy_status *status, + struct device_node *np); extern void fixed_phy_unregister(struct phy_device *phydev); extern int fixed_phy_set_link_update(struct phy_device *phydev, @@ -32,9 +31,9 @@ static inline int fixed_phy_add(int phy_id, { return -ENODEV; } -static inline struct phy_device *fixed_phy_register(unsigned int irq, - struct fixed_phy_status *status, - struct device_node *np) +static inline struct phy_device * +fixed_phy_register(struct fixed_phy_status *status, + struct device_node *np) { return ERR_PTR(-ENODEV); } -- cgit v1.2.3 From 4ba1c5bb4811f560a86697311cb4e9741e047a5d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 17 May 2025 22:37:29 +0200 Subject: net: phy: fixed_phy: constify status argument where possible Constify the passed struct fixed_phy_status *status where possible. 
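With the const qualifiers in place, callers can keep the status table in rodata, roughly like this (sketch only):

static const struct fixed_phy_status example_status = {
	.link	= 1,
	.speed	= SPEED_100,
	.duplex	= DUPLEX_FULL,
};

static struct phy_device *example_register(struct device_node *np)
{
	return fixed_phy_register(&example_status, np);
}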
Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/d1764b62-8538-408b-a4e3-b63715481a38@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy_fixed.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 634149a73c2a..5399b9e41e35 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -17,8 +17,8 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -int fixed_phy_add(int phy_id, struct fixed_phy_status *status); -struct phy_device *fixed_phy_register(struct fixed_phy_status *status, +int fixed_phy_add(int phy_id, const struct fixed_phy_status *status); +struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np); extern void fixed_phy_unregister(struct phy_device *phydev); @@ -27,12 +27,12 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev, struct fixed_phy_status *)); #else static inline int fixed_phy_add(int phy_id, - struct fixed_phy_status *status) + const struct fixed_phy_status *status) { return -ENODEV; } static inline struct phy_device * -fixed_phy_register(struct fixed_phy_status *status, +fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np) { return ERR_PTR(-ENODEV); -- cgit v1.2.3 From aed031da7e8cf6e31816a34afde961fbe0305fea Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 May 2025 13:41:28 -0700 Subject: bnxt_en: Fix netdev locking in ULP IRQ functions netdev_lock is already held when calling bnxt_ulp_irq_stop() and bnxt_ulp_irq_restart(). When converting rtnl_lock to netdev_lock, the original code was rtnl_dereference() to indicate that rtnl_lock was already held. rcu_dereference_protected() is the correct conversion after replacing rtnl_lock with netdev_lock. Add a new helper netdev_lock_dereference() similar to rtnl_dereference(). Fixes: 004b5008016a ("eth: bnxt: remove most dependencies on RTNL") Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250519204130.3097027-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- include/net/netdev_lock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index c316b551df8d..0ee5bc766810 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -98,6 +98,9 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, &qdisc_xmit_lock_key); \ } +#define netdev_lock_dereference(p, dev) \ + rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock)) + int netdev_debug_event(struct notifier_block *nb, unsigned long event, void *ptr); -- cgit v1.2.3 From 7f0047cb9d42786b62f7ad91c1a17e55940d2dfb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 18 May 2025 20:26:42 +0200 Subject: scsi: target: core: Constify enabled() in struct target_opcode_descriptor Constify the first argument of the enabled() function in struct target_opcode_descriptor. This is the first step in order to constify struct target_opcode_descriptor. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/4290cf1dbe100c1b1edf2ede5e5aef19b04ee7f2.1747592774.git.christophe.jaillet@wanadoo.fr Signed-off-by: Martin K. 
Petersen --- include/target/target_core_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index a52d4967c0d3..c4d9116904aa 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -898,7 +898,7 @@ struct target_opcode_descriptor { u8 specific_timeout; u16 nominal_timeout; u16 recommended_timeout; - bool (*enabled)(struct target_opcode_descriptor *descr, + bool (*enabled)(const struct target_opcode_descriptor *descr, struct se_cmd *cmd); void (*update_usage_bits)(u8 *usage_bits, struct se_device *dev); -- cgit v1.2.3 From f0a56c17e64bb5e7cdb9295df2b5fc21e4949005 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 May 2025 19:27:18 -0700 Subject: inet: Remove rtnl_is_held arg of lwtunnel_valid_encap_type(_attr)?(). Commit f130a0cc1b4f ("inet: fix lwtunnel_valid_encap_type() lock imbalance") added the rtnl_is_held argument as a temporary fix while I'm converting nexthop and IPv6 routing table to per-netns RTNL or RCU. Now all callers of lwtunnel_valid_encap_type() do not hold RTNL. Let's remove the argument. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250516022759.44392-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/lwtunnel.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 39cd50300a18..c306ebe379a0 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -116,11 +116,9 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_valid_encap_type(u16 encap_type, - struct netlink_ext_ack *extack, - bool rtnl_is_held); + struct netlink_ext_ack *extack); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, - struct netlink_ext_ack *extack, - bool rtnl_is_held); + struct netlink_ext_ack *extack); int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, @@ -203,15 +201,14 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, } static inline int lwtunnel_valid_encap_type(u16 encap_type, - struct netlink_ext_ack *extack, - bool rtnl_is_held) + struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel"); return -EOPNOTSUPP; } + static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, - struct netlink_ext_ack *extack, - bool rtnl_is_held) + struct netlink_ext_ack *extack) { /* return 0 since we are not walking attr looking for * RTA_ENCAP_TYPE attribute on nexthops. -- cgit v1.2.3 From 5b9db9c16f428ada473314ad1c49e55681be7a72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 15 May 2025 16:37:25 +0200 Subject: RISC-V: KVM: add KVM_CAP_RISCV_MP_STATE_RESET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a toggleable VM capability to reset the VCPU from userspace by setting MP_STATE_INIT_RECEIVED through IOCTL. Reset through a mp_state to avoid adding a new IOCTL. Do not reset on a transition from STOPPED to RUNNABLE, because it's better to avoid side effects that would complicate userspace adoption. 
The MP_STATE_INIT_RECEIVED is not a permanent mp_state -- IOCTL resets the VCPU while preserving the original mp_state -- because we wouldn't gain much from having a new state it in the rest of KVM, but it's a very non-standard use of the IOCTL. Signed-off-by: Radim Krčmář Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250515143723.2450630-5-rkrcmar@ventanamicro.com Signed-off-by: Anup Patel --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b6ae8ad8934b..454b7d4a0448 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -930,6 +930,7 @@ struct kvm_enable_cap { #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_RISCV_MP_STATE_RESET 240 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From 7190b3c8bd2b0cde483bd440cf91ba1c518b4261 Mon Sep 17 00:00:00 2001 From: Ignacio Moreno Gonzalez Date: Wed, 7 May 2025 15:28:06 +0200 Subject: mm: mmap: map MAP_STACK to VM_NOHUGEPAGE only if THP is enabled commit c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") maps the mmap option MAP_STACK to VM_NOHUGEPAGE. This is also done if CONFIG_TRANSPARENT_HUGEPAGE is not defined. But in that case, the VM_NOHUGEPAGE does not make sense. I discovered this issue when trying to use the tool CRIU to checkpoint and restore a container. Our running kernel is compiled without CONFIG_TRANSPARENT_HUGEPAGE. CRIU parses the output of /proc//smaps and saves the "nh" flag. When trying to restore the container, CRIU fails to restore the "nh" mappings, since madvise() MADV_NOHUGEPAGE always returns an error because CONFIG_TRANSPARENT_HUGEPAGE is not defined. Link: https://lkml.kernel.org/r/20250507-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled-v5-1-c6c38cfefd6e@kuka.com Fixes: c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") Signed-off-by: Ignacio Moreno Gonzalez Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Reviewed-by: Yang Shi Reviewed-by: Liam R. Howlett Cc: Signed-off-by: Andrew Morton --- include/linux/mman.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mman.h b/include/linux/mman.h index bce214fece16..f4c6346a8fcd 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -155,7 +155,9 @@ calc_vm_flag_bits(struct file *file, unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | +#endif arch_calc_vm_flag_bits(file, flags); } -- cgit v1.2.3 From 0f518255bde881d2a2605bbc080b438b532b6ab2 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 7 May 2025 15:09:57 +0200 Subject: mm: fix VM_UFFD_MINOR == VM_SHADOW_STACK on USERFAULTFD=y && ARM64_GCS=y On configs with CONFIG_ARM64_GCS=y, VM_SHADOW_STACK is bit 38. On configs with CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y (selected by CONFIG_ARM64 when CONFIG_USERFAULTFD=y), VM_UFFD_MINOR is _also_ bit 38. This bit being shared by two different VMA flags could lead to all sorts of unintended behaviors. Presumably, a process could maybe call into userfaultfd in a way that disables the shadow stack vma flag. 
I can't think of any attack where this would help (presumably, if an attacker tries to disable shadow stacks, they are trying to hijack control flow so can't arbitrarily call into userfaultfd yet anyway) but this still feels somewhat scary. Link: https://lkml.kernel.org/r/20250507131000.1204175-2-revest@chromium.org Fixes: ae80e1629aea ("mm: Define VM_SHADOW_STACK for arm64 when we support GCS") Signed-off-by: Florent Revest Reviewed-by: Mark Brown Cc: Borislav Betkov Cc: Brendan Jackman Cc: Catalin Marinas Cc: Florent Revest Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thiago Jung Bauermann Cc: Thomas Gleinxer Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bf55206935c4..fdda6b16263b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -385,7 +385,7 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 38 +# define VM_UFFD_MINOR_BIT 41 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE -- cgit v1.2.3 From 0bf2d838de1ffb6d0bb6f8d18a6ccc59b7d9a705 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Sat, 10 May 2025 15:54:13 +0800 Subject: taskstats: fix struct taskstats breaks backward compatibility since version 15 Problem ======== commit 658eb5ab916d ("delayacct: add delay max to record delay peak") - adding more fields commit f65c64f311ee ("delayacct: add delay min to record delay peak") - adding more fields commit b016d0873777 ("taskstats: modify taskstats version") - version bump to 15 Since version 15 (TASKSTATS_VERSION=15) the new layout of the structure adds fields in the middle of the structure, rendering all old software incompatible with newer kernels and software compiled against the new kernel headers incompatible with older kernels. 
Solution ========= move delay max and delay min to the end of taskstat, and bump the version to 16 after the change [wang.yaxin@zte.com.cn: adjust indentation] Link: https://lkml.kernel.org/r/202505192131489882NSciXV4EGd8zzjLuwoOK@zte.com.cn Link: https://lkml.kernel.org/r/20250510155413259V4JNRXxukdDgzsaL0Fo6a@zte.com.cn Fixes: f65c64f311ee ("delayacct: add delay min to record delay peak") Signed-off-by: Wang Yaxin Signed-off-by: xu xin Signed-off-by: Kun Jiang Reviewed-by: Yang Yang Cc: Balbir Singh Cc: Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 47 ++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 95762232e018..5929030d4e8b 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 15 +#define TASKSTATS_VERSION 16 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -72,8 +72,6 @@ struct taskstats { */ __u64 cpu_count __attribute__((aligned(8))); __u64 cpu_delay_total; - __u64 cpu_delay_max; - __u64 cpu_delay_min; /* Following four fields atomically updated using task->delays->lock */ @@ -82,14 +80,10 @@ struct taskstats { */ __u64 blkio_count; __u64 blkio_delay_total; - __u64 blkio_delay_max; - __u64 blkio_delay_min; /* Delay waiting for page fault I/O (swap in only) */ __u64 swapin_count; __u64 swapin_delay_total; - __u64 swapin_delay_max; - __u64 swapin_delay_min; /* cpu "wall-clock" running time * On some architectures, value will adjust for cpu time stolen @@ -172,14 +166,11 @@ struct taskstats { /* Delay waiting for memory reclaim */ __u64 freepages_count; __u64 freepages_delay_total; - __u64 freepages_delay_max; - __u64 freepages_delay_min; + /* Delay waiting for thrashing page */ __u64 thrashing_count; __u64 thrashing_delay_total; - __u64 thrashing_delay_max; - __u64 thrashing_delay_min; /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ @@ -187,8 +178,6 @@ struct taskstats { /* v11: Delay waiting for memory compact */ __u64 compact_count; __u64 compact_delay_total; - __u64 compact_delay_max; - __u64 compact_delay_min; /* v12 begin */ __u32 ac_tgid; /* thread group ID */ @@ -210,15 +199,37 @@ struct taskstats { /* v13: Delay waiting for write-protect copy */ __u64 wpcopy_count; __u64 wpcopy_delay_total; - __u64 wpcopy_delay_max; - __u64 wpcopy_delay_min; /* v14: Delay waiting for IRQ/SOFTIRQ */ __u64 irq_count; __u64 irq_delay_total; - __u64 irq_delay_max; - __u64 irq_delay_min; - /* v15: add Delay max */ + + /* v15: add Delay max and Delay min */ + + /* v16: move Delay max and Delay min to the end of taskstat */ + __u64 cpu_delay_max; + __u64 cpu_delay_min; + + __u64 blkio_delay_max; + __u64 blkio_delay_min; + + __u64 swapin_delay_max; + __u64 swapin_delay_min; + + __u64 freepages_delay_max; + __u64 freepages_delay_min; + + __u64 thrashing_delay_max; + __u64 thrashing_delay_min; + + __u64 compact_delay_max; + __u64 compact_delay_min; + + __u64 wpcopy_delay_max; + __u64 wpcopy_delay_min; + + __u64 irq_delay_max; + __u64 irq_delay_min; }; -- cgit v1.2.3 From 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 14 May 2025 18:06:02 +0100 Subject: highmem: add folio_test_partial_kmap() In commit c749d9b7ebbc ("iov_iter: fix copy_page_from_iter_atomic() if KMAP_LOCAL_FORCE_MAP"), Hugh correctly noted that if KMAP_LOCAL_FORCE_MAP is 
enabled, we must limit ourselves to PAGE_SIZE bytes per call to kmap_local(). The same problem exists in memcpy_from_folio(), memcpy_to_folio(), folio_zero_tail(), folio_fill_tail() and memcpy_from_file_folio(), so add folio_test_partial_kmap() to do this more succinctly. Link: https://lkml.kernel.org/r/20250514170607.3000994-2-willy@infradead.org Fixes: 00cdf76012ab ("mm: add memcpy_from_file_folio()") Signed-off-by: Matthew Wilcox (Oracle) Cc: Al Viro Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton --- include/linux/highmem.h | 10 +++++----- include/linux/page-flags.h | 7 +++++++ 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 5c6bea81a90e..c698f8415675 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -461,7 +461,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, const char *from = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, char *to = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -522,7 +522,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio, { size_t len = folio_size(folio) - offset; - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -560,7 +560,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset, VM_BUG_ON(offset + len > folio_size(folio)); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e6a21b62dcce..3b814ce08331 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -615,6 +615,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) PAGEFLAG_FALSE(HighMem, highmem) #endif +/* Does kmap_local_folio() only allow access to one page of the folio? */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +#define folio_test_partial_kmap(f) true +#else +#define folio_test_partial_kmap(f) folio_test_highmem(f) +#endif + #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { -- cgit v1.2.3 From d7500fbfb12067ee7313f13f4c58f771be3018ab Mon Sep 17 00:00:00 2001 From: Bert Karwatzki Date: Wed, 21 May 2025 00:34:29 +0200 Subject: wifi: check if socket flags are valid Checking the SOCK_WIFI_STATUS flag bit in sk_flags may give wrong results since sk_flags are part of a union and the union is used otherwise. Add sk_requests_wifi_status() which checks if sk is non-NULL, sk is a full socket (so flags are valid) and checks the flag bit. 
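For illustration, converting an open-coded check to the new helper looks like this (sketch only, not taken from this patch; the transmit-path flag names are hypothetical):

    /* Before: unsafe, sk may be a request/timewait socket whose flag
     * word overlays other data.
     */
    if (skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS))
        tx_flags |= WIFI_STATUS_REQUESTED;  /* hypothetical flag */

    /* After: the helper also verifies sk_fullsock(sk) first. */
    if (sk_requests_wifi_status(skb->sk))
        tx_flags |= WIFI_STATUS_REQUESTED;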
Fixes: 76a853f86c97 ("wifi: free SKBTX_WIFI_STATUS skb tx_flags flag") Suggested-by: Johannes Berg Signed-off-by: Bert Karwatzki Reviewed-by: Jason Xing Link: https://patch.msgid.link/20250520223430.6875-1-spasswolf@web.de [edit commit message, fix indentation] Signed-off-by: Johannes Berg --- include/net/sock.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index f0fabb9fd28a..75c12e14fc47 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2821,6 +2821,12 @@ sk_is_refcounted(struct sock *sk) return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE); } +static inline bool +sk_requests_wifi_status(struct sock *sk) +{ + return sk && sk_fullsock(sk) && sock_flag(sk, SOCK_WIFI_STATUS); +} + /* Checks if this SKB belongs to an HW offloaded socket * and whether any SW fallbacks are required based on dev. * Check decrypted mark in case skb_orphan() cleared socket. -- cgit v1.2.3 From 223657cfc87c3d5b9c2172014181c8ed7acf58e8 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Tue, 20 May 2025 15:28:26 +0200 Subject: pinctrl: remove extern specifier for functions in machine.h Extern is the default specifier for a function, no need to define it. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/20250520-aaeon-up-board-pinctrl-support-v6-2-dcb3756be3c6@bootlin.com Signed-off-by: Linus Walleij --- include/linux/pinctrl/machine.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index 673e96df453b..0940fabb154d 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -153,10 +153,10 @@ struct pinctrl_map; #ifdef CONFIG_PINCTRL -extern int pinctrl_register_mappings(const struct pinctrl_map *map, - unsigned int num_maps); -extern void pinctrl_unregister_mappings(const struct pinctrl_map *map); -extern void pinctrl_provide_dummies(void); +int pinctrl_register_mappings(const struct pinctrl_map *map, + unsigned int num_maps); +void pinctrl_unregister_mappings(const struct pinctrl_map *map); +void pinctrl_provide_dummies(void); #else static inline int pinctrl_register_mappings(const struct pinctrl_map *map, -- cgit v1.2.3 From 2e9ba1d9a31f68b4deb5f9e62a9201a51b00abf7 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Tue, 20 May 2025 15:28:27 +0200 Subject: pinctrl: core: add devm_pinctrl_register_mappings() Using devm_pinctrl_register_mappings(), the core can automatically unregister pinctrl mappings. 
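For illustration, a board or platform driver would typically use it like this (sketch only; the device name, controller name, group and function strings are made up and not part of this patch):

    static const struct pinctrl_map board_map[] = {
        /* Hypothetical mapping: mux group "uart0_grp" to function
         * "uart0" on controller "pinctrl-foo" for device "serial0".
         */
        PIN_MAP_MUX_GROUP_DEFAULT("serial0", "pinctrl-foo",
                                  "uart0_grp", "uart0"),
    };

    static int board_pinctrl_probe(struct platform_device *pdev)
    {
        /* Mappings are unregistered automatically when the device is
         * unbound, so no remove() counterpart is needed.
         */
        return devm_pinctrl_register_mappings(&pdev->dev, board_map,
                                              ARRAY_SIZE(board_map));
    }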
Reviewed-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/20250520-aaeon-up-board-pinctrl-support-v6-3-dcb3756be3c6@bootlin.com Signed-off-by: Linus Walleij --- include/linux/pinctrl/machine.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index 0940fabb154d..25620229b1d6 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -149,12 +149,16 @@ struct pinctrl_map { #define PIN_MAP_CONFIGS_GROUP_HOG_DEFAULT(dev, grp, cfgs) \ PIN_MAP_CONFIGS_GROUP(dev, PINCTRL_STATE_DEFAULT, dev, grp, cfgs) +struct device; struct pinctrl_map; #ifdef CONFIG_PINCTRL int pinctrl_register_mappings(const struct pinctrl_map *map, unsigned int num_maps); +int devm_pinctrl_register_mappings(struct device *dev, + const struct pinctrl_map *map, + unsigned int num_maps); void pinctrl_unregister_mappings(const struct pinctrl_map *map); void pinctrl_provide_dummies(void); #else @@ -165,6 +169,13 @@ static inline int pinctrl_register_mappings(const struct pinctrl_map *map, return 0; } +static inline int devm_pinctrl_register_mappings(struct device *dev, + const struct pinctrl_map *map, + unsigned int num_maps) +{ + return 0; +} + static inline void pinctrl_unregister_mappings(const struct pinctrl_map *map) { } -- cgit v1.2.3 From 70892277ca2dbad30ce89acf62fb62045d4bc59b Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 20 May 2025 15:08:56 -0400 Subject: iommu/arm-smmu-qcom: Make set_stall work when the device is on Up until now we have only called the set_stall callback during initialization when the device is off. But we will soon start calling it to temporarily disable stall-on-fault when the device is on, so handle that by checking if the device is on and writing SCTLR. Signed-off-by: Connor Abbott Reviewed-by: Rob Clark Link: https://lore.kernel.org/r/20250520-msm-gpu-fault-fixes-next-v8-3-fce6ee218787@gmail.com [will: Fix "mixed declarations and code" warning from sparse] Signed-off-by: Will Deacon --- include/linux/adreno-smmu-priv.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h index abec23c7744f..d83c9175828f 100644 --- a/include/linux/adreno-smmu-priv.h +++ b/include/linux/adreno-smmu-priv.h @@ -45,9 +45,9 @@ struct adreno_smmu_fault_info { * TTBR0 translation is enabled with the specified cfg * @get_fault_info: Called by the GPU fault handler to get information about * the fault - * @set_stall: Configure whether stall on fault (CFCFG) is enabled. Call - * before set_ttbr0_cfg(). If stalling on fault is enabled, - * the GPU driver must call resume_translation() + * @set_stall: Configure whether stall on fault (CFCFG) is enabled. If + * stalling on fault is enabled, the GPU driver must call + * resume_translation() * @resume_translation: Resume translation after a fault * * @set_prr_bit: [optional] Configure the GPU's Partially Resident -- cgit v1.2.3 From fe26933cf1e151ce0a5d858c263e8dacb2f12cee Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:21 -0400 Subject: vt: add ucs_get_fallback() This is the code querying the newly introduced tables. 
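For context, the intended call pattern is roughly the following (sketch only; the glyph lookup helper stands in for whatever console code maps a code point to a font glyph and is hypothetical):

    /* Try the code point itself first; if the font has no glyph for it,
     * ask for an approximating fallback (for example a base letter
     * without its diacritic) and retry.  ucs_get_fallback() returns 0
     * when it has nothing to offer.
     */
    glyph = lookup_glyph(vc, cp);               /* hypothetical helper */
    if (glyph < 0) {
        u32 fallback = ucs_get_fallback(cp);

        if (fallback)
            glyph = lookup_glyph(vc, fallback);
    }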
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250507141535.40655-7-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/consolemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 8167494229db..6180b803795c 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -31,6 +31,7 @@ void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); u32 ucs_recompose(u32 base, u32 mark); +u32 ucs_get_fallback(u32 cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -75,6 +76,11 @@ static inline u32 ucs_recompose(u32 base, u32 mark) { return 0; } + +static inline u32 ucs_get_fallback(u32 cp) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ -- cgit v1.2.3 From 80fa7a03378588582eb40f89b6f418c0c256cf24 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 20 May 2025 13:16:43 -0400 Subject: vt: bracketed paste support This is comprised of 3 aspects: - Take note of when applications advertise bracketed paste support via "\e[?2004h" and "\e[?2004l". - Insert bracketed paste markers ("\e[200~" and "\e[201~") around pasted content in paste_selection() when bracketed paste is active. - Add TIOCL_GETBRACKETEDPASTE to return bracketed paste status so user space daemons implementing cut-and-paste functionality (e.g. gpm, BRLTTY) may know when to insert bracketed paste markers. Link: https://en.wikipedia.org/wiki/Bracketed-paste Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250520171851.1219676-2-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/linux/console_struct.h | 1 + include/uapi/linux/tiocl.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 20f564e98552..59b4fec5f254 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -145,6 +145,7 @@ struct vc_data { unsigned int vc_need_wrap : 1; unsigned int vc_can_do_color : 1; unsigned int vc_report_mouse : 2; + unsigned int vc_bracketed_paste : 1; unsigned char vc_utf : 1; /* Unicode UTF-8 encoding */ unsigned char vc_utf_count; int vc_utf_char; diff --git a/include/uapi/linux/tiocl.h b/include/uapi/linux/tiocl.h index b32acc229024..88faba506c3d 100644 --- a/include/uapi/linux/tiocl.h +++ b/include/uapi/linux/tiocl.h @@ -36,5 +36,6 @@ struct tiocl_selection { #define TIOCL_BLANKSCREEN 14 /* keep screen blank even if a key is pressed */ #define TIOCL_BLANKEDSCREEN 15 /* return which vt was blanked */ #define TIOCL_GETKMSGREDIRECT 17 /* get the vt the kernel messages are restricted to */ +#define TIOCL_GETBRACKETEDPASTE 18 /* get whether paste may be bracketed */ #endif /* _LINUX_TIOCL_H */ -- cgit v1.2.3 From 81cf4d7d2379df853a0cbb8486286783c7380ac3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 20 May 2025 13:16:44 -0400 Subject: vt: add VT_GETCONSIZECSRPOS to retrieve console size and cursor position The console dimension and cursor position are available through the /dev/vcsa interface already. However the /dev/vcsa header format uses single-byte fields therefore those values are clamped to 255. As surprizing as this may seem, some people do use 240-column 67-row screens (a 1920x1080 monitor with 8x16 pixel fonts) which is getting close to the limit. 
Monitors with higher resolution are not uncommon these days (3840x2160 producing a 480x135 character display) and it is just a matter of time before someone with, say, a braille display using the Linux VT console and BRLTTY on such a screen reports a bug about missing and oddly misaligned screen content. Let's add VT_GETCONSIZECSRPOS for the retrieval of console size and cursor position without byte-sized limitations. The actual console size limit as encoded in vt.c is 32767x32767 so using a short here is appropriate. Then this can be used to get the cursor position when /dev/vcsa reports 255. The screen dimension may already be obtained using TIOCGWINSZ and adding the same information to VT_GETCONSIZECSRPOS might be redundant. However applications that care about cursor position also care about display size and having 2 separate system calls to obtain them separately is wasteful. Also, the cursor position can be queried by writing "\e[6n" to a tty and reading back the result but that may be done only by the actual application using that tty and not a sideline observer. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250520171851.1219676-3-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vt.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h index e9d39c48520a..e5b0c492aa18 100644 --- a/include/uapi/linux/vt.h +++ b/include/uapi/linux/vt.h @@ -2,6 +2,8 @@ #ifndef _UAPI_LINUX_VT_H #define _UAPI_LINUX_VT_H +#include +#include /* * These constants are also useful for user-level apps (e.g., VC @@ -84,4 +86,13 @@ struct vt_setactivate { #define VT_SETACTIVATE 0x560F /* Activate and set the mode of a console */ +/* get console size and cursor position */ +struct vt_consizecsrpos { + __u16 con_rows; /* number of console rows */ + __u16 con_cols; /* number of console columns */ + __u16 csr_row; /* current cursor's row */ + __u16 csr_col; /* current cursor's column */ +}; +#define VT_GETCONSIZECSRPOS _IOR('V', 0x10, struct vt_consizecsrpos) + #endif /* _UAPI_LINUX_VT_H */ -- cgit v1.2.3 From 279f2c2c8e2169403d01190f042efa6e41731578 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 17 May 2025 17:14:53 +0200 Subject: futex: Use RCU_INIT_POINTER() in futex_mm_init(). There is no need for an explicit NULL pointer initialisation plus a comment why it is okay. RCU_INIT_POINTER() can be used for NULL initialisations and it is documented. This has been build tested with gcc version 9.3.0 (Debian 9.3.0-22) on a x86-64 defconfig. 
Fixes: 094ac8cff7858 ("futex: Relax the rcu_assign_pointer() assignment of mm->futex_phash in futex_mm_init()") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250517151455.1065363-4-bigeasy@linutronix.de --- include/linux/futex.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 168ffd5996b4..005b040c4791 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -88,14 +88,7 @@ void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) { - /* - * No need for rcu_assign_pointer() here, as we can rely on - * tasklist_lock write-ordering in copy_process(), before - * the task's MM becomes visible and the ->futex_phash - * becomes externally observable: - */ - mm->futex_phash = NULL; - + RCU_INIT_POINTER(mm->futex_phash, NULL); mutex_init(&mm->futex_hash_lock); } -- cgit v1.2.3 From a9194f88782afa1386641451a6c76beaa60485a0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 16 May 2025 13:25:31 +0200 Subject: coredump: add coredump socket Coredumping currently supports two modes: (1) Dumping directly into a file somewhere on the filesystem. (2) Dumping into a pipe connected to a usermode helper process spawned as a child of the system_unbound_wq or kthreadd. For simplicity I'm mostly ignoring (1). There's probably still some users of (1) out there but processing coredumps in this way can be considered adventurous especially in the face of set*id binaries. The most common option should be (2) by now. It works by allowing userspace to put a string into /proc/sys/kernel/core_pattern like: |/usr/lib/systemd/systemd-coredump %P %u %g %s %t %c %h The "|" at the beginning indicates to the kernel that a pipe must be used. The path following the pipe indicator is a path to a binary that will be spawned as a usermode helper process. Any additional parameters pass information about the task that is generating the coredump to the binary that processes the coredump. In the example core_pattern shown above systemd-coredump is spawned as a usermode helper. There's various conceptual consequences of this (non-exhaustive list): - systemd-coredump is spawned with file descriptor number 0 (stdin) connected to the read-end of the pipe. All other file descriptors are closed. That specifically includes 1 (stdout) and 2 (stderr). This has already caused bugs because userspace assumed that this cannot happen (Whether or not this is a sane assumption is irrelevant.). - systemd-coredump will be spawned as a child of system_unbound_wq. So it is not a child of any userspace process and specifically not a child of PID 1. It cannot be waited upon and is in a weird hybrid upcall which are difficult for userspace to control correctly. - systemd-coredump is spawned with full kernel privileges. This necessitates all kinds of weird privilege dropping excercises in userspace to make this safe. - A new usermode helper has to be spawned for each crashing process. This series adds a new mode: (3) Dumping into an AF_UNIX socket. Userspace can set /proc/sys/kernel/core_pattern to: @/path/to/coredump.socket The "@" at the beginning indicates to the kernel that an AF_UNIX coredump socket will be used to process coredumps. The coredump socket must be located in the initial mount namespace. When a task coredumps it opens a client socket in the initial network namespace and connects to the coredump socket. 
- The coredump server uses SO_PEERPIDFD to get a stable handle on the connected crashing task. The retrieved pidfd will provide a stable reference even if the crashing task gets SIGKILLed while generating the coredump. - By setting core_pipe_limit non-zero userspace can guarantee that the crashing task cannot be reaped behind it's back and thus process all necessary information in /proc/. The SO_PEERPIDFD can be used to detect whether /proc/ still refers to the same process. The core_pipe_limit isn't used to rate-limit connections to the socket. This can simply be done via AF_UNIX sockets directly. - The pidfd for the crashing task will grow new information how the task coredumps. - The coredump server should mark itself as non-dumpable. - A container coredump server in a separate network namespace can simply bind to another well-know address and systemd-coredump fowards coredumps to the container. - Coredumps could in the future also be handled via per-user/session coredump servers that run only with that users privileges. The coredump server listens on the coredump socket and accepts a new coredump connection. It then retrieves SO_PEERPIDFD for the client, inspects uid/gid and hands the accepted client to the users own coredump handler which runs with the users privileges only (It must of coure pay close attention to not forward crashing suid binaries.). The new coredump socket will allow userspace to not have to rely on usermode helpers for processing coredumps and provides a safer way to handle them instead of relying on super privileged coredumping helpers that have and continue to cause significant CVEs. This will also be significantly more lightweight since no fork()+exec() for the usermodehelper is required for each crashing process. The coredump server in userspace can e.g., just keep a worker pool. Link: https://lore.kernel.org/20250516-work-coredump-socket-v8-4-664f3caf2516@kernel.org Acked-by: Luca Boccassi Reviewed-by: Kuniyuki Iwashima Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Jann Horn Signed-off-by: Christian Brauner --- include/linux/net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 0ff950eecc6b..139c85d0f2ea 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -81,6 +81,7 @@ enum sock_type { #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif +#define SOCK_COREDUMP O_NOCTTY #endif /* ARCH_HAS_SOCKET_TYPES */ -- cgit v1.2.3 From 1d8db6fd698de1f73b1a7d72aea578fdd18d9a87 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 16 May 2025 13:25:32 +0200 Subject: pidfs, coredump: add PIDFD_INFO_COREDUMP Extend the PIDFD_INFO_COREDUMP ioctl() with the new PIDFD_INFO_COREDUMP mask flag. This adds the @coredump_mask field to struct pidfd_info. When a task coredumps the kernel will provide the following information to userspace in @coredump_mask: * PIDFD_COREDUMPED is raised if the task did actually coredump. * PIDFD_COREDUMP_SKIP is raised if the task skipped coredumping (e.g., undumpable). * PIDFD_COREDUMP_USER is raised if this is a regular coredump and doesn't need special care by the coredump server. * PIDFD_COREDUMP_ROOT is raised if the generated coredump should be treated as sensitive and the coredump server should restrict to the generated coredump to sufficiently privileged users. The kernel guarantees that by the time the connection is made the all PIDFD_INFO_COREDUMP info is available. 
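A coredump server could query this roughly as follows (illustrative userspace sketch, assuming a pidfd obtained via SO_PEERPIDFD on the accepted connection; error handling trimmed, and the two handler functions are hypothetical):

    struct pidfd_info info = {
        .mask = PIDFD_INFO_COREDUMP,
    };

    if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0)
        return -1;
    if (!(info.mask & PIDFD_INFO_COREDUMP))
        return -1;      /* kernel does not report coredump info */

    if (info.coredump_mask & PIDFD_COREDUMPED) {
        if (info.coredump_mask & PIDFD_COREDUMP_ROOT)
            handle_privileged_coredump();   /* hypothetical */
        else
            handle_user_coredump();         /* hypothetical */
    }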
Link: https://lore.kernel.org/20250516-work-coredump-socket-v8-5-664f3caf2516@kernel.org Acked-by: Luca Boccassi Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Jann Horn Signed-off-by: Christian Brauner --- include/linux/pidfs.h | 5 +++++ include/uapi/linux/pidfd.h | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 2676890c4d0d..77e7db194914 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -2,11 +2,16 @@ #ifndef _LINUX_PID_FS_H #define _LINUX_PID_FS_H +struct coredump_params; + struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); void pidfs_add_pid(struct pid *pid); void pidfs_remove_pid(struct pid *pid); void pidfs_exit(struct task_struct *tsk); +#ifdef CONFIG_COREDUMP +void pidfs_coredump(const struct coredump_params *cprm); +#endif extern const struct dentry_operations pidfs_dentry_operations; int pidfs_register_pid(struct pid *pid); void pidfs_get_pid(struct pid *pid); diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 8c1511edd0e9..c27a4e238e4b 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -25,9 +25,23 @@ #define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */ #define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */ #define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */ +#define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */ #define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ +/* + * Values for @coredump_mask in pidfd_info. + * Only valid if PIDFD_INFO_COREDUMP is set in @mask. + * + * Note, the @PIDFD_COREDUMP_ROOT flag indicates that the generated + * coredump should be treated as sensitive and access should only be + * granted to privileged users. + */ +#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */ +#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */ +#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */ +#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */ + /* * The concept of process and threads in userland and the kernel is a confusing * one - within the kernel every thread is a 'task' with its own individual PID, @@ -92,6 +106,8 @@ struct pidfd_info { __u32 fsuid; __u32 fsgid; __s32 exit_code; + __u32 coredump_mask; + __u32 __spare1; }; #define PIDFS_IOCTL_MAGIC 0xFF -- cgit v1.2.3 From 01465f296a6871222b185c350423978d44431c96 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 9 May 2025 13:24:50 +0100 Subject: nvmem: Remove unused nvmem cell table support Board files are deprecated by DT, and the last user of nvmem_add_cell_table() was removed by commit 2af4fcc0d3574482 ("ARM: davinci: remove unused board support") in v6.3. Hence remove all support for nvmem cell tables, and update the documentation. Device drivers can still register a single cell using nvmem_add_one_cell() (which was not documented before). 
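For reference, registering a single cell from a provider driver looks roughly like this (sketch; the cell name, offset and size are made-up example values, not part of this patch):

    struct nvmem_cell_info mac_cell = {
        .name   = "mac-address",    /* hypothetical cell */
        .offset = 0x24,
        .bytes  = 6,
    };
    int ret;

    /* nvmem is the struct nvmem_device * returned by nvmem_register() */
    ret = nvmem_add_one_cell(nvmem, &mac_cell);
    if (ret)
        return ret;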
Signed-off-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250509122452.11827-2-srini@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 515676ebe598..615a560d9edb 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -137,25 +137,6 @@ struct nvmem_config { struct device *base_dev; }; -/** - * struct nvmem_cell_table - NVMEM cell definitions for given provider - * - * @nvmem_name: Provider name. - * @cells: Array of cell definitions. - * @ncells: Number of cell definitions in the array. - * @node: List node. - * - * This structure together with related helper functions is provided for users - * that don't can't access the nvmem provided structure but wish to register - * cell definitions for it e.g. board files registering an EEPROM device. - */ -struct nvmem_cell_table { - const char *nvmem_name; - const struct nvmem_cell_info *cells; - size_t ncells; - struct list_head node; -}; - /** * struct nvmem_layout - NVMEM layout definitions * @@ -190,9 +171,6 @@ void nvmem_unregister(struct nvmem_device *nvmem); struct nvmem_device *devm_nvmem_register(struct device *dev, const struct nvmem_config *cfg); -void nvmem_add_cell_table(struct nvmem_cell_table *table); -void nvmem_del_cell_table(struct nvmem_cell_table *table); - int nvmem_add_one_cell(struct nvmem_device *nvmem, const struct nvmem_cell_info *info); @@ -223,8 +201,6 @@ devm_nvmem_register(struct device *dev, const struct nvmem_config *c) return nvmem_register(c); } -static inline void nvmem_add_cell_table(struct nvmem_cell_table *table) {} -static inline void nvmem_del_cell_table(struct nvmem_cell_table *table) {} static inline int nvmem_add_one_cell(struct nvmem_device *nvmem, const struct nvmem_cell_info *info) { -- cgit v1.2.3 From b803c4a4f78834b31ebfbbcea350473333760559 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Fri, 2 May 2025 02:12:10 +0900 Subject: can: dev: add struct data_bittiming_params to group FD parameters This is a preparation patch for the introduction of CAN XL. CAN FD and CAN XL uses similar bittiming parameters. Add one level of nesting for all the CAN FD parameters. Typically: priv->can.data_bittiming; becomes: priv->can.fd.data_bittiming; This way, the CAN XL equivalent (to be introduced later) would be: priv->can.xl.data_bittiming; Add the new struct data_bittiming_params which contains all the data bittiming parameters, including the TDC and the callback functions. This done, update all the CAN FD drivers to make use of the new layout. 
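The mechanical part of the driver conversion looks roughly like this (illustrative before/after; the foo_* names are placeholders, not a real driver):

    /* Before */
    priv->can.data_bittiming_const  = &foo_data_bittiming_const;
    priv->can.do_set_data_bittiming = foo_set_data_bittiming;
    dbt = &priv->can.data_bittiming;

    /* After: all CAN FD parameters are grouped under priv->can.fd */
    priv->can.fd.data_bittiming_const  = &foo_data_bittiming_const;
    priv->can.fd.do_set_data_bittiming = foo_set_data_bittiming;
    dbt = &priv->can.fd.data_bittiming;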
Acked-by: Oliver Hartkopp Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250501171213.2161572-2-mailhol.vincent@wanadoo.fr [mkl: fix rcar_canfd] Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 23492213ea35..492d23bec7be 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -38,6 +38,17 @@ enum can_termination_gpio { CAN_TERMINATION_GPIO_MAX, }; +struct data_bittiming_params { + const struct can_bittiming_const *data_bittiming_const; + struct can_bittiming data_bittiming; + const struct can_tdc_const *tdc_const; + struct can_tdc tdc; + const u32 *data_bitrate_const; + unsigned int data_bitrate_const_cnt; + int (*do_set_data_bittiming)(struct net_device *dev); + int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); +}; + /* * CAN common private data */ @@ -45,16 +56,11 @@ struct can_priv { struct net_device *dev; struct can_device_stats can_stats; - const struct can_bittiming_const *bittiming_const, - *data_bittiming_const; - struct can_bittiming bittiming, data_bittiming; - const struct can_tdc_const *tdc_const; - struct can_tdc tdc; - + const struct can_bittiming_const *bittiming_const; + struct can_bittiming bittiming; + struct data_bittiming_params fd; unsigned int bitrate_const_cnt; const u32 *bitrate_const; - const u32 *data_bitrate_const; - unsigned int data_bitrate_const_cnt; u32 bitrate_max; struct can_clock clock; @@ -77,14 +83,12 @@ struct can_priv { struct delayed_work restart_work; int (*do_set_bittiming)(struct net_device *dev); - int (*do_set_data_bittiming)(struct net_device *dev); int (*do_set_mode)(struct net_device *dev, enum can_mode mode); int (*do_set_termination)(struct net_device *dev, u16 term); int (*do_get_state)(const struct net_device *dev, enum can_state *state); int (*do_get_berr_counter)(const struct net_device *dev, struct can_berr_counter *bec); - int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); }; static inline bool can_tdc_is_enabled(const struct can_priv *priv) @@ -114,11 +118,11 @@ static inline bool can_tdc_is_enabled(const struct can_priv *priv) */ static inline s32 can_get_relative_tdco(const struct can_priv *priv) { - const struct can_bittiming *dbt = &priv->data_bittiming; + const struct can_bittiming *dbt = &priv->fd.data_bittiming; s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + dbt->phase_seg1) * dbt->brp; - return (s32)priv->tdc.tdco - sample_point_in_tc; + return (s32)priv->fd.tdc.tdco - sample_point_in_tc; } /* helper to define static CAN controller features at device creation time */ -- cgit v1.2.3 From c1a606cd75fbe98d261b224b6dfb76d47f40dc12 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:47:58 +0100 Subject: fs/netfs: remove unused flag NETFS_SREQ_SEEK_DATA_READ This flag was added by commit 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") but its only user was removed by commit 86b374d061ee ("netfs: Remove fs/netfs/io.c"). 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-3-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c86a11cfc4a3..d315d86d0ad4 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -191,7 +191,6 @@ struct netfs_io_subrequest { unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ -#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_MADE_PROGRESS 4 /* Set if we transferred at least some data */ #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */ -- cgit v1.2.3 From 9cd78ca04fb827f42d7c0d492b96fbb940451266 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:47:59 +0100 Subject: fs/netfs: remove unused source NETFS_INVALID_WRITE This enum choice was added by commit 16af134ca4b7 ("netfs: Extend the netfs_io_*request structs to handle writes") and its only user was later removed by commit c245868524cc ("netfs: Remove the old writeback code"). Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-4-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - include/trace/events/netfs.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index d315d86d0ad4..5a76bea51d24 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -48,7 +48,6 @@ enum netfs_io_source { NETFS_INVALID_READ, NETFS_UPLOAD_TO_SERVER, NETFS_WRITE_TO_CACHE, - NETFS_INVALID_WRITE, } __mode(byte); typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f880835f7695..59ecae3ad0fb 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -77,8 +77,7 @@ EM(NETFS_READ_FROM_CACHE, "READ") \ EM(NETFS_INVALID_READ, "INVL") \ EM(NETFS_UPLOAD_TO_SERVER, "UPLD") \ - EM(NETFS_WRITE_TO_CACHE, "WRIT") \ - E_(NETFS_INVALID_WRITE, "INVL") + E_(NETFS_WRITE_TO_CACHE, "WRIT") #define netfs_sreq_traces \ EM(netfs_sreq_trace_add_donations, "+DON ") \ -- cgit v1.2.3 From 9fcf235e91fae9f3e99f4e09d332ed09296b11ec Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:00 +0100 Subject: fs/netfs: remove unused flag NETFS_ICTX_WRITETHROUGH This flag was added by commit 41d8e7673a77 ("netfs: Implement a write-through caching option") but it was never used. 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-5-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5a76bea51d24..242daec8c837 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -70,7 +70,6 @@ struct netfs_inode { unsigned long flags; #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ -#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ #define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */ #define NETFS_ICTX_SINGLE_NO_UPLOAD 4 /* Monolithic payload, cache but no upload */ }; -- cgit v1.2.3 From d46a7b217d6abbaea78ce5abf4b295e3c1d819d9 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:01 +0100 Subject: fs/netfs: remove unused enum choice NETFS_READ_HOLE_CLEAR This choice was added by commit 3a11b3a86366 ("netfs: Pass more information on how to deal with a hole in the cache") but the last user was removed by commit 86b374d061ee ("netfs: Remove fs/netfs/io.c"). Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-6-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/fscache.h | 3 --- include/linux/netfs.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9de27643607f..fea0d9779b55 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -498,9 +498,6 @@ static inline void fscache_end_operation(struct netfs_cache_resources *cres) * * NETFS_READ_HOLE_IGNORE - Just try to read (may return a short read). * - * NETFS_READ_HOLE_CLEAR - Seek for data, clearing the part of the buffer - * skipped over, then do as for IGNORE. - * * NETFS_READ_HOLE_FAIL - Give ENODATA if we encounter a hole. */ static inline diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 242daec8c837..73537dafa224 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -318,7 +318,6 @@ struct netfs_request_ops { */ enum netfs_read_from_hole { NETFS_READ_HOLE_IGNORE, - NETFS_READ_HOLE_CLEAR, NETFS_READ_HOLE_FAIL, }; -- cgit v1.2.3 From 314ee7035febc86f4f9452fb90a6c2165a183fe5 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:02 +0100 Subject: fs/netfs: reorder struct fields to eliminate holes This shrinks `struct netfs_io_stream` from 104 to 96 bytes and `struct netfs_io_request` from 600 to 576 bytes. 
[DH: Modified as the patch to turn netfs_io_request::error into a short was removed from the set] Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-7-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 73537dafa224..33f145f7f2c2 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -144,8 +144,8 @@ struct netfs_io_stream { struct netfs_io_subrequest *front; /* Op being collected */ unsigned long long collected_to; /* Position we've collected results to */ size_t transferred; /* The amount transferred from this stream */ - enum netfs_io_source source; /* Where to read from/write to */ unsigned short error; /* Aggregate error for the stream */ + enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* Index of stream in parent table */ bool avail; /* T if stream is available */ bool active; /* T if stream is active */ @@ -240,19 +240,10 @@ struct netfs_io_request { void *netfs_priv; /* Private data for the netfs */ void *netfs_priv2; /* Private data for the netfs */ struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ - unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ - unsigned int debug_id; - unsigned int rsize; /* Maximum read size (0 for none) */ - unsigned int wsize; /* Maximum write size (0 for none) */ - atomic_t subreq_counter; /* Next subreq->debug_index */ - unsigned int nr_group_rel; /* Number of refs to release on ->group */ - spinlock_t lock; /* Lock for queuing subreqs */ unsigned long long submitted; /* Amount submitted for I/O so far */ unsigned long long len; /* Length of the request */ size_t transferred; /* Amount to be indicated as transferred */ long error; /* 0 or error that occurred */ - enum netfs_io_origin origin; /* Origin of the request */ - bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ atomic64_t issued_to; /* Write issuer folio cursor */ @@ -260,7 +251,16 @@ struct netfs_io_request { unsigned long long cleaned_to; /* Position we've cleaned folios to */ unsigned long long abandon_to; /* Position to abandon folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ + unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ + unsigned int debug_id; + unsigned int rsize; /* Maximum read size (0 for none) */ + unsigned int wsize; /* Maximum write size (0 for none) */ + atomic_t subreq_counter; /* Next subreq->debug_index */ + unsigned int nr_group_rel; /* Number of refs to release on ->group */ + spinlock_t lock; /* Lock for queuing subreqs */ unsigned char front_folio_order; /* Order (size) of front folio */ + enum netfs_io_origin origin; /* Origin of the request */ + bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ refcount_t ref; unsigned long flags; #define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ -- cgit v1.2.3 From 3dc00bca8dc8226f79f6958293a2777f0691cfb5 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:03 +0100 Subject: fs/netfs: remove `netfs_io_request.ractl` Since this field is only used by netfs_prepare_read_iterator() when called 
by netfs_readahead(), we can simply pass it as parameter. This shrinks the struct from 576 to 568 bytes. Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-8-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 33f145f7f2c2..2b127527544e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -228,7 +228,6 @@ struct netfs_io_request { struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */ - struct readahead_control *ractl; /* Readahead descriptor */ struct list_head proc_link; /* Link in netfs_iorequests */ struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams -- cgit v1.2.3 From 07c08bac9302012d9a91d40e95c8f98800f7d787 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:04 +0100 Subject: fs/netfs: declare field `proc_link` only if CONFIG_PROC_FS=y This field is only used for the "proc" filesystem. Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-9-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 2b127527544e..3f7056d837f8 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -228,7 +228,9 @@ struct netfs_io_request { struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */ +#ifdef CONFIG_PROC_FS struct list_head proc_link; /* Link in netfs_iorequests */ +#endif struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams struct netfs_group *group; /* Writeback group being written back */ -- cgit v1.2.3 From 6bb09e5db3a07cf3e03f25f725bd8edc959e38ba Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:05 +0100 Subject: folio_queue: remove unused field `marks3` The last user was removed by commit e2d46f2ec332 ("netfs: Change the read result collector to only use one work item"). 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-10-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/folio_queue.h | 42 ------------------------------------------ 1 file changed, 42 deletions(-) (limited to 'include') diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 45ad2408a80c..adab609c972e 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -34,7 +34,6 @@ struct folio_queue { struct folio_queue *prev; /* Previous queue segment of NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ - unsigned long marks3; /* Third 1-bit mark per folio */ #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif @@ -58,7 +57,6 @@ static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id) folioq->prev = NULL; folioq->marks = 0; folioq->marks2 = 0; - folioq->marks3 = 0; folioq->rreq_id = rreq_id; folioq->debug_id = 0; } @@ -178,45 +176,6 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) clear_bit(slot, &folioq->marks2); } -/** - * folioq_is_marked3: Check third folio mark in a folio queue segment - * @folioq: The segment to query - * @slot: The slot number of the folio to query - * - * Determine if the third mark is set for the folio in the specified slot in a - * folio queue segment. - */ -static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) -{ - return test_bit(slot, &folioq->marks3); -} - -/** - * folioq_mark3: Set the third mark on a folio in a folio queue segment - * @folioq: The segment to modify - * @slot: The slot number of the folio to modify - * - * Set the third mark for the folio in the specified slot in a folio queue - * segment. - */ -static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) -{ - set_bit(slot, &folioq->marks3); -} - -/** - * folioq_unmark3: Clear the third mark on a folio in a folio queue segment - * @folioq: The segment to modify - * @slot: The slot number of the folio to modify - * - * Clear the third mark for the folio in the specified slot in a folio queue - * segment. - */ -static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) -{ - clear_bit(slot, &folioq->marks3); -} - /** * folioq_append: Add a folio to a folio queue segment * @folioq: The segment to add to @@ -318,7 +277,6 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) folioq->vec.folios[slot] = NULL; folioq_unmark(folioq, slot); folioq_unmark2(folioq, slot); - folioq_unmark3(folioq, slot); } #endif /* _LINUX_FOLIO_QUEUE_H */ -- cgit v1.2.3 From 67b916719a15c33fd18d6e8298828caeef428d00 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:06 +0100 Subject: fs/netfs: remove unused flag NETFS_RREQ_DONT_UNLOCK_FOLIOS NETFS_RREQ_DONT_UNLOCK_FOLIOS has never been used ever since it was added by commit 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers"). 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-11-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 3f7056d837f8..5f60d8e3a7ef 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -266,7 +266,6 @@ struct netfs_io_request { unsigned long flags; #define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ -#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ -- cgit v1.2.3 From 4b1ca12dd3f2529dc788cf4f18259ed62006ccb8 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:07 +0100 Subject: fs/netfs: remove unused flag NETFS_RREQ_BLOCKED NETFS_RREQ_BLOCKED was added by commit 016dc8516aec ("netfs: Implement unbuffered/DIO read support") but has never been used either. Without NETFS_RREQ_BLOCKED, NETFS_RREQ_NONBLOCK makes no sense, and thus can be removed as well. Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-12-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5f60d8e3a7ef..cf634c28522d 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -270,8 +270,6 @@ struct netfs_io_request { #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ -#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ -#define NETFS_RREQ_BLOCKED 10 /* We blocked */ #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -- cgit v1.2.3 From 34eb98c6598c4057640ca56dd1fad6555187473a Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 19 May 2025 10:07:02 +0100 Subject: netfs: Fix setting of transferred bytes with short DIO reads A netfslib request comprises an ordered stream of subrequests that, when doing an unbuffered/DIO read, are contiguous. The subrequests may be performed in parallel, but may not be fully completed. For instance, if we try and make a 256KiB DIO read from a 3-byte file with a 64KiB rsize and 256KiB bsize, netfslib will attempt to make a read of 256KiB, broken up into four 64KiB subreads, with the expectation that the first will be short and the subsequent three be completely devoid - but we do all four on the basis that the file may have been changed by a third party. 
The read-collection code, however, walks through all the subreqs and advances the notion of how much data has been read in the stream to the start of each subreq plus its amount transferred (which are 3, 0, 0, 0 for the example above) - which gives an amount apparently read of 3*64KiB - which is incorrect. Fix the collection code to cut short the calculation of the transferred amount with the first short subrequest in an unbuffered read; everything beyond that must be ignored as there's a hole that cannot be filled. This applies both to shortness due to hitting the EOF and shortness due to an error. This is achieved by setting a flag on the request when we collect the first short subrequest (collection is done in ascending order). This can be tested by mounting a cifs volume with rsize=65536,bsize=262144 and doing a 256k DIO read of a very small file (e.g. 3 bytes). read() should return 3, not >3. This problem came in when netfs_read_collection() set rreq->transferred to stream->transferred, even for DIO. Prior to that, netfs_rreq_assess_dio() just went over the list and added up the subreqs till it met a short one - but now the subreqs are discarded earlier. Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Reported-by: Nicolas Baranger Closes: https://lore.kernel.org/all/10bec2430ed4df68bde10ed95295d093@3xo.fr/ Signed-off-by: "Paulo Alcantara (Red Hat)" Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-3-dhowells@redhat.com cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c86a11cfc4a3..497c4f4698f6 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -279,6 +279,7 @@ struct netfs_io_request { #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ #define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; -- cgit v1.2.3 From 20d72b00ca814d748f5663484e5c53bb2bf37a3a Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 May 2025 10:07:03 +0100 Subject: netfs: Fix the request's work item to not require a ref When the netfs_io_request struct's work item is queued, it must be supplied with a ref to the work item struct to prevent it being deallocated whilst on the queue or whilst it is being processed. This is tricky to manage as we have to get a ref before we try and queue it and then we may find it's already queued and is thus already holding a ref - in which case we have to try and get rid of the ref again. The problem comes if we're in BH or IRQ context and need to drop the ref: if netfs_put_request() reduces the count to 0, we have to do the cleanup - but the cleanup may need to wait. Fix this by adding a new work item to the request, ->cleanup_work, and dispatching that when the refcount hits zero. That can then synchronously cancel any outstanding work on the main work item before doing the cleanup. 
Adding a new work item also deals with another problem upstream where it's sometimes changing the work func in the put function and requeuing it - which has occasionally in the past caused the cleanup to happen incorrectly. As a bonus, this allows us to get rid of the 'was_async' parameter from a bunch of functions. This indicated whether the put function might not be permitted to sleep. Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-4-dhowells@redhat.com cc: Paulo Alcantara cc: Marc Dionne cc: Steve French cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/fscache.h | 2 +- include/linux/netfs.h | 13 ++++++------- include/trace/events/netfs.h | 7 +++---- 3 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9de27643607f..266e6c9e6f83 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -628,7 +628,7 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, term_func, term_func_priv, using_pgpriv2, caching); else if (term_func) - term_func(term_func_priv, -ENOBUFS, false); + term_func(term_func_priv, -ENOBUFS); } diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 497c4f4698f6..c3f230732f51 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -51,8 +51,7 @@ enum netfs_io_source { NETFS_INVALID_WRITE, } __mode(byte); -typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, - bool was_async); +typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error); /* * Per-inode context. This wraps the VFS inode. 
@@ -223,9 +222,10 @@ enum netfs_io_origin { */ struct netfs_io_request { union { - struct work_struct work; + struct work_struct cleanup_work; /* Deferred cleanup work */ struct rcu_head rcu; }; + struct work_struct work; /* Result collector work */ struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ @@ -270,7 +270,7 @@ struct netfs_io_request { #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ +#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ @@ -440,15 +440,14 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, - bool was_async, enum netfs_sreq_ref_trace what); + enum netfs_sreq_ref_trace what); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs); void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); -void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, - bool was_async); +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error); void netfs_queue_write_request(struct netfs_io_subrequest *subreq); int netfs_start_io_read(struct inode *inode); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f880835f7695..402c5e82e7b8 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -30,6 +30,7 @@ EM(netfs_write_trace_dio_write, "DIO-WRITE") \ EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \ EM(netfs_write_trace_writeback, "WRITEBACK") \ + EM(netfs_write_trace_writeback_single, "WB-SINGLE") \ E_(netfs_write_trace_writethrough, "WRITETHRU") #define netfs_rreq_origins \ @@ -128,17 +129,15 @@ #define netfs_rreq_ref_traces \ EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \ EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \ - EM(netfs_rreq_trace_get_work, "GET WORK ") \ EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \ EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \ EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_rreq_trace_put_no_submit, "PUT NO-SUBM") \ EM(netfs_rreq_trace_put_return, "PUT RETURN ") \ EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ - EM(netfs_rreq_trace_put_work, "PUT WORK ") \ - EM(netfs_rreq_trace_put_work_complete, "PUT WORK CP") \ - EM(netfs_rreq_trace_put_work_nq, "PUT WORK NQ") \ + EM(netfs_rreq_trace_put_work_ip, "PUT WORK IP ") \ EM(netfs_rreq_trace_see_work, "SEE WORK ") \ + EM(netfs_rreq_trace_see_work_complete, "SEE WORK CP") \ E_(netfs_rreq_trace_new, "NEW ") #define netfs_sreq_ref_traces \ -- cgit v1.2.3 From 35ac2034db72bbbc73609aab5f05ff6e0d38fdd0 Mon Sep 17 00:00:00 2001 From: Akshay Gupta Date: Mon, 28 Apr 2025 06:30:30 +0000 Subject: misc: amd-sbi: 
Add support for AMD_SBI IOCTL The present sbrmi module only support reporting power via hwmon. However, AMD data center range of processors support various system management functionality using custom protocols defined in Advanced Platform Management Link (APML) specification. Register a miscdevice, which creates a device /dev/sbrmiX with an IOCTL interface for the user space to invoke the APML Mailbox protocol, which is already defined in sbrmi_mailbox_xfer(). The APML protocols depend on a set of RMI registers. Having an IOCTL as a single entry point will help in providing synchronization among these protocols as multiple transactions on RMI register set may create race condition. Support for other protocols will be added in subsequent patches. APML mailbox protocol returns additional error codes written by SMU firmware in the out-bound register 0x37. These errors include, invalid core, message not supported over platform and others. This additional error codes can be used to provide more details to user space. Open-sourced and widely used https://github.com/amd/esmi_oob_library will continue to provide user-space programmable API. Reviewed-by: Naveen Krishna Chatradhi Signed-off-by: Akshay Gupta Link: https://lore.kernel.org/r/20250428063034.2145566-7-akshay.gupta@amd.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/amd-apml.h | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 include/uapi/misc/amd-apml.h (limited to 'include') diff --git a/include/uapi/misc/amd-apml.h b/include/uapi/misc/amd-apml.h new file mode 100644 index 000000000000..a5f086f84b06 --- /dev/null +++ b/include/uapi/misc/amd-apml.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. + */ +#ifndef _AMD_APML_H_ +#define _AMD_APML_H_ + +#include + +/* Mailbox data size for data_in and data_out */ +#define AMD_SBI_MB_DATA_SIZE 4 + +struct apml_mbox_msg { + /* + * Mailbox Message ID + */ + __u32 cmd; + /* + * [0]...[3] mailbox 32bit input/output data + */ + __u32 mb_in_out; + /* + * Error code is returned in case of soft mailbox error + */ + __u32 fw_ret_code; +}; + +/* + * AMD sideband interface base IOCTL + */ +#define SB_BASE_IOCTL_NR 0xF9 + +/** + * DOC: SBRMI_IOCTL_MBOX_CMD + * + * @Parameters + * + * @struct apml_mbox_msg + * Pointer to the &struct apml_mbox_msg that will contain the protocol + * information + * + * @Description + * IOCTL command for APML messages using generic _IOWR + * The IOCTL provides userspace access to AMD sideband mailbox protocol + * - Mailbox message read/write(0x0~0xFF) + * - returning "-EFAULT" if none of the above + * "-EPROTOTYPE" error is returned to provide additional error details + */ +#define SBRMI_IOCTL_MBOX_CMD _IOWR(SB_BASE_IOCTL_NR, 0, struct apml_mbox_msg) + +#endif /*_AMD_APML_H_*/ -- cgit v1.2.3 From bb13a84ed6b78200952b264b4d7a024b730e8246 Mon Sep 17 00:00:00 2001 From: Akshay Gupta Date: Mon, 28 Apr 2025 06:30:31 +0000 Subject: misc: amd-sbi: Add support for CPUID protocol - AMD provides custom protocol to read Processor feature capabilities and configuration information through side band. The information is accessed by providing CPUID Function, extended function and thread ID to the protocol. Undefined function returns 0. 
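For illustration, a minimal userspace sketch of driving this protocol through the IOCTL added below. The /dev/sbrmi0 node name, the header install path and the exact bit packing of cpu_in_out are assumptions derived from the descriptions in this series, not a verified example:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <misc/amd-apml.h>        /* assumed UAPI install path */

int main(void)
{
        struct apml_cpuid_msg msg = { 0 };
        uint32_t func = 0x1;      /* CPUID function to query */
        uint16_t thread = 0;      /* target thread ID */
        uint8_t ext_sel = 0;      /* bits [7:4] ext function, bit [0] eax/ebx vs ecx/edx */
        int fd, ret;

        /* Pack per the header comment: bytes 0-3 func, 4-5 thread, 6 ext/select */
        msg.cpu_in_out = (uint64_t)func |
                         ((uint64_t)thread << 32) |
                         ((uint64_t)ext_sel << 48);

        fd = open("/dev/sbrmi0", O_RDWR);
        if (fd < 0)
                return 1;

        ret = ioctl(fd, SBRMI_IOCTL_CPUID_CMD, &msg);
        if (ret)
                fprintf(stderr, "cpuid failed: ret=%d fw_ret_code=%u\n",
                        ret, msg.fw_ret_code);
        else
                printf("cpuid output: 0x%llx\n",
                       (unsigned long long)msg.cpu_in_out);

        close(fd);
        return ret ? 1 : 0;
}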
Reviewed-by: Naveen Krishna Chatradhi Signed-off-by: Akshay Gupta Link: https://lore.kernel.org/r/20250428063034.2145566-8-akshay.gupta@amd.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/amd-apml.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include') diff --git a/include/uapi/misc/amd-apml.h b/include/uapi/misc/amd-apml.h index a5f086f84b06..bb57dc75758a 100644 --- a/include/uapi/misc/amd-apml.h +++ b/include/uapi/misc/amd-apml.h @@ -25,6 +25,24 @@ struct apml_mbox_msg { __u32 fw_ret_code; }; +struct apml_cpuid_msg { + /* + * CPUID input + * [0]...[3] cpuid func, + * [4][5] cpuid: thread + * [6] cpuid: ext function & read eax/ebx or ecx/edx + * [7:0] -> bits [7:4] -> ext function & + * bit [0] read eax/ebx or ecx/edx + * CPUID output + */ + __u64 cpu_in_out; + /* + * Status code for CPUID read + */ + __u32 fw_ret_code; + __u32 pad; +}; + /* * AMD sideband interface base IOCTL */ @@ -48,4 +66,23 @@ struct apml_mbox_msg { */ #define SBRMI_IOCTL_MBOX_CMD _IOWR(SB_BASE_IOCTL_NR, 0, struct apml_mbox_msg) +/** + * DOC: SBRMI_IOCTL_CPUID_CMD + * + * @Parameters + * + * @struct apml_cpuid_msg + * Pointer to the &struct apml_cpuid_msg that will contain the protocol + * information + * + * @Description + * IOCTL command for APML messages using generic _IOWR + * The IOCTL provides userspace access to AMD sideband cpuid protocol + * - CPUID protocol to get CPU details for Function/Ext Function + * at thread level + * - returning "-EFAULT" if none of the above + * "-EPROTOTYPE" error is returned to provide additional error details + */ +#define SBRMI_IOCTL_CPUID_CMD _IOWR(SB_BASE_IOCTL_NR, 1, struct apml_cpuid_msg) + #endif /*_AMD_APML_H_*/ -- cgit v1.2.3 From 69b1ba83d21c4a89f6fcfbca1d515a60df65cf9e Mon Sep 17 00:00:00 2001 From: Akshay Gupta Date: Mon, 28 Apr 2025 06:30:32 +0000 Subject: misc: amd-sbi: Add support for read MCA register protocol - AMD provides custom protocol to read Machine Check Architecture(MCA) registers over sideband. The information is accessed for range of MCA registers by passing register address and thread ID to the protocol. MCA register read command using the register address to access Core::X86::Msr::MCG_CAP which determines the number of MCA banks. 
Access is read-only Reviewed-by: Naveen Krishna Chatradhi Signed-off-by: Akshay Gupta Link: https://lore.kernel.org/r/20250428063034.2145566-9-akshay.gupta@amd.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/amd-apml.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/uapi/misc/amd-apml.h b/include/uapi/misc/amd-apml.h index bb57dc75758a..f718675d3966 100644 --- a/include/uapi/misc/amd-apml.h +++ b/include/uapi/misc/amd-apml.h @@ -43,6 +43,21 @@ struct apml_cpuid_msg { __u32 pad; }; +struct apml_mcamsr_msg { + /* + * MCAMSR input + * [0]...[3] mca msr func, + * [4][5] thread + * MCAMSR output + */ + __u64 mcamsr_in_out; + /* + * Status code for MCA/MSR access + */ + __u32 fw_ret_code; + __u32 pad; +}; + /* * AMD sideband interface base IOCTL */ @@ -85,4 +100,22 @@ struct apml_cpuid_msg { */ #define SBRMI_IOCTL_CPUID_CMD _IOWR(SB_BASE_IOCTL_NR, 1, struct apml_cpuid_msg) +/** + * DOC: SBRMI_IOCTL_MCAMSR_CMD + * + * @Parameters + * + * @struct apml_mcamsr_msg + * Pointer to the &struct apml_mcamsr_msg that will contain the protocol + * information + * + * @Description + * IOCTL command for APML messages using generic _IOWR + * The IOCTL provides userspace access to AMD sideband MCAMSR protocol + * - MCAMSR protocol to get MCA bank details for Function at thread level + * - returning "-EFAULT" if none of the above + * "-EPROTOTYPE" error is returned to provide additional error details + */ +#define SBRMI_IOCTL_MCAMSR_CMD _IOWR(SB_BASE_IOCTL_NR, 2, struct apml_mcamsr_msg) + #endif /*_AMD_APML_H_*/ -- cgit v1.2.3 From cf141287b77485ed7624ac1756b85cc801748c7c Mon Sep 17 00:00:00 2001 From: Akshay Gupta Date: Mon, 28 Apr 2025 06:30:33 +0000 Subject: misc: amd-sbi: Add support for register xfer - Provide user register access over IOCTL. Both register read and write are supported. - APML interface does not provide a synchronization method. By defining, a register access path, we use APML modules and library for all APML transactions. Without having to use external tools such as i2c-tools, which may cause race conditions. 
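Analogous to the CPUID sketch earlier in this series description, a hedged userspace sketch of a single register read through the new IOCTL follows. The /dev/sbrmi0 node, the header install path and the rflag encoding (0 is assumed to select a read) are assumptions, not verified behaviour:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <misc/amd-apml.h>        /* assumed UAPI install path */

int main(void)
{
        struct apml_reg_xfer_msg msg = {
                .reg_addr = 0x01, /* example RMI register offset */
                .rflag = 0,       /* assumed: 0 requests a read */
        };
        int fd, ret;

        fd = open("/dev/sbrmi0", O_RDWR);
        if (fd < 0)
                return 1;

        ret = ioctl(fd, SBRMI_IOCTL_REG_XFER_CMD, &msg);
        if (!ret)
                printf("reg 0x%02x = 0x%02x\n", msg.reg_addr, msg.data_in_out);

        close(fd);
        return ret ? 1 : 0;
}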
Reviewed-by: Naveen Krishna Chatradhi Signed-off-by: Akshay Gupta Link: https://lore.kernel.org/r/20250428063034.2145566-10-akshay.gupta@amd.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/amd-apml.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/uapi/misc/amd-apml.h b/include/uapi/misc/amd-apml.h index f718675d3966..745b3338fc06 100644 --- a/include/uapi/misc/amd-apml.h +++ b/include/uapi/misc/amd-apml.h @@ -58,6 +58,21 @@ struct apml_mcamsr_msg { __u32 pad; }; +struct apml_reg_xfer_msg { + /* + * RMI register address offset + */ + __u16 reg_addr; + /* + * Register data for read/write + */ + __u8 data_in_out; + /* + * Register read or write + */ + __u8 rflag; +}; + /* * AMD sideband interface base IOCTL */ @@ -118,4 +133,20 @@ struct apml_mcamsr_msg { */ #define SBRMI_IOCTL_MCAMSR_CMD _IOWR(SB_BASE_IOCTL_NR, 2, struct apml_mcamsr_msg) +/** + * DOC: SBRMI_IOCTL_REG_XFER_CMD + * + * @Parameters + * + * @struct apml_reg_xfer_msg + * Pointer to the &struct apml_reg_xfer_msg that will contain the protocol + * information + * + * @Description + * IOCTL command for APML messages using generic _IOWR + * The IOCTL provides userspace access to AMD sideband register xfer protocol + * - Register xfer protocol to get/set hardware register for given offset + */ +#define SBRMI_IOCTL_REG_XFER_CMD _IOWR(SB_BASE_IOCTL_NR, 3, struct apml_reg_xfer_msg) + #endif /*_AMD_APML_H_*/ -- cgit v1.2.3 From 8f08055bc67a355aca55856cc810b89645506a5f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:28 +0100 Subject: iio: introduced iio_push_to_buffers_with_ts() that takes a data_total_len argument. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check that data_total_len argument against iio_dev->scan_bytes. The size needs to be at least as big as the scan. It can be larger, which is typical if only part of fixed sized storage is used due to a subset of channels being enabled. Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-6-jic23@kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 3b8d618bb3df..5c84ec4a9810 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -45,6 +45,18 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev, return iio_push_to_buffers(indio_dev, data); } +static inline int iio_push_to_buffers_with_ts(struct iio_dev *indio_dev, + void *data, size_t data_total_len, + s64 timestamp) +{ + if (unlikely(data_total_len < indio_dev->scan_bytes)) { + dev_err(&indio_dev->dev, "Undersized storage pushed to buffer\n"); + return -ENOSPC; + } + + return iio_push_to_buffers_with_timestamp(indio_dev, data, timestamp); +} + int iio_push_to_buffers_with_ts_unaligned(struct iio_dev *indio_dev, const void *data, size_t data_sz, int64_t timestamp); -- cgit v1.2.3 From 7e00d74eacf77c98a60e7c754a9e5f73d8832095 Mon Sep 17 00:00:00 2001 From: Chelsy Ratnawat Date: Thu, 1 May 2025 17:36:55 -0700 Subject: HID: sensor-hub: Fix typo and improve documentation for sensor_hub_remove_callback() Fixed a typo in "registered" and improved grammar for better readability and consistency with kernel-doc standards. No functional changes. 
Signed-off-by: Chelsy Ratnawat Reviewed-by: David Lechner Link: https://patch.msgid.link/20250502003655.1943000-1-chelsyratnawat2001@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index c27329e2a5ad..0f9f7df865db 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -128,12 +128,13 @@ int sensor_hub_register_callback(struct hid_sensor_hub_device *hsdev, struct hid_sensor_hub_callbacks *usage_callback); /** -* sensor_hub_remove_callback() - Remove client callbacks +* sensor_hub_remove_callback() - Remove client callback * @hsdev: Hub device instance. -* @usage_id: Usage id of the client (E.g. 0x200076 for Gyro). +* @usage_id: Usage id of the client (e.g. 0x200076 for gyro). * -* If there is a callback registred, this call will remove that -* callbacks, so that it will stop data and event notifications. +* Removes a previously registered callback for the given usage_id +* and hsdev. Once removed, the client will no longer receive data or +* event notifications. */ int sensor_hub_remove_callback(struct hid_sensor_hub_device *hsdev, u32 usage_id); -- cgit v1.2.3 From fa19c303254bae5b8d105ecdc7d714d092f2adda Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 7 May 2025 15:42:40 -0500 Subject: iio: make IIO_DMA_MINALIGN minimum of 8 bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a condition to ensure that IIO_DMA_MINALIGN is at least 8 bytes. On some 32-bit architectures, IIO_DMA_MINALIGN is 4. In many cases, drivers are using this alignment for buffers that include a 64-bit timestamp that is used with iio_push_to_buffers_with_ts(), which expects the timestamp to be aligned to 8 bytes. To handle this, we can just make IIO_DMA_MINALIGN at least 8 bytes. Reviewed-by: Nuno Sá Signed-off-by: David Lechner Link: https://patch.msgid.link/20250507-iio-introduce-iio_declare_buffer_with_ts-v6-1-4aee1b9f1b89@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 638cf2420fbd..a574f22398e4 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include /* IIO TODO LIST */ @@ -775,8 +776,14 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * to in turn include IIO_DMA_MINALIGN'd elements such as buffers which * must not share cachelines with the rest of the structure, thus making * them safe for use with non-coherent DMA. + * + * A number of drivers also use this on buffers that include a 64-bit timestamp + * that is used with iio_push_to_buffer_with_ts(). Therefore, in the case where + * DMA alignment is not sufficient for proper timestamp alignment, we align to + * 8 bytes instead. 
*/ -#define IIO_DMA_MINALIGN ARCH_DMA_MINALIGN +#define IIO_DMA_MINALIGN MAX(ARCH_DMA_MINALIGN, sizeof(s64)) + struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ -- cgit v1.2.3 From 63fc53526d3090b27bd06bb43b92e8bc85f46fb1 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 7 May 2025 15:42:41 -0500 Subject: iio: introduce IIO_DECLARE_BUFFER_WITH_TS macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new macros to help with the common case of declaring a buffer that is safe to use with iio_push_to_buffers_with_ts(). This is not trivial to do correctly because of the alignment requirements of the timestamp. This will make it easier for both authors and reviewers. To avoid double __align() attributes in cases where we also need DMA alignment, add a 2nd variant IIO_DECLARE_DMA_BUFFER_WITH_TS(). Reviewed-by: Nuno Sá Signed-off-by: David Lechner Link: https://patch.msgid.link/20250507-iio-introduce-iio_declare_buffer_with_ts-v6-2-4aee1b9f1b89@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index a574f22398e4..d11668f14a3e 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -7,6 +7,7 @@ #ifndef _INDUSTRIAL_IO_H_ #define _INDUSTRIAL_IO_H_ +#include #include #include #include @@ -784,6 +785,37 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) */ #define IIO_DMA_MINALIGN MAX(ARCH_DMA_MINALIGN, sizeof(s64)) +#define __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) \ + type name[ALIGN((count), sizeof(s64) / sizeof(type)) + sizeof(s64) / sizeof(type)] + +/** + * IIO_DECLARE_BUFFER_WITH_TS() - Declare a buffer with timestamp + * @type: element type of the buffer + * @name: identifier name of the buffer + * @count: number of elements in the buffer + * + * Declares a buffer that is safe to use with iio_push_to_buffer_with_ts(). In + * addition to allocating enough space for @count elements of @type, it also + * allocates space for a s64 timestamp at the end of the buffer and ensures + * proper alignment of the timestamp. + */ +#define IIO_DECLARE_BUFFER_WITH_TS(type, name, count) \ + __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(sizeof(s64)) + +/** + * IIO_DECLARE_DMA_BUFFER_WITH_TS() - Declare a DMA-aligned buffer with timestamp + * @type: element type of the buffer + * @name: identifier name of the buffer + * @count: number of elements in the buffer + * + * Same as IIO_DECLARE_BUFFER_WITH_TS(), but is uses __aligned(IIO_DMA_MINALIGN) + * to ensure that the buffer doesn't share cachelines with anything that comes + * before it in a struct. This should not be used for stack-allocated buffers + * as stack memory cannot generally be used for DMA. 
+ */ +#define IIO_DECLARE_DMA_BUFFER_WITH_TS(type, name, count) \ + __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(IIO_DMA_MINALIGN) + struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ -- cgit v1.2.3 From 27737b8407585c0160063b9b39e888d486d86188 Mon Sep 17 00:00:00 2001 From: Chelsy Ratnawat Date: Tue, 6 May 2025 22:57:45 -0700 Subject: HID: sensor-hub: Fix typo and improve documentation Includes the following corrections - - Changed Measurment -> Measurement - Changed clode -> close - Gyro -> gyro Signed-off-by: Chelsy Ratnawat Reviewed-by: David Lechner Link: https://patch.msgid.link/20250507055745.4069933-1-chelsyratnawat2001@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 0f9f7df865db..e71056553108 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -17,7 +17,7 @@ * @attrib_id: Attribute id for this attribute. * @report_id: Report id in which this information resides. * @index: Field index in the report. - * @units: Measurment unit for this attribute. + * @units: Measurement unit for this attribute. * @unit_expo: Exponent used in the data. * @size: Size in bytes for data size. * @logical_minimum: Logical minimum value for this attribute. @@ -39,8 +39,8 @@ struct hid_sensor_hub_attribute_info { * struct sensor_hub_pending - Synchronous read pending information * @status: Pending status true/false. * @ready: Completion synchronization data. - * @usage_id: Usage id for physical device, E.g. Gyro usage id. - * @attr_usage_id: Usage Id of a field, E.g. X-AXIS for a gyro. + * @usage_id: Usage id for physical device, e.g. gyro usage id. + * @attr_usage_id: Usage Id of a field, e.g. X-axis for a gyro. * @raw_size: Response size for a read request. * @raw_data: Place holder for received response. */ @@ -104,10 +104,10 @@ struct hid_sensor_hub_callbacks { int sensor_hub_device_open(struct hid_sensor_hub_device *hsdev); /** -* sensor_hub_device_clode() - Close hub device +* sensor_hub_device_close() - Close hub device * @hsdev: Hub device instance. * -* Used to clode hid device for sensor hub. +* Used to close hid device for sensor hub. */ void sensor_hub_device_close(struct hid_sensor_hub_device *hsdev); -- cgit v1.2.3 From 04425292a62c15d1fde714522beaf8f3c2ed1de9 Mon Sep 17 00:00:00 2001 From: Hsin-chen Chuang Date: Wed, 16 Apr 2025 09:53:35 +0000 Subject: Bluetooth: Introduce HCI Driver protocol Although commit 75ddcd5ad40e ("Bluetooth: btusb: Configure altsetting for HCI_USER_CHANNEL") has enabled the HCI_USER_CHANNEL user to send out SCO data through USB Bluetooth chips, it's observed that with the patch HFP is flaky on most of the existing USB Bluetooth controllers: Intel chips sometimes send out no packet for Transparent codec; MTK chips may generate SCO data with a wrong handle for CVSD codec; RTK could split the data with a wrong packet size for Transparent codec; ... etc. To address the issue above one needs to reset the altsetting back to zero when there is no active SCO connection, which is the same as the BlueZ behavior, and another benefit is the bus doesn't need to reserve bandwidth when no SCO connection. 
This patch adds the infrastructure that allow the user space program to talk to Bluetooth drivers directly: - Define the new packet type HCI_DRV_PKT which is specifically used for communication between the user space program and the Bluetooth drviers - hci_send_frame intercepts the packets and invokes drivers' HCI Drv callbacks (so far only defined for btusb) - 2 kinds of events to user space: Command Status and Command Complete, the former simply returns the status while the later may contain additional response data. Cc: chromeos-bluetooth-upstreaming@chromium.org Fixes: b16b327edb4d ("Bluetooth: btusb: add sysfs attribute to control USB alt setting") Signed-off-by: Hsin-chen Chuang Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_core.h | 3 ++ include/net/bluetooth/hci_drv.h | 76 ++++++++++++++++++++++++++++++++++++++++ include/net/bluetooth/hci_mon.h | 2 ++ 4 files changed, 82 insertions(+) create mode 100644 include/net/bluetooth/hci_drv.h (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 797992019f9e..2502febf4da7 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -494,6 +494,7 @@ enum { #define HCI_EVENT_PKT 0x04 #define HCI_ISODATA_PKT 0x05 #define HCI_DIAG_PKT 0xf0 +#define HCI_DRV_PKT 0xf1 #define HCI_VENDOR_PKT 0xff /* HCI packet types */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 54bfeeaa0995..57f6175fd1cd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -613,6 +614,8 @@ struct hci_dev { struct list_head monitored_devices; bool advmon_pend_notify; + struct hci_drv *hci_drv; + #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; #endif diff --git a/include/net/bluetooth/hci_drv.h b/include/net/bluetooth/hci_drv.h new file mode 100644 index 000000000000..2f01c44f05ec --- /dev/null +++ b/include/net/bluetooth/hci_drv.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Google Corporation + */ + +#ifndef __HCI_DRV_H +#define __HCI_DRV_H + +#include + +#include +#include + +struct hci_drv_cmd_hdr { + __le16 opcode; + __le16 len; +} __packed; + +struct hci_drv_ev_hdr { + __le16 opcode; + __le16 len; +} __packed; + +#define HCI_DRV_EV_CMD_STATUS 0x0000 +struct hci_drv_ev_cmd_status { + __le16 opcode; + __u8 status; +} __packed; + +#define HCI_DRV_EV_CMD_COMPLETE 0x0001 +struct hci_drv_ev_cmd_complete { + __le16 opcode; + __u8 status; + __u8 data[]; +} __packed; + +#define HCI_DRV_STATUS_SUCCESS 0x00 +#define HCI_DRV_STATUS_UNSPECIFIED_ERROR 0x01 +#define HCI_DRV_STATUS_UNKNOWN_COMMAND 0x02 +#define HCI_DRV_STATUS_INVALID_PARAMETERS 0x03 + +#define HCI_DRV_MAX_DRIVER_NAME_LENGTH 32 + +/* Common commands that make sense on all drivers start from 0x0000 */ +#define HCI_DRV_OP_READ_INFO 0x0000 +#define HCI_DRV_READ_INFO_SIZE 0 +struct hci_drv_rp_read_info { + __u8 driver_name[HCI_DRV_MAX_DRIVER_NAME_LENGTH]; + __le16 num_supported_commands; + __le16 supported_commands[]; +} __packed; + +/* Driver specific OGF (Opcode Group Field) + * Commands in this group may have different meanings across different drivers. 
+ */ +#define HCI_DRV_OGF_DRIVER_SPECIFIC 0x01 + +int hci_drv_cmd_status(struct hci_dev *hdev, u16 cmd, u8 status); +int hci_drv_cmd_complete(struct hci_dev *hdev, u16 cmd, u8 status, void *rp, + size_t rp_len); +int hci_drv_process_cmd(struct hci_dev *hdev, struct sk_buff *cmd_skb); + +struct hci_drv_handler { + int (*func)(struct hci_dev *hdev, void *data, u16 data_len); + size_t data_len; +}; + +struct hci_drv { + size_t common_handler_count; + const struct hci_drv_handler *common_handlers; + + size_t specific_handler_count; + const struct hci_drv_handler *specific_handlers; +}; + +#endif /* __HCI_DRV_H */ diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 082f89531b88..bbd752494ef9 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -51,6 +51,8 @@ struct hci_mon_hdr { #define HCI_MON_CTRL_EVENT 17 #define HCI_MON_ISO_TX_PKT 18 #define HCI_MON_ISO_RX_PKT 19 +#define HCI_MON_DRV_TX_PKT 20 +#define HCI_MON_DRV_RX_PKT 21 struct hci_mon_new_index { __u8 type; -- cgit v1.2.3 From dd0ccf858057b793beb3779be7576d92c93cf828 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 27 Apr 2025 14:27:25 +0300 Subject: Bluetooth: add support for SIOCETHTOOL ETHTOOL_GET_TS_INFO Bluetooth needs some way for user to get supported so_timestamping flags for the different socket types. Use SIOCETHTOOL API for this purpose. As hci_dev is not associated with struct net_device, the existing implementation can't be reused, so we add a small one here. Add support (only) for ETHTOOL_GET_TS_INFO command. The API differs slightly from netdev in that the result depends also on socket type. Signed-off-by: Pauli Virtanen Acked-by: Willem de Bruijn Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index bbefde319f95..114299bd8b98 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -29,6 +29,7 @@ #include #include #include +#include #define BT_SUBSYS_VERSION 2 #define BT_SUBSYS_REVISION 22 @@ -448,6 +449,9 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, hci_req_complete_t *req_complete, hci_req_complete_skb_t *req_complete_skb); +int hci_ethtool_ts_info(unsigned int index, int sk_proto, + struct kernel_ethtool_ts_info *ts_info); + #define HCI_REQ_START BIT(0) #define HCI_REQ_SKB BIT(1) -- cgit v1.2.3 From 23205562ffc8de20f57afdd984858cab29e77968 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 3 May 2025 17:08:21 +0300 Subject: Bluetooth: separate CIS_LINK and BIS_LINK link types Use separate link type id for unicast and broadcast ISO connections. These connection types are handled with separate HCI commands, socket API is different, and hci_conn has union fields that are different in the two cases, so they shall not be mixed up. Currently in most places it is attempted to distinguish ucast by bacmp(&c->dst, BDADDR_ANY) but it is wrong as dst is set for bcast sink hci_conn in iso_conn_ready(). Additionally checking sync_handle might be OK, but depends on details of bcast conn configuration flow. To avoid complicating it, use separate link types. 
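In practice the heuristics collapse into a plain link-type test; roughly (illustrative snippet mirroring the hunks below):

        /* Before: broadcast had to be inferred from address/sync-handle state. */
        if (c->type != ISO_LINK)
                continue;
        if (bacmp(&c->dst, BDADDR_ANY) &&
            c->sync_handle == HCI_SYNC_HANDLE_INVALID)
                continue;               /* looked like a unicast CIS */

        /* After: the link type alone identifies a broadcast connection. */
        if (c->type != BIS_LINK)
                continue;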
Fixes: f764a6c2c1e4 ("Bluetooth: ISO: Add broadcast support") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 3 ++- include/net/bluetooth/hci_core.h | 48 ++++++++++++++++++---------------------- 2 files changed, 23 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 2502febf4da7..82cbd54443ac 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -558,7 +558,8 @@ enum { #define ESCO_LINK 0x02 /* Low Energy links do not have defined link type. Use invented one */ #define LE_LINK 0x80 -#define ISO_LINK 0x82 +#define CIS_LINK 0x82 +#define BIS_LINK 0x83 #define INVALID_LINK 0xff /* LMP features */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 57f6175fd1cd..2b261e74e2c4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -999,7 +999,8 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num++; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: h->iso_num++; break; } @@ -1025,7 +1026,8 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num--; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: h->iso_num--; break; } @@ -1042,7 +1044,8 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) case SCO_LINK: case ESCO_LINK: return h->sco_num; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: return h->iso_num; default: return 0; @@ -1103,7 +1106,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, ba) || c->type != ISO_LINK) + if (bacmp(&c->dst, ba) || c->type != BIS_LINK) continue; if (c->iso_qos.bcast.bis == bis) { @@ -1125,7 +1128,7 @@ hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (c->type != BIS_LINK) continue; if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags)) @@ -1151,8 +1154,8 @@ hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, ba) || c->type != ISO_LINK || - !test_bit(HCI_CONN_PER_ADV, &c->flags)) + if (bacmp(&c->dst, ba) || c->type != BIS_LINK || + !test_bit(HCI_CONN_PER_ADV, &c->flags)) continue; if (c->iso_qos.bcast.big == big && @@ -1241,7 +1244,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY)) + if (c->type != CIS_LINK) continue; /* Match CIG ID if set */ @@ -1273,7 +1276,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY)) + if (c->type != CIS_LINK) continue; if (handle == c->iso_qos.ucast.cig) { @@ -1296,17 +1299,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) - continue; - - /* An ISO_LINK hcon with BDADDR_ANY as destination - * address is a Broadcast connection. A Broadcast - * slave connection is associated with a PA train, - * so the sync_handle can be used to differentiate - * from unicast. 
- */ - if (bacmp(&c->dst, BDADDR_ANY) && - c->sync_handle == HCI_SYNC_HANDLE_INVALID) + if (c->type != BIS_LINK) continue; if (handle == c->iso_qos.bcast.big) { @@ -1330,7 +1323,7 @@ hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (c->type != BIS_LINK) continue; if (handle == c->iso_qos.bcast.big && num_bis == c->num_bis) { @@ -1353,8 +1346,8 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK || - c->state != state) + if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || + c->state != state) continue; if (handle == c->iso_qos.bcast.big) { @@ -1377,8 +1370,8 @@ hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || - !test_bit(HCI_CONN_PA_SYNC, &c->flags)) + if (c->type != BIS_LINK || + !test_bit(HCI_CONN_PA_SYNC, &c->flags)) continue; if (c->iso_qos.bcast.big == big) { @@ -1400,7 +1393,7 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (c->type != BIS_LINK) continue; /* Ignore the listen hcon, we are looking @@ -2015,7 +2008,8 @@ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, case ESCO_LINK: return sco_connect_ind(hdev, bdaddr, flags); - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: return iso_connect_ind(hdev, bdaddr, flags); default: -- cgit v1.2.3 From 46550e2b878d60923c72f0526a7aac02e8eda3d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 4 May 2025 20:22:44 +0200 Subject: include: pe.h: Fix PE definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Rename constants to their standard PE names: - MZ_MAGIC -> IMAGE_DOS_SIGNATURE - PE_MAGIC -> IMAGE_NT_SIGNATURE - PE_OPT_MAGIC_PE32_ROM -> IMAGE_ROM_OPTIONAL_HDR_MAGIC - PE_OPT_MAGIC_PE32 -> IMAGE_NT_OPTIONAL_HDR32_MAGIC - PE_OPT_MAGIC_PE32PLUS -> IMAGE_NT_OPTIONAL_HDR64_MAGIC - IMAGE_DLL_CHARACTERISTICS_NX_COMPAT -> IMAGE_DLLCHARACTERISTICS_NX_COMPAT * Import constants and their description from readpe and file projects which contains current up-to-date information: - IMAGE_FILE_MACHINE_* - IMAGE_FILE_* - IMAGE_SUBSYSTEM_* - IMAGE_DLLCHARACTERISTICS_* - IMAGE_DLLCHARACTERISTICS_EX_* - IMAGE_DEBUG_TYPE_* * Add missing IMAGE_SCN_* constants and update their incorrect description * Fix incorrect value of IMAGE_SCN_MEM_PURGEABLE constant * Add description for win32_version and loader_flags PE fields Signed-off-by: Pali Rohár Signed-off-by: Ard Biesheuvel --- include/linux/pe.h | 279 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 174 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/include/linux/pe.h b/include/linux/pe.h index fdf9c95709ba..cd2b7275385f 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -39,113 +39,160 @@ */ #define LINUX_PE_MAGIC 0x818223cd -#define MZ_MAGIC 0x5a4d /* "MZ" */ +#define IMAGE_DOS_SIGNATURE 0x5a4d /* "MZ" */ -#define PE_MAGIC 0x00004550 /* "PE\0\0" */ -#define PE_OPT_MAGIC_PE32 0x010b -#define PE_OPT_MAGIC_PE32_ROM 0x0107 -#define PE_OPT_MAGIC_PE32PLUS 0x020b +#define IMAGE_NT_SIGNATURE 0x00004550 /* "PE\0\0" */ + +#define IMAGE_ROM_OPTIONAL_HDR_MAGIC 0x0107 /* ROM image (for 
R3000/R4000/R10000/ALPHA), without MZ and PE\0\0 sign */ +#define IMAGE_NT_OPTIONAL_HDR32_MAGIC 0x010b /* PE32 executable image */ +#define IMAGE_NT_OPTIONAL_HDR64_MAGIC 0x020b /* PE32+ executable image */ /* machine type */ -#define IMAGE_FILE_MACHINE_UNKNOWN 0x0000 -#define IMAGE_FILE_MACHINE_AM33 0x01d3 -#define IMAGE_FILE_MACHINE_AMD64 0x8664 -#define IMAGE_FILE_MACHINE_ARM 0x01c0 -#define IMAGE_FILE_MACHINE_ARMV7 0x01c4 -#define IMAGE_FILE_MACHINE_ARM64 0xaa64 -#define IMAGE_FILE_MACHINE_EBC 0x0ebc -#define IMAGE_FILE_MACHINE_I386 0x014c -#define IMAGE_FILE_MACHINE_IA64 0x0200 -#define IMAGE_FILE_MACHINE_M32R 0x9041 -#define IMAGE_FILE_MACHINE_MIPS16 0x0266 -#define IMAGE_FILE_MACHINE_MIPSFPU 0x0366 -#define IMAGE_FILE_MACHINE_MIPSFPU16 0x0466 -#define IMAGE_FILE_MACHINE_POWERPC 0x01f0 -#define IMAGE_FILE_MACHINE_POWERPCFP 0x01f1 -#define IMAGE_FILE_MACHINE_R4000 0x0166 -#define IMAGE_FILE_MACHINE_RISCV32 0x5032 -#define IMAGE_FILE_MACHINE_RISCV64 0x5064 -#define IMAGE_FILE_MACHINE_RISCV128 0x5128 -#define IMAGE_FILE_MACHINE_SH3 0x01a2 -#define IMAGE_FILE_MACHINE_SH3DSP 0x01a3 -#define IMAGE_FILE_MACHINE_SH3E 0x01a4 -#define IMAGE_FILE_MACHINE_SH4 0x01a6 -#define IMAGE_FILE_MACHINE_SH5 0x01a8 -#define IMAGE_FILE_MACHINE_THUMB 0x01c2 -#define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 -#define IMAGE_FILE_MACHINE_LOONGARCH32 0x6232 -#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 +#define IMAGE_FILE_MACHINE_UNKNOWN 0x0000 /* Unknown architecture */ +#define IMAGE_FILE_MACHINE_TARGET_HOST 0x0001 /* Interacts with the host and not a WOW64 guest (not for file image) */ +#define IMAGE_FILE_MACHINE_ALPHA_OLD 0x0183 /* DEC Alpha AXP 32-bit (old images) */ +#define IMAGE_FILE_MACHINE_ALPHA 0x0184 /* DEC Alpha AXP 32-bit */ +#define IMAGE_FILE_MACHINE_ALPHA64 0x0284 /* DEC Alpha AXP 64-bit (with 8kB page size) */ +#define IMAGE_FILE_MACHINE_AXP64 IMAGE_FILE_MACHINE_ALPHA64 +#define IMAGE_FILE_MACHINE_AM33 0x01d3 /* Matsushita AM33, now Panasonic MN103 */ +#define IMAGE_FILE_MACHINE_AMD64 0x8664 /* AMD64 (x64) */ +#define IMAGE_FILE_MACHINE_ARM 0x01c0 /* ARM Little-Endian (ARMv4) */ +#define IMAGE_FILE_MACHINE_THUMB 0x01c2 /* ARM Thumb Little-Endian (ARMv4T) */ +#define IMAGE_FILE_MACHINE_ARMNT 0x01c4 /* ARM Thumb-2 Little-Endian (ARMv7) */ +#define IMAGE_FILE_MACHINE_ARMV7 IMAGE_FILE_MACHINE_ARMNT +#define IMAGE_FILE_MACHINE_ARM64 0xaa64 /* ARM64 Little-Endian (Classic ABI) */ +#define IMAGE_FILE_MACHINE_ARM64EC 0xa641 /* ARM64 Little-Endian (Emulation Compatible ABI for AMD64) */ +#define IMAGE_FILE_MACHINE_ARM64X 0xa64e /* ARM64 Little-Endian (fat binary with both Classic ABI and EC ABI code) */ +#define IMAGE_FILE_MACHINE_CEE 0xc0ee /* COM+ Execution Engine (CLR pure MSIL object files) */ +#define IMAGE_FILE_MACHINE_CEF 0x0cef /* Windows CE 3.0 Common Executable Format (CEF bytecode) */ +#define IMAGE_FILE_MACHINE_CHPE_X86 0x3a64 /* ARM64 Little-Endian (Compiled Hybrid PE ABI for I386) */ +#define IMAGE_FILE_MACHINE_HYBRID_X86 IMAGE_FILE_MACHINE_CHPE_X86 +#define IMAGE_FILE_MACHINE_EBC 0x0ebc /* EFI/UEFI Byte Code */ +#define IMAGE_FILE_MACHINE_I386 0x014c /* Intel 386 (x86) */ +#define IMAGE_FILE_MACHINE_I860 0x014d /* Intel 860 (N10) */ +#define IMAGE_FILE_MACHINE_IA64 0x0200 /* Intel IA-64 (with 8kB page size) */ +#define IMAGE_FILE_MACHINE_LOONGARCH32 0x6232 /* LoongArch 32-bit processor family */ +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 /* LoongArch 64-bit processor family */ +#define IMAGE_FILE_MACHINE_M32R 0x9041 /* Mitsubishi M32R 32-bit Little-Endian */ +#define IMAGE_FILE_MACHINE_M68K 
0x0268 /* Motorola 68000 series */ +#define IMAGE_FILE_MACHINE_MIPS16 0x0266 /* MIPS III with MIPS16 ASE Little-Endian */ +#define IMAGE_FILE_MACHINE_MIPSFPU 0x0366 /* MIPS III with FPU Little-Endian */ +#define IMAGE_FILE_MACHINE_MIPSFPU16 0x0466 /* MIPS III with MIPS16 ASE and FPU Little-Endian */ +#define IMAGE_FILE_MACHINE_MPPC_601 0x0601 /* PowerPC 32-bit Big-Endian */ +#define IMAGE_FILE_MACHINE_OMNI 0xace1 /* Microsoft OMNI VM (omniprox.dll) */ +#define IMAGE_FILE_MACHINE_PARISC 0x0290 /* HP PA-RISC */ +#define IMAGE_FILE_MACHINE_POWERPC 0x01f0 /* PowerPC 32-bit Little-Endian */ +#define IMAGE_FILE_MACHINE_POWERPCFP 0x01f1 /* PowerPC 32-bit with FPU Little-Endian */ +#define IMAGE_FILE_MACHINE_POWERPCBE 0x01f2 /* PowerPC 64-bit Big-Endian */ +#define IMAGE_FILE_MACHINE_R3000 0x0162 /* MIPS I Little-Endian */ +#define IMAGE_FILE_MACHINE_R3000_BE 0x0160 /* MIPS I Big-Endian */ +#define IMAGE_FILE_MACHINE_R4000 0x0166 /* MIPS III Little-Endian (with 1kB or 4kB page size) */ +#define IMAGE_FILE_MACHINE_R10000 0x0168 /* MIPS IV Little-Endian */ +#define IMAGE_FILE_MACHINE_RISCV32 0x5032 /* RISC-V 32-bit address space */ +#define IMAGE_FILE_MACHINE_RISCV64 0x5064 /* RISC-V 64-bit address space */ +#define IMAGE_FILE_MACHINE_RISCV128 0x5128 /* RISC-V 128-bit address space */ +#define IMAGE_FILE_MACHINE_SH3 0x01a2 /* Hitachi SH-3 32-bit Little-Endian (with 1kB page size) */ +#define IMAGE_FILE_MACHINE_SH3DSP 0x01a3 /* Hitachi SH-3 DSP 32-bit (with 1kB page size) */ +#define IMAGE_FILE_MACHINE_SH3E 0x01a4 /* Hitachi SH-3E Little-Endian (with 1kB page size) */ +#define IMAGE_FILE_MACHINE_SH4 0x01a6 /* Hitachi SH-4 32-bit Little-Endian (with 1kB page size) */ +#define IMAGE_FILE_MACHINE_SH5 0x01a8 /* Hitachi SH-5 64-bit */ +#define IMAGE_FILE_MACHINE_TAHOE 0x07cc /* Intel EM machine */ +#define IMAGE_FILE_MACHINE_TRICORE 0x0520 /* Infineon AUDO 32-bit */ +#define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 /* MIPS Windows CE v2 Little-Endian */ /* flags */ -#define IMAGE_FILE_RELOCS_STRIPPED 0x0001 -#define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002 -#define IMAGE_FILE_LINE_NUMS_STRIPPED 0x0004 -#define IMAGE_FILE_LOCAL_SYMS_STRIPPED 0x0008 -#define IMAGE_FILE_AGGRESSIVE_WS_TRIM 0x0010 -#define IMAGE_FILE_LARGE_ADDRESS_AWARE 0x0020 -#define IMAGE_FILE_16BIT_MACHINE 0x0040 -#define IMAGE_FILE_BYTES_REVERSED_LO 0x0080 -#define IMAGE_FILE_32BIT_MACHINE 0x0100 -#define IMAGE_FILE_DEBUG_STRIPPED 0x0200 -#define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP 0x0400 -#define IMAGE_FILE_NET_RUN_FROM_SWAP 0x0800 -#define IMAGE_FILE_SYSTEM 0x1000 -#define IMAGE_FILE_DLL 0x2000 -#define IMAGE_FILE_UP_SYSTEM_ONLY 0x4000 -#define IMAGE_FILE_BYTES_REVERSED_HI 0x8000 - -#define IMAGE_FILE_OPT_ROM_MAGIC 0x107 -#define IMAGE_FILE_OPT_PE32_MAGIC 0x10b -#define IMAGE_FILE_OPT_PE32_PLUS_MAGIC 0x20b - -#define IMAGE_SUBSYSTEM_UNKNOWN 0 -#define IMAGE_SUBSYSTEM_NATIVE 1 -#define IMAGE_SUBSYSTEM_WINDOWS_GUI 2 -#define IMAGE_SUBSYSTEM_WINDOWS_CUI 3 -#define IMAGE_SUBSYSTEM_POSIX_CUI 7 -#define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI 9 -#define IMAGE_SUBSYSTEM_EFI_APPLICATION 10 -#define IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER 11 -#define IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER 12 -#define IMAGE_SUBSYSTEM_EFI_ROM_IMAGE 13 -#define IMAGE_SUBSYSTEM_XBOX 14 - -#define IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE 0x0040 -#define IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY 0x0080 -#define IMAGE_DLL_CHARACTERISTICS_NX_COMPAT 0x0100 -#define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION 0x0200 -#define IMAGE_DLLCHARACTERISTICS_NO_SEH 0x0400 -#define IMAGE_DLLCHARACTERISTICS_NO_BIND 
0x0800 -#define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER 0x2000 -#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 - -#define IMAGE_DLLCHARACTERISTICS_EX_CET_COMPAT 0x0001 -#define IMAGE_DLLCHARACTERISTICS_EX_FORWARD_CFI_COMPAT 0x0040 - -/* they actually defined 0x00000000 as well, but I think we'll skip that one. */ -#define IMAGE_SCN_RESERVED_0 0x00000001 -#define IMAGE_SCN_RESERVED_1 0x00000002 -#define IMAGE_SCN_RESERVED_2 0x00000004 -#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 /* don't pad - obsolete */ -#define IMAGE_SCN_RESERVED_3 0x00000010 +#define IMAGE_FILE_RELOCS_STRIPPED 0x0001 /* Relocation info stripped from file */ +#define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002 /* File is executable (i.e. no unresolved external references) */ +#define IMAGE_FILE_LINE_NUMS_STRIPPED 0x0004 /* Line nunbers stripped from file */ +#define IMAGE_FILE_LOCAL_SYMS_STRIPPED 0x0008 /* Local symbols stripped from file */ +#define IMAGE_FILE_AGGRESSIVE_WS_TRIM 0x0010 /* Aggressively trim working set */ +#define IMAGE_FILE_LARGE_ADDRESS_AWARE 0x0020 /* App can handle >2gb addresses (image can be loaded at address above 2GB) */ +#define IMAGE_FILE_16BIT_MACHINE 0x0040 /* 16 bit word machine */ +#define IMAGE_FILE_BYTES_REVERSED_LO 0x0080 /* Bytes of machine word are reversed (should be set together with IMAGE_FILE_BYTES_REVERSED_HI) */ +#define IMAGE_FILE_32BIT_MACHINE 0x0100 /* 32 bit word machine */ +#define IMAGE_FILE_DEBUG_STRIPPED 0x0200 /* Debugging info stripped from file in .DBG file */ +#define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP 0x0400 /* If Image is on removable media, copy and run from the swap file */ +#define IMAGE_FILE_NET_RUN_FROM_SWAP 0x0800 /* If Image is on Net, copy and run from the swap file */ +#define IMAGE_FILE_SYSTEM 0x1000 /* System kernel-mode file (can't be loaded in user-mode) */ +#define IMAGE_FILE_DLL 0x2000 /* File is a DLL */ +#define IMAGE_FILE_UP_SYSTEM_ONLY 0x4000 /* File should only be run on a UP (uniprocessor) machine */ +#define IMAGE_FILE_BYTES_REVERSED_HI 0x8000 /* Bytes of machine word are reversed (should be set together with IMAGE_FILE_BYTES_REVERSED_LO) */ + +/* subsys */ +#define IMAGE_SUBSYSTEM_UNKNOWN 0 /* Unknown subsystem */ +#define IMAGE_SUBSYSTEM_NATIVE 1 /* No subsystem required (NT device drivers and NT native system processes) */ +#define IMAGE_SUBSYSTEM_WINDOWS_GUI 2 /* Windows graphical user interface (GUI) subsystem */ +#define IMAGE_SUBSYSTEM_WINDOWS_CUI 3 /* Windows character-mode user interface (CUI) subsystem */ +#define IMAGE_SUBSYSTEM_WINDOWS_OLD_CE_GUI 4 /* Old Windows CE subsystem */ +#define IMAGE_SUBSYSTEM_OS2_CUI 5 /* OS/2 CUI subsystem */ +#define IMAGE_SUBSYSTEM_RESERVED_6 6 +#define IMAGE_SUBSYSTEM_POSIX_CUI 7 /* POSIX CUI subsystem */ +#define IMAGE_SUBSYSTEM_MMOSA 8 /* MMOSA/Native Win32E */ +#define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI 9 /* Windows CE subsystem */ +#define IMAGE_SUBSYSTEM_EFI_APPLICATION 10 /* Extensible Firmware Interface (EFI) application */ +#define IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER 11 /* EFI driver with boot services */ +#define IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER 12 /* EFI driver with run-time services */ +#define IMAGE_SUBSYSTEM_EFI_ROM_IMAGE 13 /* EFI ROM image */ +#define IMAGE_SUBSYSTEM_XBOX 14 /* Xbox system */ +#define IMAGE_SUBSYSTEM_RESERVED_15 15 +#define IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION 16 /* Windows Boot application */ +#define IMAGE_SUBSYSTEM_XBOX_CODE_CATALOG 17 /* Xbox Code Catalog */ + +/* dll_flags */ +#define IMAGE_LIBRARY_PROCESS_INIT 0x0001 /* DLL initialization function called just 
after process initialization */ +#define IMAGE_LIBRARY_PROCESS_TERM 0x0002 /* DLL initialization function called just before process termination */ +#define IMAGE_LIBRARY_THREAD_INIT 0x0004 /* DLL initialization function called just after thread initialization */ +#define IMAGE_LIBRARY_THREAD_TERM 0x0008 /* DLL initialization function called just before thread initialization */ +#define IMAGE_DLLCHARACTERISTICS_RESERVED_4 0x0010 +#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA 0x0020 /* ASLR with 64 bit address space (image can be loaded at address above 4GB) */ +#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE 0x0040 /* The DLL can be relocated at load time */ +#define IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY 0x0080 /* Code integrity checks are forced */ +#define IMAGE_DLLCHARACTERISTICS_NX_COMPAT 0x0100 /* Image is compatible with data execution prevention */ +#define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION 0x0200 /* Image is isolation aware, but should not be isolated (prevents loading of manifest file) */ +#define IMAGE_DLLCHARACTERISTICS_NO_SEH 0x0400 /* Image does not use SEH, no SE handler may reside in this image */ +#define IMAGE_DLLCHARACTERISTICS_NO_BIND 0x0800 /* Do not bind the image */ +#define IMAGE_DLLCHARACTERISTICS_X86_THUNK 0x1000 /* Image is a Wx86 Thunk DLL (for non-x86/risc DLL files) */ +#define IMAGE_DLLCHARACTERISTICS_APPCONTAINER 0x1000 /* Image should execute in an AppContainer (for EXE Metro Apps in Windows 8) */ +#define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER 0x2000 /* A WDM driver */ +#define IMAGE_DLLCHARACTERISTICS_GUARD_CF 0x4000 /* Image supports Control Flow Guard */ +#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 /* The image is terminal server (Remote Desktop Services) aware */ + +/* IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS flags */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_COMPAT 0x0001 /* Image is Control-flow Enforcement Technology Shadow Stack compatible */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_COMPAT_STRICT_MODE 0x0002 /* CET is enforced in strict mode */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_SET_CONTEXT_IP_VALIDATION_RELAXED_MODE 0x0004 /* Relaxed mode for Context IP Validation under CET is allowed */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_DYNAMIC_APIS_ALLOW_IN_PROC 0x0008 /* Use of dynamic APIs is restricted to processes only */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_RESERVED_1 0x0010 +#define IMAGE_DLLCHARACTERISTICS_EX_CET_RESERVED_2 0x0020 +#define IMAGE_DLLCHARACTERISTICS_EX_FORWARD_CFI_COMPAT 0x0040 /* All branch targets in all image code sections are annotated with forward-edge control flow integrity guard instructions */ +#define IMAGE_DLLCHARACTERISTICS_EX_HOTPATCH_COMPATIBLE 0x0080 /* Image can be modified while in use, hotpatch-compatible */ + +/* section_header flags */ +#define IMAGE_SCN_SCALE_INDEX 0x00000001 /* address of tls index is scaled = multiplied by 4 (for .tls section on MIPS only) */ +#define IMAGE_SCN_TYPE_NO_LOAD 0x00000002 /* reserved */ +#define IMAGE_SCN_TYPE_GROUPED 0x00000004 /* obsolete (used for 16-bit offset code) */ +#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 /* .o only - don't pad - obsolete (same as IMAGE_SCN_ALIGN_1BYTES) */ +#define IMAGE_SCN_TYPE_COPY 0x00000010 /* reserved */ #define IMAGE_SCN_CNT_CODE 0x00000020 /* .text */ #define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */ #define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */ -#define IMAGE_SCN_LNK_OTHER 0x00000100 /* reserved */ -#define IMAGE_SCN_LNK_INFO 0x00000200 /* .drectve comments */ -#define IMAGE_SCN_RESERVED_4 
0x00000400 +#define IMAGE_SCN_LNK_OTHER 0x00000100 /* .o only - other type than code, data or info */ +#define IMAGE_SCN_LNK_INFO 0x00000200 /* .o only - .drectve comments */ +#define IMAGE_SCN_LNK_OVERLAY 0x00000400 /* section contains overlay */ #define IMAGE_SCN_LNK_REMOVE 0x00000800 /* .o only - scn to be rm'd*/ #define IMAGE_SCN_LNK_COMDAT 0x00001000 /* .o only - COMDAT data */ -#define IMAGE_SCN_RESERVED_5 0x00002000 /* spec omits this */ -#define IMAGE_SCN_RESERVED_6 0x00004000 /* spec omits this */ -#define IMAGE_SCN_GPREL 0x00008000 /* global pointer referenced data */ -/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */ -#define IMAGE_SCN_MEM_PURGEABLE 0x00010000 /* reserved for "future" use */ -#define IMAGE_SCN_16BIT 0x00020000 /* reserved for "future" use */ -#define IMAGE_SCN_LOCKED 0x00040000 /* reserved for "future" use */ -#define IMAGE_SCN_PRELOAD 0x00080000 /* reserved for "future" use */ +#define IMAGE_SCN_RESERVED_13 0x00002000 /* spec omits this */ +#define IMAGE_SCN_MEM_PROTECTED 0x00004000 /* section is memory protected (for M68K) */ +#define IMAGE_SCN_NO_DEFER_SPEC_EXC 0x00004000 /* reset speculative exceptions handling bits in the TLB entries (for non-M68K) */ +#define IMAGE_SCN_MEM_FARDATA 0x00008000 /* section uses FAR_EXTERNAL relocations (for M68K) */ +#define IMAGE_SCN_GPREL 0x00008000 /* global pointer referenced data (for non-M68K) */ +#define IMAGE_SCN_MEM_SYSHEAP 0x00010000 /* use system heap (for M68K) */ +#define IMAGE_SCN_MEM_PURGEABLE 0x00020000 /* section can be released from RAM (for M68K) */ +#define IMAGE_SCN_MEM_16BIT 0x00020000 /* section is 16-bit (for non-M68K where it makes sense: I386, THUMB, MIPS16, MIPSFPU16, ...) */ +#define IMAGE_SCN_MEM_LOCKED 0x00040000 /* prevent the section from being moved (for M68K and .o I386) */ +#define IMAGE_SCN_MEM_PRELOAD 0x00080000 /* section is preload to RAM (for M68K and .o I386) */ /* and here they just stuck a 1-byte integer in the middle of a bitfield */ -#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 /* it does what it says on the box */ +#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 /* .o only - it does what it says on the box */ #define IMAGE_SCN_ALIGN_2BYTES 0x00200000 #define IMAGE_SCN_ALIGN_4BYTES 0x00300000 #define IMAGE_SCN_ALIGN_8BYTES 0x00400000 @@ -159,7 +206,9 @@ #define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000 #define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000 #define IMAGE_SCN_ALIGN_8192BYTES 0x00e00000 -#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */ +#define IMAGE_SCN_ALIGN_RESERVED 0x00f00000 +#define IMAGE_SCN_ALIGN_MASK 0x00f00000 +#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* .o only - extended relocations */ #define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */ #define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */ #define IMAGE_SCN_MEM_NOT_PAGED 0x08000000 /* not pageable */ @@ -168,8 +217,28 @@ #define IMAGE_SCN_MEM_READ 0x40000000 /* readable */ #define IMAGE_SCN_MEM_WRITE 0x80000000 /* writeable */ -#define IMAGE_DEBUG_TYPE_CODEVIEW 2 -#define IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS 20 +#define IMAGE_DEBUG_TYPE_UNKNOWN 0 /* Unknown value, ignored by all tools */ +#define IMAGE_DEBUG_TYPE_COFF 1 /* COFF debugging information */ +#define IMAGE_DEBUG_TYPE_CODEVIEW 2 /* CodeView debugging information or Visual C++ Program Database debugging information */ +#define IMAGE_DEBUG_TYPE_FPO 3 /* Frame pointer omission (FPO) information */ +#define IMAGE_DEBUG_TYPE_MISC 4 /* Location of DBG file with CodeView debugging 
information */ +#define IMAGE_DEBUG_TYPE_EXCEPTION 5 /* Exception information, copy of .pdata section */ +#define IMAGE_DEBUG_TYPE_FIXUP 6 /* Fixup information */ +#define IMAGE_DEBUG_TYPE_OMAP_TO_SRC 7 /* The mapping from an RVA in image to an RVA in source image */ +#define IMAGE_DEBUG_TYPE_OMAP_FROM_SRC 8 /* The mapping from an RVA in source image to an RVA in image */ +#define IMAGE_DEBUG_TYPE_BORLAND 9 /* Borland debugging information */ +#define IMAGE_DEBUG_TYPE_RESERVED10 10 /* Coldpath / Hotpatch debug information */ +#define IMAGE_DEBUG_TYPE_CLSID 11 /* CLSID */ +#define IMAGE_DEBUG_TYPE_VC_FEATURE 12 /* Visual C++ counts / statistics */ +#define IMAGE_DEBUG_TYPE_POGO 13 /* COFF group information, data for profile-guided optimization */ +#define IMAGE_DEBUG_TYPE_ILTCG 14 /* Incremental link-time code generation */ +#define IMAGE_DEBUG_TYPE_MPX 15 /* Intel Memory Protection Extensions */ +#define IMAGE_DEBUG_TYPE_REPRO 16 /* PE determinism or reproducibility */ +#define IMAGE_DEBUG_TYPE_EMBEDDED_PORTABLE_PDB 17 /* Embedded Portable PDB debugging information */ +#define IMAGE_DEBUG_TYPE_SPGO 18 /* Sample profile-guided optimization */ +#define IMAGE_DEBUG_TYPE_PDBCHECKSUM 19 /* PDB Checksum */ +#define IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS 20 /* Extended DLL characteristics bits */ +#define IMAGE_DEBUG_TYPE_PERFMAP 21 /* Location of associated Ready To Run PerfMap file */ #ifndef __ASSEMBLY__ @@ -235,7 +304,7 @@ struct pe32_opt_hdr { uint16_t image_minor; /* minor image version */ uint16_t subsys_major; /* major subsystem version */ uint16_t subsys_minor; /* minor subsystem version */ - uint32_t win32_version; /* reserved, must be 0 */ + uint32_t win32_version; /* win32 version reported at runtime */ uint32_t image_size; /* image size */ uint32_t header_size; /* header size rounded up to file_align */ @@ -246,7 +315,7 @@ struct pe32_opt_hdr { uint32_t stack_size; /* amt of stack required */ uint32_t heap_size_req; /* amt of heap requested */ uint32_t heap_size; /* amt of heap required */ - uint32_t loader_flags; /* reserved, must be 0 */ + uint32_t loader_flags; /* loader flags */ uint32_t data_dirs; /* number of data dir entries */ }; @@ -269,7 +338,7 @@ struct pe32plus_opt_hdr { uint16_t image_minor; /* minor image version */ uint16_t subsys_major; /* major subsystem version */ uint16_t subsys_minor; /* minor subsystem version */ - uint32_t win32_version; /* reserved, must be 0 */ + uint32_t win32_version; /* win32 version reported at runtime */ uint32_t image_size; /* image size */ uint32_t header_size; /* header size rounded up to file_align */ @@ -280,7 +349,7 @@ struct pe32plus_opt_hdr { uint64_t stack_size; /* amt of stack required */ uint64_t heap_size_req; /* amt of heap requested */ uint64_t heap_size; /* amt of heap required */ - uint32_t loader_flags; /* reserved, must be 0 */ + uint32_t loader_flags; /* loader flags */ uint32_t data_dirs; /* number of data dir entries */ }; @@ -301,10 +370,10 @@ struct data_directory { struct data_dirent global_ptr; /* global pointer reg. 
Size=0 */ struct data_dirent tls; /* .tls */ struct data_dirent load_config; /* load configuration structure */ - struct data_dirent bound_imports; /* no idea */ + struct data_dirent bound_imports; /* bound import table */ struct data_dirent import_addrs; /* import address table */ struct data_dirent delay_imports; /* delay-load import table */ - struct data_dirent clr_runtime_hdr; /* .cor (object only) */ + struct data_dirent clr_runtime_hdr; /* .cor (clr/.net executables) */ struct data_dirent reserved; }; -- cgit v1.2.3 From e341f9c3c8412e57fe0042a33a2640245ecdf619 Mon Sep 17 00:00:00 2001 From: Joshua Hahn Date: Mon, 5 May 2025 11:23:28 -0700 Subject: mm/mempolicy: Weighted Interleave Auto-tuning On machines with multiple memory nodes, interleaving page allocations across nodes allows for better utilization of each node's bandwidth. Previous work by Gregory Price [1] introduced weighted interleave, which allowed for pages to be allocated across nodes according to user-set ratios. Ideally, these weights should be proportional to each node's bandwidth, so that under bandwidth pressure, each node uses its maximal efficient bandwidth, preventing latency from increasing exponentially. Previously, weighted interleave's default weights were just 1s -- which would be equivalent to the (unweighted) interleave mempolicy, which goes through the nodes in a round-robin fashion, ignoring bandwidth information. This patch has two main goals: First, it makes weighted interleave easier to use for users who wish to relieve bandwidth pressure when using nodes with varying bandwidth (CXL). By providing a set of "real" default weights that just work out of the box, users who might not have the capability (or wish) to perform experimentation to find the optimal weights for their system can still take advantage of bandwidth-informed weighted interleave. Second, it allows for weighted interleave to dynamically adjust to hotplugged memory with new bandwidth information. Instead of requiring node weights to be manually updated every time new bandwidth information is reported or removed, weighted interleave adjusts and provides a new set of default weights to use whenever the bandwidth information changes. To meet these goals, this patch introduces an auto-configuration mode for the interleave weights that provides a reasonable set of default weights, calculated using bandwidth data reported by the system. In auto mode, weights are dynamically adjusted based on whatever the current bandwidth information reports (and respond to hotplug events). This patch still supports users manually writing weights into the nodeN sysfs interface by entering manual mode. When a user enters manual mode, the system stops dynamically updating any of the node weights, even during hotplug events that shift the optimal weight distribution. A new sysfs interface "auto" is introduced, which allows users to switch between the auto (writing 1 or Y) and manual (writing 0 or N) modes. The system also automatically enters manual mode when a nodeN interface is manually written to. There is one functional change that this patch makes to the existing weighted_interleave ABI: previously, writing 0 directly to a nodeN interface was said to reset the weight to the system default. Before this patch, the default for all weights was 1, which meant that writing 0 and 1 were functionally equivalent. With this patch, writing 0 is invalid.
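As an illustration of the bandwidth-proportional weights described above, here is a minimal user-space style sketch; the node count, the bandwidth numbers and the GCD-based reduction are assumptions made for the example, not the exact in-kernel algorithm.

#include <stdio.h>

/* Reduce raw per-node bandwidths to small interleave weights by dividing
 * them by their greatest common divisor (illustrative strategy only). */
static unsigned int gcd(unsigned int a, unsigned int b)
{
	while (b) {
		unsigned int t = a % b;

		a = b;
		b = t;
	}
	return a;
}

int main(void)
{
	/* Assumed example: node0 = DRAM at 120 GB/s, node1 = CXL at 30 GB/s */
	unsigned int bw[2] = { 120, 30 };
	unsigned int g = gcd(bw[0], bw[1]);

	for (unsigned int i = 0; i < 2; i++)
		printf("node%u weight = %u\n", i, bw[i] / g); /* prints 4 and 1 */

	return 0;
}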
Link: https://lkml.kernel.org/r/20250520141236.2987309-1-joshua.hahnjy@gmail.com [joshua.hahnjy@gmail.com: wordsmithing changes, simplification, fixes] Link: https://lkml.kernel.org/r/20250511025840.2410154-1-joshua.hahnjy@gmail.com [joshua.hahnjy@gmail.com: remove auto_kobj_attr field from struct sysfs_wi_group] Link: https://lkml.kernel.org/r/20250512142511.3959833-1-joshua.hahnjy@gmail.com https://lore.kernel.org/linux-mm/20240202170238.90004-1-gregory.price@memverge.com/ [1] Link: https://lkml.kernel.org/r/20250505182328.4148265-1-joshua.hahnjy@gmail.com Co-developed-by: Gregory Price Signed-off-by: Gregory Price Signed-off-by: Joshua Hahn Suggested-by: Yunjeong Mun Suggested-by: Oscar Salvador Suggested-by: Ying Huang Suggested-by: Harry Yoo Reviewed-by: Harry Yoo Reviewed-by: Huang Ying Reviewed-by: Honggyu Kim Cc: Dan Williams Cc: Dave Jiang Cc: Greg Kroah-Hartman Cc: Jonathan Cameron Cc: Johannes Weiner Cc: Len Brown Signed-off-by: Andrew Morton --- include/linux/mempolicy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index ce9885e0178a..0fe96f3ab3ef 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -178,6 +179,9 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol) extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); +extern int mempolicy_set_node_perf(unsigned int node, + struct access_coordinate *coords); + #else struct mempolicy {}; -- cgit v1.2.3 From bf454ec31add6790f6cdc88328e38901fcbbade6 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 2 May 2025 09:12:35 +0800 Subject: kexec_file: allow to place kexec_buf randomly Patch series "Support kdump with LUKS encryption by reusing LUKS volume keys", v9. LUKS is the standard for Linux disk encryption, widely adopted by users, and in some cases, such as Confidential VMs, it is a requirement. With kdump enabled, when the first kernel crashes, the system can boot into the kdump/crash kernel to dump the memory image (i.e., /proc/vmcore) to a specified target. However, there are two challenges when dumping vmcore to a LUKS-encrypted device: - The kdump kernel may not be able to decrypt the LUKS partition. For some machines, a system administrator may not have a chance to enter the password to decrypt the device in the kdump initramfs after the 1st kernel crashes; for cloud confidential VMs, depending on the policy, the kdump kernel may not be able to unseal the keys with the TPM, and the console virtual keyboard is untrusted. - LUKS2 by default uses the memory-hard Argon2 key derivation function, which is quite memory-consuming compared to the limited memory reserved for kdump. Take Fedora as an example: by default, only 256M is reserved for systems with between 4G and 64G of memory. With LUKS enabled, ~1300M needs to be reserved for kdump. Note that the memory reserved for kdump can't be used by the 1st kernel, i.e. a user sees ~1300M of memory missing in the 1st kernel. Besides, users (at least on Fedora) usually expect kdump to work out of the box, i.e. no manual password input or custom crashkernel value is needed. And it doesn't make sense to derive the keys again in the kdump kernel, which would be redundant work. This patchset addresses the above issues by making the LUKS volume keys persistent for the kdump kernel with the help of cryptsetup's new APIs (--link-vk-to-keyring/--volume-key-keyring).
Here is the life cycle of the kdump copies of the LUKS volume keys: 1. After the 1st kernel loads the initramfs during boot, systemd uses a user-input passphrase to decrypt the LUKS volume keys or a TPM-sealed key and then saves the volume keys to the specified keyring (using the --link-vk-to-keyring API), and the keys will expire within a specified time. 2. A user space tool (a kdump initramfs loader like kdump-utils) creates key items inside /sys/kernel/config/crash_dm_crypt_keys to inform the 1st kernel which keys are needed. 3. When the kdump initramfs is loaded by the kexec_file_load syscall, the 1st kernel will iterate over the created key items and save the keys to kdump reserved memory. 4. When the 1st kernel crashes and the kdump initramfs is booted, the kdump initramfs asks the kdump kernel to create a user key using the key stored in kdump reserved memory by writing yes to /sys/kernel/crash_dm_crypt_keys/restore. Then the LUKS encrypted device is unlocked with libcryptsetup's --volume-key-keyring API. 5. The system gets rebooted to the 1st kernel after dumping vmcore to the LUKS encrypted device is finished. After libcryptsetup saves the LUKS volume keys to the specified keyring, whoever holds these copies is responsible for their safety. The keys will be saved in the memory area exclusively reserved for kdump, to which even the 1st kernel has no direct access. Furthermore, two additional protections are added: - save the copy at a random position in kdump reserved memory, as suggested by Jan - clear the _PAGE_PRESENT flag of the page that stores the copy, as suggested by Pingfan This patchset only supports x86. There will be patches to support other architectures once this patch set gets merged. This patch (of 9): Currently, kexec_buf is placed in order, which means that for the same machine the info in the kexec_buf is always located at the same position each time the machine is booted. This may cause a risk for sensitive information like the LUKS volume key. Now struct kexec_buf has a new field, random, which indicates that it is supposed to be placed at a random position. Note this feature is enabled only when CONFIG_CRASH_DUMP is enabled. So it only takes effect for kdump and won't impact kexec reboot. Link: https://lkml.kernel.org/r/20250502011246.99238-1-coxu@redhat.com Link: https://lkml.kernel.org/r/20250502011246.99238-2-coxu@redhat.com Signed-off-by: Coiby Xu Suggested-by: Jan Pazdziora Acked-by: Baoquan He Cc: "Daniel P. Berrange" Cc: Dave Hansen Cc: Dave Young Cc: Liu Pingfan Cc: Milan Broz Cc: Ondrej Kozina Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton --- include/linux/kexec.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c8971861521a..1871eaa95432 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -25,6 +25,10 @@ extern note_buf_t __percpu *crash_notes; +#ifdef CONFIG_CRASH_DUMP +#include +#endif + #ifdef CONFIG_KEXEC_CORE #include #include @@ -169,6 +173,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. * @top_down: Allocate from top of memory. + * @random: Place the buffer at a random position.
*/ struct kexec_buf { struct kimage *image; @@ -180,8 +185,33 @@ struct kexec_buf { unsigned long buf_min; unsigned long buf_max; bool top_down; +#ifdef CONFIG_CRASH_DUMP + bool random; +#endif }; + +#ifdef CONFIG_CRASH_DUMP +static inline void kexec_random_range_start(unsigned long start, + unsigned long end, + struct kexec_buf *kbuf, + unsigned long *temp_start) +{ + unsigned short i; + + if (kbuf->random) { + get_random_bytes(&i, sizeof(unsigned short)); + *temp_start = start + (end - start) / USHRT_MAX * i; + } +} +#else +static inline void kexec_random_range_start(unsigned long start, + unsigned long end, + struct kexec_buf *kbuf, + unsigned long *temp_start) +{} +#endif + int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf); int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, void *buf, unsigned int size, -- cgit v1.2.3 From 479e58549b0fa7e80f1e0b9e69e0a2a8e6711132 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 2 May 2025 09:12:37 +0800 Subject: crash_dump: store dm crypt keys in kdump reserved memory When the kdump kernel image and initrd are loaded, the dm crypts keys will be read from keyring and then stored in kdump reserved memory. Assume a key won't exceed 256 bytes thus MAX_KEY_SIZE=256 according to "cryptsetup benchmark". Link: https://lkml.kernel.org/r/20250502011246.99238-4-coxu@redhat.com Signed-off-by: Coiby Xu Acked-by: Baoquan He Cc: "Daniel P. Berrange" Cc: Dave Hansen Cc: Dave Young Cc: Jan Pazdziora Cc: Liu Pingfan Cc: Milan Broz Cc: Ondrej Kozina Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 6 +++++- include/linux/kexec.h | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 44305336314e..2e6782239034 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -34,7 +34,11 @@ static inline void arch_kexec_protect_crashkres(void) { } static inline void arch_kexec_unprotect_crashkres(void) { } #endif - +#ifdef CONFIG_CRASH_DM_CRYPT +int crash_load_dm_crypt_keys(struct kimage *image); +#else +static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; } +#endif #ifndef arch_crash_handle_hotplug_event static inline void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1871eaa95432..6e688c5d8e4d 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -405,6 +405,10 @@ struct kimage { void *elf_headers; unsigned long elf_headers_sz; unsigned long elf_load_addr; + + /* dm crypt keys buffer */ + unsigned long dm_crypt_keys_addr; + unsigned long dm_crypt_keys_sz; }; /* kexec interface functions */ -- cgit v1.2.3 From 62f17d9df6924cf805de5ae970470615c1c8d9f2 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 2 May 2025 09:12:39 +0800 Subject: crash_dump: retrieve dm crypt keys in kdump kernel Crash kernel will retrieve the dm crypt keys based on the dmcryptkeys command line parameter. When user space writes the key description to /sys/kernel/config/crash_dm_crypt_key/restore, the crash kernel will save the encryption keys to the user keyring. Then user space e.g. cryptsetup's --volume-key-keyring API can use it to unlock the encrypted device. Link: https://lkml.kernel.org/r/20250502011246.99238-6-coxu@redhat.com Signed-off-by: Coiby Xu Acked-by: Baoquan He Cc: "Daniel P. 
Berrange" Cc: Dave Hansen Cc: Dave Young Cc: Jan Pazdziora Cc: Liu Pingfan Cc: Milan Broz Cc: Ondrej Kozina Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 1 + include/linux/crash_dump.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 2e6782239034..d35726d6a415 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -36,6 +36,7 @@ static inline void arch_kexec_unprotect_crashkres(void) { } #ifdef CONFIG_CRASH_DM_CRYPT int crash_load_dm_crypt_keys(struct kimage *image); +ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos); #else static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; } #endif diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 2f2555e6407c..dd6fc3b2133b 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -15,6 +15,8 @@ extern unsigned long long elfcorehdr_addr; extern unsigned long long elfcorehdr_size; +extern unsigned long long dm_crypt_keys_addr; + #ifdef CONFIG_CRASH_DUMP extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size); extern void elfcorehdr_free(unsigned long long addr); -- cgit v1.2.3 From f68b5d165c906d11948c78e5d4b55a3e0975bba4 Mon Sep 17 00:00:00 2001 From: Casey Connolly Date: Sat, 17 May 2025 23:32:38 +0100 Subject: mailmap: update and consolidate Casey Connolly's name and email I've used several email addresses and a previous name to contribute. Consolidate all of these to my primary email and update my name. Link: https://lkml.kernel.org/r/20250517223237.15647-2-casey.connolly@linaro.org Signed-off-by: Casey Connolly Cc: Krzysztof Kozlowski Signed-off-by: Andrew Morton --- include/soc/qcom/qcom-spmi-pmic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/qcom/qcom-spmi-pmic.h b/include/soc/qcom/qcom-spmi-pmic.h index a62d500a6fda..df3d3a0af98a 100644 --- a/include/soc/qcom/qcom-spmi-pmic.h +++ b/include/soc/qcom/qcom-spmi-pmic.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (c) 2022 Linaro. All rights reserved. - * Author: Caleb Connolly + * Author: Casey Connolly */ #ifndef __QCOM_SPMI_PMIC_H__ -- cgit v1.2.3 From 4e7bca76e3fed58437dcd7f747d1c8d98507379e Mon Sep 17 00:00:00 2001 From: Hans Zhang Date: Sat, 17 May 2025 00:52:22 +0800 Subject: PCI/MSI: Use bool for MSI enable state tracking Convert pci_msi_enable and pci_msi_enabled() to use bool type for clarity. No functional changes, only code cleanup. 
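A hedged sketch of the caller-side effect of this change; the probe helper below is invented for illustration, and only pci_msi_enabled() and pci_enable_msi() are existing interfaces.

#include <linux/pci.h>

/* Hypothetical driver helper: skip MSI setup when MSI support is disabled
 * globally (e.g. booted with pci=nomsi); the check now reads as a plain
 * boolean predicate instead of an int. */
static int example_setup_irqs(struct pci_dev *pdev)
{
	if (!pci_msi_enabled())
		return -ENODEV;

	return pci_enable_msi(pdev);
}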
Signed-off-by: Hans Zhang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250516165223.125083-2-18255117159@163.com --- include/linux/pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..f5e908a56498 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1669,7 +1669,7 @@ void pci_disable_msi(struct pci_dev *dev); int pci_msix_vec_count(struct pci_dev *dev); void pci_disable_msix(struct pci_dev *dev); void pci_restore_msi_state(struct pci_dev *dev); -int pci_msi_enabled(void); +bool pci_msi_enabled(void); int pci_enable_msi(struct pci_dev *dev); int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec); @@ -1702,7 +1702,7 @@ static inline void pci_disable_msi(struct pci_dev *dev) { } static inline int pci_msix_vec_count(struct pci_dev *dev) { return -ENOSYS; } static inline void pci_disable_msix(struct pci_dev *dev) { } static inline void pci_restore_msi_state(struct pci_dev *dev) { } -static inline int pci_msi_enabled(void) { return 0; } +static inline bool pci_msi_enabled(void) { return false; } static inline int pci_enable_msi(struct pci_dev *dev) { return -ENOSYS; } static inline int pci_enable_msix_range(struct pci_dev *dev, -- cgit v1.2.3 From 8b8762eeec59b959fbca60afffe21265bce67168 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 May 2025 09:19:05 -0700 Subject: tools: ynl-gen: add makefile deps for neigh Kory is reporting build issues after recent additions to YNL if the system headers are old. Link: https://lore.kernel.org/20250519164949.597d6e92@kmaincent-XPS-13-7390 Reported-by: Kory Maincent Fixes: 0939a418b3b0 ("tools: ynl: submsg: reverse parse / error reporting") Tested-by: Kory Maincent Link: https://patch.msgid.link/20250520161916.413298-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/neighbour.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 5e67a7eaf4a7..b851c36ad25d 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_NEIGHBOUR_H -#define __LINUX_NEIGHBOUR_H +#ifndef _UAPI__LINUX_NEIGHBOUR_H +#define _UAPI__LINUX_NEIGHBOUR_H #include #include -- cgit v1.2.3 From a5bd029c733b8ae790d5873e2afeb88b58e3a151 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:04 -0700 Subject: net: add skb_crc32c() Add skb_crc32c(), which calculates the CRC32C of a sk_buff. It will replace __skb_checksum(), which unnecessarily supports arbitrary checksums. Compared to __skb_checksum(), skb_crc32c(): - Uses the correct type for CRC32C values (u32, not __wsum). - Does not require the caller to provide a skb_checksum_ops struct. - Is faster because it does not use indirect calls and does not use the very slow crc32c_combine(). According to commit 2817a336d4d5 ("net: skb_checksum: allow custom update/combine for walking skb") which added __skb_checksum(), the original motivation for the abstraction layer was to avoid code duplication for CRC32C and other checksums in the future. However: - No additional checksums showed up after CRC32C. __skb_checksum() is only used with the "regular" net checksum and CRC32C. - Indirect calls are expensive. 
Commit 2544af0344ba ("net: avoid indirect calls in L4 checksum calculation") worked around this using the INDIRECT_CALL_1 macro. But that only avoided the indirect call for the net checksum, and at the cost of an extra branch. - The checksums use different types (__wsum and u32), causing casts to be needed. - It made the checksums of fragments be combined (rather than chained) for both checksums, despite this being highly counterproductive for CRC32C due to how slow crc32c_combine() is. This can clearly be seen in commit 4c2f24549644 ("sctp: linearize early if it's not GSO") which tried to work around this performance bug. With a dedicated function for each checksum, we can instead just use the proper strategy for each checksum. As shown by the following tables, the new function skb_crc32c() is faster than __skb_checksum(), with the improvement varying greatly from 5% to 2500% depending on the case. The largest improvements come from fragmented packets, mainly due to eliminating the inefficient crc32c_combine(). But linear packets are improved too, especially shorter ones, mainly due to eliminating indirect calls. These benchmarks were done on AMD Zen 5. On that CPU, Linux uses IBRS instead of retpoline; an even greater improvement might be seen with retpoline: Linear sk_buffs Length in bytes __skb_checksum cycles skb_crc32c cycles =============== ===================== ================= 64 43 18 256 94 77 1420 204 161 16384 1735 1642 Nonlinear sk_buffs (even split between head and one fragment) Length in bytes __skb_checksum cycles skb_crc32c cycles =============== ===================== ================= 64 579 22 256 829 77 1420 1506 194 16384 4365 1682 Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-3-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c7397b17bb08..7ccc6356acac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4203,6 +4203,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum, const struct skb_checksum_ops *ops); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); +u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc); static inline void * __must_check __skb_header_pointer(const struct sk_buff *skb, int offset, int len, -- cgit v1.2.3 From 99de9d4022e5004f95f425f798f0aa01e87949ff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:07 -0700 Subject: sctp: use skb_crc32c() instead of __skb_checksum() Make sctp_compute_cksum() just use the new function skb_crc32c(), instead of calling __skb_checksum() with a skb_checksum_ops struct that does CRC32C. This is faster and simpler. 
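For reference, a minimal sketch of the resulting call pattern; the wrapper name is made up, but the skb_crc32c() signature is the one added in the previous patch.

#include <linux/skbuff.h>

/* Hypothetical helper: CRC32C over an skb's payload starting at @offset,
 * seeded with ~0 and inverted at the end, per the CRC32C convention. */
static u32 example_payload_crc32c(const struct sk_buff *skb, int offset)
{
	return ~skb_crc32c(skb, offset, skb->len - offset, ~0);
}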
Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-6-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/net/sctp/checksum.h | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 291465c25810..654d37ec0402 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -15,8 +15,6 @@ * Dinakaran Joseph * Jon Grimm * Sridhar Samudrala - * - * Rewritten to use libcrc32c by: * Vlad Yasevich */ @@ -25,39 +23,18 @@ #include #include -#include -#include - -static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum) -{ - return (__force __wsum)crc32c((__force __u32)sum, buff, len); -} - -static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, - int offset, int len) -{ - return (__force __wsum)crc32c_combine((__force __u32)csum, - (__force __u32)csum2, len); -} - -static const struct skb_checksum_ops sctp_csum_ops = { - .update = sctp_csum_update, - .combine = sctp_csum_combine, -}; static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); __le32 old = sh->checksum; - __wsum new; + u32 new; sh->checksum = 0; - new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, - &sctp_csum_ops); + new = ~skb_crc32c(skb, offset, skb->len - offset, ~0); sh->checksum = old; - - return cpu_to_le32((__force __u32)new); + return cpu_to_le32(new); } #endif /* __sctp_checksum_h__ */ -- cgit v1.2.3 From 70c96c7cb9f035d5b960021f2450afa6240e66b4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:08 -0700 Subject: net: fold __skb_checksum() into skb_checksum() Now that the only remaining caller of __skb_checksum() is skb_checksum(), fold __skb_checksum() into skb_checksum(). This makes struct skb_checksum_ops unnecessary, so remove that too and simply do the "regular" net checksum. It also makes the wrapper functions csum_partial_ext() and csum_block_add_ext() unnecessary, so remove those too and just use the underlying functions. Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-7-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 9 --------- include/net/checksum.h | 12 ------------ 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7ccc6356acac..018c07230513 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4192,15 +4192,6 @@ static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) return copy_to_iter(data, len, &msg->msg_iter) == len ? 
0 : -EFAULT; } -struct skb_checksum_ops { - __wsum (*update)(const void *mem, int len, __wsum wsum); - __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); -}; - -extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly; - -__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, - __wsum csum, const struct skb_checksum_ops *ops); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc); diff --git a/include/net/checksum.h b/include/net/checksum.h index 243f972267b8..e57986b173f8 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -98,12 +98,6 @@ csum_block_add(__wsum csum, __wsum csum2, int offset) return csum_add(csum, csum_shift(csum2, offset)); } -static __always_inline __wsum -csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) -{ - return csum_block_add(csum, csum2, offset); -} - static __always_inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { @@ -115,12 +109,6 @@ static __always_inline __wsum csum_unfold(__sum16 n) return (__force __wsum)n; } -static __always_inline -__wsum csum_partial_ext(const void *buff, int len, __wsum sum) -{ - return csum_partial(buff, len, sum); -} - #define CSUM_MANGLED_0 ((__force __sum16)0xffff) static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) -- cgit v1.2.3 From b82f72292ab4c65250bd734281464a6ab1ff4133 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:09 -0700 Subject: lib/crc32: remove unused support for CRC32C combination crc32c_combine() and crc32c_shift() are no longer used (except by the KUnit test that tests them), and their current implementation is very slow. Remove them. Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-8-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/crc32.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 69c2e8bb3782..7f7d0be8a0ac 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -76,29 +76,6 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) return crc32_le_shift(crc1, len2) ^ crc2; } -u32 crc32c_shift(u32 crc, size_t len); - -/** - * crc32c_combine - Combine two crc32c check values into one. For two sequences - * of bytes, seq1 and seq2 with lengths len1 and len2, crc32c() - * check values were calculated for each, crc1 and crc2. - * - * @crc1: crc32c of the first block - * @crc2: crc32c of the second block - * @len2: length of the second block - * - * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring - * only crc1, crc2, and len2. Note: If seq_full denotes the concatenated - * memory area of seq1 with seq2, and crc_full the crc32c() value of - * seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when - * crc_full was seeded with the same initializer as crc1, and crc2 seed - * was 0. See also crc_combine_test(). 
- */ -static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2) -{ - return crc32c_shift(crc1, len2) ^ crc2; -} - #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) /* -- cgit v1.2.3 From ea6342d98928e243f2024fb97a9b4d42ee55dfba Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:10 -0700 Subject: net: add skb_copy_and_crc32c_datagram_iter() Since skb_copy_and_hash_datagram_iter() is used only with CRC32C, the crypto_ahash abstraction provides no value. Add skb_copy_and_crc32c_datagram_iter() which just calls crc32c() directly. This is faster and simpler. It also doesn't have the weird dependency issue where skb_copy_and_hash_datagram_iter() depends on CONFIG_CRYPTO_HASH=y without that being expressed explicitly in the kconfig (presumably because it was too heavyweight for NET to select). The new function is conditional on the hidden boolean symbol NET_CRC32C, which selects CRC32. So it gets compiled only when something that actually needs CRC32C packet checksums is enabled, it has no implicit dependency, and it doesn't depend on the heavyweight crypto layer. Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-9-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 018c07230513..510adf63c211 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4137,6 +4137,8 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, struct ahash_request *hash); +int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); -- cgit v1.2.3 From c93f75b2d755c35b596084ddd3feb3528284a53f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:12 -0700 Subject: net: remove skb_copy_and_hash_datagram_iter() Now that skb_copy_and_hash_datagram_iter() is no longer used, remove it. 
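For context, a hedged sketch of the replacement call pattern using skb_copy_and_crc32c_datagram_iter() from the earlier patch; the surrounding receive-path function is invented for the example.

#include <linux/skbuff.h>
#include <linux/uio.h>

/* Hypothetical receive path: copy @len bytes of a datagram into @to while
 * accumulating the CRC32C of the copied bytes. */
static int example_copy_with_crc32c(const struct sk_buff *skb, int offset,
				    struct iov_iter *to, int len)
{
	u32 crc = ~0;
	int err;

	err = skb_copy_and_crc32c_datagram_iter(skb, offset, to, len, &crc);
	if (err)
		return err;

	/* ~crc is the finalized CRC32C of the copied range. */
	return 0;
}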
Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-11-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 510adf63c211..5520524c93bf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -274,7 +274,6 @@ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) -struct ahash_request; struct net_device; struct scatterlist; struct pipe_inode_info; @@ -4134,9 +4133,6 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg); -int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, - struct iov_iter *to, int len, - struct ahash_request *hash); int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, -- cgit v1.2.3 From 31afd6bc55cc0093c3e5b0a368319e423d4de8ea Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sat, 17 May 2025 22:13:45 +0200 Subject: net: phy: pass PHY driver to .match_phy_device OP Pass the PHY driver pointer to the .match_phy_device OP in addition to phydev. Having access to the PHY driver struct might be useful to check the PHY ID the driver is being matched for, in case the PHY ID scanned into the phydev is not consistent. A scenario for this is a PHY that changes its PHY ID after a firmware is loaded; in such a case, the PHY ID stored in the PHY device struct is not valid anymore and the PHY driver will manually scan the ID in the match_phy_device function. Having the PHY driver info is also useful for those PHY drivers that implement multiple simple .match_phy_device OPs to match specific MMD PHY IDs. With this extra info, if the parsing logic is the same, the matching function can be generalized by using the phy_id in the PHY driver instead of hardcoding it. The Rust wrapper callback is updated to align with the new match_phy_device arguments. Suggested-by: Russell King (Oracle) Reviewed-by: Russell King (Oracle) Signed-off-by: Christian Marangi Reviewed-by: Benno Lossin # for Rust Reviewed-by: FUJITA Tomonori Link: https://patch.msgid.link/20250517201353.5137-2-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 92a88b5ce356..10e66d45a8e8 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -990,7 +990,8 @@ struct phy_driver { * driver for the given phydev. If NULL, matching is based on * phy_id and phy_id_mask. */ - int (*match_phy_device)(struct phy_device *phydev); + int (*match_phy_device)(struct phy_device *phydev, + const struct phy_driver *phydrv); /** * @set_wol: Some devices (e.g. qnap TS-119P II) require PHY -- cgit v1.2.3 From d6c45707ac84c2d9f274ece1cea4dddb97996bde Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sat, 17 May 2025 22:13:48 +0200 Subject: net: phy: introduce genphy_match_phy_device() Introduce a new API, genphy_match_phy_device(), to provide a way to match a PHY driver for a PHY device based on the info stored in the PHY device struct. The function generalizes the logic used in phy_bus_match() to check the PHY ID, deciding whether the C45 or C22 ID should be used for matching.
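A hedged sketch of how a driver might combine the two operations; the MMD register read and the ID layout below are invented, while the two function signatures are the ones added in these patches.

#include <linux/mdio.h>
#include <linux/mii.h>
#include <linux/phy.h>

/* Hypothetical matcher: if the device already reports a usable ID (e.g.
 * after firmware load), defer to the generic ID-based matching; otherwise
 * read an ID word manually and compare it against the driver's phy_id. */
static int example_match_phy_device(struct phy_device *phydev,
				    const struct phy_driver *phydrv)
{
	int id;

	if (phydev->phy_id)
		return genphy_match_phy_device(phydev, phydrv);

	id = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MII_PHYSID1);
	if (id < 0)
		return 0;

	return ((u32)id << 16) == (phydrv->phy_id & 0xffff0000);
}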
This is useful for a custom .match_phy_device function that wants to use the generic logic under some conditions (for example, when a PHY is already set up and provides the correct PHY ID). Reviewed-by: Russell King (Oracle) Signed-off-by: Christian Marangi Link: https://patch.msgid.link/20250517201353.5137-5-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 10e66d45a8e8..32b9da274115 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1868,6 +1868,9 @@ char *phy_attached_info_irq(struct phy_device *phydev) __malloc; void phy_attached_info(struct phy_device *phydev); +int genphy_match_phy_device(struct phy_device *phydev, + const struct phy_driver *phydrv); + /* Clause 22 PHY */ int genphy_read_abilities(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); -- cgit v1.2.3 From 4fe238513407d83f38bf5782e8bcdd7b8baeb85d Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Thu, 22 May 2025 09:43:47 +0800 Subject: ALSA: hda/tas2781: Move and unify the calibrated-data getting function for SPI and I2C into the tas2781_hda lib The calibration-data getting functions for the SPI and I2C HDA drivers are almost the same; both read the calibration data from UEFI. Putting them into the tas2781_hda lib for code cleanup is more reasonable than keeping them in the codec drivers. For the tas2781 codec driver, there are two different sources of calibrated data: one is a bin file, generated in the factory test and requested and read on the codec driver side; the other is user space during device bootup. Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250522014347.1163-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- include/sound/tas2781.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 8192a94d783a..979cdaa12522 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -121,11 +121,6 @@ enum dspbin_type { TASDEV_BETA, }; -enum device_catlog_id { - LENOVO = 0, - OTHERS -}; - struct bulk_reg_val { int reg; unsigned char val[4]; @@ -172,7 +167,6 @@ struct tasdevice_priv { struct regmap *regmap; struct device *dev; - enum device_catlog_id catlog_id; unsigned char cal_binaryname[TASDEVICE_MAX_CHANNELS][64]; unsigned char crc8_lkp_tbl[CRC8_TABLE_SIZE]; unsigned char coef_binaryname[64]; @@ -223,8 +217,6 @@ struct tasdevice_priv { int (*dev_bulk_read)(struct tasdevice_priv *tas_priv, unsigned short chn, unsigned int reg, unsigned char *p_data, unsigned int n_length); - int (*save_calibration)(struct tasdevice_priv *tas_priv); - void (*apply_calibration)(struct tasdevice_priv *tas_priv); }; int tasdevice_dev_read(struct tasdevice_priv *tas_priv, @@ -238,6 +230,4 @@ int tasdevice_dev_bulk_write( struct tasdevice_priv *tas_priv, unsigned short chn, unsigned int reg, unsigned char *p_data, unsigned int n_length); void tasdevice_remove(struct tasdevice_priv *tas_priv); -int tasdevice_save_calibration(struct tasdevice_priv *tas_priv); -void tasdevice_apply_calibration(struct tasdevice_priv *tas_priv); #endif /* __TAS2781_H__ */ -- cgit v1.2.3 From f4b18ff2c147d3b56384fcc8adb30bf733bf2300 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 21 May 2025 15:15:28 -0700 Subject: perf/uapi: Fix PERF_RECORD_SAMPLE comments in AUX data for PERF_SAMPLE_AUX appears last. PERF_SAMPLE_CGROUP is missing from the comment.
This makes the comment match that in the perf_event_open man page. Signed-off-by: Ian Rogers Signed-off-by: Ingo Molnar Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-perf-users@vger.kernel.org Link: https://lore.kernel.org/r/20250521221529.2547099-1-irogers@google.com --- include/uapi/linux/perf_event.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 5fc753c23734..b2722dae6f1e 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1035,10 +1035,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, -- cgit v1.2.3 From 44889ff67cee7b9ee2d305690ce7a5488b137a66 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 22 May 2025 09:51:22 +0200 Subject: perf/uapi: Clean up a bit When applying a recent commit to the header I noticed that we have accumulated quite a bit of historic noise in this header, so do a bit of spring cleaning: - Define bitfields in a vertically aligned fashion, like perf_event_mmap_page::capabilities already does. This makes it easier to see the distribution and sizing of bits within a word, at a glance. The following is much more readable: __u64 cap_bit0 : 1, cap_bit0_is_deprecated : 1, cap_user_rdpmc : 1, cap_user_time : 1, cap_user_time_zero : 1, cap_user_time_short : 1, cap_____res : 58; Than: __u64 cap_bit0:1, cap_bit0_is_deprecated:1, cap_user_rdpmc:1, cap_user_time:1, cap_user_time_zero:1, cap_user_time_short:1, cap_____res:58; So convert all bitfield definitions from the latter style to the former style. - Fix typos and grammar - Fix capitalization - Remove whitespace noise - Harmonize the definitions of various generations and groups of PERF_MEM_ ABI values. - Vertically align all definitions and assignments to the same column (48), as the first definition (enum perf_type_id), throughout the entire header. - And in general make the code and comments to be more in sync with each other and to be more readable overall. No change in functionality. Copy the changes over to tools/include/uapi/linux/perf_event.h. 
Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Ian Rogers Link: https://lore.kernel.org/r/20250521221529.2547099-1-irogers@google.com --- include/uapi/linux/perf_event.h | 652 ++++++++++++++++++++-------------------- 1 file changed, 331 insertions(+), 321 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b2722dae6f1e..78a362b80027 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. */ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. 
*/ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + 
PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - 
PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,24 +372,22 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ 
-#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack - * Max number of entries of branch stack - * should be < hardware limit + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -396,7 +397,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. + * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -451,21 +452,21 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ __reserved_1 : 26; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -474,13 +475,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -510,7 +511,16 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. + * + * Max number of entries of branch stack should be lower + * than the hardware limit. 
+ */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; @@ -537,7 +547,7 @@ struct perf_event_attr { /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -559,21 +569,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -584,7 +594,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -622,7 +632,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -650,7 +660,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -723,7 +733,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -739,7 +749,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -813,7 +823,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. 
+ * Indicates that mmap2 event carries build ID data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -824,26 +834,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -859,11 +869,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -874,7 +884,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. They have the following structure: * * struct { * struct perf_event_header header; @@ -884,7 +894,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -894,7 +904,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -905,7 +915,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -916,7 +926,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -927,7 +937,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -939,7 +949,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -950,7 +960,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -1005,12 +1015,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { 
union perf_sample_weight * { @@ -1071,7 +1081,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1080,12 +1090,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1168,7 +1178,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1246,181 +1256,181 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of 
opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_rsvd : 18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 18, + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. 
* - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ -#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +/* 0x007 available */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* 
Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1439,37 +1449,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* 
target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; -- cgit v1.2.3 From 18355307dc56c198365c6b6b359a4a24db013685 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 May 2025 15:19:13 +0300 Subject: i2c: atr: add static flag Some I2C ATRs do not support dynamic remapping, only static mapping of direct children. Mappings will only be added or removed as a result of devices being added or removed from a child bus. The ATR pool will have to be big enough to accommodate all devices expected to be added to the child buses. Add a new flag that prevents old mappings to be replaced or new mappings to be created in the alias finding code paths. That mens adding a flags parameter to i2c_atr_new() and an i2c_atr_flags enum. Signed-off-by: Cosmin Tanislav Reviewed-by: Luca Ceresoli Reviewed-by: Romain Gantois Signed-off-by: Wolfram Sang --- include/linux/i2c-atr.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h index 1c3a5bcd939f..5aaab1598084 100644 --- a/include/linux/i2c-atr.h +++ b/include/linux/i2c-atr.h @@ -18,6 +18,19 @@ struct device; struct fwnode_handle; struct i2c_atr; +/** + * enum i2c_atr_flags - Flags for an I2C ATR driver + * + * @I2C_ATR_F_STATIC: ATR does not support dynamic mapping, use static mapping. + * Mappings will only be added or removed as a result of + * devices being added or removed from a child bus. + * The ATR pool will have to be big enough to accomodate all + * devices expected to be added to the child buses. + */ +enum i2c_atr_flags { + I2C_ATR_F_STATIC = BIT(0), +}; + /** * struct i2c_atr_ops - Callbacks from ATR to the device driver. * @attach_addr: Notify the driver of a new device connected on a child @@ -65,6 +78,7 @@ struct i2c_atr_adap_desc { * @dev: The device acting as an ATR * @ops: Driver-specific callbacks * @max_adapters: Maximum number of child adapters + * @flags: Flags for ATR * * The new ATR helper is connected to the parent adapter but has no child * adapters. Call i2c_atr_add_adapter() to add some. @@ -74,7 +88,8 @@ struct i2c_atr_adap_desc { * Return: pointer to the new ATR helper object, or ERR_PTR */ struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev, - const struct i2c_atr_ops *ops, int max_adapters); + const struct i2c_atr_ops *ops, int max_adapters, + u32 flags); /** * i2c_atr_delete - Delete an I2C ATR helper. -- cgit v1.2.3 From 17a3a30e8e3df77d1d1f5bc705b903604a1eedc9 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 May 2025 15:19:15 +0300 Subject: i2c: atr: add passthrough flag Some I2C ATRs can have other I2C ATRs as children. 
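(Referring back to the I2C_ATR_F_STATIC addition above.) As a rough, non-upstream sketch of the updated i2c_atr_new() call, assuming a placeholder i2c_atr_ops and a two-link ATR; everything outside the call itself is illustrative:

#include <linux/err.h>
#include <linux/i2c.h>
#include <linux/i2c-atr.h>

/* Placeholder ops; a real driver fills in the attach/detach callbacks. */
static const struct i2c_atr_ops my_atr_ops;

static int my_atr_probe(struct i2c_client *client)
{
	struct i2c_atr *atr;

	/*
	 * Static mapping only: aliases change solely when devices are
	 * added to or removed from a child bus, never per transaction.
	 */
	atr = i2c_atr_new(client->adapter, &client->dev, &my_atr_ops,
			  2 /* max child adapters */, I2C_ATR_F_STATIC);
	if (IS_ERR(atr))
		return PTR_ERR(atr);

	i2c_set_clientdata(client, atr);
	return 0;
}

As the flag documentation notes, the alias pool of such an ATR has to be sized for every device expected on the child buses.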
The I2C messages of the child ATRs need to be forwarded as-is if the parent I2C ATR can only do static mapping. In the case of GMSL, the deserializer I2C ATR actually doesn't have I2C address remapping hardware capabilities, but it is able to select which GMSL link to talk to, allowing it to change the address of the serializer. The child ATRs need to have their alias pools defined in such a way to prevent overlapping addresses between them, but there's no way around this without orchestration between multiple ATR instances. To allow for this use-case, add a flag that allows unmapped addresses to be passed through, since they are already remapped by the child ATRs. There's no case where an address that has not been remapped by the child ATR will hit the parent ATR. Signed-off-by: Cosmin Tanislav Reviewed-by: Luca Ceresoli Reviewed-by: Romain Gantois Signed-off-by: Wolfram Sang --- include/linux/i2c-atr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h index 5aaab1598084..2bb54dc87c8e 100644 --- a/include/linux/i2c-atr.h +++ b/include/linux/i2c-atr.h @@ -26,9 +26,11 @@ struct i2c_atr; * devices being added or removed from a child bus. * The ATR pool will have to be big enough to accomodate all * devices expected to be added to the child buses. + * @I2C_ATR_F_PASSTHROUGH: Allow unmapped incoming addresses to pass through */ enum i2c_atr_flags { I2C_ATR_F_STATIC = BIT(0), + I2C_ATR_F_PASSTHROUGH = BIT(1), }; /** -- cgit v1.2.3 From 6adf48a3aa316ce360e02dd10222e96da9a0eff5 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Thu, 15 May 2025 16:16:30 +0200 Subject: mfd: bcm590xx: Add support for multiple device types + BCM59054 compatible The BCM59054 is another chip from the BCM590xx line of PMUs, commonly used on devices with the BCM21664/BCM23550 chipsets. Prepare the BCM590xx driver for supporting other devices by adding the PMUID register values for supported chip types and store them in the MFD data struct as "pmu_id". (These will be checked against the actual PMUID register values in a later commit.) Then, add a DT compatible for the BCM59054, and provide the PMU ID as OF match data. Signed-off-by: Artur Weber Reviewed-by: Stanislav Jakubek Link: https://lore.kernel.org/r/20250515-bcm59054-v9-3-14ba0ea2ea5b@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/bcm590xx.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h index 6b8791da6119..76c30e629333 100644 --- a/include/linux/mfd/bcm590xx.h +++ b/include/linux/mfd/bcm590xx.h @@ -13,6 +13,10 @@ #include #include +/* PMU ID register values; also used as device type */ +#define BCM590XX_PMUID_BCM59054 0x54 +#define BCM590XX_PMUID_BCM59056 0x56 + /* max register address */ #define BCM590XX_MAX_REGISTER_PRI 0xe7 #define BCM590XX_MAX_REGISTER_SEC 0xf0 @@ -24,6 +28,9 @@ struct bcm590xx { struct regmap *regmap_pri; struct regmap *regmap_sec; unsigned int id; + + /* PMU ID value; also used as device type */ + u8 pmu_id; }; #endif /* __LINUX_MFD_BCM590XX_H */ -- cgit v1.2.3 From d310cdbb4ee6285f374d4dfc32173c35f8a2273e Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Thu, 15 May 2025 16:16:31 +0200 Subject: mfd: bcm590xx: Add PMU ID/revision parsing function The BCM590xx PMUs have two I2C registers for reading the PMU ID and revision. 
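(Referring back to the two new ATR flags.) A hedged sketch of how a GMSL-style deserializer driver might combine them when its child ATRs already remap addresses; the function and its parameters are illustrative only:

#include <linux/i2c.h>
#include <linux/i2c-atr.h>

static struct i2c_atr *my_gmsl_create_atr(struct i2c_adapter *parent,
					  struct device *dev,
					  const struct i2c_atr_ops *ops,
					  int num_links)
{
	/*
	 * The parent ATR cannot remap addresses itself and must forward
	 * transactions already remapped by its child ATRs unchanged, so
	 * combine static mapping with passthrough of unmapped addresses.
	 */
	return i2c_atr_new(parent, dev, ops, num_links,
			   I2C_ATR_F_STATIC | I2C_ATR_F_PASSTHROUGH);
}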
The revision is useful for subdevice drivers, since different revisions may have slight differences in behavior (for example - BCM59054 has different regulator configurations for revision A0 and A1). Check the PMU ID register and make sure it matches the DT compatible. Fetch the digital and analog revision from the PMUREV register so that it can be used in subdevice drivers. Also add some known revision values to bcm590xx.h, for convenience when writing subdevice drivers. Signed-off-by: Artur Weber Reviewed-by: Stanislav Jakubek Link: https://lore.kernel.org/r/20250515-bcm59054-v9-4-14ba0ea2ea5b@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/bcm590xx.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h index 76c30e629333..a54c50b2d2c8 100644 --- a/include/linux/mfd/bcm590xx.h +++ b/include/linux/mfd/bcm590xx.h @@ -17,6 +17,16 @@ #define BCM590XX_PMUID_BCM59054 0x54 #define BCM590XX_PMUID_BCM59056 0x56 +/* Known chip revision IDs */ +#define BCM59054_REV_DIGITAL_A1 1 +#define BCM59054_REV_ANALOG_A1 2 + +#define BCM59056_REV_DIGITAL_A0 1 +#define BCM59056_REV_ANALOG_A0 1 + +#define BCM59056_REV_DIGITAL_B0 2 +#define BCM59056_REV_ANALOG_B0 2 + /* max register address */ #define BCM590XX_MAX_REGISTER_PRI 0xe7 #define BCM590XX_MAX_REGISTER_SEC 0xf0 @@ -31,6 +41,10 @@ struct bcm590xx { /* PMU ID value; also used as device type */ u8 pmu_id; + + /* Chip revision, read from PMUREV reg */ + u8 rev_digital; + u8 rev_analog; }; #endif /* __LINUX_MFD_BCM590XX_H */ -- cgit v1.2.3 From 75dc12b4450269821fca4c8634f5185d28cf2117 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Thu, 15 May 2025 16:16:33 +0200 Subject: regulator: bcm590xx: Store regulator descriptions in table Instead of filling in the regulator description programatically, store the data in a struct. This will make it a bit nicer to introduce support for other BCM590xx chips besides the BCM59056. To do this, add a new struct type, bcm590xx_reg_data, to store all of the necessary information. Drop the old IS_LDO, IS_GPLDO... macros in favor of the "type" field in this struct. Adapt the old bcm590xx_reg struct to the new types. Signed-off-by: Artur Weber Reviewed-by: Stanislav Jakubek Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250515-bcm59054-v9-6-14ba0ea2ea5b@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/bcm590xx.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h index a54c50b2d2c8..09d4ae054242 100644 --- a/include/linux/mfd/bcm590xx.h +++ b/include/linux/mfd/bcm590xx.h @@ -27,6 +27,12 @@ #define BCM59056_REV_DIGITAL_B0 2 #define BCM59056_REV_ANALOG_B0 2 +/* regmap types */ +enum bcm590xx_regmap_type { + BCM590XX_REGMAP_PRI, + BCM590XX_REGMAP_SEC, +}; + /* max register address */ #define BCM590XX_MAX_REGISTER_PRI 0xe7 #define BCM590XX_MAX_REGISTER_SEC 0xf0 -- cgit v1.2.3 From 1007ae0d464ceb55a3740634790521d3543aaab9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 May 2025 12:47:31 +0200 Subject: spi: use container_of_cont() for to_spi_device() Some places in the spi core pass in a const pointer to a device and the default container_of() casts that away, which is not a good idea. Preserve the proper const attribute by using container_of_const() for to_spi_device() instead, which is what it was designed for. 
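(Referring back to the BCM590xx ID/revision changes above.) A hedged sketch of how a subdevice driver might use the new pmu_id and rev_digital fields to pick a revision-specific regulator table; the table symbols are hypothetical and bcm590xx_reg_data is only forward-declared here:

#include <linux/mfd/bcm590xx.h>

struct bcm590xx_reg_data;				/* defined in the regulator driver */
static const struct bcm590xx_reg_data *bcm59054_a0_regs;	/* hypothetical table */
static const struct bcm590xx_reg_data *bcm59054_a1_regs;	/* hypothetical table */

static const struct bcm590xx_reg_data *
bcm59054_pick_regs(const struct bcm590xx *bcm590xx)
{
	if (bcm590xx->pmu_id != BCM590XX_PMUID_BCM59054)
		return NULL;

	/* A1 silicon uses a different regulator configuration than A0. */
	if (bcm590xx->rev_digital == BCM59054_REV_DIGITAL_A1)
		return bcm59054_a1_regs;

	return bcm59054_a0_regs;
}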
Note, this removes the NULL check for a device pointer in the call, but no one was ever checking for that return value, and a device pointer should never be NULL overall anyway, so this should be a safe change. Cc: Mark Brown Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2025052230-fidgeting-stooge-66f5@gregkh Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 0ba5e49bace4..6e64f0193777 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -249,10 +249,7 @@ struct spi_device { static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); -static inline struct spi_device *to_spi_device(const struct device *dev) -{ - return dev ? container_of(dev, struct spi_device, dev) : NULL; -} +#define to_spi_device(__dev) container_of_const(__dev, struct spi_device, dev) /* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) -- cgit v1.2.3 From 1c12fbdf40e17df2efc24bf2009a0c3bfa75bfa7 Mon Sep 17 00:00:00 2001 From: Mathieu Dubois-Briand Date: Thu, 22 May 2025 14:06:20 +0200 Subject: regmap: irq: Add support for chips without separate IRQ status Some GPIO chips allow to rise an IRQ on GPIO level changes but do not provide an IRQ status for each separate line: only the current gpio level can be retrieved. Add support for these chips, emulating IRQ status by comparing GPIO levels with the levels during the previous interrupt. Signed-off-by: Mathieu Dubois-Briand Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250522-mdb-max7360-support-v9-5-74fc03517e41@bootlin.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index d17c5ea3d55d..02b83f5499b8 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1641,6 +1641,8 @@ struct regmap_irq_chip_data; * @ack_invert: Inverted ack register: cleared bits for ack. * @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts. * @status_invert: Inverted status register: cleared bits are active interrupts. + * @status_is_level: Status register is actuall signal level: Xor status + * register with previous value to get active interrupts. * @wake_invert: Inverted wake register: cleared bits are wake enabled. * @type_in_mask: Use the mask registers for controlling irq type. Use this if * the hardware provides separate bits for rising/falling edge @@ -1704,6 +1706,7 @@ struct regmap_irq_chip { unsigned int ack_invert:1; unsigned int clear_ack:1; unsigned int status_invert:1; + unsigned int status_is_level:1; unsigned int wake_invert:1; unsigned int type_in_mask:1; unsigned int clear_on_unmask:1; -- cgit v1.2.3 From 4ff4d86f6cceb6bea583bdb230e5439655778cce Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 19 May 2025 10:45:05 +0200 Subject: net: Add support for providing the PTP hardware source in tsinfo Multi-PTP source support within a network topology has been merged, but the hardware timestamp source is not yet exposed to users. Currently, users only see the PTP index, which does not indicate whether the timestamp comes from a PHY or a MAC. 
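(Referring back to the regmap status_is_level flag above.) A minimal, hypothetical chip description for a GPIO expander whose only "status" is the current line level; the register addresses and IRQ list are made up for illustration:

#include <linux/bits.h>
#include <linux/regmap.h>

static const struct regmap_irq my_expander_irqs[] = {
	REGMAP_IRQ_REG(0, 0, BIT(0)),
	REGMAP_IRQ_REG(1, 0, BIT(1)),
};

static const struct regmap_irq_chip my_expander_irq_chip = {
	.name		 = "my-gpio-expander",
	.status_base	 = 0x00,	/* input level register */
	.mask_base	 = 0x01,
	.num_regs	 = 1,
	.irqs		 = my_expander_irqs,
	.num_irqs	 = ARRAY_SIZE(my_expander_irqs),
	/*
	 * No separate per-line IRQ status register: the level register is
	 * used as status and XORed with the previous snapshot to derive
	 * which lines changed.
	 */
	.status_is_level = 1,
};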
Add support for reporting the hwtstamp source using a hwtstamp-source field, alongside hwtstamp-phyindex, to describe the origin of the hardware timestamp. Remove HWTSTAMP_SOURCE_UNSPEC enum value as it is not used at all. Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20250519-feature_ptp_source-v4-1-5d10e19a0265@bootlin.com Signed-off-by: Paolo Abeni --- include/linux/ethtool.h | 5 +++++ include/linux/net_tstamp.h | 7 +------ include/uapi/linux/ethtool_netlink_generated.h | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 117718c24814..5e0dd333ad1f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #define ETHTOOL_MM_MAX_VERIFY_TIME_MS 128 @@ -830,6 +831,8 @@ struct ethtool_rxfh_param { * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none * @phc_qualifier: qualifier of the associated PHC + * @phc_source: source device of the associated PHC + * @phc_phyindex: index of PHY device source of the associated PHC * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values */ @@ -838,6 +841,8 @@ struct kernel_ethtool_ts_info { u32 so_timestamping; int phc_index; enum hwtstamp_provider_qualifier phc_qualifier; + enum hwtstamp_source phc_source; + int phc_phyindex; enum hwtstamp_tx_types tx_types; enum hwtstamp_rx_filters rx_filters; }; diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index ff0758e88ea1..f4936d9c2b3c 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -4,6 +4,7 @@ #define _LINUX_NET_TIMESTAMPING_H_ #include +#include #define SOF_TIMESTAMPING_SOFTWARE_MASK (SOF_TIMESTAMPING_RX_SOFTWARE | \ SOF_TIMESTAMPING_TX_SOFTWARE | \ @@ -13,12 +14,6 @@ SOF_TIMESTAMPING_TX_HARDWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) -enum hwtstamp_source { - HWTSTAMP_SOURCE_UNSPEC, - HWTSTAMP_SOURCE_NETDEV, - HWTSTAMP_SOURCE_PHYLIB, -}; - /** * struct hwtstamp_provider_desc - hwtstamp provider description * diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 30c8dad6214e..9a02f579de22 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -37,6 +37,18 @@ enum ethtool_tcp_data_split { ETHTOOL_TCP_DATA_SPLIT_ENABLED, }; +/** + * enum hwtstamp_source - Source of the hardware timestamp + * @HWTSTAMP_SOURCE_NETDEV: Hardware timestamp comes from a MAC or a device + * which has MAC and PHY integrated + * @HWTSTAMP_SOURCE_PHYLIB: Hardware timestamp comes from one PHY device of the + * network topology + */ +enum hwtstamp_source { + HWTSTAMP_SOURCE_NETDEV = 1, + HWTSTAMP_SOURCE_PHYLIB, +}; + enum { ETHTOOL_A_HEADER_UNSPEC, ETHTOOL_A_HEADER_DEV_INDEX, @@ -401,6 +413,8 @@ enum { ETHTOOL_A_TSINFO_PHC_INDEX, ETHTOOL_A_TSINFO_STATS, ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER, + ETHTOOL_A_TSINFO_HWTSTAMP_SOURCE, + ETHTOOL_A_TSINFO_HWTSTAMP_PHYINDEX, __ETHTOOL_A_TSINFO_CNT, ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) -- cgit v1.2.3 From bfc6270ab3ff9478a4cad4d49482854d632dbce3 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 21 May 2025 16:36:23 -0700 Subject: cxl/features: Remove the inline specifier from to_cxlfs() to_cxlfs() was declared 'inline' in the 
header but only defined in drivers/cxl/core/features.c. This has worked because features.c was the only file using the function and the definition happened to be available in the same compilation unit. However, in preparation for a second .c file using the header and needing to call the function, the inline specifier became an issue. Sparse flagged the declaration as invalid since 'inline' requires a visible definition at the point of use. Defining the function in the header was considered but rejected, as it depends on internal symbols not visible at that level. Remove the inline specifier to correct the linkage violation. Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Shiju Jose Link: https://patch.msgid.link/20250521233625.1745849-1-alison.schofield@intel.com Signed-off-by: Dave Jiang --- include/cxl/features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/cxl/features.h b/include/cxl/features.h index 5f7f842765a5..b9297693dae7 100644 --- a/include/cxl/features.h +++ b/include/cxl/features.h @@ -64,7 +64,7 @@ struct cxl_features_state { struct cxl_mailbox; struct cxl_memdev; #ifdef CONFIG_CXL_FEATURES -inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds); +struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds); int devm_cxl_setup_features(struct cxl_dev_state *cxlds); int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd); #else -- cgit v1.2.3 From 1d2aeee6dd629084cc524ec2d00100e70aa3c824 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 18 Mar 2025 10:13:41 +0100 Subject: mfd: aat2870: Use per-client debugfs directory The I2C core now provides a debugfs entry for each client. Let this driver use it instead of the custom directory in debugfs root. Further improvements by this change: automatic clean up on removal, support of multiple instances. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250318091426.22258-2-wsa+renesas@sang-engineering.com Signed-off-by: Lee Jones --- include/linux/mfd/aat2870.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h index 2445842d482d..c7a3c53eba68 100644 --- a/include/linux/mfd/aat2870.h +++ b/include/linux/mfd/aat2870.h @@ -133,9 +133,6 @@ struct aat2870_data { int (*read)(struct aat2870_data *aat2870, u8 addr, u8 *val); int (*write)(struct aat2870_data *aat2870, u8 addr, u8 val); int (*update)(struct aat2870_data *aat2870, u8 addr, u8 mask, u8 val); - - /* for debugfs */ - struct dentry *dentry_root; }; struct aat2870_subdev_info { -- cgit v1.2.3 From 3a91f28fab43f093c72312148288d125ae160c2d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 22 May 2025 23:20:39 +0800 Subject: io_uring: add helper io_uring_cmd_ctx_handle() Add helper io_uring_cmd_ctx_handle() for driver to track per-context resource, such as registered kernel io buffer. 
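As a hedged illustration of the intended use (the driver-side check below is a sketch, not code from this series):

#include <linux/errno.h>
#include <linux/io_uring/cmd.h>

/*
 * Sketch: require that a buffer registered through one uring_cmd is
 * unregistered by a command issued on the same io_ring_ctx, using the
 * opaque context handle as the key.
 */
static int my_drv_check_same_ctx(struct io_uring_cmd *reg_cmd,
				 struct io_uring_cmd *unreg_cmd)
{
	if (io_uring_cmd_ctx_handle(reg_cmd) !=
	    io_uring_cmd_ctx_handle(unreg_cmd))
		return -EINVAL;

	return 0;
}

This mirrors the kind of same-context requirement that the ublk change below documents.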
Suggested-by: Caleb Sander Mateos Signed-off-by: Ming Lei Reviewed-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250522152043.399824-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 0634a3de1782..53408124c1e5 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -140,6 +140,15 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur return cmd_to_io_kiocb(cmd)->async_data; } +/* + * Return uring_cmd's context reference as its context handle for driver to + * track per-context resource, such as registered kernel IO buffer + */ +static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd) +{ + return cmd_to_io_kiocb(cmd)->ctx; +} + int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, void (*release)(void *), unsigned int index, unsigned int issue_flags); -- cgit v1.2.3 From 914e0dc5082a335ea5e7d905e99e1a1cde001369 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 22 May 2025 23:20:40 +0800 Subject: ublk: run auto buf unregisgering in same io_ring_ctx with registering UBLK_F_AUTO_BUF_REG requires that the buffer registered automatically is unregistered in same `io_ring_ctx`, so check it explicitly. Document this requirement for UBLK_F_AUTO_BUF_REG. Drop WARN_ON_ONCE() which is triggered from userspace code path. Fixes: 99c1e4eb6a3f ("ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG") Reported-by: Caleb Sander Mateos Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250522152043.399824-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index c4b9942697fc..1c40632cb164 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -226,7 +226,11 @@ * * For using this feature: * - * - ublk server has to create sparse buffer table + * - ublk server has to create sparse buffer table on the same `io_ring_ctx` + * for issuing `UBLK_IO_FETCH_REQ` and `UBLK_IO_COMMIT_AND_FETCH_REQ`. + * If uring_cmd isn't issued on same `io_ring_ctx`, it is ublk server's + * responsibility to unregister the buffer by issuing `IO_UNREGISTER_IO_BUF` + * manually, otherwise this ublk request won't complete. * * - ublk server passes auto buf register data via uring_cmd's sqe->addr, * `struct ublk_auto_buf_reg` is populated from sqe->addr, please see -- cgit v1.2.3 From 28be240c763a44932bfe573f09e145d182e52609 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Thu, 22 May 2025 09:04:50 -0600 Subject: trace/io_uring: fix io_uring_local_work_run ctx documentation The comment for the tracepoint io_uring_local_work_run refers to a field "tctx" and a type "io_uring_ctx", neither of which exist. "tctx" looks to mean "ctx" and "io_uring_ctx" should be "io_ring_ctx". 
Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250522150451.2385652-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index fb81c533b310..178ab6f611be 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -645,7 +645,7 @@ TRACE_EVENT(io_uring_short_write, /* * io_uring_local_work_run - ran ring local task work * - * @tctx: pointer to a io_uring_ctx + * @ctx: pointer to an io_ring_ctx * @count: how many functions it ran * @loops: how many loops it ran * -- cgit v1.2.3 From 62f134ab190c5fd5c9f68fe638ad8e13bb8a4cb4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 May 2025 12:08:05 +0200 Subject: ALSA: core: fix up bus match const issues. In commit d69d80484598 ("driver core: have match() callback in struct bus_type take a const *"), the match bus callback was changed to have the driver be a const pointer. Unfortunately that const attribute was thrown away when container_of() is called, which is not correct and was not caught by the compiler due to how container_of() is implemented. Fix this up by correctly preserving the const attribute of the driver passed to the bus match function which requires the hdac_driver match function to also take a const pointer for the driver structure. Cc: Jaroslav Kysela Cc: Takashi Iwai Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2025052204-hyphen-thermal-3e72@gregkh Signed-off-by: Takashi Iwai --- include/sound/hdaudio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index b098ceadbe74..9a70048adbc0 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -223,7 +223,7 @@ struct hdac_driver { struct device_driver driver; int type; const struct hda_device_id *id_table; - int (*match)(struct hdac_device *dev, struct hdac_driver *drv); + int (*match)(struct hdac_device *dev, const struct hdac_driver *drv); void (*unsol_event)(struct hdac_device *dev, unsigned int event); /* fields used by ext bus APIs */ @@ -235,7 +235,7 @@ struct hdac_driver { #define drv_to_hdac_driver(_drv) container_of(_drv, struct hdac_driver, driver) const struct hda_device_id * -hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv); +hdac_get_device_id(struct hdac_device *hdev, const struct hdac_driver *drv); /* * Bus verb operators -- cgit v1.2.3 From 9e8c67a9e526f497d606d721cf6b92dd68d90806 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 May 2025 00:44:44 -0400 Subject: sched_ext: Introduce cgroup_lifetime_notifier Other subsystems may make use of the cgroup hierarchy with the cgroup_bpf support being one such example. For such a feature, it's useful to be able to hook into cgroup creation and destruction paths to perform feature-specific initializations and cleanups. Add cgroup_lifetime_notifier which generates CGROUP_LIFETIME_ONLINE and CGROUP_LIFETIME_OFFLINE events whenever cgroups are created and destroyed, respectively. The next patch will convert cgroup_bpf to use the new notifier and other uses are planned. 
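As a rough usage sketch (assuming the notifier payload is the struct cgroup pointer, which the patch text implies but does not spell out here):

#include <linux/cgroup.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static int my_cgroup_lifetime_cb(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct cgroup *cgrp = data;	/* assumed payload */

	switch (action) {
	case CGROUP_LIFETIME_ONLINE:
		pr_debug("cgroup %p came online\n", cgrp);
		/* allocate/initialize feature-specific per-cgroup state */
		break;
	case CGROUP_LIFETIME_OFFLINE:
		pr_debug("cgroup %p going offline\n", cgrp);
		/* tear down feature-specific per-cgroup state */
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block my_cgroup_nb = {
	.notifier_call = my_cgroup_lifetime_cb,
};

static int __init my_feature_init(void)
{
	return blocking_notifier_chain_register(&cgroup_lifetime_notifier,
						&my_cgroup_nb);
}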
Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 989c08b09691..7865b681731c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -40,7 +41,7 @@ struct kernel_clone_args; #ifdef CONFIG_CGROUPS -enum { +enum css_task_iter_flags { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ @@ -66,10 +67,16 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +enum cgroup_lifetime_events { + CGROUP_LIFETIME_ONLINE, + CGROUP_LIFETIME_OFFLINE, +}; + extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; extern spinlock_t css_set_lock; +extern struct blocking_notifier_head cgroup_lifetime_notifier; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include -- cgit v1.2.3 From 82648b8b2ae0a0ff371e2a98133844658cfaae9a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 May 2025 00:46:12 -0400 Subject: sched_ext: Convert cgroup BPF support to use cgroup_lifetime_notifier Replace explicit cgroup_bpf_inherit/offline() calls from cgroup creation/destruction paths with notification callback registered on cgroup_lifetime_notifier. Signed-off-by: Tejun Heo --- include/linux/bpf-cgroup.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 9de7adb68294..60d1511b4f4d 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -114,8 +114,7 @@ struct bpf_prog_list { u32 flags; }; -int cgroup_bpf_inherit(struct cgroup *cgrp); -void cgroup_bpf_offline(struct cgroup *cgrp); +void __init cgroup_bpf_lifetime_notifier_init(void); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, @@ -431,8 +430,10 @@ const struct bpf_func_proto * cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); #else -static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } -static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} +static inline void cgroup_bpf_lifetime_notifier_init(void) +{ + return; +} static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, -- cgit v1.2.3 From 3f12680913fda8de06c21e836dd5f246fe1684e5 Mon Sep 17 00:00:00 2001 From: Yuquan Wang Date: Thu, 8 May 2025 10:27:19 +0800 Subject: mm: numa_memblks: introduce numa_add_reserved_memblk acpi_parse_cfmws() currently adds empty CFMWS ranges to numa_meminfo with the expectation that numa_cleanup_meminfo moves them to numa_reserved_meminfo. There is no need for that indirection when it is known in advance that these unpopulated ranges are meant for numa_reserved_meminfo in support of future hotplug / CXL provisioning. Introduce and use numa_add_reserved_memblk() to add the empty CFMWS ranges directly. 
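A minimal sketch of the resulting call pattern; the wrapper name and the populated/empty decision are illustrative, not lifted from acpi_parse_cfmws():

#include <linux/init.h>
#include <linux/numa_memblks.h>
#include <linux/types.h>

/*
 * Sketch: populated windows become regular NUMA memblks, while empty
 * windows are recorded directly as reserved ranges for future
 * hotplug / CXL provisioning.
 */
static int __init my_add_cfmws_window(int nid, u64 start, u64 end,
				      bool populated)
{
	if (populated)
		return numa_add_memblk(nid, start, end);

	return numa_add_reserved_memblk(nid, start, end);
}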
Link: https://lkml.kernel.org/r/20250508022719.3941335-1-wangyuquan1236@phytium.com.cn Signed-off-by: Yuquan Wang Reviewed-by: Alison Schofield Cc: Bruno Faccini Cc: Chen Baozi Cc: Dan Williams Cc: David Hildenbrand Cc: Haibo Xu Cc: Huacai Chen Cc: Joanthan Cameron Cc: Len Brown Cc: Mike Rapoport Cc: Robert Richter Signed-off-by: Andrew Morton --- include/linux/numa_memblks.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index dd85613cdd86..991076cba7c5 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -22,6 +22,7 @@ struct numa_meminfo { }; int __init numa_add_memblk(int nodeid, u64 start, u64 end); +int __init numa_add_reserved_memblk(int nid, u64 start, u64 end); void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); int __init numa_cleanup_meminfo(struct numa_meminfo *mi); -- cgit v1.2.3 From e1e1a3ae7f9f0cb06e80af0f24927be63149d081 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 May 2025 14:34:15 +0200 Subject: mm: convert track_pfn_insert() to pfnmap_setup_cachemode*() ... by factoring it out from track_pfn_remap() into pfnmap_setup_cachemode() and provide pfnmap_setup_cachemode_pfn() as a replacement for track_pfn_insert(). For PMDs/PUDs, we keep checking a single pfn only. Add some documentation, and also document why it is valid to not check the whole pfn range. We'll reuse pfnmap_setup_cachemode() from core MM next. Link: https://lkml.kernel.org/r/20250512123424.637989-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Ingo Molnar [x86 bits] Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Andy Lutomirski Cc: Borislav Betkov Cc: Dave Airlie Cc: "H. Peter Anvin" Cc: Jani Nikula Cc: Jann Horn Cc: Jonas Lahtinen Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Peter Xu Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Tvrtko Ursulin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 52 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index f1e890b60460..be1745839871 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1496,13 +1496,10 @@ static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, return 0; } -/* - * track_pfn_insert is called when a _new_ single pfn is established - * by vmf_insert_pfn(). - */ -static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - pfn_t pfn) +static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, + pgprot_t *prot) { + return 0; } /* @@ -1552,8 +1549,32 @@ static inline void untrack_pfn_clear(struct vm_area_struct *vma) extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long addr, unsigned long size); -extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - pfn_t pfn); + +/** + * pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range + * @pfn: the start of the pfn range + * @size: the size of the pfn range in bytes + * @prot: the pgprot to modify + * + * Lookup the cachemode for the pfn range starting at @pfn with the size + * @size and store it in @prot, leaving other data in @prot unchanged. 
+ * + * This allows for a hardware implementation to have fine-grained control of + * memory cache behavior at page level granularity. Without a hardware + * implementation, this function does nothing. + * + * Currently there is only one implementation for this - x86 Page Attribute + * Table (PAT). See Documentation/arch/x86/pat.rst for more details. + * + * This function can fail if the pfn range spans pfns that require differing + * cachemodes. If the pfn range was previously verified to have a single + * cachemode, it is sufficient to query only a single pfn. The assumption is + * that this is the case for drivers using the vmf_insert_pfn*() interface. + * + * Returns 0 on success and -EINVAL on error. + */ +int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, + pgprot_t *prot); extern int track_pfn_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, unsigned long *pfn); extern void untrack_pfn_copy(struct vm_area_struct *dst_vma, @@ -1563,6 +1584,21 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, extern void untrack_pfn_clear(struct vm_area_struct *vma); #endif +/** + * pfnmap_setup_cachemode_pfn - setup the cachemode in the pgprot for a pfn + * @pfn: the pfn + * @prot: the pgprot to modify + * + * Lookup the cachemode for @pfn and store it in @prot, leaving other + * data in @prot unchanged. + * + * See pfnmap_setup_cachemode() for details. + */ +static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot) +{ + pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot); +} + #ifdef CONFIG_MMU #ifdef __HAVE_COLOR_ZERO_PAGE static inline int is_zero_pfn(unsigned long pfn) -- cgit v1.2.3 From db44863a4d9df3604c4ff76507bb2056b6392e58 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 May 2025 14:34:16 +0200 Subject: mm: introduce pfnmap_track() and pfnmap_untrack() and use them for memremap Let's provide variants of track_pfn_remap() and untrack_pfn() that won't mess with VMAs, and replace the usage in mm/memremap.c. Add some documentation. Link: https://lkml.kernel.org/r/20250512123424.637989-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Acked-by: Ingo Molnar [x86 bits] Reviewed-by: Liam R. Howlett Cc: Andy Lutomirski Cc: Borislav Betkov Cc: Dave Airlie Cc: "H. Peter Anvin" Cc: Jani Nikula Cc: Jann Horn Cc: Jonas Lahtinen Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Peter Xu Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Tvrtko Ursulin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index be1745839871..90f72cd35839 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1502,6 +1502,16 @@ static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, return 0; } +static inline int pfnmap_track(unsigned long pfn, unsigned long size, + pgprot_t *prot) +{ + return 0; +} + +static inline void pfnmap_untrack(unsigned long pfn, unsigned long size) +{ +} + /* * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page * tables copied during copy_page_range(). 
Will store the pfn to be @@ -1575,6 +1585,35 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, */ int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot); + +/** + * pfnmap_track - track a pfn range + * @pfn: the start of the pfn range + * @size: the size of the pfn range in bytes + * @prot: the pgprot to track + * + * Requested the pfn range to be 'tracked' by a hardware implementation and + * setup the cachemode in @prot similar to pfnmap_setup_cachemode(). + * + * This allows for fine-grained control of memory cache behaviour at page + * level granularity. Tracking memory this way is persisted across VMA splits + * (VMA merging does not apply for VM_PFNMAP). + * + * Currently, there is only one implementation for this - x86 Page Attribute + * Table (PAT). See Documentation/arch/x86/pat.rst for more details. + * + * Returns 0 on success and -EINVAL on error. + */ +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot); + +/** + * pfnmap_untrack - untrack a pfn range + * @pfn: the start of the pfn range + * @size: the size of the pfn range in bytes + * + * Untrack a pfn range previously tracked through pfnmap_track(). + */ +void pfnmap_untrack(unsigned long pfn, unsigned long size); extern int track_pfn_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, unsigned long *pfn); extern void untrack_pfn_copy(struct vm_area_struct *dst_vma, -- cgit v1.2.3 From f8e97613fed25758ddf52159b87e1c66e619a23a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 May 2025 14:34:17 +0200 Subject: mm: convert VM_PFNMAP tracking to pfnmap_track() + pfnmap_untrack() Let's use our new interface. In remap_pfn_range(), we'll now decide whether we have to track (full VMA covered) or only lookup the cachemode (partial VMA covered). Remember what we have to untrack by linking it from the VMA. When duplicating VMAs (e.g., splitting, mremap, fork), we'll handle it similar to anon VMA names, and use a kref to share the tracking. Once the last VMA un-refs our tracking data, we'll do the untracking, which simplifies things a lot and should sort our various issues we saw recently, for example, when partially unmapping/zapping a tracked VMA. This change implies that we'll keep tracking the original PFN range even after splitting + partially unmapping it: not too bad, because it was not working reliably before. The only thing that kind-of worked before was shrinking such a mapping using mremap(): we managed to adjust the reservation in a hacky way, now we won't adjust the reservation but leave it around until all involved VMAs are gone. If that ever turns out to be an issue, we could hook into VM splitting code and split the tracking; however, that adds complexity that might not be required, so we'll keep it simple for now. Link: https://lkml.kernel.org/r/20250512123424.637989-5-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Ingo Molnar [x86 bits] Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Andy Lutomirski Cc: Borislav Betkov Cc: Dave Airlie Cc: "H. 
Peter Anvin" Cc: Jani Nikula Cc: Jann Horn Cc: Jonas Lahtinen Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Peter Xu Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Tvrtko Ursulin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 2 ++ include/linux/mm_types.h | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index f9157a0c42a5..89b518ff097e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -447,6 +447,8 @@ static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, #endif /* CONFIG_ANON_VMA_NAME */ +void pfnmap_track_ctx_release(struct kref *ref); + static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 15808cad2bc1..3e934dc6057c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -763,6 +763,14 @@ struct vma_numab_state { int prev_scan_seq; }; +#ifdef __HAVE_PFNMAP_TRACKING +struct pfnmap_track_ctx { + struct kref kref; + unsigned long pfn; + unsigned long size; /* in bytes */ +}; +#endif + /* * Describes a VMA that is about to be mmap()'ed. Drivers may choose to * manipulate mutable fields which will cause those fields to be updated in the @@ -900,6 +908,9 @@ struct vm_area_struct { struct anon_vma_name *anon_name; #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; +#ifdef __HAVE_PFNMAP_TRACKING + struct pfnmap_track_ctx *pfnmap_track_ctx; +#endif } __randomize_layout; #ifdef CONFIG_NUMA -- cgit v1.2.3 From 7bd7d74ec01954fde9eb65b065eb55bcda4f86e2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 May 2025 14:34:18 +0200 Subject: x86/mm/pat: remove old pfnmap tracking interface We can now get rid of the old interface along with get_pat_info() and follow_phys(). Link: https://lkml.kernel.org/r/20250512123424.637989-6-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Acked-by: Ingo Molnar [x86 bits] Reviewed-by: Liam R. Howlett Cc: Andy Lutomirski Cc: Borislav Betkov Cc: Dave Airlie Cc: "H. Peter Anvin" Cc: Jani Nikula Cc: Jann Horn Cc: Jonas Lahtinen Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Peter Xu Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Tvrtko Ursulin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 66 ------------------------------------------------- 1 file changed, 66 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 90f72cd35839..0b6e1f781d86 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1485,17 +1485,6 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) * vmf_insert_pfn. */ -/* - * track_pfn_remap is called when a _new_ pfn mapping is being established - * by remap_pfn_range() for physical range indicated by pfn and size. 
- */ -static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long addr, - unsigned long size) -{ - return 0; -} - static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot) { @@ -1511,55 +1500,7 @@ static inline int pfnmap_track(unsigned long pfn, unsigned long size, static inline void pfnmap_untrack(unsigned long pfn, unsigned long size) { } - -/* - * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page - * tables copied during copy_page_range(). Will store the pfn to be - * passed to untrack_pfn_copy() only if there is something to be untracked. - * Callers should initialize the pfn to 0. - */ -static inline int track_pfn_copy(struct vm_area_struct *dst_vma, - struct vm_area_struct *src_vma, unsigned long *pfn) -{ - return 0; -} - -/* - * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during - * copy_page_range(), but after track_pfn_copy() was already called. Can - * be called even if track_pfn_copy() did not actually track anything: - * handled internally. - */ -static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma, - unsigned long pfn) -{ -} - -/* - * untrack_pfn is called while unmapping a pfnmap for a region. - * untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case pfn, size are zero). - */ -static inline void untrack_pfn(struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, - bool mm_wr_locked) -{ -} - -/* - * untrack_pfn_clear is called in the following cases on a VM_PFNMAP VMA: - * - * 1) During mremap() on the src VMA after the page tables were moved. - * 2) During fork() on the dst VMA, immediately after duplicating the src VMA. - */ -static inline void untrack_pfn_clear(struct vm_area_struct *vma) -{ -} #else -extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long addr, - unsigned long size); - /** * pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range * @pfn: the start of the pfn range @@ -1614,13 +1555,6 @@ int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot); * Untrack a pfn range previously tracked through pfnmap_track(). */ void pfnmap_untrack(unsigned long pfn, unsigned long size); -extern int track_pfn_copy(struct vm_area_struct *dst_vma, - struct vm_area_struct *src_vma, unsigned long *pfn); -extern void untrack_pfn_copy(struct vm_area_struct *dst_vma, - unsigned long pfn); -extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size, bool mm_wr_locked); -extern void untrack_pfn_clear(struct vm_area_struct *vma); #endif /** -- cgit v1.2.3 From cba4dbeb7bfcf8b69d491288c3bf877ead883214 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 May 2025 14:34:19 +0200 Subject: mm: remove VM_PAT It's unused, so let's remove it. Link: https://lkml.kernel.org/r/20250512123424.637989-7-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Acked-by: Ingo Molnar [x86 bits] Reviewed-by: Liam R. Howlett Cc: Andy Lutomirski Cc: Borislav Betkov Cc: Dave Airlie Cc: "H. 
Peter Anvin" Cc: Jani Nikula Cc: Jann Horn Cc: Jonas Lahtinen Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Peter Xu Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Steven Rostedt Cc: Thomas Gleinxer Cc: Tvrtko Ursulin Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 +--- include/trace/events/mmflags.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 43748c8f3454..a916ea42cfd5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -357,9 +357,7 @@ extern unsigned int kobjsize(const void *objp); # define VM_SHADOW_STACK VM_NONE #endif -#if defined(CONFIG_X86) -# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ -#elif defined(CONFIG_PPC64) +#if defined(CONFIG_PPC64) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 15aae955a10b..aa441f593e9a 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -172,9 +172,7 @@ IF_HAVE_PG_ARCH_3(arch_3) __def_pageflag_names \ ) : "none" -#if defined(CONFIG_X86) -#define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } -#elif defined(CONFIG_PPC64) +#if defined(CONFIG_PPC64) #define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } #elif defined(CONFIG_PARISC) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } -- cgit v1.2.3 From 5053383829ab2b17dc4766a832004c848f4af9df Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 12 May 2025 10:57:11 +0800 Subject: mm: khugepaged: convert set_huge_pmd() to take a folio We've already gotten the stable locked folio in collapse_pte_mapped_thp(), so just use folio for set_huge_pmd() to set the PMD entry, which is more straightforward. Moreover, we will check the folio size in do_set_pmd(), so we can remove the unnecessary VM_BUG_ON() in set_huge_pmd(). While we are at it, we can also remove the PageTransHuge(), as it currently has no callers. Link: https://lkml.kernel.org/r/110c3e1ec5fe7854a0e2c95ffcbc985817180ed7.1747017104.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: David Hildenbrand Cc: Dev Jain Cc: Johannes Weiner Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 37b11f15dbd9..1c1d49554c71 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -907,20 +907,6 @@ FOLIO_FLAG_FALSE(partially_mapped) #define PG_head_mask ((1UL << PG_head)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -/* - * PageHuge() only returns true for hugetlbfs pages, but not for - * normal or transparent huge pages. - * - * PageTransHuge() returns true for both transparent huge and - * hugetlbfs pages, but not normal pages. PageTransHuge() can only be - * called only in the core VM paths where hugetlbfs pages can't exist. 
- */ -static inline int PageTransHuge(const struct page *page) -{ - VM_BUG_ON_PAGE(PageTail(page), page); - return PageHead(page); -} - /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known @@ -931,7 +917,6 @@ static inline int PageTransCompound(const struct page *page) return PageCompound(page); } #else -TESTPAGEFLAG_FALSE(TransHuge, transhuge) TESTPAGEFLAG_FALSE(TransCompound, transcompound) #endif -- cgit v1.2.3 From 698c0089cdf0b27d37f0b24824b682c23de5f72d Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 12 May 2025 10:57:12 +0800 Subject: mm: convert do_set_pmd() to take a folio In do_set_pmd(), we always use the folio->page to build PMD mappings for the entire folio. Since all callers of do_set_pmd() already hold a stable folio, converting do_set_pmd() to take a folio is safe and more straightforward. In addition, to ensure the extensibility of do_set_pmd() for supporting larger folios beyond PMD size, we keep the 'page' parameter to specify which page within the folio should be mapped. No functional changes expected. Link: https://lkml.kernel.org/r/9b488f4ecb4d3fd8634e3d448dd0ed6964482480.1747017104.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Zi Yan Acked-by: David Hildenbrand Cc: Dev Jain Cc: Johannes Weiner Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a916ea42cfd5..cd2e513189d6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1235,7 +1235,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) return pte; } -vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page); +vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *page); void set_pte_range(struct vm_fault *vmf, struct folio *folio, struct page *page, unsigned int nr, unsigned long addr); -- cgit v1.2.3 From 6669d1aaa0c45a50a4cc5f1756ab03578eaebd18 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 14 May 2025 09:40:24 +0100 Subject: mm: remove WARN_ON_ONCE() in file_has_valid_mmap_hooks() Having encountered a trinity report in linux-next (Linked in the 'Closes' tag) it appears that there are legitimate situations where a file-backed mapping can be acquired but no file->f_op->mmap or file->f_op->mmap_prepare is set, at which point do_mmap() should simply error out with -ENODEV. Since previously we did not warn in this scenario and it appears we rely upon this, restore this situation, while retaining a WARN_ON_ONCE() for the case where both are set, which is absolutely incorrect and must be addressed and thus always requires a warning. If further work is required to chase down precisely what is causing this, then we can later restore this, but it makes no sense to hold up this series to do so, as this is existing and apparently expected behaviour. 
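For context, the caller-side behaviour this restores can be sketched as follows (a hypothetical simplification of the do_mmap() check, not code from this patch): a file-backed request whose file provides neither hook is simply rejected with -ENODEV, without warning.

	/* sketch only: inside do_mmap(), for the file-backed case */
	if (!file_has_valid_mmap_hooks(file))
		return -ENODEV;	/* no ->mmap and no ->mmap_prepare: quietly refuse */
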
Link: https://lkml.kernel.org/r/20250514084024.29148-1-lorenzo.stoakes@oracle.com Fixes: c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback") Signed-off-by: Lorenzo Stoakes Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202505141434.96ce5e5d-lkp@intel.com Reviewed-by: Vlastimil Babka Reviewed-by: Pedro Falcato Acked-by: David Hildenbrand Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e2721a1ff13d..09c8495dacdb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2248,7 +2248,7 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) /* Hooks are mutually exclusive. */ if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) return false; - if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare)) + if (!has_mmap && !has_mmap_prepare) return false; return true; -- cgit v1.2.3 From 2aad4edf6e1018b28b7000faec56b7b6e585c8e1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 16 May 2025 17:34:46 -0700 Subject: mm: rename try_alloc_pages() to alloc_pages_nolock() The "try_" prefix is confusing, since it made people believe that try_alloc_pages() is analogous to spin_trylock() and NULL return means EAGAIN. This is not the case. If it returns NULL there is no reason to call it again. It will most likely return NULL again. Hence rename it to alloc_pages_nolock() to make it symmetrical to free_pages_nolock() and document that NULL means ENOMEM. Link: https://lkml.kernel.org/r/20250517003446.60260-1-alexei.starovoitov@gmail.com Signed-off-by: Alexei Starovoitov Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Acked-by: Harry Yoo Cc: Andrii Nakryiko Cc: Kumar Kartikeya Dwivedi Cc: Michal Hocko Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/gfp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c9fa6309c903..be160e8d8bcb 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -45,13 +45,13 @@ static inline bool gfpflags_allow_spinning(const gfp_t gfp_flags) * !__GFP_DIRECT_RECLAIM -> direct claim is not allowed. * !__GFP_KSWAPD_RECLAIM -> it's not safe to wake up kswapd. * All GFP_* flags including GFP_NOWAIT use one or both flags. - * try_alloc_pages() is the only API that doesn't specify either flag. + * alloc_pages_nolock() is the only API that doesn't specify either flag. * * This is stronger than GFP_NOWAIT or GFP_ATOMIC because * those are guaranteed to never block on a sleeping lock. * Here we are enforcing that the allocation doesn't ever spin * on any locks (i.e. only trylocks). There is no high level - * GFP_$FOO flag for this use in try_alloc_pages() as the + * GFP_$FOO flag for this use in alloc_pages_nolock() as the * regular page allocator doesn't fully support this * allocation mode. */ @@ -354,8 +354,8 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp, } #define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) -struct page *try_alloc_pages_noprof(int nid, unsigned int order); -#define try_alloc_pages(...) 
alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__)) +struct page *alloc_pages_nolock_noprof(int nid, unsigned int order); +#define alloc_pages_nolock(...) alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__)) extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); #define __get_free_pages(...) alloc_hooks(get_free_pages_noprof(__VA_ARGS__)) -- cgit v1.2.3 From cc79061b8fc119807111b615aa791562374b15b2 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 13 May 2025 14:56:35 +0800 Subject: mm: khugepaged: decouple SHMEM and file folios' collapse Originally, the file pages collapse was intended for tmpfs/shmem to merge into THP in the background. However, now not only tmpfs/shmem can support large folios, but some other file systems (such as XFS, erofs ...) also support large folios. Therefore, it is time to decouple the support of file folios collapse from SHMEM. Link: https://lkml.kernel.org/r/ce5c2314e0368cf34bda26f9bacf01c982d4da17.1747119309.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: David Hildenbrand Acked-by: Zi Yan Cc: Dev Jain Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mariano Pache Cc: Michal Hocko Cc: Mike Rapoport Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/khugepaged.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 1f46046080f5..b8d69cfbb58b 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -15,16 +15,8 @@ extern void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags); extern void khugepaged_min_free_kbytes_update(void); extern bool current_is_khugepaged(void); -#ifdef CONFIG_SHMEM extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd); -#else -static inline int collapse_pte_mapped_thp(struct mm_struct *mm, - unsigned long addr, bool install_pmd) -{ - return 0; -} -#endif static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { -- cgit v1.2.3 From 8814e3b8692b31e0150a894dd70c14ca0b7b746a Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 14 May 2025 11:41:54 -0700 Subject: memcg: make mod_memcg_state re-entrant safe against irqs Let's make mod_memcg_state re-entrant safe against irqs. The only thing needed is to convert the usage of __this_cpu_add() to this_cpu_add(). In addition, with re-entrant safety, there is no need to disable irqs. mod_memcg_state() is not safe against nmi, so let's add warning if someone tries to call it in nmi context. 
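As a rough sketch of what the re-entrant-safe variant described above looks like (simplified; the per-cpu field name vmstats_percpu->state[] is an assumption here, not taken from this patch):

	void mod_memcg_state(struct mem_cgroup *memcg,
			     enum memcg_stat_item idx, int val)
	{
		/* not safe against NMIs, as noted above */
		WARN_ON_ONCE(in_nmi());

		/*
		 * this_cpu_add() is already safe against interrupts, so the
		 * old local_irq_save()/__this_cpu_add()/local_irq_restore()
		 * wrapper is no longer needed.
		 */
		this_cpu_add(memcg->vmstats_percpu->state[idx], val);
	}
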
Link: https://lkml.kernel.org/r/20250514184158.3471331-4-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka Cc: Alexei Starovoitov Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9ed75f82b858..92861ff3c43f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -903,19 +903,9 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim, struct mem_cgroup *oom_domain); void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); -void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, - int val); - /* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_memcg_state(memcg, idx, val); - local_irq_restore(flags); -} +void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, int val); static inline void mod_memcg_page_state(struct page *page, enum memcg_stat_item idx, int val) @@ -1375,12 +1365,6 @@ static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) { } -static inline void __mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, - int nr) -{ -} - static inline void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, int nr) -- cgit v1.2.3 From e52401e7247bb36cabba389ae32fb75a12a6e94b Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 14 May 2025 11:41:55 -0700 Subject: memcg: make count_memcg_events re-entrant safe against irqs Let's make count_memcg_events re-entrant safe against irqs. The only thing needed is to convert the usage of __this_cpu_add() to this_cpu_add(). In addition, with re-entrant safety, there is no need to disable irqs. Also add warnings for in_nmi() as it is not safe against nmi context. 
Link: https://lkml.kernel.org/r/20250514184158.3471331-5-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka Cc: Alexei Starovoitov Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 92861ff3c43f..f7848f73f41c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -942,19 +942,8 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, local_irq_restore(flags); } -void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, - unsigned long count); - -static inline void count_memcg_events(struct mem_cgroup *memcg, - enum vm_event_item idx, - unsigned long count) -{ - unsigned long flags; - - local_irq_save(flags); - __count_memcg_events(memcg, idx, count); - local_irq_restore(flags); -} +void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, + unsigned long count); static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) @@ -1418,12 +1407,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, } static inline void count_memcg_events(struct mem_cgroup *memcg, - enum vm_event_item idx, - unsigned long count) -{ -} - -static inline void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count) { -- cgit v1.2.3 From 027a362fb36b479030beecbaaec30711ddabf8fa Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Tue, 20 May 2025 12:57:49 -0700 Subject: drm/xe/ptl: Update the PTL pci id table Update to current bspec table. Bspec: 72574 Signed-off-by: Matt Atwood Reviewed-by: Tejas Upadhyay Reviewed-by: Clint Taylor Link: https://lore.kernel.org/r/20250520195749.371748-1-matthew.s.atwood@intel.com Signed-off-by: Matt Roper (cherry picked from commit 49c6dc74b5968885f421f9f1b45eb4890b955870) Signed-off-by: Lucas De Marchi --- include/drm/intel/pciids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index d212848d07f3..a7ce9523c50d 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -861,6 +861,10 @@ MACRO__(0xB081, ## __VA_ARGS__), \ MACRO__(0xB082, ## __VA_ARGS__), \ MACRO__(0xB083, ## __VA_ARGS__), \ + MACRO__(0xB084, ## __VA_ARGS__), \ + MACRO__(0xB085, ## __VA_ARGS__), \ + MACRO__(0xB086, ## __VA_ARGS__), \ + MACRO__(0xB087, ## __VA_ARGS__), \ MACRO__(0xB08F, ## __VA_ARGS__), \ MACRO__(0xB090, ## __VA_ARGS__), \ MACRO__(0xB0A0, ## __VA_ARGS__), \ -- cgit v1.2.3 From ba1a455393c430c97235cb593d40a3ea7ad8acca Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Sun, 16 Mar 2025 19:18:52 +0100 Subject: mfd: bcm590xx: Drop unused "id" member of bcm590xx struct The "id" member of the bcm590xx struct is unused and will be confusing once we add an actual PMU ID storage value. Drop it; a replacement will be introduced in a future commit. 
Signed-off-by: Artur Weber Reviewed-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/20250316-bcm59054-v7-4-4281126be1b8@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/bcm590xx.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h index 09d4ae054242..5a5783abd47b 100644 --- a/include/linux/mfd/bcm590xx.h +++ b/include/linux/mfd/bcm590xx.h @@ -43,7 +43,6 @@ struct bcm590xx { struct i2c_client *i2c_sec; struct regmap *regmap_pri; struct regmap *regmap_sec; - unsigned int id; /* PMU ID value; also used as device type */ u8 pmu_id; -- cgit v1.2.3 From b3379422b460ea8c0556df300d547d63e5569cda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 9 Apr 2025 21:37:25 +0100 Subject: mfd: sec-core: Drop non-existing forward declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sec_irq_resume() was removed in commit 6445b84abf91 ("mfd: Add s2mps11 irq driver") and sec_irq_exit() in commit 3dc6f4aaafbe ("mfd: sec: Use devm_mfd_add_devices and devm_regmap_add_irq_chip") while the prototypes were left. They should be removed. Do so. Reviewed-by: Krzysztof Kozlowski Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250409-s2mpg10-v4-4-d66d5f39b6bf@linaro.org Signed-off-by: Lee Jones --- include/linux/mfd/samsung/core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index f35314458fd2..b7008b50392a 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -72,8 +72,6 @@ struct sec_pmic_dev { }; int sec_irq_init(struct sec_pmic_dev *sec_pmic); -void sec_irq_exit(struct sec_pmic_dev *sec_pmic); -int sec_irq_resume(struct sec_pmic_dev *sec_pmic); struct sec_platform_data { struct sec_regulator_data *regulators; -- cgit v1.2.3 From 8b88b5e4d58151d8a37250afc978f253cd8cc4fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 9 Apr 2025 21:37:28 +0100 Subject: mfd: sec: Move private internal API to internal header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sec_irq_init() is an internal API for the core driver, and doesn't belong into the public header. Due to an upcoming split of the driver into a core and i2c driver, we'll also be adding more internal APIs, which again shouldn't be in the public header. Move it into a new internal include. 
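For illustration, the new internal header could look roughly like this (the file name and guard are assumptions; only the sec_irq_init() prototype is taken from the existing public header):

	/* hypothetical drivers/mfd/sec-core.h */
	#ifndef __SEC_CORE_INT_H
	#define __SEC_CORE_INT_H

	struct sec_pmic_dev;

	/* internal to the sec core/transport drivers, not for other users */
	int sec_irq_init(struct sec_pmic_dev *sec_pmic);

	#endif /* __SEC_CORE_INT_H */
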
Reviewed-by: Krzysztof Kozlowski Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250409-s2mpg10-v4-7-d66d5f39b6bf@linaro.org Signed-off-by: Lee Jones --- include/linux/mfd/samsung/core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index b7008b50392a..8a4e660854bb 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -71,8 +71,6 @@ struct sec_pmic_dev { struct regmap_irq_chip_data *irq_data; }; -int sec_irq_init(struct sec_pmic_dev *sec_pmic); - struct sec_platform_data { struct sec_regulator_data *regulators; struct sec_opmode_data *opmode; -- cgit v1.2.3 From 5338709089b75f57a9b3b7b17a0424ecab5f2fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 9 Apr 2025 21:37:30 +0100 Subject: mfd: sec: Add support for S2MPG10 PMIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for Samsung's S2MPG10 PMIC, which is a Power Management IC for mobile applications with buck converters, various LDOs, power meters, RTC, clock outputs, and additional GPIOs interfaces. Contrary to existing Samsung S2M series PMICs supported, communication is not via I2C, but via the Samsung ACPM firmware. This commit adds the core driver. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250409-s2mpg10-v4-9-d66d5f39b6bf@linaro.org Signed-off-by: Lee Jones --- include/linux/mfd/samsung/core.h | 1 + include/linux/mfd/samsung/irq.h | 103 ++++++++ include/linux/mfd/samsung/rtc.h | 37 +++ include/linux/mfd/samsung/s2mpg10.h | 454 ++++++++++++++++++++++++++++++++++++ 4 files changed, 595 insertions(+) create mode 100644 include/linux/mfd/samsung/s2mpg10.h (limited to 'include') diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 8a4e660854bb..c1102324172a 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -39,6 +39,7 @@ enum sec_device_type { S5M8767X, S2DOS05, S2MPA01, + S2MPG10, S2MPS11X, S2MPS13X, S2MPS14X, diff --git a/include/linux/mfd/samsung/irq.h b/include/linux/mfd/samsung/irq.h index 978f7af66f74..b4805cbd949b 100644 --- a/include/linux/mfd/samsung/irq.h +++ b/include/linux/mfd/samsung/irq.h @@ -57,6 +57,109 @@ enum s2mpa01_irq { #define S2MPA01_IRQ_B24_TSD_MASK (1 << 4) #define S2MPA01_IRQ_B35_TSD_MASK (1 << 5) +enum s2mpg10_irq { + /* PMIC */ + S2MPG10_IRQ_PWRONF, + S2MPG10_IRQ_PWRONR, + S2MPG10_IRQ_JIGONBF, + S2MPG10_IRQ_JIGONBR, + S2MPG10_IRQ_ACOKBF, + S2MPG10_IRQ_ACOKBR, + S2MPG10_IRQ_PWRON1S, + S2MPG10_IRQ_MRB, +#define S2MPG10_IRQ_PWRONF_MASK BIT(0) +#define S2MPG10_IRQ_PWRONR_MASK BIT(1) +#define S2MPG10_IRQ_JIGONBF_MASK BIT(2) +#define S2MPG10_IRQ_JIGONBR_MASK BIT(3) +#define S2MPG10_IRQ_ACOKBF_MASK BIT(4) +#define S2MPG10_IRQ_ACOKBR_MASK BIT(5) +#define S2MPG10_IRQ_PWRON1S_MASK BIT(6) +#define S2MPG10_IRQ_MRB_MASK BIT(7) + + S2MPG10_IRQ_RTC60S, + S2MPG10_IRQ_RTCA1, + S2MPG10_IRQ_RTCA0, + S2MPG10_IRQ_RTC1S, + S2MPG10_IRQ_WTSR_COLDRST, + S2MPG10_IRQ_WTSR, + S2MPG10_IRQ_WRST, + S2MPG10_IRQ_SMPL, +#define S2MPG10_IRQ_RTC60S_MASK BIT(0) +#define S2MPG10_IRQ_RTCA1_MASK BIT(1) +#define S2MPG10_IRQ_RTCA0_MASK BIT(2) +#define S2MPG10_IRQ_RTC1S_MASK BIT(3) +#define S2MPG10_IRQ_WTSR_COLDRST_MASK BIT(4) +#define S2MPG10_IRQ_WTSR_MASK BIT(5) +#define S2MPG10_IRQ_WRST_MASK BIT(6) +#define S2MPG10_IRQ_SMPL_MASK BIT(7) + + S2MPG10_IRQ_120C, + S2MPG10_IRQ_140C, + S2MPG10_IRQ_TSD, + S2MPG10_IRQ_PIF_TIMEOUT1, + 
S2MPG10_IRQ_PIF_TIMEOUT2, + S2MPG10_IRQ_SPD_PARITY_ERR, + S2MPG10_IRQ_SPD_ABNORMAL_STOP, + S2MPG10_IRQ_PMETER_OVERF, +#define S2MPG10_IRQ_INT120C_MASK BIT(0) +#define S2MPG10_IRQ_INT140C_MASK BIT(1) +#define S2MPG10_IRQ_TSD_MASK BIT(2) +#define S2MPG10_IRQ_PIF_TIMEOUT1_MASK BIT(3) +#define S2MPG10_IRQ_PIF_TIMEOUT2_MASK BIT(4) +#define S2MPG10_IRQ_SPD_PARITY_ERR_MASK BIT(5) +#define S2MPG10_IRQ_SPD_ABNORMAL_STOP_MASK BIT(6) +#define S2MPG10_IRQ_PMETER_OVERF_MASK BIT(7) + + S2MPG10_IRQ_OCP_B1M, + S2MPG10_IRQ_OCP_B2M, + S2MPG10_IRQ_OCP_B3M, + S2MPG10_IRQ_OCP_B4M, + S2MPG10_IRQ_OCP_B5M, + S2MPG10_IRQ_OCP_B6M, + S2MPG10_IRQ_OCP_B7M, + S2MPG10_IRQ_OCP_B8M, +#define S2MPG10_IRQ_OCP_B1M_MASK BIT(0) +#define S2MPG10_IRQ_OCP_B2M_MASK BIT(1) +#define S2MPG10_IRQ_OCP_B3M_MASK BIT(2) +#define S2MPG10_IRQ_OCP_B4M_MASK BIT(3) +#define S2MPG10_IRQ_OCP_B5M_MASK BIT(4) +#define S2MPG10_IRQ_OCP_B6M_MASK BIT(5) +#define S2MPG10_IRQ_OCP_B7M_MASK BIT(6) +#define S2MPG10_IRQ_OCP_B8M_MASK BIT(7) + + S2MPG10_IRQ_OCP_B9M, + S2MPG10_IRQ_OCP_B10M, + S2MPG10_IRQ_WLWP_ACC, + S2MPG10_IRQ_SMPL_TIMEOUT, + S2MPG10_IRQ_WTSR_TIMEOUT, + S2MPG10_IRQ_SPD_SRP_PKT_RST, +#define S2MPG10_IRQ_OCP_B9M_MASK BIT(0) +#define S2MPG10_IRQ_OCP_B10M_MASK BIT(1) +#define S2MPG10_IRQ_WLWP_ACC_MASK BIT(2) +#define S2MPG10_IRQ_SMPL_TIMEOUT_MASK BIT(5) +#define S2MPG10_IRQ_WTSR_TIMEOUT_MASK BIT(6) +#define S2MPG10_IRQ_SPD_SRP_PKT_RST_MASK BIT(7) + + S2MPG10_IRQ_PWR_WARN_CH0, + S2MPG10_IRQ_PWR_WARN_CH1, + S2MPG10_IRQ_PWR_WARN_CH2, + S2MPG10_IRQ_PWR_WARN_CH3, + S2MPG10_IRQ_PWR_WARN_CH4, + S2MPG10_IRQ_PWR_WARN_CH5, + S2MPG10_IRQ_PWR_WARN_CH6, + S2MPG10_IRQ_PWR_WARN_CH7, +#define S2MPG10_IRQ_PWR_WARN_CH0_MASK BIT(0) +#define S2MPG10_IRQ_PWR_WARN_CH1_MASK BIT(1) +#define S2MPG10_IRQ_PWR_WARN_CH2_MASK BIT(2) +#define S2MPG10_IRQ_PWR_WARN_CH3_MASK BIT(3) +#define S2MPG10_IRQ_PWR_WARN_CH4_MASK BIT(4) +#define S2MPG10_IRQ_PWR_WARN_CH5_MASK BIT(5) +#define S2MPG10_IRQ_PWR_WARN_CH6_MASK BIT(6) +#define S2MPG10_IRQ_PWR_WARN_CH7_MASK BIT(7) + + S2MPG10_IRQ_NR, +}; + enum s2mps11_irq { S2MPS11_IRQ_PWRONF, S2MPS11_IRQ_PWRONR, diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h index 0204decfc9aa..51c4239a1fa6 100644 --- a/include/linux/mfd/samsung/rtc.h +++ b/include/linux/mfd/samsung/rtc.h @@ -72,6 +72,37 @@ enum s2mps_rtc_reg { S2MPS_RTC_REG_MAX, }; +enum s2mpg10_rtc_reg { + S2MPG10_RTC_CTRL, + S2MPG10_RTC_UPDATE, + S2MPG10_RTC_SMPL, + S2MPG10_RTC_WTSR, + S2MPG10_RTC_CAP_SEL, + S2MPG10_RTC_MSEC, + S2MPG10_RTC_SEC, + S2MPG10_RTC_MIN, + S2MPG10_RTC_HOUR, + S2MPG10_RTC_WEEK, + S2MPG10_RTC_DAY, + S2MPG10_RTC_MON, + S2MPG10_RTC_YEAR, + S2MPG10_RTC_A0SEC, + S2MPG10_RTC_A0MIN, + S2MPG10_RTC_A0HOUR, + S2MPG10_RTC_A0WEEK, + S2MPG10_RTC_A0DAY, + S2MPG10_RTC_A0MON, + S2MPG10_RTC_A0YEAR, + S2MPG10_RTC_A1SEC, + S2MPG10_RTC_A1MIN, + S2MPG10_RTC_A1HOUR, + S2MPG10_RTC_A1WEEK, + S2MPG10_RTC_A1DAY, + S2MPG10_RTC_A1MON, + S2MPG10_RTC_A1YEAR, + S2MPG10_RTC_OSC_CTRL, +}; + #define RTC_I2C_ADDR (0x0C >> 1) #define HOUR_12 (1 << 7) @@ -124,10 +155,16 @@ enum s2mps_rtc_reg { #define ALARM_ENABLE_SHIFT 7 #define ALARM_ENABLE_MASK (1 << ALARM_ENABLE_SHIFT) +/* WTSR & SMPL registers */ #define SMPL_ENABLE_SHIFT 7 #define SMPL_ENABLE_MASK (1 << SMPL_ENABLE_SHIFT) #define WTSR_ENABLE_SHIFT 6 #define WTSR_ENABLE_MASK (1 << WTSR_ENABLE_SHIFT) +#define S2MPG10_WTSR_COLDTIMER GENMASK(6, 5) +#define S2MPG10_WTSR_COLDRST BIT(4) +#define S2MPG10_WTSR_WTSRT GENMASK(3, 1) +#define S2MPG10_WTSR_WTSR_EN BIT(0) + #endif /* __LINUX_MFD_SEC_RTC_H */ diff --git 
a/include/linux/mfd/samsung/s2mpg10.h b/include/linux/mfd/samsung/s2mpg10.h new file mode 100644 index 000000000000..9f5919b89a3c --- /dev/null +++ b/include/linux/mfd/samsung/s2mpg10.h @@ -0,0 +1,454 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2015 Samsung Electronics + * Copyright 2020 Google Inc + * Copyright 2025 Linaro Ltd. + */ + +#ifndef __LINUX_MFD_S2MPG10_H +#define __LINUX_MFD_S2MPG10_H + +/* Common registers (type 0x000) */ +enum s2mpg10_common_reg { + S2MPG10_COMMON_CHIPID, + S2MPG10_COMMON_INT, + S2MPG10_COMMON_INT_MASK, + S2MPG10_COMMON_SPD_CTRL1 = 0x0a, + S2MPG10_COMMON_SPD_CTRL2, + S2MPG10_COMMON_SPD_CTRL3, + S2MPG10_COMMON_MON1SEL = 0x1a, + S2MPG10_COMMON_MON2SEL, + S2MPG10_COMMON_MONR, + S2MPG10_COMMON_DEBUG_CTRL1, + S2MPG10_COMMON_DEBUG_CTRL2, + S2MPG10_COMMON_DEBUG_CTRL3, + S2MPG10_COMMON_DEBUG_CTRL4, + S2MPG10_COMMON_DEBUG_CTRL5, + S2MPG10_COMMON_DEBUG_CTRL6, + S2MPG10_COMMON_DEBUG_CTRL7, + S2MPG10_COMMON_DEBUG_CTRL8, + S2MPG10_COMMON_TEST_MODE1, + S2MPG10_COMMON_TEST_MODE2, + S2MPG10_COMMON_SPD_DEBUG1, + S2MPG10_COMMON_SPD_DEBUG2, + S2MPG10_COMMON_SPD_DEBUG3, + S2MPG10_COMMON_SPD_DEBUG4, +}; + +/* For S2MPG10_COMMON_INT and S2MPG10_COMMON_INT_MASK */ +#define S2MPG10_COMMON_INT_SRC GENMASK(7, 0) +#define S2MPG10_COMMON_INT_SRC_PMIC BIT(0) + +/* PMIC registers (type 0x100) */ +enum s2mpg10_pmic_reg { + S2MPG10_PMIC_INT1, + S2MPG10_PMIC_INT2, + S2MPG10_PMIC_INT3, + S2MPG10_PMIC_INT4, + S2MPG10_PMIC_INT5, + S2MPG10_PMIC_INT6, + S2MPG10_PMIC_INT1M, + S2MPG10_PMIC_INT2M, + S2MPG10_PMIC_INT3M, + S2MPG10_PMIC_INT4M, + S2MPG10_PMIC_INT5M, + S2MPG10_PMIC_INT6M, + S2MPG10_PMIC_STATUS1, + S2MPG10_PMIC_STATUS2, + S2MPG10_PMIC_PWRONSRC, + S2MPG10_PMIC_OFFSRC, + S2MPG10_PMIC_BU_CHG, + S2MPG10_PMIC_RTCBUF, + S2MPG10_PMIC_COMMON_CTRL1, + S2MPG10_PMIC_COMMON_CTRL2, + S2MPG10_PMIC_COMMON_CTRL3, + S2MPG10_PMIC_COMMON_CTRL4, + S2MPG10_PMIC_SMPL_WARN_CTRL, + S2MPG10_PMIC_MIMICKING_CTRL, + S2MPG10_PMIC_B1M_CTRL, + S2MPG10_PMIC_B1M_OUT1, + S2MPG10_PMIC_B1M_OUT2, + S2MPG10_PMIC_B2M_CTRL, + S2MPG10_PMIC_B2M_OUT1, + S2MPG10_PMIC_B2M_OUT2, + S2MPG10_PMIC_B3M_CTRL, + S2MPG10_PMIC_B3M_OUT1, + S2MPG10_PMIC_B3M_OUT2, + S2MPG10_PMIC_B4M_CTRL, + S2MPG10_PMIC_B4M_OUT1, + S2MPG10_PMIC_B4M_OUT2, + S2MPG10_PMIC_B5M_CTRL, + S2MPG10_PMIC_B5M_OUT1, + S2MPG10_PMIC_B5M_OUT2, + S2MPG10_PMIC_B6M_CTRL, + S2MPG10_PMIC_B6M_OUT1, + S2MPG10_PMIC_B6M_OUT2, + S2MPG10_PMIC_B7M_CTRL, + S2MPG10_PMIC_B7M_OUT1, + S2MPG10_PMIC_B7M_OUT2, + S2MPG10_PMIC_B8M_CTRL, + S2MPG10_PMIC_B8M_OUT1, + S2MPG10_PMIC_B8M_OUT2, + S2MPG10_PMIC_B9M_CTRL, + S2MPG10_PMIC_B9M_OUT1, + S2MPG10_PMIC_B9M_OUT2, + S2MPG10_PMIC_B10M_CTRL, + S2MPG10_PMIC_B10M_OUT1, + S2MPG10_PMIC_B10M_OUT2, + S2MPG10_PMIC_BUCK1M_USONIC, + S2MPG10_PMIC_BUCK2M_USONIC, + S2MPG10_PMIC_BUCK3M_USONIC, + S2MPG10_PMIC_BUCK4M_USONIC, + S2MPG10_PMIC_BUCK5M_USONIC, + S2MPG10_PMIC_BUCK6M_USONIC, + S2MPG10_PMIC_BUCK7M_USONIC, + S2MPG10_PMIC_BUCK8M_USONIC, + S2MPG10_PMIC_BUCK9M_USONIC, + S2MPG10_PMIC_BUCK10M_USONIC, + S2MPG10_PMIC_L1M_CTRL, + S2MPG10_PMIC_L2M_CTRL, + S2MPG10_PMIC_L3M_CTRL, + S2MPG10_PMIC_L4M_CTRL, + S2MPG10_PMIC_L5M_CTRL, + S2MPG10_PMIC_L6M_CTRL, + S2MPG10_PMIC_L7M_CTRL, + S2MPG10_PMIC_L8M_CTRL, + S2MPG10_PMIC_L9M_CTRL, + S2MPG10_PMIC_L10M_CTRL, + S2MPG10_PMIC_L11M_CTRL1, + S2MPG10_PMIC_L11M_CTRL2, + S2MPG10_PMIC_L12M_CTRL1, + S2MPG10_PMIC_L12M_CTRL2, + S2MPG10_PMIC_L13M_CTRL1, + S2MPG10_PMIC_L13M_CTRL2, + S2MPG10_PMIC_L14M_CTRL, + S2MPG10_PMIC_L15M_CTRL1, + S2MPG10_PMIC_L15M_CTRL2, + S2MPG10_PMIC_L16M_CTRL, + S2MPG10_PMIC_L17M_CTRL, + 
S2MPG10_PMIC_L18M_CTRL, + S2MPG10_PMIC_L19M_CTRL, + S2MPG10_PMIC_L20M_CTRL, + S2MPG10_PMIC_L21M_CTRL, + S2MPG10_PMIC_L22M_CTRL, + S2MPG10_PMIC_L23M_CTRL, + S2MPG10_PMIC_L24M_CTRL, + S2MPG10_PMIC_L25M_CTRL, + S2MPG10_PMIC_L26M_CTRL, + S2MPG10_PMIC_L27M_CTRL, + S2MPG10_PMIC_L28M_CTRL, + S2MPG10_PMIC_L29M_CTRL, + S2MPG10_PMIC_L30M_CTRL, + S2MPG10_PMIC_L31M_CTRL, + S2MPG10_PMIC_LDO_CTRL1, + S2MPG10_PMIC_LDO_CTRL2, + S2MPG10_PMIC_LDO_DSCH1, + S2MPG10_PMIC_LDO_DSCH2, + S2MPG10_PMIC_LDO_DSCH3, + S2MPG10_PMIC_LDO_DSCH4, + S2MPG10_PMIC_LDO_BUCK7M_HLIMIT, + S2MPG10_PMIC_LDO_BUCK7M_LLIMIT, + S2MPG10_PMIC_LDO_LDO21M_HLIMIT, + S2MPG10_PMIC_LDO_LDO21M_LLIMIT, + S2MPG10_PMIC_LDO_LDO11M_HLIMIT, + S2MPG10_PMIC_DVS_RAMP1, + S2MPG10_PMIC_DVS_RAMP2, + S2MPG10_PMIC_DVS_RAMP3, + S2MPG10_PMIC_DVS_RAMP4, + S2MPG10_PMIC_DVS_RAMP5, + S2MPG10_PMIC_DVS_RAMP6, + S2MPG10_PMIC_DVS_SYNC_CTRL1, + S2MPG10_PMIC_DVS_SYNC_CTRL2, + S2MPG10_PMIC_DVS_SYNC_CTRL3, + S2MPG10_PMIC_DVS_SYNC_CTRL4, + S2MPG10_PMIC_DVS_SYNC_CTRL5, + S2MPG10_PMIC_DVS_SYNC_CTRL6, + S2MPG10_PMIC_OFF_CTRL1, + S2MPG10_PMIC_OFF_CTRL2, + S2MPG10_PMIC_OFF_CTRL3, + S2MPG10_PMIC_OFF_CTRL4, + S2MPG10_PMIC_SEQ_CTRL1, + S2MPG10_PMIC_SEQ_CTRL2, + S2MPG10_PMIC_SEQ_CTRL3, + S2MPG10_PMIC_SEQ_CTRL4, + S2MPG10_PMIC_SEQ_CTRL5, + S2MPG10_PMIC_SEQ_CTRL6, + S2MPG10_PMIC_SEQ_CTRL7, + S2MPG10_PMIC_SEQ_CTRL8, + S2MPG10_PMIC_SEQ_CTRL9, + S2MPG10_PMIC_SEQ_CTRL10, + S2MPG10_PMIC_SEQ_CTRL11, + S2MPG10_PMIC_SEQ_CTRL12, + S2MPG10_PMIC_SEQ_CTRL13, + S2MPG10_PMIC_SEQ_CTRL14, + S2MPG10_PMIC_SEQ_CTRL15, + S2MPG10_PMIC_SEQ_CTRL16, + S2MPG10_PMIC_SEQ_CTRL17, + S2MPG10_PMIC_SEQ_CTRL18, + S2MPG10_PMIC_SEQ_CTRL19, + S2MPG10_PMIC_SEQ_CTRL20, + S2MPG10_PMIC_SEQ_CTRL21, + S2MPG10_PMIC_SEQ_CTRL22, + S2MPG10_PMIC_SEQ_CTRL23, + S2MPG10_PMIC_SEQ_CTRL24, + S2MPG10_PMIC_SEQ_CTRL25, + S2MPG10_PMIC_SEQ_CTRL26, + S2MPG10_PMIC_SEQ_CTRL27, + S2MPG10_PMIC_SEQ_CTRL28, + S2MPG10_PMIC_SEQ_CTRL29, + S2MPG10_PMIC_SEQ_CTRL30, + S2MPG10_PMIC_SEQ_CTRL31, + S2MPG10_PMIC_SEQ_CTRL32, + S2MPG10_PMIC_SEQ_CTRL33, + S2MPG10_PMIC_SEQ_CTRL34, + S2MPG10_PMIC_SEQ_CTRL35, + S2MPG10_PMIC_OFF_SEQ_CTRL1, + S2MPG10_PMIC_OFF_SEQ_CTRL2, + S2MPG10_PMIC_OFF_SEQ_CTRL3, + S2MPG10_PMIC_OFF_SEQ_CTRL4, + S2MPG10_PMIC_OFF_SEQ_CTRL5, + S2MPG10_PMIC_OFF_SEQ_CTRL6, + S2MPG10_PMIC_OFF_SEQ_CTRL7, + S2MPG10_PMIC_OFF_SEQ_CTRL8, + S2MPG10_PMIC_OFF_SEQ_CTRL9, + S2MPG10_PMIC_OFF_SEQ_CTRL10, + S2MPG10_PMIC_OFF_SEQ_CTRL11, + S2MPG10_PMIC_OFF_SEQ_CTRL12, + S2MPG10_PMIC_OFF_SEQ_CTRL13, + S2MPG10_PMIC_OFF_SEQ_CTRL14, + S2MPG10_PMIC_OFF_SEQ_CTRL15, + S2MPG10_PMIC_OFF_SEQ_CTRL16, + S2MPG10_PMIC_OFF_SEQ_CTRL17, + S2MPG10_PMIC_OFF_SEQ_CTRL18, + S2MPG10_PMIC_PCTRLSEL1, + S2MPG10_PMIC_PCTRLSEL2, + S2MPG10_PMIC_PCTRLSEL3, + S2MPG10_PMIC_PCTRLSEL4, + S2MPG10_PMIC_PCTRLSEL5, + S2MPG10_PMIC_PCTRLSEL6, + S2MPG10_PMIC_PCTRLSEL7, + S2MPG10_PMIC_PCTRLSEL8, + S2MPG10_PMIC_PCTRLSEL9, + S2MPG10_PMIC_PCTRLSEL10, + S2MPG10_PMIC_PCTRLSEL11, + S2MPG10_PMIC_PCTRLSEL12, + S2MPG10_PMIC_PCTRLSEL13, + S2MPG10_PMIC_DCTRLSEL1, + S2MPG10_PMIC_DCTRLSEL2, + S2MPG10_PMIC_DCTRLSEL3, + S2MPG10_PMIC_DCTRLSEL4, + S2MPG10_PMIC_DCTRLSEL5, + S2MPG10_PMIC_DCTRLSEL6, + S2MPG10_PMIC_DCTRLSEL7, + S2MPG10_PMIC_GPIO_CTRL1, + S2MPG10_PMIC_GPIO_CTRL2, + S2MPG10_PMIC_GPIO_CTRL3, + S2MPG10_PMIC_GPIO_CTRL4, + S2MPG10_PMIC_GPIO_CTRL5, + S2MPG10_PMIC_GPIO_CTRL6, + S2MPG10_PMIC_GPIO_CTRL7, + S2MPG10_PMIC_B2M_OCP_WARN, + S2MPG10_PMIC_B2M_OCP_WARN_X, + S2MPG10_PMIC_B2M_OCP_WARN_Y, + S2MPG10_PMIC_B2M_OCP_WARN_Z, + S2MPG10_PMIC_B3M_OCP_WARN, + S2MPG10_PMIC_B3M_OCP_WARN_X, + S2MPG10_PMIC_B3M_OCP_WARN_Y, + 
S2MPG10_PMIC_B3M_OCP_WARN_Z, + S2MPG10_PMIC_B10M_OCP_WARN, + S2MPG10_PMIC_B10M_OCP_WARN_X, + S2MPG10_PMIC_B10M_OCP_WARN_Y, + S2MPG10_PMIC_B10M_OCP_WARN_Z, + S2MPG10_PMIC_B2M_SOFT_OCP_WARN, + S2MPG10_PMIC_B2M_SOFT_OCP_WARN_X, + S2MPG10_PMIC_B2M_SOFT_OCP_WARN_Y, + S2MPG10_PMIC_B2M_SOFT_OCP_WARN_Z, + S2MPG10_PMIC_B3M_SOFT_OCP_WARN, + S2MPG10_PMIC_B3M_SOFT_OCP_WARN_X, + S2MPG10_PMIC_B3M_SOFT_OCP_WARN_Y, + S2MPG10_PMIC_B3M_SOFT_OCP_WARN_Z, + S2MPG10_PMIC_B10M_SOFT_OCP_WARN, + S2MPG10_PMIC_B10M_SOFT_OCP_WARN_X, + S2MPG10_PMIC_B10M_SOFT_OCP_WARN_Y, + S2MPG10_PMIC_B10M_SOFT_OCP_WARN_Z, + S2MPG10_PMIC_BUCK_OCP_EN1, + S2MPG10_PMIC_BUCK_OCP_EN2, + S2MPG10_PMIC_BUCK_OCP_PD_EN1, + S2MPG10_PMIC_BUCK_OCP_PD_EN2, + S2MPG10_PMIC_BUCK_OCP_CTRL1, + S2MPG10_PMIC_BUCK_OCP_CTRL2, + S2MPG10_PMIC_BUCK_OCP_CTRL3, + S2MPG10_PMIC_BUCK_OCP_CTRL4, + S2MPG10_PMIC_BUCK_OCP_CTRL5, + S2MPG10_PMIC_PIF_CTRL, + S2MPG10_PMIC_BUCK_HR_MODE1, + S2MPG10_PMIC_BUCK_HR_MODE2, + S2MPG10_PMIC_FAULTOUT_CTRL, + S2MPG10_PMIC_LDO_SENSE1, + S2MPG10_PMIC_LDO_SENSE2, + S2MPG10_PMIC_LDO_SENSE3, + S2MPG10_PMIC_LDO_SENSE4, +}; + +/* Meter registers (type 0xa00) */ +enum s2mpg10_meter_reg { + S2MPG10_METER_CTRL1, + S2MPG10_METER_CTRL2, + S2MPG10_METER_CTRL3, + S2MPG10_METER_CTRL4, + S2MPG10_METER_BUCKEN1, + S2MPG10_METER_BUCKEN2, + S2MPG10_METER_MUXSEL0, + S2MPG10_METER_MUXSEL1, + S2MPG10_METER_MUXSEL2, + S2MPG10_METER_MUXSEL3, + S2MPG10_METER_MUXSEL4, + S2MPG10_METER_MUXSEL5, + S2MPG10_METER_MUXSEL6, + S2MPG10_METER_MUXSEL7, + S2MPG10_METER_LPF_C0_0, + S2MPG10_METER_LPF_C0_1, + S2MPG10_METER_LPF_C0_2, + S2MPG10_METER_LPF_C0_3, + S2MPG10_METER_LPF_C0_4, + S2MPG10_METER_LPF_C0_5, + S2MPG10_METER_LPF_C0_6, + S2MPG10_METER_LPF_C0_7, + S2MPG10_METER_PWR_WARN0, + S2MPG10_METER_PWR_WARN1, + S2MPG10_METER_PWR_WARN2, + S2MPG10_METER_PWR_WARN3, + S2MPG10_METER_PWR_WARN4, + S2MPG10_METER_PWR_WARN5, + S2MPG10_METER_PWR_WARN6, + S2MPG10_METER_PWR_WARN7, + S2MPG10_METER_PWR_HYS1, + S2MPG10_METER_PWR_HYS2, + S2MPG10_METER_PWR_HYS3, + S2MPG10_METER_PWR_HYS4, + S2MPG10_METER_ACC_DATA_CH0_1 = 0x40, + S2MPG10_METER_ACC_DATA_CH0_2, + S2MPG10_METER_ACC_DATA_CH0_3, + S2MPG10_METER_ACC_DATA_CH0_4, + S2MPG10_METER_ACC_DATA_CH0_5, + S2MPG10_METER_ACC_DATA_CH0_6, + S2MPG10_METER_ACC_DATA_CH1_1, + S2MPG10_METER_ACC_DATA_CH1_2, + S2MPG10_METER_ACC_DATA_CH1_3, + S2MPG10_METER_ACC_DATA_CH1_4, + S2MPG10_METER_ACC_DATA_CH1_5, + S2MPG10_METER_ACC_DATA_CH1_6, + S2MPG10_METER_ACC_DATA_CH2_1, + S2MPG10_METER_ACC_DATA_CH2_2, + S2MPG10_METER_ACC_DATA_CH2_3, + S2MPG10_METER_ACC_DATA_CH2_4, + S2MPG10_METER_ACC_DATA_CH2_5, + S2MPG10_METER_ACC_DATA_CH2_6, + S2MPG10_METER_ACC_DATA_CH3_1, + S2MPG10_METER_ACC_DATA_CH3_2, + S2MPG10_METER_ACC_DATA_CH3_3, + S2MPG10_METER_ACC_DATA_CH3_4, + S2MPG10_METER_ACC_DATA_CH3_5, + S2MPG10_METER_ACC_DATA_CH3_6, + S2MPG10_METER_ACC_DATA_CH4_1, + S2MPG10_METER_ACC_DATA_CH4_2, + S2MPG10_METER_ACC_DATA_CH4_3, + S2MPG10_METER_ACC_DATA_CH4_4, + S2MPG10_METER_ACC_DATA_CH4_5, + S2MPG10_METER_ACC_DATA_CH4_6, + S2MPG10_METER_ACC_DATA_CH5_1, + S2MPG10_METER_ACC_DATA_CH5_2, + S2MPG10_METER_ACC_DATA_CH5_3, + S2MPG10_METER_ACC_DATA_CH5_4, + S2MPG10_METER_ACC_DATA_CH5_5, + S2MPG10_METER_ACC_DATA_CH5_6, + S2MPG10_METER_ACC_DATA_CH6_1, + S2MPG10_METER_ACC_DATA_CH6_2, + S2MPG10_METER_ACC_DATA_CH6_3, + S2MPG10_METER_ACC_DATA_CH6_4, + S2MPG10_METER_ACC_DATA_CH6_5, + S2MPG10_METER_ACC_DATA_CH6_6, + S2MPG10_METER_ACC_DATA_CH7_1, + S2MPG10_METER_ACC_DATA_CH7_2, + S2MPG10_METER_ACC_DATA_CH7_3, + S2MPG10_METER_ACC_DATA_CH7_4, + S2MPG10_METER_ACC_DATA_CH7_5, + 
S2MPG10_METER_ACC_DATA_CH7_6, + S2MPG10_METER_ACC_COUNT_1, + S2MPG10_METER_ACC_COUNT_2, + S2MPG10_METER_ACC_COUNT_3, + S2MPG10_METER_LPF_DATA_CH0_1, + S2MPG10_METER_LPF_DATA_CH0_2, + S2MPG10_METER_LPF_DATA_CH0_3, + S2MPG10_METER_LPF_DATA_CH1_1, + S2MPG10_METER_LPF_DATA_CH1_2, + S2MPG10_METER_LPF_DATA_CH1_3, + S2MPG10_METER_LPF_DATA_CH2_1, + S2MPG10_METER_LPF_DATA_CH2_2, + S2MPG10_METER_LPF_DATA_CH2_3, + S2MPG10_METER_LPF_DATA_CH3_1, + S2MPG10_METER_LPF_DATA_CH3_2, + S2MPG10_METER_LPF_DATA_CH3_3, + S2MPG10_METER_LPF_DATA_CH4_1, + S2MPG10_METER_LPF_DATA_CH4_2, + S2MPG10_METER_LPF_DATA_CH4_3, + S2MPG10_METER_LPF_DATA_CH5_1, + S2MPG10_METER_LPF_DATA_CH5_2, + S2MPG10_METER_LPF_DATA_CH5_3, + S2MPG10_METER_LPF_DATA_CH6_1, + S2MPG10_METER_LPF_DATA_CH6_2, + S2MPG10_METER_LPF_DATA_CH6_3, + S2MPG10_METER_LPF_DATA_CH7_1, + S2MPG10_METER_LPF_DATA_CH7_2, + S2MPG10_METER_LPF_DATA_CH7_3, + S2MPG10_METER_DSM_TRIM_OFFSET = 0xee, + S2MPG10_METER_BUCK_METER_TRIM3 = 0xf1, +}; + +/* S2MPG10 regulator IDs */ +enum s2mpg10_regulators { + S2MPG10_LDO1, + S2MPG10_LDO2, + S2MPG10_LDO3, + S2MPG10_LDO4, + S2MPG10_LDO5, + S2MPG10_LDO6, + S2MPG10_LDO7, + S2MPG10_LDO8, + S2MPG10_LDO9, + S2MPG10_LDO10, + S2MPG10_LDO11, + S2MPG10_LDO12, + S2MPG10_LDO13, + S2MPG10_LDO14, + S2MPG10_LDO15, + S2MPG10_LDO16, + S2MPG10_LDO17, + S2MPG10_LDO18, + S2MPG10_LDO19, + S2MPG10_LDO20, + S2MPG10_LDO21, + S2MPG10_LDO22, + S2MPG10_LDO23, + S2MPG10_LDO24, + S2MPG10_LDO25, + S2MPG10_LDO26, + S2MPG10_LDO27, + S2MPG10_LDO28, + S2MPG10_LDO29, + S2MPG10_LDO30, + S2MPG10_LDO31, + S2MPG10_BUCK1, + S2MPG10_BUCK2, + S2MPG10_BUCK3, + S2MPG10_BUCK4, + S2MPG10_BUCK5, + S2MPG10_BUCK6, + S2MPG10_BUCK7, + S2MPG10_BUCK8, + S2MPG10_BUCK9, + S2MPG10_BUCK10, + S2MPG10_REGULATOR_MAX, +}; + +#endif /* __LINUX_MFD_S2MPG10_H */ -- cgit v1.2.3 From adf91d9e1983dec3d5fabc273e8ff89918b852c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 9 Apr 2025 21:37:39 +0100 Subject: mfd: sec: Change device_type to int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that sec-i2c doesn't match device type by pointer casting anymore, we can switch the device type from unsigned long to int easily. This saves a few bytes in struct sec_pmic_dev due to member alignment. Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250409-s2mpg10-v4-18-d66d5f39b6bf@linaro.org Signed-off-by: Lee Jones --- include/linux/mfd/samsung/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index c1102324172a..d785e101fe79 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -67,7 +67,7 @@ struct sec_pmic_dev { struct regmap *regmap_pmic; struct i2c_client *i2c; - unsigned long device_type; + int device_type; int irq; struct regmap_irq_chip_data *irq_data; }; -- cgit v1.2.3 From 34c5f34df95f71fc500ff0e942210ebc97d2ef65 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 9 May 2025 18:35:20 +0100 Subject: mfd: sm501: Remove unused sm501_find_clock sm501_find_clock() was added in 2007 as part of commit b6d6454fdb66 ("[PATCH] mfd: SM501 core driver") but hasn't been used. Remove it. Signed-off-by: "Dr. 
David Alan Gilbert" Link: https://lore.kernel.org/r/20250509173521.49596-1-linux@treblig.org Signed-off-by: Lee Jones --- include/linux/sm501.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sm501.h b/include/linux/sm501.h index 2f3488b2875d..bcda27a46e7a 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -12,9 +12,6 @@ extern int sm501_unit_power(struct device *dev, extern unsigned long sm501_set_clock(struct device *dev, int clksrc, unsigned long freq); -extern unsigned long sm501_find_clock(struct device *dev, - int clksrc, unsigned long req_freq); - /* sm501_misc_control * * Modify the SM501's MISC_CONTROL register -- cgit v1.2.3 From db26d62d79e4068934ad0dccdb92715df36352b9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 May 2025 08:57:52 +0100 Subject: netfs: Fix undifferentiation of DIO reads from unbuffered reads On cifs, "DIO reads" (specified by O_DIRECT) need to be differentiated from "unbuffered reads" (specified by cache=none in the mount parameters). The difference is flagged in the protocol and the server may behave differently: Windows Server will, for example, mandate that DIO reads are block aligned. Fix this by adding a NETFS_UNBUFFERED_READ to differentiate this from NETFS_DIO_READ, parallelling the write differentiation that already exists. cifs will then do the right thing. Fixes: 016dc8516aec ("netfs: Implement unbuffered/DIO read support") Signed-off-by: David Howells Link: https://lore.kernel.org/3444961.1747987072@warthog.procyon.org.uk Reviewed-by: "Paulo Alcantara (Red Hat)" Reviewed-by: Viacheslav Dubeyko cc: Steve French cc: netfs@lists.linux.dev cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-nfs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 + include/trace/events/netfs.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 7a649cfedc09..065c17385e53 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -203,6 +203,7 @@ enum netfs_io_origin { NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ NETFS_READ_SINGLE, /* This read should be treated as a single object */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_UNBUFFERED_READ, /* This is an unbuffered read */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index d7ceae7e15c1..333d2e38dd2c 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -39,6 +39,7 @@ EM(NETFS_READ_GAPS, "RG") \ EM(NETFS_READ_SINGLE, "R1") \ EM(NETFS_READ_FOR_WRITE, "RW") \ + EM(NETFS_UNBUFFERED_READ, "UR") \ EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITEBACK_SINGLE, "W1") \ -- cgit v1.2.3 From 4e83ae6ec87dddac070ba349d3b839589b1bb957 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 23 May 2025 10:47:06 +0200 Subject: mips, net: ensure that SOCK_COREDUMP is defined For historical reasons mips has to override the socket enum values but the defines are all the same. So simply move the ARCH_HAS_SOCKET_TYPES scope. 
Fixes: a9194f88782a ("coredump: add coredump socket") Suggested-by: Arnd Bergmann Signed-off-by: Christian Brauner --- include/linux/net.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 139c85d0f2ea..f60fff91e1df 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -70,6 +70,7 @@ enum sock_type { SOCK_DCCP = 6, SOCK_PACKET = 10, }; +#endif /* ARCH_HAS_SOCKET_TYPES */ #define SOCK_MAX (SOCK_PACKET + 1) /* Mask which covers at least up to SOCK_MASK-1. The @@ -83,8 +84,6 @@ enum sock_type { #endif #define SOCK_COREDUMP O_NOCTTY -#endif /* ARCH_HAS_SOCKET_TYPES */ - /** * enum sock_shutdown_cmd - Shutdown types * @SHUT_RD: shutdown receptions -- cgit v1.2.3 From 35de409aa30269a3b106fe957a95f7a2b7e21a60 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 23 May 2025 17:20:59 +0800 Subject: Revert "crypto: testmgr - Add hash export format testing" This reverts commit 18c438b228558e05ede7dccf947a6547516fc0c7. The s390 hmac and sha3 algorithms are failing the test. Revert the change until they have been fixed. Reported-by: Ingo Franzki Link: https://lore.kernel.org/all/623a7fcb-b4cb-48e6-9833-57ad2b32a252@linux.ibm.com/ Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f052afa6e7b0..0f85c543f80b 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -91,12 +91,6 @@ static inline bool crypto_hash_alg_needs_key(struct hash_alg_common *alg) !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } -static inline bool crypto_hash_no_export_core(struct crypto_ahash *tfm) -{ - return crypto_hash_alg_common(tfm)->base.cra_flags & - CRYPTO_AHASH_ALG_NO_EXPORT_CORE; -} - int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); -- cgit v1.2.3 From 38b95d588f8fd07027ad8dbca3e1d2b5c13413ae Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:54 -0700 Subject: scm: Move scm_recv() from scm.h to scm.c. scm_recv() has been placed in scm.h since the pre-git era for no particular reason (I think), which makes the file really fragile. For example, when you move SOCK_PASSCRED from include/linux/net.h to enum sock_flags in include/net/sock.h, you will see weird build failure due to terrible dependency. To avoid the build failure in the future, let's move scm_recv(_unix())? and its callees to scm.c. Note that only scm_recv() needs to be exported for Bluetooth. scm_send() should be moved to scm.c too, but I'll revisit later. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/net/scm.h | 121 ++---------------------------------------------------- 1 file changed, 4 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/include/net/scm.h b/include/net/scm.h index 22bb49589fde..84c4707e78a5 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -102,123 +102,10 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, return __scm_send(sock, msg, scm); } -#ifdef CONFIG_SECURITY_NETWORK -static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) -{ - struct lsm_context ctx; - int err; - - if (test_bit(SOCK_PASSSEC, &sock->flags)) { - err = security_secid_to_secctx(scm->secid, &ctx); - - if (err >= 0) { - put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, ctx.len, - ctx.context); - security_release_secctx(&ctx); - } - } -} - -static inline bool scm_has_secdata(struct socket *sock) -{ - return test_bit(SOCK_PASSSEC, &sock->flags); -} -#else -static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) -{ } - -static inline bool scm_has_secdata(struct socket *sock) -{ - return false; -} -#endif /* CONFIG_SECURITY_NETWORK */ - -static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) -{ - struct file *pidfd_file = NULL; - int len, pidfd; - - /* put_cmsg() doesn't return an error if CMSG is truncated, - * that's why we need to opencode these checks here. - */ - if (msg->msg_flags & MSG_CMSG_COMPAT) - len = sizeof(struct compat_cmsghdr) + sizeof(int); - else - len = sizeof(struct cmsghdr) + sizeof(int); - - if (msg->msg_controllen < len) { - msg->msg_flags |= MSG_CTRUNC; - return; - } - - if (!scm->pid) - return; - - pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); - - if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { - if (pidfd_file) { - put_unused_fd(pidfd); - fput(pidfd_file); - } - - return; - } - - if (pidfd_file) - fd_install(pidfd, pidfd_file); -} - -static inline bool __scm_recv_common(struct socket *sock, struct msghdr *msg, - struct scm_cookie *scm, int flags) -{ - if (!msg->msg_control) { - if (test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags) || - scm->fp || scm_has_secdata(sock)) - msg->msg_flags |= MSG_CTRUNC; - scm_destroy(scm); - return false; - } - - if (test_bit(SOCK_PASSCRED, &sock->flags)) { - struct user_namespace *current_ns = current_user_ns(); - struct ucred ucreds = { - .pid = scm->creds.pid, - .uid = from_kuid_munged(current_ns, scm->creds.uid), - .gid = from_kgid_munged(current_ns, scm->creds.gid), - }; - put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds); - } - - scm_passec(sock, msg, scm); - - if (scm->fp) - scm_detach_fds(msg, scm); - - return true; -} - -static inline void scm_recv(struct socket *sock, struct msghdr *msg, - struct scm_cookie *scm, int flags) -{ - if (!__scm_recv_common(sock, msg, scm, flags)) - return; - - scm_destroy_cred(scm); -} - -static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg, - struct scm_cookie *scm, int flags) -{ - if (!__scm_recv_common(sock, msg, scm, flags)) - return; - - if (test_bit(SOCK_PASSPIDFD, &sock->flags)) - scm_pidfd_recv(msg, scm); - - scm_destroy_cred(scm); -} +void scm_recv(struct socket *sock, struct msghdr *msg, + struct scm_cookie *scm, int flags); +void scm_recv_unix(struct socket *sock, struct msghdr *msg, + struct scm_cookie *scm, int flags); static inline int scm_recv_one_fd(struct file *f, int __user *ufd, unsigned int flags) -- cgit v1.2.3 From 
7d8d93fdde50b86bbbf46a203c368ed320e729ab Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:56 -0700 Subject: net: Restrict SO_PASS{CRED,PIDFD,SEC} to AF_{UNIX,NETLINK,BLUETOOTH}. SCM_CREDENTIALS and SCM_SECURITY can be recv()ed by calling scm_recv() or scm_recv_unix(), and SCM_PIDFD is only used by scm_recv_unix(). scm_recv() is called from AF_NETLINK and AF_BLUETOOTH. scm_recv_unix() is literally called from AF_UNIX. Let's restrict SO_PASSCRED and SO_PASSSEC to such sockets and SO_PASSPIDFD to AF_UNIX only. Later, SOCK_PASS{CRED,PIDFD,SEC} will be moved to struct sock and united with another field. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 35ca6b13c6d2..483522377955 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2773,9 +2773,14 @@ static inline bool sk_is_udp(const struct sock *sk) sk->sk_protocol == IPPROTO_UDP; } +static inline bool sk_is_unix(const struct sock *sk) +{ + return sk->sk_family == AF_UNIX; +} + static inline bool sk_is_stream_unix(const struct sock *sk) { - return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM; + return sk_is_unix(sk) && sk->sk_type == SOCK_STREAM; } static inline bool sk_is_vsock(const struct sock *sk) @@ -2783,6 +2788,13 @@ static inline bool sk_is_vsock(const struct sock *sk) return sk->sk_family == AF_VSOCK; } +static inline bool sk_may_scm_recv(const struct sock *sk) +{ + return (IS_ENABLED(CONFIG_UNIX) && sk->sk_family == AF_UNIX) || + sk->sk_family == AF_NETLINK || + (IS_ENABLED(CONFIG_BT) && sk->sk_family == AF_BLUETOOTH); +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from -- cgit v1.2.3 From 0e81cfd971dc4833c699dcd8924e54a5021bc4e8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:57 -0700 Subject: af_unix: Move SOCK_PASS{CRED,PIDFD,SEC} to struct sock. As explained in the next patch, SO_PASSRIGHTS would have a problem if we assigned a corresponding bit to socket->flags, so it must be managed in struct sock. Mixing socket->flags and sk->sk_flags for similar options will look confusing, and sk->sk_flags does not have enough space on 32bit system. Also, as mentioned in commit 16e572626961 ("af_unix: dont send SCM_CREDENTIALS by default"), SOCK_PASSCRED and SOCK_PASSPID handling is known to be slow, and managing the flags in struct socket cannot avoid that for embryo sockets. Let's move SOCK_PASS{CRED,PIDFD,SEC} to struct sock. While at it, other SOCK_XXX flags in net.h are grouped as enum. Note that assign_bit() was atomic, so the writer side is moved down after lock_sock() in setsockopt(), but the bit is only read once in sendmsg() and recvmsg(), so lock_sock() is not needed there. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/net.h | 15 +++++++-------- include/net/sock.h | 16 +++++++++++++++- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 0ff950eecc6b..f8418d6e33e0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -36,14 +36,13 @@ struct net; * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. * Eventually all flags will be in sk->sk_wq->flags. 
*/ -#define SOCKWQ_ASYNC_NOSPACE 0 -#define SOCKWQ_ASYNC_WAITDATA 1 -#define SOCK_NOSPACE 2 -#define SOCK_PASSCRED 3 -#define SOCK_PASSSEC 4 -#define SOCK_SUPPORT_ZC 5 -#define SOCK_CUSTOM_SOCKOPT 6 -#define SOCK_PASSPIDFD 7 +enum socket_flags { + SOCKWQ_ASYNC_NOSPACE, + SOCKWQ_ASYNC_WAITDATA, + SOCK_NOSPACE, + SOCK_SUPPORT_ZC, + SOCK_CUSTOM_SOCKOPT, +}; #ifndef ARCH_HAS_SOCKET_TYPES /** diff --git a/include/net/sock.h b/include/net/sock.h index 483522377955..d90a71f66ab8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -337,6 +337,11 @@ struct sk_filter; * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags + * @sk_scm_recv_flags: all flags used by scm_recv() + * @sk_scm_credentials: flagged by SO_PASSCRED to recv SCM_CREDENTIALS + * @sk_scm_security: flagged by SO_PASSSEC to recv SCM_SECURITY + * @sk_scm_pidfd: flagged by SO_PASSPIDFD to recv SCM_PIDFD + * @sk_scm_unused: unused flags for scm_recv() * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. * @sk_owner: reference to the real owner of the socket that calls @@ -523,7 +528,16 @@ struct sock { #endif int sk_disconnects; - u8 sk_txrehash; + union { + u8 sk_txrehash; + u8 sk_scm_recv_flags; + struct { + u8 sk_scm_credentials : 1, + sk_scm_security : 1, + sk_scm_pidfd : 1, + sk_scm_unused : 5; + }; + }; u8 sk_clockid; u8 sk_txtime_deadline_mode : 1, sk_txtime_report_errors : 1, -- cgit v1.2.3 From 77cbe1a6d8730a07f99f9263c2d5f2304cf5e830 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:59 -0700 Subject: af_unix: Introduce SO_PASSRIGHTS. As long as recvmsg() or recvmmsg() is used with cmsg, it is not possible to avoid receiving file descriptors via SCM_RIGHTS. This behaviour has occasionally been flagged as problematic, as it can be (ab)used to trigger DoS during close(), for example, by passing a FUSE-controlled fd or a hung NFS fd. For instance, as noted on the uAPI Group page [0], an untrusted peer could send a file descriptor pointing to a hung NFS mount and then close it. Once the receiver calls recvmsg() with msg_control, the descriptor is automatically installed, and then the responsibility for the final close() now falls on the receiver, which may result in blocking the process for a long time. Regarding this, systemd calls cmsg_close_all() [1] after each recvmsg() to close() unwanted file descriptors sent via SCM_RIGHTS. However, this cannot work around the issue at all, because the final fput() may still occur on the receiver's side once sendmsg() with SCM_RIGHTS succeeds. Also, even filtering by LSM at recvmsg() does not work for the same reason. Thus, we need a better way to refuse SCM_RIGHTS at sendmsg(). Let's introduce SO_PASSRIGHTS to disable SCM_RIGHTS. Note that this option is enabled by default for backward compatibility. Link: https://uapi-group.org/kernel-features/#disabling-reception-of-scm_rights-for-af_unix-sockets #[0] Link: https://github.com/systemd/systemd/blob/v257.5/src/basic/fd-util.c#L612-L628 #[1] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/net/sock.h | 4 +++- include/uapi/asm-generic/socket.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index d90a71f66ab8..92e7c1aae3cc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -341,6 +341,7 @@ struct sk_filter; * @sk_scm_credentials: flagged by SO_PASSCRED to recv SCM_CREDENTIALS * @sk_scm_security: flagged by SO_PASSSEC to recv SCM_SECURITY * @sk_scm_pidfd: flagged by SO_PASSPIDFD to recv SCM_PIDFD + * @sk_scm_rights: flagged by SO_PASSRIGHTS to recv SCM_RIGHTS * @sk_scm_unused: unused flags for scm_recv() * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. @@ -535,7 +536,8 @@ struct sock { u8 sk_scm_credentials : 1, sk_scm_security : 1, sk_scm_pidfd : 1, - sk_scm_unused : 5; + sk_scm_rights : 1, + sk_scm_unused : 4; }; }; u8 sk_clockid; diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index aa5016ff3d91..f333a0ac4ee4 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -145,6 +145,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From d1d89e8eee6f0e91cfad2a0375ad679fd5c1ae83 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 May 2025 15:41:46 +0200 Subject: USB: gadget: fix up const issue with struct usb_function_instance In struct usb_function, the struct usb_function_instance pointer variable "fi" is listed as const, but it is written to in numerous places, making the const marking of it a total lie. Fix this up by just removing the const pointer attribute as this is modified in numerous places. Link: https://lore.kernel.org/r/2025052145-undress-puma-f7cf@gregkh Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 6e38fb9d2117..d8c4e9f73839 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -237,7 +237,7 @@ struct usb_function { /* internals */ struct list_head list; DECLARE_BITMAP(endpoints, 32); - const struct usb_function_instance *fi; + struct usb_function_instance *fi; unsigned int bind_deactivated:1; }; -- cgit v1.2.3 From 9a119669fb1924cd9658c16da39a5a585e129e50 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 May 2025 11:38:48 +0200 Subject: netfilter: nf_tables: nft_fib: consistent l3mdev handling fib has two modes: 1. Obtain output device according to source or destination address 2. Obtain the type of the address, e.g. local, unicast, multicast. 'fib daddr type' should return 'local' if the address is configured in this netns or unicast otherwise. 'fib daddr . iif type' should return 'local' if the address is configured on the input interface or unicast otherwise, i.e. more restrictive. However, if the interface is part of a VRF, then 'fib daddr type' returns unicast even if the address is configured on the incoming interface. This is broken for both ipv4 and ipv6. In the ipv4 case, inet_dev_addr_type must only be used if the 'iif' or 'oif' (strict mode) was requested. Else inet_addr_type_dev_table() needs to be used and the correct dev argument must be passed as well so the correct fib (vrf) table is used. 
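As a sketch (not the literal hunk), and assuming the existing inet_dev_addr_type()/inet_addr_type_dev_table() helpers and the nft_fib 'flags' field, the non-strict ipv4 'type' evaluation then becomes something like:

	if (priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF))
		*dest = inet_dev_addr_type(nft_net(pkt), dev, addr);
	else
		*dest = inet_addr_type_dev_table(nft_net(pkt),
						 pkt->skb->dev, addr);

i.e. a device is always passed down so the lookup consults the correct (vrf) fib table.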
In the ipv6 case, the bug is similar, without strict mode, dev is NULL so .flowi6_l3mdev will be set to 0. Add a new 'nft_fib_l3mdev_master_ifindex_rcu()' helper and use that to init the .l3mdev structure member. For ipv6, use it from nft_fib6_flowi_init() which gets called from both the 'type' and the 'route' mode eval functions. This provides consistent behaviour for all modes for both ipv4 and ipv6: If strict matching is requested, the input respectively output device of the netfilter hooks is used. Otherwise, use skb->dev to obtain the l3mdev ifindex. Without this, most type checks in updated nft_fib.sh selftest fail: FAIL: did not find veth0 . 10.9.9.1 . local in fibtype4 FAIL: did not find veth0 . dead:1::1 . local in fibtype6 FAIL: did not find veth0 . dead:9::1 . local in fibtype6 FAIL: did not find tvrf . 10.0.1.1 . local in fibtype4 FAIL: did not find tvrf . 10.9.9.1 . local in fibtype4 FAIL: did not find tvrf . dead:1::1 . local in fibtype6 FAIL: did not find tvrf . dead:9::1 . local in fibtype6 FAIL: fib expression address types match (iif in vrf) (fib errounously returns 'unicast' for all of them, even though all of these addresses are local to the vrf). Fixes: f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_fib.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 6e202ed5e63f..7370fba844ef 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -2,6 +2,7 @@ #ifndef _NFT_FIB_H_ #define _NFT_FIB_H_ +#include #include struct nft_fib { @@ -39,6 +40,14 @@ static inline bool nft_fib_can_skip(const struct nft_pktinfo *pkt) return nft_fib_is_loopback(pkt->skb, indev); } +static inline int nft_fib_l3mdev_master_ifindex_rcu(const struct nft_pktinfo *pkt, + const struct net_device *iif) +{ + const struct net_device *dev = iif ? iif : pkt->skb->dev; + + return l3mdev_master_ifindex_rcu(dev); +} + int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); -- cgit v1.2.3 From a1f1acb9c5db9b385c9b3eb1f27f897c06df49ae Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 May 2025 12:28:44 +0200 Subject: netfilter: nf_dup{4, 6}: Move duplication check to task_struct nf_skb_duplicated is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Due to the recursion involved, the simplest change is to make it a per-task variable. Move the per-CPU variable nf_skb_duplicated to task_struct and name it in_nf_duplicate. Add it to the existing bitfield so it doesn't use additional memory. 
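As a rough sketch (not the exact hunk), the guard in nf_dup_ipv4()/nf_dup_ipv6() then reads:

	if (current->in_nf_duplicate)
		return;

	...
	current->in_nf_duplicate = true;
	ip_local_out(net, skb->sk, skb);	/* re-enters OUTPUT/POSTROUTING */
	current->in_nf_duplicate = false;

which needs neither a per-CPU variable nor disabled BH for correctness.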
Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Valentin Schneider Acked-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 11 ----------- include/linux/sched.h | 1 + 2 files changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2b8aac2c70ad..892d12823ed4 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -497,17 +497,6 @@ struct nf_defrag_hook { extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook; extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook; -/* - * nf_skb_duplicated - TEE target has sent a packet - * - * When a xtables target sends a packet, the OUTPUT and POSTROUTING - * hooks are traversed again, i.e. nft and xtables are invoked recursively. - * - * This is used by xtables TEE target to prevent the duplicated skb from - * being duplicated again. - */ -DECLARE_PER_CPU(bool, nf_skb_duplicated); - /* * Contains bitmask of ctnetlink event subscribers, if any. * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag. diff --git a/include/linux/sched.h b/include/linux/sched.h index f96ac1982893..52d9c52dc8f2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1044,6 +1044,7 @@ struct task_struct { /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif + unsigned in_nf_duplicate:1; #ifdef CONFIG_PREEMPT_RT struct netdev_xmit net_xmit; #endif -- cgit v1.2.3 From f37ad91270397a6d053e8623bdb3cf79859691d2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 May 2025 12:28:46 +0200 Subject: netfilter: nf_dup_netdev: Move the recursion counter struct netdev_xmit nf_dup_skb_recursion is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Move nf_dup_skb_recursion to struct netdev_xmit, provide wrappers. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice_xmit.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h index 848735b3a7c0..813a19122ebb 100644 --- a/include/linux/netdevice_xmit.h +++ b/include/linux/netdevice_xmit.h @@ -11,6 +11,9 @@ struct netdev_xmit { #if IS_ENABLED(CONFIG_NET_ACT_MIRRED) u8 sched_mirred_nest; #endif +#if IS_ENABLED(CONFIG_NF_DUP_NETDEV) + u8 nf_dup_skb_recursion; +#endif }; #endif -- cgit v1.2.3 From 90869f43d06dfc836def2f53850a878f829e443e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 May 2025 15:49:33 +0200 Subject: netfilter: conntrack: make nf_conntrack_id callable without a module dependency While nf_conntrack_id() doesn't need any functionaliy from conntrack, it does reside in nf_conntrack_core.c -- callers add a module dependency on conntrack. Followup patch will need to compute the conntrack id from nf_tables_trace.c to include it in nf_trace messages emitted to userspace via netlink. I don't want to introduce a module dependency between nf_tables and conntrack for this. Since trace is slowpath, the added indirection is ok. One alternative is to move nf_conntrack_id to the netfilter/core.c, but I don't see a compelling reason so far. 
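With the new member in place, the slow-path caller can use the usual hook indirection, roughly (illustrative only, the actual caller in nf_tables_trace.c may differ; nfct here is the skb's conntrack pointer):

	const struct nf_ct_hook *ct_hook;
	u32 id = 0;

	rcu_read_lock();
	ct_hook = rcu_dereference(nf_ct_hook);
	if (ct_hook)
		id = ct_hook->get_id(nfct);
	rcu_read_unlock();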
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 892d12823ed4..20947f2c685b 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -470,6 +470,7 @@ struct nf_ct_hook { void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); int (*confirm)(struct sk_buff *skb); + u32 (*get_id)(const struct nf_conntrack *nfct); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; -- cgit v1.2.3 From 7e5c6aa67e6f6133c5a2c53852e1dd9af2c0c3fc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 May 2025 15:49:34 +0200 Subject: netfilter: nf_tables: add packets conntrack state to debug trace info Add the minimal relevant info needed for userspace ("nftables monitor trace") to provide the conntrack view of the packet: - state (new, related, established) - direction (original, reply) - status (e.g., if connection is subject to dnat) - id (allows to query ctnetlink for remaining conntrack state info) Example: trace id a62 inet filter PRE_RAW packet: iif "enp0s3" ether [..] [..] trace id a62 inet filter PRE_MANGLE conntrack: ct direction original ct state new ct id 32 trace id a62 inet filter PRE_MANGLE packet: [..] [..] trace id a62 inet filter IN conntrack: ct direction original ct state new ct status dnat-done ct id 32 [..] In this case one can see that while NAT is active, the new connection isn't subject to a translation. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7d6bc19a0153..2beb30be2c5f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1841,6 +1841,10 @@ enum nft_xfrm_keys { * @NFTA_TRACE_MARK: nfmark (NLA_U32) * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) + * @NFTA_TRACE_CT_ID: conntrack id (NLA_U32) + * @NFTA_TRACE_CT_DIRECTION: packets direction (NLA_U8) + * @NFTA_TRACE_CT_STATUS: conntrack status (NLA_U32) + * @NFTA_TRACE_CT_STATE: packet state (new, established, ...) (NLA_U32) */ enum nft_trace_attributes { NFTA_TRACE_UNSPEC, @@ -1861,6 +1865,10 @@ enum nft_trace_attributes { NFTA_TRACE_NFPROTO, NFTA_TRACE_POLICY, NFTA_TRACE_PAD, + NFTA_TRACE_CT_ID, + NFTA_TRACE_CT_DIRECTION, + NFTA_TRACE_CT_STATUS, + NFTA_TRACE_CT_STATE, __NFTA_TRACE_MAX }; #define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1) -- cgit v1.2.3 From e225376d78fb2d85e99a2436a9e65765dc1ac234 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 21 May 2025 22:44:23 +0200 Subject: netfilter: nf_tables: Introduce nft_hook_find_ops{,_rcu}() Also a pretty dull wrapper around the hook->ops.dev comparison for now. Will search the embedded nf_hook_ops list in future. The ugly cast to eliminate the const qualifier will vanish then, too. Since this future list will be RCU-protected, also introduce an _rcu() variant here. 
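For reference, at this point the helper is roughly just the following sketch:

	struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook,
					      const struct net_device *dev)
	{
		/* cast drops const; goes away once ops live in a list */
		return hook->ops.dev == dev ?
		       (struct nf_hook_ops *)&hook->ops : NULL;
	}

with nft_hook_find_ops_rcu() as its RCU-side twin.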
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 803d5f1601f9..df0b151743a2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1205,6 +1205,11 @@ struct nft_hook { u8 ifnamelen; }; +struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook, + const struct net_device *dev); +struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook, + const struct net_device *dev); + /** * struct nft_base_chain - nf_tables base chain * -- cgit v1.2.3 From 73319a8ee18b9cf0b2dac87f8521595e0381ba0c Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 21 May 2025 22:44:26 +0200 Subject: netfilter: nf_tables: Have a list of nf_hook_ops in nft_hook Supporting a 1:n relationship between nft_hook and nf_hook_ops is convenient since a chain's or flowtable's nft_hooks may remain in place despite matching interfaces disappearing. This stabilizes ruleset dumps in that regard and opens the possibility to claim newly added interfaces which match the spec. Also it prepares for wildcard interface specs since these will potentially match multiple interfaces. All spots dealing with hook registration are updated to handle a list of multiple nf_hook_ops, but nft_netdev_hook_alloc() only adds a single item for now to retain the old behaviour. The only expected functional change here is how vanishing interfaces are handled: Instead of dropping the respective nft_hook, only the matching nf_hook_ops are dropped. To safely remove individual ops from the list in netdev handlers, an rcu_head is added to struct nf_hook_ops so kfree_rcu() may be used. There is at least nft_flowtable_find_dev() which may be iterating through the list at the same time. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 3 +++ include/net/netfilter/nf_tables.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 20947f2c685b..5f896fcc074d 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -95,6 +95,9 @@ enum nf_hook_ops_type { }; struct nf_hook_ops { + struct list_head list; + struct rcu_head rcu; + /* User fills in from here down. */ nf_hookfn *hook; struct net_device *dev; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index df0b151743a2..5e49619ae49c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1199,7 +1199,7 @@ struct nft_stats { struct nft_hook { struct list_head list; - struct nf_hook_ops ops; + struct list_head ops_list; struct rcu_head rcu; char ifname[IFNAMSIZ]; u8 ifnamelen; -- cgit v1.2.3 From 465b9ee0ee7bc268d7f261356afd6c4262e48d82 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 21 May 2025 22:44:33 +0200 Subject: netfilter: nf_tables: Add notifications for hook changes Notify user space if netdev hooks are updated due to netdev add/remove events. Send minimal notification messages by introducing NFT_MSG_NEWDEV/DELDEV message types describing a single device only. Upon NETDEV_CHANGENAME, the callback has no information about the interface's old name. To provide a clear message to user space, include the hook's stored interface name in the notification. 
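Conceptually the netdev event path ends up doing something like this (sketch only, the exact call sites may differ):

	case NETDEV_REGISTER:
		/* spec matches the new device: claim it and tell user space */
		nf_tables_chain_device_notify(chain, hook, dev, NFT_MSG_NEWDEV);
		break;
	case NETDEV_UNREGISTER:
		nf_tables_chain_device_notify(chain, hook, dev, NFT_MSG_DELDEV);
		break;

with the notification carrying NFTA_DEVICE_TABLE, NFTA_DEVICE_CHAIN (or NFTA_DEVICE_FLOWTABLE), NFTA_DEVICE_SPEC and NFTA_DEVICE_NAME, and delivered via the new NFNLGRP_NFT_DEV group.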
Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++++ include/uapi/linux/netfilter/nf_tables.h | 10 ++++++++++ include/uapi/linux/netfilter/nfnetlink.h | 2 ++ 3 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5e49619ae49c..e4d8e451e935 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1142,6 +1142,11 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); +struct nft_hook; +void nf_tables_chain_device_notify(const struct nft_chain *chain, + const struct nft_hook *hook, + const struct net_device *dev, int event); + enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_ROUTE, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2beb30be2c5f..518ba144544c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -142,6 +142,8 @@ enum nf_tables_msg_types { NFT_MSG_DESTROYOBJ, NFT_MSG_DESTROYFLOWTABLE, NFT_MSG_GETSETELEM_RESET, + NFT_MSG_NEWDEV, + NFT_MSG_DELDEV, NFT_MSG_MAX, }; @@ -1784,10 +1786,18 @@ enum nft_synproxy_attributes { * enum nft_device_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) + * @NFTA_DEVICE_TABLE: table containing the flowtable or chain hooking into the device (NLA_STRING) + * @NFTA_DEVICE_FLOWTABLE: flowtable hooking into the device (NLA_STRING) + * @NFTA_DEVICE_CHAIN: chain hooking into the device (NLA_STRING) + * @NFTA_DEVICE_SPEC: hook spec matching the device (NLA_STRING) */ enum nft_devices_attributes { NFTA_DEVICE_UNSPEC, NFTA_DEVICE_NAME, + NFTA_DEVICE_TABLE, + NFTA_DEVICE_FLOWTABLE, + NFTA_DEVICE_CHAIN, + NFTA_DEVICE_SPEC, __NFTA_DEVICE_MAX }; #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 6cd58cd2a6f0..50d807af2649 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -25,6 +25,8 @@ enum nfnetlink_groups { #define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA NFNLGRP_NFTRACE, #define NFNLGRP_NFTRACE NFNLGRP_NFTRACE + NFNLGRP_NFT_DEV, +#define NFNLGRP_NFT_DEV NFNLGRP_NFT_DEV __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) -- cgit v1.2.3 From 101f2bbab541116ab861b9c3ac0ece07a7eaa756 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Wed, 7 May 2025 15:34:01 -0700 Subject: fs: convert mount flags to enum In prior kernel versions (5.8-6.8), commit 9f6c61f96f2d9 ("proc/mounts: add cursor") introduced MNT_CURSOR, a flag used by readers from /proc/mounts to keep their place while reading the file. Later, commit 2eea9ce4310d8 ("mounts: keep list of mounts in an rbtree") removed this flag and its value has since been repurposed. For debuggers iterating over the list of mounts, cursors should be skipped as they are irrelevant. Detecting whether an element is a cursor can be difficult. Since the MNT_CURSOR flag is a preprocessor constant, it's not present in debuginfo, and since its value is repurposed, we cannot hard-code it. For this specific issue, cursors are possible to detect in other ways, but ideally, we would be able to read the mount flag definitions out of the debuginfo. 
For that reason, convert the mount flags to an enum. Link: https://github.com/osandov/drgn/pull/496 Signed-off-by: Stephen Brennan Link: https://lore.kernel.org/20250507223402.2795029-1-stephen.s.brennan@oracle.com Signed-off-by: Christian Brauner --- include/linux/mount.h | 87 ++++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index dcc17ce8a959..6904ad33ee7a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -22,48 +22,51 @@ struct fs_context; struct file; struct path; -#define MNT_NOSUID 0x01 -#define MNT_NODEV 0x02 -#define MNT_NOEXEC 0x04 -#define MNT_NOATIME 0x08 -#define MNT_NODIRATIME 0x10 -#define MNT_RELATIME 0x20 -#define MNT_READONLY 0x40 /* does the user want this to be r/o? */ -#define MNT_NOSYMFOLLOW 0x80 - -#define MNT_SHRINKABLE 0x100 -#define MNT_WRITE_HOLD 0x200 - -#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ -#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ -/* - * MNT_SHARED_MASK is the set of flags that should be cleared when a - * mount becomes shared. Currently, this is only the flag that says a - * mount cannot be bind mounted, since this is how we create a mount - * that shares events with another mount. If you add a new MNT_* - * flag, consider how it interacts with shared mounts. - */ -#define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ - | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ - | MNT_READONLY | MNT_NOSYMFOLLOW) -#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) - -#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) - -#define MNT_INTERNAL 0x4000 - -#define MNT_LOCK_ATIME 0x040000 -#define MNT_LOCK_NOEXEC 0x080000 -#define MNT_LOCK_NOSUID 0x100000 -#define MNT_LOCK_NODEV 0x200000 -#define MNT_LOCK_READONLY 0x400000 -#define MNT_LOCKED 0x800000 -#define MNT_DOOMED 0x1000000 -#define MNT_SYNC_UMOUNT 0x2000000 -#define MNT_MARKED 0x4000000 -#define MNT_UMOUNT 0x8000000 +enum mount_flags { + MNT_NOSUID = 0x01, + MNT_NODEV = 0x02, + MNT_NOEXEC = 0x04, + MNT_NOATIME = 0x08, + MNT_NODIRATIME = 0x10, + MNT_RELATIME = 0x20, + MNT_READONLY = 0x40, /* does the user want this to be r/o? */ + MNT_NOSYMFOLLOW = 0x80, + + MNT_SHRINKABLE = 0x100, + MNT_WRITE_HOLD = 0x200, + + MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */ + MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */ + + MNT_INTERNAL = 0x4000, + + MNT_LOCK_ATIME = 0x040000, + MNT_LOCK_NOEXEC = 0x080000, + MNT_LOCK_NOSUID = 0x100000, + MNT_LOCK_NODEV = 0x200000, + MNT_LOCK_READONLY = 0x400000, + MNT_LOCKED = 0x800000, + MNT_DOOMED = 0x1000000, + MNT_SYNC_UMOUNT = 0x2000000, + MNT_MARKED = 0x4000000, + MNT_UMOUNT = 0x8000000, + + /* + * MNT_SHARED_MASK is the set of flags that should be cleared when a + * mount becomes shared. Currently, this is only the flag that says a + * mount cannot be bind mounted, since this is how we create a mount + * that shares events with another mount. If you add a new MNT_* + * flag, consider how it interacts with shared mounts. 
+ */ + MNT_SHARED_MASK = MNT_UNBINDABLE, + MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME + | MNT_READONLY | MNT_NOSYMFOLLOW, + MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, + + MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED, +}; struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ -- cgit v1.2.3 From 927244f6efff6df5f8f2ab58c7d1eec9f90cc3f2 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Thu, 22 May 2025 19:21:10 +0530 Subject: traceevent/block: Add REQ_ATOMIC flag to block trace events Filesystems like XFS can implement atomic write I/O using either REQ_ATOMIC flag set in the bio or via CoW operation. It will be useful if we have a flag in trace events to distinguish between the two. This patch adds char 'U' (Untorn writes) to rwbs field of the trace events if REQ_ATOMIC flag is set in the bio. ================= xfs_io-4238 [009] ..... 4148.126843: block_rq_issue: 259,0 WFSU 16384 () 768 + 32 none,0,0 [xfs_io] -0 [009] d.h1. 4148.129864: block_rq_complete: 259,0 WFSU () 768 + 32 none,0,0 [0] =============== xfs_io-4237 [010] ..... 4143.325616: block_rq_issue: 259,0 WS 16384 () 768 + 32 none,0,0 [xfs_io] -0 [010] d.H1. 4143.329138: block_rq_complete: 259,0 WS () 768 + 32 none,0,0 [0] Signed-off-by: Ritesh Harjani (IBM) Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/44317cb2ec4588f6a2c1501a96684e6a1196e8ba.1747921498.git.ritesh.list@gmail.com Signed-off-by: Jens Axboe --- include/trace/events/block.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index ad36e73b8579..14a924c0e303 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -11,7 +11,7 @@ #include #include -#define RWBS_LEN 8 +#define RWBS_LEN 9 #define IOPRIO_CLASS_STRINGS \ { IOPRIO_CLASS_NONE, "none" }, \ -- cgit v1.2.3 From b465ae7b2524170cb14fa25dbcb84923bfb1a0a9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 23 May 2025 00:35:20 +0800 Subject: ublk: add feature UBLK_F_QUIESCE Add feature UBLK_F_QUIESCE, which adds control command `UBLK_U_CMD_QUIESCE_DEV` for quiescing device, then device state can become `UBLK_S_DEV_QUIESCED` or `UBLK_S_DEV_FAIL_IO` finally from ublk_ch_release() with ublk server cooperation. This feature can help to support to upgrade ublk server application by shutting down ublk server gracefully, meantime keep ublk block device persistent during the upgrading period. The feature is only available for UBLK_F_USER_RECOVERY. 
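A rough sketch of the expected ublk server usage (ublk_ctrl_cmd() is a placeholder for however the server issues control commands, not a real API):

	/* dev_info obtained via UBLK_U_CMD_GET_DEV_INFO */
	if ((dev_info.flags & UBLK_F_QUIESCE) &&
	    (dev_info.flags & UBLK_F_USER_RECOVERY))
		ret = ublk_ctrl_cmd(ctrl_fd, UBLK_U_CMD_QUIESCE_DEV, &cmd);

A -EBUSY return means outstanding IO commands were not completed within the time passed in the control command parameter.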
Suggested-by: Yoav Cohen Link: https://lore.kernel.org/linux-block/DM4PR12MB632807AB7CDCE77D1E5AB7D0A9B92@DM4PR12MB6328.namprd12.prod.outlook.com/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250522163523.406289-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 1c40632cb164..56c7e3fc666f 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -53,6 +53,8 @@ _IOR('u', 0x14, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_UPDATE_SIZE \ _IOWR('u', 0x15, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_QUIESCE_DEV \ + _IOWR('u', 0x16, struct ublksrv_ctrl_cmd) /* * 64bits are enough now, and it should be easy to extend in case of @@ -253,6 +255,23 @@ */ #define UBLK_F_AUTO_BUF_REG (1ULL << 11) +/* + * Control command `UBLK_U_CMD_QUIESCE_DEV` is added for quiescing device, + * which state can be transitioned to `UBLK_S_DEV_QUIESCED` or + * `UBLK_S_DEV_FAIL_IO` finally, and it needs ublk server cooperation for + * handling `UBLK_IO_RES_ABORT` correctly. + * + * Typical use case is for supporting to upgrade ublk server application, + * meantime keep ublk block device persistent during the period. + * + * This feature is only available when UBLK_F_USER_RECOVERY is enabled. + * + * Note, this command returns -EBUSY in case that all IO commands are being + * handled by ublk server and not completed in specified time period which + * is passed from the control command parameter. + */ +#define UBLK_F_QUIESCE (1ULL << 12) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 09683a6184ad6fd635831402316651392b56cc3a Mon Sep 17 00:00:00 2001 From: Karolina Stolarek Date: Thu, 22 May 2025 18:21:21 -0500 Subject: PCI/AER: Rename struct aer_stats to aer_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update name to reflect the broader definition of structs/variables that are stored (e.g. ratelimits). This is a preparatory patch for adding rate limit support. [bhelgaas: "aer_report" -> "aer_info"] Signed-off-by: Karolina Stolarek Signed-off-by: Bjorn Helgaas Tested-by: Krzysztof Wilczyński Reviewed-by: Ilpo Järvinen Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250522232339.1525671-16-helgaas@kernel.org --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..81a81dbfc873 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -346,7 +346,7 @@ struct pci_dev { u8 hdr_type; /* PCI header type (`multi' flag masked out) */ #ifdef CONFIG_PCIEAER u16 aer_cap; /* AER capability offset */ - struct aer_stats *aer_stats; /* AER stats for this device */ + struct aer_info *aer_info; /* AER info for this device */ #endif #ifdef CONFIG_PCIEPORTBUS struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ -- cgit v1.2.3 From 13423063c7cb7d1c34104a78cae85eb0281bae90 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Mon, 28 Apr 2025 14:07:32 -0700 Subject: arm64: kvm, smccc: Introduce and use API for getting hypervisor UUID The KVM/arm64 uses SMCCC to detect hypervisor presence. That code is private, and it follows the SMCCC specification. 
Other existing and emerging hypervisor guest implementations can and should use that standard approach as well. Factor out a common infrastructure that the guests can use, update KVM to employ the new API. The central notion of the SMCCC method is the UUID of the hypervisor, and the new API follows that. No functional changes. Validated with a KVM/arm64 guest. Signed-off-by: Roman Kisel Acked-by: Arnd Bergmann Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20250428210742.435282-2-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250428210742.435282-2-romank@linux.microsoft.com> --- include/linux/arm-smccc.h | 64 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index a3863da1510e..784ebe4607a4 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -7,6 +7,11 @@ #include #include + +#ifndef __ASSEMBLY__ +#include +#endif + #include /* @@ -107,10 +112,10 @@ ARM_SMCCC_FUNC_QUERY_CALL_UID) /* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */ -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM UUID_INIT(\ + 0xb66fb428, 0xc52e, 0xe911, \ + 0xa9, 0xca, 0x4b, 0x56, \ + 0x4d, 0x00, 0x3a, 0x74) /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 @@ -348,6 +353,57 @@ s32 arm_smccc_get_soc_id_version(void); */ s32 arm_smccc_get_soc_id_revision(void); +#ifndef __ASSEMBLY__ + +/* + * Returns whether a specific hypervisor UUID is advertised for the + * Vendor Specific Hypervisor Service range. + */ +bool arm_smccc_hypervisor_has_uuid(const uuid_t *uuid); + +static inline uuid_t smccc_res_to_uuid(u32 r0, u32 r1, u32 r2, u32 r3) +{ + uuid_t uuid = { + .b = { + [0] = (r0 >> 0) & 0xff, + [1] = (r0 >> 8) & 0xff, + [2] = (r0 >> 16) & 0xff, + [3] = (r0 >> 24) & 0xff, + + [4] = (r1 >> 0) & 0xff, + [5] = (r1 >> 8) & 0xff, + [6] = (r1 >> 16) & 0xff, + [7] = (r1 >> 24) & 0xff, + + [8] = (r2 >> 0) & 0xff, + [9] = (r2 >> 8) & 0xff, + [10] = (r2 >> 16) & 0xff, + [11] = (r2 >> 24) & 0xff, + + [12] = (r3 >> 0) & 0xff, + [13] = (r3 >> 8) & 0xff, + [14] = (r3 >> 16) & 0xff, + [15] = (r3 >> 24) & 0xff, + }, + }; + + return uuid; +} + +static inline u32 smccc_uuid_to_reg(const uuid_t *uuid, int reg) +{ + u32 val = 0; + + val |= (u32)(uuid->b[4 * reg + 0] << 0); + val |= (u32)(uuid->b[4 * reg + 1] << 8); + val |= (u32)(uuid->b[4 * reg + 2] << 16); + val |= (u32)(uuid->b[4 * reg + 3] << 24); + + return val; +} + +#endif /* !__ASSEMBLY__ */ + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 -- cgit v1.2.3 From e7e6902fbd19b25630cf6a258c44cb385f16b1c8 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Mon, 28 Apr 2025 14:07:35 -0700 Subject: Drivers: hv: Provide arch-neutral implementation of get_vtl() To run in the VTL mode, Hyper-V drivers have to know what VTL the system boots in, and the arm64/hyperv code does not have the means to compute that. Refactor the code to hoist the function that detects VTL, make it arch-neutral to be able to employ it to get the VTL on arm64. 
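For context, a minimal sketch of what the hoisted helper computes (the register read is shown through a hypothetical hv_read_vp_register() wrapper; the real code issues HVCALL_GET_VP_REGISTERS for the VSM VP status register):

	u8 __init get_vtl(void)
	{
		u64 vsm_status = hv_read_vp_register(HV_REGISTER_VSM_VP_STATUS);

		return vsm_status & HV_VTL_MASK;
	}

with HV_VTL_MASK being the arch-neutral spelling of the former HV_X64_VTL_MASK.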
Signed-off-by: Roman Kisel Reviewed-by: Michael Kelley Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/20250428210742.435282-5-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250428210742.435282-5-romank@linux.microsoft.com> --- include/asm-generic/mshyperv.h | 6 ++++++ include/hyperv/hvgdk_mini.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index ccccb1cbf7df..6c51a25ed7b5 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -378,4 +378,10 @@ static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u3 } #endif /* CONFIG_MSHV_ROOT */ +#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) +u8 __init get_vtl(void); +#else +static inline u8 get_vtl(void) { return 0; } +#endif + #endif diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index abf0bd76e370..cf0923dc727d 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -1228,7 +1228,7 @@ struct hv_send_ipi { /* HV_INPUT_SEND_SYNTHETIC_CLUSTER_IPI */ u64 cpu_mask; } __packed; -#define HV_X64_VTL_MASK GENMASK(3, 0) +#define HV_VTL_MASK GENMASK(3, 0) /* Hyper-V memory host visibility */ enum hv_mem_host_visibility { -- cgit v1.2.3 From 18a34bb5221e2b79dbcba5bb50d92beb45b68e15 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Mon, 28 Apr 2025 14:07:40 -0700 Subject: Drivers: hv: vmbus: Introduce hv_get_vmbus_root_device() The ARM64 PCI code for hyperv needs to know the VMBus root device, and it is private. Provide a function that returns it. Rename it from "hv_dev" as "hv_dev" as a symbol is very overloaded. No functional changes. Signed-off-by: Roman Kisel Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20250428210742.435282-10-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250428210742.435282-10-romank@linux.microsoft.com> --- include/linux/hyperv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index d6ffe01962c2..23b3c3a2ed8c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1283,6 +1283,8 @@ static inline void *hv_get_drvdata(struct hv_device *dev) return dev_get_drvdata(&dev->device); } +struct device *hv_get_vmbus_root_device(void); + struct hv_ring_buffer_debug_info { u32 current_interrupt_mask; u32 current_read_index; -- cgit v1.2.3 From ab7e531a8212394608838c31e9dfac69fafa6c8a Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Mon, 28 Apr 2025 14:07:41 -0700 Subject: ACPI: irq: Introduce acpi_get_gsi_dispatcher() Using acpi_irq_create_hierarchy() in the cases where the code also handles OF leads to code duplication as the ACPI subsystem doesn't provide means to compute the IRQ domain parent whereas the OF does. Introduce acpi_get_gsi_dispatcher() so that the drivers relying on both ACPI and OF may use irq_domain_create_hierarchy() in the common code paths. No functional changes. Signed-off-by: Roman Kisel Reviewed-by: Michael Kelley Acked-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20250428210742.435282-11-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250428210742.435282-11-romank@linux.microsoft.com> --- include/linux/acpi.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3f2e93ed9730..14e8871b5787 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -335,8 +335,11 @@ int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); +typedef struct fwnode_handle *(*acpi_gsi_domain_disp_fn)(u32); + void acpi_set_irq_model(enum acpi_irq_model_id model, - struct fwnode_handle *(*)(u32)); + acpi_gsi_domain_disp_fn fn); +acpi_gsi_domain_disp_fn acpi_get_gsi_dispatcher(void); void acpi_set_gsi_to_irq_fallback(u32 (*)(u32)); struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, -- cgit v1.2.3 From 86c48271e0d60c82665e9fd61277002391efcef7 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Wed, 7 May 2025 11:22:25 -0700 Subject: x86/hyperv: Fix APIC ID and VP index confusion in hv_snp_boot_ap() To start an application processor in SNP-isolated guest, a hypercall is used that takes a virtual processor index. The hv_snp_boot_ap() function uses that START_VP hypercall but passes as VP index to it what it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. As those two aren't generally interchangeable, that may lead to hung APs if the VP index and the APIC ID don't match up. Update the parameter names to avoid confusion as to what the parameter is. Use the APIC ID to the VP index conversion to provide the correct input to the hypercall. Cc: stable@vger.kernel.org Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest") Signed-off-by: Roman Kisel Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20250507182227.7421-2-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250507182227.7421-2-romank@linux.microsoft.com> --- include/hyperv/hvgdk_mini.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index cf0923dc727d..2d431b53f587 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -475,7 +475,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_CREATE_PORT 0x0095 #define HVCALL_CONNECT_PORT 0x0096 #define HVCALL_START_VP 0x0099 -#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a +#define HVCALL_GET_VP_INDEX_FROM_APIC_ID 0x009a #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 #define HVCALL_SIGNAL_EVENT_DIRECT 0x00c0 -- cgit v1.2.3 From cd1769e1fef9ab8fbdfafb67b2c327418867afb6 Mon Sep 17 00:00:00 2001 From: Long Li Date: Mon, 5 May 2025 17:56:37 -0700 Subject: Drivers: hv: Remove hv_alloc/free_* helpers There are no users for those functions, remove them. 
Signed-off-by: Long Li Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1746492997-4599-6-git-send-email-longli@linuxonhyperv.com Signed-off-by: Wei Liu Message-ID: <1746492997-4599-6-git-send-email-longli@linuxonhyperv.com> --- include/asm-generic/mshyperv.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 6c51a25ed7b5..a729b77983fa 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -236,10 +236,6 @@ int hv_common_cpu_init(unsigned int cpu); int hv_common_cpu_die(unsigned int cpu); void hv_identify_partition_type(void); -void *hv_alloc_hyperv_page(void); -void *hv_alloc_hyperv_zeroed_page(void); -void hv_free_hyperv_page(void *addr); - /** * hv_cpu_number_to_vp_number() - Map CPU to VP. * @cpu_number: CPU number in Linux terms -- cgit v1.2.3 From a539e2a6d51d1c12d89eec149ccc72ec561639bc Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Tue, 20 May 2025 14:01:17 +0100 Subject: btf: Allow mmap of vmlinux btf User space needs access to kernel BTF for many modern features of BPF. Right now each process needs to read the BTF blob either in pieces or as a whole. Allow mmaping the sysfs file so that processes can directly access the memory allocated for it in the kernel. remap_pfn_range is used instead of vm_insert_page due to aarch64 compatibility issues. Signed-off-by: Lorenz Bauer Signed-off-by: Andrii Nakryiko Tested-by: Alan Maguire Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/bpf/20250520-vmlinux-mmap-v5-1-e8c941acc414@isovalent.com --- include/asm-generic/vmlinux.lds.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 58a635a6d5bd..1750390735fa 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) */ #ifdef CONFIG_DEBUG_INFO_BTF #define BTF \ + . = ALIGN(PAGE_SIZE); \ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ BOUNDED_SECTION_BY(.BTF, _BTF) \ } \ - . = ALIGN(4); \ + . = ALIGN(PAGE_SIZE); \ .BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \ *(.BTF_ids) \ } -- cgit v1.2.3 From 059717c2ba1f745f35f4bbb8991e408c12031f1e Mon Sep 17 00:00:00 2001 From: Anil S Keshavamurthy Date: Fri, 23 May 2025 13:20:01 -0400 Subject: ACPI: MRRM: Fix default max memory region Per the spec, the default max memory region must be 1 covering all system memory. When platform does not provide ACPI MRRM table or when CONFIG_ACPI is opted out, the acpi_mrrm_max_mem_region() function defaults to returning 1 region complying to RDT spec. Signed-off-by: Anil S Keshavamurthy Reviewed-by: Tony Luck Link: https://patch.msgid.link/20250523172001.1761634-1-anil.s.keshavamurthy@intel.com Signed-off-by: Rafael J. 
Wysocki --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c409f4cecb09..d9ba7b4fa2cd 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1098,7 +1098,7 @@ static inline acpi_handle acpi_get_processor_handle(int cpu) static inline int acpi_mrrm_max_mem_region(void) { - return -ENOENT; + return 1; } #endif /* !CONFIG_ACPI */ -- cgit v1.2.3 From 588ca944c27729c7f950d1f44c6d6700a919969a Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Wed, 21 May 2025 13:47:45 +0100 Subject: cxl/edac: Add CXL memory device memory sparing control feature Memory sparing is defined as a repair function that replaces a portion of memory with a portion of functional memory at that same DPA. The subclasses for this operation vary in terms of the scope of the sparing being performed. The cacheline sparing subclass refers to a sparing action that can replace a full cacheline. Row sparing is provided as an alternative to PPR sparing functions and its scope is that of a single DDR row. As per CXL r3.2 Table 8-125 foot note 1. Memory sparing is preferred over PPR when possible. Bank sparing allows an entire bank to be replaced. Rank sparing is defined as an operation in which an entire DDR rank is replaced. Memory sparing maintenance operations may be supported by CXL devices that implement CXL.mem protocol. A sparing maintenance operation requests the CXL device to perform a repair operation on its media. For example, a CXL device with DRAM components that support memory sparing features may implement sparing maintenance operations. The host may issue a query command by setting query resources flag in the input payload (CXL spec 3.2 Table 8-120) to determine availability of sparing resources for a given address. In response to a query request, the device shall report the resource availability by producing the memory sparing event record (CXL spec 3.2 Table 8-60) in which the Channel, Rank, Nibble Mask, Bank Group, Bank, Row, Column, Sub-Channel fields are a copy of the values specified in the request. During the execution of a sparing maintenance operation, a CXL memory device: - may not retain data - may not be able to process CXL.mem requests correctly. These CXL memory device capabilities are specified by restriction flags in the memory sparing feature readable attributes. When a CXL device identifies error on a memory component, the device may inform the host about the need for a memory sparing maintenance operation by using DRAM event record, where the 'maintenance needed' flag may set. The event record contains some of the DPA, Channel, Rank, Nibble Mask, Bank Group, Bank, Row, Column, Sub-Channel fields that should be repaired. The userspace tool requests for maintenance operation if the 'maintenance needed' flag set in the CXL DRAM error record. CXL spec 3.2 section 8.2.10.7.1.4 describes the device's memory sparing maintenance operation feature. CXL spec 3.2 section 8.2.10.7.2.3 describes the memory sparing feature discovery and configuration. Add support for controlling CXL memory device memory sparing feature. Register with EDAC driver, which gets the memory repair attr descriptors from the EDAC memory repair driver and exposes sysfs repair control attributes for memory sparing to the userspace. For example CXL memory sparing control for the CXL mem0 device is exposed in /sys/bus/edac/devices/cxl_mem0/mem_repairX/ Use case ======== 1. 
CXL device identifies a failure in a memory component, report to userspace in a CXL DRAM trace event with DPA and other attributes of memory to repair such as channel, rank, nibble mask, bank Group, bank, row, column, sub-channel. 2. Rasdaemon process the trace event and may issue query request in sysfs check resources available for memory sparing if either of the following conditions met. - 'maintenance needed' flag set in the event record. - 'threshold event' flag set for CVME threshold feature. - When the number of corrected error reported on a CXL.mem media to the userspace exceeds the threshold value for corrected error count defined by the userspace policy. 3. Rasdaemon process the memory sparing trace event and issue repair request for memory sparing. Kernel CXL driver shall report memory sparing event record to the userspace with the resource availability in order rasdaemon to process the event record and issue a repair request in sysfs for the memory sparing operation in the CXL device. Note: Based on the feedbacks from the community 'query' sysfs attribute is removed and reporting memory sparing error record to the userspace are not supported. Instead userspace issues sparing operation and kernel does the same to the CXL memory device, when 'maintenance needed' flag set in the DRAM event record. Add checks to ensure the memory to be repaired is offline and if online, then originates from a CXL DRAM error record reported in the current boot before requesting a memory sparing operation on the device. Note: Tested memory sparing feature control with QEMU patch "hw/cxl: Add emulation for memory sparing control feature" https://lore.kernel.org/linux-cxl/20250509172229.726-1-shiju.jose@huawei.com/T/#m5f38512a95670d75739f9dad3ee91b95c7f5c8d6 Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Shiju Jose Reviewed-by: Alison Schofield Acked-by: Dan Williams Link: https://patch.msgid.link/20250521124749.817-8-shiju.jose@huawei.com Signed-off-by: Dave Jiang --- include/linux/edac.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/edac.h b/include/linux/edac.h index 451f9c152c99..fa32f2aca22f 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -745,9 +745,16 @@ static inline int edac_ecs_get_desc(struct device *ecs_dev, #endif /* CONFIG_EDAC_ECS */ enum edac_mem_repair_type { + EDAC_REPAIR_PPR, + EDAC_REPAIR_CACHELINE_SPARING, + EDAC_REPAIR_ROW_SPARING, + EDAC_REPAIR_BANK_SPARING, + EDAC_REPAIR_RANK_SPARING, EDAC_REPAIR_MAX }; +extern const char * const edac_repair_type[]; + enum edac_mem_repair_cmd { EDAC_DO_MEM_REPAIR = 1, }; -- cgit v1.2.3 From e7d952cc39fca34386ec9f15f68cb2eaac01b5ae Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 24 May 2025 11:23:25 +0200 Subject: perf/headers: Clean up a bit Do a bit of readability spring cleaning: - Fix misaligned structure member in perf_addr_filter: the new struct perf_addr_filter::action member was too long, but when it was added it was not aligned properly. Align all fields to the customary column 41 alignment of most of the rest of the header. - Adjust the vertical alignment of the definition of other structures and definitions as well, so that the 'most of' in the previous paragraph changes to 'all of'. ;-) - Prettify the assignments in perf_clear_branch_entry_bitfields() - Move comments from CPP definitions to outside the macro - Move perf_guest_info_callbacks and related defines from the front of the header closer to where it's used within the header. 
- And more #endif markers for larger CPP blocks and standardize #if/#else/#endif blocks to the following nomenclature: #ifdef CONFIG_FOO ... #else /* !CONFIG_FOO: */ ... #endif /* !CONFIG_FOO */ - Standardize on consistently using the 'extern' storage class where appropriate, we had cases where method prototypes sometimes omitted the storage class: extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); int perf_event_read_local(struct perf_event *event, u64 *value, u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); Which is obviously a bit confusing and adds unnecessary noise. - s/__u64/u64 and similar cleanups: there's no point in using __u64 in non-UAPI headers, and doing so only adds unnecessary visual noise. - Harmonize all multi-parameter function prototypes along the following style: extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, struct task_struct *task, perf_overflow_handler_t callback, void *context); - etc. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Ian Rogers Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 282 +++++++++++++++++++++++++-------------------- 1 file changed, 155 insertions(+), 127 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a96c00e2ceca..52dc7cfab0e0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -26,18 +26,9 @@ # include #endif -#define PERF_GUEST_ACTIVE 0x01 -#define PERF_GUEST_USER 0x02 - -struct perf_guest_info_callbacks { - unsigned int (*state)(void); - unsigned long (*get_ip)(void); - unsigned int (*handle_intel_pt_intr)(void); -}; - #ifdef CONFIG_HAVE_HW_BREAKPOINT -#include -#include +# include +# include #endif #include @@ -62,19 +53,20 @@ struct perf_guest_info_callbacks { #include #include #include + #include struct perf_callchain_entry { - __u64 nr; - __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ + u64 nr; + u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ }; struct perf_callchain_entry_ctx { - struct perf_callchain_entry *entry; - u32 max_stack; - u32 nr; - short contexts; - bool contexts_maxed; + struct perf_callchain_entry *entry; + u32 max_stack; + u32 nr; + short contexts; + bool contexts_maxed; }; typedef unsigned long (*perf_copy_f)(void *dst, const void *src, @@ -121,8 +113,8 @@ static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) * already stored in age order, the hw_idx should be 0. */ struct perf_branch_stack { - __u64 nr; - __u64 hw_idx; + u64 nr; + u64 hw_idx; struct perf_branch_entry entries[]; }; @@ -132,10 +124,10 @@ struct task_struct; * extra PMU register associated with an event */ struct hw_perf_event_extra { - u64 config; /* register value */ - unsigned int reg; /* register address or index */ - int alloc; /* extra register already allocated */ - int idx; /* index in shared_regs->regs[] */ + u64 config; /* register value */ + unsigned int reg; /* register address or index */ + int alloc; /* extra register already allocated */ + int idx; /* index in shared_regs->regs[] */ }; /** @@ -144,8 +136,8 @@ struct hw_perf_event_extra { * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific * usage. 
*/ -#define PERF_EVENT_FLAG_ARCH 0x0fffffff -#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 +#define PERF_EVENT_FLAG_ARCH 0x0fffffff +#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0); @@ -227,9 +219,14 @@ struct hw_perf_event { /* * hw_perf_event::state flags; used to track the PERF_EF_* state. */ -#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ -#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ -#define PERF_HES_ARCH 0x04 + +/* the counter is stopped */ +#define PERF_HES_STOPPED 0x01 + +/* event->count up-to-date */ +#define PERF_HES_UPTODATE 0x02 + +#define PERF_HES_ARCH 0x04 int state; @@ -278,7 +275,7 @@ struct hw_perf_event { */ u64 freq_time_stamp; u64 freq_count_stamp; -#endif +#endif /* CONFIG_PERF_EVENTS */ }; struct perf_event; @@ -287,29 +284,33 @@ struct perf_event_pmu_context; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ -#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ -#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ + +/* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_ADD 0x1 + +/* txn to read event group from PMU */ +#define PERF_PMU_TXN_READ 0x2 /** * pmu::capabilities flags */ -#define PERF_PMU_CAP_NO_INTERRUPT 0x0001 -#define PERF_PMU_CAP_NO_NMI 0x0002 -#define PERF_PMU_CAP_AUX_NO_SG 0x0004 -#define PERF_PMU_CAP_EXTENDED_REGS 0x0008 -#define PERF_PMU_CAP_EXCLUSIVE 0x0010 -#define PERF_PMU_CAP_ITRACE 0x0020 -#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 -#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 -#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 -#define PERF_PMU_CAP_AUX_PAUSE 0x0200 -#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400 +#define PERF_PMU_CAP_NO_INTERRUPT 0x0001 +#define PERF_PMU_CAP_NO_NMI 0x0002 +#define PERF_PMU_CAP_AUX_NO_SG 0x0004 +#define PERF_PMU_CAP_EXTENDED_REGS 0x0008 +#define PERF_PMU_CAP_EXCLUSIVE 0x0010 +#define PERF_PMU_CAP_ITRACE 0x0020 +#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 +#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 +#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 +#define PERF_PMU_CAP_AUX_PAUSE 0x0200 +#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400 /** * pmu::scope */ enum perf_pmu_scope { - PERF_PMU_SCOPE_NONE = 0, + PERF_PMU_SCOPE_NONE = 0, PERF_PMU_SCOPE_CORE, PERF_PMU_SCOPE_DIE, PERF_PMU_SCOPE_CLUSTER, @@ -393,11 +394,21 @@ struct pmu { * Flags for ->add()/->del()/ ->start()/->stop(). There are * matching hw_perf_event::state flags. */ -#define PERF_EF_START 0x01 /* start the counter when adding */ -#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ -#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ -#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */ -#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */ + +/* start the counter when adding */ +#define PERF_EF_START 0x01 + +/* reload the counter when starting */ +#define PERF_EF_RELOAD 0x02 + +/* update the counter when stopping */ +#define PERF_EF_UPDATE 0x04 + +/* AUX area event, pause tracing */ +#define PERF_EF_PAUSE 0x08 + +/* AUX area event, resume tracing */ +#define PERF_EF_RESUME 0x10 /* * Adds/Removes a counter to/from the PMU, can be done inside a @@ -596,10 +607,10 @@ enum perf_addr_filter_action_t { * This is a hardware-agnostic filter configuration as specified by the user. 
*/ struct perf_addr_filter { - struct list_head entry; - struct path path; - unsigned long offset; - unsigned long size; + struct list_head entry; + struct path path; + unsigned long offset; + unsigned long size; enum perf_addr_filter_action_t action; }; @@ -614,14 +625,14 @@ struct perf_addr_filter { * bundled together; see perf_event_addr_filters(). */ struct perf_addr_filters_head { - struct list_head list; - raw_spinlock_t lock; - unsigned int nr_file_filters; + struct list_head list; + raw_spinlock_t lock; + unsigned int nr_file_filters; }; struct perf_addr_filter_range { - unsigned long start; - unsigned long size; + unsigned long start; + unsigned long size; }; /** @@ -669,24 +680,24 @@ struct swevent_hlist { struct rcu_head rcu_head; }; -#define PERF_ATTACH_CONTEXT 0x0001 -#define PERF_ATTACH_GROUP 0x0002 -#define PERF_ATTACH_TASK 0x0004 -#define PERF_ATTACH_TASK_DATA 0x0008 -#define PERF_ATTACH_GLOBAL_DATA 0x0010 -#define PERF_ATTACH_SCHED_CB 0x0020 -#define PERF_ATTACH_CHILD 0x0040 -#define PERF_ATTACH_EXCLUSIVE 0x0080 -#define PERF_ATTACH_CALLCHAIN 0x0100 -#define PERF_ATTACH_ITRACE 0x0200 +#define PERF_ATTACH_CONTEXT 0x0001 +#define PERF_ATTACH_GROUP 0x0002 +#define PERF_ATTACH_TASK 0x0004 +#define PERF_ATTACH_TASK_DATA 0x0008 +#define PERF_ATTACH_GLOBAL_DATA 0x0010 +#define PERF_ATTACH_SCHED_CB 0x0020 +#define PERF_ATTACH_CHILD 0x0040 +#define PERF_ATTACH_EXCLUSIVE 0x0080 +#define PERF_ATTACH_CALLCHAIN 0x0100 +#define PERF_ATTACH_ITRACE 0x0200 struct bpf_prog; struct perf_cgroup; struct perf_buffer; struct pmu_event_list { - raw_spinlock_t lock; - struct list_head list; + raw_spinlock_t lock; + struct list_head list; }; /* @@ -696,12 +707,12 @@ struct pmu_event_list { * disabled is sufficient since it will hold-off the IPIs. */ #ifdef CONFIG_PROVE_LOCKING -#define lockdep_assert_event_ctx(event) \ +# define lockdep_assert_event_ctx(event) \ WARN_ON_ONCE(__lockdep_enabled && \ (this_cpu_read(hardirqs_enabled) && \ lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD)) #else -#define lockdep_assert_event_ctx(event) +# define lockdep_assert_event_ctx(event) #endif #define for_each_sibling_event(sibling, event) \ @@ -859,9 +870,9 @@ struct perf_event { #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; struct event_filter *filter; -#ifdef CONFIG_FUNCTION_TRACER +# ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops ftrace_ops; -#endif +# endif #endif #ifdef CONFIG_CGROUP_PERF @@ -880,7 +891,7 @@ struct perf_event { * of it. event->orig_type contains original 'type' requested by * user. 
*/ - __u32 orig_type; + u32 orig_type; #endif /* CONFIG_PERF_EVENTS */ }; @@ -945,8 +956,8 @@ static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc) } struct perf_event_groups { - struct rb_root tree; - u64 index; + struct rb_root tree; + u64 index; }; @@ -1189,16 +1200,18 @@ extern void perf_pmu_resched(struct pmu *pmu); extern int perf_event_refresh(struct perf_event *event, int refresh); extern void perf_event_update_userpage(struct perf_event *event); extern int perf_event_release_kernel(struct perf_event *event); + extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, - int cpu, - struct task_struct *task, - perf_overflow_handler_t callback, - void *context); + int cpu, + struct task_struct *task, + perf_overflow_handler_t callback, + void *context); + extern void perf_pmu_migrate_context(struct pmu *pmu, - int src_cpu, int dst_cpu); -int perf_event_read_local(struct perf_event *event, u64 *value, - u64 *enabled, u64 *running); + int src_cpu, int dst_cpu); +extern int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1415,14 +1428,14 @@ static inline u32 perf_sample_data_size(struct perf_sample_data *data, */ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br) { - br->mispred = 0; - br->predicted = 0; - br->in_tx = 0; - br->abort = 0; - br->cycles = 0; - br->type = 0; - br->spec = PERF_BR_SPEC_NA; - br->reserved = 0; + br->mispred = 0; + br->predicted = 0; + br->in_tx = 0; + br->abort = 0; + br->cycles = 0; + br->type = 0; + br->spec = PERF_BR_SPEC_NA; + br->reserved = 0; } extern void perf_output_sample(struct perf_output_handle *handle, @@ -1611,7 +1624,17 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); +#define PERF_GUEST_ACTIVE 0x01 +#define PERF_GUEST_USER 0x02 + +struct perf_guest_info_callbacks { + unsigned int (*state)(void); + unsigned long (*get_ip)(void); + unsigned int (*handle_intel_pt_intr)(void); +}; + #ifdef CONFIG_GUEST_PERF_EVENTS + extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); @@ -1622,21 +1645,27 @@ static inline unsigned int perf_guest_state(void) { return static_call(__perf_guest_state)(); } + static inline unsigned long perf_guest_get_ip(void) { return static_call(__perf_guest_get_ip)(); } + static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return static_call(__perf_guest_handle_intel_pt_intr)(); } + extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); -#else + +#else /* !CONFIG_GUEST_PERF_EVENTS: */ + static inline unsigned int perf_guest_state(void) { return 0; } static inline unsigned long perf_guest_get_ip(void) { return 0; } static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; } -#endif /* CONFIG_GUEST_PERF_EVENTS */ + +#endif /* !CONFIG_GUEST_PERF_EVENTS */ extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); @@ -1666,6 +1695,7 @@ static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx * { if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; ++ctx->contexts; return 0; @@ -1679,6 
+1709,7 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 { if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; ++ctx->nr; return 0; @@ -1705,7 +1736,7 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } -int perf_allow_kernel(void); +extern int perf_allow_kernel(void); static inline int perf_allow_cpu(void) { @@ -1827,7 +1858,7 @@ extern int perf_output_begin_backward(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern unsigned int perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len); + const void *buf, unsigned int len); extern unsigned int perf_output_skip(struct perf_output_handle *handle, unsigned int len); extern long perf_output_copy_aux(struct perf_output_handle *aux_handle, @@ -1844,7 +1875,9 @@ extern void perf_event_task_tick(void); extern int perf_event_account_interrupt(struct perf_event *event); extern int perf_event_period(struct perf_event *event, u64 value); extern u64 perf_event_pause(struct perf_event *event, bool reset); + #else /* !CONFIG_PERF_EVENTS: */ + static inline void * perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) { return NULL; } @@ -1922,19 +1955,14 @@ static inline void perf_event_disable(struct perf_event *event) { } static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } -static inline int perf_event_period(struct perf_event *event, u64 value) -{ - return -EINVAL; -} -static inline u64 perf_event_pause(struct perf_event *event, bool reset) -{ - return 0; -} -static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) -{ - return 0; -} -#endif +static inline int +perf_event_period(struct perf_event *event, u64 value) { return -EINVAL; } +static inline u64 +perf_event_pause(struct perf_event *event, bool reset) { return 0; } +static inline int +perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { return 0; } + +#endif /* !CONFIG_PERF_EVENTS */ #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); @@ -1942,31 +1970,31 @@ extern void perf_restore_debug_store(void); static inline void perf_restore_debug_store(void) { } #endif -#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) +#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) struct perf_pmu_events_attr { - struct device_attribute attr; - u64 id; - const char *event_str; + struct device_attribute attr; + u64 id; + const char *event_str; }; struct perf_pmu_events_ht_attr { - struct device_attribute attr; - u64 id; - const char *event_str_ht; - const char *event_str_noht; + struct device_attribute attr; + u64 id; + const char *event_str_ht; + const char *event_str_noht; }; struct perf_pmu_events_hybrid_attr { - struct device_attribute attr; - u64 id; - const char *event_str; - u64 pmu_type; + struct device_attribute attr; + u64 id; + const char *event_str; + u64 pmu_type; }; struct perf_pmu_format_hybrid_attr { - struct device_attribute attr; - u64 pmu_type; + struct device_attribute attr; + u64 pmu_type; }; ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, @@ -2008,11 +2036,11 @@ static struct device_attribute 
format_attr_##_name = __ATTR_RO(_name) /* Performance counter hotplug functions */ #ifdef CONFIG_PERF_EVENTS -int perf_event_init_cpu(unsigned int cpu); -int perf_event_exit_cpu(unsigned int cpu); +extern int perf_event_init_cpu(unsigned int cpu); +extern int perf_event_exit_cpu(unsigned int cpu); #else -#define perf_event_init_cpu NULL -#define perf_event_exit_cpu NULL +# define perf_event_init_cpu NULL +# define perf_event_exit_cpu NULL #endif extern void arch_perf_update_userpage(struct perf_event *event, -- cgit v1.2.3 From 12ca42c237756182aad8ab04654c952765cb9061 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 16 May 2025 17:07:39 -0700 Subject: alloc_tag: allocate percpu counters for module tags dynamically When a module gets unloaded it checks whether any of its tags are still in use and if so, we keep the memory containing module's allocation tags alive until all tags are unused. However percpu counters referenced by the tags are freed by free_module(). This will lead to UAF if the memory allocated by a module is accessed after module was unloaded. To fix this we allocate percpu counters for module allocation tags dynamically and we keep it alive for tags which are still in use after module unloading. This also removes the requirement of a larger PERCPU_MODULE_RESERVE when memory allocation profiling is enabled because percpu memory for counters does not need to be reserved anymore. Link: https://lkml.kernel.org/r/20250517000739.5930-1-surenb@google.com Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory") Signed-off-by: Suren Baghdasaryan Reported-by: David Wang <00107082@163.com> Closes: https://lore.kernel.org/all/20250516131246.6244-1-00107082@163.com/ Tested-by: David Wang <00107082@163.com> Cc: Christoph Lameter (Ampere) Cc: Dennis Zhou Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 12 ++++++++++++ include/linux/codetag.h | 8 ++++---- include/linux/percpu.h | 4 ---- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index a946e0203e6d..8f7931eb7d16 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #else /* ARCH_NEEDS_WEAK_PER_CPU */ +#ifdef MODULE + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section(ALLOC_TAG_SECTION_NAME) = { \ + .ct = CODE_TAG_INIT, \ + .counters = NULL }; + +#else /* MODULE */ + #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ @@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; +#endif /* MODULE */ + #endif /* ARCH_NEEDS_WEAK_PER_CPU */ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, diff --git a/include/linux/codetag.h b/include/linux/codetag.h index d14dbd26b370..0ee4c21c6dbc 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -36,10 +36,10 @@ union codetag_ref { struct codetag_type_desc { const char *section; size_t tag_size; - void (*module_load)(struct codetag_type *cttype, - struct codetag_module *cmod); - void (*module_unload)(struct codetag_type *cttype, - struct codetag_module *cmod); + void (*module_load)(struct module *mod, + struct codetag 
*start, struct codetag *end); + void (*module_unload)(struct module *mod, + struct codetag *start, struct codetag *end); #ifdef CONFIG_MODULES void (*module_replaced)(struct module *mod, struct module *new_mod); bool (*needs_section_mem)(struct module *mod, unsigned long size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 52b5ea663b9f..85bf8dd9f087 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -15,11 +15,7 @@ /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#ifdef CONFIG_MEM_ALLOC_PROFILING -#define PERCPU_MODULE_RESERVE (8 << 13) -#else #define PERCPU_MODULE_RESERVE (8 << 10) -#endif #else #define PERCPU_MODULE_RESERVE 0 #endif -- cgit v1.2.3 From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= Date: Fri, 23 May 2025 14:19:10 +0200 Subject: mm: fix copy_vma() error handling for hugetlb mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If, during a mremap() operation for a hugetlb-backed memory mapping, copy_vma() fails after the source vma has been duplicated and opened (ie. vma_link() fails), the error is handled by closing the new vma. This updates the hugetlbfs reservation counter of the reservation map which at this point is referenced by both the source vma and the new copy. As a result, once the new vma has been freed and copy_vma() returns, the reservation counter for the source vma will be incorrect. This patch addresses this corner case by clearing the hugetlb private page reservation reference for the new vma and decrementing the reference before closing the vma, so that vma_close() won't update the reservation counter. This is also what copy_vma_and_data() does with the source vma if copy_vma() succeeds, so a helper function has been added to do the fixup in both functions. The issue was reported by a private syzbot instance and can be reproduced using the C reproducer in [1]. It's also a possible duplicate of public syzbot report [2]. The WARNING report is: ============================================================ page_counter underflow: -1024 nr_pages=1024 WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120 Modules linked in: CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014 RIP: 0010:page_counter_cancel+0xf6/0x120 Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81 RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246 RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000 R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400 FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0 Call Trace: page_counter_uncharge+0x33/0x80 hugetlb_cgroup_uncharge_counter+0xcb/0x120 hugetlb_vm_op_close+0x579/0x960 ? __pfx_hugetlb_vm_op_close+0x10/0x10 remove_vma+0x88/0x130 exit_mmap+0x71e/0xe00 ? __pfx_exit_mmap+0x10/0x10 ? __mutex_unlock_slowpath+0x22e/0x7f0 ? 
__pfx_exit_aio+0x10/0x10 ? __up_read+0x256/0x690 ? uprobe_clear_state+0x274/0x290 ? mm_update_next_owner+0xa9/0x810 __mmput+0xc9/0x370 exit_mm+0x203/0x2f0 ? __pfx_exit_mm+0x10/0x10 ? taskstats_exit+0x32b/0xa60 do_exit+0x921/0x2740 ? do_raw_spin_lock+0x155/0x3b0 ? __pfx_do_exit+0x10/0x10 ? __pfx_do_raw_spin_lock+0x10/0x10 ? _raw_spin_lock_irq+0xc5/0x100 do_group_exit+0x20c/0x2c0 get_signal+0x168c/0x1720 ? __pfx_get_signal+0x10/0x10 ? schedule+0x165/0x360 arch_do_signal_or_restart+0x8e/0x7d0 ? __pfx_arch_do_signal_or_restart+0x10/0x10 ? __pfx___se_sys_futex+0x10/0x10 syscall_exit_to_user_mode+0xb8/0x2c0 do_syscall_64+0x75/0x120 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x422dcd Code: Unable to access opcode bytes at 0x422da3. RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720 R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0 R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000 ============================================================ Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df1a8cfefd@igalia.com Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter_cancel__repro.c [1] Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2] Signed-off-by: Ricardo Cañuelo Navarro Suggested-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Florent Revest Cc: Jann Horn Cc: Oscar Salvador Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8f3ac832ee7f..4861a7e304bb 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write -- cgit v1.2.3 From 707f853d7fa3ce323a6875487890c213e34d81a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 2 May 2025 16:12:09 +0200 Subject: module: Provide EXPORT_SYMBOL_GPL_FOR_MODULES() helper Helper macro to more easily limit the export of a symbol to a given list of modules. Eg: EXPORT_SYMBOL_GPL_FOR_MODULES(preempt_notifier_inc, "kvm"); will limit the use of said function to kvm.ko, any other module trying to use this symbol will refure to load (and get modpost build failures). 
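For illustration, a minimal usage sketch (the exporting file and the exported function are just examples reused from the changelog above, not part of this patch):

  /* e.g. in kernel/sched/core.c */
  #include <linux/export.h>

  void preempt_notifier_inc(void)
  {
          /* ... */
  }

  /* GPL export, resolvable only by a module named "kvm" */
  EXPORT_SYMBOL_GPL_FOR_MODULES(preempt_notifier_inc, "kvm");

Any other module that references this symbol gets a modpost failure at build time and refuses to load at runtime, as described above.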
Requested-by: Masahiro Yamada Requested-by: Christoph Hellwig Signed-off-by: Peter Zijlstra Reviewed-by: Petr Pavlu Signed-off-by: Masahiro Yamada --- include/linux/export.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/export.h b/include/linux/export.h index a8c23d945634..f35d03b4113b 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -24,11 +24,17 @@ .long sym #endif -#define ___EXPORT_SYMBOL(sym, license, ns) \ +/* + * LLVM integrated assembler cam merge adjacent string literals (like + * C and GNU-as) passed to '.ascii', but not to '.asciz' and chokes on: + * + * .asciz "MODULE_" "kvm" ; + */ +#define ___EXPORT_SYMBOL(sym, license, ns...) \ .section ".export_symbol","a" ASM_NL \ __export_symbol_##sym: ASM_NL \ .asciz license ASM_NL \ - .asciz ns ASM_NL \ + .ascii ns "\0" ASM_NL \ __EXPORT_SYMBOL_REF(sym) ASM_NL \ .previous @@ -85,4 +91,6 @@ #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) +#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) + #endif /* _LINUX_EXPORT_H */ -- cgit v1.2.3 From 45611fe821af0e09cb9dc2dfd2418b2e1e1831f3 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Wed, 21 May 2025 14:34:17 +0300 Subject: IB/cm: Remove dead code and adjust naming Drop ib_send_cm_mra parameters which are always constant. Remove branch which is never taken. Adjust name to ib_prepare_cm_mra, which better reflects its functionality - no MRA is actually sent. Adjust name of related tracepoints. Push setting of the constant service timeout to cm.c and drop IB_CM_MRA_FLAG_DELAY. Signed-off-by: Vlad Dumitrescu Reviewed-by: Sean Hefty Link: https://patch.msgid.link/cdd2a237acf2b495c19ce02e4b1c42c41c6751c2.1747827207.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_cm.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index a2ac62b4a6cf..1fa3786f82f4 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -480,23 +480,12 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); -#define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */ - /** - * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection - * message. + * ib_prepare_cm_mra - Prepares to send a message receipt acknowledgment to a + connection message in case duplicates are received. * @cm_id: Connection identifier associated with the connection message. - * @service_timeout: The lower 5-bits specify the maximum time required for - * the sender to reply to the connection message. The upper 3-bits - * specify additional control flags. - * @private_data: Optional user-defined private data sent with the - * message receipt acknowledgement. - * @private_data_len: Size of the private data buffer, in bytes. 
*/ -int ib_send_cm_mra(struct ib_cm_id *cm_id, - u8 service_timeout, - const void *private_data, - u8 private_data_len); +int ib_prepare_cm_mra(struct ib_cm_id *cm_id); /** * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning -- cgit v1.2.3 From 478ad02d6844217cc7568619aeb0809d93ade43d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 May 2025 15:43:36 -0700 Subject: Disable FOP_DONTCACHE for now due to bugs This is kind of last-minute, but Al Viro reported that the new FOP_DONTCACHE flag causes memory corruption due to use-after-free issues. This was triggered by commit 974c5e6139db ("xfs: flag as supporting FOP_DONTCACHE"), but that is not the underlying bug - it is just the first user of the flag. Vlastimil Babka suspects the underlying problem stems from the folio_end_writeback() logic introduced in commit fb7d3bc414939 ("mm/filemap: drop streaming/uncached pages when writeback completes"). The most straightforward fix would be to just revert the commit that exposed this, but Matthew Wilcox points out that other filesystems are also starting to enable the FOP_DONTCACHE logic, so this instead disables that bit globally for now. The fix will hopefully end up being trivial and we can just re-enable this logic after more testing, but until such a time we'll have to disable the new FOP_DONTCACHE flag. Reported-by: Al Viro Link: https://lore.kernel.org/all/20250525083209.GS2023217@ZenIV/ Triggered-by: 974c5e6139db ("xfs: flag as supporting FOP_DONTCACHE") Cc: Vlastimil Babka Cc: Matthew Wilcox Cc: Jan Kara Cc: Jens Axboe Cc: Christoph Hellwig Cc: Darrick J. Wong Cc: Christian Brauner Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..a4fd649e2c3f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2186,7 +2186,7 @@ struct file_operations { /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ -#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) +#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, -- cgit v1.2.3 From 384492c48e6a88c9a7f0376d8e8ac7f557988e92 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 20 May 2025 13:30:42 -0700 Subject: net: devmem: support single IOV with sendmsg sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple iovs becomes ITER_IOVEC. iter_iov_len does not return correct value for UBUF, so teach to treat UBUF differently. Cc: Al Viro Cc: Pavel Begunkov Cc: Mina Almasry Fixes: bd61848900bf ("net: devmem: Implement TX path") Signed-off-by: Stanislav Fomichev Acked-by: Mina Almasry Reviewed-by: Pavel Begunkov Signed-off-by: David S. 
Miller --- include/linux/uio.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 49ece9e1888f..393d0622cc28 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -99,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter) } #define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset) -#define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset) + +static inline size_t iter_iov_len(const struct iov_iter *i) +{ + if (i->iter_type == ITER_UBUF) + return i->count; + return iter_iov(i)->iov_len - i->iov_offset; +} static inline enum iter_type iov_iter_type(const struct iov_iter *i) { -- cgit v1.2.3 From e45b7196df60a4aef86c3998611c91fcc93d21f3 Mon Sep 17 00:00:00 2001 From: Qiu Yutan Date: Wed, 21 May 2025 10:14:08 +0800 Subject: net: neigh: use kfree_skb_reason() in neigh_resolve_output() and neigh_connected_output() Replace kfree_skb() used in neigh_resolve_output() and neigh_connected_output() with kfree_skb_reason(). Following new skb drop reason is added: /* failed to fill the device hard header */ SKB_DROP_REASON_NEIGH_HH_FILLFAIL Signed-off-by: Qiu Yutan Signed-off-by: Jiang Kun Reviewed-by: Kuniyuki Iwashima Reviewed-by: Xu Xin Signed-off-by: David S. Miller --- include/net/dropreason-core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index bea77934a235..bcf9d7467e1a 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -62,6 +62,7 @@ FN(NEIGH_FAILED) \ FN(NEIGH_QUEUEFULL) \ FN(NEIGH_DEAD) \ + FN(NEIGH_HH_FILLFAIL) \ FN(TC_EGRESS) \ FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ @@ -348,6 +349,8 @@ enum skb_drop_reason { SKB_DROP_REASON_NEIGH_QUEUEFULL, /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */ SKB_DROP_REASON_NEIGH_DEAD, + /** @SKB_DROP_REASON_NEIGH_HH_FILLFAIL: failed to fill the device hard header */ + SKB_DROP_REASON_NEIGH_HH_FILLFAIL, /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */ SKB_DROP_REASON_TC_EGRESS, /** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */ -- cgit v1.2.3 From 45ca7e9f0730ae36fc610e675b990e9cc9ca0714 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 21 May 2025 14:17:05 +0200 Subject: vsock/virtio: fix `rx_bytes` accounting for stream sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In `struct virtio_vsock_sock`, we maintain two counters: - `rx_bytes`: used internally to track how many bytes have been read. This supports mechanisms like .stream_has_data() and sock_rcvlowat(). - `fwd_cnt`: used for the credit mechanism to inform available receive buffer space to the remote peer. These counters are updated via virtio_transport_inc_rx_pkt() and virtio_transport_dec_rx_pkt(). Since the beginning with commit 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko"), we call virtio_transport_dec_rx_pkt() in virtio_transport_stream_do_dequeue() only when we consume the entire packet, so partial reads, do not update `rx_bytes` and `fwd_cnt`. This is fine for `fwd_cnt`, because we still have space used for the entire packet, and we don't want to update the credit for the other peer until we free the space of the entire packet. However, this causes `rx_bytes` to be stale on partial reads. 
Previously, this didn’t cause issues because `rx_bytes` was used only by .stream_has_data(), and any unread portion of a packet implied data was still available. However, since commit 93b808876682 ("virtio/vsock: fix logic which reduces credit update messages"), we now rely on `rx_bytes` to determine if a credit update should be sent when the data in the RX queue drops below SO_RCVLOWAT value. This patch fixes the accounting by updating `rx_bytes` with the number of bytes actually read, even on partial reads, while leaving `fwd_cnt` untouched until the packet is fully consumed. Also introduce a new `buf_used` counter to check that the remote peer is honoring the given credit; this was previously done via `rx_bytes`. Fixes: 93b808876682 ("virtio/vsock: fix logic which reduces credit update messages") Signed-off-by: Stefano Garzarella Link: https://patch.msgid.link/20250521121705.196379-1-sgarzare@redhat.com Signed-off-by: Paolo Abeni --- include/linux/virtio_vsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 0387d64e2c66..36fb3edfa403 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -140,6 +140,7 @@ struct virtio_vsock_sock { u32 last_fwd_cnt; u32 rx_bytes; u32 buf_alloc; + u32 buf_used; struct sk_buff_head rx_queue; u32 msg_count; }; -- cgit v1.2.3 From 9be022476fea62b32aa05d9d370e0175155731e6 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 11 Apr 2025 21:14:12 +0800 Subject: mailbox: Remove devm_mbox_controller_unregister Commit e898d9cdd3a9("mailbox: Add device-managed registration functions") introduced device-managed API for mailbox, but in the past 7 years, there is no user for devm_mbox_controller_unregister. So remove it. Signed-off-by: Peng Fan Signed-off-by: Jassi Brar --- include/linux/mailbox_controller.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index 5fb0b65f45a2..ad01c4082358 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -134,7 +134,4 @@ void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */ int devm_mbox_controller_register(struct device *dev, struct mbox_controller *mbox); -void devm_mbox_controller_unregister(struct device *dev, - struct mbox_controller *mbox); - #endif /* __MAILBOX_CONTROLLER_H */ -- cgit v1.2.3 From ed449ddbd867f2cc02d6890c231431f264a876eb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 May 2025 13:46:09 -0700 Subject: net: core: Convert inet_addr_is_any() to sockaddr_storage All the callers of inet_addr_is_any() have a sockaddr_storage-backed sockaddr. Avoid casts and switch prototype to the actual object being used. Reviewed-by: Kuniyuki Iwashima Reviewed-by: Martin K. 
Petersen # SCSI Signed-off-by: Kees Cook Link: https://patch.msgid.link/20250521204619.2301870-1-kees@kernel.org Signed-off-by: Paolo Abeni --- include/linux/inet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/inet.h b/include/linux/inet.h index bd8276e96e60..9158772f3559 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -55,6 +55,6 @@ extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char extern int inet_pton_with_scope(struct net *net, unsigned short af, const char *src, const char *port, struct sockaddr_storage *addr); -extern bool inet_addr_is_any(struct sockaddr *addr); +bool inet_addr_is_any(struct sockaddr_storage *addr); #endif /* _LINUX_INET_H */ -- cgit v1.2.3 From 161972650d6795ea00f8b72557cf3c3e593ed250 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 May 2025 13:46:10 -0700 Subject: net: core: Switch netif_set_mac_address() to struct sockaddr_storage In order to avoid passing around struct sockaddr that has a size the compiler cannot reason about (nor track at runtime), convert netif_set_mac_address() to take struct sockaddr_storage. This is just a cast conversion, so there are no binary changes. Following patches will make actual allocation changes. Acked-by: Gustavo A. R. Silva Signed-off-by: Kees Cook Link: https://patch.msgid.link/20250521204619.2301870-2-kees@kernel.org Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ea9d335de130..47200a394a02 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4212,7 +4212,7 @@ int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); -int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, +int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); -- cgit v1.2.3 From 9ca6804ab7c34f65fcf2e29333a39e7807c30b60 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 May 2025 13:46:14 -0700 Subject: net: core: Convert dev_set_mac_address() to struct sockaddr_storage All users of dev_set_mac_address() are now using a struct sockaddr_storage. Convert the internal data type to struct sockaddr_storage, drop the casts, and update pointer types. Acked-by: Gustavo A. R.
Silva Signed-off-by: Kees Cook Link: https://patch.msgid.link/20250521204619.2301870-6-kees@kernel.org Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 47200a394a02..b4242b997373 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4214,7 +4214,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, +int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); -- cgit v1.2.3 From ae9fcd5a0f8ab7e12619e1c66312a03b842935c3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 May 2025 13:46:16 -0700 Subject: net: core: Convert dev_set_mac_address_user() to use struct sockaddr_storage Convert callers of dev_set_mac_address_user() to use struct sockaddr_storage. Add sanity checks on dev->addr_len usage. Signed-off-by: Kees Cook Acked-by: Gustavo A. R. Silva Link: https://patch.msgid.link/20250521204619.2301870-8-kees@kernel.org Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b4242b997373..adb14db25798 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4216,7 +4216,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); -int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, -- cgit v1.2.3 From ba3d7b93dbe3202bf8ead473d75885af773068bc Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Wed, 21 May 2025 23:27:06 +0200 Subject: wireguard: allowedips: add WGALLOWEDIP_F_REMOVE_ME flag The current netlink API for WireGuard does not directly support removal of allowed ips from a peer. A user can remove an allowed ip from a peer in one of two ways: 1. By using the WGPEER_F_REPLACE_ALLOWEDIPS flag and providing a new list of allowed ips which omits the allowed ip that is to be removed. 2. By reassigning an allowed ip to a "dummy" peer then removing that peer with WGPEER_F_REMOVE_ME. With the first approach, the driver completely rebuilds the allowed ip list for a peer. If my current configuration is such that a peer has allowed ips 192.168.0.2 and 192.168.0.3 and I want to remove 192.168.0.2 the actual transition looks like this. [192.168.0.2, 192.168.0.3] <-- Initial state [] <-- Step 1: Allowed ips removed for peer [192.168.0.3] <-- Step 2: Allowed ips added back for peer This is true even if the allowed ip list is small and the update does not need to be batched into multiple WG_CMD_SET_DEVICE requests, as the removal and subsequent addition of ips is non-atomic within a single request. 
Consequently, wg_allowedips_lookup_dst and wg_allowedips_lookup_src may return NULL while reconfiguring a peer even for packets bound for ips a user did not intend to remove leading to unintended interruptions in connectivity. This presents in userspace as failed calls to sendto and sendmsg for UDP sockets. In my case, I ran netperf while repeatedly reconfiguring the allowed ips for a peer with wg. /usr/local/bin/netperf -H 10.102.73.72 -l 10m -t UDP_STREAM -- -R 1 -m 1024 send_data: data send error: No route to host (errno 113) netperf: send_omni: send_data failed: No route to host While this may not be of particular concern for environments where peers and allowed ips are mostly static, systems like Cilium manage peers and allowed ips in a dynamic environment where peers (i.e. Kubernetes nodes) and allowed ips (i.e. pods running on those nodes) can frequently change making WGPEER_F_REPLACE_ALLOWEDIPS problematic. The second approach avoids any possible connectivity interruptions but is hacky and less direct, requiring the creation of a temporary peer just to dispose of an allowed ip. Introduce a new flag called WGALLOWEDIP_F_REMOVE_ME which in the same way that WGPEER_F_REMOVE_ME allows a user to remove a single peer from a WireGuard device's configuration allows a user to remove an ip from a peer's set of allowed ips. This enables incremental updates to a device's configuration without any connectivity blips or messy workarounds. A corresponding patch for wg extends the existing `wg set` interface to leverage this feature. $ wg set wg0 peer allowed-ips +192.168.88.0/24,-192.168.0.1/32 When '+' or '-' is prepended to any ip in the list, wg clears WGPEER_F_REPLACE_ALLOWEDIPS and sets the WGALLOWEDIP_F_REMOVE_ME flag on any ip prefixed with '-'. Signed-off-by: Jordan Rife [Jason: minor style nits, fixes to selftest, bump of wireguard-tools version] Signed-off-by: Jason A. Donenfeld Link: https://patch.msgid.link/20250521212707.1767879-5-Jason@zx2c4.com Signed-off-by: Paolo Abeni --- include/uapi/linux/wireguard.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h index ae88be14c947..8c26391196d5 100644 --- a/include/uapi/linux/wireguard.h +++ b/include/uapi/linux/wireguard.h @@ -101,6 +101,10 @@ * WGALLOWEDIP_A_FAMILY: NLA_U16 * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * WGALLOWEDIP_A_FLAGS: NLA_U32, WGALLOWEDIP_F_REMOVE_ME if + * the specified IP should be removed; + * otherwise, this IP will be added if + * it is not already present. * 0: NLA_NESTED * ... * 0: NLA_NESTED @@ -184,11 +188,16 @@ enum wgpeer_attribute { }; #define WGPEER_A_MAX (__WGPEER_A_LAST - 1) +enum wgallowedip_flag { + WGALLOWEDIP_F_REMOVE_ME = 1U << 0, + __WGALLOWEDIP_F_ALL = WGALLOWEDIP_F_REMOVE_ME +}; enum wgallowedip_attribute { WGALLOWEDIP_A_UNSPEC, WGALLOWEDIP_A_FAMILY, WGALLOWEDIP_A_IPADDR, WGALLOWEDIP_A_CIDR_MASK, + WGALLOWEDIP_A_FLAGS, __WGALLOWEDIP_A_LAST }; #define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) -- cgit v1.2.3 From 5ec40864aaecc4bd66fe67541d4a41091ed664a5 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 22 May 2025 01:18:22 +0200 Subject: vsock: Move lingering logic to af_vsock core Lingering should be transport-independent in the long run. In preparation for supporting other transports, as well as the linger on shutdown(), move code to core. Generalize by querying vsock_transport::unsent_bytes(), guard against the callback being unimplemented. 
Do not pass sk_lingertime explicitly. Pull SOCK_LINGER check into vsock_linger(). Flatten the function. Remove the nested block by inverting the condition: return early on !timeout. Suggested-by: Stefano Garzarella Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20250522-vsock-linger-v6-2-2ad00b0e447e@rbox.co Signed-off-by: Paolo Abeni --- include/net/af_vsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 9e85424c8343..d56e6e135158 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -221,6 +221,7 @@ void vsock_for_each_connected_socket(struct vsock_transport *transport, void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); +void vsock_linger(struct sock *sk); /**** TAP ****/ -- cgit v1.2.3 From 0623c759276885c3ae88197ba6fb5c9c6ba8612f Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Fri, 9 May 2025 18:07:22 +0200 Subject: virtio_rtc: Add module and driver core Add the virtio_rtc module and driver core. The virtio_rtc module implements a driver compatible with the proposed Virtio RTC device specification. The Virtio RTC (Real Time Clock) device provides information about current time. The device can provide different clocks, e.g. for the UTC or TAI time standards, or for physical time elapsed since some past epoch. The driver can read the clocks with simple or more accurate methods. Implement the core, which interacts with the Virtio RTC device. Apart from this, the core does not expose functionality outside of the virtio_rtc module. Follow-up patches will expose PTP clocks and an RTC Class device. Provide synchronous messaging, which is enough for the expected time synchronization use cases through PTP clocks (similar to ptp_kvm) or RTC Class device. Signed-off-by: Peter Hilber Message-Id: <20250509160734.1772-2-quic_philber@quicinc.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_rtc.h | 151 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 include/uapi/linux/virtio_rtc.h (limited to 'include') diff --git a/include/uapi/linux/virtio_rtc.h b/include/uapi/linux/virtio_rtc.h new file mode 100644 index 000000000000..6b3af4e9bbfb --- /dev/null +++ b/include/uapi/linux/virtio_rtc.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Copyright (C) 2022-2024 OpenSynergy GmbH + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef _LINUX_VIRTIO_RTC_H +#define _LINUX_VIRTIO_RTC_H + +#include + +/* read request message types */ + +#define VIRTIO_RTC_REQ_READ 0x0001 +#define VIRTIO_RTC_REQ_READ_CROSS 0x0002 + +/* control request message types */ + +#define VIRTIO_RTC_REQ_CFG 0x1000 +#define VIRTIO_RTC_REQ_CLOCK_CAP 0x1001 +#define VIRTIO_RTC_REQ_CROSS_CAP 0x1002 + +/* Message headers */ + +/** common request header */ +struct virtio_rtc_req_head { + __le16 msg_type; + __u8 reserved[6]; +}; + +/** common response header */ +struct virtio_rtc_resp_head { +#define VIRTIO_RTC_S_OK 0 +#define VIRTIO_RTC_S_EOPNOTSUPP 2 +#define VIRTIO_RTC_S_ENODEV 3 +#define VIRTIO_RTC_S_EINVAL 4 +#define VIRTIO_RTC_S_EIO 5 + __u8 status; + __u8 reserved[7]; +}; + +/* read requests */ + +/* VIRTIO_RTC_REQ_READ message */ + +struct virtio_rtc_req_read { + struct virtio_rtc_req_head head; + __le16 clock_id; + __u8 reserved[6]; +}; + +struct virtio_rtc_resp_read { + struct virtio_rtc_resp_head head; + __le64 clock_reading; +}; + +/* VIRTIO_RTC_REQ_READ_CROSS message */ + +struct virtio_rtc_req_read_cross { + struct virtio_rtc_req_head head; + __le16 clock_id; +/* Arm Generic Timer Counter-timer Virtual Count Register (CNTVCT_EL0) */ +#define VIRTIO_RTC_COUNTER_ARM_VCT 0 +/* x86 Time-Stamp Counter */ +#define VIRTIO_RTC_COUNTER_X86_TSC 1 +/* Invalid */ +#define VIRTIO_RTC_COUNTER_INVALID 0xFF + __u8 hw_counter; + __u8 reserved[5]; +}; + +struct virtio_rtc_resp_read_cross { + struct virtio_rtc_resp_head head; + __le64 clock_reading; + __le64 counter_cycles; +}; + +/* control requests */ + +/* VIRTIO_RTC_REQ_CFG message */ + +struct virtio_rtc_req_cfg { + struct virtio_rtc_req_head head; + /* no request params */ +}; + +struct virtio_rtc_resp_cfg { + struct virtio_rtc_resp_head head; + /** # of clocks -> clock ids < num_clocks are valid */ + __le16 num_clocks; + __u8 reserved[6]; +}; + +/* VIRTIO_RTC_REQ_CLOCK_CAP message */ + +struct virtio_rtc_req_clock_cap { + struct virtio_rtc_req_head head; + __le16 clock_id; + __u8 reserved[6]; +}; + +struct virtio_rtc_resp_clock_cap { + struct virtio_rtc_resp_head head; +#define VIRTIO_RTC_CLOCK_UTC 0 +#define VIRTIO_RTC_CLOCK_TAI 1 +#define VIRTIO_RTC_CLOCK_MONOTONIC 2 +#define VIRTIO_RTC_CLOCK_UTC_SMEARED 3 +#define VIRTIO_RTC_CLOCK_UTC_MAYBE_SMEARED 4 + __u8 type; +#define VIRTIO_RTC_SMEAR_UNSPECIFIED 0 +#define VIRTIO_RTC_SMEAR_NOON_LINEAR 1 +#define VIRTIO_RTC_SMEAR_UTC_SLS 2 + __u8 leap_second_smearing; + __u8 reserved[6]; +}; + +/* VIRTIO_RTC_REQ_CROSS_CAP message */ + +struct virtio_rtc_req_cross_cap { + struct virtio_rtc_req_head head; + __le16 clock_id; + __u8 hw_counter; + __u8 reserved[5]; +}; + +struct virtio_rtc_resp_cross_cap { + struct virtio_rtc_resp_head head; +#define VIRTIO_RTC_FLAG_CROSS_CAP (1 << 0) + __u8 flags; + __u8 reserved[7]; +}; + +/** Union of request types for requestq */ +union virtio_rtc_req_requestq { + struct virtio_rtc_req_read read; + struct virtio_rtc_req_read_cross read_cross; + struct virtio_rtc_req_cfg cfg; + struct virtio_rtc_req_clock_cap clock_cap; + struct virtio_rtc_req_cross_cap cross_cap; +}; + +/** Union of response types for requestq */ +union virtio_rtc_resp_requestq { + struct virtio_rtc_resp_read read; + struct virtio_rtc_resp_read_cross read_cross; + struct virtio_rtc_resp_cfg cfg; + struct virtio_rtc_resp_clock_cap clock_cap; + struct virtio_rtc_resp_cross_cap cross_cap; +}; + +#endif /* _LINUX_VIRTIO_RTC_H */ -- cgit v1.2.3 From 9d4f22fd563e0cd02e8448e84d057e7c0132a586 Mon Sep 17 00:00:00 2001 From: Peter Hilber Date: Fri, 9 May 2025 
18:07:25 +0200 Subject: virtio_rtc: Add RTC class driver Expose the virtio-rtc UTC-like clock as an RTC clock to userspace - if it is present, and if it does not step on leap seconds. The RTC class enables the virtio-rtc device to resume the system from sleep states on RTC alarm. Support RTC alarm if the virtio-rtc alarm feature is present. The virtio-rtc device signals an alarm by marking an alarmq buffer as used. Peculiarities ------------- A virtio-rtc clock is a bit special for an RTC clock in that - the clock may step (also backwards) autonomously at any time and - the device, and its notification mechanism, will be reset during boot or resume from sleep. The virtio-rtc device avoids that the driver might miss an alarm. The device signals an alarm whenever the clock has reached or passed the alarm time, and also when the device is reset (on boot or resume from sleep), if the alarm time is in the past. Open Issue ---------- The CLOCK_BOOTTIME_ALARM will use the RTC clock to wake up from sleep, and implicitly assumes that no RTC clock steps will occur during sleep. The RTC class driver does not know whether the current alarm is a real-time alarm or a boot-time alarm. Perhaps this might be handled by the driver also setting a virtio-rtc monotonic alarm (which uses a clock similar to CLOCK_BOOTTIME_ALARM). The virtio-rtc monotonic alarm would just be used to wake up in case it was a CLOCK_BOOTTIME_ALARM alarm. Otherwise, the behavior should not differ from other RTC class drivers. Signed-off-by: Peter Hilber Acked-by: Alexandre Belloni Message-Id: <20250509160734.1772-5-quic_philber@quicinc.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_rtc.h | 88 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_rtc.h b/include/uapi/linux/virtio_rtc.h index 6b3af4e9bbfb..85ee8f013661 100644 --- a/include/uapi/linux/virtio_rtc.h +++ b/include/uapi/linux/virtio_rtc.h @@ -9,6 +9,9 @@ #include +/* alarm feature */ +#define VIRTIO_RTC_F_ALARM 0 + /* read request message types */ #define VIRTIO_RTC_REQ_READ 0x0001 @@ -19,6 +22,13 @@ #define VIRTIO_RTC_REQ_CFG 0x1000 #define VIRTIO_RTC_REQ_CLOCK_CAP 0x1001 #define VIRTIO_RTC_REQ_CROSS_CAP 0x1002 +#define VIRTIO_RTC_REQ_READ_ALARM 0x1003 +#define VIRTIO_RTC_REQ_SET_ALARM 0x1004 +#define VIRTIO_RTC_REQ_SET_ALARM_ENABLED 0x1005 + +/* alarmq message types */ + +#define VIRTIO_RTC_NOTIF_ALARM 0x2000 /* Message headers */ @@ -39,6 +49,12 @@ struct virtio_rtc_resp_head { __u8 reserved[7]; }; +/** common notification header */ +struct virtio_rtc_notif_head { + __le16 msg_type; + __u8 reserved[6]; +}; + /* read requests */ /* VIRTIO_RTC_REQ_READ message */ @@ -111,7 +127,9 @@ struct virtio_rtc_resp_clock_cap { #define VIRTIO_RTC_SMEAR_NOON_LINEAR 1 #define VIRTIO_RTC_SMEAR_UTC_SLS 2 __u8 leap_second_smearing; - __u8 reserved[6]; +#define VIRTIO_RTC_FLAG_ALARM_CAP (1 << 0) + __u8 flags; + __u8 reserved[5]; }; /* VIRTIO_RTC_REQ_CROSS_CAP message */ @@ -130,6 +148,53 @@ struct virtio_rtc_resp_cross_cap { __u8 reserved[7]; }; +/* VIRTIO_RTC_REQ_READ_ALARM message */ + +struct virtio_rtc_req_read_alarm { + struct virtio_rtc_req_head head; + __le16 clock_id; + __u8 reserved[6]; +}; + +struct virtio_rtc_resp_read_alarm { + struct virtio_rtc_resp_head head; + __le64 alarm_time; +#define VIRTIO_RTC_FLAG_ALARM_ENABLED (1 << 0) + __u8 flags; + __u8 reserved[7]; +}; + +/* VIRTIO_RTC_REQ_SET_ALARM message */ + +struct virtio_rtc_req_set_alarm { + struct 
virtio_rtc_req_head head; + __le64 alarm_time; + __le16 clock_id; + /* flag VIRTIO_RTC_FLAG_ALARM_ENABLED */ + __u8 flags; + __u8 reserved[5]; +}; + +struct virtio_rtc_resp_set_alarm { + struct virtio_rtc_resp_head head; + /* no response params */ +}; + +/* VIRTIO_RTC_REQ_SET_ALARM_ENABLED message */ + +struct virtio_rtc_req_set_alarm_enabled { + struct virtio_rtc_req_head head; + __le16 clock_id; + /* flag VIRTIO_RTC_ALARM_ENABLED */ + __u8 flags; + __u8 reserved[5]; +}; + +struct virtio_rtc_resp_set_alarm_enabled { + struct virtio_rtc_resp_head head; + /* no response params */ +}; + /** Union of request types for requestq */ union virtio_rtc_req_requestq { struct virtio_rtc_req_read read; @@ -137,6 +202,9 @@ union virtio_rtc_req_requestq { struct virtio_rtc_req_cfg cfg; struct virtio_rtc_req_clock_cap clock_cap; struct virtio_rtc_req_cross_cap cross_cap; + struct virtio_rtc_req_read_alarm read_alarm; + struct virtio_rtc_req_set_alarm set_alarm; + struct virtio_rtc_req_set_alarm_enabled set_alarm_enabled; }; /** Union of response types for requestq */ @@ -146,6 +214,24 @@ union virtio_rtc_resp_requestq { struct virtio_rtc_resp_cfg cfg; struct virtio_rtc_resp_clock_cap clock_cap; struct virtio_rtc_resp_cross_cap cross_cap; + struct virtio_rtc_resp_read_alarm read_alarm; + struct virtio_rtc_resp_set_alarm set_alarm; + struct virtio_rtc_resp_set_alarm_enabled set_alarm_enabled; +}; + +/* alarmq notifications */ + +/* VIRTIO_RTC_NOTIF_ALARM notification */ + +struct virtio_rtc_notif_alarm { + struct virtio_rtc_notif_head head; + __le16 clock_id; + __u8 reserved[6]; +}; + +/** Union of notification types for alarmq */ +union virtio_rtc_notif_alarmq { + struct virtio_rtc_notif_alarm alarm; }; #endif /* _LINUX_VIRTIO_RTC_H */ -- cgit v1.2.3 From c5b6ababd21ab6bb251e5a2b4db110e76fb00a26 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:02 -0400 Subject: locking/mutex: implement mutex_trylock_nested Despite the fact that several lockdep-related checks are skipped when calling trylock* versions of the locking primitives, for example mutex_trylock, each time the mutex is acquired, a held_lock is still placed onto the lockdep stack by __lock_acquire() which is called regardless of whether the trylock* or regular locking API was used. This means that if the caller successfully acquires more than MAX_LOCK_DEPTH locks of the same class, even when using mutex_trylock, lockdep will still complain that the maximum depth of the held lock stack has been reached and disable itself. For example, the following error currently occurs in the ARM version of KVM, once the code tries to lock all vCPUs of a VM configured with more than MAX_LOCK_DEPTH vCPUs, a situation that can easily happen on modern systems, where having more than 48 CPUs is common, and it's also common to run VMs that have vCPU counts approaching that number: [ 328.171264] BUG: MAX_LOCK_DEPTH too low! [ 328.175227] turning off the locking correctness validator. [ 328.180726] Please attach the output of /proc/lock_stat to the bug report [ 328.187531] depth: 48 max: 48! 
[ 328.190678] 48 locks held by qemu-kvm/11664: [ 328.194957] #0: ffff800086de5ba0 (&kvm->lock){+.+.}-{3:3}, at: kvm_ioctl_create_device+0x174/0x5b0 [ 328.204048] #1: ffff0800e78800b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.212521] #2: ffff07ffeee51e98 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.220991] #3: ffff0800dc7d80b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.229463] #4: ffff07ffe0c980b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.237934] #5: ffff0800a3883c78 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 [ 328.246405] #6: ffff07fffbe480b8 (&vcpu->mutex){+.+.}-{3:3}, at: lock_all_vcpus+0x16c/0x2a0 Luckily, in all instances that require locking all vCPUs, the 'kvm->lock' is taken a priori, and that fact makes it possible to use the little known feature of lockdep, called a 'nest_lock', to avoid this warning and subsequent lockdep self-disablement. The action of 'nested lock' being provided to lockdep's lock_acquire(), causes the lockdep to detect that the top of the held lock stack contains a lock of the same class and then increment its reference counter instead of pushing a new held_lock item onto that stack. See __lock_acquire for more information. Signed-off-by: Maxim Levitsky Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/mutex.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2143d05116be..da4518cfd59c 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -193,7 +193,22 @@ extern void mutex_lock_io(struct mutex *lock); * * Returns 1 if the mutex has been acquired successfully, and 0 on contention. */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern int _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); + +#define mutex_trylock_nest_lock(lock, nest_lock) \ +( \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map), \ + _mutex_trylock_nest_lock(lock, &(nest_lock)->dep_map) \ +) + +#define mutex_trylock(lock) _mutex_trylock_nest_lock(lock, NULL) +#else extern int mutex_trylock(struct mutex *lock); +#define mutex_trylock_nest_lock(lock, nest_lock) mutex_trylock(lock) +#endif + extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); -- cgit v1.2.3 From fb49f07ba1d9d8c9bd8854878ae6b5b21ff9ac45 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:03 -0400 Subject: locking/mutex: implement mutex_lock_killable_nest_lock KVM's SEV intra-host migration code needs to lock all vCPUs of the source and the target VM, before it proceeds with the migration. The number of vCPUs that belong to each VM is not bounded by anything except a self-imposed KVM limit of CONFIG_KVM_MAX_NR_VCPUS vCPUs which is significantly larger than the depth of lockdep's lock stack. Luckily, the locks in both of the cases mentioned above, are held under the 'kvm->lock' of each VM, which means that we can use the little known lockdep feature called a "nest_lock" to support this use case in a cleaner way, compared to the way it's currently done. Implement and expose 'mutex_lock_killable_nest_lock' for this purpose. 
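As an illustration of the intended calling pattern (not part of this patch, and with simplified error handling), a sketch of taking every vCPU mutex under the already-held kvm->lock so that lockdep sees the nesting:

/*
 * Minimal sketch, assuming the KVM structures described above: take one
 * mutex per vCPU while kvm->lock is held, and tell lockdep about the
 * nesting so the held-lock stack does not overflow.
 */
static int lock_all_vcpus_sketch(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i, j;

	lockdep_assert_held(&kvm->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (mutex_lock_killable_nest_lock(&vcpu->mutex, &kvm->lock))
			goto out_unlock;
	}
	return 0;

out_unlock:
	kvm_for_each_vcpu(j, vcpu, kvm) {
		if (j == i)
			break;
		mutex_unlock(&vcpu->mutex);
	}
	return -EINTR;
}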
Signed-off-by: Maxim Levitsky Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/mutex.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index da4518cfd59c..a039fa8c1780 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -156,16 +156,15 @@ static inline int __devm_mutex_init(struct device *dev, struct mutex *lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); - extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); -extern int __must_check mutex_lock_killable_nested(struct mutex *lock, - unsigned int subclass); +extern int __must_check _mutex_lock_killable(struct mutex *lock, + unsigned int subclass, struct lockdep_map *nest_lock); extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass); #define mutex_lock(lock) mutex_lock_nested(lock, 0) #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) -#define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0) +#define mutex_lock_killable(lock) _mutex_lock_killable(lock, 0, NULL) #define mutex_lock_io(lock) mutex_lock_io_nested(lock, 0) #define mutex_lock_nest_lock(lock, nest_lock) \ @@ -174,6 +173,15 @@ do { \ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) +#define mutex_lock_killable_nest_lock(lock, nest_lock) \ +( \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map), \ + _mutex_lock_killable(lock, 0, &(nest_lock)->dep_map) \ +) + +#define mutex_lock_killable_nested(lock, subclass) \ + _mutex_lock_killable(lock, subclass, NULL) + #else extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); @@ -183,6 +191,7 @@ extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) +# define mutex_lock_killable_nest_lock(lock, nest_lock) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) # define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock) #endif -- cgit v1.2.3 From e4a454ced74c0ac97c8bd32f086ee3ad74528780 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 12 May 2025 14:04:04 -0400 Subject: KVM: add kvm_lock_all_vcpus and kvm_trylock_all_vcpus In a few cases, usually in the initialization code, KVM locks all vCPUs of a VM to ensure that userspace doesn't do funny things while KVM performs an operation that affects the whole VM. Until now, all these operations were implemented using custom code, and all of them share the same problem: Lockdep can't cope with simultaneous locking of a large number of locks of the same class. However if these locks are taken while another lock is already held, which is luckily the case, it is possible to take advantage of little known _nest_lock feature of lockdep which allows in this case to have an unlimited number of locks of same class to be taken. 
To implement this, create two functions: kvm_lock_all_vcpus() and kvm_trylock_all_vcpus(). Both functions are needed because some code that will be replaced in the subsequent patches uses mutex_trylock instead of regular mutex_lock. Suggested-by: Paolo Bonzini Signed-off-by: Maxim Levitsky Acked-by: Marc Zyngier Acked-by: Peter Zijlstra (Intel) Message-ID: <20250512180407.659015-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1dedc421b3e3..a6140415c693 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1015,6 +1015,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) void kvm_destroy_vcpus(struct kvm *kvm); +int kvm_trylock_all_vcpus(struct kvm *kvm); +int kvm_lock_all_vcpus(struct kvm *kvm); +void kvm_unlock_all_vcpus(struct kvm *kvm); + void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 89f9dba365e1b85caef556d28654c6d336629eef Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Thu, 22 May 2025 23:04:25 +0000 Subject: dma-buf: Rename debugfs symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the debugfs list and mutex so it's clear they are now usable without the need for CONFIG_DEBUG_FS. The list will always be populated to support the creation of a BPF iterator for dmabufs. Signed-off-by: T.J. Mercier Reviewed-by: Christian König Acked-by: Song Liu Link: https://lore.kernel.org/r/20250522230429.941193-2-tjmercier@google.com Signed-off-by: Alexei Starovoitov --- include/linux/dma-buf.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 36216d28d8bd..8ff4add71f88 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -370,10 +370,8 @@ struct dma_buf { */ struct module *owner; -#if IS_ENABLED(CONFIG_DEBUG_FS) /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; -#endif /** @priv: exporter specific private data for this buffer object. */ void *priv; -- cgit v1.2.3 From 76ea95534995adde1aa3cb1aa97ef33f50a617a9 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Thu, 22 May 2025 23:04:26 +0000 Subject: bpf: Add dmabuf iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dmabuf iterator traverses the list of all DMA buffers. DMA buffers are refcounted through their associated struct file. A reference is taken on each buffer as the list is iterated to ensure each buffer persists for the duration of the bpf program execution without holding the list mutex. Signed-off-by: T.J.
Mercier Reviewed-by: Christian König Acked-by: Song Liu Link: https://lore.kernel.org/r/20250522230429.941193-3-tjmercier@google.com Signed-off-by: Alexei Starovoitov --- include/linux/dma-buf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 8ff4add71f88..7af2ea839f58 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -634,4 +634,6 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); +struct dma_buf *dma_buf_iter_begin(void); +struct dma_buf *dma_buf_iter_next(struct dma_buf *dmbuf); #endif /* __DMA_BUF_H__ */ -- cgit v1.2.3 From 7b2b67dbd449afd00fc7279b1ab7ffa3d26fe0c9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:54 -0600 Subject: Revert "Disable FOP_DONTCACHE for now due to bugs" This reverts commit 478ad02d6844217cc7568619aeb0809d93ade43d. Both the read and write side dirty && writeback races should be resolved now, revert the commit that disabled FOP_DONTCACHE for filesystems. Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/ Signed-off-by: Jens Axboe Link: https://lore.kernel.org/20250527133255.452431-4-axboe@kernel.dk Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0db87f8e676c..57c3db3ef6ad 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2207,7 +2207,7 @@ struct file_operations { /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ -#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ +#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, -- cgit v1.2.3 From e2d2115e56c4a02377189bfc3a9a7933552a7b0f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 23 May 2025 21:13:35 -0700 Subject: bpf: Do not include stack ptr register in precision backtracking bookkeeping Yi Lai reported an issue ([1]) where the following warning appears in kernel dmesg: [ 60.643604] verifier backtracking bug [ 60.643635] WARNING: CPU: 10 PID: 2315 at kernel/bpf/verifier.c:4302 __mark_chain_precision+0x3a6c/0x3e10 [ 60.648428] Modules linked in: bpf_testmod(OE) [ 60.650471] CPU: 10 UID: 0 PID: 2315 Comm: test_progs Tainted: G OE 6.15.0-rc4-gef11287f8289-dirty #327 PREEMPT(full) [ 60.654385] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE [ 60.656682] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 60.660475] RIP: 0010:__mark_chain_precision+0x3a6c/0x3e10 [ 60.662814] Code: 5a 30 84 89 ea e8 c4 d9 01 00 80 3d 3e 7d d8 04 00 0f 85 60 fa ff ff c6 05 31 7d d8 04 01 48 c7 c7 00 58 30 84 e8 c4 06 a5 ff <0f> 0b e9 46 fa ff ff 48 ... 
[ 60.668720] RSP: 0018:ffff888116cc7298 EFLAGS: 00010246 [ 60.671075] RAX: 54d70e82dfd31900 RBX: ffff888115b65e20 RCX: 0000000000000000 [ 60.673659] RDX: 0000000000000001 RSI: 0000000000000004 RDI: 00000000ffffffff [ 60.676241] RBP: 0000000000000400 R08: ffff8881f6f23bd3 R09: 1ffff1103ede477a [ 60.678787] R10: dffffc0000000000 R11: ffffed103ede477b R12: ffff888115b60ae8 [ 60.681420] R13: 1ffff11022b6cbc4 R14: 00000000fffffff2 R15: 0000000000000001 [ 60.684030] FS: 00007fc2aedd80c0(0000) GS:ffff88826fa8a000(0000) knlGS:0000000000000000 [ 60.686837] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 60.689027] CR2: 000056325369e000 CR3: 000000011088b002 CR4: 0000000000370ef0 [ 60.691623] Call Trace: [ 60.692821] [ 60.693960] ? __pfx_verbose+0x10/0x10 [ 60.695656] ? __pfx_disasm_kfunc_name+0x10/0x10 [ 60.697495] check_cond_jmp_op+0x16f7/0x39b0 [ 60.699237] do_check+0x58fa/0xab10 ... Further analysis shows the warning is at line 4302 as below: 4294 /* static subprog call instruction, which 4295 * means that we are exiting current subprog, 4296 * so only r1-r5 could be still requested as 4297 * precise, r0 and r6-r10 or any stack slot in 4298 * the current frame should be zero by now 4299 */ 4300 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { 4301 verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); 4302 WARN_ONCE(1, "verifier backtracking bug"); 4303 return -EFAULT; 4304 } With the below test (also in the next patch): __used __naked static void __bpf_jmp_r10(void) { asm volatile ( "r2 = 2314885393468386424 ll;" "goto +0;" "if r2 <= r10 goto +3;" "if r1 >= -1835016 goto +0;" "if r2 <= 8 goto +0;" "if r3 <= 0 goto +0;" "exit;" ::: __clobber_all); } SEC("?raw_tp") __naked void bpf_jmp_r10(void) { asm volatile ( "r3 = 0 ll;" "call __bpf_jmp_r10;" "r0 = 0;" "exit;" ::: __clobber_all); } The following is the verifier failure log: 0: (18) r3 = 0x0 ; R3_w=0 2: (85) call pc+2 caller: R10=fp0 callee: frame1: R1=ctx() R3_w=0 R10=fp0 5: frame1: R1=ctx() R3_w=0 R10=fp0 ; asm volatile (" \ @ verifier_precision.c:184 5: (18) r2 = 0x20202000256c6c78 ; frame1: R2_w=0x20202000256c6c78 7: (05) goto pc+0 8: (bd) if r2 <= r10 goto pc+3 ; frame1: R2_w=0x20202000256c6c78 R10=fp0 9: (35) if r1 >= 0xffe3fff8 goto pc+0 ; frame1: R1=ctx() 10: (b5) if r2 <= 0x8 goto pc+0 mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1 mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0 mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3 mark_precise: frame1: regs=r2,r10 stack= before 7: (05) goto pc+0 mark_precise: frame1: regs=r2,r10 stack= before 5: (18) r2 = 0x20202000256c6c78 mark_precise: frame1: regs=r10 stack= before 2: (85) call pc+2 BUG regs 400 The main failure reason is due to r10 in precision backtracking bookkeeping. Actually r10 is always precise and there is no need to add it for the precision backtracking bookkeeping. One way to fix the issue is to prevent bt_set_reg() if any src/dst reg is r10. Andrii suggested to go with push_insn_history() approach to avoid explicitly checking r10 in backtrack_insn(). This patch added push_insn_history() support for cond_jmp like 'rX rY' operations. In check_cond_jmp_op(), if any of rX or rY is a stack pointer, push_insn_history() will record such information, and later backtrack_insn() will do bt_set_reg() properly for those register(s). 
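A rough sketch of the idea (the helper signature and surrounding variable names are assumptions, not the exact hunk): at the conditional jump, record whether either operand is a stack pointer so that backtracking can mark the right registers precise without ever bookkeeping r10 itself.

/* Sketch only; push_insn_history()'s real signature may differ. */
int insn_flags = 0;

if (dst_reg->type == PTR_TO_STACK)
	insn_flags |= INSN_F_DST_REG_STACK;
if (src_reg && src_reg->type == PTR_TO_STACK)
	insn_flags |= INSN_F_SRC_REG_STACK;
if (insn_flags) {
	err = push_insn_history(env, this_branch, insn_flags, 0);
	if (err)
		return err;
}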
[1] https://lore.kernel.org/bpf/Z%2F8q3xzpU59CIYQE@ly-workstation/ Reported by: Yi Lai Fixes: 407958a0e980 ("bpf: encapsulate precision backtracking bookkeeping") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250524041335.4046126-1-yonghong.song@linux.dev --- include/linux/bpf_verifier.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 78c97e12ea4e..256274acb1d8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -356,7 +356,11 @@ enum { INSN_F_SPI_MASK = 0x3f, /* 6 bits */ INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ - INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ + INSN_F_STACK_ACCESS = BIT(9), + + INSN_F_DST_REG_STACK = BIT(10), /* dst_reg is PTR_TO_STACK */ + INSN_F_SRC_REG_STACK = BIT(11), /* src_reg is PTR_TO_STACK */ + /* total 12 bits are used now. */ }; static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); @@ -365,9 +369,9 @@ static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); struct bpf_insn_hist_entry { u32 idx; /* insn idx can't be bigger than 1 million */ - u32 prev_idx : 22; - /* special flags, e.g., whether insn is doing register stack spill/load */ - u32 flags : 10; + u32 prev_idx : 20; + /* special INSN_F_xxx flags */ + u32 flags : 12; /* additional registers that need precision tracking when this * jump is backtracked, vector of six 10-bit records */ -- cgit v1.2.3 From e9cb929670a1e98b592b30f03f06e9e20110f318 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 May 2025 13:21:47 +0200 Subject: net: phy: fix up const issues in to_mdio_device() and to_phy_device() Both to_mdio_device() and to_phy_device() "throw away" the const pointer attribute passed to them and return a non-const pointer, which generally is not a good thing overall. Fix this up by using container_of_const() which was designed for this very problem. 
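A small usage sketch (not part of the patch) of what container_of_const() buys here: the constness of the device pointer handed in is now preserved instead of being silently discarded.

static void example(const struct device *cdev, struct device *dev)
{
	/* const in, const out */
	const struct mdio_device *cmdio = to_mdio_device(cdev);
	/* non-const in, non-const out, exactly as before */
	struct mdio_device *mdio = to_mdio_device(dev);

	(void)cmdio;
	(void)mdio;
}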
Cc: Alexander Lobakin Cc: Andrew Lunn Cc: Heiner Kallweit Cc: Russell King Fixes: 7eab14de73a8 ("mdio, phy: fix -Wshadow warnings triggered by nested container_of()") Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2025052246-conduit-glory-8fc9@gregkh Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 5 +---- include/linux/phy.h | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 3c3deac57894..e43ff9f980a4 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -45,10 +45,7 @@ struct mdio_device { unsigned int reset_deassert_delay; }; -static inline struct mdio_device *to_mdio_device(const struct device *dev) -{ - return container_of(dev, struct mdio_device, dev); -} +#define to_mdio_device(__dev) container_of_const(__dev, struct mdio_device, dev) /* struct mdio_driver_common: Common to all MDIO drivers */ struct mdio_driver_common { diff --git a/include/linux/phy.h b/include/linux/phy.h index 32b9da274115..e194dad1623d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -744,10 +744,7 @@ struct phy_device { #define PHY_F_NO_IRQ 0x80000000 #define PHY_F_RXC_ALWAYS_ON 0x40000000 -static inline struct phy_device *to_phy_device(const struct device *dev) -{ - return container_of(to_mdio_device(dev), struct phy_device, mdio); -} +#define to_phy_device(__dev) container_of_const(to_mdio_device(__dev), struct phy_device, mdio) /** * struct phy_tdr_config - Configuration of a TDR raw test -- cgit v1.2.3 From 33f1b3677a13dda60a2a59858f7916672e7f1546 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 May 2025 07:47:45 +0200 Subject: sctp: mark sctp_do_peeloff static sctp_do_peeloff is only used inside of net/sctp/socket.c, so mark it static. Signed-off-by: Christoph Hellwig Acked-by: Xin Long Link: https://patch.msgid.link/20250526054745.2329201-1-hch@lst.de Signed-off-by: Jakub Kicinski --- include/net/sctp/sctp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d8da764cf6de..e96d1bd087f6 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -364,8 +364,6 @@ sctp_assoc_to_state(const struct sctp_association *asoc) /* Look up the association by its id. */ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id); -int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp); - /* A macro to walk a list of skbs. */ #define sctp_skb_for_each(pos, head, tmp) \ skb_queue_walk_safe(head, pos, tmp) -- cgit v1.2.3 From 81edb1ba3232afd45ae7f3f492a91019571b18c9 Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Mon, 5 May 2025 11:22:42 -0700 Subject: mm/hugetlb: refactor unmap_hugepage_range() to take folio instead of page The function unmap_hugepage_range() has two kinds of users: 1) unmap_ref_private(), which passes in the head page of a folio. Since unmap_ref_private() already takes folio and there are no other uses of the folio struct in the function, it is natural for unmap_hugepage_range() to take folio also. 2) All other uses, which pass in NULL pointer. In both cases, we can pass in folio. Refactor unmap_hugepage_range() to take folio. 
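For illustration only (arguments simplified, not the actual hunk), the call-site change in unmap_ref_private() boils down to handing the folio through instead of its head page:

-	unmap_hugepage_range(iter_vma, address,
-			     address + huge_page_size(h), &folio->page, 0);
+	unmap_hugepage_range(iter_vma, address,
+			     address + huge_page_size(h), folio, 0);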
Link: https://lkml.kernel.org/r/20250505182345.506888-4-nifan.cxl@gmail.com Signed-off-by: Fan Ni Reviewed-by: Muchun Song Reviewed-by: Sidhartha Kumar Reviewed-by: Oscar Salvador Acked-by: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: "Vishal Moola (Oracle)" Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 23ebf49c5d6a..f6d5f24e793c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -129,8 +129,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); void unmap_hugepage_range(struct vm_area_struct *, - unsigned long, unsigned long, struct page *, - zap_flags_t); + unsigned long start, unsigned long end, + struct folio *, zap_flags_t); void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, -- cgit v1.2.3 From 7f4b6065d9a842721a04632fc219aa453d1b2f5c Mon Sep 17 00:00:00 2001 From: Fan Ni Date: Mon, 5 May 2025 11:22:43 -0700 Subject: mm/hugetlb: refactor __unmap_hugepage_range() to take folio instead of page The function __unmap_hugepage_range() has two kinds of users: 1) unmap_hugepage_range(), which passes in the head page of a folio. Since unmap_hugepage_range() already takes folio and there are no other uses of the folio struct in the function, it is natural for __unmap_hugepage_range() to take folio also. 2) All other uses, which pass in NULL pointer. In both cases, we can pass in folio. Refactor __unmap_hugepage_range() to take folio. Link: https://lkml.kernel.org/r/20250505182345.506888-5-nifan.cxl@gmail.com Signed-off-by: Fan Ni Acked-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Sidhartha Kumar Cc: "Vishal Moola (Oracle)" Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f6d5f24e793c..eb21619206af 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -134,7 +134,7 @@ void unmap_hugepage_range(struct vm_area_struct *, void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, - struct page *ref_page, zap_flags_t zap_flags); + struct folio *, zap_flags_t zap_flags); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(char *buf, int len, int nid); void hugetlb_show_meminfo_node(int nid); @@ -455,7 +455,7 @@ static inline long hugetlb_change_protection( static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, - unsigned long end, struct page *ref_page, + unsigned long end, struct folio *folio, zap_flags_t zap_flags) { BUG(); -- cgit v1.2.3 From 375700bab5b150e876e42d894a9a7470881f8a61 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 May 2025 13:13:11 -0600 Subject: llist: make llist_add_batch() a static inline The function is small enough that it should be, and it's a (very) hot path for io_uring. Doing this actually reduces my vmlinux text size for my standard build/test box. 
Before: axboe@r7625 ~/g/linux (test)> size vmlinux text data bss dec hex filename 19892174 5938310 2470432 28300916 1afd674 vmlinux After: axboe@r7625 ~/g/linux (test)> size vmlinux text data bss dec hex filename 19891878 5938310 2470436 28300624 1afd550 vmlinux Link: https://lkml.kernel.org/r/f1d104c6-7ac8-457a-a53d-6bb741421b2f@kernel.dk Signed-off-by: Jens Axboe Signed-off-by: Andrew Morton --- include/linux/llist.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/llist.h b/include/linux/llist.h index 2c982ff7475a..27b17f64bcee 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -223,9 +223,26 @@ static inline struct llist_node *llist_next(struct llist_node *node) return node->next; } -extern bool llist_add_batch(struct llist_node *new_first, - struct llist_node *new_last, - struct llist_head *head); +/** + * llist_add_batch - add several linked entries in batch + * @new_first: first entry in batch to be added + * @new_last: last entry in batch to be added + * @head: the head for your lock-less list + * + * Return whether list is empty before adding. + */ +static inline bool llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head) +{ + struct llist_node *first = READ_ONCE(head->first); + + do { + new_last->next = first; + } while (!try_cmpxchg(&head->first, &first, new_first)); + + return !first; +} static inline bool __llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, -- cgit v1.2.3 From 290e5d3c49f687c1567bde634dc33d57b0674919 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 19 May 2025 09:20:36 -0700 Subject: net: mana: Add support for Multi Vports on Bare metal To support Multi Vports on Bare metal, increase the device config response version. And, skip the register HW vport, and register filter steps, when the Bare metal hostmode is set. Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/1747671636-5810-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Paolo Abeni --- include/net/mana/mana.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 0f78065de8fe..38238c1d00bf 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -408,6 +408,7 @@ struct mana_context { struct gdma_dev *gdma_dev; u16 num_ports; + u8 bm_hostmode; struct mana_eq *eqs; struct dentry *mana_eqs_debugfs; @@ -557,7 +558,8 @@ struct mana_query_device_cfg_resp { u64 pf_cap_flags4; u16 max_num_vports; - u16 reserved; + u8 bm_hostmode; /* response v3: Bare Metal Host Mode */ + u8 reserved; u32 max_num_eqs; /* response v2: */ -- cgit v1.2.3 From 206cc44588f72b49ad4d7e21a7472ab2a72a83df Mon Sep 17 00:00:00 2001 From: Sami Uddin Date: Mon, 12 May 2025 07:51:53 +0930 Subject: virtio: reject shm region if length is zero Prevent usage of shared memory regions where the length is zero, as such configurations are not valid and may lead to unexpected behavior. Signed-off-by: Sami Uddin Message-Id: <20250511222153.2332-1-sami.md.ko@gmail.com> Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 169c7d367fac..b3e1d30c765b 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -329,6 +329,8 @@ static inline bool virtio_get_shm_region(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id) { + if (!region->len) + return false; if (!vdev->config->get_shm_region) return false; return vdev->config->get_shm_region(vdev, region, id); -- cgit v1.2.3 From 34ecde3c56066ba79e5ec3d93c5b14ea83e3603e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 17:01:31 -0600 Subject: iomap: don't lose folio dropbehind state for overwrites DONTCACHE I/O must have the completion punted to a workqueue, just like what is done for unwritten extents, as the completion needs task context to perform the invalidation of the folio(s). However, if writeback is started off filemap_fdatawrite_range() off generic_sync() and it's an overwrite, then the DONTCACHE marking gets lost as iomap_add_to_ioend() don't look at the folio being added and no further state is passed down to help it know that this is a dropbehind/DONTCACHE write. Check if the folio being added is marked as dropbehind, and set IOMAP_IOEND_DONTCACHE if that is the case. Then XFS can factor this into the decision making of completion context in xfs_submit_ioend(). Additionally include this ioend flag in the NOMERGE flags, to avoid mixing it with unrelated IO. Since this is the 3rd flag that will cause XFS to punt the completion to a workqueue, add a helper so that each one of them can get appropriately commented. This fixes extra page cache being instantiated when the write performed is an overwrite, rather than newly instantiated blocks. Fixes: b2cd5ae693a3 ("iomap: make buffered writes work with RWF_DONTCACHE") Signed-off-by: Jens Axboe Link: https://lore.kernel.org/5153f6e8-274d-4546-bf55-30a5018e0d03@kernel.dk Reviewed-by: Dave Chinner Signed-off-by: Christian Brauner --- include/linux/iomap.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 68416b135151..522644d62f30 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -377,13 +377,16 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, #define IOMAP_IOEND_BOUNDARY (1U << 2) /* is direct I/O */ #define IOMAP_IOEND_DIRECT (1U << 3) +/* is DONTCACHE I/O */ +#define IOMAP_IOEND_DONTCACHE (1U << 4) /* * Flags that if set on either ioend prevent the merge of two ioends. * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way) */ #define IOMAP_IOEND_NOMERGE_FLAGS \ - (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT) + (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT | \ + IOMAP_IOEND_DONTCACHE) /* * Structure for writeback I/O completions. -- cgit v1.2.3 From 793908d60b8745c386b9f4e29eb702f74ceb0886 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 24 Apr 2025 10:34:04 +0200 Subject: PCI: endpoint: Retain fixed-size BAR size as well as aligned size When allocating space for an endpoint function on a BAR with a fixed size, the size saved in 'struct pci_epf_bar.size' should be the fixed size as expected by pci_epc_set_bar(). 
However, if pci_epf_alloc_space() increased the allocation size to accommodate iATU alignment requirements, it previously saved the larger aligned size in .size, which broke pci_epc_set_bar(). To solve this, keep the fixed BAR size in .size and save the aligned size in a new .aligned_size for use when deallocating it. Fixes: 2a9a801620ef ("PCI: endpoint: Add support to specify alignment for buffers allocated to BARs") Signed-off-by: Jerome Brunet [mani: commit message fixup] Signed-off-by: Manivannan Sadhasivam [bhelgaas: more specific subject, commit log, wrap comment to match file] Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel Link: https://patch.msgid.link/20250424-pci-ep-size-alignment-v5-1-2d4ec2af23f5@baylibre.com --- include/linux/pci-epf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 879d19cebd4f..749cee0bcf2c 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -114,6 +114,8 @@ struct pci_epf_driver { * @phys_addr: physical address that should be mapped to the BAR * @addr: virtual address corresponding to the @phys_addr * @size: the size of the address space present in BAR + * @aligned_size: the size actually allocated to accommodate the iATU alignment + * requirement * @barno: BAR number * @flags: flags that are set for the BAR */ @@ -121,6 +123,7 @@ struct pci_epf_bar { dma_addr_t phys_addr; void *addr; size_t size; + size_t aligned_size; enum pci_barno barno; int flags; }; -- cgit v1.2.3 From d2e1d783f2c619cf9d8242b67a0ab0d2915f2920 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 11 Apr 2025 15:47:32 -0400 Subject: NFS: Add support for fallocate(FALLOC_FL_ZERO_RANGE) This implements a suggestion from Trond that we can mimic FALLOC_FL_ZERO_RANGE by sending a compound that first does a DEALLOCATE to punch a hole in a file, and then an ALLOCATE to fill the hole with zeroes. There might technically be a race here, but once the DEALLOCATE finishes any reads from the region would return zeroes anyway, so I don't expect it to cause problems. Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index d8cad844870a..07635f87a3fd 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -678,6 +678,7 @@ enum { NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, + NFSPROC4_CLNT_ZERO_RANGE, NFSPROC4_CLNT_LAYOUTSTATS, NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_COPY, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e02f4c4e9cc4..63141320c2a8 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -304,6 +304,7 @@ struct nfs_server { #define NFS_CAP_CASE_PRESERVING (1U << 7) #define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) #define NFS_CAP_OFFLOAD_STATUS (1U << 9) +#define NFS_CAP_ZERO_RANGE (1U << 10) #define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) -- cgit v1.2.3 From 77be29b7a3f896bd96808ba6781481a1f6afa66c Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 1 May 2025 08:29:42 -0400 Subject: NFSv4: Allow FREE_STATEID to clean up delegations The NFS client's list of delegations can grow quite large (well beyond the delegation watermark) if the server is revoking or there are repeated events that expire state. 
Once this happens, the revoked delegations can cause a performance problem for subsequent walks of the servers->delegations list when the client tries to test and free state. If we can determine that the FREE_STATEID operation has completed without error, we can prune the delegation from the list. Since the NFS client combines TEST_STATEID with FREE_STATEID in its minor version operations, there isn't an easy way to communicate success of FREE_STATEID. Rather than re-arrange quite a number of calling paths to break out the separate procedures, let's signal the success of FREE_STATEID by setting the stateid's type. Set NFS4_FREED_STATEID_TYPE for stateids that have been successfully discarded from the server, and use that type to signal that the delegation can be cleaned up. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 07635f87a3fd..e947af6a3684 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -72,6 +72,7 @@ struct nfs4_stateid_struct { NFS4_LAYOUT_STATEID_TYPE, NFS4_PNFS_DS_STATEID_TYPE, NFS4_REVOKED_STATEID_TYPE, + NFS4_FREED_STATEID_TYPE, } type; }; -- cgit v1.2.3 From 77e82fb2c6c27c122e785f543ae0062f7783c886 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 May 2025 10:46:39 +1000 Subject: nfs_localio: always hold nfsd net ref with nfsd_file ref Having separate nfsd_file_put and nfsd_file_put_local in struct nfsd_localio_operations doesn't make much sense. The difference is that nfsd_file_put doesn't drop a reference to the nfs_net which is what keeps nfsd from shutting down. Currently, if nfsd tries to shutdown it will invalidate the files stored in the list from the nfs_uuid and this will drop all references to the nfsd net that the client holds. But the client could still hold some references to nfsd_files for active IO. So nfsd might think is has completely shut down local IO, but hasn't and has no way to wait for those active IO requests to complete. So this patch changes nfsd_file_get to nfsd_file_get_local and has it increase the ref count on the nfsd net and it replaces all calls to ->nfsd_put_file to ->nfsd_put_file_local. It also changes ->nfsd_open_local_fh to return with the refcount on the net elevated precisely when a valid nfsd_file is returned. This means that whenever the client holds a valid nfsd_file, there will be an associated count on the nfsd net, and so the count can only reach zero when all nfsd_files have been returned. nfs_local_file_put() is changed to call nfs_to_nfsd_file_put_local() instead of replacing calls to one with calls to the other because this will help a later patch which changes nfs_to_nfsd_file_put_local() to take an __rcu pointer while nfs_local_file_put() doesn't. 
Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 9aa8a43843d7..c3f34bae60e1 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -66,8 +66,7 @@ struct nfsd_localio_operations { const struct nfs_fh *, const fmode_t); struct net *(*nfsd_file_put_local)(struct nfsd_file *); - struct nfsd_file *(*nfsd_file_get)(struct nfsd_file *); - void (*nfsd_file_put)(struct nfsd_file *); + struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; -- cgit v1.2.3 From c25a89770d1f216dcedfc2d25d56b604f62ce0bd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 May 2025 10:46:43 +1000 Subject: nfs_localio: change nfsd_file_put_local() to take a pointer to __rcu pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of calling xchg() and unrcu_pointer() before nfsd_file_put_local(), we now pass pointer to the __rcu pointer and call xchg() and unrcu_pointer() inside that function. Where unrcu_pointer() is currently called the internals of "struct nfsd_file" are not known and that causes older compilers such as gcc-8 to complain. In some cases we have a __kernel (aka normal) pointer not an __rcu pointer so we need to cast it to __rcu first. This is strictly a weakening so no information is lost. Somewhat surprisingly, this cast is accepted by gcc-8. This has the pleasing result that the cmpxchg() which sets ro_file and rw_file, and also the xchg() which clears them, are both now in the nfsd code. 
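A minimal sketch of the resulting callee-side pattern (the nf_net field name and the cleanup details are assumptions, not the nfsd code verbatim): the xchg() and unrcu_pointer() now happen inside the put helper, so callers only pass the address of their __rcu slot.

static struct net *nfsd_file_put_local_sketch(struct nfsd_file __rcu **pnf)
{
	/* atomically steal the file out of the caller's __rcu slot */
	struct nfsd_file *nf = unrcu_pointer(xchg(pnf, NULL));
	struct net *net = NULL;

	if (nf) {
		net = nf->nf_net;	/* assumed field name */
		nfsd_file_put(nf);
	}
	/* caller drops the nfsd net reference only if a file was found */
	return net;
}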
Reported-by: Pali Rohár Reported-by: Vincent Mailhol Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index c3f34bae60e1..5c7c92659e73 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -50,10 +50,6 @@ void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, spinlock_t *nn_local_clients_lock); /* localio needs to map filehandle -> struct nfsd_file */ -extern struct nfsd_file * -nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, - const struct cred *, const struct nfs_fh *, - const fmode_t) __must_hold(rcu); void nfs_close_local_fh(struct nfs_file_localio *); struct nfsd_localio_operations { @@ -64,8 +60,9 @@ struct nfsd_localio_operations { struct rpc_clnt *, const struct cred *, const struct nfs_fh *, + struct nfsd_file __rcu **pnf, const fmode_t); - struct net *(*nfsd_file_put_local)(struct nfsd_file *); + struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; @@ -76,6 +73,7 @@ extern const struct nfsd_localio_operations *nfs_to; struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, struct rpc_clnt *, const struct cred *, const struct nfs_fh *, struct nfs_file_localio *, + struct nfsd_file __rcu **pnf, const fmode_t); static inline void nfs_to_nfsd_net_put(struct net *net) @@ -90,16 +88,19 @@ static inline void nfs_to_nfsd_net_put(struct net *net) rcu_read_unlock(); } -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file __rcu **localio) { /* - * Must not hold RCU otherwise nfsd_file_put() can easily trigger: - * "Voluntary context switch within RCU read-side critical section!" - * by scheduling deep in underlying filesystem (e.g. XFS). + * Either *localio must be guaranteed to be non-NULL, or caller + * must prevent nfsd shutdown from completing as nfs_close_local_fh() + * does by blocking the nfs_uuid from being finally put. */ - struct net *net = nfs_to->nfsd_file_put_local(localio); + struct net *net; - nfs_to_nfsd_net_put(net); + net = nfs_to->nfsd_file_put_local(localio); + + if (net) + nfs_to_nfsd_net_put(net); } #else /* CONFIG_NFS_LOCALIO */ -- cgit v1.2.3 From f62da6e7270c2db5aef8a8b14f465896961a9372 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 14 May 2025 09:43:18 +0200 Subject: PCI: endpoint: Align pci_epc_set_msi(), pci_epc_ops::set_msi() nr_irqs encoding The kdoc for pci_epc_set_msi() says: "Invoke to set the required number of MSI interrupts." The kdoc for the callback pci_epc_ops::set_msi() says: "ops to set the requested number of MSI interrupts in the MSI capability register" pci_epc_ops::set_msi() does however expect the parameter 'interrupts' to be in the encoding as defined by the Multiple Message Capable (MMC) field of the MSI capability structure. Nowhere in the kdoc does it say that the number of interrupts should be in MMC encoding. It is very confusing that the API pci_epc_set_msi() and the callback function pci_epc_ops::set_msi() both take a parameter named 'interrupts', but they expect completely different encodings. 
Clean up the API and the callback function to have the same semantics, i.e. the parameter represents the number of interrupts, regardless of the internal encoding of that value. Also rename the parameter 'interrupts' to 'nr_irqs', in both the wrapper function and the callback function, such that the name is unambiguous. [bhelgaas: more specific subject] Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Cc: stable+noautosel@kernel.org # this is simply a cleanup Link: https://patch.msgid.link/20250514074313.283156-13-cassel@kernel.org --- include/linux/pci-epc.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 82837008b56f..15d10c07c9f1 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -100,7 +100,7 @@ struct pci_epc_ops { void (*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t addr); int (*set_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - u8 interrupts); + u8 nr_irqs); int (*get_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int (*set_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u16 interrupts, enum pci_barno, u32 offset); @@ -286,8 +286,7 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u64 pci_addr, size_t size); void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr); -int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - u8 interrupts); +int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 nr_irqs); int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u16 interrupts, enum pci_barno, u32 offset); -- cgit v1.2.3 From de0321bcc5fdd83631f0c2a6fdebfe0ad4e23449 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 14 May 2025 09:43:19 +0200 Subject: PCI: endpoint: Align pci_epc_set_msix(), pci_epc_ops::set_msix() nr_irqs encoding The kdoc for pci_epc_set_msix() says: "Invoke to set the required number of MSI-X interrupts." The kdoc for the callback pci_epc_ops->set_msix() says: "ops to set the requested number of MSI-X interrupts in the MSI-X capability register" pci_epc_ops::set_msix() does however expect the parameter 'interrupts' to be in the encoding as defined by the Table Size field. Nowhere in the kdoc does it say that the number of interrupts should be in Table Size encoding. It is very confusing that the API pci_epc_set_msix() and the callback function pci_epc_ops::set_msix() both take a parameter named 'interrupts', but they expect completely different encodings. Clean up the API and the callback function to have the same semantics, i.e. the parameter represents the number of interrupts, regardless of the internal encoding of that value. Also rename the parameter 'interrupts' to 'nr_irqs', in both the wrapper function and the callback function, such that the name is unambiguous. 
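For illustration (a generic sketch, not any specific controller driver), the encoding conversion now lives in the callbacks themselves, while callers of pci_epc_set_msi()/pci_epc_set_msix() always pass a plain vector count:

/* MSI: the Multiple Message Capable field holds log2(vectors). */
static int foo_ep_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
			  u8 nr_irqs)
{
	u8 mmc = order_base_2(nr_irqs);

	/* ... write 'mmc' into the MSI capability register ... */
	return 0;
}

/* MSI-X: the Table Size field holds vectors - 1. */
static int foo_ep_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
			   u16 nr_irqs, enum pci_barno bir, u32 offset)
{
	u16 table_size = nr_irqs - 1;

	/* ... write 'table_size' into the MSI-X capability register ... */
	return 0;
}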
[bhelgaas: more specific subject] Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Cc: stable+noautosel@kernel.org # this is simply a cleanup Link: https://patch.msgid.link/20250514074313.283156-14-cassel@kernel.org --- include/linux/pci-epc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 15d10c07c9f1..4286bfdbfdfa 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -103,7 +103,7 @@ struct pci_epc_ops { u8 nr_irqs); int (*get_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int (*set_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - u16 interrupts, enum pci_barno, u32 offset); + u16 nr_irqs, enum pci_barno, u32 offset); int (*get_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int (*raise_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, unsigned int type, u16 interrupt_num); @@ -288,8 +288,8 @@ void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr); int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 nr_irqs); int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no); -int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - u16 interrupts, enum pci_barno, u32 offset); +int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u16 nr_irqs, + enum pci_barno, u32 offset); int pci_epc_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr, u8 interrupt_num, -- cgit v1.2.3 From c31f91c6af96a5eb0632f4aee8d4e39cad7d7559 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Apr 2025 10:53:58 +0200 Subject: fuse: don't allow signals to interrupt getdents copying When getting the directory contents, the entries are first fetched to a kernel buffer, then they are copied to userspace with dir_emit(). This second phase is non-blocking as long as the userspace buffer is not paged out, making it interruptible makes zero sense. Overload d_type as flags, since it only uses 4 bits from 32. Reviewed-by: Bernd Schubert Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..0f2a1a572e3a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2073,6 +2073,9 @@ struct dir_context { loff_t pos; }; +/* If OR-ed with d_type, pending signals are not checked */ +#define FILLDIR_FLAG_NOINTR 0x1000 + /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. -- cgit v1.2.3 From 467e245d47e6662a4cbf4184d9e6d0b1b120c0bf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Apr 2025 12:44:55 +0200 Subject: readdir: supply dir_context.count as readdir buffer size hint This is a preparation for large readdir buffers in fuse. Simply setting the fuse buffer size to the userspace buffer size should work, the record sizes are similar (fuse's is slightly larger than libc's, so no overflow should ever happen). 
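A small sketch of how a filesystem might consume the new hint (the clamp bounds are illustrative assumptions):

static size_t readdir_buf_size_sketch(const struct dir_context *ctx)
{
	/* 0 means "unknown", INT_MAX means "unlimited"; both get clamped */
	size_t want = ctx->count > 0 ? ctx->count : PAGE_SIZE;

	return clamp_t(size_t, want, PAGE_SIZE, 16 * PAGE_SIZE);
}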
Signed-off-by: Miklos Szeredi Signed-off-by: Jaco Kroon --- include/linux/fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0f2a1a572e3a..dfc5a3327124 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2071,6 +2071,13 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, struct dir_context { filldir_t actor; loff_t pos; + /* + * Filesystems MUST NOT MODIFY count, but may use as a hint: + * 0 unknown + * > 0 space in buffer (assume at least one entry) + * INT_MAX unlimited + */ + int count; }; /* If OR-ed with d_type, pending signals are not checked */ -- cgit v1.2.3 From 5402c4d4d2000a9baa30c1157c97152ec6383733 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 25 May 2025 12:47:31 +0200 Subject: exportfs: require ->fh_to_parent() to encode connectable file handles When user requests a connectable file handle explicitly with the AT_HANDLE_CONNECTABLE flag, fail the request if filesystem (e.g. nfs) does not know how to decode a connected non-dir dentry. Fixes: c374196b2b9f ("fs: name_to_handle_at() support for "explicit connectable" file handles") Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/20250525104731.1461704-1-amir73il@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fc93f0abf513..25c4a5afbd44 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -314,6 +314,9 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { + if (!nop) + return false; + /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. @@ -321,6 +324,13 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, if (fh_flags & EXPORT_FH_FID) return exportfs_can_encode_fid(nop); + /* + * If a connectable file handle was requested, we need to make sure that + * filesystem can also decode connected file handles. + */ + if ((fh_flags & EXPORT_FH_CONNECTABLE) && !nop->fh_to_parent) + return false; + /* * If a decodeable file handle was requested, we need to make sure that * filesystem can also decode file handles. -- cgit v1.2.3 From 05b9405f2fa1848e984f231708fa1e5d385e4d27 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 23 May 2025 12:47:20 -0700 Subject: KVM: arm64: Resolve vLPI by host IRQ in vgic_v4_unset_forwarding() The virtual mapping and "GSI" routing of a particular vLPI is subject to change in response to the guest / userspace. This can be pretty annoying to deal with when KVM needs to track the physical state that's managed for vLPI direct injection. Make vgic_v4_unset_forwarding() resilient by using the host IRQ to resolve the vgic IRQ. Since this uses the LPI xarray directly, finding the ITS by doorbell address + grabbing it's its_lock is no longer necessary. Note that matching the right ITS / ITE is already handled in vgic_v4_set_forwarding(), and unless there's a bug in KVM's VGIC ITS emulation the virtual mapping that should remain stable for the lifetime of the vLPI mapping. 
Tested-by: Sweet Tea Dorminy Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20250523194722.4066715-4-oliver.upton@linux.dev Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 714cef854c1c..4a34f7f0a864 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -434,8 +434,7 @@ struct kvm_kernel_irq_routing_entry; int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq, struct kvm_kernel_irq_routing_entry *irq_entry); -int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, - struct kvm_kernel_irq_routing_entry *irq_entry); +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq); int vgic_v4_load(struct kvm_vcpu *vcpu); void vgic_v4_commit(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From b9802b54d41bbe98f673e08bc148b0c563fdc02e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 30 May 2025 12:16:58 +0800 Subject: asm-generic: Add sched.h inclusion in simd.h Commit 7ba8df47810f073 ("asm-generic: Make simd.h more resilient") causes a build error for PREEMPT_RT kernels: CC lib/crypto/sha256.o In file included from ./include/asm-generic/simd.h:6, from ./arch/loongarch/include/generated/asm/simd.h:1, from ./include/crypto/internal/simd.h:9, from ./include/crypto/internal/sha2.h:6, from lib/crypto/sha256.c:15: ./include/asm-generic/simd.h: In function 'may_use_simd': ./include/linux/preempt.h:111:34: error: 'current' undeclared (first use in this function) 111 | # define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) | ^~~~~~~ ./include/linux/preempt.h:112:82: note: in expansion of macro 'softirq_count' 112 | # define irq_count() ((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | softirq_count()) | ^~~~~~~~~~~~~ ./include/linux/preempt.h:143:34: note: in expansion of macro 'irq_count' 143 | #define in_interrupt() (irq_count()) | ^~~~~~~~~ ./include/asm-generic/simd.h:18:17: note: in expansion of macro 'in_interrupt' 18 | return !in_interrupt(); | ^~~~~~~~~~~~ So add sched.h inclusion in simd.h to fix it. Fixes: 7ba8df47810f073 ("asm-generic: Make simd.h more resilient") Signed-off-by: Huacai Chen Signed-off-by: Herbert Xu --- include/asm-generic/simd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h index ac29a22eb7cf..70c8716ad32a 100644 --- a/include/asm-generic/simd.h +++ b/include/asm-generic/simd.h @@ -4,6 +4,7 @@ #include #include +#include #include /* -- cgit v1.2.3 From d1c696dba120624256ab335ab8247f535b872309 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 8 May 2025 12:40:32 +0530 Subject: PCI: host-common: Convert to library for host controller drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This common library will be used as a placeholder for helper functions shared by the host controller drivers. This avoids placing the host controller drivers specific helpers in drivers/pci/*.c, to avoid enlarging the kernel image on platforms that do not use host controller drivers at all (like x86/ACPI platforms). 
Suggested-by: Lukas Wunner Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250508-pcie-reset-slot-v4-3-7050093e2b50@linaro.org --- include/linux/pci-ecam.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 3a10f8cfc3ad..d930651473b4 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -93,10 +93,4 @@ extern const struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */ extern const struct pci_ecam_ops tegra194_pcie_ops; /* Tegra194 PCIe */ extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */ #endif - -#if IS_ENABLED(CONFIG_PCI_HOST_COMMON) -/* for DT-based PCI controllers that support ECAM */ -int pci_host_common_probe(struct platform_device *pdev); -void pci_host_common_remove(struct platform_device *pdev); -#endif #endif -- cgit v1.2.3 From 6043b794c7668c19dabc4a93c75b924a19474d59 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Thu, 29 May 2025 12:28:05 +0200 Subject: net: Fix checksum update for ILA adj-transport During ILA address translations, the L4 checksums can be handled in different ways. One of them, adj-transport, consist in parsing the transport layer and updating any found checksum. This logic relies on inet_proto_csum_replace_by_diff and produces an incorrect skb->csum when in state CHECKSUM_COMPLETE. This bug can be reproduced with a simple ILA to SIR mapping, assuming packets are received with CHECKSUM_COMPLETE: $ ip a show dev eth0 14: eth0@if15: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 62:ae:35:9e:0f:8d brd ff:ff:ff:ff:ff:ff link-netnsid 0 inet6 3333:0:0:1::c078/64 scope global valid_lft forever preferred_lft forever inet6 fd00:10:244:1::c078/128 scope global nodad valid_lft forever preferred_lft forever inet6 fe80::60ae:35ff:fe9e:f8d/64 scope link proto kernel_ll valid_lft forever preferred_lft forever $ ip ila add loc_match fd00:10:244:1 loc 3333:0:0:1 \ csum-mode adj-transport ident-type luid dev eth0 Then I hit [fd00:10:244:1::c078]:8000 with a server listening only on [3333:0:0:1::c078]:8000. With the bug, the SYN packet is dropped with SKB_DROP_REASON_TCP_CSUM after inet_proto_csum_replace_by_diff changed skb->csum. 
The translation and drop are visible on pwru [1] traces: IFACE TUPLE FUNC eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ipv6_rcv eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ip6_rcv_core eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) nf_hook_slow eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) inet_proto_csum_replace_by_diff eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_early_demux eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_route_input eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input_finish eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_protocol_deliver_rcu eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) raw6_local_deliver eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ipv6_raw_deliver eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_rcv eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) __skb_checksum_complete eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skb_reason(SKB_DROP_REASON_TCP_CSUM) eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_head_state eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_data eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_free_head eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skbmem This is happening because inet_proto_csum_replace_by_diff is updating skb->csum when it shouldn't. The L4 checksum is updated such that it "cancels" the IPv6 address change in terms of checksum computation, so the impact on skb->csum is null. Note this would be different for an IPv4 packet since three fields would be updated: the IPv4 address, the IP checksum, and the L4 checksum. Two would cancel each other and skb->csum would still need to be updated to take the L4 checksum change into account. This patch fixes it by passing an ipv6 flag to inet_proto_csum_replace_by_diff, to skip the skb->csum update if we're in the IPv6 case. Note the behavior of the only other user of inet_proto_csum_replace_by_diff, the BPF subsystem, is left as is in this patch and fixed in the subsequent patch. With the fix, using the reproduction from above, I can confirm skb->csum is not touched by inet_proto_csum_replace_by_diff and the TCP SYN proceeds to the application after the ILA translation. 
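For reference, a minimal sketch of what the fixed helper is expected to look like with the new flag (a sketch of the intended behavior only, not necessarily the exact committed code; the authoritative version lives in net/core/utils.c):

    void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
                                         __wsum diff, bool pseudohdr, bool ipv6)
    {
            if (skb->ip_summed != CHECKSUM_PARTIAL) {
                    csum_replace_by_diff(sum, diff);
                    /* For IPv6 the address change and the L4 checksum change
                     * cancel each other, so skb->csum must be left untouched
                     * in the CHECKSUM_COMPLETE case.
                     */
                    if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6)
                            skb->csum = ~csum_sub(diff, skb->csum);
            } else if (pseudohdr) {
                    *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
            }
    }
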
Link: https://github.com/cilium/pwru [1] Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Link: https://patch.msgid.link/b5539869e3550d46068504feb02d37653d939c0b.1748509484.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- include/net/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index e57986b173f8..3cbab35de5ab 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -152,7 +152,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, - __wsum diff, bool pseudohdr); + __wsum diff, bool pseudohdr, bool ipv6); static __always_inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, -- cgit v1.2.3 From ead7f9b8de65632ef8060b84b0c55049a33cfea1 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Thu, 29 May 2025 12:28:35 +0200 Subject: bpf: Fix L4 csum update on IPv6 in CHECKSUM_COMPLETE In Cilium, we use bpf_csum_diff + bpf_l4_csum_replace to, among other things, update the L4 checksum after reverse SNATing IPv6 packets. That use case is however not currently supported and leads to invalid skb->csum values in some cases. This patch adds support for IPv6 address changes in bpf_l4_csum_update via a new flag. When calling bpf_l4_csum_replace in Cilium, it ends up calling inet_proto_csum_replace_by_diff: 1: void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, 2: __wsum diff, bool pseudohdr) 3: { 4: if (skb->ip_summed != CHECKSUM_PARTIAL) { 5: csum_replace_by_diff(sum, diff); 6: if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) 7: skb->csum = ~csum_sub(diff, skb->csum); 8: } else if (pseudohdr) { 9: *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); 10: } 11: } The bug happens when we're in the CHECKSUM_COMPLETE state. We've just updated one of the IPv6 addresses. The helper now updates the L4 header checksum on line 5. Next, it updates skb->csum on line 7. It shouldn't. For an IPv6 packet, the updates of the IPv6 address and of the L4 checksum will cancel each other. The checksums are set such that computing a checksum over the packet including its checksum will result in a sum of 0. So the same is true here when we update the L4 checksum on line 5. We'll update it as to cancel the previous IPv6 address update. Hence skb->csum should remain untouched in this case. The same bug doesn't affect IPv4 packets because, in that case, three fields are updated: the IPv4 address, the IP checksum, and the L4 checksum. The change to the IPv4 address and one of the checksums still cancel each other in skb->csum, but we're left with one checksum update and should therefore update skb->csum accordingly. That's exactly what inet_proto_csum_replace_by_diff does. This special case for IPv6 L4 checksums is also described atop inet_proto_csum_replace16, the function we should be using in this case. This patch introduces a new bpf_l4_csum_replace flag, BPF_F_IPV6, to indicate that we're updating the L4 checksum of an IPv6 packet. When the flag is set, inet_proto_csum_replace_by_diff will skip the skb->csum update. 
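As an illustration of the intended usage (a hedged sketch, not taken from the Cilium sources): a tc BPF program that has just rewritten an IPv6 address could fix up the TCP checksum as below. The helper and flag names come from the uapi, while the wrapper name and the checksum offset are assumptions supplied by the caller.

    #include <linux/bpf.h>
    #include <linux/in6.h>
    #include <bpf/bpf_helpers.h>

    /* csum_off: offset of the TCP checksum field within the packet.
     * old/new: the IPv6 address before and after the rewrite.
     */
    static __always_inline int
    fixup_tcp_csum_v6(struct __sk_buff *skb, __u32 csum_off,
                      struct in6_addr *old, struct in6_addr *new)
    {
            __s64 diff = bpf_csum_diff((__be32 *)old, sizeof(*old),
                                       (__be32 *)new, sizeof(*new), 0);

            if (diff < 0)
                    return diff;

            /* Size bits left at 0: apply the precomputed diff. BPF_F_IPV6
             * tells the kernel not to touch skb->csum for CHECKSUM_COMPLETE
             * packets, as described above.
             */
            return bpf_l4_csum_replace(skb, csum_off, 0, diff,
                                       BPF_F_PSEUDO_HDR | BPF_F_IPV6);
    }
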
Fixes: 7d672345ed295 ("bpf: add generic bpf_csum_diff helper") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Link: https://patch.msgid.link/96a6bc3a443e6f0b21ff7b7834000e17fb549e05.1748509484.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 85180e4aaa5a..0b4a2f124d11 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2056,6 +2056,7 @@ union bpf_attr { * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -6072,6 +6073,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -- cgit v1.2.3 From c9b26429c8c7aa2836322aeab38be4254d166e02 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 24 Apr 2025 14:56:52 +0300 Subject: fbdev: atyfb: Remove unused PCI vendor ID The custom definition of PCI vendor ID in video/mach64.h is unused. Remove it. Note, that the proper one is available in pci_ids.h. Signed-off-by: Andy Shevchenko Signed-off-by: Helge Deller --- include/video/mach64.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/video/mach64.h b/include/video/mach64.h index d96e3c189634..f1709f7c8421 100644 --- a/include/video/mach64.h +++ b/include/video/mach64.h @@ -934,9 +934,6 @@ #define MEM_BNDRY_EN 0x00040000 #define ONE_MB 0x100000 -/* ATI PCI constants */ -#define PCI_ATI_VENDOR_ID 0x1002 - /* CNFG_CHIP_ID register constants */ #define CFG_CHIP_TYPE 0x0000FFFF -- cgit v1.2.3 From ab03a61c66149327e022bdafa5843c6f82be267e Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Thu, 29 May 2025 17:47:10 -0600 Subject: ublk: have a per-io daemon instead of a per-queue daemon Currently, ublk_drv associates to each hardware queue (hctx) a unique task (called the queue's ubq_daemon) which is allowed to issue COMMIT_AND_FETCH commands against the hctx. If any other task attempts to do so, the command fails immediately with EINVAL. When considered together with the block layer architecture, the result is that for each CPU C on the system, there is a unique ublk server thread which is allowed to handle I/O submitted on CPU C. This can lead to suboptimal performance under imbalanced load generation. For an extreme example, suppose all the load is generated on CPUs mapping to a single ublk server thread. Then that thread may be fully utilized and become the bottleneck in the system, while other ublk server threads are totally idle. This issue can also be addressed directly in the ublk server without kernel support by having threads dequeue I/Os and pass them around to ensure even load. But this solution requires inter-thread communication at least twice for each I/O (submission and completion), which is generally a bad pattern for performance. The problem gets even worse with zero copy, as more inter-thread communication would be required to have the buffer register/unregister calls to come from the correct thread. 
Therefore, address this issue in ublk_drv by allowing each I/O to have its own daemon task. Two I/Os in the same queue are now allowed to be serviced by different daemon tasks - this was not possible before. Imbalanced load can then be balanced across all ublk server threads by having the ublk server threads issue FETCH_REQs in a round-robin manner. As a small toy example, consider a system with a single ublk device having 2 queues, each of depth 4. A ublk server having 4 threads could issue its FETCH_REQs against this device as follows (where each entry is the qid,tag pair that the FETCH_REQ targets): ublk server thread: T0 T1 T2 T3 0,0 0,1 0,2 0,3 1,3 1,0 1,1 1,2 This setup allows for load that is concentrated on one hctx/ublk_queue to be spread out across all ublk server threads, alleviating the issue described above. Add the new UBLK_F_PER_IO_DAEMON feature to ublk_drv, which ublk servers can use to essentially test for the presence of this change and tailor their behavior accordingly. Signed-off-by: Uday Shankar Reviewed-by: Caleb Sander Mateos Reviewed-by: Ming Lei Reviewed-by: Jens Axboe Link: https://lore.kernel.org/r/20250529-ublk_task_per_io-v8-1-e9d3b119336a@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 56c7e3fc666f..77d9d6af46da 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -272,6 +272,15 @@ */ #define UBLK_F_QUIESCE (1ULL << 12) +/* + * If this feature is set, ublk_drv supports each (qid,tag) pair having + * its own independent daemon task that is responsible for handling it. + * If it is not set, daemons are per-queue instead, so for two pairs + * (qid1,tag1) and (qid2,tag2), if qid1 == qid2, then the same task must + * be responsible for handling (qid1,tag1) and (qid2,tag2). + */ +#define UBLK_F_PER_IO_DAEMON (1ULL << 13) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From acc53a0b4c156877773da6e9eea4113dc7e770ae Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 14 May 2025 19:15:07 +0100 Subject: mm: rename page->index to page->__folio_index All users of page->index have been converted to not refer to it any more. Update a few pieces of documentation that were missed and prevent new users from appearing (or at least make them easy to grep for). Link: https://lkml.kernel.org/r/20250514181508.3019795-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- include/linux/mm_types.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index cd2e513189d6..5009c53ff1fe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1276,9 +1276,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf); * the page's disk buffers. PG_private must be set to tell the VM to call * into the filesystem to release these pages. * - * A page may belong to an inode's memory mapping. In this case, page->mapping - * is the pointer to the inode, and page->index is the file offset of the page, - * in units of PAGE_SIZE. + * A folio may belong to an inode's memory mapping. In this case, + * folio->mapping points to the inode, and folio->index is the file + * offset of the folio, in units of PAGE_SIZE. 
* * If pagecache pages are not associated with an inode, they are said to be * anonymous pages. These may become associated with the swapcache, and in that diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3e934dc6057c..17e0dcb87aae 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -107,7 +107,7 @@ struct page { /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; union { - pgoff_t index; /* Our offset within mapping. */ + pgoff_t __folio_index; /* Our offset within mapping. */ unsigned long share; /* share count for fsdax */ }; /** @@ -488,7 +488,7 @@ FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); FOLIO_MATCH(mapping, mapping); FOLIO_MATCH(compound_head, lru); -FOLIO_MATCH(index, index); +FOLIO_MATCH(__folio_index, index); FOLIO_MATCH(private, private); FOLIO_MATCH(_mapcount, _mapcount); FOLIO_MATCH(_refcount, _refcount); @@ -589,7 +589,7 @@ TABLE_MATCH(flags, __page_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); -TABLE_MATCH(index, pt_index); +TABLE_MATCH(__folio_index, pt_index); TABLE_MATCH(rcu_head, pt_rcu_head); TABLE_MATCH(page_type, __page_type); TABLE_MATCH(_refcount, __page_refcount); -- cgit v1.2.3 From d9736929445e7f4c60f0093af61ff0b52e2d4412 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 14 May 2025 18:06:04 +0100 Subject: iov: remove copy_page_from_iter_atomic() All callers now use copy_folio_from_iter_atomic(), so convert copy_page_from_iter_atomic(). While I'm in there, use kmap_local_folio() and pagefault_disable() instead of kmap_atomic(). That allows preemption and/or task migration to happen during the copy_from_user(). Also use the new folio_test_partial_kmap() predicate instead of open-coding it. 
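For context, the mapping-pattern change described above amounts to the following two fragments (a simplified sketch of the old and new patterns, not the actual lib/iov_iter.c code):

    /* Before: kmap_atomic() disables preemption and migration for the
     * whole user copy.
     */
    void *p = kmap_atomic(page);
    left = copy_from_user(p + offset, ubuf, len);
    kunmap_atomic(p);

    /* After: only page faults are disabled; preemption and task migration
     * remain possible while copying from user space, and a faulting copy
     * simply returns the number of bytes not copied.
     */
    void *q = kmap_local_folio(folio, offset);
    pagefault_disable();
    left = copy_from_user(q, ubuf, len);
    pagefault_enable();
    kunmap_local(q);
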
Link: https://lkml.kernel.org/r/20250514170607.3000994-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Alexander Viro Cc: Hugh Dickins Cc: Konstantin Komarov Signed-off-by: Andrew Morton --- include/linux/uio.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 49ece9e1888f..e46477482663 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -176,8 +176,6 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } -size_t copy_page_from_iter_atomic(struct page *page, size_t offset, - size_t bytes, struct iov_iter *i); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes); @@ -187,6 +185,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); +size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, + size_t bytes, struct iov_iter *i); size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); @@ -204,12 +204,6 @@ static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset, return copy_page_from_iter(&folio->page, offset, bytes, i); } -static inline size_t copy_folio_from_iter_atomic(struct folio *folio, - size_t offset, size_t bytes, struct iov_iter *i) -{ - return copy_page_from_iter_atomic(&folio->page, offset, bytes, i); -} - size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i); -- cgit v1.2.3 From 940b01fc8dc1aead398819215650727cb9e7335e Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Sun, 18 May 2025 23:31:39 -0700 Subject: memcg: nmi safe memcg stats for specific archs There are archs which have NMI but does not support this_cpu_* ops safely in the nmi context but they support safe atomic ops in nmi context. For such archs, let's add infra to use atomic ops for the memcg stats which can be updated in nmi. At the moment, the memcg stats which get updated in the objcg charging path are MEMCG_KMEM, NR_SLAB_RECLAIMABLE_B & NR_SLAB_UNRECLAIMABLE_B. Rather than adding support for all memcg stats to be nmi safe, let's just add infra to make these three stats nmi safe which this patch is doing. 
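The shape of the infrastructure this enables can be sketched as follows (the helper name is hypothetical and for illustration only; the real accounting paths live in mm/memcontrol.c):

    static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
    {
    #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
            /* this_cpu_* ops are not NMI safe on these archs, but plain
             * atomics are, so fall back to the dedicated atomic counter.
             */
            if (unlikely(in_nmi())) {
                    atomic_add(nr_pages, &memcg->kmem_stat);
                    return;
            }
    #endif
            mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);
    }
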
Link: https://lkml.kernel.org/r/20250519063142.111219-3-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka Cc: Alexei Starovoitov Cc: Johannes Weiner Cc: Mathieu Desnoyers Cc: Michal Hocko Cc: Muchun Song Cc: Peter Zijlstra Cc: Roman Gushchin Cc: Sebastian Andrzej Siewior Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f7848f73f41c..87b6688f124a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -113,6 +113,12 @@ struct mem_cgroup_per_node { CACHELINE_PADDING(_pad2_); unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter; + +#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC + /* slab stats for nmi context */ + atomic_t slab_reclaimable; + atomic_t slab_unreclaimable; +#endif }; struct mem_cgroup_threshold { @@ -236,6 +242,10 @@ struct mem_cgroup { atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS]; +#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC + /* MEMCG_KMEM for nmi context */ + atomic_t kmem_stat; +#endif /* * Hint of reclaim pressure for socket memroy management. Note * that this indicator should NOT be used in legacy cgroup mode -- cgit v1.2.3 From bfe125f1b1870c7b5f05b489a525042d6715fcc1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 22 May 2025 01:28:38 +0000 Subject: mmu_gather: move tlb flush for VM_PFNMAP/VM_MIXEDMAP vmas into free_pgtables() Commit b67fbebd4cf9 ("mmu_gather: Force tlb-flush VM_PFNMAP vmas") added a forced tlbflush to tlb_vma_end(), which is required to avoid a race between munmap() and unmap_mapping_range(). However it added some overhead to other paths where tlb_vma_end() is used, but vmas are not removed, e.g. madvise(MADV_DONTNEED). Fix this by moving the tlb flush out of tlb_end_vma() into new tlb_flush_vmas() called from free_pgtables(), somewhat similar to the stable version of the original commit: commit 895428ee124a ("mm: Force TLB flush for PFNMAP mappings before unlink_file_vma()"). Note, that if tlb->fullmm is set, no flush is required, as the whole mm is about to be destroyed. Link: https://lkml.kernel.org/r/20250522012838.163876-1-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Reviewed-by: Jann Horn Acked-by: Hugh Dickins Acked-by: Peter Zijlstra (Intel) Cc: Will Deacon Cc: "Aneesh Kumar K.V" Cc: Nick Piggin Signed-off-by: Andrew Morton --- include/asm-generic/tlb.h | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 88a42973fa47..1fff717cae51 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -58,6 +58,11 @@ * Defaults to flushing at tlb_end_vma() to reset the range; helps when * there's large holes between the VMAs. * + * - tlb_free_vmas() + * + * tlb_free_vmas() marks the start of unlinking of one or more vmas + * and freeing page-tables. 
+ * * - tlb_remove_table() * * tlb_remove_table() is the basic primitive to free page-table directories @@ -464,7 +469,12 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) */ tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); - tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)); + + /* + * Track if there's at least one VM_PFNMAP/VM_MIXEDMAP vma + * in the tracked range, see tlb_free_vmas(). + */ + tlb->vma_pfn |= !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)); } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) @@ -547,23 +557,39 @@ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct * } static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) + return; + + /* + * Do a TLB flush and reset the range at VMA boundaries; this avoids + * the ranges growing with the unused space between consecutive VMAs, + * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on + * this. + */ + tlb_flush_mmu_tlbonly(tlb); +} + +static inline void tlb_free_vmas(struct mmu_gather *tlb) { if (tlb->fullmm) return; /* * VM_PFNMAP is more fragile because the core mm will not track the - * page mapcount -- there might not be page-frames for these PFNs after - * all. Force flush TLBs for such ranges to avoid munmap() vs - * unmap_mapping_range() races. + * page mapcount -- there might not be page-frames for these PFNs + * after all. + * + * Specifically() there is a race between munmap() and + * unmap_mapping_range(), where munmap() will unlink the VMA, such + * that unmap_mapping_range() will no longer observe the VMA and + * no-op, without observing the TLBI, returning prematurely. + * + * So if we're about to unlink such a VMA, and we have pending + * TLBI for such a vma, flush things now. */ - if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) { - /* - * Do a TLB flush and reset the range at VMA boundaries; this avoids - * the ranges growing with the unused space between consecutive VMAs. - */ + if (tlb->vma_pfn) tlb_flush_mmu_tlbonly(tlb); - } } /* -- cgit v1.2.3 From 49c69504f4d340d870f2c3f3d2f404c118ff7b23 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 23 May 2025 00:30:17 +0200 Subject: mmu_notifiers: remove leftover stub macros Commit ec8832d007cb ("mmu_notifiers: don't invalidate secondary TLBs as part of mmu_notifier_invalidate_range_end()") removed the main definitions of {ptep,pmdp_huge,pudp_huge}_clear_flush_notify; just their !CONFIG_MMU_NOTIFIER stubs are left behind, remove them. 
Link: https://lkml.kernel.org/r/20250523-mmu-notifier-cleanup-unused-v1-1-cc1f47ebec33@google.com Signed-off-by: Jann Horn Reviewed-by: Alistair Popple Reviewed-by: Qi Zheng Reviewed-by: Jason Gunthorpe Signed-off-by: Andrew Morton --- include/linux/mmu_notifier.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index bc2402a45741..d1094c2d5fb6 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -654,9 +654,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) #define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_young_notify ptep_test_and_clear_young #define pmdp_clear_young_notify pmdp_test_and_clear_young -#define ptep_clear_flush_notify ptep_clear_flush -#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush -#define pudp_huge_clear_flush_notify pudp_huge_clear_flush static inline void mmu_notifier_synchronize(void) { -- cgit v1.2.3 From e13e7922d03439e374c263049af5f740ceae6346 Mon Sep 17 00:00:00 2001 From: Juan Yescas Date: Wed, 21 May 2025 14:57:45 -0700 Subject: mm: add CONFIG_PAGE_BLOCK_ORDER to select page block order Problem: On large page size configurations (16KiB, 64KiB), the CMA alignment requirement (CMA_MIN_ALIGNMENT_BYTES) increases considerably, and this causes the CMA reservations to be larger than necessary. This means that system will have less available MIGRATE_UNMOVABLE and MIGRATE_RECLAIMABLE page blocks since MIGRATE_CMA can't fallback to them. The CMA_MIN_ALIGNMENT_BYTES increases because it depends on MAX_PAGE_ORDER which depends on ARCH_FORCE_MAX_ORDER. The value of ARCH_FORCE_MAX_ORDER increases on 16k and 64k kernels. For example, in ARM, the CMA alignment requirement when: - CONFIG_ARCH_FORCE_MAX_ORDER default value is used - CONFIG_TRANSPARENT_HUGEPAGE is set: PAGE_SIZE | MAX_PAGE_ORDER | pageblock_order | CMA_MIN_ALIGNMENT_BYTES ----------------------------------------------------------------------- 4KiB | 10 | 9 | 4KiB * (2 ^ 9) = 2MiB 16Kib | 11 | 11 | 16KiB * (2 ^ 11) = 32MiB 64KiB | 13 | 13 | 64KiB * (2 ^ 13) = 512MiB There are some extreme cases for the CMA alignment requirement when: - CONFIG_ARCH_FORCE_MAX_ORDER maximum value is set - CONFIG_TRANSPARENT_HUGEPAGE is NOT set: - CONFIG_HUGETLB_PAGE is NOT set PAGE_SIZE | MAX_PAGE_ORDER | pageblock_order | CMA_MIN_ALIGNMENT_BYTES ------------------------------------------------------------------------ 4KiB | 15 | 15 | 4KiB * (2 ^ 15) = 128MiB 16Kib | 13 | 13 | 16KiB * (2 ^ 13) = 128MiB 64KiB | 13 | 13 | 64KiB * (2 ^ 13) = 512MiB This affects the CMA reservations for the drivers. If a driver in a 4KiB kernel needs 4MiB of CMA memory, in a 16KiB kernel, the minimal reservation has to be 32MiB due to the alignment requirements: reserved-memory { ... cma_test_reserve: cma_test_reserve { compatible = "shared-dma-pool"; size = <0x0 0x400000>; /* 4 MiB */ ... }; }; reserved-memory { ... cma_test_reserve: cma_test_reserve { compatible = "shared-dma-pool"; size = <0x0 0x2000000>; /* 32 MiB */ ... }; }; Solution: Add a new config CONFIG_PAGE_BLOCK_ORDER that allows to set the page block order in all the architectures. The maximum page block order will be given by ARCH_FORCE_MAX_ORDER. By default, CONFIG_PAGE_BLOCK_ORDER will have the same value that ARCH_FORCE_MAX_ORDER. This will make sure that current kernel configurations won't be affected by this change. It is a opt-in change. 
This patch will allow to have the same CMA alignment requirements for large page sizes (16KiB, 64KiB) as that in 4kb kernels by setting a lower pageblock_order. Tests: - Verified that HugeTLB pages work when pageblock_order is 1, 7, 10 on 4k and 16k kernels. - Verified that Transparent Huge Pages work when pageblock_order is 1, 7, 10 on 4k and 16k kernels. - Verified that dma-buf heaps allocations work when pageblock_order is 1, 7, 10 on 4k and 16k kernels. Benchmarks: The benchmarks compare 16kb kernels with pageblock_order 10 and 7. The reason for the pageblock_order 7 is because this value makes the min CMA alignment requirement the same as that in 4kb kernels (2MB). - Perform 100K dma-buf heaps (/dev/dma_heap/system) allocations of SZ_8M, SZ_4M, SZ_2M, SZ_1M, SZ_64, SZ_8, SZ_4. Use simpleperf (https://developer.android.com/ndk/guides/simpleperf) to measure the # of instructions and page-faults on 16k kernels. The benchmark was executed 10 times. The averages are below: # instructions | #page-faults order 10 | order 7 | order 10 | order 7 -------------------------------------------------------- 13,891,765,770 | 11,425,777,314 | 220 | 217 14,456,293,487 | 12,660,819,302 | 224 | 219 13,924,261,018 | 13,243,970,736 | 217 | 221 13,910,886,504 | 13,845,519,630 | 217 | 221 14,388,071,190 | 13,498,583,098 | 223 | 224 13,656,442,167 | 12,915,831,681 | 216 | 218 13,300,268,343 | 12,930,484,776 | 222 | 218 13,625,470,223 | 14,234,092,777 | 219 | 218 13,508,964,965 | 13,432,689,094 | 225 | 219 13,368,950,667 | 13,683,587,37 | 219 | 225 ------------------------------------------------------------------- 13,803,137,433 | 13,131,974,268 | 220 | 220 Averages There were 4.85% #instructions when order was 7, in comparison with order 10. 13,803,137,433 - 13,131,974,268 = -671,163,166 (-4.86%) The number of page faults in order 7 and 10 were the same. These results didn't show any significant regression when the pageblock_order is set to 7 on 16kb kernels. - Run speedometer 3.1 (https://browserbench.org/Speedometer3.1/) 5 times on the 16k kernels with pageblock_order 7 and 10. order 10 | order 7 | order 7 - order 10 | (order 7 - order 10) % ------------------------------------------------------------------- 15.8 | 16.4 | 0.6 | 3.80% 16.4 | 16.2 | -0.2 | -1.22% 16.6 | 16.3 | -0.3 | -1.81% 16.8 | 16.3 | -0.5 | -2.98% 16.6 | 16.8 | 0.2 | 1.20% ------------------------------------------------------------------- 16.44 16.4 -0.04 -0.24% Averages The results didn't show any significant regression when the pageblock_order is set to 7 on 16kb kernels. Link: https://lkml.kernel.org/r/20250521215807.1860663-1-jyescas@google.com Signed-off-by: Juan Yescas Acked-by: Zi Yan Reviewed-by: Vlastimil Babka Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: David Hildenbrand Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Minchan Kim Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 16 ++++++++++++++++ include/linux/pageblock-flags.h | 8 ++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b19a98c20de8..87a667533d6d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -37,6 +37,22 @@ #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) +/* Defines the order for the number of pages that have a migrate type. 
*/ +#ifndef CONFIG_PAGE_BLOCK_ORDER +#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER +#else +#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER +#endif /* CONFIG_PAGE_BLOCK_ORDER */ + +/* + * The MAX_PAGE_ORDER, which defines the max order of pages to be allocated + * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER, + * which defines the order for the number of pages that can have a migrate type + */ +#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER) +#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER +#endif + /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index fc6b9c87cb0a..e73a4292ef02 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,18 +41,18 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #elif defined(CONFIG_TRANSPARENT_HUGEPAGE) -#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ -/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ -#define pageblock_order MAX_PAGE_ORDER +/* If huge pages are not used, group by PAGE_BLOCK_ORDER */ +#define pageblock_order PAGE_BLOCK_ORDER #endif /* CONFIG_HUGETLB_PAGE */ -- cgit v1.2.3 From ad6b26b6a0a79166b53209df2ca1cf8636296382 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 23 May 2025 20:51:15 +0800 Subject: sched/numa: add statistics of numa balance task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On systems with NUMA balancing enabled, it has been found that tracking task activities resulting from NUMA balancing is beneficial. NUMA balancing employs two mechanisms for task migration: one is to migrate a task to an idle CPU within its preferred node, and the other is to swap tasks located on different nodes when they are on each other's preferred nodes. The kernel already provides NUMA page migration statistics in /sys/fs/cgroup/mytest/memory.stat and /proc/{PID}/sched. However, it lacks statistics regarding task migration and swapping. Therefore, relevant counts for task migration and swapping should be added. The following two new fields: numa_task_migrated numa_task_swapped will be shown in /sys/fs/cgroup/{GROUP}/memory.stat, /proc/{PID}/sched and /proc/vmstat. Introducing both per-task and per-memory cgroup (memcg) NUMA balancing statistics facilitates a rapid evaluation of the performance and resource utilization of the target workload. For instance, users can first identify the container with high NUMA balancing activity and then further pinpoint a specific task within that group, and subsequently adjust the memory policy for that task. In short, although it is possible to iterate through /proc/$pid/sched to locate the problematic task, the introduction of aggregated NUMA balancing activity for tasks within each memcg can assist users in identifying the task more efficiently through a divide-and-conquer approach. 
As Libo Chen pointed out, the memcg event relies on the text names in vmstat_text, and /proc/vmstat generates corresponding items based on vmstat_text. Thus, the relevant task migration and swapping events introduced in vmstat_text also need to be populated by count_vm_numa_event(), otherwise these values are zero in /proc/vmstat. In theory, task migration and swap events are part of the scheduler's activities. The reason for exposing them through the memory.stat/vmstat interface is that we already have NUMA balancing statistics in memory.stat/vmstat, and these events are closely related to each other. Following Shakeel's suggestion, we describe the end-to-end flow/story of all these events occurring on a timeline for future reference: The goal of NUMA balancing is to co-locate a task and its memory pages on the same NUMA node. There are two strategies: migrate the pages to the task's node, or migrate the task to the node where its pages reside. Suppose a task p1 is running on Node 0, but its pages are located on Node 1. NUMA page fault statistics for p1 reveal its "page footprint" across nodes. If NUMA balancing detects that most of p1's pages are on Node 1: 1.Page Migration Attempt: The Numa balance first tries to migrate p1's pages to Node 0. The numa_page_migrate counter increments. 2.Task Migration Strategies: After the page migration finishes, Numa balance checks every 1 second to see if p1 can be migrated to Node 1. Case 2.1: Idle CPU Available If Node 1 has an idle CPU, p1 is directly scheduled there. This event is logged as numa_task_migrated. Case 2.2: No Idle CPU (Task Swap) If all CPUs on Node1 are busy, direct migration could cause CPU contention or load imbalance. Instead: The Numa balance selects a candidate task p2 on Node 1 that prefers Node 0 (e.g., due to its own page footprint). p1 and p2 are swapped. This cross-node swap is recorded as numa_task_swapped. 
Link: https://lkml.kernel.org/r/d00edb12ba0f0de3c5222f61487e65f2ac58f5b1.1748493462.git.yu.c.chen@intel.com Link: https://lkml.kernel.org/r/7ef90a88602ed536be46eba7152ed0d33bad5790.1748002400.git.yu.c.chen@intel.com Signed-off-by: Chen Yu Tested-by: K Prateek Nayak Tested-by: Madadi Vineeth Reddy Acked-by: Peter Zijlstra (Intel) Tested-by: Venkat Rao Bagalkote Cc: Aubrey Li Cc: Ayush Jain Cc: "Chen, Tim C" Cc: Ingo Molnar Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Libo Chen Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/sched.h | 4 ++++ include/linux/vm_event_item.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f96ac1982893..1c50e30b5c01 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -549,6 +549,10 @@ struct sched_statistics { u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; +#ifdef CONFIG_NUMA_BALANCING + u64 numa_task_migrated; + u64 numa_task_swapped; +#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 9e15a088ba38..91a3ce9a2687 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,6 +66,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, + NUMA_TASK_MIGRATE, + NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, -- cgit v1.2.3 From 5c78e793f78732b60276401f75cc1a101f9ad121 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 30 May 2025 12:06:47 -0700 Subject: overflow: Introduce __DEFINE_FLEX for having no initializer While not yet in the tree, there is a proposed patch[1] that was depending on the prior behavior of _DEFINE_FLEX, which did not have an explicit initializer. Provide this via __DEFINE_FLEX now, which can also have attributes applied (e.g. __uninitialized). Examples of the resulting initializer behaviors can be seen here: https://godbolt.org/z/P7Go8Tr33 Link: https://lore.kernel.org/netdev/20250520205920.2134829-9-anthony.l.nguyen@intel.com [1] Fixes: 47e36ed78406 ("overflow: Fix direct struct member initialization in _DEFINE_FLEX()") Signed-off-by: Kees Cook --- include/linux/overflow.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 7b7be27ca113..154ed0dbb43f 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -389,24 +389,37 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) struct_size((type *)NULL, member, count) /** - * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. - * Enables caller macro to pass (different) initializer. + * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass arbitrary trailing expressions * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. - * @initializer: Initializer expression (e.g., pass `= { }` at minimum). + * @trailer: Trailing expressions for attributes and/or initializers. */ -#define _DEFINE_FLEX(type, name, member, count, initializer...) \ +#define __DEFINE_FLEX(type, name, member, count, trailer...) 
\ _Static_assert(__builtin_constant_p(count), \ "onstack flex array members require compile-time const count"); \ union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ - } name##_u = { .obj initializer }; \ + } name##_u trailer; \ type *name = (type *)&name##_u +/** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: Initializer expression (e.g., pass `= { }` at minimum). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer...) \ + __DEFINE_FLEX(type, name, member, count, = { .obj initializer }) + /** * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing * flexible array member, when it does not have a __counted_by annotation. @@ -424,7 +437,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * elements in array @member. */ #define DEFINE_RAW_FLEX(type, name, member, count) \ - _DEFINE_FLEX(type, name, member, count, = {}) + __DEFINE_FLEX(type, name, member, count, = { }) /** * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing -- cgit v1.2.3 From 1c8a0ed2043c30cee97facd1eb8cff88b6c7ea4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 7 Apr 2025 13:12:14 +0300 Subject: PCI: Remove unused pci_printk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit include/linux/pci.h provides low-level pci_printk() interface that is not used since the commits fab874e12593 ("PCI/AER: Descope pci_printk() to aer_printk()") and 588021b28642 ("PCI: shpchp: Remove 'shpchp_debug' module parameter"). PCI logging should not use pci_printk() but pci_*() wrappers that follow the usual logging wrapper patterns. Remove pci_printk(). Signed-off-by: Ilpo Järvinen Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20250407101215.1376-1-ilpo.jarvinen@linux.intel.com --- include/linux/pci.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..e293ad5d840d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2694,9 +2694,6 @@ void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #include -#define pci_printk(level, pdev, fmt, arg...) \ - dev_printk(level, &(pdev)->dev, fmt, ##arg) - #define pci_emerg(pdev, fmt, arg...) dev_emerg(&(pdev)->dev, fmt, ##arg) #define pci_alert(pdev, fmt, arg...) dev_alert(&(pdev)->dev, fmt, ##arg) #define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg) -- cgit v1.2.3 From e27e43a5cbda77d211757eb83101f11f67788204 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 29 May 2025 16:05:50 -0400 Subject: xdp: Remove unused mem_return_failed event The change to allow page_pool to handle its own page destruction instead of relying on XDP removed the trace_mem_return_failed() tracepoint caller, but did not remove the mem_return_failed trace event. As trace events take up memory when they are created regardless of if they are used or not, having this unused event around wastes around 5K of memory. Remove the unused event. 
Link: https://lore.kernel.org/all/20250529130138.544ffec4@gandalf.local.home/ Cc: netdev Cc: Jonathan Lemon Cc: Mathieu Desnoyers Cc: Masami Hiramatsu Link: https://lore.kernel.org/20250529160550.1f888b15@gandalf.local.home Fixes: c3f812cea0d7 ("page_pool: do not release pool until inflight == 0.") Acked-by: Jesper Dangaard Brouer Acked-by: Jakub Kicinski Signed-off-by: Steven Rostedt (Google) --- include/trace/events/xdp.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index a7e5452b5d21..d3ef86c97ae3 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -379,32 +379,6 @@ TRACE_EVENT(mem_connect, ) ); -TRACE_EVENT(mem_return_failed, - - TP_PROTO(const struct xdp_mem_info *mem, - const struct page *page), - - TP_ARGS(mem, page), - - TP_STRUCT__entry( - __field(const struct page *, page) - __field(u32, mem_id) - __field(u32, mem_type) - ), - - TP_fast_assign( - __entry->page = page; - __entry->mem_id = mem->id; - __entry->mem_type = mem->type; - ), - - TP_printk("mem_id=%d mem_type=%s page=%p", - __entry->mem_id, - __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), - __entry->page - ) -); - TRACE_EVENT(bpf_xdp_link_attach_failed, TP_PROTO(const char *msg), -- cgit v1.2.3 From 167d7ede0007d320a891494be57b635b4c069995 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 29 May 2025 13:57:39 -0400 Subject: genirq/matrix: Remove unused irq_matrix_alloc_reserved tracepoint The tracepoint irq_matrix_alloc_reserved was added but never used. Remove it. Link: https://lore.kernel.org/all/20250529130138.544ffec4@gandalf.local.home/ Cc: Juergen Gross Cc: Mathieu Desnoyers Cc: Masami Hiramatsu Link: https://lore.kernel.org/20250529135739.26e5c075@gandalf.local.home Fixes: ec0f7cd273dc4 ("genirq/matrix: Add tracepoints") Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt (Google) --- include/trace/events/irq_matrix.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/irq_matrix.h b/include/trace/events/irq_matrix.h index 267d4cbbf360..93244078b4e6 100644 --- a/include/trace/events/irq_matrix.h +++ b/include/trace/events/irq_matrix.h @@ -138,14 +138,6 @@ DEFINE_EVENT(irq_matrix_global_update, irq_matrix_assign_system, TP_ARGS(bit, matrix) ); -DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc_reserved, - - TP_PROTO(int bit, unsigned int cpu, - struct irq_matrix *matrix, struct cpumap *cmap), - - TP_ARGS(bit, cpu, matrix, cmap) -); - DEFINE_EVENT(irq_matrix_cpu, irq_matrix_reserve_managed, TP_PROTO(int bit, unsigned int cpu, -- cgit v1.2.3 From b8628379957d0c8c057782ec1a8512de6d4ba29c Mon Sep 17 00:00:00 2001 From: Sumanth Gavini Date: Mon, 19 May 2025 19:08:05 -0700 Subject: mfd: maxim: Correct Samsung "Electronics" spelling in headers Fix the misspelling of 'Electronics' in MFD driver headers. 
Link: https://lore.kernel.org/lkml/3aa30119-60e5-4dcb-b13a-1753966ca775@sirena.org.uk/#t Link: https://lore.kernel.org/r/20250520020808.159586-1-sumanth.gavini@yahoo.com Signed-off-by: Sumanth Gavini Signed-off-by: Lee Jones --- include/linux/mfd/max8997-private.h | 2 +- include/linux/mfd/max8998-private.h | 2 +- include/linux/mfd/max8998.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h index f70eea0f2264..261c0aae7d00 100644 --- a/include/linux/mfd/max8997-private.h +++ b/include/linux/mfd/max8997-private.h @@ -2,7 +2,7 @@ /* * max8997-private.h - Voltage regulator driver for the Maxim 8997 * - * Copyright (C) 2010 Samsung Electrnoics + * Copyright (C) 2010 Samsung Electronics * MyungJoo Ham */ diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 6deb5f577602..d77dc18db6eb 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -2,7 +2,7 @@ /* * max8998-private.h - Voltage regulator driver for the Maxim 8998 * - * Copyright (C) 2009-2010 Samsung Electrnoics + * Copyright (C) 2009-2010 Samsung Electronics * Kyungmin Park * Marek Szyprowski */ diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index a054e55c8646..5473f1983e31 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -2,7 +2,7 @@ /* * max8998.h - Voltage regulator driver for the Maxim 8998 * - * Copyright (C) 2009-2010 Samsung Electrnoics + * Copyright (C) 2009-2010 Samsung Electronics * Kyungmin Park * Marek Szyprowski */ -- cgit v1.2.3 From ffb006aa433e8109ec79320c344fb69947997ba1 Mon Sep 17 00:00:00 2001 From: Sumanth Gavini Date: Mon, 19 May 2025 16:20:23 -0700 Subject: mfd: maxim: Correct Samsung "Electronics" spelling in copyright headers Fix the misspelling of 'Electronics' in MFD driver copyright headers. 
Link: https://lore.kernel.org/lkml/3aa30119-60e5-4dcb-b13a-1753966ca775@sirena.org.uk/#t Link: https://lore.kernel.org/r/20250519232025.152769-1-sumanth.gavini@yahoo.com Signed-off-by: Sumanth Gavini Signed-off-by: Lee Jones --- include/linux/mfd/max14577-private.h | 2 +- include/linux/mfd/max14577.h | 2 +- include/linux/mfd/max77686-private.h | 2 +- include/linux/mfd/max77686.h | 2 +- include/linux/mfd/max77693-private.h | 2 +- include/linux/mfd/max77693.h | 2 +- include/linux/mfd/max8997.h | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h index a21374f8ad26..dd51a37fa37f 100644 --- a/include/linux/mfd/max14577-private.h +++ b/include/linux/mfd/max14577-private.h @@ -2,7 +2,7 @@ /* * max14577-private.h - Common API for the Maxim 14577/77836 internal sub chip * - * Copyright (C) 2014 Samsung Electrnoics + * Copyright (C) 2014 Samsung Electronics * Chanwoo Choi * Krzysztof Kozlowski */ diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h index 8b3ef891ba42..0fda5c2e745a 100644 --- a/include/linux/mfd/max14577.h +++ b/include/linux/mfd/max14577.h @@ -2,7 +2,7 @@ /* * max14577.h - Driver for the Maxim 14577/77836 * - * Copyright (C) 2014 Samsung Electrnoics + * Copyright (C) 2014 Samsung Electronics * Chanwoo Choi * Krzysztof Kozlowski * diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index ea635d12a741..e6b8b4014dc0 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -2,7 +2,7 @@ /* * max77686-private.h - Voltage regulator driver for the Maxim 77686/802 * - * Copyright (C) 2012 Samsung Electrnoics + * Copyright (C) 2012 Samsung Electronics * Chiwoong Byun */ diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h index d0fb510875e6..7c4624acd1db 100644 --- a/include/linux/mfd/max77686.h +++ b/include/linux/mfd/max77686.h @@ -2,7 +2,7 @@ /* * max77686.h - Driver for the Maxim 77686/802 * - * Copyright (C) 2012 Samsung Electrnoics + * Copyright (C) 2012 Samsung Electronics * Chiwoong Byun * * This driver is based on max8997.h diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index c324d548619e..8e7c35b5ea1c 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -2,7 +2,7 @@ /* * max77693-private.h - Voltage regulator driver for the Maxim 77693 * - * Copyright (C) 2012 Samsung Electrnoics + * Copyright (C) 2012 Samsung Electronics * SangYoung Son * * This program is not provided / owned by Maxim Integrated Products. diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h index c67c16ba8649..8e77ebeb7cf1 100644 --- a/include/linux/mfd/max77693.h +++ b/include/linux/mfd/max77693.h @@ -2,7 +2,7 @@ /* * max77693.h - Driver for the Maxim 77693 * - * Copyright (C) 2012 Samsung Electrnoics + * Copyright (C) 2012 Samsung Electronics * SangYoung Son * * This program is not provided / owned by Maxim Integrated Products. 
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index 5c2cc1103437..fb36e1386069 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -2,7 +2,7 @@ /* * max8997.h - Driver for the Maxim 8997/8966 * - * Copyright (C) 2009-2010 Samsung Electrnoics + * Copyright (C) 2009-2010 Samsung Electronics * MyungJoo Ham * * This driver is based on max8998.h -- cgit v1.2.3 From a4a45a9a72f3a9eaa17ec502d6e97c8eaa901825 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 29 May 2025 15:22:11 -0400 Subject: fsdax: Remove unused trace events for dax insert mapping When the dax_fault_actor() helper was factored out, it removed the calls to the dax_pmd_insert_mapping and dax_insert_mapping events but never removed the events themselves. As each event created takes up memory (roughly 5K each), this is a waste as it is never used. Remove the unused dax_pmd_insert_mapping and dax_insert_mapping trace events. Link: https://lore.kernel.org/all/20250529130138.544ffec4@gandalf.local.home/ Cc: Shiyang Ruan Cc: "Darrick J. Wong" Cc: Ross Zwisler Cc: Andrew Morton Link: https://lore.kernel.org/20250529152211.688800c9@gandalf.local.home Fixes: c2436190e492 ("fsdax: factor out a dax_fault_actor() helper") Acked-by: Dan Williams Reviewed-by: Alison Schofield Signed-off-by: Steven Rostedt (Google) --- include/trace/events/fs_dax.h | 78 ------------------------------------------- 1 file changed, 78 deletions(-) (limited to 'include') diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index 86fe6aecff1e..76b56f78abb0 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -102,54 +102,6 @@ DEFINE_EVENT(dax_pmd_load_hole_class, name, \ DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole); DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole_fallback); -DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class, - TP_PROTO(struct inode *inode, struct vm_fault *vmf, - long length, pfn_t pfn, void *radix_entry), - TP_ARGS(inode, vmf, length, pfn, radix_entry), - TP_STRUCT__entry( - __field(unsigned long, ino) - __field(unsigned long, vm_flags) - __field(unsigned long, address) - __field(long, length) - __field(u64, pfn_val) - __field(void *, radix_entry) - __field(dev_t, dev) - __field(int, write) - ), - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->vm_flags = vmf->vma->vm_flags; - __entry->address = vmf->address; - __entry->write = vmf->flags & FAULT_FLAG_WRITE; - __entry->length = length; - __entry->pfn_val = pfn.val; - __entry->radix_entry = radix_entry; - ), - TP_printk("dev %d:%d ino %#lx %s %s address %#lx length %#lx " - "pfn %#llx %s radix_entry %#lx", - MAJOR(__entry->dev), - MINOR(__entry->dev), - __entry->ino, - __entry->vm_flags & VM_SHARED ? "shared" : "private", - __entry->write ? 
"write" : "read", - __entry->address, - __entry->length, - __entry->pfn_val & ~PFN_FLAGS_MASK, - __print_flags_u64(__entry->pfn_val & PFN_FLAGS_MASK, "|", - PFN_FLAGS_TRACE), - (unsigned long)__entry->radix_entry - ) -) - -#define DEFINE_PMD_INSERT_MAPPING_EVENT(name) \ -DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \ - TP_PROTO(struct inode *inode, struct vm_fault *vmf, \ - long length, pfn_t pfn, void *radix_entry), \ - TP_ARGS(inode, vmf, length, pfn, radix_entry)) - -DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping); - DECLARE_EVENT_CLASS(dax_pte_fault_class, TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), TP_ARGS(inode, vmf, result), @@ -194,36 +146,6 @@ DEFINE_PTE_FAULT_EVENT(dax_load_hole); DEFINE_PTE_FAULT_EVENT(dax_insert_pfn_mkwrite_no_entry); DEFINE_PTE_FAULT_EVENT(dax_insert_pfn_mkwrite); -TRACE_EVENT(dax_insert_mapping, - TP_PROTO(struct inode *inode, struct vm_fault *vmf, void *radix_entry), - TP_ARGS(inode, vmf, radix_entry), - TP_STRUCT__entry( - __field(unsigned long, ino) - __field(unsigned long, vm_flags) - __field(unsigned long, address) - __field(void *, radix_entry) - __field(dev_t, dev) - __field(int, write) - ), - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->vm_flags = vmf->vma->vm_flags; - __entry->address = vmf->address; - __entry->write = vmf->flags & FAULT_FLAG_WRITE; - __entry->radix_entry = radix_entry; - ), - TP_printk("dev %d:%d ino %#lx %s %s address %#lx radix_entry %#lx", - MAJOR(__entry->dev), - MINOR(__entry->dev), - __entry->ino, - __entry->vm_flags & VM_SHARED ? "shared" : "private", - __entry->write ? "write" : "read", - __entry->address, - (unsigned long)__entry->radix_entry - ) -) - DECLARE_EVENT_CLASS(dax_writeback_range_class, TP_PROTO(struct inode *inode, pgoff_t start_index, pgoff_t end_index), TP_ARGS(inode, start_index, end_index), -- cgit v1.2.3 From 10f4a7cd724e34b7a6ff96e57ac49dc0cadececc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 20 May 2025 13:20:37 -0700 Subject: nvme: fix command limits status code The command specific status code, 0x183, was introduced in the NVMe 2.0 specification defined to "Command Size Limits Exceeded" and only ever applied to DSM and Copy commands. Fix the name and, remove the incorrect translation to error codes and special treatment in the target code for it. Fixes: 3b7c33b28a44d4 ("nvme.h: add Write Zeroes definitions") Cc: Chaitanya Kulkarni Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 51308f65b72f..b65a1b9f2116 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -2171,7 +2171,7 @@ enum { NVME_SC_BAD_ATTRIBUTES = 0x180, NVME_SC_INVALID_PI = 0x181, NVME_SC_READ_ONLY = 0x182, - NVME_SC_ONCS_NOT_SUPPORTED = 0x183, + NVME_SC_CMD_SIZE_LIM_EXCEEDED = 0x183, /* * I/O Command Set Specific - Fabrics commands: -- cgit v1.2.3 From 434d7f9b0e24e1f0166d05f10881a8ab386845b7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 30 May 2025 16:30:11 +0800 Subject: timens: Add struct seq_file forward declaration Add forward declaration of struct seq_file before using it in a function prototype. 
Fixes: 04a8682a71be ("fs/proc: Introduce /proc/pid/timens_offsets") Signed-off-by: Herbert Xu Signed-off-by: Thomas Gleixner Acked-by: Andrei Vagin Link: https://lore.kernel.org/all/aDlskzKIAULMlwPj@gondor.apana.org.au --- include/linux/time_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 0b8b32bf0655..bb2c52f4fc94 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -12,6 +12,7 @@ struct user_namespace; extern struct user_namespace init_user_ns; +struct seq_file; struct vm_area_struct; struct timens_offsets { -- cgit v1.2.3 From 1e1f706fc2ce90eaaf3480b3d5f27885960d751c Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 3 Jun 2025 15:35:38 +1000 Subject: wifi: cfg80211/mac80211: correctly parse S1G beacon optional elements S1G beacons are not traditional beacons but a type of extension frame. Extension frames contain the frame control and duration fields, followed by zero or more optional fields before the frame body. These optional fields are distinct from the variable length elements. The presence of optional fields is indicated in the frame control field. To correctly locate the elements offset, the frame control must be parsed to identify which optional fields are present. Currently, mac80211 parses S1G beacons based on fixed assumptions about the frame layout, without inspecting the frame control field. This can result in incorrect offsets to the "variable" portion of the frame. Properly parse S1G beacon frames by using the field lengths defined in IEEE 802.11-2024, section 9.3.4.3, ensuring that the elements offset is calculated accurately. Fixes: 9eaffe5078ca ("cfg80211: convert S1G beacon to scan results") Fixes: cd418ba63f0c ("mac80211: convert S1G beacon to scan results") Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250603053538.468562-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 79 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 420c7f9aa6ee..ce377f7fb912 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -111,6 +111,8 @@ /* bits unique to S1G beacon */ #define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 +#define IEEE80211_S1G_BCN_CSSID 0x200 +#define IEEE80211_S1G_BCN_ANO 0x400 /* see 802.11ah-2016 9.9 NDP CMAC frames */ #define IEEE80211_S1G_1MHZ_NDP_BITS 25 @@ -153,9 +155,6 @@ #define IEEE80211_ANO_NETTYPE_WILD 15 -/* bits unique to S1G beacon */ -#define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 - /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */ #define IEEE80211_CTL_EXT_POLL 0x2000 #define IEEE80211_CTL_EXT_SPR 0x3000 @@ -627,6 +626,42 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON); } +/** + * ieee80211_s1g_has_next_tbtt - check if IEEE80211_S1G_BCN_NEXT_TBTT + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * next TBTT field + */ +static inline bool ieee80211_s1g_has_next_tbtt(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); +} + +/** + * ieee80211_s1g_has_ano - check if IEEE80211_S1G_BCN_ANO + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains 
the variable-length + * ANO field + */ +static inline bool ieee80211_s1g_has_ano(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_ANO)); +} + +/** + * ieee80211_s1g_has_cssid - check if IEEE80211_S1G_BCN_CSSID + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * compressed SSID field + */ +static inline bool ieee80211_s1g_has_cssid(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID)); +} + /** * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon * @fc: frame control bytes in little-endian byteorder @@ -1245,16 +1280,40 @@ struct ieee80211_ext { u8 change_seq; u8 variable[0]; } __packed s1g_beacon; - struct { - u8 sa[ETH_ALEN]; - __le32 timestamp; - u8 change_seq; - u8 next_tbtt[3]; - u8 variable[0]; - } __packed s1g_short_beacon; } u; } __packed __aligned(2); +/** + * ieee80211_s1g_optional_len - determine length of optional S1G beacon fields + * @fc: frame control bytes in little-endian byteorder + * Return: total length in bytes of the optional fixed-length fields + * + * S1G beacons may contain up to three optional fixed-length fields that + * precede the variable-length elements. Whether these fields are present + * is indicated by flags in the frame control field. + * + * From IEEE 802.11-2024 section 9.3.4.3: + * - Next TBTT field may be 0 or 3 bytes + * - Short SSID field may be 0 or 4 bytes + * - Access Network Options (ANO) field may be 0 or 1 byte + */ +static inline size_t +ieee80211_s1g_optional_len(__le16 fc) +{ + size_t len = 0; + + if (ieee80211_s1g_has_next_tbtt(fc)) + len += 3; + + if (ieee80211_s1g_has_cssid(fc)) + len += 4; + + if (ieee80211_s1g_has_ano(fc)) + len += 1; + + return len; +} + #define IEEE80211_TWT_CONTROL_NDP BIT(0) #define IEEE80211_TWT_CONTROL_RESP_MODE BIT(1) #define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST BIT(3) -- cgit v1.2.3 From 69a58ef4fa77759b0e0c2f79834fa51b00a50c0b Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 22 May 2025 15:54:04 -0700 Subject: drm/xe/pxp: Clarify PXP queue creation behavior if PXP is not ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The expected flow of operations when using PXP is to query the PXP status and wait for it to transition to "ready" before attempting to create an exec_queue. This flow is followed by the Mesa driver, but there is no guarantee that an incorrectly coded (or malicious) app will not attempt to create the queue first without querying the status. Therefore, we need to clarify what the expected behavior of the queue creation ioctl is in this scenario. Currently, the ioctl always fails with an -EBUSY code no matter the error, but for consistency it is better to distinguish between "failed to init" (-EIO) and "not ready" (-EBUSY), the same way the query ioctl does. Note that, while this is a change in the return code of an ioctl, the behavior of the ioctl in this particular corner case was not clearly spec'd, so no one should have been relying on it (and we know that Mesa, which is the only known userspace for this, didn't). 
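A hedged sketch of how userspace might react to the clarified return codes; the helper functions are placeholders (assumed wrappers that return the negative error code), not part of the uAPI:

	int ret = create_pxp_hwdrm_queue(fd);	/* wraps the exec_queue create ioctl */

	if (ret == -EBUSY) {
		/* PXP init still in progress: wait via the PXP status query, retry */
		wait_for_pxp_ready(fd);		/* polls DRM_XE_DEVICE_QUERY_PXP_STATUS */
		ret = create_pxp_hwdrm_queue(fd);
	} else if (ret == -EIO) {
		/* PXP init failed for good: fall back to a non-PXP queue or report it */
		ret = create_plain_queue(fd);
	}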
v2: Minor rework of the doc (Rodrigo) Fixes: 72d479601d67 ("drm/xe/pxp/uapi: Add userspace and LRC support for PXP-using queues") Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: José Roberto de Souza Reviewed-by: José Roberto de Souza Reviewed-by: John Harrison Acked-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250522225401.3953243-7-daniele.ceraolospurio@intel.com (cherry picked from commit 21784ca96025b62d95b670b7639ad70ddafa69b8) Signed-off-by: Thomas Hellström --- include/uapi/drm/xe_drm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 9c08738c3b91..6a702ba7817c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1210,6 +1210,11 @@ struct drm_xe_vm_bind { * there is no need to explicitly set that. When a queue of type * %DRM_XE_PXP_TYPE_HWDRM is created, the PXP default HWDRM session * (%XE_PXP_HWDRM_DEFAULT_SESSION) will be started, if isn't already running. + * The user is expected to query the PXP status via the query ioctl (see + * %DRM_XE_DEVICE_QUERY_PXP_STATUS) and to wait for PXP to be ready before + * attempting to create a queue with this property. When a queue is created + * before PXP is ready, the ioctl will return -EBUSY if init is still in + * progress or -EIO if init failed. * Given that going into a power-saving state kills PXP HWDRM sessions, * runtime PM will be blocked while queues of this type are alive. * All PXP queues will be killed if a PXP invalidation event occurs. -- cgit v1.2.3 From 500e626c4a5bf2fa7cc6f1cd6dd86c77b5b127f2 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Tue, 8 Apr 2025 02:08:28 +0800 Subject: kernel: ftrace: export ftrace_sync_ipi The following ftrace patch for riscv uses a data store to update the ftrace function. Therefore, a remote fence is required to order it against function_trace_op updates. The mechanism is similar to the fence between function_trace_op and update_ftrace_func in the generic ftrace, so we leverage the same ftrace_sync_ipi function. [ alex: Fix build warning when !CONFIG_DYNAMIC_FTRACE ] Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20250407180838.42877-4-andybnac@gmail.com Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fbabc3d848b3..30374478cb07 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -635,6 +635,8 @@ enum { #define ftrace_get_symaddr(fentry_ip) (0) #endif +void ftrace_sync_ipi(void *data); + #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void); -- cgit v1.2.3 From 8c21c4111128365f81a88573eeb2844fa696b299 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 2 Jun 2025 19:55:36 +0900 Subject: module: make __mod_device_table__* symbols static The __mod_device_table__* symbols are only parsed by modpost to generate MODULE_ALIAS() entries from MODULE_DEVICE_TABLE(). Therefore, these symbols do not need to be globally visible or globally unique. If they were in the global scope, we would have to worry about symbol uniqueness, but modpost is fine with parsing multiple symbols with the same name.
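For illustration, with a hypothetical device table the macro now expands to roughly:

	/* MODULE_DEVICE_TABLE(of, my_of_match) becomes approximately: */
	static typeof(my_of_match) __mod_device_table__of__my_of_match
		__attribute__((used, alias("my_of_match")));

The "used" attribute keeps the otherwise-unreferenced static alias in the object file so modpost can still find and parse it.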
Signed-off-by: Masahiro Yamada Reviewed-by: Petr Pavlu --- include/linux/module.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 8050f77c3b64..92e1420fccdf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -249,8 +249,8 @@ struct module_kobject *lookup_or_create_module_kobject(const char *name); #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ -extern typeof(name) __mod_device_table__##type##__##name \ - __attribute__ ((unused, alias(__stringify(name)))) +static typeof(name) __mod_device_table__##type##__##name \ + __attribute__ ((used, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) #endif -- cgit v1.2.3 From 6093faaf9593fca92f96f165c95ff4b53353b1f4 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 5 Mar 2025 16:37:06 +0800 Subject: raid6: Add RISC-V SIMD syndrome and recovery calculations The assembly is originally based on the ARM NEON and int.uc, but uses RISC-V vector instructions to implement the RAID6 syndrome and recovery calculations. The functions are tested on QEMU running with the option "-icount shift=0": raid6: rvvx1 gen() 1008 MB/s raid6: rvvx2 gen() 1395 MB/s raid6: rvvx4 gen() 1584 MB/s raid6: rvvx8 gen() 1694 MB/s raid6: int64x8 gen() 113 MB/s raid6: int64x4 gen() 116 MB/s raid6: int64x2 gen() 272 MB/s raid6: int64x1 gen() 229 MB/s raid6: using algorithm rvvx8 gen() 1694 MB/s raid6: .... xor() 1000 MB/s, rmw enabled raid6: using rvv recovery algorithm [Charlie: - Fixup vector options] Signed-off-by: Charlie Jenkins Signed-off-by: Chunyan Zhang Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Link: https://lore.kernel.org/r/20250305083707.74218-1-zhangchunyan@iscas.ac.cn Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- include/linux/raid/pq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 98030accf641..72ff44cca864 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -108,6 +108,10 @@ extern const struct raid6_calls raid6_vpermxor4; extern const struct raid6_calls raid6_vpermxor8; extern const struct raid6_calls raid6_lsx; extern const struct raid6_calls raid6_lasx; +extern const struct raid6_calls raid6_rvvx1; +extern const struct raid6_calls raid6_rvvx2; +extern const struct raid6_calls raid6_rvvx4; +extern const struct raid6_calls raid6_rvvx8; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); @@ -125,6 +129,7 @@ extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_recov_calls raid6_recov_neon; extern const struct raid6_recov_calls raid6_recov_lsx; extern const struct raid6_recov_calls raid6_recov_lasx; +extern const struct raid6_recov_calls raid6_recov_rvv; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; -- cgit v1.2.3 From 044d2aee6c575231ed4a24fb3d119bad0937488b Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 21 May 2025 09:06:02 -0700 Subject: alloc_tag: handle module codetag load errors as module load failures Failures inside codetag_load_module() are currently ignored. As a result an error there would not cause a module load failure and freeing of the associated resources. Correct this behavior by propagating the error code to the caller and handling possible errors. 
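A minimal sketch of the new contract (the callback body, helper, and unwind label are illustrative only, not the actual alloc_tag code):

	/* A codetag_type_desc::module_load implementation can now fail: */
	static int my_tag_module_load(struct module *mod,
				      struct codetag *start, struct codetag *end)
	{
		if (!my_alloc_percpu_counters(mod, start, end))	/* hypothetical helper */
			return -ENOMEM;
		return 0;
	}

	/* ...and the module loader propagates the error instead of ignoring it: */
	ret = codetag_load_module(mod);
	if (ret)
		goto free_module_resources;	/* hypothetical unwind label */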
With this change, error to allocate percpu counters, which happens at this stage, will not be ignored and will cause a module load failure and freeing of resources. With this change we also do not need to disable memory allocation profiling when this error happens, instead we fail to load the module. Link: https://lkml.kernel.org/r/20250521160602.1940771-1-surenb@google.com Fixes: 10075262888b ("alloc_tag: allocate percpu counters for module tags dynamically") Signed-off-by: Suren Baghdasaryan Reported-by: Casey Chen Closes: https://lore.kernel.org/all/20250520231620.15259-1-cachen@purestorage.com/ Cc: Daniel Gomez Cc: David Wang <00107082@163.com> Cc: Kent Overstreet Cc: Luis Chamberalin Cc: Petr Pavlu Cc: Sami Tolvanen Cc: Signed-off-by: Andrew Morton --- include/linux/codetag.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/codetag.h b/include/linux/codetag.h index 0ee4c21c6dbc..5f2b9a1f722c 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -36,8 +36,8 @@ union codetag_ref { struct codetag_type_desc { const char *section; size_t tag_size; - void (*module_load)(struct module *mod, - struct codetag *start, struct codetag *end); + int (*module_load)(struct module *mod, + struct codetag *start, struct codetag *end); void (*module_unload)(struct module *mod, struct codetag *start, struct codetag *end); #ifdef CONFIG_MODULES @@ -89,7 +89,7 @@ void *codetag_alloc_module_section(struct module *mod, const char *name, unsigned long align); void codetag_free_module_sections(struct module *mod); void codetag_module_replaced(struct module *mod, struct module *new_mod); -void codetag_load_module(struct module *mod); +int codetag_load_module(struct module *mod); void codetag_unload_module(struct module *mod); #else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ @@ -103,7 +103,7 @@ codetag_alloc_module_section(struct module *mod, const char *name, unsigned long align) { return NULL; } static inline void codetag_free_module_sections(struct module *mod) {} static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} -static inline void codetag_load_module(struct module *mod) {} +static inline int codetag_load_module(struct module *mod) { return 0; } static inline void codetag_unload_module(struct module *mod) {} #endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ -- cgit v1.2.3 From 081056dc00a27bccb55ccc3c6f230a3d5fd3f7e0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 27 May 2025 23:23:53 +0200 Subject: mm/hugetlb: unshare page tables during VMA split, not before Currently, __split_vma() triggers hugetlb page table unsharing through vm_ops->may_split(). This happens before the VMA lock and rmap locks are taken - which is too early, it allows racing VMA-locked page faults in our process and racing rmap walks from other processes to cause page tables to be shared again before we actually perform the split. Fix it by explicitly calling into the hugetlb unshare logic from __split_vma() in the same place where THP splitting also happens. At that point, both the VMA and the rmap(s) are write-locked. An annoying detail is that we can now call into the helper hugetlb_unshare_pmds() from two different locking contexts: 1. from hugetlb_split(), holding: - mmap lock (exclusively) - VMA lock - file rmap lock (exclusively) 2. 
hugetlb_unshare_all_pmds(), which I think is designed to be able to call us with only the mmap lock held (in shared mode), but currently only runs while holding mmap lock (exclusively) and VMA lock Backporting note: This commit fixes a racy protection that was introduced in commit b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that commit claimed to fix an issue introduced in 5.13, but it should actually also go all the way back. [jannh@google.com: v2] Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1a@google.com Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1a@google.com Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58a@google.com Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page") Signed-off-by: Jann Horn Cc: Liam Howlett Reviewed-by: Lorenzo Stoakes Reviewed-by: Oscar Salvador Cc: Lorenzo Stoakes Cc: Vlastimil Babka Cc: [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs] Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0598f36931de..42f374e828a2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -279,6 +279,7 @@ bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); void fixup_hugetlb_reservations(struct vm_area_struct *vma); +void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); #else /* !CONFIG_HUGETLB_PAGE */ @@ -476,6 +477,8 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) { } +static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write -- cgit v1.2.3 From de6fdc076d39c97c0f7ed4873bfc84f58909f479 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Jun 2025 16:21:06 -0400 Subject: tracing: PM: Remove unused clock events The events clock_enable, clock_disable, and clock_set_rate were added back in 2010. In 2011 they were used by the arm architecture but removed in 2013. These events add around 7K of memory which was wasted for the last 12 years. Remove them. Link: https://lore.kernel.org/all/20250529130138.544ffec4@gandalf.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Kajetan Puchalski Acked-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/20250605162106.1a459dad@gandalf.local.home Fixes: 74704ac6ea402 ("tracing, perf: Add more power related events") Signed-off-by: Steven Rostedt (Google) --- include/trace/events/power.h | 47 -------------------------------------------- 1 file changed, 47 deletions(-) (limited to 'include') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 9253e83b9bb4..6c631eec23e3 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -337,53 +337,6 @@ DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, TP_ARGS(name, state) ); -/* - * The clock events are used for clock enable/disable and for - * clock rate change - */ -DECLARE_EVENT_CLASS(clock, - - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), - - TP_ARGS(name, state, cpu_id), - - TP_STRUCT__entry( - __string( name, name ) - __field( u64, state ) - __field( u64, cpu_id ) - ), - - TP_fast_assign( - __assign_str(name); - __entry->state = state; - __entry->cpu_id = cpu_id; - ), - - TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), - (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) -); - -DEFINE_EVENT(clock, clock_enable, - - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), - - TP_ARGS(name, state, cpu_id) -); - -DEFINE_EVENT(clock, clock_disable, - - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), - - TP_ARGS(name, state, cpu_id) -); - -DEFINE_EVENT(clock, clock_set_rate, - - TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), - - TP_ARGS(name, state, cpu_id) -); - /* * The power domain events are used for power domains transitions */ -- cgit v1.2.3 From bab77c0d191e241d2d59a845c7ed68bfa6e1b257 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 May 2025 13:28:37 -0400 Subject: finish_automount(): don't leak MNT_LOCKED from parent to child Intention for MNT_LOCKED had always been to protect the internal mountpoints within a subtree that got copied across the userns boundary, not the mountpoint that tree got attached to - after all, it _was_ exposed before the copying. For roots of secondary copies that is enforced in attach_recursive_mnt() - MNT_LOCKED is explicitly stripped for those. For the root of primary copy we are almost always guaranteed that MNT_LOCKED won't be there, so attach_recursive_mnt() doesn't bother. Unfortunately, one call chain got overlooked - triggering e.g. NFS referral will have the submount inherit the public flags from parent; that's fine for such things as read-only, nosuid, etc., but not for MNT_LOCKED. This is particularly pointless since the mount attached by finish_automount() is usually expirable, which makes any protection granted by MNT_LOCKED null and void; just wait for a while and that mount will go away on its own. Include MNT_LOCKED into the set of flags to be ignored by do_add_mount() - it really is an internal flag. 
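The mechanics, sketched under the assumption that the masking stays where it is today in fs/namespace.c (function and helper names here are simplified placeholders):

	static int do_add_mount_sketch(struct vfsmount *newmnt, const struct path *where,
				       unsigned int mnt_flags)
	{
		/* With MNT_LOCKED included in MNT_INTERNAL_FLAGS, this mask also
		 * strips a MNT_LOCKED bit inherited from the parent mount, so the
		 * automounted child no longer claims a protection it never needed. */
		mnt_flags &= ~MNT_INTERNAL_FLAGS;

		return graft_mount(newmnt, where, mnt_flags);	/* hypothetical helper */
	}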
Reviewed-by: Christian Brauner Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Signed-off-by: Al Viro --- include/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 6904ad33ee7a..1a3136e53eaa 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -65,7 +65,8 @@ enum mount_flags { MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED, + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | + MNT_LOCKED, }; struct vfsmount { -- cgit v1.2.3 From 41cb08555c4164996d67c78b3bf1c658075b75f1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 9 May 2025 07:51:14 +0200 Subject: treewide, timers: Rename from_timer() to timer_container_of() Move this API to the canonical timer_*() namespace. [ tglx: Redone against pre rc1 ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/aB2X0jCKQO56WdMt@gmail.com --- include/linux/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index f636f55c427d..0414d9e6b4fc 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -129,7 +129,7 @@ extern void timer_destroy_on_stack(struct timer_list *timer); static inline void timer_destroy_on_stack(struct timer_list *timer) { } #endif -#define from_timer(var, callback_timer, timer_fieldname) \ +#define timer_container_of(var, callback_timer, timer_fieldname) \ container_of(callback_timer, typeof(*var), timer_fieldname) /** -- cgit v1.2.3 From 1a4eabf662543c62ae1e71a26d1c8e6643c66388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:01 +0100 Subject: mfd: adp5585: Refactor how regmap defaults are handled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only thing changing between variants is the regmap default registers. Hence, instead of having a regmap configuration for every variant (duplicating lots of fields), add a chip info type of structure with a regmap ID to identify which defaults to use and populate regmap_config at runtime given a template plus the id. Also note that between variants, the defaults can be the same which means the chip info structure can be used in more than one compatible. This will also make it simpler adding new chips with more variants. Also note that the chip info structures are deliberately not const as they will also contain lots of members that are the same between the different devices variants and so we will fill those at runtime. 
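A rough sketch of the pattern (structure members and values are assumptions for illustration, not the driver's actual layout):

	#include <linux/regmap.h>

	struct adp5585_info {				/* per-compatible chip info */
		const struct reg_default *defaults;
		unsigned int num_defaults;
	};

	static const struct regmap_config adp5585_regmap_template = {
		.reg_bits = 8,
		.val_bits = 8,
		/* .reg_defaults / .num_reg_defaults filled in at runtime */
	};

	static void adp5585_fill_regmap_config_sketch(const struct adp5585_info *info,
						      struct regmap_config *cfg)
	{
		*cfg = adp5585_regmap_template;
		cfg->reg_defaults = info->defaults;
		cfg->num_reg_defaults = info->num_defaults;
	}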
Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-6-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 016033cd68e4..c56af8d8d76c 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -119,8 +119,19 @@ struct regmap; +enum adp5585_variant { + ADP5585_00 = 1, + ADP5585_01, + ADP5585_02, + ADP5585_03, + ADP5585_04, + ADP5585_MAX +}; + struct adp5585_dev { + struct device *dev; struct regmap *regmap; + enum adp5585_variant variant; }; #endif -- cgit v1.2.3 From 0190a72f28ee0995c546fd4fcf80ed25a0fc4b28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:02 +0100 Subject: mfd: adp5585: Add support for adp5589 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADP5589 is a 19 I/O port expander with built-in keypad matrix decoder, programmable logic, reset generator, and PWM generator. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-7-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index c56af8d8d76c..70e58122a36a 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -117,6 +117,12 @@ #define ADP5585_BANK(n) ((n) >= 6 ? 1 : 0) #define ADP5585_BIT(n) ((n) >= 6 ? BIT((n) - 6) : BIT(n)) +/* ADP5589 */ +#define ADP5589_MAN_ID_VALUE 0x10 +#define ADP5589_GPI_STATUS_C 0x18 +#define ADP5589_INT_EN 0x4e +#define ADP5589_MAX_REG ADP5589_INT_EN + struct regmap; enum adp5585_variant { @@ -125,6 +131,9 @@ enum adp5585_variant { ADP5585_02, ADP5585_03, ADP5585_04, + ADP5589_00, + ADP5589_01, + ADP5589_02, ADP5585_MAX }; @@ -132,6 +141,7 @@ struct adp5585_dev { struct device *dev; struct regmap *regmap; enum adp5585_variant variant; + unsigned int id; }; #endif -- cgit v1.2.3 From 7077fb501b95360c7fe35553f2bdb1ccf34edd16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:03 +0100 Subject: mfd: adp5585: Add a per chip reg structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some differences in the register map between the devices. Hence, add a register structure per device. This will be needed in the following patches. On top of that, adp5585_fill_regmap_config() is renamed and reworked so that the current struct adp5585_info instances act as templates (they indeed contain all the data that differs between variants), which can then be complemented depending on the device (as identified by the ID register). This is done because a lot of the data is pretty much the same between variants of the same device.
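Illustrative use of the per-chip register structure introduced in the diff below (the ADP5585-side address is a placeholder; only ADP5589_PIN_CONFIG_D and ADP5589_MAN_ID_VALUE come from the header):

	/* Sketch: pick the per-variant register layout at probe time. */
	static const struct adp5585_regs adp5585_regs_sketch = {
		.ext_cfg = 0x38,			/* placeholder address */
	};

	static const struct adp5585_regs adp5589_regs_sketch = {
		.ext_cfg = ADP5589_PIN_CONFIG_D,
	};

	static const struct adp5585_regs *adp5585_pick_regs(unsigned int man_id)
	{
		return man_id == ADP5589_MAN_ID_VALUE ? &adp5589_regs_sketch
						      : &adp5585_regs_sketch;
	}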
Reviewed-by: Lee Jones Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-8-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 70e58122a36a..6ecb90a6276c 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -120,6 +120,7 @@ /* ADP5589 */ #define ADP5589_MAN_ID_VALUE 0x10 #define ADP5589_GPI_STATUS_C 0x18 +#define ADP5589_PIN_CONFIG_D 0x4C #define ADP5589_INT_EN 0x4e #define ADP5589_MAX_REG ADP5589_INT_EN @@ -137,9 +138,14 @@ enum adp5585_variant { ADP5585_MAX }; +struct adp5585_regs { + unsigned int ext_cfg; +}; + struct adp5585_dev { struct device *dev; struct regmap *regmap; + const struct adp5585_regs *regs; enum adp5585_variant variant; unsigned int id; }; -- cgit v1.2.3 From 9f425bf713b511b1078e0fea5a88c497e13dbb64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:04 +0100 Subject: gpio: adp5585: add support for the adp5589 expander MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support the adp5589 I/O expander which supports up to 19 pins. We need to add a chip_info based struct since accessing register "banks" and "bits" differs between devices. Also some register addresses are different. While at it move ADP558X_GPIO_MAX defines to the main header file and rename them. That information will be needed by the top level device in a following change. Acked-by: Linus Walleij Acked-by: Bartosz Golaszewski Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-9-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 6ecb90a6276c..d26f722cf31a 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -107,23 +107,23 @@ #define ADP5585_MAX_REG ADP5585_INT_EN -/* - * Bank 0 covers pins "GPIO 1/R0" to "GPIO 6/R5", numbered 0 to 5 by the - * driver, and bank 1 covers pins "GPIO 7/C0" to "GPIO 11/C4", numbered 6 to - * 10. Some variants of the ADP5585 don't support "GPIO 6/R5". As the driver - * uses identical GPIO numbering for all variants to avoid confusion, GPIO 5 is - * marked as reserved in the device tree for variants that don't support it. - */ -#define ADP5585_BANK(n) ((n) >= 6 ? 1 : 0) -#define ADP5585_BIT(n) ((n) >= 6 ? BIT((n) - 6) : BIT(n)) +#define ADP5585_PIN_MAX 11 /* ADP5589 */ #define ADP5589_MAN_ID_VALUE 0x10 +#define ADP5589_GPI_STATUS_A 0x16 #define ADP5589_GPI_STATUS_C 0x18 +#define ADP5589_RPULL_CONFIG_A 0x19 +#define ADP5589_DEBOUNCE_DIS_A 0x27 +#define ADP5589_GPO_DATA_OUT_A 0x2a +#define ADP5589_GPO_OUT_MODE_A 0x2d +#define ADP5589_GPIO_DIRECTION_A 0x30 #define ADP5589_PIN_CONFIG_D 0x4C #define ADP5589_INT_EN 0x4e #define ADP5589_MAX_REG ADP5589_INT_EN +#define ADP5589_PIN_MAX 19 + struct regmap; enum adp5585_variant { -- cgit v1.2.3 From 75024f97e82e63d02b0743500efb1e264a1c2dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:05 +0100 Subject: pwm: adp5585: add support for adp5589 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the adp5589 I/O expander. From a PWM point of view it is pretty similar to adp5585. 
The main difference is the address of the registers used for configuring the PWM. Acked-by: Uwe Kleine-König Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-10-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index d26f722cf31a..77f7c74f084d 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -118,6 +118,9 @@ #define ADP5589_GPO_DATA_OUT_A 0x2a #define ADP5589_GPO_OUT_MODE_A 0x2d #define ADP5589_GPIO_DIRECTION_A 0x30 +#define ADP5589_PWM_OFFT_LOW 0x3e +#define ADP5589_PWM_ONT_LOW 0x40 +#define ADP5589_PWM_CFG 0x42 #define ADP5589_PIN_CONFIG_D 0x4C #define ADP5589_INT_EN 0x4e #define ADP5589_MAX_REG ADP5589_INT_EN -- cgit v1.2.3 From 47a1f759b776ec9287f675f5d4fbf60b94cc566d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:07 +0100 Subject: mfd: adp5585: Add support for event handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These devices are capable of generating FIFO-based events based on KEY or GPI presses. Add support for handling these events. This is in preparation for adding full support for keymap and GPI based events. Reviewed-by: Lee Jones Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-12-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 77f7c74f084d..43a33a3d3f5a 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -10,13 +10,20 @@ #define __MFD_ADP5585_H_ #include +#include #define ADP5585_ID 0x00 #define ADP5585_MAN_ID_VALUE 0x20 #define ADP5585_MAN_ID_MASK GENMASK(7, 4) +#define ADP5585_REV_ID_MASK GENMASK(3, 0) #define ADP5585_INT_STATUS 0x01 +#define ADP5585_OVRFLOW_INT BIT(2) +#define ADP5585_EVENT_INT BIT(0) #define ADP5585_STATUS 0x02 +#define ADP5585_EC_MASK GENMASK(4, 0) #define ADP5585_FIFO_1 0x03 +#define ADP5585_KEV_EV_PRESS_MASK BIT(7) +#define ADP5585_KEY_EVENT_MASK GENMASK(6, 0) #define ADP5585_FIFO_2 0x04 #define ADP5585_FIFO_3 0x05 #define ADP5585_FIFO_4 0x06 @@ -32,6 +39,7 @@ #define ADP5585_FIFO_14 0x10 #define ADP5585_FIFO_15 0x11 #define ADP5585_FIFO_16 0x12 +#define ADP5585_EV_MAX (ADP5585_FIFO_16 - ADP5585_FIFO_1 + 1) #define ADP5585_GPI_INT_STAT_A 0x13 #define ADP5585_GPI_INT_STAT_B 0x14 #define ADP5585_GPI_STATUS_A 0x15 @@ -104,6 +112,8 @@ #define ADP5585_INT_CFG BIT(1) #define ADP5585_RST_CFG BIT(0) #define ADP5585_INT_EN 0x3c +#define ADP5585_OVRFLOW_IEN BIT(2) +#define ADP5585_EVENT_IEN BIT(0) #define ADP5585_MAX_REG ADP5585_INT_EN @@ -121,7 +131,9 @@ #define ADP5589_PWM_OFFT_LOW 0x3e #define ADP5589_PWM_ONT_LOW 0x40 #define ADP5589_PWM_CFG 0x42 +#define ADP5589_POLL_PTIME_CFG 0x48 #define ADP5589_PIN_CONFIG_D 0x4C +#define ADP5589_GENERAL_CFG 0x4d #define ADP5589_INT_EN 0x4e #define ADP5589_MAX_REG ADP5589_INT_EN @@ -142,15 +154,21 @@ enum adp5585_variant { }; struct adp5585_regs { + unsigned int gen_cfg; unsigned int ext_cfg; + unsigned int int_en; + unsigned int poll_ptime_cfg; }; struct adp5585_dev { struct device *dev; struct regmap *regmap; const struct adp5585_regs *regs; + struct blocking_notifier_head event_notifier; enum adp5585_variant variant; unsigned int id; + int irq; + unsigned int 
ev_poll_time; }; #endif -- cgit v1.2.3 From 333812da70d5f71bf5e176f6d55a5f716301b5fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:08 +0100 Subject: mfd: adp5585: Support reset and unlock events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADP558x family of devices can be programmed to respond to some special events. In the case of the unlock events, one can lock the keypad and use KEY or GPI events to unlock it. For the reset events, one can again use a combination of GPIs/KEYs to generate an event that will trigger the device to emit an output reset pulse. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-13-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 43a33a3d3f5a..db483ef9693a 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -68,6 +68,7 @@ #define ADP5585_GPIO_DIRECTION_A 0x27 #define ADP5585_GPIO_DIRECTION_B 0x28 #define ADP5585_RESET1_EVENT_A 0x29 +#define ADP5585_RESET_EV_PRESS BIT(7) #define ADP5585_RESET1_EVENT_B 0x2a #define ADP5585_RESET1_EVENT_C 0x2b #define ADP5585_RESET2_EVENT_A 0x2c @@ -118,6 +119,13 @@ #define ADP5585_MAX_REG ADP5585_INT_EN #define ADP5585_PIN_MAX 11 +#define ADP5585_MAX_UNLOCK_TIME_SEC 7 +#define ADP5585_KEY_EVENT_START 1 +#define ADP5585_KEY_EVENT_END 25 +#define ADP5585_GPI_EVENT_START 37 +#define ADP5585_GPI_EVENT_END 47 +#define ADP5585_ROW5_KEY_EVENT_START 1 +#define ADP5585_ROW5_KEY_EVENT_END 30 /* ADP5589 */ #define ADP5589_MAN_ID_VALUE 0x10 @@ -128,6 +136,20 @@ #define ADP5589_GPO_DATA_OUT_A 0x2a #define ADP5589_GPO_OUT_MODE_A 0x2d #define ADP5589_GPIO_DIRECTION_A 0x30 +#define ADP5589_UNLOCK1 0x33 +#define ADP5589_UNLOCK_EV_PRESS BIT(7) +#define ADP5589_UNLOCK_TIMERS 0x36 +#define ADP5589_UNLOCK_TIMER GENMASK(2, 0) +#define ADP5589_LOCK_CFG 0x37 +#define ADP5589_LOCK_EN BIT(0) +#define ADP5589_RESET1_EVENT_A 0x38 +#define ADP5589_RESET2_EVENT_A 0x3B +#define ADP5589_RESET_CFG 0x3D +#define ADP5585_RESET2_POL BIT(7) +#define ADP5585_RESET1_POL BIT(6) +#define ADP5585_RST_PASSTHRU_EN BIT(5) +#define ADP5585_RESET_TRIG_TIME GENMASK(4, 2) +#define ADP5585_PULSE_WIDTH GENMASK(1, 0) #define ADP5589_PWM_OFFT_LOW 0x3e #define ADP5589_PWM_ONT_LOW 0x40 #define ADP5589_PWM_CFG 0x42 @@ -138,6 +160,11 @@ #define ADP5589_MAX_REG ADP5589_INT_EN #define ADP5589_PIN_MAX 19 +#define ADP5589_KEY_EVENT_START 1 +#define ADP5589_KEY_EVENT_END 88 +#define ADP5589_GPI_EVENT_START 97 +#define ADP5589_GPI_EVENT_END 115 +#define ADP5589_UNLOCK_WILDCARD 127 struct regmap; @@ -158,6 +185,9 @@ struct adp5585_regs { unsigned int ext_cfg; unsigned int int_en; unsigned int poll_ptime_cfg; + unsigned int reset_cfg; + unsigned int reset1_event_a; + unsigned int reset2_event_a; }; struct adp5585_dev { @@ -167,8 +197,18 @@ struct adp5585_dev { struct regmap *regmap; const struct adp5585_regs *regs; struct blocking_notifier_head event_notifier; enum adp5585_variant variant; unsigned int id; + bool has_unlock; + bool has_pin6; int irq; unsigned int ev_poll_time; + unsigned int unlock_time; + unsigned int unlock_keys[2]; + unsigned int nkeys_unlock; + unsigned int reset1_keys[3]; + unsigned int nkeys_reset1; + unsigned int reset2_keys[2]; + unsigned int nkeys_reset2; + u8 reset_cfg; }; #endif -- cgit v1.2.3 From 988b28a83b658137e58123f4dafc3a1588e1cb2b Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:09 +0100 Subject: mfd: adp5585: Add support for input devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADP558x family supports a built-in keypad matrix decoder, which can be added as an Input device. In order to both support the Input and the GPIO device, we need to create a bitmap of the supported pins and track their usage since they can either be used as GPIOs (GPIs) or as part of the keymap. We also need to mark special pins busy in case some features are being used (e.g. PWM or reset events). Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-14-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index db483ef9693a..41c5d2e1cc7c 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -126,6 +126,10 @@ #define ADP5585_GPI_EVENT_END 47 #define ADP5585_ROW5_KEY_EVENT_START 1 #define ADP5585_ROW5_KEY_EVENT_END 30 +#define ADP5585_PWM_OUT 3 +#define ADP5585_RESET1_OUT 4 +#define ADP5585_RESET2_OUT 9 +#define ADP5585_ROW5 5 /* ADP5589 */ #define ADP5589_MAN_ID_VALUE 0x10 @@ -154,6 +158,7 @@ #define ADP5589_PWM_ONT_LOW 0x40 #define ADP5589_PWM_CFG 0x42 #define ADP5589_POLL_PTIME_CFG 0x48 +#define ADP5589_PIN_CONFIG_A 0x49 #define ADP5589_PIN_CONFIG_D 0x4C #define ADP5589_GENERAL_CFG 0x4d #define ADP5589_INT_EN 0x4e @@ -165,6 +170,7 @@ #define ADP5589_GPI_EVENT_START 97 #define ADP5589_GPI_EVENT_END 115 #define ADP5589_UNLOCK_WILDCARD 127 +#define ADP5589_RESET2_OUT 12 struct regmap; @@ -188,6 +194,7 @@ struct adp5585_regs { unsigned int reset_cfg; unsigned int reset1_event_a; unsigned int reset2_event_a; + unsigned int pin_cfg_a; }; struct adp5585_dev { @@ -195,6 +202,9 @@ struct adp5585_dev { struct regmap *regmap; const struct adp5585_regs *regs; struct blocking_notifier_head event_notifier; + unsigned long *pin_usage; + unsigned int n_pins; + unsigned int reset2_out; enum adp5585_variant variant; unsigned int id; bool has_unlock; -- cgit v1.2.3 From 988b28a83b658137e58123f4dafc3a1588e1cb2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 1 Jul 2025 15:32:10 +0100 Subject: gpio: adp5585: support gpi events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for adding GPIs to the event FIFO. This is done by adding irq_chip support. This way, one can use the input gpio_keys driver as a "frontend" device and input handler. As part of this change, we now implement .request() and .free() as we can't blindly consume all available pins as GPIOs (example: some pins can be used for forming a keymap matrix). Also note that the number of pins can now be obtained from the parent, top-level device. Hence the 'max_gpio' variable can be removed.
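A minimal sketch of the idea behind implementing .request()/.free(); this is simplified (the real driver coordinates with the parent MFD device and its locking), and the function bodies are illustrative:

	static int adp5585_gpio_request_sketch(struct gpio_chip *chip, unsigned int off)
	{
		struct adp5585_dev *adp5585 = gpiochip_get_data(chip);

		/* pin_usage is the bitmap the parent MFD uses to track pins that
		 * are already claimed, e.g. by the keypad matrix or the PWM. */
		if (test_and_set_bit(off, adp5585->pin_usage))
			return -EBUSY;

		return 0;
	}

	static void adp5585_gpio_free_sketch(struct gpio_chip *chip, unsigned int off)
	{
		struct adp5585_dev *adp5585 = gpiochip_get_data(chip);

		clear_bit(off, adp5585->pin_usage);
	}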
Reviewed-by: Linus Walleij Acked-by: Bartosz Golaszewski Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250701-dev-adp5589-fw-v7-15-b1fcfe9e9826@analog.com Signed-off-by: Lee Jones --- include/linux/mfd/adp5585.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h index 41c5d2e1cc7c..5237da6b4a9f 100644 --- a/include/linux/mfd/adp5585.h +++ b/include/linux/mfd/adp5585.h @@ -136,6 +136,8 @@ #define ADP5589_GPI_STATUS_A 0x16 #define ADP5589_GPI_STATUS_C 0x18 #define ADP5589_RPULL_CONFIG_A 0x19 +#define ADP5589_GPI_INT_LEVEL_A 0x1e +#define ADP5589_GPI_EVENT_EN_A 0x21 #define ADP5589_DEBOUNCE_DIS_A 0x27 #define ADP5589_GPO_DATA_OUT_A 0x2a #define ADP5589_GPO_OUT_MODE_A 0x2d -- cgit v1.2.3