From 594c11d0e1d445f580898a2b8c850f2e3f099368 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 27 Jan 2026 07:22:35 -0600 Subject: ipmi: Fix use-after-free and list corruption on sender error The analysis from Breno: When the SMI sender returns an error, smi_work() delivers an error response but then jumps back to restart without cleaning up properly: 1. intf->curr_msg is not cleared, so no new message is pulled 2. newmsg still points to the message, causing sender() to be called again with the same message 3. If sender() fails again, deliver_err_response() is called with the same recv_msg that was already queued for delivery This causes list_add corruption ("list_add double add") because the recv_msg is added to the user_msgs list twice. Subsequently, the corrupted list leads to use-after-free when the memory is freed and reused, and eventually a NULL pointer dereference when accessing recv_msg->done. The buggy sequence: sender() fails -> deliver_err_response(recv_msg) // recv_msg queued for delivery -> goto restart // curr_msg not cleared! sender() fails again (same message!) -> deliver_err_response(recv_msg) // tries to queue same recv_msg -> LIST CORRUPTION Fix this by freeing the message and setting it to NULL on a send error. Also, always free the newmsg on a send error, otherwise it will leak. Reported-by: Breno Leitao Closes: https://lore.kernel.org/lkml/20260127-ipmi-v1-0-ba5cc90f516f@debian.org/ Fixes: 9cf93a8fa9513 ("ipmi: Allow an SMI sender to return an error") Cc: stable@vger.kernel.org # 4.18 Reviewed-by: Breno Leitao Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 3f48fc6ab596..a590a67294e2 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -4852,8 +4852,15 @@ restart: if (newmsg->recv_msg) deliver_err_response(intf, newmsg->recv_msg, cc); - else - ipmi_free_smi_msg(newmsg); + if (!run_to_completion) + spin_lock_irqsave(&intf->xmit_msgs_lock, + flags); + intf->curr_msg = NULL; + if (!run_to_completion) + spin_unlock_irqrestore(&intf->xmit_msgs_lock, + flags); + ipmi_free_smi_msg(newmsg); + newmsg = NULL; goto restart; } } -- cgit v1.2.3 From 1d90e6c1a56f6ab83e5c9d30ded19e7ac8155713 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 27 Jan 2026 07:35:02 -0600 Subject: ipmi: Consolidate the run to completion checking for xmit msgs lock It made things hard to read, move the check to a function. Signed-off-by: Corey Minyard Reviewed-by: Breno Leitao --- drivers/char/ipmi/ipmi_msghandler.c | 42 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index a590a67294e2..a042b1596933 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -602,6 +602,22 @@ static int __ipmi_bmc_register(struct ipmi_smi *intf, static int __scan_channels(struct ipmi_smi *intf, struct ipmi_device_id *id, bool rescan); +static void ipmi_lock_xmit_msgs(struct ipmi_smi *intf, int run_to_completion, + unsigned long *flags) +{ + if (run_to_completion) + return; + spin_lock_irqsave(&intf->xmit_msgs_lock, *flags); +} + +static void ipmi_unlock_xmit_msgs(struct ipmi_smi *intf, int run_to_completion, + unsigned long *flags) +{ + if (run_to_completion) + return; + spin_unlock_irqrestore(&intf->xmit_msgs_lock, *flags); +} + static void free_ipmi_user(struct kref *ref) { struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount); @@ -1878,11 +1894,9 @@ static void smi_send(struct ipmi_smi *intf, int run_to_completion = READ_ONCE(intf->run_to_completion); unsigned long flags = 0; - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); smi_msg = smi_add_send_msg(intf, smi_msg, priority); - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); if (smi_msg) handlers->sender(intf->send_info, smi_msg); @@ -4826,8 +4840,7 @@ static void smi_work(struct work_struct *t) * message delivery. */ restart: - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); if (intf->curr_msg == NULL && !intf->in_shutdown) { struct list_head *entry = NULL; @@ -4843,8 +4856,7 @@ restart: intf->curr_msg = newmsg; } } - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); if (newmsg) { cc = intf->handlers->sender(intf->send_info, newmsg); @@ -4852,13 +4864,9 @@ restart: if (newmsg->recv_msg) deliver_err_response(intf, newmsg->recv_msg, cc); - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, - flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); intf->curr_msg = NULL; - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, - flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); ipmi_free_smi_msg(newmsg); newmsg = NULL; goto restart; @@ -4928,16 +4936,14 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf, spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock, flags); - if (!run_to_completion) - spin_lock_irqsave(&intf->xmit_msgs_lock, flags); + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); /* * We can get an asynchronous event or receive message in addition * to commands we send. */ if (msg == intf->curr_msg) intf->curr_msg = NULL; - if (!run_to_completion) - spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags); + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); if (run_to_completion) smi_work(&intf->smi_work); -- cgit v1.2.3 From 9f235ccecd03c436cb1683eac16b12f119e54aa9 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 13 Jan 2026 17:41:34 +0800 Subject: ipmi: ipmb: initialise event handler read bytes IPMB doesn't use i2c reads, but the handler needs to set a value. Otherwise an i2c read will return an uninitialised value from the bus driver. Fixes: 63c4eb347164 ("ipmi:ipmb: Add initial support for IPMI over IPMB") Signed-off-by: Matt Johnston Message-ID: <20260113-ipmb-read-init-v1-1-a9cbce7b94e3@codeconstruct.com.au> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ipmb.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/char/ipmi/ipmi_ipmb.c b/drivers/char/ipmi/ipmi_ipmb.c index 3a51e58b2487..28818952a7a4 100644 --- a/drivers/char/ipmi/ipmi_ipmb.c +++ b/drivers/char/ipmi/ipmi_ipmb.c @@ -202,11 +202,16 @@ static int ipmi_ipmb_slave_cb(struct i2c_client *client, break; case I2C_SLAVE_READ_REQUESTED: + *val = 0xff; + ipmi_ipmb_check_msg_done(iidev); + break; + case I2C_SLAVE_STOP: ipmi_ipmb_check_msg_done(iidev); break; case I2C_SLAVE_READ_PROCESSED: + *val = 0xff; break; } -- cgit v1.2.3 From 6b157b408d0c7d125e4d7c62e11e7d9376a5d150 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 16 Jan 2026 17:22:01 -0600 Subject: ipmi:ls2k: Make ipmi_ls2k_platform_driver static No need for it to be global. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202601170753.3zDBerGP-lkp@intel.com/ Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_ls2k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/char/ipmi/ipmi_si_ls2k.c b/drivers/char/ipmi/ipmi_si_ls2k.c index 45442c257efd..4c1da80f256c 100644 --- a/drivers/char/ipmi/ipmi_si_ls2k.c +++ b/drivers/char/ipmi/ipmi_si_ls2k.c @@ -168,7 +168,7 @@ static void ipmi_ls2k_remove(struct platform_device *pdev) ipmi_si_remove_by_dev(&pdev->dev); } -struct platform_driver ipmi_ls2k_platform_driver = { +static struct platform_driver ipmi_ls2k_platform_driver = { .driver = { .name = "ls2k-ipmi-si", }, -- cgit v1.2.3 From 211ecfaaef186ee5230a77d054cdec7fbfc6724a Mon Sep 17 00:00:00 2001 From: Brad Spengler Date: Wed, 7 Jan 2026 12:12:36 -0500 Subject: drm/vmwgfx: Fix invalid kref_put callback in vmw_bo_dirty_release The kref_put() call uses (void *)kvfree as the release callback, which is incorrect. kref_put() expects a function with signature void (*release)(struct kref *), but kvfree has signature void (*)(const void *). Calling through an incompatible function pointer is undefined behavior. The code only worked by accident because ref_count is the first member of vmw_bo_dirty, making the kref pointer equal to the struct pointer. Fix this by adding a proper release callback that uses container_of() to retrieve the containing structure before freeing. Fixes: c1962742ffff ("drm/vmwgfx: Use kref in vmw_bo_dirty") Signed-off-by: Brad Spengler Signed-off-by: Zack Rusin Cc: Ian Forbes Link: https://patch.msgid.link/20260107171236.3573118-1-zack.rusin@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index fd4e76486f2d..45561bc1c9ef 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -260,6 +260,13 @@ out_no_dirty: return ret; } +static void vmw_bo_dirty_free(struct kref *kref) +{ + struct vmw_bo_dirty *dirty = container_of(kref, struct vmw_bo_dirty, ref_count); + + kvfree(dirty); +} + /** * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object * @vbo: The buffer object @@ -274,7 +281,7 @@ void vmw_bo_dirty_release(struct vmw_bo *vbo) { struct vmw_bo_dirty *dirty = vbo->dirty; - if (dirty && kref_put(&dirty->ref_count, (void *)kvfree)) + if (dirty && kref_put(&dirty->ref_count, vmw_bo_dirty_free)) vbo->dirty = NULL; } -- cgit v1.2.3 From 922f9dec5d19df4cfbb7070275e5c131d10c80f3 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Fri, 9 Jan 2026 09:51:39 -0600 Subject: drm/vmwgfx: Set a unique ID for each submitted command buffer These IDs are logged by the Hypervisor when debug logging is enabled. Having the IDs in the log makes it much easier to see when command buffers start and finish. They can also be used by logging/tracing in the Guest to help correlate between Guest and Hypervisor logs. Signed-off-by: Ian Forbes Signed-off-by: Zack Rusin Link: https://patch.msgid.link/20260109155139.3259493-1-ian.forbes@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 94e8982f5616..1ee37690b940 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -105,6 +105,7 @@ struct vmw_cmdbuf_context { * @handle: DMA address handle for the command buffer space if @using_mob is * false. Immutable. * @size: The size of the command buffer space. Immutable. + * @id: Monotonically increasing ID of the last cmdbuf submitted. * @num_contexts: Number of contexts actually enabled. */ struct vmw_cmdbuf_man { @@ -132,6 +133,7 @@ struct vmw_cmdbuf_man { bool has_pool; dma_addr_t handle; size_t size; + u64 id; u32 num_contexts; }; @@ -303,6 +305,8 @@ static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header) struct vmw_cmdbuf_man *man = header->man; u32 val; + header->cb_header->id = man->id++; + val = upper_32_bits(header->handle); vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val); -- cgit v1.2.3 From 5023ca80f9589295cb60735016e39fc5cc714243 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Tue, 13 Jan 2026 11:53:57 -0600 Subject: drm/vmwgfx: Return the correct value in vmw_translate_ptr functions Before the referenced fixes these functions used a lookup function that returned a pointer. This was changed to another lookup function that returned an error code with the pointer becoming an out parameter. The error path when the lookup failed was not changed to reflect this change and the code continued to return the PTR_ERR of the now uninitialized pointer. This could cause the vmw_translate_ptr functions to return success when they actually failed causing further uninitialized and OOB accesses. Reported-by: Kuzey Arda Bulut Fixes: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Ian Forbes Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patch.msgid.link/20260113175357.129285-1-ian.forbes@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 3057f8baa7d2..e1f18020170a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1143,7 +1143,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); if (ret != 0) { drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n"); - return PTR_ERR(vmw_bo); + return ret; } vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB); ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo); @@ -1199,7 +1199,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); if (ret != 0) { drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n"); - return PTR_ERR(vmw_bo); + return ret; } vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM); -- cgit v1.2.3 From 52c9ee202edd21d0599ac3b5a6fe1da2a2f053e5 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 6 Feb 2026 09:59:32 -0600 Subject: ipmi:si: Handle waiting messages when BMC failure detected If a BMC failure is detected, the current message is returned with an error. However, if there was a waiting message, it would not be handled. Add a check for the waiting message after handling the current message. Suggested-by: Guenter Roeck Reported-by: Rafael J. Wysocki Closes: https://lore.kernel.org/linux-acpi/CAK8fFZ58fidGUCHi5WFX0uoTPzveUUDzT=k=AAm4yWo3bAuCFg@mail.gmail.com/ Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 5459ffdde8dc..ff159b1162b9 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -809,6 +809,12 @@ restart: */ return_hosed_msg(smi_info, IPMI_BUS_ERR); } + if (smi_info->waiting_msg != NULL) { + /* Also handle if there was a message waiting. */ + smi_info->curr_msg = smi_info->waiting_msg; + smi_info->waiting_msg = NULL; + return_hosed_msg(smi_info, IPMI_BUS_ERR); + } smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_HOSED); goto out; } -- cgit v1.2.3 From c3bb3295637cc9bf514f690941ca9a385bf30113 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 6 Feb 2026 10:33:52 -0600 Subject: ipmi:si: Use a long timeout when the BMC is misbehaving If the driver goes into HOSED state, don't reset the timeout to the short timeout in the timeout handler. Reported-by: Igor Raits Closes: https://lore.kernel.org/linux-acpi/CAK8fFZ58fidGUCHi5WFX0uoTPzveUUDzT=k=AAm4yWo3bAuCFg@mail.gmail.com/ Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index ff159b1162b9..0049e3792ba1 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -1119,7 +1119,9 @@ static void smi_timeout(struct timer_list *t) * SI_USEC_PER_JIFFY); smi_result = smi_event_handler(smi_info, time_diff); - if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { + if (smi_info->si_state == SI_HOSED) { + timeout = jiffies + SI_TIMEOUT_HOSED; + } else if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { /* Running with interrupts, only do long timeouts. */ timeout = jiffies + SI_TIMEOUT_JIFFIES; smi_inc_stat(smi_info, long_timeouts); -- cgit v1.2.3 From fef0e649f8b42bdffe4a916dd46e1b1e9ad2f207 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Fri, 30 Jan 2026 00:21:19 +0800 Subject: drm/logicvc: Fix device node reference leak in logicvc_drm_config_parse() The logicvc_drm_config_parse() function calls of_get_child_by_name() to find the "layers" node but fails to release the reference, leading to a device node reference leak. Fix this by using the __free(device_node) cleanup attribute to automatic release the reference when the variable goes out of scope. Fixes: efeeaefe9be5 ("drm: Add support for the LogiCVC display controller") Signed-off-by: Felix Gu Reviewed-by: Luca Ceresoli Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20260130-logicvc_drm-v1-1-04366463750c@gmail.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/logicvc/logicvc_drm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/logicvc/logicvc_drm.c b/drivers/gpu/drm/logicvc/logicvc_drm.c index 204b0fee55d0..bbebf4fc7f51 100644 --- a/drivers/gpu/drm/logicvc/logicvc_drm.c +++ b/drivers/gpu/drm/logicvc/logicvc_drm.c @@ -92,7 +92,6 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc) struct device *dev = drm_dev->dev; struct device_node *of_node = dev->of_node; struct logicvc_drm_config *config = &logicvc->config; - struct device_node *layers_node; int ret; logicvc_of_property_parse_bool(of_node, LOGICVC_OF_PROPERTY_DITHERING, @@ -128,7 +127,8 @@ static int logicvc_drm_config_parse(struct logicvc_drm *logicvc) if (ret) return ret; - layers_node = of_get_child_by_name(of_node, "layers"); + struct device_node *layers_node __free(device_node) = + of_get_child_by_name(of_node, "layers"); if (!layers_node) { drm_err(drm_dev, "Missing non-optional layers node\n"); return -EINVAL; -- cgit v1.2.3 From 0b87d51690dd5131cbe9fbd23746b037aab89815 Mon Sep 17 00:00:00 2001 From: Franz Schnyder Date: Fri, 6 Feb 2026 13:37:36 +0100 Subject: drm/bridge: ti-sn65dsi86: Enable HPD polling if IRQ is not used Fallback to polling to detect hotplug events on systems without interrupts. On systems where the interrupt line of the bridge is not connected, the bridge cannot notify hotplug events. Only add the DRM_BRIDGE_OP_HPD flag if an interrupt has been registered otherwise remain in polling mode. Fixes: 55e8ff842051 ("drm/bridge: ti-sn65dsi86: Add HPD for DisplayPort connector type") Cc: stable@vger.kernel.org # 6.16: 9133bc3f0564: drm/bridge: ti-sn65dsi86: Add Signed-off-by: Franz Schnyder Reviewed-by: Douglas Anderson [dianders: Adjusted Fixes/stable line based on discussion] Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20260206123758.374555-1-fra.schnyder@gmail.com --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 276d05d25ad8..98d64ad791d0 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1415,6 +1415,7 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, { struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent); struct device_node *np = pdata->dev->of_node; + const struct i2c_client *client = to_i2c_client(pdata->dev); int ret; pdata->next_bridge = devm_drm_of_get_bridge(&adev->dev, np, 1, 0); @@ -1433,8 +1434,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, ? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP; if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) { - pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT | - DRM_BRIDGE_OP_HPD; + pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT; + if (client->irq) + pdata->bridge.ops |= DRM_BRIDGE_OP_HPD; /* * If comms were already enabled they would have been enabled * with the wrong value of HPD_DISABLE. Update it now. Comms -- cgit v1.2.3 From 9478c166c46934160135e197b049b5a05753f2ad Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Nov 2024 11:46:01 +1000 Subject: nouveau/gsp: drop WARN_ON in ACPI probes These WARN_ONs seem to trigger a lot, and we don't seem to have a plan to fix them, so just drop them, as they are most likely harmless. Cc: stable@vger.kernel.org Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") Signed-off-by: Dave Airlie Link: https://patch.msgid.link/20241121014601.229391-1-airlied@gmail.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c index 7fb13434c051..a575a8dbf727 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c @@ -737,8 +737,8 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) if (!obj) goto done; - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || - WARN_ON(obj->buffer.length != 4)) + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length != 4) goto done; caps->status = 0; @@ -773,8 +773,8 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) if (!obj) goto done; - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || - WARN_ON(obj->buffer.length != 4)) + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length != 4) goto done; jt->status = 0; @@ -861,8 +861,8 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA *dod) _DOD = output.pointer; - if (WARN_ON(_DOD->type != ACPI_TYPE_PACKAGE) || - WARN_ON(_DOD->package.count > ARRAY_SIZE(dod->acpiIdList))) + if (_DOD->type != ACPI_TYPE_PACKAGE || + _DOD->package.count > ARRAY_SIZE(dod->acpiIdList)) return; for (int i = 0; i < _DOD->package.count; i++) { -- cgit v1.2.3 From 46120745bb4e7e1f09959624716b4c5d6e2c2e9e Mon Sep 17 00:00:00 2001 From: Ethan Tidmore Date: Sun, 15 Feb 2026 22:04:38 -0600 Subject: drm/tiny: sharp-memory: fix pointer error dereference The function devm_drm_dev_alloc() returns a pointer error upon failure not NULL. Change null check to pointer error check. Detected by Smatch: drivers/gpu/drm/tiny/sharp-memory.c:549 sharp_memory_probe() error: 'smd' dereferencing possible ERR_PTR() Fixes: b8f9f21716fec ("drm/tiny: Add driver for Sharp Memory LCD") Signed-off-by: Ethan Tidmore Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260216040438.43702-1-ethantidmore06@gmail.com --- drivers/gpu/drm/tiny/sharp-memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/tiny/sharp-memory.c b/drivers/gpu/drm/tiny/sharp-memory.c index 64272cd0f6e2..cbf69460ebf3 100644 --- a/drivers/gpu/drm/tiny/sharp-memory.c +++ b/drivers/gpu/drm/tiny/sharp-memory.c @@ -541,8 +541,8 @@ static int sharp_memory_probe(struct spi_device *spi) smd = devm_drm_dev_alloc(dev, &sharp_memory_drm_driver, struct sharp_memory_device, drm); - if (!smd) - return -ENOMEM; + if (IS_ERR(smd)) + return PTR_ERR(smd); spi_set_drvdata(spi, smd); -- cgit v1.2.3 From 1072020685f4b81f6efad3b412cdae0bd62bb043 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 12 Feb 2026 12:41:25 +0100 Subject: irqchip/sifive-plic: Fix frozen interrupt due to affinity setting PLIC ignores interrupt completion message for disabled interrupt, explained by the specification: The PLIC signals it has completed executing an interrupt handler by writing the interrupt ID it received from the claim to the claim/complete register. The PLIC does not check whether the completion ID is the same as the last claim ID for that target. If the completion ID does not match an interrupt source that is currently enabled for the target, the completion is silently ignored. This caused problems in the past, because an interrupt can be disabled while still being handled and plic_irq_eoi() had no effect. That was fixed by checking if the interrupt is disabled, and if so enable it, before sending the completion message. That check is done with irqd_irq_disabled(). However, that is not sufficient because the enable bit for the handling hart can be zero despite irqd_irq_disabled(d) being false. This can happen when affinity setting is changed while a hart is still handling the interrupt. This problem is easily reproducible by dumping a large file to uart (which generates lots of interrupts) and at the same time keep changing the uart interrupt's affinity setting. The uart port becomes frozen almost instantaneously. Fix this by checking PLIC's enable bit instead of irqd_irq_disabled(). Fixes: cc9f04f9a84f ("irqchip/sifive-plic: Implement irq_set_affinity() for SMP host") Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260212114125.3148067-1-namcao@linutronix.de --- drivers/irqchip/irq-sifive-plic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 60fd8f91762b..70058871d2fb 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -172,8 +172,13 @@ static void plic_irq_disable(struct irq_data *d) static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); + u32 __iomem *reg; + bool enabled; + + reg = handler->enable_base + (d->hwirq / 32) * sizeof(u32); + enabled = readl(reg) & BIT(d->hwirq % 32); - if (unlikely(irqd_irq_disabled(d))) { + if (unlikely(!enabled)) { plic_toggle(handler, d->hwirq, 1); writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); plic_toggle(handler, d->hwirq, 0); -- cgit v1.2.3 From ce9e40a9a5e5cff0b1b0d2fa582b3d71a8ce68e8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Feb 2026 15:48:16 +0000 Subject: irqchip/gic-v3-its: Limit number of per-device MSIs to the range the ITS supports The ITS driver blindly assumes that EventIDs are in abundant supply, to the point where it never checks how many the hardware actually supports. It turns out that some pretty esoteric integrations make it so that only a few bits are available, all the way down to a single bit. Enforce the advertised limitation at the point of allocating the device structure, and hope that the endpoint driver can deal with such limitation. Fixes: 84a6a2e7fc18d ("irqchip: GICv3: ITS: device allocation and configuration") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Reviewed-by: Robin Murphy Reviewed-by: Zenghui Yu Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260206154816.3582887-1-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 2988def30972..a51e8e6a8181 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3475,6 +3475,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, int lpi_base; int nr_lpis; int nr_ites; + int id_bits; int sz; if (!its_alloc_device_table(its, dev_id)) @@ -3486,7 +3487,10 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, /* * Even if the device wants a single LPI, the ITT must be * sized as a power of two (and you need at least one bit...). + * Also honor the ITS's own EID limit. */ + id_bits = FIELD_GET(GITS_TYPER_IDBITS, its->typer) + 1; + nvecs = min_t(unsigned int, nvecs, BIT(id_bits)); nr_ites = max(2, nvecs); sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1); sz = max(sz, ITS_ITT_ALIGN); -- cgit v1.2.3 From f0617176be5e497b67b3c87bac35b26ebccac499 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 16 Feb 2026 12:04:50 +0100 Subject: irqchip/mmp: Make icu_irq_chip variable static const File-scope 'icu_irq_chip' is not used outside of this unit and is not modified anywhere, so make it static const to silence sparse warning: irq-mmp.c:139:17: warning: symbol 'icu_irq_chip' was not declared. Should it be static? Signed-off-by: Krzysztof Kozlowski Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260216110449.160277-2-krzysztof.kozlowski@oss.qualcomm.com --- drivers/irqchip/irq-mmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 09e640430208..8e210cb451be 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -136,7 +136,7 @@ static void icu_unmask_irq(struct irq_data *d) } } -struct irq_chip icu_irq_chip = { +static const struct irq_chip icu_irq_chip = { .name = "icu_irq", .irq_mask = icu_mask_irq, .irq_mask_ack = icu_mask_ack_irq, -- cgit v1.2.3 From bffda93a51b40afd67c11bf558dc5aae83ca0943 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 12 Feb 2026 11:23:27 -0800 Subject: scsi: lpfc: Properly set WC for DPP mapping Using set_memory_wc() to enable write-combining for the DPP portion of the MMIO mapping is wrong as set_memory_*() is meant to operate on RAM only, not MMIO mappings. In fact, as used currently triggers a BUG_ON() with enabled CONFIG_DEBUG_VIRTUAL. Simply map the DPP region separately and in addition to the already existing mappings, avoiding any possible negative side effects for these. Fixes: 1351e69fc6db ("scsi: lpfc: Add push-to-adapter support to sli4") Signed-off-by: Mathias Krause Signed-off-by: Justin Tee Reviewed-by: Mathias Krause Link: https://patch.msgid.link/20260212192327.141104-1-justintee8345@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 2 ++ drivers/scsi/lpfc/lpfc_sli.c | 36 ++++++++++++++++++++++++++++++------ drivers/scsi/lpfc/lpfc_sli4.h | 3 +++ 3 files changed, 35 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a116a16c4a6f..b5e53c7d33e7 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -12039,6 +12039,8 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba) iounmap(phba->sli4_hba.conf_regs_memmap_p); if (phba->sli4_hba.dpp_regs_memmap_p) iounmap(phba->sli4_hba.dpp_regs_memmap_p); + if (phba->sli4_hba.dpp_regs_memmap_wc_p) + iounmap(phba->sli4_hba.dpp_regs_memmap_wc_p); break; case LPFC_SLI_INTF_IF_TYPE_1: break; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 734af3d039f8..690763e0aa55 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -15981,6 +15981,32 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) return NULL; } +static __maybe_unused void __iomem * +lpfc_dpp_wc_map(struct lpfc_hba *phba, uint8_t dpp_barset) +{ + + /* DPP region is supposed to cover 64-bit BAR2 */ + if (dpp_barset != WQ_PCI_BAR_4_AND_5) { + lpfc_log_msg(phba, KERN_WARNING, LOG_INIT, + "3273 dpp_barset x%x != WQ_PCI_BAR_4_AND_5\n", + dpp_barset); + return NULL; + } + + if (!phba->sli4_hba.dpp_regs_memmap_wc_p) { + void __iomem *dpp_map; + + dpp_map = ioremap_wc(phba->pci_bar2_map, + pci_resource_len(phba->pcidev, + PCI_64BIT_BAR4)); + + if (dpp_map) + phba->sli4_hba.dpp_regs_memmap_wc_p = dpp_map; + } + + return phba->sli4_hba.dpp_regs_memmap_wc_p; +} + /** * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on EQs * @phba: HBA structure that EQs are on. @@ -16944,9 +16970,6 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, uint8_t dpp_barset; uint32_t dpp_offset; uint8_t wq_create_version; -#ifdef CONFIG_X86 - unsigned long pg_addr; -#endif /* sanity check on queue memory */ if (!wq || !cq) @@ -17132,14 +17155,15 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, #ifdef CONFIG_X86 /* Enable combined writes for DPP aperture */ - pg_addr = (unsigned long)(wq->dpp_regaddr) & PAGE_MASK; - rc = set_memory_wc(pg_addr, 1); - if (rc) { + bar_memmap_p = lpfc_dpp_wc_map(phba, dpp_barset); + if (!bar_memmap_p) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3272 Cannot setup Combined " "Write on WQ[%d] - disable DPP\n", wq->queue_id); phba->cfg_enable_dpp = 0; + } else { + wq->dpp_regaddr = bar_memmap_p + dpp_offset; } #else phba->cfg_enable_dpp = 0; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index ee58383492b2..b6d90604bb61 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -785,6 +785,9 @@ struct lpfc_sli4_hba { void __iomem *dpp_regs_memmap_p; /* Kernel memory mapped address for * dpp registers */ + void __iomem *dpp_regs_memmap_wc_p;/* Kernel memory mapped address for + * dpp registers with write combining + */ union { struct { /* IF Type 0, BAR 0 PCI cfg space reg mem map */ -- cgit v1.2.3 From 57297736c08233987e5d29ce6584c6ca2a831b12 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 29 Jan 2026 15:30:39 +0100 Subject: scsi: storvsc: Fix scheduling while atomic on PREEMPT_RT This resolves the follow splat and lock-up when running with PREEMPT_RT enabled on Hyper-V: [ 415.140818] BUG: scheduling while atomic: stress-ng-iomix/1048/0x00000002 [ 415.140822] INFO: lockdep is turned off. [ 415.140823] Modules linked in: intel_rapl_msr intel_rapl_common intel_uncore_frequency_common intel_pmc_core pmt_telemetry pmt_discovery pmt_class intel_pmc_ssram_telemetry intel_vsec ghash_clmulni_intel aesni_intel rapl binfmt_misc nls_ascii nls_cp437 vfat fat snd_pcm hyperv_drm snd_timer drm_client_lib drm_shmem_helper snd sg soundcore drm_kms_helper pcspkr hv_balloon hv_utils evdev joydev drm configfs efi_pstore nfnetlink vsock_loopback vmw_vsock_virtio_transport_common hv_sock vmw_vsock_vmci_transport vsock vmw_vmci efivarfs autofs4 ext4 crc16 mbcache jbd2 sr_mod sd_mod cdrom hv_storvsc serio_raw hid_generic scsi_transport_fc hid_hyperv scsi_mod hid hv_netvsc hyperv_keyboard scsi_common [ 415.140846] Preemption disabled at: [ 415.140847] [] storvsc_queuecommand+0x2e1/0xbe0 [hv_storvsc] [ 415.140854] CPU: 8 UID: 0 PID: 1048 Comm: stress-ng-iomix Not tainted 6.19.0-rc7 #30 PREEMPT_{RT,(full)} [ 415.140856] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/04/2024 [ 415.140857] Call Trace: [ 415.140861] [ 415.140861] ? storvsc_queuecommand+0x2e1/0xbe0 [hv_storvsc] [ 415.140863] dump_stack_lvl+0x91/0xb0 [ 415.140870] __schedule_bug+0x9c/0xc0 [ 415.140875] __schedule+0xdf6/0x1300 [ 415.140877] ? rtlock_slowlock_locked+0x56c/0x1980 [ 415.140879] ? rcu_is_watching+0x12/0x60 [ 415.140883] schedule_rtlock+0x21/0x40 [ 415.140885] rtlock_slowlock_locked+0x502/0x1980 [ 415.140891] rt_spin_lock+0x89/0x1e0 [ 415.140893] hv_ringbuffer_write+0x87/0x2a0 [ 415.140899] vmbus_sendpacket_mpb_desc+0xb6/0xe0 [ 415.140900] ? rcu_is_watching+0x12/0x60 [ 415.140902] storvsc_queuecommand+0x669/0xbe0 [hv_storvsc] [ 415.140904] ? HARDIRQ_verbose+0x10/0x10 [ 415.140908] ? __rq_qos_issue+0x28/0x40 [ 415.140911] scsi_queue_rq+0x760/0xd80 [scsi_mod] [ 415.140926] __blk_mq_issue_directly+0x4a/0xc0 [ 415.140928] blk_mq_issue_direct+0x87/0x2b0 [ 415.140931] blk_mq_dispatch_queue_requests+0x120/0x440 [ 415.140933] blk_mq_flush_plug_list+0x7a/0x1a0 [ 415.140935] __blk_flush_plug+0xf4/0x150 [ 415.140940] __submit_bio+0x2b2/0x5c0 [ 415.140944] ? submit_bio_noacct_nocheck+0x272/0x360 [ 415.140946] submit_bio_noacct_nocheck+0x272/0x360 [ 415.140951] ext4_read_bh_lock+0x3e/0x60 [ext4] [ 415.140995] ext4_block_write_begin+0x396/0x650 [ext4] [ 415.141018] ? __pfx_ext4_da_get_block_prep+0x10/0x10 [ext4] [ 415.141038] ext4_da_write_begin+0x1c4/0x350 [ext4] [ 415.141060] generic_perform_write+0x14e/0x2c0 [ 415.141065] ext4_buffered_write_iter+0x6b/0x120 [ext4] [ 415.141083] vfs_write+0x2ca/0x570 [ 415.141087] ksys_write+0x76/0xf0 [ 415.141089] do_syscall_64+0x99/0x1490 [ 415.141093] ? rcu_is_watching+0x12/0x60 [ 415.141095] ? finish_task_switch.isra.0+0xdf/0x3d0 [ 415.141097] ? rcu_is_watching+0x12/0x60 [ 415.141098] ? lock_release+0x1f0/0x2a0 [ 415.141100] ? rcu_is_watching+0x12/0x60 [ 415.141101] ? finish_task_switch.isra.0+0xe4/0x3d0 [ 415.141103] ? rcu_is_watching+0x12/0x60 [ 415.141104] ? __schedule+0xb34/0x1300 [ 415.141106] ? hrtimer_try_to_cancel+0x1d/0x170 [ 415.141109] ? do_nanosleep+0x8b/0x160 [ 415.141111] ? hrtimer_nanosleep+0x89/0x100 [ 415.141114] ? __pfx_hrtimer_wakeup+0x10/0x10 [ 415.141116] ? xfd_validate_state+0x26/0x90 [ 415.141118] ? rcu_is_watching+0x12/0x60 [ 415.141120] ? do_syscall_64+0x1e0/0x1490 [ 415.141121] ? do_syscall_64+0x1e0/0x1490 [ 415.141123] ? rcu_is_watching+0x12/0x60 [ 415.141124] ? do_syscall_64+0x1e0/0x1490 [ 415.141125] ? do_syscall_64+0x1e0/0x1490 [ 415.141127] ? irqentry_exit+0x140/0x7e0 [ 415.141129] entry_SYSCALL_64_after_hwframe+0x76/0x7e get_cpu() disables preemption while the spinlock hv_ringbuffer_write is using is converted to an rt-mutex under PREEMPT_RT. Signed-off-by: Jan Kiszka Tested-by: Florian Bezdeka Reviewed-by: Michael Kelley Tested-by: Michael Kelley Link: https://patch.msgid.link/0c7fb5cd-fb21-4760-8593-e04bade84744@siemens.com Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 19e18939d3a5..19ff0004e259 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1855,8 +1855,9 @@ static enum scsi_qc_status storvsc_queuecommand(struct Scsi_Host *host, cmd_request->payload_sz = payload_sz; /* Invokes the vsc to start an IO */ - ret = storvsc_do_io(dev, cmd_request, get_cpu()); - put_cpu(); + migrate_disable(); + ret = storvsc_do_io(dev, cmd_request, smp_processor_id()); + migrate_enable(); if (ret) scsi_dma_unmap(scmnd); -- cgit v1.2.3 From 2e6b5cd6a4b37a95b78cf8c39a979b58c915c8ed Mon Sep 17 00:00:00 2001 From: Alexey Charkov Date: Mon, 9 Feb 2026 19:17:34 +0400 Subject: scsi: ufs: core: Fix RPMB region size detection for UFS 2.2 Older UFS spec devices (2.2 and earlier) do not expose per-region RPMB sizes, as only one RPMB region is supported. In such cases, the size of the single RPMB region can be deduced from the Logical Block Count and Logical Block Size fields in the RPMB Unit Descriptor. Add a fallback mechanism to calculate the RPMB region size from these fields if the device implements an older spec, so that the RPMB driver can work with such devices - otherwise it silently skips the whole RPMB. Section 14.1.4.6 (RPMB Unit Descriptor) Link: https://www.jedec.org/system/files/docs/JESD220C-2_2.pdf Cc: stable@vger.kernel.org Fixes: b06b8c421485 ("scsi: ufs: core: Add OP-TEE based RPMB driver for UFS devices") Reviewed-by: Bean Huo Signed-off-by: Alexey Charkov Link: https://patch.msgid.link/20260209-ufs-rpmb-v3-1-b1804e71bd38@flipper.net Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'drivers') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 8349fe2090db..12f1bdc70587 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -5248,6 +5249,25 @@ static void ufshcd_lu_init(struct ufs_hba *hba, struct scsi_device *sdev) hba->dev_info.rpmb_region_size[1] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION1_SIZE]; hba->dev_info.rpmb_region_size[2] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION2_SIZE]; hba->dev_info.rpmb_region_size[3] = desc_buf[RPMB_UNIT_DESC_PARAM_REGION3_SIZE]; + + if (hba->dev_info.wspecversion <= 0x0220) { + /* + * These older spec chips have only one RPMB region, + * sized between 128 kB minimum and 16 MB maximum. + * No per region size fields are provided (respective + * REGIONX_SIZE fields always contain zeros), so get + * it from the logical block count and size fields for + * compatibility + * + * (See JESD220C-2_2 Section 14.1.4.6 + * RPMB Unit Descriptor,* offset 13h, 4 bytes) + */ + hba->dev_info.rpmb_region_size[0] = + (get_unaligned_be64(desc_buf + + RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_COUNT) + << desc_buf[RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_SIZE]) + / SZ_128K; + } } -- cgit v1.2.3 From 70ca8caa96ce473647054f5c7b9dab5423902402 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Tue, 10 Feb 2026 20:18:50 +0100 Subject: scsi: ses: Fix devices attaching to different hosts On a multipath SAS system some devices don't end up with correct symlinks from the SCSI device to its enclosure. Some devices even have enclosure links pointing to enclosures attached to different SCSI hosts. ses_match_to_enclosure() calls enclosure_for_each_device() which iterates over all enclosures on the system, not just enclosures attached to the current SCSI host. Replace the iteration with a direct call to ses_enclosure_find_by_addr(). Reviewed-by: David Jeffery Signed-off-by: Tomas Henzl Link: https://patch.msgid.link/20260210191850.36784-1-thenzl@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/ses.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 789b170da652..98a7367d2f20 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -528,9 +528,8 @@ struct efd { }; static int ses_enclosure_find_by_addr(struct enclosure_device *edev, - void *data) + struct efd *efd) { - struct efd *efd = data; int i; struct ses_component *scomp; @@ -683,7 +682,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev, if (efd.addr) { efd.dev = &sdev->sdev_gendev; - enclosure_for_each_device(ses_enclosure_find_by_addr, &efd); + ses_enclosure_find_by_addr(edev, &efd); } } -- cgit v1.2.3 From 5b313760059c9df7d60aba7832279bcb81b4aec0 Mon Sep 17 00:00:00 2001 From: Won Jung Date: Wed, 11 Feb 2026 15:01:05 +0900 Subject: scsi: ufs: core: Reset urgent_bkops_lvl to allow runtime PM power mode Ensures that UFS Runtime PM can achieve power saving after System PM suspend by resetting hba->urgent_bkops_lvl. Also modify the ufshcd_bkops_exception_event_handler to avoid setting urgent_bkops_lvl when status is 0, which helps maintain optimal power management. On UFS devices supporting UFSHCD_CAP_AUTO_BKOPS_SUSPEND, a BKOPS exception event can lead to a situation where UFS Runtime PM can't enter low-power mode states even after the BKOPS exception has been resolved. BKOPS exception with bkops status 0 occurs, the driver logs: "ufshcd_bkops_exception_event_handler: device raised urgent BKOPS exception for bkops status 0" When a BKOPS exception occurs, ufshcd_bkops_exception_event_handler() reads the BKOPS status and sets hba->urgent_bkops_lvl to BKOPS_STATUS_NO_OP(0). This allows the device to perform Runtime PM without changing the UFS power mode. (__ufshcd_wl_suspend(hba, UFS_RUNTIME_PM)) During system PM suspend, ufshcd_disable_auto_bkops() is called, disabling auto bkops. After UFS System PM Resume, when runtime PM attempts to suspend again, ufshcd_urgent_bkops() is invoked. Since hba->urgent_bkops_lvl remains at BKOPS_STATUS_NO_OP(0), ufshcd_enable_auto_bkops() is triggered. However, in ufshcd_bkops_ctrl(), the driver compares the current BKOPS status with hba->urgent_bkops_lvl, and only enables auto bkops if curr_status >= hba->urgent_bkops_lvl. Since both values are 0, the condition is met As a result, __ufshcd_wl_suspend(hba, UFS_RUNTIME_PM) skips power mode transitions and remains in an active state, preventing power saving even though no urgent BKOPS condition exists. Signed-off-by: Won Jung Reviewed-by: Peter Wang Link: https://patch.msgid.link/1891546521.01770806581968.JavaMail.epsvc@epcpadp2new Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 12f1bdc70587..3bbc2b51c87b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5982,6 +5982,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) hba->auto_bkops_enabled = false; trace_ufshcd_auto_bkops_state(hba, "Disabled"); + hba->urgent_bkops_lvl = BKOPS_STATUS_PERF_IMPACT; hba->is_urgent_bkops_lvl_checked = false; out: return err; @@ -6085,7 +6086,7 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) * impacted or critical. Handle these device by determining their urgent * bkops status at runtime. */ - if (curr_status < BKOPS_STATUS_PERF_IMPACT) { + if ((curr_status > BKOPS_STATUS_NO_OP) && (curr_status < BKOPS_STATUS_PERF_IMPACT)) { dev_err(hba->dev, "%s: device raised urgent BKOPS exception for bkops status %d\n", __func__, curr_status); /* update the current status as the urgent bkops level */ -- cgit v1.2.3 From fa96392ebebc8fade2b878acb14cce0f71016503 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Thu, 12 Feb 2026 12:30:26 +0530 Subject: scsi: mpi3mr: Add NULL checks when resetting request and reply queues The driver encountered a crash during resource cleanup when the reply and request queues were NULL due to freed memory. This issue occurred when the creation of reply or request queues failed, and the driver freed the memory first, but attempted to mem set the content of the freed memory, leading to a system crash. Add NULL pointer checks for reply and request queues before accessing the reply/request memory during cleanup Signed-off-by: Ranjan Kumar Link: https://patch.msgid.link/20260212070026.30263-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 1cfbdb773353..04d4a2aea7d7 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -4806,21 +4806,25 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc) } for (i = 0; i < mrioc->num_queues; i++) { - mrioc->op_reply_qinfo[i].qid = 0; - mrioc->op_reply_qinfo[i].ci = 0; - mrioc->op_reply_qinfo[i].num_replies = 0; - mrioc->op_reply_qinfo[i].ephase = 0; - atomic_set(&mrioc->op_reply_qinfo[i].pend_ios, 0); - atomic_set(&mrioc->op_reply_qinfo[i].in_use, 0); - mpi3mr_memset_op_reply_q_buffers(mrioc, i); - - mrioc->req_qinfo[i].ci = 0; - mrioc->req_qinfo[i].pi = 0; - mrioc->req_qinfo[i].num_requests = 0; - mrioc->req_qinfo[i].qid = 0; - mrioc->req_qinfo[i].reply_qid = 0; - spin_lock_init(&mrioc->req_qinfo[i].q_lock); - mpi3mr_memset_op_req_q_buffers(mrioc, i); + if (mrioc->op_reply_qinfo) { + mrioc->op_reply_qinfo[i].qid = 0; + mrioc->op_reply_qinfo[i].ci = 0; + mrioc->op_reply_qinfo[i].num_replies = 0; + mrioc->op_reply_qinfo[i].ephase = 0; + atomic_set(&mrioc->op_reply_qinfo[i].pend_ios, 0); + atomic_set(&mrioc->op_reply_qinfo[i].in_use, 0); + mpi3mr_memset_op_reply_q_buffers(mrioc, i); + } + + if (mrioc->req_qinfo) { + mrioc->req_qinfo[i].ci = 0; + mrioc->req_qinfo[i].pi = 0; + mrioc->req_qinfo[i].num_requests = 0; + mrioc->req_qinfo[i].qid = 0; + mrioc->req_qinfo[i].reply_qid = 0; + spin_lock_init(&mrioc->req_qinfo[i].q_lock); + mpi3mr_memset_op_req_q_buffers(mrioc, i); + } } atomic_set(&mrioc->pend_large_data_sz, 0); -- cgit v1.2.3 From 38353c26db28efd984f51d426eac2396d299cca7 Mon Sep 17 00:00:00 2001 From: Salomon Dushimirimana Date: Fri, 13 Feb 2026 19:28:06 +0000 Subject: scsi: pm8001: Fix use-after-free in pm8001_queue_command() Commit e29c47fe8946 ("scsi: pm8001: Simplify pm8001_task_exec()") refactors pm8001_queue_command(), however it introduces a potential cause of a double free scenario when it changes the function to return -ENODEV in case of phy down/device gone state. In this path, pm8001_queue_command() updates task status and calls task_done to indicate to upper layer that the task has been handled. However, this also frees the underlying SAS task. A -ENODEV is then returned to the caller. When libsas sas_ata_qc_issue() receives this error value, it assumes the task wasn't handled/queued by LLDD and proceeds to clean up and free the task again, resulting in a double free. Since pm8001_queue_command() handles the SAS task in this case, it should return 0 to the caller indicating that the task has been handled. Fixes: e29c47fe8946 ("scsi: pm8001: Simplify pm8001_task_exec()") Signed-off-by: Salomon Dushimirimana Reviewed-by: Damien Le Moal Link: https://patch.msgid.link/20260213192806.439432-1-salomondush@google.com Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_sas.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 6a8d35aea93a..645524f3fe2d 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -525,8 +525,9 @@ int pm8001_queue_command(struct sas_task *task, gfp_t gfp_flags) } else { task->task_done(task); } - rc = -ENODEV; - goto err_out; + spin_unlock_irqrestore(&pm8001_ha->lock, flags); + pm8001_dbg(pm8001_ha, IO, "pm8001_task_exec device gone\n"); + return 0; } ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_dev, task); -- cgit v1.2.3 From af3973e7b4fd91e6884b312526168869fb013d68 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Mon, 16 Feb 2026 15:10:55 +0100 Subject: scsi: snic: Remove unused linkstatus The (struct vnic_dev).linkstatus buffer is freed in svnic_dev_unregister() and referenced in svnic_dev_link_status() but never alloc'd. This means (struct vnic_dev).linkstatus is always null and the dealloc the reference in svnic_dev_link_status() is dead code. Signed-off-by: Thomas Fourier Acked-by: Karan Tilak Kumar Link: https://patch.msgid.link/20260216141056.59429-2-fourier.thomas@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/snic/vnic_dev.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/snic/vnic_dev.c b/drivers/scsi/snic/vnic_dev.c index 760f3f22095c..c4df0b17c86c 100644 --- a/drivers/scsi/snic/vnic_dev.c +++ b/drivers/scsi/snic/vnic_dev.c @@ -42,8 +42,6 @@ struct vnic_dev { struct vnic_devcmd_notify *notify; struct vnic_devcmd_notify notify_copy; dma_addr_t notify_pa; - u32 *linkstatus; - dma_addr_t linkstatus_pa; struct vnic_stats *stats; dma_addr_t stats_pa; struct vnic_devcmd_fw_info *fw_info; @@ -650,8 +648,6 @@ int svnic_dev_init(struct vnic_dev *vdev, int arg) int svnic_dev_link_status(struct vnic_dev *vdev) { - if (vdev->linkstatus) - return *vdev->linkstatus; if (!vnic_dev_notify_ready(vdev)) return 0; @@ -686,11 +682,6 @@ void svnic_dev_unregister(struct vnic_dev *vdev) sizeof(struct vnic_devcmd_notify), vdev->notify, vdev->notify_pa); - if (vdev->linkstatus) - dma_free_coherent(&vdev->pdev->dev, - sizeof(u32), - vdev->linkstatus, - vdev->linkstatus_pa); if (vdev->stats) dma_free_coherent(&vdev->pdev->dev, sizeof(struct vnic_stats), -- cgit v1.2.3 From 7be41fb00e2c2a823f271a8318b453ca11812f1e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Oct 2025 08:30:11 +0300 Subject: accel: ethosu: Fix shift overflow in cmd_to_addr() The "((cmd[0] & 0xff0000) << 16)" shift is zero. This was intended to be (((u64)cmd[0] & 0xff0000) << 16). Move the cast to the correct location. Fixes: 5a5e9c0228e6 ("accel: Add Arm Ethos-U NPU driver") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aQGmY64tWcwOGFP4@stanley.mountain Signed-off-by: Rob Herring (Arm) --- drivers/accel/ethosu/ethosu_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c index 473b5f5d7514..7b073116314b 100644 --- a/drivers/accel/ethosu/ethosu_gem.c +++ b/drivers/accel/ethosu/ethosu_gem.c @@ -154,7 +154,7 @@ static void cmd_state_init(struct cmd_state *st) static u64 cmd_to_addr(u32 *cmd) { - return ((u64)((cmd[0] & 0xff0000) << 16)) | cmd[1]; + return (((u64)cmd[0] & 0xff0000) << 16) | cmd[1]; } static u64 dma_length(struct ethosu_validated_cmdstream_info *info, -- cgit v1.2.3 From 023cd6d90f8aa2ef7b72d84be84a18e61ecebd64 Mon Sep 17 00:00:00 2001 From: Piotr Mazek Date: Thu, 5 Feb 2026 23:05:02 +0100 Subject: ACPI: PM: Save NVS memory on Lenovo G70-35 [821d6f0359b0614792ab8e2fb93b503e25a65079] prevented machines produced later than 2012 from saving NVS region to accelerate S3. Despite being made after 2012, Lenovo G70-35 still needs NVS memory saving during S3. A quirk is introduced for this platform. Signed-off-by: Piotr Mazek [ rjw: Subject adjustment ] Link: https://patch.msgid.link/GV2PPF3CD5B63CC2442EE3F76F8443EAD90D499A@GV2PPF3CD5B63CC.EURP251.PROD.OUTLOOK.COM Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 66ec81e306d4..132a9df98471 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -386,6 +386,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "80E1"), }, }, + { + .callback = init_nvs_save_s3, + .ident = "Lenovo G70-35", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "80Q5"), + }, + }, /* * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using * the Low Power S0 Idle firmware interface (see -- cgit v1.2.3 From ab140365fb62c0bdab22b2f516aff563b2559e3b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 19 Feb 2026 15:20:12 +0100 Subject: drbd: fix "LOGIC BUG" in drbd_al_begin_io_nonblock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even though we check that we "should" be able to do lc_get_cumulative() while holding the device->al_lock spinlock, it may still fail, if some other code path decided to do lc_try_lock() with bad timing. If that happened, we logged "LOGIC BUG for enr=...", but still did not return an error. The rest of the code now assumed that this request has references for the relevant activity log extents. The implcations are that during an active resync, mutual exclusivity of resync versus application IO is not guaranteed. And a potential crash at this point may not realizs that these extents could have been target of in-flight IO and would need to be resynced just in case. Also, once the request completes, it will give up activity log references it does not even hold, which will trigger a BUG_ON(refcnt == 0) in lc_put(). Fix: Do not crash the kernel for a condition that is harmless during normal operation: also catch "e->refcnt == 0", not only "e == NULL" when being noisy about "al_complete_io() called on inactive extent %u\n". And do not try to be smart and "guess" whether something will work, then be surprised when it does not. Deal with the fact that it may or may not work. If it does not, remember a possible "partially in activity log" state (only possible for requests that cross extent boundaries), and return an error code from drbd_al_begin_io_nonblock(). A latter call for the same request will then resume from where we left off. Cc: stable@vger.kernel.org Signed-off-by: Lars Ellenberg Signed-off-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 53 +++++++++++++++++--------------------- drivers/block/drbd/drbd_interval.h | 5 +++- 2 files changed, 27 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 742b2908ff68..b3dbf6c76e98 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -483,38 +483,20 @@ void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i) int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i) { - struct lru_cache *al = device->act_log; /* for bios crossing activity log extent boundaries, * we may need to activate two extents in one go */ unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - unsigned nr_al_extents; - unsigned available_update_slots; unsigned enr; - D_ASSERT(device, first <= last); - - nr_al_extents = 1 + last - first; /* worst case: all touched extends are cold. */ - available_update_slots = min(al->nr_elements - al->used, - al->max_pending_changes - al->pending_changes); - - /* We want all necessary updates for a given request within the same transaction - * We could first check how many updates are *actually* needed, - * and use that instead of the worst-case nr_al_extents */ - if (available_update_slots < nr_al_extents) { - /* Too many activity log extents are currently "hot". - * - * If we have accumulated pending changes already, - * we made progress. - * - * If we cannot get even a single pending change through, - * stop the fast path until we made some progress, - * or requests to "cold" extents could be starved. */ - if (!al->pending_changes) - __set_bit(__LC_STARVING, &device->act_log->flags); - return -ENOBUFS; + if (i->partially_in_al_next_enr) { + D_ASSERT(device, first < i->partially_in_al_next_enr); + D_ASSERT(device, last >= i->partially_in_al_next_enr); + first = i->partially_in_al_next_enr; } + D_ASSERT(device, first <= last); + /* Is resync active in this area? */ for (enr = first; enr <= last; enr++) { struct lc_element *tmp; @@ -529,14 +511,21 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval * } } - /* Checkout the refcounts. - * Given that we checked for available elements and update slots above, - * this has to be successful. */ + /* Try to checkout the refcounts. */ for (enr = first; enr <= last; enr++) { struct lc_element *al_ext; al_ext = lc_get_cumulative(device->act_log, enr); - if (!al_ext) - drbd_info(device, "LOGIC BUG for enr=%u\n", enr); + + if (!al_ext) { + /* Did not work. We may have exhausted the possible + * changes per transaction. Or raced with someone + * "locking" it against changes. + * Remember where to continue from. + */ + if (enr > first) + i->partially_in_al_next_enr = enr; + return -ENOBUFS; + } } return 0; } @@ -556,7 +545,11 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i) for (enr = first; enr <= last; enr++) { extent = lc_find(device->act_log, enr); - if (!extent) { + /* Yes, this masks a bug elsewhere. However, during normal + * operation this is harmless, so no need to crash the kernel + * by the BUG_ON(refcount == 0) in lc_put(). + */ + if (!extent || extent->refcnt == 0) { drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr); continue; } diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index 366489b72fe9..5d3213b81eed 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -8,12 +8,15 @@ struct drbd_interval { struct rb_node rb; sector_t sector; /* start sector of the interval */ - unsigned int size; /* size in bytes */ sector_t end; /* highest interval end in subtree */ + unsigned int size; /* size in bytes */ unsigned int local:1 /* local or remote request? */; unsigned int waiting:1; /* someone is waiting for completion */ unsigned int completed:1; /* this has been completed already; * ignore for conflict detection */ + + /* to resume a partially successful drbd_al_begin_io_nonblock(); */ + unsigned int partially_in_al_next_enr; }; static inline void drbd_clear_interval(struct drbd_interval *i) -- cgit v1.2.3 From 81b1f046ff8a5ad5da2c970cff354b61dfa1d6b1 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 12 Jan 2026 18:04:12 +0100 Subject: drbd: Replace deprecated strcpy with strscpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit strcpy() has been deprecated [1] because it performs no bounds checking on the destination buffer, which can lead to buffer overflows. Replace it with the safer strscpy(). No functional changes. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy [1] Signed-off-by: Thorsten Blum Reviewed-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 14 +++++++++----- drivers/block/drbd/drbd_receiver.c | 4 ++-- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 1f6ac9202b66..ba00b5f21a49 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -732,9 +733,9 @@ int drbd_send_sync_param(struct drbd_peer_device *peer_device) } if (apv >= 88) - strcpy(p->verify_alg, nc->verify_alg); + strscpy(p->verify_alg, nc->verify_alg); if (apv >= 89) - strcpy(p->csums_alg, nc->csums_alg); + strscpy(p->csums_alg, nc->csums_alg); rcu_read_unlock(); return drbd_send_command(peer_device, sock, cmd, size, NULL, 0); @@ -745,6 +746,7 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm struct drbd_socket *sock; struct p_protocol *p; struct net_conf *nc; + size_t integrity_alg_len; int size, cf; sock = &connection->data; @@ -762,8 +764,10 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm } size = sizeof(*p); - if (connection->agreed_pro_version >= 87) - size += strlen(nc->integrity_alg) + 1; + if (connection->agreed_pro_version >= 87) { + integrity_alg_len = strlen(nc->integrity_alg) + 1; + size += integrity_alg_len; + } p->protocol = cpu_to_be32(nc->wire_protocol); p->after_sb_0p = cpu_to_be32(nc->after_sb_0p); @@ -778,7 +782,7 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm p->conn_flags = cpu_to_be32(cf); if (connection->agreed_pro_version >= 87) - strcpy(p->integrity_alg, nc->integrity_alg); + strscpy(p->integrity_alg, nc->integrity_alg, integrity_alg_len); rcu_read_unlock(); return __conn_send_command(connection, sock, cmd, size, NULL, 0); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3de919b6f0e1..3d0a061b6c40 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3801,14 +3801,14 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i *new_net_conf = *old_net_conf; if (verify_tfm) { - strcpy(new_net_conf->verify_alg, p->verify_alg); + strscpy(new_net_conf->verify_alg, p->verify_alg); new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; crypto_free_shash(peer_device->connection->verify_tfm); peer_device->connection->verify_tfm = verify_tfm; drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { - strcpy(new_net_conf->csums_alg, p->csums_alg); + strscpy(new_net_conf->csums_alg, p->csums_alg); new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; crypto_free_shash(peer_device->connection->csums_tfm); peer_device->connection->csums_tfm = csums_tfm; -- cgit v1.2.3 From 64868f5ecadeb359a49bc4485bfa7c497047f13a Mon Sep 17 00:00:00 2001 From: Ziyi Guo Date: Tue, 17 Feb 2026 17:50:12 +0000 Subject: net: usb: kaweth: remove TX queue manipulation in kaweth_set_rx_mode kaweth_set_rx_mode(), the ndo_set_rx_mode callback, calls netif_stop_queue() and netif_wake_queue(). These are TX queue flow control functions unrelated to RX multicast configuration. The premature netif_wake_queue() can re-enable TX while tx_urb is still in-flight, leading to a double usb_submit_urb() on the same URB: kaweth_start_xmit() { netif_stop_queue(); usb_submit_urb(kaweth->tx_urb); } kaweth_set_rx_mode() { netif_stop_queue(); netif_wake_queue(); // wakes TX queue before URB is done } kaweth_start_xmit() { netif_stop_queue(); usb_submit_urb(kaweth->tx_urb); // URB submitted while active } This triggers the WARN in usb_submit_urb(): "URB submitted while active" This is a similar class of bug fixed in rtl8150 by - commit 958baf5eaee3 ("net: usb: Remove disruptive netif_wake_queue in rtl8150_set_multicast"). Also kaweth_set_rx_mode() is already functionally broken, the real set_rx_mode action is performed by kaweth_async_set_rx_mode(), which in turn is not a no-op only at ndo_open() time. Suggested-by: Paolo Abeni Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ziyi Guo Link: https://patch.msgid.link/20260217175012.1234494-1-n7l8m4@u.northwestern.edu Signed-off-by: Jakub Kicinski --- drivers/net/usb/kaweth.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index c9efb7df892e..e01d14f6c366 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -765,7 +765,6 @@ static void kaweth_set_rx_mode(struct net_device *net) netdev_dbg(net, "Setting Rx mode to %d\n", packet_filter_bitmap); - netif_stop_queue(net); if (net->flags & IFF_PROMISC) { packet_filter_bitmap |= KAWETH_PACKET_FILTER_PROMISCUOUS; @@ -775,7 +774,6 @@ static void kaweth_set_rx_mode(struct net_device *net) } kaweth->packet_filter_bitmap = packet_filter_bitmap; - netif_wake_queue(net); } /**************************************************************** -- cgit v1.2.3 From f1e2f0ce704e4a14e3f367d3b97d3dd2d8e183b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20P=C3=A5lsson?= Date: Wed, 18 Feb 2026 05:28:22 +0000 Subject: net: usb: lan78xx: scan all MDIO addresses on LAN7801 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LAN7801 is designed exclusively for external PHYs (unlike the LAN7800/LAN7850 which have internal PHYs), but lan78xx_mdio_init() restricts PHY scanning to MDIO addresses 0-7 by setting phy_mask to ~(0xFF). This prevents discovery of external PHYs wired to addresses outside that range. One such case is the DP83TC814 100BASE-T1 PHY, which is typically configured at MDIO address 10 via PHYAD bootstrap pins and goes undetected with the current mask. Remove the restrictive phy_mask assignment for the LAN7801 so that the default mask of 0 applies, allowing all 32 MDIO addresses to be scanned during bus registration. Fixes: 02dc1f3d613d ("lan78xx: add LAN7801 MAC only support") Signed-off-by: Martin Pålsson Link: https://patch.msgid.link/0110019c6f388aff-98d99cf0-4425-4fff-b16b-dea5ad8fafe0-000000@eu-north-1.amazonses.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 00397a807393..065588c9cfa6 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2094,8 +2094,6 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev) dev->mdiobus->phy_mask = ~(1 << 1); break; case ID_REV_CHIP_ID_7801_: - /* scan thru PHYAD[2..0] */ - dev->mdiobus->phy_mask = ~(0xFF); break; } -- cgit v1.2.3 From bfd264fbbbca7a39ea430d7bc2baf8ee2ea958e4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 18 Feb 2026 18:05:51 +0200 Subject: net: dsa: sja1105: protect link replay helpers against NULL phylink instance There is a crash when unbinding the sja1105 driver under special circumstances: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000030 Call trace: phylink_run_resolve_and_disable+0x10/0x90 sja1105_static_config_reload+0xc0/0x410 sja1105_vlan_filtering+0x100/0x140 dsa_port_vlan_filtering+0x13c/0x368 dsa_port_reset_vlan_filtering.isra.0+0xe8/0x198 dsa_port_bridge_leave+0x130/0x248 dsa_user_changeupper.part.0+0x74/0x158 dsa_user_netdevice_event+0x50c/0xa50 notifier_call_chain+0x78/0x148 raw_notifier_call_chain+0x20/0x38 call_netdevice_notifiers_info+0x58/0xa8 __netdev_upper_dev_unlink+0xac/0x220 netdev_upper_dev_unlink+0x38/0x70 del_nbp+0x1a4/0x320 br_del_if+0x3c/0xd8 br_device_event+0xf8/0x2d8 notifier_call_chain+0x78/0x148 raw_notifier_call_chain+0x20/0x38 call_netdevice_notifiers_info+0x58/0xa8 unregister_netdevice_many_notify+0x314/0x848 unregister_netdevice_queue+0xe8/0xf8 dsa_user_destroy+0x50/0xa8 dsa_port_teardown+0x80/0x98 dsa_switch_teardown_ports+0x4c/0xb8 dsa_switch_deinit+0x94/0xb8 dsa_switch_put_tree+0x2c/0xc0 dsa_unregister_switch+0x38/0x60 sja1105_remove+0x24/0x40 spi_remove+0x38/0x60 device_remove+0x54/0x90 device_release_driver_internal+0x1d4/0x230 device_driver_detach+0x20/0x38 unbind_store+0xbc/0xc8 ---[ end trace 0000000000000000 ]--- which requires an explanation. When a port offloads a bridge, the switch must be reset to change the VLAN awareness state (the SJA1105_VLAN_FILTERING reason for sja1105_static_config_reload()). When the port leaves a VLAN-aware bridge, it must also be reset for the same reason: it is returning to operation as a VLAN-unaware standalone port. sja1105_static_config_reload() triggers the phylink link replay helpers. Because sja1105 is a switch, it has multiple user ports. During unbind, ports are torn down one by one in dsa_switch_teardown_ports() -> dsa_port_teardown() -> dsa_user_destroy(). The crash happens when the first user port is not part of the VLAN-aware bridge, but any other user port is. Tearing down the first user port causes phylink_destroy() to be called on dp->pl, and this pointer to be set to NULL. Then, when the second user port is torn down, this was offloading a VLAN-aware bridge port, so indirectly it will trigger sja1105_static_config_reload(). The latter function iterates using dsa_switch_for_each_available_port(), and unconditionally dereferences dp->pl, including for the aforementioned torn down previous port, and passes that to phylink. This is where the NULL pointer is coming from. There are multiple levels at which this could be avoided: - add an "if (dp->pl)" in sja1105_static_config_reload() - make the phylink replay helpers NULL-tolerant - mark ports as DSA_PORT_TYPE_UNUSED after dsa_port_phylink_destroy() has run, such that subsequent dsa_switch_for_each_available_port() iterations skip them - disconnect the entire switch at once from switchdev and NETDEV_CHANGEUPPER events while unbinding, not just port by port, likely using a "ds->unbinding = true" mechanism or similar however options 3 and 4 are quite heavy and might have side effects. Although 2 allows to keep the driver simpler, the phylink API it not NULL-tolerant in general and is not responsible for the NULL pointer (this is something done by dsa_port_phylink_destroy()). So I went with 1. Functionally speaking, skipping the replay helpers for ports without a phylink instance is fine, because that only happens during driver removal (an operation which cannot be cancelled). The ports are not required to work (although they probably still will - untested assumption - as long as we don't overwrite the last port speed with SJA1105_SPEED_AUTO). Fixes: 0b2edc531e0b ("net: dsa: sja1105: let phylink help with the replay of link callbacks") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260218160551.194782-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 71a817c07a90..94d17b06da40 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2278,6 +2278,12 @@ int sja1105_static_config_reload(struct sja1105_private *priv, * change it through the dynamic interface later. */ dsa_switch_for_each_available_port(dp, ds) { + /* May be called during unbind when we unoffload a VLAN-aware + * bridge from port 1 while port 0 was already torn down + */ + if (!dp->pl) + continue; + phylink_replay_link_begin(dp->pl); mac[dp->index].speed = priv->info->port_speed[SJA1105_SPEED_AUTO]; } @@ -2334,7 +2340,8 @@ int sja1105_static_config_reload(struct sja1105_private *priv, } dsa_switch_for_each_available_port(dp, ds) - phylink_replay_link_end(dp->pl); + if (dp->pl) + phylink_replay_link_end(dp->pl); rc = sja1105_reload_cbs(priv); if (rc < 0) -- cgit v1.2.3 From c5f8658f97ec392eeaf355d4e9775ae1f23ca1d3 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 4 Feb 2026 17:06:29 +0800 Subject: drm/imx: parallel-display: check return value of devm_drm_bridge_add() in imx_pd_probe() Return the value of devm_drm_bridge_add() in order to propagate the error properly, if it fails due to resource allocation failure or bridge registration failure. This ensures that the probe function fails safely rather than proceeding with a potentially incomplete bridge setup. Fixes: bf7e97910b9f ("drm/imx: parallel-display: add the bridge before attaching it") Signed-off-by: Chen Ni Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260204090629.2209542-1-nichen@iscas.ac.cn Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/imx/ipuv3/parallel-display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/imx/ipuv3/parallel-display.c b/drivers/gpu/drm/imx/ipuv3/parallel-display.c index 6fbf505d2801..590120a33fa0 100644 --- a/drivers/gpu/drm/imx/ipuv3/parallel-display.c +++ b/drivers/gpu/drm/imx/ipuv3/parallel-display.c @@ -256,7 +256,9 @@ static int imx_pd_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxpd); - devm_drm_bridge_add(dev, &imxpd->bridge); + ret = devm_drm_bridge_add(dev, &imxpd->bridge); + if (ret) + return ret; return component_add(dev, &imx_pd_ops); } -- cgit v1.2.3 From 496daa2759260374bb9c9b2196a849aa3bc513a8 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Fri, 6 Feb 2026 12:06:21 +0800 Subject: drm/bridge: synopsys: dw-dp: Check return value of devm_drm_bridge_add() in dw_dp_bind() Return the value of devm_drm_bridge_add() in order to propagate the error properly, if it fails due to resource allocation failure or bridge registration failure. This ensures that the bind function fails safely rather than proceeding with a potentially incomplete bridge setup. Fixes: b726970486d8 ("drm/bridge: synopsys: dw-dp: add bridge before attaching") Signed-off-by: Chen Ni Reviewed-by: Andy Yan Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260206040621.4095517-1-nichen@iscas.ac.cn Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/synopsys/dw-dp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/bridge/synopsys/dw-dp.c b/drivers/gpu/drm/bridge/synopsys/dw-dp.c index 432342452484..07f7a2e0d9f2 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-dp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-dp.c @@ -2049,7 +2049,9 @@ struct dw_dp *dw_dp_bind(struct device *dev, struct drm_encoder *encoder, bridge->type = DRM_MODE_CONNECTOR_DisplayPort; bridge->ycbcr_420_allowed = true; - devm_drm_bridge_add(dev, bridge); + ret = devm_drm_bridge_add(dev, bridge); + if (ret) + return ERR_PTR(ret); dp->aux.dev = dev; dp->aux.drm_dev = encoder->dev; -- cgit v1.2.3 From 803ec1faf7c1823e6e3b1f2aaa81be18528c9436 Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Mon, 9 Feb 2026 19:41:14 +0100 Subject: drm/bridge: samsung-dsim: Fix memory leak in error path In samsung_dsim_host_attach(), drm_bridge_add() is called to add the bridge. However, if samsung_dsim_register_te_irq() or pdata->host_ops->attach() fails afterwards, the function returns without removing the bridge, causing a memory leak. Fix this by adding proper error handling with goto labels to ensure drm_bridge_remove() is called in all error paths. Also ensure that samsung_dsim_unregister_te_irq() is called if the attach operation fails after the TE IRQ has been registered. samsung_dsim_unregister_te_irq() function is moved without changes to be before samsung_dsim_host_attach() to avoid forward declaration. Fixes: e7447128ca4a ("drm: bridge: Generalize Exynos-DSI driver into a Samsung DSIM bridge") Cc: stable@vger.kernel.org Signed-off-by: Osama Abdelkader Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260209184115.10937-1-osama.abdelkader@gmail.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/samsung-dsim.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index eabc4c32f6ab..ad8c6aa49d48 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -1881,6 +1881,14 @@ static int samsung_dsim_register_te_irq(struct samsung_dsim *dsi, struct device return 0; } +static void samsung_dsim_unregister_te_irq(struct samsung_dsim *dsi) +{ + if (dsi->te_gpio) { + free_irq(gpiod_to_irq(dsi->te_gpio), dsi); + gpiod_put(dsi->te_gpio); + } +} + static int samsung_dsim_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { @@ -1955,13 +1963,13 @@ of_find_panel_or_bridge: if (!(device->mode_flags & MIPI_DSI_MODE_VIDEO)) { ret = samsung_dsim_register_te_irq(dsi, &device->dev); if (ret) - return ret; + goto err_remove_bridge; } if (pdata->host_ops && pdata->host_ops->attach) { ret = pdata->host_ops->attach(dsi, device); if (ret) - return ret; + goto err_unregister_te_irq; } dsi->lanes = device->lanes; @@ -1969,14 +1977,13 @@ of_find_panel_or_bridge: dsi->mode_flags = device->mode_flags; return 0; -} -static void samsung_dsim_unregister_te_irq(struct samsung_dsim *dsi) -{ - if (dsi->te_gpio) { - free_irq(gpiod_to_irq(dsi->te_gpio), dsi); - gpiod_put(dsi->te_gpio); - } +err_unregister_te_irq: + if (!(device->mode_flags & MIPI_DSI_MODE_VIDEO)) + samsung_dsim_unregister_te_irq(dsi); +err_remove_bridge: + drm_bridge_remove(&dsi->bridge); + return ret; } static int samsung_dsim_host_detach(struct mipi_dsi_host *host, -- cgit v1.2.3 From 0d195d3b205ca90db30d70d09d7bb6909aac178f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?= Date: Fri, 20 Feb 2026 12:39:37 +0100 Subject: drbd: fix null-pointer dereference on local read error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In drbd_request_endio(), READ_COMPLETED_WITH_ERROR is passed to __req_mod() with a NULL peer_device: __req_mod(req, what, NULL, &m); The READ_COMPLETED_WITH_ERROR handler then unconditionally passes this NULL peer_device to drbd_set_out_of_sync(), which dereferences it, causing a null-pointer dereference. Fix this by obtaining the peer_device via first_peer_device(device), matching how drbd_req_destroy() handles the same situation. Cc: stable@vger.kernel.org Reported-by: Tuo Li Link: https://lore.kernel.org/linux-block/20260104165355.151864-1-islituo@gmail.com Signed-off-by: Christoph Böhmwalder Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d15826f6ee81..70f75ef07945 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -621,7 +621,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case READ_COMPLETED_WITH_ERROR: - drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); + drbd_set_out_of_sync(first_peer_device(device), + req->i.sector, req->i.size); drbd_report_io_error(device, req); __drbd_chk_io_error(device, DRBD_READ_ERROR); fallthrough; -- cgit v1.2.3 From 2bb995e6155cb4f254574598cbd6fe1dcc99766a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 18 Feb 2026 16:56:00 -0800 Subject: net: phy: qcom: qca807x: normalize return value of gpio_get The GPIO get callback is expected to return 0 or 1 (or a negative error code). Ensure that the value returned by qca807x_gpio_get() is normalized to the [0, 1] range. Fixes: 86ef402d805d ("gpiolib: sanitize the return value of gpio_chip::get()") Signed-off-by: Dmitry Torokhov Reviewed-by: Bartosz Golaszewski Reviewed-by: Linus Walleij Link: https://patch.msgid.link/aZZeyr2ysqqk2GqA@google.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/qcom/qca807x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index d8f1ce5a7128..6004da5741af 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c @@ -375,7 +375,7 @@ static int qca807x_gpio_get(struct gpio_chip *gc, unsigned int offset) reg = QCA807X_MMD7_LED_FORCE_CTRL(offset); val = phy_read_mmd(priv->phy, MDIO_MMD_AN, reg); - return FIELD_GET(QCA807X_GPIO_FORCE_MODE_MASK, val); + return !!FIELD_GET(QCA807X_GPIO_FORCE_MODE_MASK, val); } static int qca807x_gpio_set(struct gpio_chip *gc, unsigned int offset, int value) -- cgit v1.2.3 From 594163ea88a03bdb412063af50fc7177ef3cbeae Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 19 Feb 2026 12:38:50 +0100 Subject: net: ethernet: xscale: Check for PTP support properly In ixp4xx_get_ts_info() ixp46x_ptp_find() is called unconditionally despite this feature only existing on ixp46x, leading to the following splat from tcpdump: root@OpenWrt:~# tcpdump -vv -X -i eth0 (...) Unable to handle kernel NULL pointer dereference at virtual address 00000238 when read (...) Call trace: ptp_clock_index from ixp46x_ptp_find+0x1c/0x38 ixp46x_ptp_find from ixp4xx_get_ts_info+0x4c/0x64 ixp4xx_get_ts_info from __ethtool_get_ts_info+0x90/0x108 __ethtool_get_ts_info from __dev_ethtool+0xa00/0x2648 __dev_ethtool from dev_ethtool+0x160/0x234 dev_ethtool from dev_ioctl+0x2cc/0x460 dev_ioctl from sock_ioctl+0x1ec/0x524 sock_ioctl from sys_ioctl+0x51c/0xa94 sys_ioctl from ret_fast_syscall+0x0/0x44 (...) Segmentation fault Check for ixp46x in ixp46x_ptp_find() before trying to set up PTP to avoid this. To avoid altering the returned error code from ixp4xx_hwtstamp_set() which before this patch was -EOPNOTSUPP, we return -EOPNOTSUPP from ixp4xx_hwtstamp_set() if ixp46x_ptp_find() fails no matter the error code. The helper function ixp46x_ptp_find() helper returns -ENODEV. Fixes: 9055a2f59162 ("ixp4xx_eth: make ptp support a platform driver") Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20260219-ixp4xx-fix-ethernet-v3-1-f235ccc3cd46@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xscale/ixp4xx_eth.c | 5 +---- drivers/net/ethernet/xscale/ptp_ixp46x.c | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index e1e7f65553e7..b0faa0f1780d 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -403,15 +403,12 @@ static int ixp4xx_hwtstamp_set(struct net_device *netdev, int ret; int ch; - if (!cpu_is_ixp46x()) - return -EOPNOTSUPP; - if (!netif_running(netdev)) return -EINVAL; ret = ixp46x_ptp_find(&port->timesync_regs, &port->phc_index); if (ret) - return ret; + return -EOPNOTSUPP; ch = PORT2CHANNEL(port); regs = port->timesync_regs; diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c index 94203eb46e6b..93c64db22a69 100644 --- a/drivers/net/ethernet/xscale/ptp_ixp46x.c +++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c @@ -232,6 +232,9 @@ static struct ixp_clock ixp_clock; int ixp46x_ptp_find(struct ixp46x_ts_regs *__iomem *regs, int *phc_index) { + if (!cpu_is_ixp46x()) + return -ENODEV; + *regs = ixp_clock.regs; *phc_index = ptp_clock_index(ixp_clock.ptp_clock); -- cgit v1.2.3 From e123d9302d223767bd910bfbcfe607bae909f8ac Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Thu, 19 Feb 2026 10:53:11 -0800 Subject: bnxt_en: Fix RSS context delete logic We need to free the corresponding RSS context VNIC in FW everytime an RSS context is deleted in driver. Commit 667ac333dbb7 added a check to delete the VNIC in FW only when netif_running() is true to help delete RSS contexts with interface down. Having that condition will make the driver leak VNICs in FW whenever close() happens with active RSS contexts. On the subsequent open(), as part of RSS context restoration, we will end up trying to create extra VNICs for which we did not make any reservation. FW can fail this request, thereby making us lose active RSS contexts. Suppose an RSS context is deleted already and we try to process a delete request again, then the HWRM functions will check for validity of the request and they simply return if the resource is already freed. So, even for delete-when-down cases, netif_running() check is not necessary. Remove the netif_running() condition check when deleting an RSS context. Reported-by: Jakub Kicinski Fixes: 667ac333dbb7 ("eth: bnxt: allow deleting RSS contexts when the device is down") Reviewed-by: Andy Gospodarek Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260219185313.2682148-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fb45e1dd1dd7..7768d9753e2d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10889,12 +10889,10 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, struct bnxt_ntuple_filter *ntp_fltr; int i; - if (netif_running(bp->dev)) { - bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); - for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) { - if (vnic->fw_rss_cos_lb_ctx[i] != INVALID_HW_RING_ID) - bnxt_hwrm_vnic_ctx_free_one(bp, vnic, i); - } + bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); + for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) { + if (vnic->fw_rss_cos_lb_ctx[i] != INVALID_HW_RING_ID) + bnxt_hwrm_vnic_ctx_free_one(bp, vnic, i); } if (!all) return; -- cgit v1.2.3 From c1bbd9900d65ac65b9fce9f129e3369a04871570 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Thu, 19 Feb 2026 10:53:12 -0800 Subject: bnxt_en: Fix deleting of Ntuple filters Ntuple filters can be deleted when the interface is down. The current code blindly sends the filter delete command to FW. When the interface is down, all the VNICs are deleted in the FW. When the VNIC is freed in the FW, all the associated filters are also freed. We need not send the free command explicitly. Sending such command will generate FW error in the dmesg. In order to fix this, we can safely return from bnxt_hwrm_cfa_ntuple_filter_free() when BNXT_STATE_OPEN is not true which confirms the VNICs have been deleted. Fixes: 8336a974f37d ("bnxt_en: Save user configured filters in a lookup list") Suggested-by: Michael Chan Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260219185313.2682148-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7768d9753e2d..8c73a723d23f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6240,6 +6240,9 @@ int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, int rc; set_bit(BNXT_FLTR_FW_DELETED, &fltr->base.state); + if (!test_bit(BNXT_STATE_OPEN, &bp->state)) + return 0; + rc = hwrm_req_init(bp, req, HWRM_CFA_NTUPLE_FILTER_FREE); if (rc) return rc; -- cgit v1.2.3 From d4f687fbbce45b5e88438e89b5e26c0c15847992 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Wed, 18 Feb 2026 21:08:26 +0100 Subject: ovpn: tcp - fix packet extraction from stream When processing TCP stream data in ovpn_tcp_recv, we receive large cloned skbs from __strp_rcv that may contain multiple coalesced packets. The current implementation has two bugs: 1. Header offset overflow: Using pskb_pull with large offsets on coalesced skbs causes skb->data - skb->head to exceed the u16 storage of skb->network_header. This causes skb_reset_network_header to fail on the inner decapsulated packet, resulting in packet drops. 2. Unaligned protocol headers: Extracting packets from arbitrary positions within the coalesced TCP stream provides no alignment guarantees for the packet data causing performance penalties on architectures without efficient unaligned access. Additionally, openvpn's 2-byte length prefix on TCP packets causes the subsequent 4-byte opcode and packet ID fields to be inherently misaligned. Fix both issues by allocating a new skb for each openvpn packet and using skb_copy_bits to extract only the packet content into the new buffer, skipping the 2-byte length prefix. Also, check the length before invoking the function that performs the allocation to avoid creating an invalid skb. If the packet has to be forwarded to userspace the 2-byte prefix can be pushed to the head safely, without misalignment. As a side effect, this approach also avoids the expensive linearization that pskb_pull triggers on cloned skbs with page fragments. In testing, this resulted in TCP throughput improvements of up to 74%. Fixes: 11851cbd60ea ("ovpn: implement TCP transport") Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/ovpn/tcp.c | 53 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c index ec2bbc28c196..5499c1572f3e 100644 --- a/drivers/net/ovpn/tcp.c +++ b/drivers/net/ovpn/tcp.c @@ -70,37 +70,56 @@ static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk, peer->tcp.sk_cb.sk_data_ready(sk); } -static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb) +static struct sk_buff *ovpn_tcp_skb_packet(const struct ovpn_peer *peer, + struct sk_buff *orig_skb, + const int pkt_len, const int pkt_off) { - struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp); - struct strp_msg *msg = strp_msg(skb); - size_t pkt_len = msg->full_len - 2; - size_t off = msg->offset + 2; - u8 opcode; + struct sk_buff *ovpn_skb; + int err; - /* ensure skb->data points to the beginning of the openvpn packet */ - if (!pskb_pull(skb, off)) { - net_warn_ratelimited("%s: packet too small for peer %u\n", - netdev_name(peer->ovpn->dev), peer->id); + /* create a new skb with only the content of the current packet */ + ovpn_skb = netdev_alloc_skb(peer->ovpn->dev, pkt_len); + if (unlikely(!ovpn_skb)) goto err; - } - /* strparser does not trim the skb for us, therefore we do it now */ - if (pskb_trim(skb, pkt_len) != 0) { - net_warn_ratelimited("%s: trimming skb failed for peer %u\n", + skb_copy_header(ovpn_skb, orig_skb); + err = skb_copy_bits(orig_skb, pkt_off, skb_put(ovpn_skb, pkt_len), + pkt_len); + if (unlikely(err)) { + net_warn_ratelimited("%s: skb_copy_bits failed for peer %u\n", netdev_name(peer->ovpn->dev), peer->id); + kfree_skb(ovpn_skb); goto err; } - /* we need the first 4 bytes of data to be accessible + consume_skb(orig_skb); + return ovpn_skb; +err: + kfree_skb(orig_skb); + return NULL; +} + +static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb) +{ + struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp); + struct strp_msg *msg = strp_msg(skb); + int pkt_len = msg->full_len - 2; + u8 opcode; + + /* we need at least 4 bytes of data in the packet * to extract the opcode and the key ID later on */ - if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) { + if (unlikely(pkt_len < OVPN_OPCODE_SIZE)) { net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n", netdev_name(peer->ovpn->dev), peer->id); goto err; } + /* extract the packet into a new skb */ + skb = ovpn_tcp_skb_packet(peer, skb, pkt_len, msg->offset + 2); + if (unlikely(!skb)) + goto err; + /* DATA_V2 packets are handled in kernel, the rest goes to user space */ opcode = ovpn_opcode_from_skb(skb, 0); if (unlikely(opcode != OVPN_DATA_V2)) { @@ -113,7 +132,7 @@ static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb) /* The packet size header must be there when sending the packet * to userspace, therefore we put it back */ - skb_push(skb, 2); + *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(pkt_len); ovpn_tcp_to_userspace(peer, strp->sk, skb); return; } -- cgit v1.2.3 From 2692c614f8f05929d692b3dbfd3faef1f00fbaf0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 10 Feb 2026 14:58:22 +0100 Subject: device property: Allow secondary lookup in fwnode_get_next_child_node() When device_get_child_node_count() got split to the fwnode and device respective APIs, the fwnode didn't inherit the ability to traverse over the secondary fwnode. Hence any user, that switches from device to fwnode API misses this feature. In particular, this was revealed by the commit 1490cbb9dbfd ("device property: Split fwnode_get_child_node_count()") that effectively broke the GPIO enumeration on Intel Galileo boards. Fix this by moving the secondary lookup from device to fwnode API. Note, in general no device_*() API should go into the depth of the fwnode implementation. Fixes: 114dbb4fa7c4 ("drivers property: When no children in primary, try secondary") Cc: stable@vger.kernel.org Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki (Intel) Reviewed-by: Sakari Ailus Link: https://patch.msgid.link/20260210135822.47335-1-andriy.shevchenko@linux.intel.com Signed-off-by: Danilo Krummrich --- drivers/base/property.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/base/property.c b/drivers/base/property.c index 6a63860579dd..8d9a34be57fb 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -797,7 +797,18 @@ struct fwnode_handle * fwnode_get_next_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { - return fwnode_call_ptr_op(fwnode, get_next_child_node, child); + struct fwnode_handle *next; + + if (IS_ERR_OR_NULL(fwnode)) + return NULL; + + /* Try to find a child in primary fwnode */ + next = fwnode_call_ptr_op(fwnode, get_next_child_node, child); + if (next) + return next; + + /* When no more children in primary, continue with secondary */ + return fwnode_call_ptr_op(fwnode->secondary, get_next_child_node, child); } EXPORT_SYMBOL_GPL(fwnode_get_next_child_node); @@ -841,19 +852,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node); struct fwnode_handle *device_get_next_child_node(const struct device *dev, struct fwnode_handle *child) { - const struct fwnode_handle *fwnode = dev_fwnode(dev); - struct fwnode_handle *next; - - if (IS_ERR_OR_NULL(fwnode)) - return NULL; - - /* Try to find a child in primary fwnode */ - next = fwnode_get_next_child_node(fwnode, child); - if (next) - return next; - - /* When no more children in primary, continue with secondary */ - return fwnode_get_next_child_node(fwnode->secondary, child); + return fwnode_get_next_child_node(dev_fwnode(dev), child); } EXPORT_SYMBOL_GPL(device_get_next_child_node); -- cgit v1.2.3 From 243307a0d1b0d01538e202c00454c28b21d4432e Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 3 Feb 2026 11:21:33 +0100 Subject: wifi: brcmfmac: Fix potential kernel oops when probe fails When probe of the sdio brcmfmac device fails for some reasons (i.e. missing firmware), the sdiodev->bus is set to error instead of NULL, thus the cleanup later in brcmf_sdio_remove() tries to free resources via invalid bus pointer. This happens because sdiodev->bus is set 2 times: first in brcmf_sdio_probe() and second time in brcmf_sdiod_probe(). Fix this by chaning the brcmf_sdio_probe() function to return the error code and set sdio->bus only there. Fixes: 0ff0843310b7 ("wifi: brcmfmac: Add optional lpo clock enable support") Signed-off-by: Marek Szyprowski Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260203102133.1478331-1-m.szyprowski@samsung.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 7 +++---- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 7 ++++--- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 6a3f187320fc..13952dfeb3e3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -951,11 +951,10 @@ int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev) goto out; /* try to attach to the target device */ - sdiodev->bus = brcmf_sdio_probe(sdiodev); - if (IS_ERR(sdiodev->bus)) { - ret = PTR_ERR(sdiodev->bus); + ret = brcmf_sdio_probe(sdiodev); + if (ret) goto out; - } + brcmf_sdiod_host_fixup(sdiodev->func2->card->host); out: if (ret) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 8cf9d7e7c3f7..4e6ed02c1591 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4445,7 +4445,7 @@ brcmf_sdio_prepare_fw_request(struct brcmf_sdio *bus) return fwreq; } -struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) +int brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) { int ret; struct brcmf_sdio *bus; @@ -4551,11 +4551,12 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) goto fail; } - return bus; + return 0; fail: brcmf_sdio_remove(bus); - return ERR_PTR(ret); + sdiodev->bus = NULL; + return ret; } /* Detach and free everything */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h index 0d18ed15b403..80180d5c6c87 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h @@ -358,7 +358,7 @@ void brcmf_sdiod_freezer_uncount(struct brcmf_sdio_dev *sdiodev); int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev); int brcmf_sdiod_remove(struct brcmf_sdio_dev *sdiodev); -struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev); +int brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev); void brcmf_sdio_remove(struct brcmf_sdio *bus); void brcmf_sdio_isr(struct brcmf_sdio *bus, bool in_isr); -- cgit v1.2.3 From 25723454f67980712234a42caca606b6710fd1e1 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 10 Feb 2026 18:03:34 +0800 Subject: wifi: mwifiex: Fix dev_alloc_name() return value check dev_alloc_name() returns the allocated ID on success, which could be over 0. Fix the return value check to check for negative error codes. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aYmsQfujoAe5qO02@stanley.mountain/ Fixes: 7bab5bdb81e3 ("wifi: mwifiex: Allocate dev name earlier for interface workqueue name") Signed-off-by: Chen-Yu Tsai Reviewed-by: Francesco Dolcini Link: https://patch.msgid.link/20260210100337.1131279-1-wenst@chromium.org Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a66d18e380fc..bc0a946e8e6e 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -3148,7 +3148,7 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, SET_NETDEV_DEV(dev, adapter->dev); ret = dev_alloc_name(dev, name); - if (ret) + if (ret < 0) goto err_alloc_name; priv->dfs_cac_workqueue = alloc_workqueue("MWIFIEX_DFS_CAC-%s", -- cgit v1.2.3 From 03cc8f90d0537fcd4985c3319b4fafbf2e3fb1f0 Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Fri, 6 Feb 2026 14:53:56 -0500 Subject: wifi: libertas: fix use-after-free in lbs_free_adapter() The lbs_free_adapter() function uses timer_delete() (non-synchronous) for both command_timer and tx_lockup_timer before the structure is freed. This is incorrect because timer_delete() does not wait for any running timer callback to complete. If a timer callback is executing when lbs_free_adapter() is called, the callback will access freed memory since lbs_cfg_free() frees the containing structure immediately after lbs_free_adapter() returns. Both timer callbacks (lbs_cmd_timeout_handler and lbs_tx_lockup_handler) access priv->driver_lock, priv->cur_cmd, priv->dev, and other fields, which would all be use-after-free violations. Use timer_delete_sync() instead to ensure any running timer callback has completed before returning. This bug was introduced in commit 8f641d93c38a ("libertas: detect TX lockups and reset hardware") where del_timer() was used instead of del_timer_sync() in the cleanup path. The command_timer has had the same issue since the driver was first written. Fixes: 8f641d93c38a ("libertas: detect TX lockups and reset hardware") Fixes: 954ee164f4f4 ("[PATCH] libertas: reorganize and simplify init sequence") Cc: stable@vger.kernel.org Signed-off-by: Daniel Hodges Link: https://patch.msgid.link/20260206195356.15647-1-git@danielhodges.dev Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/libertas/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c index d44e02c6fe38..dd97f1b61f4d 100644 --- a/drivers/net/wireless/marvell/libertas/main.c +++ b/drivers/net/wireless/marvell/libertas/main.c @@ -799,8 +799,8 @@ static void lbs_free_adapter(struct lbs_private *priv) { lbs_free_cmd_buffer(priv); kfifo_free(&priv->event_fifo); - timer_delete(&priv->command_timer); - timer_delete(&priv->tx_lockup_timer); + timer_delete_sync(&priv->command_timer); + timer_delete_sync(&priv->tx_lockup_timer); } static const struct net_device_ops lbs_netdev_ops = { -- cgit v1.2.3 From 32e0a7ad9c841f46549ccac0f1cca347a40d8685 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Fri, 20 Feb 2026 17:34:51 +0800 Subject: gpio: shared: fix memory leaks On a Snapdragon X1 Elite laptop (Lenovo Yoga Slim 7x), kmemleak reports three sets of: unreferenced object 0xffff00080187f400 (size 1024): comm "swapper/0", pid 1, jiffies 4294667327 hex dump (first 32 bytes): 58 bd 70 01 08 00 ff ff 58 bd 70 01 08 00 ff ff X.p.....X.p..... 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 ................ backtrace (crc 1665d1f8): kmemleak_alloc+0xf4/0x12c __kmalloc_cache_noprof+0x370/0x49c gpio_shared_make_ref+0x70/0x16c gpio_shared_of_traverse+0x4e8/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_init+0x34/0x1c4 do_one_initcall+0x50/0x280 kernel_init_freeable+0x290/0x33c kernel_init+0x28/0x14c ret_from_fork+0x10/0x20 unreferenced object 0xffff00080170c140 (size 8): comm "swapper/0", pid 1, jiffies 4294667327 hex dump (first 8 bytes): 72 65 73 65 74 00 00 00 reset... backtrace (crc fc24536): kmemleak_alloc+0xf4/0x12c __kmalloc_node_track_caller_noprof+0x3c4/0x584 kstrdup+0x4c/0xcc gpio_shared_make_ref+0x8c/0x16c gpio_shared_of_traverse+0x4e8/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_of_traverse+0x200/0x5f4 gpio_shared_init+0x34/0x1c4 do_one_initcall+0x50/0x280 kernel_init_freeable+0x290/0x33c kernel_init+0x28/0x14c ret_from_fork+0x10/0x20 Fix this by decrementing the reference count of each list entry rather than only the first. Fix verified on the same laptop. Fixes: a060b8c511abb gpiolib: implement low-level, shared GPIO support Signed-off-by: Daniel J Blueman Link: https://patch.msgid.link/20260220093452.101655-1-daniel@quora.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-shared.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpio/gpiolib-shared.c b/drivers/gpio/gpiolib-shared.c index d2614ace4de1..17a7128b6bd9 100644 --- a/drivers/gpio/gpiolib-shared.c +++ b/drivers/gpio/gpiolib-shared.c @@ -748,14 +748,14 @@ static bool gpio_shared_entry_is_really_shared(struct gpio_shared_entry *entry) static void gpio_shared_free_exclusive(void) { struct gpio_shared_entry *entry, *epos; + struct gpio_shared_ref *ref, *rpos; list_for_each_entry_safe(entry, epos, &gpio_shared_list, list) { if (gpio_shared_entry_is_really_shared(entry)) continue; - gpio_shared_drop_ref(list_first_entry(&entry->refs, - struct gpio_shared_ref, - list)); + list_for_each_entry_safe(ref, rpos, &entry->refs, list) + gpio_shared_drop_ref(ref); gpio_shared_drop_entry(entry); } } -- cgit v1.2.3 From eb4a7139e97374f42b7242cc754e77f1623fbcd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Thu, 12 Feb 2026 08:27:31 +0200 Subject: drm/i915/alpm: ALPM disable fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PORT_ALPM_CTL is supposed to be written only before link training. Remove writing it from ALPM disable. Also clearing ALPM_CTL_ALPM_AUX_LESS_ENABLE and is not about disabling ALPM but switching to AUX-Wake ALPM. Stop touching this bit on ALPM disable. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7153 Fixes: 1ccbf135862b ("drm/i915/psr: Enable ALPM on source side for eDP Panel replay") Cc: Animesh Manna Cc: Jani Nikula Cc: # v6.10+ Signed-off-by: Jouni Högander Reviewed-by: Michał Grzelak Link: https://patch.msgid.link/20260212062731.397801-1-jouni.hogander@intel.com (cherry picked from commit 008304c9ae75c772d3460040de56e12112cdf5e6) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_alpm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index 7ce8c674bb03..07ffee38974b 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -562,12 +562,7 @@ void intel_alpm_disable(struct intel_dp *intel_dp) mutex_lock(&intel_dp->alpm.lock); intel_de_rmw(display, ALPM_CTL(display, cpu_transcoder), - ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE | - ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); - - intel_de_rmw(display, - PORT_ALPM_CTL(cpu_transcoder), - PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); + ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE, 0); drm_dbg_kms(display->drm, "Disabling ALPM\n"); mutex_unlock(&intel_dp->alpm.lock); -- cgit v1.2.3 From ec2cceadfae72304ca19650f9cac4b2a97b8a2fc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 19 Feb 2026 10:51:33 +0100 Subject: gpiolib: normalize the return value of gc->get() on behalf of buggy drivers Commit 86ef402d805d ("gpiolib: sanitize the return value of gpio_chip::get()") started checking the return value of the .get() callback in struct gpio_chip. Now - almost a year later - it turns out that there are quite a few drivers in tree that can break with this change. Partially revert it: normalize the return value in GPIO core but also emit a warning. Cc: stable@vger.kernel.org Fixes: 86ef402d805d ("gpiolib: sanitize the return value of gpio_chip::get()") Reported-by: Dmitry Torokhov Closes: https://lore.kernel.org/all/aZSkqGTqMp_57qC7@google.com/ Reviewed-by: Linus Walleij Reviewed-by: Dmitry Torokhov Link: https://patch.msgid.link/20260219-gpiolib-set-normalize-v2-1-f84630e45796@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 86a171e96b0e..ada572aaebd6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3267,8 +3267,12 @@ static int gpiochip_get(struct gpio_chip *gc, unsigned int offset) /* Make sure this is called after checking for gc->get(). */ ret = gc->get(gc, offset); - if (ret > 1) - ret = -EBADE; + if (ret > 1) { + gpiochip_warn(gc, + "invalid return value from gc->get(): %d, consider fixing the driver\n", + ret); + ret = !!ret; + } return ret; } -- cgit v1.2.3 From af12e64ae0661546e8b4f5d30d55c5f53a11efe7 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 20 Jan 2026 22:26:46 +0800 Subject: mmc: mmci: Fix device_node reference leak in of_get_dml_pipe_index() When calling of_parse_phandle_with_args(), the caller is responsible to call of_node_put() to release the reference of device node. In of_get_dml_pipe_index(), it does not release the reference. Fixes: 9cb15142d0e3 ("mmc: mmci: Add qcom dml support to the driver.") Signed-off-by: Felix Gu Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_qcom_dml.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/mmc/host/mmci_qcom_dml.c b/drivers/mmc/host/mmci_qcom_dml.c index 3da6112fbe39..67371389cc33 100644 --- a/drivers/mmc/host/mmci_qcom_dml.c +++ b/drivers/mmc/host/mmci_qcom_dml.c @@ -109,6 +109,7 @@ static int of_get_dml_pipe_index(struct device_node *np, const char *name) &dma_spec)) return -ENODEV; + of_node_put(dma_spec.np); if (dma_spec.args_count) return dma_spec.args[0]; -- cgit v1.2.3 From 6465a8bbb0f6ad98aeb66dc9ea19c32c193a610b Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 16 Jan 2026 08:55:30 +0800 Subject: mmc: dw_mmc-rockchip: Fix runtime PM support for internal phase support RK3576 is the first platform to introduce internal phase support, and subsequent platforms are expected to adopt a similar design. In this architecture, runtime suspend powers off the attached power domain, which resets registers, including vendor-specific ones such as SDMMC_TIMING_CON0, SDMMC_TIMING_CON1, and SDMMC_MISC_CON. These registers must be saved and restored, a requirement that falls outside the scope of the dw_mmc core. Fixes: 59903441f5e4 ("mmc: dw_mmc-rockchip: Add internal phase support") Signed-off-by: Shawn Lin Tested-by: Marco Schirrmeister Reviewed-by: Heiko Stuebner Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-rockchip.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c index 4e3423a19bdf..ac069d0c42b2 100644 --- a/drivers/mmc/host/dw_mmc-rockchip.c +++ b/drivers/mmc/host/dw_mmc-rockchip.c @@ -36,6 +36,8 @@ struct dw_mci_rockchip_priv_data { int default_sample_phase; int num_phases; bool internal_phase; + int sample_phase; + int drv_phase; }; /* @@ -573,9 +575,43 @@ static void dw_mci_rockchip_remove(struct platform_device *pdev) dw_mci_pltfm_remove(pdev); } +static int dw_mci_rockchip_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_mci *host = platform_get_drvdata(pdev); + struct dw_mci_rockchip_priv_data *priv = host->priv; + + if (priv->internal_phase) { + priv->sample_phase = rockchip_mmc_get_phase(host, true); + priv->drv_phase = rockchip_mmc_get_phase(host, false); + } + + return dw_mci_runtime_suspend(dev); +} + +static int dw_mci_rockchip_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_mci *host = platform_get_drvdata(pdev); + struct dw_mci_rockchip_priv_data *priv = host->priv; + int ret; + + ret = dw_mci_runtime_resume(dev); + if (ret) + return ret; + + if (priv->internal_phase) { + rockchip_mmc_set_phase(host, true, priv->sample_phase); + rockchip_mmc_set_phase(host, false, priv->drv_phase); + mci_writel(host, MISC_CON, MEM_CLK_AUTOGATE_ENABLE); + } + + return ret; +} + static const struct dev_pm_ops dw_mci_rockchip_dev_pm_ops = { SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) - RUNTIME_PM_OPS(dw_mci_runtime_suspend, dw_mci_runtime_resume, NULL) + RUNTIME_PM_OPS(dw_mci_rockchip_runtime_suspend, dw_mci_rockchip_runtime_resume, NULL) }; static struct platform_driver dw_mci_rockchip_pltfm_driver = { -- cgit v1.2.3 From 79ad471530e0baef0dce991816013df55e401d9c Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 16 Feb 2026 14:15:43 -0500 Subject: mmc: sdhci-brcmstb: use correct register offset for V1 pin_sel restore The restore path for SDIO_CFG_CORE_V1 was incorrectly using SDIO_CFG_SD_PIN_SEL (offset 0x44) instead of SDIO_CFG_V1_SD_PIN_SEL (offset 0x54), causing the wrong register to be written on resume. The save path already uses the correct V1-specific offset. This affects BCM7445 and BCM72116 platforms which use the V1 config core. Fixes: b7e614802e3f ("mmc: sdhci-brcmstb: save and restore registers during PM") Signed-off-by: Kamal Dasu Cc: stable@vger.kernel.org Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-brcmstb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index c9442499876c..57e45951644e 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -116,7 +116,7 @@ static void sdhci_brcmstb_restore_regs(struct mmc_host *mmc, enum cfg_core_ver v writel(sr->boot_main_ctl, priv->boot_regs + SDIO_BOOT_MAIN_CTL); if (ver == SDIO_CFG_CORE_V1) { - writel(sr->sd_pin_sel, cr + SDIO_CFG_SD_PIN_SEL); + writel(sr->sd_pin_sel, cr + SDIO_CFG_V1_SD_PIN_SEL); return; } -- cgit v1.2.3 From 7a73801fdaf8aee90d23ba77976082a48d156a21 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 22 Dec 2025 21:29:41 +0100 Subject: pmdomain: imx: gpcv2: Discard pm_runtime_put() return value Passing pm_runtime_put() return value to the callers is not particularly useful. Returning an error code from pm_runtime_put() merely means that it has not queued up a work item to check whether or not the device can be suspended and there are many perfectly valid situations in which that can happen, like after writing "on" to the devices' runtime PM "control" attribute in sysfs for one example. Accordingly, update imx_pgc_domain_suspend() to simply discard the return value of pm_runtime_put() and always return success to the caller. This will facilitate a planned change of the pm_runtime_put() return type to void in the future. Signed-off-by: Rafael J. Wysocki Acked-by: Peng Fan Acked-by: Ulf Hansson Link: https://patch.msgid.link/15658107.tv2OnDr8pf@rafael.j.wysocki --- drivers/pmdomain/imx/gpcv2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/pmdomain/imx/gpcv2.c b/drivers/pmdomain/imx/gpcv2.c index cff738e4d546..a829f8da5be7 100644 --- a/drivers/pmdomain/imx/gpcv2.c +++ b/drivers/pmdomain/imx/gpcv2.c @@ -1416,7 +1416,9 @@ static int imx_pgc_domain_suspend(struct device *dev) static int imx_pgc_domain_resume(struct device *dev) { - return pm_runtime_put(dev); + pm_runtime_put(dev); + + return 0; } #endif -- cgit v1.2.3 From 4b73231b2a61c4142a027613d277a19c484dfcc3 Mon Sep 17 00:00:00 2001 From: Yufan Chen Date: Sun, 22 Feb 2026 18:40:35 +0800 Subject: regulator: tps65185: check devm_kzalloc() result in probe tps65185_probe() dereferences the allocation result immediately by using data->regmap. If devm_kzalloc() returns NULL under memory pressure, this leads to a NULL pointer dereference. Add the missing allocation check and return -ENOMEM on failure. Signed-off-by: Yufan Chen Link: https://patch.msgid.link/20260222104035.90790-1-ericterminal@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/tps65185.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/regulator/tps65185.c b/drivers/regulator/tps65185.c index 3286c9ab33d0..786622d8d598 100644 --- a/drivers/regulator/tps65185.c +++ b/drivers/regulator/tps65185.c @@ -332,6 +332,9 @@ static int tps65185_probe(struct i2c_client *client) int i; data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->regmap = devm_regmap_init_i2c(client, ®map_config); if (IS_ERR(data->regmap)) return dev_err_probe(&client->dev, PTR_ERR(data->regmap), -- cgit v1.2.3 From f895e5df80316a308c2f7d64d13a78494630ea05 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 12 Feb 2026 21:52:48 -0600 Subject: ipmi:si: Don't block module unload if the BMC is messed up If the BMC is in a bad state, don't bother waiting for queues messages since there can't be any. Otherwise the unload is blocked until the BMC is back in a good state. Reported-by: Rafael J. Wysocki Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard Reviewed-by: Rafael J. Wysocki (Intel) --- drivers/char/ipmi/ipmi_si_intf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 0049e3792ba1..3667033fcc51 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2234,7 +2234,8 @@ static void wait_msg_processed(struct smi_info *smi_info) unsigned long jiffies_now; long time_diff; - while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) { + while (smi_info->si_state != SI_HOSED && + (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL))) { jiffies_now = jiffies; time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) * SI_USEC_PER_JIFFY); -- cgit v1.2.3 From 62cd145453d577113f993efd025f258dd86aa183 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 12 Feb 2026 21:56:54 -0600 Subject: ipmi:msghandler: Handle error returns from the SMI sender It used to be, until recently, that the sender operation on the low level interfaces would not fail. That's not the case any more with recent changes. So check the return value from the sender operation, and propagate it back up from there and handle the errors in all places. Reported-by: Rafael J. Wysocki Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard Reviewed-by: Rafael J. Wysocki (Intel) --- drivers/char/ipmi/ipmi_msghandler.c | 100 ++++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index a042b1596933..f8c3c1e44520 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -1887,19 +1887,32 @@ static struct ipmi_smi_msg *smi_add_send_msg(struct ipmi_smi *intf, return smi_msg; } -static void smi_send(struct ipmi_smi *intf, +static int smi_send(struct ipmi_smi *intf, const struct ipmi_smi_handlers *handlers, struct ipmi_smi_msg *smi_msg, int priority) { int run_to_completion = READ_ONCE(intf->run_to_completion); unsigned long flags = 0; + int rv = 0; ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); smi_msg = smi_add_send_msg(intf, smi_msg, priority); ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); - if (smi_msg) - handlers->sender(intf->send_info, smi_msg); + if (smi_msg) { + rv = handlers->sender(intf->send_info, smi_msg); + if (rv) { + ipmi_lock_xmit_msgs(intf, run_to_completion, &flags); + intf->curr_msg = NULL; + ipmi_unlock_xmit_msgs(intf, run_to_completion, &flags); + /* + * Something may have been added to the transmit + * queue, so schedule a check for that. + */ + queue_work(system_wq, &intf->smi_work); + } + } + return rv; } static bool is_maintenance_mode_cmd(struct kernel_ipmi_msg *msg) @@ -2312,6 +2325,7 @@ static int i_ipmi_request(struct ipmi_user *user, struct ipmi_recv_msg *recv_msg; int run_to_completion = READ_ONCE(intf->run_to_completion); int rv = 0; + bool in_seq_table = false; if (supplied_recv) { recv_msg = supplied_recv; @@ -2365,33 +2379,50 @@ static int i_ipmi_request(struct ipmi_user *user, rv = i_ipmi_req_ipmb(intf, addr, msgid, msg, smi_msg, recv_msg, source_address, source_lun, retries, retry_time_ms); + in_seq_table = true; } else if (is_ipmb_direct_addr(addr)) { rv = i_ipmi_req_ipmb_direct(intf, addr, msgid, msg, smi_msg, recv_msg, source_lun); } else if (is_lan_addr(addr)) { rv = i_ipmi_req_lan(intf, addr, msgid, msg, smi_msg, recv_msg, source_lun, retries, retry_time_ms); + in_seq_table = true; } else { - /* Unknown address type. */ + /* Unknown address type. */ ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; } - if (rv) { -out_err: - if (!supplied_smi) - ipmi_free_smi_msg(smi_msg); - if (!supplied_recv) - ipmi_free_recv_msg(recv_msg); - } else { + if (!rv) { dev_dbg(intf->si_dev, "Send: %*ph\n", smi_msg->data_size, smi_msg->data); - smi_send(intf, intf->handlers, smi_msg, priority); + rv = smi_send(intf, intf->handlers, smi_msg, priority); + if (rv != IPMI_CC_NO_ERROR) + /* smi_send() returns an IPMI err, return a Linux one. */ + rv = -EIO; + if (rv && in_seq_table) { + /* + * If it's in the sequence table, it will be + * retried later, so ignore errors. + */ + rv = 0; + /* But we need to fix the timeout. */ + intf_start_seq_timer(intf, smi_msg->msgid); + ipmi_free_smi_msg(smi_msg); + smi_msg = NULL; + } } +out_err: if (!run_to_completion) mutex_unlock(&intf->users_mutex); + if (rv) { + if (!supplied_smi) + ipmi_free_smi_msg(smi_msg); + if (!supplied_recv) + ipmi_free_recv_msg(recv_msg); + } return rv; } @@ -3965,12 +3996,12 @@ static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf, dev_dbg(intf->si_dev, "Invalid command: %*ph\n", msg->data_size, msg->data); - smi_send(intf, intf->handlers, msg, 0); - /* - * We used the message, so return the value that - * causes it to not be freed or queued. - */ - rv = -1; + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR) + /* + * We used the message, so return the value that + * causes it to not be freed or queued. + */ + rv = -1; } else if (!IS_ERR(recv_msg)) { /* Extract the source address from the data. */ ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr; @@ -4044,12 +4075,12 @@ static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf, msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE; msg->data_size = 5; - smi_send(intf, intf->handlers, msg, 0); - /* - * We used the message, so return the value that - * causes it to not be freed or queued. - */ - rv = -1; + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR) + /* + * We used the message, so return the value that + * causes it to not be freed or queued. + */ + rv = -1; } else if (!IS_ERR(recv_msg)) { /* Extract the source address from the data. */ daddr = (struct ipmi_ipmb_direct_addr *)&recv_msg->addr; @@ -4189,7 +4220,7 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf, struct ipmi_smi_msg *msg) { struct cmd_rcvr *rcvr; - int rv = 0; + int rv = 0; /* Free by default */ unsigned char netfn; unsigned char cmd; unsigned char chan; @@ -4242,12 +4273,12 @@ static int handle_lan_get_msg_cmd(struct ipmi_smi *intf, dev_dbg(intf->si_dev, "Invalid command: %*ph\n", msg->data_size, msg->data); - smi_send(intf, intf->handlers, msg, 0); - /* - * We used the message, so return the value that - * causes it to not be freed or queued. - */ - rv = -1; + if (smi_send(intf, intf->handlers, msg, 0) == IPMI_CC_NO_ERROR) + /* + * We used the message, so return the value that + * causes it to not be freed or queued. + */ + rv = -1; } else if (!IS_ERR(recv_msg)) { /* Extract the source address from the data. */ lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr; @@ -5056,7 +5087,12 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent, ipmi_inc_stat(intf, retransmitted_ipmb_commands); - smi_send(intf, intf->handlers, smi_msg, 0); + /* If this fails we'll retry later or timeout. */ + if (smi_send(intf, intf->handlers, smi_msg, 0) != IPMI_CC_NO_ERROR) { + /* But fix the timeout. */ + intf_start_seq_timer(intf, smi_msg->msgid); + ipmi_free_smi_msg(smi_msg); + } } else ipmi_free_smi_msg(smi_msg); -- cgit v1.2.3 From cae66f1a1dcd23e17da5a015ef9d731129f9d2dd Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 13 Feb 2026 00:15:04 -0600 Subject: ipmi:si: Fix check for a misbehaving BMC There is a race on checking the state in the sender, it needs to be checked under a lock. But you also need a check to avoid issues with a misbehaving BMC for run to completion mode. So leave the check at the beginning for run to completion, and add a check under the lock to avoid the race. Reported-by: Rafael J. Wysocki Fixes: bc3a9d217755 ("ipmi:si: Gracefully handle if the BMC is non-functional") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Corey Minyard Reviewed-by: Rafael J. Wysocki (Intel) --- drivers/char/ipmi/ipmi_si_intf.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 3667033fcc51..6eda61664aaa 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -924,9 +924,14 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg) { struct smi_info *smi_info = send_info; unsigned long flags; + int rv = IPMI_CC_NO_ERROR; debug_timestamp(smi_info, "Enqueue"); + /* + * Check here for run to completion mode. A check under lock is + * later. + */ if (smi_info->si_state == SI_HOSED) return IPMI_BUS_ERR; @@ -940,18 +945,15 @@ static int sender(void *send_info, struct ipmi_smi_msg *msg) } spin_lock_irqsave(&smi_info->si_lock, flags); - /* - * The following two lines don't need to be under the lock for - * the lock's sake, but they do need SMP memory barriers to - * avoid getting things out of order. We are already claiming - * the lock, anyway, so just do it under the lock to avoid the - * ordering problem. - */ - BUG_ON(smi_info->waiting_msg); - smi_info->waiting_msg = msg; - check_start_timer_thread(smi_info); + if (smi_info->si_state == SI_HOSED) { + rv = IPMI_BUS_ERR; + } else { + BUG_ON(smi_info->waiting_msg); + smi_info->waiting_msg = msg; + check_start_timer_thread(smi_info); + } spin_unlock_irqrestore(&smi_info->si_lock, flags); - return IPMI_CC_NO_ERROR; + return rv; } static void set_run_to_completion(void *send_info, bool i_run_to_completion) -- cgit v1.2.3 From 318c58852e686c009825ae8c071080b9ccdd2af0 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Wed, 11 Feb 2026 14:22:27 -0500 Subject: cxl/memdev: fix deadlock in cxl_memdev_autoremove() on attach failure cxl_memdev_autoremove() takes device_lock(&cxlmd->dev) via guard(device) and then calls cxl_memdev_unregister() when the attach callback was provided but cxl_mem_probe() failed to bind. cxl_memdev_unregister() calls cdev_device_del() device_del() bus_remove_device() device_release_driver() This path is reached when a driver uses the @attach parameter to devm_cxl_add_memdev() and the CXL topology fails to enumerate (e.g. DVSEC range registers decode outside platform-defined CXL ranges, causing the endpoint port probe to fail). Add cxl_memdev_attach_failed() to set the scope of the check correctly. Reported-by: kreview-c94b85d6d2 Fixes: 29317f8dc6ed ("cxl/mem: Introduce cxl_memdev_attach for CXL-dependent operation") Signed-off-by: Gregory Price Reviewed-by: Dan Williams Reviewed-by: Davidlohr Bueso Link: https://patch.msgid.link/20260211192228.2148713-1-gourry@gourry.net Signed-off-by: Dave Jiang --- drivers/cxl/core/memdev.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index f547d8ac34c7..273c22118d3d 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -1089,10 +1089,8 @@ static int cxlmd_add(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds) DEFINE_FREE(put_cxlmd, struct cxl_memdev *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) -static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) +static bool cxl_memdev_attach_failed(struct cxl_memdev *cxlmd) { - int rc; - /* * If @attach is provided fail if the driver is not attached upon * return. Note that failure here could be the result of a race to @@ -1100,7 +1098,14 @@ static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) * succeeded and then cxl_mem unbound before the lock is acquired. */ guard(device)(&cxlmd->dev); - if (cxlmd->attach && !cxlmd->dev.driver) { + return (cxlmd->attach && !cxlmd->dev.driver); +} + +static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) +{ + int rc; + + if (cxl_memdev_attach_failed(cxlmd)) { cxl_memdev_unregister(cxlmd); return ERR_PTR(-ENXIO); } -- cgit v1.2.3 From 822655e6751dde2df7ddaa828c5aba217726c5a2 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 23 Feb 2026 09:29:00 -0700 Subject: cxl/port: Introduce port_to_host() helper In CXL subsystem, a port has its own host device for the port creation and removal. The host of CXL root and all the first level ports is the platform firmware device, the host of other ports is their parent port's device. Create this new helper to much easier to get the host of a cxl port. Introduce port_to_host() and use it to replace all places where using open coded to get the host of a port. Remove endpoint_host() as its functionality can be replaced by port_to_host(). [dj: Squashed commit 1 and 3 in the series to commit 1. ] Signed-off-by: Li Ming Tested-by: Alison Schofield Reviewed-by: Dan Williams Link: https://patch.msgid.link/20260210-fix-port-enumeration-failure-v3-1-06acce0b9ead@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/core.h | 18 ++++++++++++++++++ drivers/cxl/core/port.c | 29 +++-------------------------- 2 files changed, 21 insertions(+), 26 deletions(-) (limited to 'drivers') diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 007b8aff0238..5b0570df0fd9 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -152,6 +152,24 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +static inline struct device *port_to_host(struct cxl_port *port) +{ + struct cxl_port *parent = is_cxl_root(port) ? NULL : + to_cxl_port(port->dev.parent); + + /* + * The host of CXL root port and the first level of ports is + * the platform firmware device, the host of all other ports + * is their parent port. + */ + if (!parent) + return port->uport_dev; + else if (is_cxl_root(parent)) + return parent->uport_dev; + else + return &parent->dev; +} + static inline struct device *dport_to_host(struct cxl_dport *dport) { struct cxl_port *port = dport->port; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b69c2529744c..6be150e645b6 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -615,22 +615,8 @@ struct cxl_port *parent_port_of(struct cxl_port *port) static void unregister_port(void *_port) { struct cxl_port *port = _port; - struct cxl_port *parent = parent_port_of(port); - struct device *lock_dev; - /* - * CXL root port's and the first level of ports are unregistered - * under the platform firmware device lock, all other ports are - * unregistered while holding their parent port lock. - */ - if (!parent) - lock_dev = port->uport_dev; - else if (is_cxl_root(parent)) - lock_dev = parent->uport_dev; - else - lock_dev = &parent->dev; - - device_lock_assert(lock_dev); + device_lock_assert(port_to_host(port)); port->dead = true; device_unregister(&port->dev); } @@ -1427,20 +1413,11 @@ static struct device *grandparent(struct device *dev) return NULL; } -static struct device *endpoint_host(struct cxl_port *endpoint) -{ - struct cxl_port *port = to_cxl_port(endpoint->dev.parent); - - if (is_cxl_root(port)) - return port->uport_dev; - return &port->dev; -} - static void delete_endpoint(void *data) { struct cxl_memdev *cxlmd = data; struct cxl_port *endpoint = cxlmd->endpoint; - struct device *host = endpoint_host(endpoint); + struct device *host = port_to_host(endpoint); scoped_guard(device, host) { if (host->driver && !endpoint->dead) { @@ -1456,7 +1433,7 @@ static void delete_endpoint(void *data) int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) { - struct device *host = endpoint_host(endpoint); + struct device *host = port_to_host(endpoint); struct device *dev = &cxlmd->dev; get_device(host); -- cgit v1.2.3 From 0066688dbcdcf51680f499936faffe6d0e94194e Mon Sep 17 00:00:00 2001 From: Li Ming Date: Tue, 10 Feb 2026 19:46:57 +0800 Subject: cxl/port: Hold port host lock during dport adding. CXL testing environment can trigger following trace Oops: general protection fault, probably for non-canonical address 0xdffffc0000000092: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000490-0x0000000000000497] RIP: 0010:cxl_dpa_to_region+0x105/0x1f0 [cxl_core] Call Trace: cxl_event_trace_record+0xd1/0xa70 [cxl_core] __cxl_event_trace_record+0x12f/0x1e0 [cxl_core] cxl_mem_get_records_log+0x261/0x500 [cxl_core] cxl_mem_get_event_records+0x7c/0xc0 [cxl_core] cxl_mock_mem_probe+0xd38/0x1c60 [cxl_mock_mem] platform_probe+0x9d/0x130 really_probe+0x1c8/0x960 __driver_probe_device+0x187/0x3e0 driver_probe_device+0x45/0x120 __device_attach_driver+0x15d/0x280 When CXL subsystem adds a cxl port to a hierarchy, there is a small window where the new port becomes visible before it is bound to a driver. This happens because device_add() adds a device to bus device list before bus_probe_device() binds it to a driver. So if two cxl memdevs are trying to add a dport to a same port via devm_cxl_enumerate_ports(), the second cxl memdev may observe the port and attempt to add a dport, but fails because the port has not yet been attached to cxl port driver. That causes the memdev->endpoint can not be updated. The sequence is like: CPU 0 CPU 1 devm_cxl_enumerate_ports() # port not found, add it add_port_attach_ep() # hold the parent port lock # to add the new port devm_cxl_create_port() device_add() # Add dev to bus devs list bus_add_device() devm_cxl_enumerate_ports() # found the port find_cxl_port_by_uport() # hold port lock to add a dport device_lock(the port) find_or_add_dport() cxl_port_add_dport() return -ENXIO because port->dev.driver is NULL device_unlock(the port) bus_probe_device() # hold the port lock # for attaching device_lock(the port) attaching the new port device_unlock(the port) To fix this race, require that dport addition holds the host lock of the target port(the host of CXL root and all cxl host bridge ports is the platform firmware device, the host of all other ports is their parent port). The CXL subsystem already requires holding the host lock while attaching a new port. Therefore, successfully acquiring the host lock guarantees that port attaching has completed. Fixes: 4f06d81e7c6a ("cxl: Defer dport allocation for switch ports") Signed-off-by: Li Ming Reviewed-by: Dan Williams Tested-by: Alison Schofield Link: https://patch.msgid.link/20260210-fix-port-enumeration-failure-v3-2-06acce0b9ead@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/port.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 6be150e645b6..0c5957d1d329 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1767,7 +1767,16 @@ static struct cxl_dport *find_or_add_dport(struct cxl_port *port, { struct cxl_dport *dport; - device_lock_assert(&port->dev); + /* + * The port is already visible in CXL hierarchy, but it may still + * be in the process of binding to the CXL port driver at this point. + * + * port creation and driver binding are protected by the port's host + * lock, so acquire the host lock here to ensure the port has completed + * driver binding before proceeding with dport addition. + */ + guard(device)(port_to_host(port)); + guard(device)(&port->dev); dport = cxl_find_dport_by_dev(port, dport_dev); if (!dport) { dport = probe_dport(port, dport_dev); @@ -1834,13 +1843,11 @@ retry: * RP port enumerated by cxl_acpi without dport will * have the dport added here. */ - scoped_guard(device, &port->dev) { - dport = find_or_add_dport(port, dport_dev); - if (IS_ERR(dport)) { - if (PTR_ERR(dport) == -EAGAIN) - goto retry; - return PTR_ERR(dport); - } + dport = find_or_add_dport(port, dport_dev); + if (IS_ERR(dport)) { + if (PTR_ERR(dport) == -EAGAIN) + goto retry; + return PTR_ERR(dport); } rc = cxl_add_ep(dport, &cxlmd->dev); -- cgit v1.2.3 From 08fe1b5166fdc81b010d7bf39cd6440620e7931e Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 5 Feb 2026 22:02:37 -0800 Subject: accel/amdxdna: Remove buffer size check when creating command BO Large command buffers may be used, and they do not always need to be mapped or accessed by the driver. Performing a size check at command BO creation time unnecessarily rejects valid use cases. Remove the buffer size check from command BO creation, and defer vmap and size validation to the paths where the driver actually needs to map and access the command buffer. Fixes: ac49797c1815 ("accel/amdxdna: Add GEM buffer object management") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260206060237.4050492-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/amdxdna_gem.c | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index 8c290ddd3251..d60db49ead71 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -21,8 +21,6 @@ #include "amdxdna_pci_drv.h" #include "amdxdna_ubuf.h" -#define XDNA_MAX_CMD_BO_SIZE SZ_32K - MODULE_IMPORT_NS("DMA_BUF"); static int @@ -745,12 +743,6 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, { struct amdxdna_dev *xdna = to_xdna_dev(dev); struct amdxdna_gem_obj *abo; - int ret; - - if (args->size > XDNA_MAX_CMD_BO_SIZE) { - XDNA_ERR(xdna, "Command bo size 0x%llx too large", args->size); - return ERR_PTR(-EINVAL); - } if (args->size < sizeof(struct amdxdna_cmd)) { XDNA_DBG(xdna, "Command BO size 0x%llx too small", args->size); @@ -764,17 +756,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, abo->type = AMDXDNA_BO_CMD; abo->client = filp->driver_priv; - ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); - if (ret) { - XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); - goto release_obj; - } - return abo; - -release_obj: - drm_gem_object_put(to_gobj(abo)); - return ERR_PTR(ret); } int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) @@ -871,6 +853,7 @@ struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client, struct amdxdna_dev *xdna = client->xdna; struct amdxdna_gem_obj *abo; struct drm_gem_object *gobj; + int ret; gobj = drm_gem_object_lookup(client->filp, bo_hdl); if (!gobj) { @@ -879,9 +862,26 @@ struct amdxdna_gem_obj *amdxdna_gem_get_obj(struct amdxdna_client *client, } abo = to_xdna_obj(gobj); - if (bo_type == AMDXDNA_BO_INVALID || abo->type == bo_type) + if (bo_type != AMDXDNA_BO_INVALID && abo->type != bo_type) + goto put_obj; + + if (bo_type != AMDXDNA_BO_CMD || abo->mem.kva) return abo; + if (abo->mem.size > SZ_32K) { + XDNA_ERR(xdna, "Cmd bo is too big %ld", abo->mem.size); + goto put_obj; + } + + ret = amdxdna_gem_obj_vmap(abo, &abo->mem.kva); + if (ret) { + XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); + goto put_obj; + } + + return abo; + +put_obj: drm_gem_object_put(gobj); return NULL; } -- cgit v1.2.3 From c68a6af400ca80596e8c37de0a1cb564aa9da8a4 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 5 Feb 2026 22:02:51 -0800 Subject: accel/amdxdna: Switch to always use chained command Preempt commands are only supported when submitted as chained commands. To ensure preempt support works consistently, always submit commands in chained command format. Set force_cmdlist to true so that single commands are filled using the chained command layout, enabling correct handling of preempt commands. Fixes: 3a0ff7b98af4 ("accel/amdxdna: Support preemption requests") Reviewed-by: Karol Wachowski Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260206060251.4050512-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 4503c7c77a3e..7140c3f96362 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -23,9 +23,9 @@ #include "amdxdna_pci_drv.h" #include "amdxdna_pm.h" -static bool force_cmdlist; +static bool force_cmdlist = true; module_param(force_cmdlist, bool, 0600); -MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)"); +MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)"); #define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ -- cgit v1.2.3 From 8363c02863332992a1822688da41f881d88d1631 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 5 Feb 2026 22:03:06 -0800 Subject: accel/amdxdna: Fix crash when destroying a suspended hardware context If userspace issues an ioctl to destroy a hardware context that has already been automatically suspended, the driver may crash because the mailbox channel pointer is NULL for the suspended context. Fix this by checking the mailbox channel pointer in aie2_destroy_context() before accessing it. Fixes: 97f27573837e ("accel/amdxdna: Fix potential NULL pointer dereference in context cleanup") Reviewed-by: Karol Wachowski Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260206060306.4050531-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 7d7dcfeaf794..ab1178850c47 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -318,6 +318,9 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc struct amdxdna_dev *xdna = ndev->xdna; int ret; + if (!hwctx->priv->mbox_chann) + return 0; + xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id); xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); -- cgit v1.2.3 From 57aa3917a3b3bd805a3679371f97a1ceda3c5510 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 10 Feb 2026 10:42:51 -0600 Subject: accel/amdxdna: Reduce log noise during process termination During process termination, several error messages are logged that are not actual errors but expected conditions when a process is killed or interrupted. This creates unnecessary noise in the kernel log. The specific scenarios are: 1. HMM invalidation returns -ERESTARTSYS when the wait is interrupted by a signal during process cleanup. This is expected when a process is being terminated and should not be logged as an error. 2. Context destruction returns -ENODEV when the firmware or device has already stopped, which commonly occurs during cleanup if the device was already torn down. This is also an expected condition during orderly shutdown. Downgrade these expected error conditions from error level to debug level to reduce log noise while still keeping genuine errors visible. Fixes: 97f27573837e ("accel/amdxdna: Fix potential NULL pointer dereference in context cleanup") Reviewed-by: Lizhi Hou Signed-off-by: Mario Limonciello Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260210164521.1094274-3-mario.limonciello@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 6 ++++-- drivers/accel/amdxdna/aie2_message.c | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 7140c3f96362..e13be7608462 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -497,7 +497,7 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx) if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) { ret = aie2_destroy_context(xdna->dev_handle, hwctx); - if (ret) + if (ret && ret != -ENODEV) XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret); } else { ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); @@ -1070,6 +1070,8 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, true, MAX_SCHEDULE_TIMEOUT); - if (!ret || ret == -ERESTARTSYS) + if (!ret) XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); + else if (ret == -ERESTARTSYS) + XDNA_DBG(xdna, "Wait for bo interrupted by signal"); } diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index ab1178850c47..5d80c5837745 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -216,8 +216,10 @@ static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id) req.context_id = id; ret = aie2_send_mgmt_msg_wait(ndev, &msg); - if (ret) + if (ret && ret != -ENODEV) XDNA_WARN(xdna, "Destroy context failed, ret %d", ret); + else if (ret == -ENODEV) + XDNA_DBG(xdna, "Destroy context: device already stopped"); return ret; } -- cgit v1.2.3 From 1aa82181a3c285c7351523d587f7981ae4c015c8 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 11 Feb 2026 12:46:44 -0800 Subject: accel/amdxdna: Fix dead lock for suspend and resume When an application issues a query IOCTL while auto suspend is running, a deadlock can occur. The query path holds dev_lock and then calls pm_runtime_resume_and_get(), which waits for the ongoing suspend to complete. Meanwhile, the suspend callback attempts to acquire dev_lock and blocks, resulting in a deadlock. Fix this by releasing dev_lock before calling pm_runtime_resume_and_get() and reacquiring it after the call completes. Also acquire dev_lock in the resume callback to keep the locking consistent. Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260211204644.722758-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 4 ++-- drivers/accel/amdxdna/aie2_pci.c | 7 +++---- drivers/accel/amdxdna/aie2_pm.c | 2 +- drivers/accel/amdxdna/amdxdna_ctx.c | 19 +++++++------------ drivers/accel/amdxdna/amdxdna_pm.c | 2 ++ drivers/accel/amdxdna/amdxdna_pm.h | 11 +++++++++++ 6 files changed, 26 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index e13be7608462..8d79fafd889a 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -629,7 +629,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) goto free_entity; } - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto free_col_list; @@ -760,7 +760,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size if (!hwctx->cus) return -ENOMEM; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto free_cus; diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 2a51b2658bfc..07e369507818 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -451,7 +451,6 @@ static int aie2_hw_suspend(struct amdxdna_dev *xdna) { struct amdxdna_client *client; - guard(mutex)(&xdna->dev_lock); list_for_each_entry(client, &xdna->client_list, node) aie2_hwctx_suspend(client); @@ -951,7 +950,7 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto dev_exit; @@ -1044,7 +1043,7 @@ static int aie2_get_array(struct amdxdna_client *client, if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto dev_exit; @@ -1134,7 +1133,7 @@ static int aie2_set_state(struct amdxdna_client *client, if (!drm_dev_enter(&xdna->ddev, &idx)) return -ENODEV; - ret = amdxdna_pm_resume_get(xdna); + ret = amdxdna_pm_resume_get_locked(xdna); if (ret) goto dev_exit; diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c index 579b8be13b18..29bd4403a94d 100644 --- a/drivers/accel/amdxdna/aie2_pm.c +++ b/drivers/accel/amdxdna/aie2_pm.c @@ -31,7 +31,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { int ret; - ret = amdxdna_pm_resume_get(ndev->xdna); + ret = amdxdna_pm_resume_get_locked(ndev->xdna); if (ret) return ret; diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 59fa3800b9d3..5173456bbb61 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -266,9 +266,9 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr struct amdxdna_drm_config_hwctx *args = data; struct amdxdna_dev *xdna = to_xdna_dev(dev); struct amdxdna_hwctx *hwctx; - int ret, idx; u32 buf_size; void *buf; + int ret; u64 val; if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad))) @@ -310,20 +310,17 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr return -EINVAL; } - mutex_lock(&xdna->dev_lock); - idx = srcu_read_lock(&client->hwctx_srcu); + guard(mutex)(&xdna->dev_lock); hwctx = xa_load(&client->hwctx_xa, args->handle); if (!hwctx) { XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); ret = -EINVAL; - goto unlock_srcu; + goto free_buf; } ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size); -unlock_srcu: - srcu_read_unlock(&client->hwctx_srcu, idx); - mutex_unlock(&xdna->dev_lock); +free_buf: kfree(buf); return ret; } @@ -334,7 +331,7 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl) struct amdxdna_hwctx *hwctx; struct amdxdna_gem_obj *abo; struct drm_gem_object *gobj; - int ret, idx; + int ret; if (!xdna->dev_info->ops->hwctx_sync_debug_bo) return -EOPNOTSUPP; @@ -345,17 +342,15 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl) abo = to_xdna_obj(gobj); guard(mutex)(&xdna->dev_lock); - idx = srcu_read_lock(&client->hwctx_srcu); hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx); if (!hwctx) { ret = -EINVAL; - goto unlock_srcu; + goto put_obj; } ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl); -unlock_srcu: - srcu_read_unlock(&client->hwctx_srcu, idx); +put_obj: drm_gem_object_put(gobj); return ret; } diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c index d024d480521c..b1fafddd7ad5 100644 --- a/drivers/accel/amdxdna/amdxdna_pm.c +++ b/drivers/accel/amdxdna/amdxdna_pm.c @@ -16,6 +16,7 @@ int amdxdna_pm_suspend(struct device *dev) struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; + guard(mutex)(&xdna->dev_lock); if (xdna->dev_info->ops->suspend) ret = xdna->dev_info->ops->suspend(xdna); @@ -28,6 +29,7 @@ int amdxdna_pm_resume(struct device *dev) struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; + guard(mutex)(&xdna->dev_lock); if (xdna->dev_info->ops->resume) ret = xdna->dev_info->ops->resume(xdna); diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h index 77b2d6e45570..3d26b973e0e3 100644 --- a/drivers/accel/amdxdna/amdxdna_pm.h +++ b/drivers/accel/amdxdna/amdxdna_pm.h @@ -15,4 +15,15 @@ void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna); void amdxdna_pm_init(struct amdxdna_dev *xdna); void amdxdna_pm_fini(struct amdxdna_dev *xdna); +static inline int amdxdna_pm_resume_get_locked(struct amdxdna_dev *xdna) +{ + int ret; + + mutex_unlock(&xdna->dev_lock); + ret = amdxdna_pm_resume_get(xdna); + mutex_lock(&xdna->dev_lock); + + return ret; +} + #endif /* _AMDXDNA_PM_H_ */ -- cgit v1.2.3 From fdb65acfe655f844ae1e88696b9656d3ef5bb8fb Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 11 Feb 2026 12:47:16 -0800 Subject: accel/amdxdna: Fix suspend failure after enabling turbo mode Enabling turbo mode disables hardware clock gating. Suspend requires hardware clock gating to be re-enabled, otherwise suspend will fail. Fix this by calling aie2_runtime_cfg() from aie2_hw_stop() to re-enable clock gating during suspend. Also ensure that firmware is initialized in aie2_hw_start() before modifying clock-gating settings during resume. Fixes: f4d7b8a6bc8c ("accel/amdxdna: Enhance power management settings") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260211204716.722788-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 07e369507818..4b3e6bb97bd2 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -323,6 +323,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna) return; } + aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL); aie2_mgmt_fw_fini(ndev); xdna_mailbox_stop_channel(ndev->mgmt_chann); xdna_mailbox_destroy_channel(ndev->mgmt_chann); @@ -406,15 +407,15 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) goto stop_psp; } - ret = aie2_pm_init(ndev); + ret = aie2_mgmt_fw_init(ndev); if (ret) { - XDNA_ERR(xdna, "failed to init pm, ret %d", ret); + XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret); goto destroy_mgmt_chann; } - ret = aie2_mgmt_fw_init(ndev); + ret = aie2_pm_init(ndev); if (ret) { - XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret); + XDNA_ERR(xdna, "failed to init pm, ret %d", ret); goto destroy_mgmt_chann; } -- cgit v1.2.3 From 07efce5a6611af6714ea3ef65694e0c8dd7e44f5 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 11 Feb 2026 12:53:41 -0800 Subject: accel/amdxdna: Fix command hang on suspended hardware context When a hardware context is suspended, the job scheduler is stopped. If a command is submitted while the context is suspended, the job is queued in the scheduler but aie2_sched_job_run() is never invoked to restart the hardware context. As a result, the command hangs. Fix this by modifying the hardware context suspend routine to keep the job scheduler running so that queued jobs can trigger context restart properly. Fixes: aac243092b70 ("accel/amdxdna: Add command execution") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260211205341.722982-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_ctx.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 8d79fafd889a..25845bd5e507 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -53,6 +53,7 @@ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwct { drm_sched_stop(&hwctx->priv->sched, bad_job); aie2_destroy_context(xdna->dev_handle, hwctx); + drm_sched_start(&hwctx->priv->sched, 0); } static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) @@ -80,7 +81,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw } out: - drm_sched_start(&hwctx->priv->sched, 0); XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); return ret; } @@ -297,19 +297,23 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; int ret; - if (!hwctx->priv->mbox_chann) + ret = amdxdna_pm_resume_get(hwctx->client->xdna); + if (ret) + return NULL; + + if (!hwctx->priv->mbox_chann) { + amdxdna_pm_suspend_put(hwctx->client->xdna); return NULL; + } - if (!mmget_not_zero(job->mm)) + if (!mmget_not_zero(job->mm)) { + amdxdna_pm_suspend_put(hwctx->client->xdna); return ERR_PTR(-ESRCH); + } kref_get(&job->refcnt); fence = dma_fence_get(job->fence); - ret = amdxdna_pm_resume_get(hwctx->client->xdna); - if (ret) - goto out; - if (job->drv_cmd) { switch (job->drv_cmd->opcode) { case SYNC_DEBUG_BO: -- cgit v1.2.3 From 1110a949675ebd56b3f0286e664ea543f745801c Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 17 Feb 2026 10:54:15 -0800 Subject: accel/amdxdna: Fix out-of-bounds memset in command slot handling The remaining space in a command slot may be smaller than the size of the command header. Clearing the command header with memset() before verifying the available slot space can result in an out-of-bounds write and memory corruption. Fix this by moving the memset() call after the size validation. Fixes: 3d32eb7a5ecf ("accel/amdxdna: Fix cu_idx being cleared by memset() during command setup") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260217185415.1781908-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 5d80c5837745..277a27bce850 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -699,11 +699,11 @@ aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *siz u32 cmd_len; void *cmd; - memset(npu_slot, 0, sizeof(*npu_slot)); cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); if (*size < sizeof(*npu_slot) + cmd_len) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; @@ -724,7 +724,6 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; - memset(npu_slot, 0, sizeof(*npu_slot)); sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*sn); if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -733,6 +732,7 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; @@ -756,7 +756,6 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t u32 cmd_len; u32 arg_sz; - memset(npu_slot, 0, sizeof(*npu_slot)); pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -765,6 +764,7 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo); if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; @@ -792,7 +792,6 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; - memset(npu_slot, 0, sizeof(*npu_slot)); pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -801,6 +800,7 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; + memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_ELF; npu_slot->inst_buf_addr = pd->inst_buf; npu_slot->save_buf_addr = pd->save_buf; -- cgit v1.2.3 From 03808abb1d868aed7478a11a82e5bb4b3f1ca6d6 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 17 Feb 2026 11:28:15 -0800 Subject: accel/amdxdna: Prevent ubuf size overflow The ubuf size calculation may overflow, resulting in an undersized allocation and possible memory corruption. Use check_add_overflow() helpers to validate the size calculation before allocation. Fixes: bd72d4acda10 ("accel/amdxdna: Support user space allocated buffer") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260217192815.1784689-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/amdxdna_ubuf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c index b509f10b155c..fb71d6e3f44d 100644 --- a/drivers/accel/amdxdna/amdxdna_ubuf.c +++ b/drivers/accel/amdxdna/amdxdna_ubuf.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -176,7 +177,10 @@ struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev, goto free_ent; } - exp_info.size += va_ent[i].len; + if (check_add_overflow(exp_info.size, va_ent[i].len, &exp_info.size)) { + ret = -EINVAL; + goto free_ent; + } } ubuf->nr_pages = exp_info.size >> PAGE_SHIFT; -- cgit v1.2.3 From 901ec3470994006bc8dd02399e16b675566c3416 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 19 Feb 2026 13:19:46 -0800 Subject: accel/amdxdna: Validate command buffer payload count The count field in the command header is used to determine the valid payload size. Verify that the valid payload does not exceed the remaining buffer space. Fixes: aac243092b70 ("accel/amdxdna: Add command execution") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260219211946.1920485-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/amdxdna_ctx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 5173456bbb61..263d36072540 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -104,7 +104,10 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size) if (size) { count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header); - if (unlikely(count <= num_masks)) { + if (unlikely(count <= num_masks || + count * sizeof(u32) + + offsetof(struct amdxdna_cmd, data[0]) > + abo->mem.size)) { *size = 0; return NULL; } -- cgit v1.2.3 From e7e222ad73d93fe54d6e6e3a15253a0ecf081a1b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 Feb 2026 17:31:23 -0700 Subject: cxl: Move devm_cxl_add_nvdimm_bridge() to cxl_pmem.ko Moving the symbol devm_cxl_add_nvdimm_bridge() to drivers/cxl/cxl_pmem.c, so that cxl_pmem can export a symbol that gives cxl_acpi a depedency on cxl_pmem kernel module. This is a prepatory patch to resolve the issue of a race for nvdimm_bus object that is created during cxl_acpi_probe(). No functional changes besides moving code. Suggested-by: Dan Williams Acked-by: Ira Weiny Tested-by: Alison Schofield Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20260205001633.1813643-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pmem.c | 13 +++---------- drivers/cxl/cxl.h | 2 ++ drivers/cxl/pmem.c | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 3c6e76721522..b652e3486038 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -115,15 +115,8 @@ static void unregister_nvb(void *_cxl_nvb) device_unregister(&cxl_nvb->dev); } -/** - * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology - * @host: platform firmware root device - * @port: CXL port at the root of a CXL topology - * - * Return: bridge device that can host cxl_nvdimm objects - */ -struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, - struct cxl_port *port) +struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; struct device *dev; @@ -155,7 +148,7 @@ err: put_device(dev); return ERR_PTR(rc); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, "CXL"); +EXPORT_SYMBOL_FOR_MODULES(__devm_cxl_add_nvdimm_bridge, "cxl_pmem"); static void cxl_nvdimm_release(struct device *dev) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 04c673e7cdb0..f5850800f400 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -920,6 +920,8 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv); struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev); struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port); +struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port); struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 6a97e4e490b6..c67b30b516bf 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -13,6 +13,20 @@ static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); +/** + * __devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology + * @host: platform firmware root device + * @port: CXL port at the root of a CXL topology + * + * Return: bridge device that can host cxl_nvdimm objects + */ +struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port) +{ + return __devm_cxl_add_nvdimm_bridge(host, port); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, "CXL"); + static void clear_exclusive(void *mds) { clear_exclusive_cxl_commands(mds, exclusive_cmds); -- cgit v1.2.3 From 43d37df67f7770d8d261fdcb64ecc8c314e91303 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 14:30:59 -0800 Subject: drm/xe/wa: Steer RMW of MCR registers while building default LRC When generating the default LRC, if a register is not masked, we apply any save-restore programming necessary via a read-modify-write sequence that will ensure we only update the relevant bits/fields without clobbering the rest of the register. However some of the registers that need to be updated might be MCR registers which require steering to a non-terminated instance to ensure we can read back a valid, non-zero value. The steering of reads originating from a command streamer is controlled by register CS_MMIO_GROUP_INSTANCE_SELECT. Emit additional MI_LRI commands to update the steering before any RMW of an MCR register to ensure the reads are performed properly. Note that needing to perform a RMW of an MCR register while building the default LRC is pretty rare. Most of the MCR registers that are part of an engine's LRCs are also masked registers, so no MCR is necessary. Fixes: f2f90989ccff ("drm/xe: Avoid reading RMW registers in emit_wa_job") Cc: Michal Wajdeczko Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260206223058.387014-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper (cherry picked from commit 6c2e331c915ba9e774aa847921262805feb00863) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 6 +++ drivers/gpu/drm/xe/xe_gt.c | 66 ++++++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 68172b0248a6..dc5a4fafa70c 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -96,6 +96,12 @@ #define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13) #define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) + +#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc) +#define SELECTIVE_READ_ADDRESSING REG_BIT(30) +#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23) +#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16) + /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. * The lsb of each can be considered a separate enabling bit for encryption. diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9d090d0f2438..df6d04704823 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return ret; } +/* Dwords required to emit a RMW of a register */ +#define EMIT_RMW_DW 20 + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { - struct xe_reg_sr *sr = &q->hwe->reg_lrc; + struct xe_hw_engine *hwe = q->hwe; + struct xe_reg_sr *sr = &hwe->reg_lrc; struct xe_reg_sr_entry *entry; - int count_rmw = 0, count = 0, ret; + int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret; unsigned long idx; struct xe_bb *bb; size_t bb_len = 0; @@ -224,6 +228,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) ++count; + else if (entry->reg.mcr) + ++count_rmw_mcr; else ++count_rmw; } @@ -231,17 +237,35 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (count) bb_len += count * 2 + 1; - if (count_rmw) - bb_len += count_rmw * 20 + 7; + /* + * RMW of MCR registers is the same as a normal RMW, except an + * additional LRI (3 dwords) is required per register to steer the read + * to a nom-terminated instance. + * + * We could probably shorten the batch slightly by eliding the + * steering for consecutive MCR registers that have the same + * group/instance target, but it's not worth the extra complexity to do + * so. + */ + bb_len += count_rmw * EMIT_RMW_DW; + bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3); + + /* + * After doing all RMW, we need 7 trailing dwords to clean up, + * plus an additional 3 dwords to reset steering if any of the + * registers were MCR. + */ + if (count_rmw || count_rmw_mcr) + bb_len += 7 + (count_rmw_mcr ? 3 : 0); - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + if (hwe->class == XE_ENGINE_CLASS_RENDER) /* * Big enough to emit all of the context's 3DSTATE via * xe_lrc_emit_hwe_state_instructions() */ - bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32); - xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len); bb = xe_bb_new(gt, bb_len, false); if (IS_ERR(bb)) @@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } } - if (count_rmw) { - /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ - + if (count_rmw || count_rmw_mcr) { xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) continue; + if (entry->reg.mcr) { + struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw }; + u8 group, instance; + + xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr; + *cs++ = SELECTIVE_READ_ADDRESSING | + REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) | + REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance); + } + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; *cs++ = entry->reg.addr; *cs++ = CS_GPR_REG(0, 0).addr; @@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = CS_GPR_REG(0, 0).addr; *cs++ = entry->reg.addr; - xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", - entry->reg.addr, entry->clr_bits, entry->set_bits); + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n", + entry->reg.addr, entry->clr_bits, entry->set_bits, + entry->reg.mcr ? " (MCR)" : ""); } /* reset used GPR */ @@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = 0; *cs++ = CS_GPR_REG(0, 2).addr; *cs++ = 0; + + /* reset steering */ + if (count_rmw_mcr) { + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr; + *cs++ = 0; + } } cs = xe_lrc_emit_hwe_state_instructions(q, cs); -- cgit v1.2.3 From 7dff99b354601dd01829e1511711846e04340a69 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Feb 2026 11:18:48 -0800 Subject: Remove WARN_ALL_UNSEEDED_RANDOM kernel config option This config option goes way back - it used to be an internal debug option to random.c (at that point called DEBUG_RANDOM_BOOT), then was renamed and exposed as a config option as CONFIG_WARN_UNSEEDED_RANDOM, and then further renamed to the current CONFIG_WARN_ALL_UNSEEDED_RANDOM. It was all done with the best of intentions: the more limited rate-limited reports were reporting some cases, but if you wanted to see all the gory details, you'd enable this "ALL" option. However, it turns out - perhaps not surprisingly - that when people don't care about and fix the first rate-limited cases, they most certainly don't care about any others either, and so warning about all of them isn't actually helping anything. And the non-ratelimited reporting causes problems, where well-meaning people enable debug options, but the excessive flood of messages that nobody cares about will hide actual real information when things go wrong. I just got a kernel bug report (which had nothing to do with randomness) where two thirds of the the truncated dmesg was just variations of random: get_random_u32 called from __get_random_u32_below+0x10/0x70 with crng_init=0 and in the process early boot messages had been lost (in addition to making the messages that _hadn't_ been lost harder to read). The proper way to find these things for the hypothetical developer that cares - if such a person exists - is almost certainly with boot time tracing. That gives you the option to get call graphs etc too, which is likely a requirement for fixing any problems anyway. See Documentation/trace/boottime-trace.rst for that option. And if we for some reason do want to re-introduce actual printing of these things, it will need to have some uniqueness filtering rather than this "just print it all" model. Fixes: cc1e127bfa95 ("random: remove ratelimiting for in-kernel unseeded randomness") Acked-by: Jason Donenfeld Signed-off-by: Linus Torvalds --- drivers/char/random.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/char/random.c b/drivers/char/random.c index dcd002e1b890..7ff4d29911fd 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -96,8 +96,7 @@ static ATOMIC_NOTIFIER_HEAD(random_ready_notifier); /* Control how we warn userspace. */ static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); -static int ratelimit_disable __read_mostly = - IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); +static int ratelimit_disable __read_mostly = 0; module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); @@ -168,12 +167,6 @@ int __cold execute_with_initialized_rng(struct notifier_block *nb) return ret; } -#define warn_unseeded_randomness() \ - if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ - printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ - __func__, (void *)_RET_IP_, crng_init) - - /********************************************************************* * * Fast key erasure RNG, the "crng". @@ -434,7 +427,6 @@ static void _get_random_bytes(void *buf, size_t len) */ void get_random_bytes(void *buf, size_t len) { - warn_unseeded_randomness(); _get_random_bytes(buf, len); } EXPORT_SYMBOL(get_random_bytes); @@ -523,8 +515,6 @@ type get_random_ ##type(void) \ struct batch_ ##type *batch; \ unsigned long next_gen; \ \ - warn_unseeded_randomness(); \ - \ if (!crng_ready()) { \ _get_random_bytes(&ret, sizeof(ret)); \ return ret; \ -- cgit v1.2.3 From 16cb1a64dce56708a1684bf755ad52fb52c41f87 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Feb 2026 13:12:00 +0100 Subject: RDMA/uverbs: select CONFIG_DMA_SHARED_BUFFER The addition of dmabuf support in uverbs means that it is no longer possible to build infiniband support if that is disabled: arm-linux-gnueabi-ld: drivers/infiniband/core/ib_core_uverbs.o: in function `rdma_user_mmap_entry_remove.part.0': ib_core_uverbs.c:(.text+0x508): undefined reference to `dma_buf_move_notify' (dma_buf_move_notify): Unknown destination type (ARM/Thumb) in drivers/infiniband/core/ib_core_uverbs.o ib_core_uverbs.c:(.text+0x518): undefined reference to `dma_resv_wait_timeout' (dma_resv_wait_timeout): Unknown destination type (ARM/Thumb) in drivers/infiniband/core/ib_core_uverbs.o Select this from Kconfig, as we do for the other users. Fixes: 0ac6f4056c4a ("RDMA/uverbs: Add DMABUF object type and operations") Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20260216121213.2088910-1-arnd@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 794b9778816b..78ac2ff5befd 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -6,6 +6,7 @@ menuconfig INFINIBAND depends on INET depends on m || IPV6 != m depends on !ALPHA + select DMA_SHARED_BUFFER select IRQ_POLL select DIMLIB help -- cgit v1.2.3 From 5c4e9a8b18457ad28b57069ef0f14661e3192b2e Mon Sep 17 00:00:00 2001 From: Jinwang Li Date: Thu, 5 Feb 2026 14:26:00 +0800 Subject: Bluetooth: hci_qca: Cleanup on all setup failures The setup process previously combined error handling and retry gating under one condition. As a result, the final failed attempt exited without performing cleanup. Update the failure path to always perform power and port cleanup on setup failure, and reopen the port only when retrying. Fixes: 9e80587aba4c ("Bluetooth: hci_qca: Enhance retry logic in qca_setup") Signed-off-by: Jinwang Li Reviewed-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index c511546f793e..c6de2602b49d 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2046,19 +2046,23 @@ retry: } out: - if (ret && retries < MAX_INIT_RETRIES) { - bt_dev_warn(hdev, "Retry BT power ON:%d", retries); + if (ret) { qca_power_shutdown(hu); - if (hu->serdev) { - serdev_device_close(hu->serdev); - ret = serdev_device_open(hu->serdev); - if (ret) { - bt_dev_err(hdev, "failed to open port"); - return ret; + + if (retries < MAX_INIT_RETRIES) { + bt_dev_warn(hdev, "Retry BT power ON:%d", retries); + if (hu->serdev) { + serdev_device_close(hu->serdev); + ret = serdev_device_open(hu->serdev); + if (ret) { + bt_dev_err(hdev, "failed to open port"); + return ret; + } } + retries++; + goto retry; } - retries++; - goto retry; + return ret; } /* Setup bdaddr */ -- cgit v1.2.3 From 1bfd7575092420ba5a0b944953c95b74a5646ff8 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 19 Feb 2026 23:35:18 +0000 Subject: drm/xe/sync: Cleanup partially initialized sync on parse failure xe_sync_entry_parse() can allocate references (syncobj, fence, chain fence, or user fence) before hitting a later failure path. Several of those paths returned directly, leaving partially initialized state and leaking refs. Route these error paths through a common free_sync label and call xe_sync_entry_cleanup(sync) before returning the error. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260219233516.2938172-5-shuicheng.lin@intel.com (cherry picked from commit f939bdd9207a5d1fc55cced5459858480686ce22) Cc: stable@vger.kernel.org Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index eb136390dafd..ebf6c96d7a41 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -146,8 +146,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (!signal) { sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_DBG(xe, !sync->fence)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, !sync->fence)) { + err = -EINVAL; + goto free_sync; + } } break; @@ -167,17 +169,21 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (signal) { sync->chain_fence = dma_fence_chain_alloc(); - if (!sync->chain_fence) - return -ENOMEM; + if (!sync->chain_fence) { + err = -ENOMEM; + goto free_sync; + } } else { sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_DBG(xe, !sync->fence)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, !sync->fence)) { + err = -EINVAL; + goto free_sync; + } err = dma_fence_chain_find_seqno(&sync->fence, sync_in.timeline_value); if (err) - return err; + goto free_sync; } break; @@ -216,6 +222,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, sync->timeline_value = sync_in.timeline_value; return 0; + +free_sync: + xe_sync_entry_cleanup(sync); + return err; } ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO); -- cgit v1.2.3 From 0879c3f04f67e2a1677c25dcc24669ce21eb6a6c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 19 Feb 2026 23:35:19 +0000 Subject: drm/xe/sync: Fix user fence leak on alloc failure When dma_fence_chain_alloc() fails, properly release the user fence reference to prevent a memory leak. Fixes: 0995c2fc39b0 ("drm/xe: Enforce correct user fence signaling order using") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260219233516.2938172-6-shuicheng.lin@intel.com (cherry picked from commit a5d5634cde48a9fcd68c8504aa07f89f175074a0) Cc: stable@vger.kernel.org Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index ebf6c96d7a41..24d6d9af20d6 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -206,8 +206,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); sync->ufence_chain_fence = dma_fence_chain_alloc(); - if (!sync->ufence_chain_fence) - return -ENOMEM; + if (!sync->ufence_chain_fence) { + err = -ENOMEM; + goto free_sync; + } sync->ufence_syncobj = ufence_syncobj; } -- cgit v1.2.3 From eddb98ad9364b4e778768785d46cfab04ce52100 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 20 Feb 2026 13:43:00 +0900 Subject: ata: libata-eh: correctly handle deferred qc timeouts A deferred qc may timeout while waiting for the device queue to drain to be submitted. In such case, since the qc is not active, ata_scsi_cmd_error_handler() ends up calling scsi_eh_finish_cmd(), which frees the qc. But as the port deferred_qc field still references this finished/freed qc, the deferred qc work may eventually attempt to call ata_qc_issue() against this invalid qc, leading to errors such as reported by UBSAN (syzbot run): UBSAN: shift-out-of-bounds in drivers/ata/libata-core.c:5166:24 shift exponent 4210818301 is too large for 64-bit type 'long long unsigned int' ... Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x100/0x190 lib/dump_stack.c:120 ubsan_epilogue+0xa/0x30 lib/ubsan.c:233 __ubsan_handle_shift_out_of_bounds+0x279/0x2a0 lib/ubsan.c:494 ata_qc_issue.cold+0x38/0x9f drivers/ata/libata-core.c:5166 ata_scsi_deferred_qc_work+0x154/0x1f0 drivers/ata/libata-scsi.c:1679 process_one_work+0x9d7/0x1920 kernel/workqueue.c:3275 process_scheduled_works kernel/workqueue.c:3358 [inline] worker_thread+0x5da/0xe40 kernel/workqueue.c:3439 kthread+0x370/0x450 kernel/kthread.c:467 ret_from_fork+0x754/0xd80 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Fix this by checking if the qc of a timed out SCSI command is a deferred one, and in such case, clear the port deferred_qc field and finish the SCSI command with DID_TIME_OUT. Reported-by: syzbot+1f77b8ca15336fff21ff@syzkaller.appspotmail.com Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Igor Pylypiv --- drivers/ata/libata-eh.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 72a22b6c9682..b373cceb95d2 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -640,12 +640,28 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, set_host_byte(scmd, DID_OK); ata_qc_for_each_raw(ap, qc, i) { - if (qc->flags & ATA_QCFLAG_ACTIVE && - qc->scsicmd == scmd) + if (qc->scsicmd != scmd) + continue; + if ((qc->flags & ATA_QCFLAG_ACTIVE) || + qc == ap->deferred_qc) break; } - if (i < ATA_MAX_QUEUE) { + if (qc == ap->deferred_qc) { + /* + * This is a deferred command that timed out while + * waiting for the command queue to drain. Since the qc + * is not active yet (deferred_qc is still set, so the + * deferred qc work has not issued the command yet), + * simply signal the timeout by finishing the SCSI + * command and clear the deferred qc to prevent the + * deferred qc work from issuing this qc. + */ + WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE); + ap->deferred_qc = NULL; + set_host_byte(scmd, DID_TIME_OUT); + scsi_eh_finish_cmd(scmd, &ap->eh_done_q); + } else if (i < ATA_MAX_QUEUE) { /* the scmd has an associated qc */ if (!(qc->flags & ATA_QCFLAG_EH)) { /* which hasn't failed yet, timeout */ -- cgit v1.2.3 From 55db009926634b20955bd8abbee921adbc8d2cb4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 20 Feb 2026 12:09:12 +0900 Subject: ata: libata-core: fix cancellation of a port deferred qc work cancel_work_sync() is a sleeping function so it cannot be called with the spin lock of a port being held. Move the call to this function in ata_port_detach() after EH completes, with the port lock released, together with other work cancellation calls. Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Igor Pylypiv --- drivers/ata/libata-core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 11909417f017..ccbf320524da 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6269,10 +6269,6 @@ static void ata_port_detach(struct ata_port *ap) } } - /* Make sure the deferred qc work finished. */ - cancel_work_sync(&ap->deferred_qc_work); - WARN_ON(ap->deferred_qc); - /* Tell EH to disable all devices */ ap->pflags |= ATA_PFLAG_UNLOADING; ata_port_schedule_eh(ap); @@ -6283,9 +6279,11 @@ static void ata_port_detach(struct ata_port *ap) /* wait till EH commits suicide */ ata_port_wait_eh(ap); - /* it better be dead now */ + /* It better be dead now and not have any remaining deferred qc. */ WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED)); + WARN_ON(ap->deferred_qc); + cancel_work_sync(&ap->deferred_qc_work); cancel_delayed_work_sync(&ap->hotplug_task); cancel_delayed_work_sync(&ap->scsi_rescan_task); -- cgit v1.2.3 From fb868db5f4bccd7a78219313ab2917429f715cea Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Fri, 20 Feb 2026 13:53:24 -0800 Subject: gve: fix incorrect buffer cleanup in gve_tx_clean_pending_packets for QPL In DQ-QPL mode, gve_tx_clean_pending_packets() incorrectly uses the RDA buffer cleanup path. It iterates num_bufs times and attempts to unmap entries in the dma array. This leads to two issues: 1. The dma array shares storage with tx_qpl_buf_ids (union). Interpreting buffer IDs as DMA addresses results in attempting to unmap incorrect memory locations. 2. num_bufs in QPL mode (counting 2K chunks) can significantly exceed the size of the dma array, causing out-of-bounds access warnings (trace below is how we noticed this issue). UBSAN: array-index-out-of-bounds in drivers/net/ethernet/drivers/net/ethernet/google/gve/gve_tx_dqo.c:178:5 index 18 is out of range for type 'dma_addr_t[18]' (aka 'unsigned long long[18]') Workqueue: gve gve_service_task [gve] Call Trace: dump_stack_lvl+0x33/0xa0 __ubsan_handle_out_of_bounds+0xdc/0x110 gve_tx_stop_ring_dqo+0x182/0x200 [gve] gve_close+0x1be/0x450 [gve] gve_reset+0x99/0x120 [gve] gve_service_task+0x61/0x100 [gve] process_scheduled_works+0x1e9/0x380 Fix this by properly checking for QPL mode and delegating to gve_free_tx_qpl_bufs() to reclaim the buffers. Cc: stable@vger.kernel.org Fixes: a6fb8d5a8b69 ("gve: Tx path for DQO-QPL") Signed-off-by: Ankit Garg Reviewed-by: Jordan Rhee Reviewed-by: Harshitha Ramamurthy Signed-off-by: Joshua Washington Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260220215324.1631350-1-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 56 +++++++++++++--------------- 1 file changed, 25 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 28e85730f785..b57e8f13cb51 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -167,6 +167,25 @@ gve_free_pending_packet(struct gve_tx_ring *tx, } } +static void gve_unmap_packet(struct device *dev, + struct gve_tx_pending_packet_dqo *pkt) +{ + int i; + + if (!pkt->num_bufs) + return; + + /* SKB linear portion is guaranteed to be mapped */ + dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), + dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); + for (i = 1; i < pkt->num_bufs; i++) { + netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), + dma_unmap_len(pkt, len[i]), + DMA_TO_DEVICE, 0); + } + pkt->num_bufs = 0; +} + /* gve_tx_free_desc - Cleans up all pending tx requests and buffers. */ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx) @@ -176,21 +195,12 @@ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx) for (i = 0; i < tx->dqo.num_pending_packets; i++) { struct gve_tx_pending_packet_dqo *cur_state = &tx->dqo.pending_packets[i]; - int j; - - for (j = 0; j < cur_state->num_bufs; j++) { - if (j == 0) { - dma_unmap_single(tx->dev, - dma_unmap_addr(cur_state, dma[j]), - dma_unmap_len(cur_state, len[j]), - DMA_TO_DEVICE); - } else { - dma_unmap_page(tx->dev, - dma_unmap_addr(cur_state, dma[j]), - dma_unmap_len(cur_state, len[j]), - DMA_TO_DEVICE); - } - } + + if (tx->dqo.qpl) + gve_free_tx_qpl_bufs(tx, cur_state); + else + gve_unmap_packet(tx->dev, cur_state); + if (cur_state->skb) { dev_consume_skb_any(cur_state->skb); cur_state->skb = NULL; @@ -1157,22 +1167,6 @@ static void remove_from_list(struct gve_tx_ring *tx, } } -static void gve_unmap_packet(struct device *dev, - struct gve_tx_pending_packet_dqo *pkt) -{ - int i; - - /* SKB linear portion is guaranteed to be mapped */ - dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), - dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); - for (i = 1; i < pkt->num_bufs; i++) { - netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), - dma_unmap_len(pkt, len[i]), - DMA_TO_DEVICE, 0); - } - pkt->num_bufs = 0; -} - /* Completion types and expected behavior: * No Miss compl + Packet compl = Packet completed normally. * Miss compl + Re-inject compl = Packet completed normally. -- cgit v1.2.3 From 4cfe066a82cdf9e83e48b16000f55280efc98325 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 20 Feb 2026 16:57:54 +0100 Subject: dpll: zl3073x: fix REF_PHASE_OFFSET_COMP register width for some chip IDs The REF_PHASE_OFFSET_COMP register is 48-bit wide on most zl3073x chip variants, but only 32-bit wide on chip IDs 0x0E30, 0x0E93..0x0E97 and 0x1F60. The driver unconditionally uses 48-bit read/write operations, which on 32-bit variants causes reading 2 bytes past the register boundary (corrupting the value) and writing 2 bytes into the adjacent register. Fix this by storing the chip ID in the device structure during probe and adding a helper to detect the affected variants. Use the correct register width for read/write operations and the matching sign extension bit (31 vs 47) when interpreting the phase compensation value. Fixes: 6287262f761e ("dpll: zl3073x: Add support to adjust phase") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260220155755.448185-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/core.c | 1 + drivers/dpll/zl3073x/core.h | 28 ++++++++++++++++++++++++++++ drivers/dpll/zl3073x/dpll.c | 7 +++++-- drivers/dpll/zl3073x/ref.c | 25 ++++++++++++++++++++----- drivers/dpll/zl3073x/regs.h | 1 + 5 files changed, 55 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 63bd97181b9e..8c5ee28e02f4 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -1026,6 +1026,7 @@ int zl3073x_dev_probe(struct zl3073x_dev *zldev, "Unknown or non-match chip ID: 0x%0x\n", id); } + zldev->chip_id = id; /* Read revision, firmware version and custom config version */ rc = zl3073x_read_u16(zldev, ZL_REG_REVISION, &revision); diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h index dddfcacea5c0..fd2af3c62a7d 100644 --- a/drivers/dpll/zl3073x/core.h +++ b/drivers/dpll/zl3073x/core.h @@ -35,6 +35,7 @@ struct zl3073x_dpll; * @dev: pointer to device * @regmap: regmap to access device registers * @multiop_lock: to serialize multiple register operations + * @chip_id: chip ID read from hardware * @ref: array of input references' invariants * @out: array of outs' invariants * @synth: array of synths' invariants @@ -48,6 +49,7 @@ struct zl3073x_dev { struct device *dev; struct regmap *regmap; struct mutex multiop_lock; + u16 chip_id; /* Invariants */ struct zl3073x_ref ref[ZL3073X_NUM_REFS]; @@ -144,6 +146,32 @@ int zl3073x_write_hwreg_seq(struct zl3073x_dev *zldev, int zl3073x_ref_phase_offsets_update(struct zl3073x_dev *zldev, int channel); +/** + * zl3073x_dev_is_ref_phase_comp_32bit - check ref phase comp register size + * @zldev: pointer to zl3073x device + * + * Some chip IDs have a 32-bit wide ref_phase_offset_comp register instead + * of the default 48-bit. + * + * Return: true if the register is 32-bit, false if 48-bit + */ +static inline bool +zl3073x_dev_is_ref_phase_comp_32bit(struct zl3073x_dev *zldev) +{ + switch (zldev->chip_id) { + case 0x0E30: + case 0x0E93: + case 0x0E94: + case 0x0E95: + case 0x0E96: + case 0x0E97: + case 0x1F60: + return true; + default: + return false; + } +} + static inline bool zl3073x_is_n_pin(u8 id) { diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 78edc36b17fb..8ffbede117c6 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -475,8 +475,11 @@ zl3073x_dpll_input_pin_phase_adjust_get(const struct dpll_pin *dpll_pin, ref_id = zl3073x_input_pin_ref_get(pin->id); ref = zl3073x_ref_state_get(zldev, ref_id); - /* Perform sign extension for 48bit signed value */ - phase_comp = sign_extend64(ref->phase_comp, 47); + /* Perform sign extension based on register width */ + if (zl3073x_dev_is_ref_phase_comp_32bit(zldev)) + phase_comp = sign_extend64(ref->phase_comp, 31); + else + phase_comp = sign_extend64(ref->phase_comp, 47); /* Reverse two's complement negation applied during set and convert * to 32bit signed int diff --git a/drivers/dpll/zl3073x/ref.c b/drivers/dpll/zl3073x/ref.c index aa2de13effa8..6b65e6103999 100644 --- a/drivers/dpll/zl3073x/ref.c +++ b/drivers/dpll/zl3073x/ref.c @@ -121,8 +121,16 @@ int zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index) return rc; /* Read phase compensation register */ - rc = zl3073x_read_u48(zldev, ZL_REG_REF_PHASE_OFFSET_COMP, - &ref->phase_comp); + if (zl3073x_dev_is_ref_phase_comp_32bit(zldev)) { + u32 val; + + rc = zl3073x_read_u32(zldev, ZL_REG_REF_PHASE_OFFSET_COMP_32, + &val); + ref->phase_comp = val; + } else { + rc = zl3073x_read_u48(zldev, ZL_REG_REF_PHASE_OFFSET_COMP, + &ref->phase_comp); + } if (rc) return rc; @@ -179,9 +187,16 @@ int zl3073x_ref_state_set(struct zl3073x_dev *zldev, u8 index, if (!rc && dref->sync_ctrl != ref->sync_ctrl) rc = zl3073x_write_u8(zldev, ZL_REG_REF_SYNC_CTRL, ref->sync_ctrl); - if (!rc && dref->phase_comp != ref->phase_comp) - rc = zl3073x_write_u48(zldev, ZL_REG_REF_PHASE_OFFSET_COMP, - ref->phase_comp); + if (!rc && dref->phase_comp != ref->phase_comp) { + if (zl3073x_dev_is_ref_phase_comp_32bit(zldev)) + rc = zl3073x_write_u32(zldev, + ZL_REG_REF_PHASE_OFFSET_COMP_32, + ref->phase_comp); + else + rc = zl3073x_write_u48(zldev, + ZL_REG_REF_PHASE_OFFSET_COMP, + ref->phase_comp); + } if (rc) return rc; diff --git a/drivers/dpll/zl3073x/regs.h b/drivers/dpll/zl3073x/regs.h index d837bee72b17..5573d7188406 100644 --- a/drivers/dpll/zl3073x/regs.h +++ b/drivers/dpll/zl3073x/regs.h @@ -194,6 +194,7 @@ #define ZL_REF_CONFIG_DIFF_EN BIT(2) #define ZL_REG_REF_PHASE_OFFSET_COMP ZL_REG(10, 0x28, 6) +#define ZL_REG_REF_PHASE_OFFSET_COMP_32 ZL_REG(10, 0x28, 4) #define ZL_REG_REF_SYNC_CTRL ZL_REG(10, 0x2e, 1) #define ZL_REF_SYNC_CTRL_MODE GENMASK(2, 0) -- cgit v1.2.3 From 9af0feae8016ba58ad7ff784a903404986b395b1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 27 Jan 2026 10:38:39 +0100 Subject: RDMA/core: Fix stale RoCE GIDs during netdev events at registration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RoCE GID entries become stale when netdev properties change during the IB device registration window. This is reproducible with a udev rule that sets a MAC address when a VF netdev appears: ACTION=="add", SUBSYSTEM=="net", KERNEL=="eth4", \ RUN+="/sbin/ip link set eth4 address 88:22:33:44:55:66" After VF creation, show_gids displays GIDs derived from the original random MAC rather than the configured one. The root cause is a race between netdev event processing and device registration: CPU 0 (driver) CPU 1 (udev/workqueue) ────────────── ────────────────────── ib_register_device() ib_cache_setup_one() gid_table_setup_one() _gid_table_setup_one() ← GID table allocated rdma_roce_rescan_device() ← GIDs populated with OLD MAC ip link set eth4 addr NEW_MAC NETDEV_CHANGEADDR queued netdevice_event_work_handler() ib_enum_all_roce_netdevs() ← Iterates DEVICE_REGISTERED ← Device NOT marked yet, SKIP! enable_device_and_get() xa_set_mark(DEVICE_REGISTERED) ← Too late, event was lost The netdev event handler uses ib_enum_all_roce_netdevs() which only iterates devices marked DEVICE_REGISTERED. However, this mark is set late in the registration process, after the GID cache is already populated. Events arriving in this window are silently dropped. Fix this by introducing a new xarray mark DEVICE_GID_UPDATES that is set immediately after the GID table is allocated and initialized. Use the new mark in ib_enum_all_roce_netdevs() function to iterate devices instead of DEVICE_REGISTERED. This is safe because: - After _gid_table_setup_one(), all required structures exist (port_data, immutable, cache.gid) - The GID table mutex serializes concurrent access between the initial rescan and event handlers - Event handlers correctly update stale GIDs even when racing with rescan - The mark is cleared in ib_cache_cleanup_one() before teardown This also fixes similar races for IP address events (inetaddr_event, inet6addr_event) which use the same enumeration path. Fixes: 0df91bb67334 ("RDMA/devices: Use xarray to store the client_data") Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260127093839.126291-1-jiri@resnulli.us Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cache.c | 13 +++++++++++++ drivers/infiniband/core/core_priv.h | 3 +++ drivers/infiniband/core/device.c | 34 +++++++++++++++++++++++++++++++++- 3 files changed, 49 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index b415d4aad04f..ee4a2bc68fb2 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -926,6 +926,13 @@ static int gid_table_setup_one(struct ib_device *ib_dev) if (err) return err; + /* + * Mark the device as ready for GID cache updates. This allows netdev + * event handlers to update the GID cache even before the device is + * fully registered. + */ + ib_device_enable_gid_updates(ib_dev); + rdma_roce_rescan_device(ib_dev); return err; @@ -1637,6 +1644,12 @@ void ib_cache_release_one(struct ib_device *device) void ib_cache_cleanup_one(struct ib_device *device) { + /* + * Clear the GID updates mark first to prevent event handlers from + * accessing the device while it's being torn down. + */ + ib_device_disable_gid_updates(device); + /* The cleanup function waits for all in-progress workqueue * elements and cleans up the GID cache. This function should be * called after the device was removed from the devices list and diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 05102769a918..a2c36666e6fc 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -100,6 +100,9 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, roce_netdev_callback cb, void *cookie); +void ib_device_enable_gid_updates(struct ib_device *device); +void ib_device_disable_gid_updates(struct ib_device *device); + typedef int (*nldev_callback)(struct ib_device *device, struct sk_buff *skb, struct netlink_callback *cb, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 1b5f1ee0a557..558b73940d66 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -93,6 +93,7 @@ static struct workqueue_struct *ib_unreg_wq; static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); static DECLARE_RWSEM(devices_rwsem); #define DEVICE_REGISTERED XA_MARK_1 +#define DEVICE_GID_UPDATES XA_MARK_2 static u32 highest_client_id; #define CLIENT_REGISTERED XA_MARK_1 @@ -2412,11 +2413,42 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, unsigned long index; down_read(&devices_rwsem); - xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) + xa_for_each_marked(&devices, index, dev, DEVICE_GID_UPDATES) ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); up_read(&devices_rwsem); } +/** + * ib_device_enable_gid_updates - Mark device as ready for GID cache updates + * @device: Device to mark + * + * Called after GID table is allocated and initialized. After this mark is set, + * netdevice event handlers can update the device's GID cache. This allows + * events that arrive during device registration to be processed, avoiding + * stale GID entries when netdev properties change during the device + * registration process. + */ +void ib_device_enable_gid_updates(struct ib_device *device) +{ + down_write(&devices_rwsem); + xa_set_mark(&devices, device->index, DEVICE_GID_UPDATES); + up_write(&devices_rwsem); +} + +/** + * ib_device_disable_gid_updates - Clear the GID updates mark + * @device: Device to unmark + * + * Called before GID table cleanup to prevent event handlers from accessing + * the device while it's being torn down. + */ +void ib_device_disable_gid_updates(struct ib_device *device) +{ + down_write(&devices_rwsem); + xa_clear_mark(&devices, device->index, DEVICE_GID_UPDATES); + up_write(&devices_rwsem); +} + /* * ib_enum_all_devs - enumerate all ib_devices * @cb: Callback to call for each found ib_device -- cgit v1.2.3 From 7a23af417d9dd57b4382356b2e7442e5d2bf5bea Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Wed, 18 Feb 2026 09:12:45 +0000 Subject: RDMA/bng_re: Remove unnessary validity checks Fix below smatch warning: drivers/infiniband/hw/bng_re/bng_dev.c:113 bng_re_net_ring_free() warn: variable dereferenced before check 'rdev' (see line 107) current driver has unnessary validity checks. So, removing these unnessary validity checks. Fixes: 4f830cd8d7fe ("RDMA/bng_re: Add infrastructure for enabling Firmware channel") Fixes: 745065770c2d ("RDMA/bng_re: Register and get the resources from bnge driver") Fixes: 04e031ff6e60 ("RDMA/bng_re: Initialize the Firmware and Hardware") Fixes: d0da769c19d0 ("RDMA/bng_re: Add Auxiliary interface") Reported-by: Simon Horman Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202601010413.sWadrQel-lkp@intel.com/ Signed-off-by: Siva Reddy Kallam Link: https://patch.msgid.link/20260218091246.1764808-2-siva.kallam@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bng_re/bng_dev.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c index cf3bf08e5f26..b8df807c3a64 100644 --- a/drivers/infiniband/hw/bng_re/bng_dev.c +++ b/drivers/infiniband/hw/bng_re/bng_dev.c @@ -54,9 +54,6 @@ static void bng_re_destroy_chip_ctx(struct bng_re_dev *rdev) { struct bng_re_chip_ctx *chip_ctx; - if (!rdev->chip_ctx) - return; - kfree(rdev->dev_attr); rdev->dev_attr = NULL; @@ -124,12 +121,6 @@ static int bng_re_net_ring_free(struct bng_re_dev *rdev, struct bnge_fw_msg fw_msg = {}; int rc = -EINVAL; - if (!rdev) - return rc; - - if (!aux_dev) - return rc; - bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE); req.ring_type = type; req.ring_id = cpu_to_le16(fw_ring_id); @@ -150,10 +141,7 @@ static int bng_re_net_ring_alloc(struct bng_re_dev *rdev, struct hwrm_ring_alloc_input req = {}; struct hwrm_ring_alloc_output resp; struct bnge_fw_msg fw_msg = {}; - int rc = -EINVAL; - - if (!aux_dev) - return rc; + int rc; bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC); req.enables = 0; @@ -184,10 +172,7 @@ static int bng_re_stats_ctx_free(struct bng_re_dev *rdev) struct hwrm_stat_ctx_free_input req = {}; struct hwrm_stat_ctx_free_output resp = {}; struct bnge_fw_msg fw_msg = {}; - int rc = -EINVAL; - - if (!aux_dev) - return rc; + int rc; bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE); req.stat_ctx_id = cpu_to_le32(rdev->stats_ctx.fw_id); @@ -208,13 +193,10 @@ static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev) struct hwrm_stat_ctx_alloc_output resp = {}; struct hwrm_stat_ctx_alloc_input req = {}; struct bnge_fw_msg fw_msg = {}; - int rc = -EINVAL; + int rc; stats->fw_id = BNGE_INVALID_STATS_CTX_ID; - if (!aux_dev) - return rc; - bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(stats->dma_map); @@ -486,8 +468,7 @@ static void bng_re_remove(struct auxiliary_device *adev) rdev = dev_info->rdev; - if (rdev) - bng_re_remove_device(rdev, adev); + bng_re_remove_device(rdev, adev); kfree(dev_info); } -- cgit v1.2.3 From 3d2e5d12a2eef0ca8a629a422aa593673235c77c Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Wed, 18 Feb 2026 09:12:46 +0000 Subject: RDMA/bng_re: Unwind bng_re_dev_init properly Fix below smatch warning: drivers/infiniband/hw/bng_re/bng_dev.c:270 bng_re_dev_init() warn: missing unwind goto? Current bng_re_dev_init function is not having clear unwinding code. So, added proper unwinding with ladder. Fixes: 4f830cd8d7fe ("RDMA/bng_re: Add infrastructure for enabling Firmware channel") Reported-by: Simon Horman Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202601010413.sWadrQel-lkp@intel.com/ Signed-off-by: Siva Reddy Kallam Link: https://patch.msgid.link/20260218091246.1764808-3-siva.kallam@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bng_re/bng_dev.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c index b8df807c3a64..d34b5f88cd40 100644 --- a/drivers/infiniband/hw/bng_re/bng_dev.c +++ b/drivers/infiniband/hw/bng_re/bng_dev.c @@ -285,7 +285,7 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) if (rc) { ibdev_err(&rdev->ibdev, "Failed to register with netedev: %#x\n", rc); - return -EINVAL; + goto reg_netdev_fail; } set_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); @@ -294,19 +294,16 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) ibdev_err(&rdev->ibdev, "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n", rdev->aux_dev->auxr_info->msix_requested); - bnge_unregister_dev(rdev->aux_dev); - clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - return -EINVAL; + rc = -EINVAL; + goto msix_ctx_fail; } ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", rdev->aux_dev->auxr_info->msix_requested); rc = bng_re_setup_chip_ctx(rdev); if (rc) { - bnge_unregister_dev(rdev->aux_dev); - clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); - return -EINVAL; + goto msix_ctx_fail; } bng_re_query_hwrm_version(rdev); @@ -315,16 +312,14 @@ static int bng_re_dev_init(struct bng_re_dev *rdev) if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate RCFW Channel: %#x\n", rc); - goto fail; + goto alloc_fw_chl_fail; } /* Allocate nq record memory */ rdev->nqr = kzalloc_obj(*rdev->nqr); if (!rdev->nqr) { - bng_re_destroy_chip_ctx(rdev); - bnge_unregister_dev(rdev->aux_dev); - clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - return -ENOMEM; + rc = -ENOMEM; + goto nq_alloc_fail; } rdev->nqr->num_msix = rdev->aux_dev->auxr_info->msix_requested; @@ -393,9 +388,15 @@ disable_rcfw: free_ring: bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); free_rcfw: + kfree(rdev->nqr); +nq_alloc_fail: bng_re_free_rcfw_channel(&rdev->rcfw); -fail: - bng_re_dev_uninit(rdev); +alloc_fw_chl_fail: + bng_re_destroy_chip_ctx(rdev); +msix_ctx_fail: + bnge_unregister_dev(rdev->aux_dev); + clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); +reg_netdev_fail: return rc; } -- cgit v1.2.3 From bae8a5d2e759da2e0cba33ab2080deee96a09373 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 19 Feb 2026 20:46:37 +0800 Subject: net: wan: farsync: Fix use-after-free bugs caused by unfinished tasklets When the FarSync T-series card is being detached, the fst_card_info is deallocated in fst_remove_one(). However, the fst_tx_task or fst_int_task may still be running or pending, leading to use-after-free bugs when the already freed fst_card_info is accessed in fst_process_tx_work_q() or fst_process_int_work_q(). A typical race condition is depicted below: CPU 0 (cleanup) | CPU 1 (tasklet) | fst_start_xmit() fst_remove_one() | tasklet_schedule() unregister_hdlc_device()| | fst_process_tx_work_q() //handler kfree(card) //free | do_bottom_half_tx() | card-> //use The following KASAN trace was captured: ================================================================== BUG: KASAN: slab-use-after-free in do_bottom_half_tx+0xb88/0xd00 Read of size 4 at addr ffff88800aad101c by task ksoftirqd/3/32 ... Call Trace: dump_stack_lvl+0x55/0x70 print_report+0xcb/0x5d0 ? do_bottom_half_tx+0xb88/0xd00 kasan_report+0xb8/0xf0 ? do_bottom_half_tx+0xb88/0xd00 do_bottom_half_tx+0xb88/0xd00 ? _raw_spin_lock_irqsave+0x85/0xe0 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? __pfx___hrtimer_run_queues+0x10/0x10 fst_process_tx_work_q+0x67/0x90 tasklet_action_common+0x1fa/0x720 ? hrtimer_interrupt+0x31f/0x780 handle_softirqs+0x176/0x530 __irq_exit_rcu+0xab/0xe0 sysvec_apic_timer_interrupt+0x70/0x80 ... Allocated by task 41 on cpu 3 at 72.330843s: kasan_save_stack+0x24/0x50 kasan_save_track+0x17/0x60 __kasan_kmalloc+0x7f/0x90 fst_add_one+0x1a5/0x1cd0 local_pci_probe+0xdd/0x190 pci_device_probe+0x341/0x480 really_probe+0x1c6/0x6a0 __driver_probe_device+0x248/0x310 driver_probe_device+0x48/0x210 __device_attach_driver+0x160/0x320 bus_for_each_drv+0x101/0x190 __device_attach+0x198/0x3a0 device_initial_probe+0x78/0xa0 pci_bus_add_device+0x81/0xc0 pci_bus_add_devices+0x7e/0x190 enable_slot+0x9b9/0x1130 acpiphp_check_bridge.part.0+0x2e1/0x460 acpiphp_hotplug_notify+0x36c/0x3c0 acpi_device_hotplug+0x203/0xb10 acpi_hotplug_work_fn+0x59/0x80 ... Freed by task 41 on cpu 1 at 75.138639s: kasan_save_stack+0x24/0x50 kasan_save_track+0x17/0x60 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x43/0x70 kfree+0x135/0x410 fst_remove_one+0x2ca/0x540 pci_device_remove+0xa6/0x1d0 device_release_driver_internal+0x364/0x530 pci_stop_bus_device+0x105/0x150 pci_stop_and_remove_bus_device+0xd/0x20 disable_slot+0x116/0x260 acpiphp_disable_and_eject_slot+0x4b/0x190 acpiphp_hotplug_notify+0x230/0x3c0 acpi_device_hotplug+0x203/0xb10 acpi_hotplug_work_fn+0x59/0x80 ... The buggy address belongs to the object at ffff88800aad1000 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 28 bytes inside of freed 1024-byte region The buggy address belongs to the physical page: page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xaad0 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0x100000000000040(head|node=0|zone=1) page_type: f5(slab) raw: 0100000000000040 ffff888007042dc0 dead000000000122 0000000000000000 raw: 0000000000000000 0000000080100010 00000000f5000000 0000000000000000 head: 0100000000000040 ffff888007042dc0 dead000000000122 0000000000000000 head: 0000000000000000 0000000080100010 00000000f5000000 0000000000000000 head: 0100000000000003 ffffea00002ab401 00000000ffffffff 00000000ffffffff head: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88800aad0f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88800aad0f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88800aad1000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88800aad1080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88800aad1100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fix this by ensuring that both fst_tx_task and fst_int_task are properly canceled before the fst_card_info is released. Add tasklet_kill() in fst_remove_one() to synchronize with any pending or running tasklets. Since unregister_hdlc_device() stops data transmission and reception, and fst_disable_intr() prevents further interrupts, it is appropriate to place tasklet_kill() after these calls. The bugs were identified through static analysis. To reproduce the issue and validate the fix, a FarSync T-series card was simulated in QEMU and delays(e.g., mdelay()) were introduced within the tasklet handler to increase the likelihood of triggering the race condition. Fixes: 2f623aaf9f31 ("net: farsync: Fix kmemleak when rmmods farsync") Signed-off-by: Duoming Zhou Reviewed-by: Jijie Shao Link: https://patch.msgid.link/20260219124637.72578-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni --- drivers/net/wan/farsync.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 5b01642ca44e..6b2d1e63855e 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2550,6 +2550,8 @@ fst_remove_one(struct pci_dev *pdev) fst_disable_intr(card); free_irq(card->irq, card); + tasklet_kill(&fst_tx_task); + tasklet_kill(&fst_int_task); iounmap(card->ctlmem); iounmap(card->mem); -- cgit v1.2.3 From 82aec772fca2223bc5774bd9af486fd95766e578 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Feb 2026 11:50:21 -0800 Subject: netconsole: avoid OOB reads, msg is not nul-terminated msg passed to netconsole from the console subsystem is not guaranteed to be nul-terminated. Before recent commit 7eab73b18630 ("netconsole: convert to NBCON console infrastructure") the message would be placed in printk_shared_pbufs, a static global buffer, so KASAN had harder time catching OOB accesses. Now we see: printk: console [netcon_ext0] enabled BUG: KASAN: slab-out-of-bounds in string+0x1f7/0x240 Read of size 1 at addr ffff88813b6d4c00 by task pr/netcon_ext0/594 CPU: 65 UID: 0 PID: 594 Comm: pr/netcon_ext0 Not tainted 6.19.0-11754-g4246fd6547c9 Call Trace: kasan_report+0xe4/0x120 string+0x1f7/0x240 vsnprintf+0x655/0xba0 scnprintf+0xba/0x120 netconsole_write+0x3fe/0xa10 nbcon_emit_next_record+0x46e/0x860 nbcon_kthread_func+0x623/0x750 Allocated by task 1: nbcon_alloc+0x1ea/0x450 register_console+0x26b/0xe10 init_netconsole+0xbb0/0xda0 The buggy address belongs to the object at ffff88813b6d4000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 0 bytes to the right of allocated 3072-byte region [ffff88813b6d4000, ffff88813b6d4c00) Fixes: c62c0a17f9b7 ("netconsole: Append kernel version to message") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260219195021.2099699-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/netconsole.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index d144787b2947..1b6a4135ec08 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1679,7 +1679,8 @@ static void send_msg_no_fragmentation(struct netconsole_target *nt, if (release_len) { release = init_utsname()->release; - scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); + scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%.*s", release, + msg_len, msg); msg_len += release_len; } else { memcpy(nt->buf, msg, msg_len); -- cgit v1.2.3 From fd80bd7105f88189f47d465ca8cb7d115570de30 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Fri, 20 Feb 2026 17:21:26 -0500 Subject: RDMA/ionic: Fix potential NULL pointer dereference in ionic_query_port The function ionic_query_port() calls ib_device_get_netdev() without checking the return value which could lead to NULL pointer dereference, Fix it by checking the return value and return -ENODEV if the 'ndev' is NULL. Fixes: 2075bbe8ef03 ("RDMA/ionic: Register device ops for miscellaneous functionality") Signed-off-by: Kamal Heib Link: https://patch.msgid.link/20260220222125.16973-2-kheib@redhat.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_ibdev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 164046d00e5d..bd4c73e530d0 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -81,6 +81,8 @@ static int ionic_query_port(struct ib_device *ibdev, u32 port, return -EINVAL; ndev = ib_device_get_netdev(ibdev, port); + if (!ndev) + return -ENODEV; if (netif_running(ndev) && netif_carrier_ok(ndev)) { attr->state = IB_PORT_ACTIVE; -- cgit v1.2.3 From f22c77ce49db0589103d96487dca56f5b2136362 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 16 Feb 2026 11:02:47 -0400 Subject: RDMA/efa: Fix typo in efa_alloc_mr() The pattern is to check the entire driver request space, not just sizeof something unrelated. Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") Signed-off-by: Jason Gunthorpe Link: https://patch.msgid.link/1-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com Acked-by: Michael Margolin Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/efa/efa_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index b5b93b42e6c4..b0aa7c0238ed 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1661,7 +1661,7 @@ static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, struct efa_mr *mr; if (udata && udata->inlen && - !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { + !ib_is_udata_cleared(udata, 0, udata->inlen)) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, udata not cleared\n"); return ERR_PTR(-EINVAL); -- cgit v1.2.3 From 117942ca43e2e3c3d121faae530989931b7f67e1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 16 Feb 2026 11:02:48 -0400 Subject: IB/mthca: Add missed mthca_unmap_user_db() for mthca_create_srq() Fix a user triggerable leak on the system call failure path. Cc: stable@vger.kernel.org Fixes: ec34a922d243 ("[PATCH] IB/mthca: Add SRQ implementation") Signed-off-by: Jason Gunthorpe Link: https://patch.msgid.link/2-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mthca/mthca_provider.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index ef0635064fba..5c182ae4fc2f 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -428,6 +428,8 @@ static int mthca_create_srq(struct ib_srq *ibsrq, if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { mthca_free_srq(to_mdev(ibsrq->device), srq); + mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar, + context->db_tab, ucmd.db_index); return -EFAULT; } @@ -436,6 +438,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq, static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { + mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); if (udata) { struct mthca_ucontext *context = rdma_udata_to_drv_context( @@ -446,8 +449,6 @@ static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) mthca_unmap_user_db(to_mdev(srq->device), &context->uar, context->db_tab, to_msrq(srq)->db_index); } - - mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); return 0; } -- cgit v1.2.3 From 74586c6da9ea222a61c98394f2fc0a604748438c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 16 Feb 2026 11:02:49 -0400 Subject: RDMA/irdma: Fix kernel stack leak in irdma_create_user_ah() struct irdma_create_ah_resp { // 8 bytes, no padding __u32 ah_id; // offset 0 - SET (uresp.ah_id = ah->sc_ah.ah_info.ah_idx) __u8 rsvd[4]; // offset 4 - NEVER SET <- LEAK }; rsvd[4]: 4 bytes of stack memory leaked unconditionally. Only ah_id is assigned before ib_respond_udata(). The reserved members of the structure were not zeroed. Cc: stable@vger.kernel.org Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Jason Gunthorpe Link: https://patch.msgid.link/3-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 15af53237217..7251cd7a2147 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -5212,7 +5212,7 @@ static int irdma_create_user_ah(struct ib_ah *ibah, #define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd) struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah); struct irdma_device *iwdev = to_iwdev(ibah->pd->device); - struct irdma_create_ah_resp uresp; + struct irdma_create_ah_resp uresp = {}; struct irdma_ah *parent_ah; int err; -- cgit v1.2.3 From faa72102b178c7ae6c6afea23879e7c84fc59b4e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 16 Feb 2026 11:02:50 -0400 Subject: RDMA/ionic: Fix kernel stack leak in ionic_create_cq() struct ionic_cq_resp resp { __u32 cqid[2]; // offset 0 - PARTIALLY SET (see below) __u8 udma_mask; // offset 8 - SET (resp.udma_mask = vcq->udma_mask) __u8 rsvd[7]; // offset 9 - NEVER SET <- LEAK }; rsvd[7]: 7 bytes of stack memory leaked unconditionally. cqid[2]: The loop at line 1256 iterates over udma_idx but skips indices where !(vcq->udma_mask & BIT(udma_idx)). The array has 2 entries but udma_count could be 1, meaning cqid[1] might never be written via ionic_create_cq_common(). If udma_mask only has bit 0 set, cqid[1] (4 bytes) is also leaked. So potentially 11 bytes leaked. Cc: stable@vger.kernel.org Fixes: e8521822c733 ("RDMA/ionic: Register device ops for control path") Signed-off-by: Jason Gunthorpe Link: https://patch.msgid.link/4-v1-83e918d69e73+a9-rdma_udata_rc_jgg@nvidia.com Acked-by: Abhijit Gangurde Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/ionic/ionic_controlpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c index 5b3f40bd98d8..4842931f5316 100644 --- a/drivers/infiniband/hw/ionic/ionic_controlpath.c +++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c @@ -1218,7 +1218,7 @@ int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx); struct ionic_vcq *vcq = to_ionic_vcq(ibcq); struct ionic_tbl_buf buf = {}; - struct ionic_cq_resp resp; + struct ionic_cq_resp resp = {}; struct ionic_cq_req req; int udma_idx = 0, rc; -- cgit v1.2.3 From 3d7e6ce34f4fcc7083510c28b17a7c36462a25d4 Mon Sep 17 00:00:00 2001 From: Ziyi Guo Date: Sun, 22 Feb 2026 05:06:33 +0000 Subject: net: usb: pegasus: enable basic endpoint checking pegasus_probe() fills URBs with hardcoded endpoint pipes without verifying the endpoint descriptors: - usb_rcvbulkpipe(dev, 1) for RX data - usb_sndbulkpipe(dev, 2) for TX data - usb_rcvintpipe(dev, 3) for status interrupts A malformed USB device can present these endpoints with transfer types that differ from what the driver assumes. Add a pegasus_usb_ep enum for endpoint numbers, replacing magic constants throughout. Add usb_check_bulk_endpoints() and usb_check_int_endpoints() calls before any resource allocation to verify endpoint types before use, rejecting devices with mismatched descriptors at probe time, and avoid triggering assertion. Similar fix to - commit 90b7f2961798 ("net: usb: rtl8150: enable basic endpoint checking") - commit 9e7021d2aeae ("net: usb: catc: enable basic endpoint checking") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ziyi Guo Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260222050633.410165-1-n7l8m4@u.northwestern.edu Signed-off-by: Paolo Abeni --- drivers/net/usb/pegasus.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 7b6d6eb60709..b84968c5f074 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -28,6 +28,17 @@ static const char driver_name[] = "pegasus"; BMSR_100FULL | BMSR_ANEGCAPABLE) #define CARRIER_CHECK_DELAY (2 * HZ) +/* + * USB endpoints. + */ + +enum pegasus_usb_ep { + PEGASUS_USB_EP_CONTROL = 0, + PEGASUS_USB_EP_BULK_IN = 1, + PEGASUS_USB_EP_BULK_OUT = 2, + PEGASUS_USB_EP_INT_IN = 3, +}; + static bool loopback; static bool mii_mode; static char *devid; @@ -542,7 +553,7 @@ static void read_bulk_callback(struct urb *urb) goto tl_sched; goon: usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, - usb_rcvbulkpipe(pegasus->usb, 1), + usb_rcvbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_IN), pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); rx_status = usb_submit_urb(pegasus->rx_urb, GFP_ATOMIC); @@ -582,7 +593,7 @@ static void rx_fixup(struct tasklet_struct *t) return; } usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, - usb_rcvbulkpipe(pegasus->usb, 1), + usb_rcvbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_IN), pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); try_again: @@ -710,7 +721,7 @@ static netdev_tx_t pegasus_start_xmit(struct sk_buff *skb, ((__le16 *) pegasus->tx_buff)[0] = cpu_to_le16(l16); skb_copy_from_linear_data(skb, pegasus->tx_buff + 2, skb->len); usb_fill_bulk_urb(pegasus->tx_urb, pegasus->usb, - usb_sndbulkpipe(pegasus->usb, 2), + usb_sndbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_OUT), pegasus->tx_buff, count, write_bulk_callback, pegasus); if ((res = usb_submit_urb(pegasus->tx_urb, GFP_ATOMIC))) { @@ -837,7 +848,7 @@ static int pegasus_open(struct net_device *net) set_registers(pegasus, EthID, 6, net->dev_addr); usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, - usb_rcvbulkpipe(pegasus->usb, 1), + usb_rcvbulkpipe(pegasus->usb, PEGASUS_USB_EP_BULK_IN), pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); if ((res = usb_submit_urb(pegasus->rx_urb, GFP_KERNEL))) { @@ -848,7 +859,7 @@ static int pegasus_open(struct net_device *net) } usb_fill_int_urb(pegasus->intr_urb, pegasus->usb, - usb_rcvintpipe(pegasus->usb, 3), + usb_rcvintpipe(pegasus->usb, PEGASUS_USB_EP_INT_IN), pegasus->intr_buff, sizeof(pegasus->intr_buff), intr_callback, pegasus, pegasus->intr_interval); if ((res = usb_submit_urb(pegasus->intr_urb, GFP_KERNEL))) { @@ -1133,10 +1144,24 @@ static int pegasus_probe(struct usb_interface *intf, pegasus_t *pegasus; int dev_index = id - pegasus_ids; int res = -ENOMEM; + static const u8 bulk_ep_addr[] = { + PEGASUS_USB_EP_BULK_IN | USB_DIR_IN, + PEGASUS_USB_EP_BULK_OUT | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + PEGASUS_USB_EP_INT_IN | USB_DIR_IN, + 0}; if (pegasus_blacklisted(dev)) return -ENODEV; + /* Verify that all required endpoints are present */ + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) { + dev_err(&intf->dev, "Missing or invalid endpoints\n"); + return -ENODEV; + } + net = alloc_etherdev(sizeof(struct pegasus)); if (!net) goto out; -- cgit v1.2.3 From c8dbdc6e380e7e96a51706db3e4b7870d8a9402d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 22 Feb 2026 16:26:01 +0100 Subject: net: phy: register phy led_triggers during probe to avoid AB-BA deadlock There is an AB-BA deadlock when both LEDS_TRIGGER_NETDEV and LED_TRIGGER_PHY are enabled: [ 1362.049207] [<8054e4b8>] led_trigger_register+0x5c/0x1fc <-- Trying to get lock "triggers_list_lock" via down_write(&triggers_list_lock); [ 1362.054536] [<80662830>] phy_led_triggers_register+0xd0/0x234 [ 1362.060329] [<8065e200>] phy_attach_direct+0x33c/0x40c [ 1362.065489] [<80651fc4>] phylink_fwnode_phy_connect+0x15c/0x23c [ 1362.071480] [<8066ee18>] mtk_open+0x7c/0xba0 [ 1362.075849] [<806d714c>] __dev_open+0x280/0x2b0 [ 1362.080384] [<806d7668>] __dev_change_flags+0x244/0x24c [ 1362.085598] [<806d7698>] dev_change_flags+0x28/0x78 [ 1362.090528] [<807150e4>] dev_ioctl+0x4c0/0x654 <-- Hold lock "rtnl_mutex" by calling rtnl_lock(); [ 1362.094985] [<80694360>] sock_ioctl+0x2f4/0x4e0 [ 1362.099567] [<802e9c4c>] sys_ioctl+0x32c/0xd8c [ 1362.104022] [<80014504>] syscall_common+0x34/0x58 Here LED_TRIGGER_PHY is registering LED triggers during phy_attach while holding RTNL and then taking triggers_list_lock. [ 1362.191101] [<806c2640>] register_netdevice_notifier+0x60/0x168 <-- Trying to get lock "rtnl_mutex" via rtnl_lock(); [ 1362.197073] [<805504ac>] netdev_trig_activate+0x194/0x1e4 [ 1362.202490] [<8054e28c>] led_trigger_set+0x1d4/0x360 <-- Hold lock "triggers_list_lock" by down_read(&triggers_list_lock); [ 1362.207511] [<8054eb38>] led_trigger_write+0xd8/0x14c [ 1362.212566] [<80381d98>] sysfs_kf_bin_write+0x80/0xbc [ 1362.217688] [<8037fcd8>] kernfs_fop_write_iter+0x17c/0x28c [ 1362.223174] [<802cbd70>] vfs_write+0x21c/0x3c4 [ 1362.227712] [<802cc0c4>] ksys_write+0x78/0x12c [ 1362.232164] [<80014504>] syscall_common+0x34/0x58 Here LEDS_TRIGGER_NETDEV is being enabled on an LED. It first takes triggers_list_lock and then RTNL. A classical AB-BA deadlock. phy_led_triggers_registers() does not require the RTNL, it does not make any calls into the network stack which require protection. There is also no requirement the PHY has been attached to a MAC, the triggers only make use of phydev state. This allows the call to phy_led_triggers_registers() to be placed elsewhere. PHY probe() and release() don't hold RTNL, so solving the AB-BA deadlock. Reported-by: Shiji Yang Closes: https://lore.kernel.org/all/OS7PR01MB13602B128BA1AD3FA38B6D1FFBC69A@OS7PR01MB13602.jpnprd01.prod.outlook.com/ Fixes: 06f502f57d0d ("leds: trigger: Introduce a NETDEV trigger") Cc: stable@vger.kernel.org Signed-off-by: Andrew Lunn Tested-by: Shiji Yang Link: https://patch.msgid.link/20260222152601.1978655-1-andrew@lunn.ch Signed-off-by: Paolo Abeni --- drivers/net/phy/phy_device.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9b8eaac63b90..cbb4af604aa5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1866,8 +1866,6 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, goto error; phy_resume(phydev); - if (!phydev->is_on_sfp_module) - phy_led_triggers_register(phydev); /** * If the external phy used by current mac interface is managed by @@ -1982,9 +1980,6 @@ void phy_detach(struct phy_device *phydev) phydev->phy_link_change = NULL; phydev->phylink = NULL; - if (!phydev->is_on_sfp_module) - phy_led_triggers_unregister(phydev); - if (phydev->mdio.dev.driver) module_put(phydev->mdio.dev.driver->owner); @@ -3778,16 +3773,27 @@ static int phy_probe(struct device *dev) /* Set the state to READY by default */ phydev->state = PHY_READY; + /* Register the PHY LED triggers */ + if (!phydev->is_on_sfp_module) + phy_led_triggers_register(phydev); + /* Get the LEDs from the device tree, and instantiate standard * LEDs for them. */ - if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev)) + if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev)) { err = of_phy_leds(phydev); + if (err) + goto out; + } + + return 0; out: + if (!phydev->is_on_sfp_module) + phy_led_triggers_unregister(phydev); + /* Re-assert the reset signal on error */ - if (err) - phy_device_reset(phydev, 1); + phy_device_reset(phydev, 1); return err; } @@ -3801,6 +3807,9 @@ static int phy_remove(struct device *dev) if (IS_ENABLED(CONFIG_PHYLIB_LEDS) && !phy_driver_is_genphy(phydev)) phy_leds_unregister(phydev); + if (!phydev->is_on_sfp_module) + phy_led_triggers_unregister(phydev); + phydev->state = PHY_DOWN; phy_cleanup_ports(phydev); -- cgit v1.2.3 From 78437ab3b769f80526416570f60173c89858dd84 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 13 Feb 2026 00:58:11 +0100 Subject: clk: scu/imx8qxp: do not register driver in probe() imx_clk_scu_init() registers the imx_clk_scu_driver while commonly being called from IMX driver's probe() callbacks. However, it neither makes sense to register drivers from probe() callbacks of other drivers, nor does the driver core allow registering drivers with a device lock already being held. The latter was revealed by commit dc23806a7c47 ("driver core: enforce device_lock for driver_match_device()") leading to a deadlock condition described in [1]. Besides that, nothing seems to unregister the imx_clk_scu_driver once the corresponding driver module is unloaded, which leaves the driver-core with a dangling pointer. Also, if there are multiple matching devices for the imx8qxp_clk_driver, imx8qxp_clk_probe() calls imx_clk_scu_init() multiple times. However, any subsequent call after the first one will fail, since the driver-core does not allow to register the same struct platform_driver multiple times. Hence, register the imx_clk_scu_driver from module_init() and unregister it in module_exit(). Note that we first register the imx8qxp_clk_driver and then call imx_clk_scu_module_init() to avoid having to call imx_clk_scu_module_exit() in the unwind path of imx8qxp_clk_init(). Fixes: dc23806a7c47 ("driver core: enforce device_lock for driver_match_device()") Fixes: 220175cd3979 ("clk: imx: scu: fix build break when compiled as modules") Reported-by: Alexander Stein Closes: https://lore.kernel.org/lkml/13955113.uLZWGnKmhe@steina-w/ Tested-by: Alexander Stein # TQMa8x/MBa8x Link: https://lore.kernel.org/lkml/DFU7CEPUSG9A.1KKGVW4HIPMSH@kernel.org/ [1] Acked-by: Abel Vesa Reviewed-by: Daniel Baluta Link: https://patch.msgid.link/20260212235842.85934-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/clk/imx/clk-imx8qxp.c | 24 +++++++++++++++++++++++- drivers/clk/imx/clk-scu.c | 12 +++++++++++- drivers/clk/imx/clk-scu.h | 2 ++ 3 files changed, 36 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index 3ae162625bb1..c781425a005e 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c @@ -346,7 +346,29 @@ static struct platform_driver imx8qxp_clk_driver = { }, .probe = imx8qxp_clk_probe, }; -module_platform_driver(imx8qxp_clk_driver); + +static int __init imx8qxp_clk_init(void) +{ + int ret; + + ret = platform_driver_register(&imx8qxp_clk_driver); + if (ret) + return ret; + + ret = imx_clk_scu_module_init(); + if (ret) + platform_driver_unregister(&imx8qxp_clk_driver); + + return ret; +} +module_init(imx8qxp_clk_init); + +static void __exit imx8qxp_clk_exit(void) +{ + imx_clk_scu_module_exit(); + platform_driver_unregister(&imx8qxp_clk_driver); +} +module_exit(imx8qxp_clk_exit); MODULE_AUTHOR("Aisheng Dong "); MODULE_DESCRIPTION("NXP i.MX8QXP clock driver"); diff --git a/drivers/clk/imx/clk-scu.c b/drivers/clk/imx/clk-scu.c index 33e637ad3579..a85ec48a798b 100644 --- a/drivers/clk/imx/clk-scu.c +++ b/drivers/clk/imx/clk-scu.c @@ -191,6 +191,16 @@ static bool imx_scu_clk_is_valid(u32 rsrc_id) return p != NULL; } +int __init imx_clk_scu_module_init(void) +{ + return platform_driver_register(&imx_clk_scu_driver); +} + +void __exit imx_clk_scu_module_exit(void) +{ + return platform_driver_unregister(&imx_clk_scu_driver); +} + int imx_clk_scu_init(struct device_node *np, const struct imx_clk_scu_rsrc_table *data) { @@ -215,7 +225,7 @@ int imx_clk_scu_init(struct device_node *np, rsrc_table = data; } - return platform_driver_register(&imx_clk_scu_driver); + return 0; } /* diff --git a/drivers/clk/imx/clk-scu.h b/drivers/clk/imx/clk-scu.h index af7b697f51ca..ca82f2cce897 100644 --- a/drivers/clk/imx/clk-scu.h +++ b/drivers/clk/imx/clk-scu.h @@ -25,6 +25,8 @@ extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8dxl; extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8qxp; extern const struct imx_clk_scu_rsrc_table imx_clk_scu_rsrc_imx8qm; +int __init imx_clk_scu_module_init(void); +void __exit imx_clk_scu_module_exit(void); int imx_clk_scu_init(struct device_node *np, const struct imx_clk_scu_rsrc_table *data); struct clk_hw *imx_scu_of_clk_src_get(struct of_phandle_args *clkspec, -- cgit v1.2.3 From ab39cc4cb8ceecdc2b61747433e7237f1ac2b789 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Tue, 24 Feb 2026 07:21:06 -0500 Subject: cpufreq: intel_pstate: Fix NULL pointer dereference in update_cpu_qos_request() The update_cpu_qos_request() function attempts to initialize the 'freq' variable by dereferencing 'cpudata' before verifying if the 'policy' is valid. This issue occurs on systems booted with the "nosmt" parameter, where all_cpu_data[cpu] is NULL for the SMT sibling threads. As a result, any call to update_qos_requests() will result in a NULL pointer dereference as the code will attempt to access pstate.turbo_freq using the NULL cpudata pointer. Also, pstate.turbo_freq may be updated by intel_pstate_get_hwp_cap() after initializing the 'freq' variable, so it is better to defer the 'freq' until intel_pstate_get_hwp_cap() has been called. Fix this by deferring the 'freq' assignment until after the policy and driver_data have been validated. Fixes: ae1bdd23b99f ("cpufreq: intel_pstate: Adjust frequency percentage computations") Reported-by: Jirka Hladky Closes: https://lore.kernel.org/all/CAE4VaGDfiPvz3AzrwrwM4kWB3SCkMci25nPO8W1JmTBd=xHzZg@mail.gmail.com/ Signed-off-by: David Arcari Cc: 6.18+ # 6.18+ [ rjw: Added one paragraph to the changelog ] Link: https://patch.msgid.link/20260224122106.228116-1-darcari@redhat.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a48af3540c74..bdc37080d319 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1647,8 +1647,8 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type) { struct cpudata *cpudata = all_cpu_data[cpu]; - unsigned int freq = cpudata->pstate.turbo_freq; struct freq_qos_request *req; + unsigned int freq; struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) @@ -1661,6 +1661,8 @@ static void update_cpu_qos_request(int cpu, enum freq_qos_req_type type) if (hwp_active) intel_pstate_get_hwp_cap(cpudata); + freq = cpudata->pstate.turbo_freq; + if (type == FREQ_QOS_MIN) { freq = DIV_ROUND_UP(freq * global.min_perf_pct, 100); } else { -- cgit v1.2.3 From 5ede90206273ff156a778254f0f972a55e973c89 Mon Sep 17 00:00:00 2001 From: Sofia Schneider Date: Sun, 22 Feb 2026 23:52:40 -0300 Subject: ACPI: OSI: Add DMI quirk for Acer Aspire One D255 The screen backlight turns off during boot (specifically during udev device initialization) when returning true for _OSI("Windows 2009"). Analyzing the device's DSDT reveals that the firmware takes a different code path when Windows 7 is reported, which leads to the backlight shutoff. Add a DMI quirk to invoke dmi_disable_osi_win7 for this model. Signed-off-by: Sofia Schneider Link: https://patch.msgid.link/20260223025240.518509-1-sofia@schn.dev Signed-off-by: Rafael J. Wysocki --- drivers/acpi/osi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers') diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c index f2c943b934be..9470f1830ff5 100644 --- a/drivers/acpi/osi.c +++ b/drivers/acpi/osi.c @@ -389,6 +389,19 @@ static const struct dmi_system_id acpi_osi_dmi_table[] __initconst = { }, }, + /* + * The screen backlight turns off during udev device creation + * when returning true for _OSI("Windows 2009") + */ + { + .callback = dmi_disable_osi_win7, + .ident = "Acer Aspire One D255", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "AOD255"), + }, + }, + /* * The wireless hotkey does not work on those machines when * returning true for _OSI("Windows 2012") -- cgit v1.2.3 From 96a1fd0d84b17360840f344826897fa71049870e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 12 Feb 2026 14:50:38 -0700 Subject: cxl: Fix race of nvdimm_bus object when creating nvdimm objects Found issue during running of cxl-translate.sh unit test. Adding a 3s sleep right before the test seems to make the issue reproduce fairly consistently. The cxl_translate module has dependency on cxl_acpi and causes orphaned nvdimm objects to reprobe after cxl_acpi is removed. The nvdimm_bus object is registered by the cxl_nvb object when cxl_acpi_probe() is called. With the nvdimm_bus object missing, __nd_device_register() will trigger NULL pointer dereference when accessing the dev->parent that points to &nvdimm_bus->dev. [ 192.884510] BUG: kernel NULL pointer dereference, address: 000000000000006c [ 192.895383] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20250812-19.fc42 08/12/2025 [ 192.897721] Workqueue: cxl_port cxl_bus_rescan_queue [cxl_core] [ 192.899459] RIP: 0010:kobject_get+0xc/0x90 [ 192.924871] Call Trace: [ 192.925959] [ 192.926976] ? pm_runtime_init+0xb9/0xe0 [ 192.929712] __nd_device_register.part.0+0x4d/0xc0 [libnvdimm] [ 192.933314] __nvdimm_create+0x206/0x290 [libnvdimm] [ 192.936662] cxl_nvdimm_probe+0x119/0x1d0 [cxl_pmem] [ 192.940245] cxl_bus_probe+0x1a/0x60 [cxl_core] [ 192.943349] really_probe+0xde/0x380 This patch also relies on the previous change where devm_cxl_add_nvdimm_bridge() is called from drivers/cxl/pmem.c instead of drivers/cxl/core.c to ensure the dependency of cxl_acpi on cxl_pmem. 1. Set probe_type of cxl_nvb to PROBE_FORCE_SYNCHRONOUS to ensure the driver is probed synchronously when add_device() is called. 2. Add a check in __devm_cxl_add_nvdimm_bridge() to ensure that the cxl_nvb driver is attached during cxl_acpi_probe(). 3. Take the cxl_root uport_dev lock and the cxl_nvb->dev lock in devm_cxl_add_nvdimm() before checking nvdimm_bus is valid. 4. Set cxl_nvdimm flag to CXL_NVD_F_INVALIDATED so cxl_nvdimm_probe() will exit with -EBUSY. The removal of cxl_nvdimm devices should prevent any orphaned devices from probing once the nvdimm_bus is gone. [ dj: Fixed 0-day reported kdoc issue. ] [ dj: Fix cxl_nvb reference leak on error. Gregory (kreview-0811365) ] Suggested-by: Dan Williams Fixes: 8fdcb1704f61 ("cxl/pmem: Add initial infrastructure for pmem support") Tested-by: Alison Schofield Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20260205001633.1813643-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pmem.c | 29 +++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 5 +++++ drivers/cxl/pmem.c | 10 ++++++++-- 3 files changed, 42 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index b652e3486038..68462e38a977 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -115,6 +115,15 @@ static void unregister_nvb(void *_cxl_nvb) device_unregister(&cxl_nvb->dev); } +static bool cxl_nvdimm_bridge_failed_attach(struct cxl_nvdimm_bridge *cxl_nvb) +{ + struct device *dev = &cxl_nvb->dev; + + guard(device)(dev); + /* If the device has no driver, then it failed to attach. */ + return dev->driver == NULL; +} + struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port) { @@ -138,6 +147,11 @@ struct cxl_nvdimm_bridge *__devm_cxl_add_nvdimm_bridge(struct device *host, if (rc) goto err; + if (cxl_nvdimm_bridge_failed_attach(cxl_nvb)) { + unregister_nvb(cxl_nvb); + return ERR_PTR(-ENODEV); + } + rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb); if (rc) return ERR_PTR(rc); @@ -248,6 +262,21 @@ int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, if (!cxl_nvb) return -ENODEV; + /* + * Take the uport_dev lock to guard against race of nvdimm_bus object. + * cxl_acpi_probe() registers the nvdimm_bus and is done under the + * root port uport_dev lock. + * + * Take the cxl_nvb device lock to ensure that cxl_nvb driver is in a + * consistent state. And the driver registers nvdimm_bus. + */ + guard(device)(cxl_nvb->port->uport_dev); + guard(device)(&cxl_nvb->dev); + if (!cxl_nvb->nvdimm_bus) { + rc = -ENODEV; + goto err_alloc; + } + cxl_nvd = cxl_nvdimm_alloc(cxl_nvb, cxlmd); if (IS_ERR(cxl_nvd)) { rc = PTR_ERR(cxl_nvd); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f5850800f400..9b947286eb9b 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -574,11 +574,16 @@ struct cxl_nvdimm_bridge { #define CXL_DEV_ID_LEN 19 +enum { + CXL_NVD_F_INVALIDATED = 0, +}; + struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */ u64 dirty_shutdowns; + unsigned long flags; }; struct cxl_pmem_region_mapping { diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index c67b30b516bf..082ec0f1c3a0 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -14,7 +14,7 @@ static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); /** - * __devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology + * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology * @host: platform firmware root device * @port: CXL port at the root of a CXL topology * @@ -143,6 +143,9 @@ static int cxl_nvdimm_probe(struct device *dev) struct nvdimm *nvdimm; int rc; + if (test_bit(CXL_NVD_F_INVALIDATED, &cxl_nvd->flags)) + return -EBUSY; + set_exclusive_cxl_commands(mds, exclusive_cmds); rc = devm_add_action_or_reset(dev, clear_exclusive, mds); if (rc) @@ -323,8 +326,10 @@ static int detach_nvdimm(struct device *dev, void *data) scoped_guard(device, dev) { if (dev->driver) { cxl_nvd = to_cxl_nvdimm(dev); - if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data) + if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data) { release = true; + set_bit(CXL_NVD_F_INVALIDATED, &cxl_nvd->flags); + } } } if (release) @@ -367,6 +372,7 @@ static struct cxl_driver cxl_nvdimm_bridge_driver = { .probe = cxl_nvdimm_bridge_probe, .id = CXL_DEVICE_NVDIMM_BRIDGE, .drv = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, .suppress_bind_attrs = true, }, }; -- cgit v1.2.3 From 60b5d1f68338aff2c5af0113f04aefa7169c50c2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 19 Feb 2026 16:16:17 -0800 Subject: cxl/mbox: validate payload size before accessing contents in cxl_payload_from_user_allowed() cxl_payload_from_user_allowed() casts and dereferences the input payload without first verifying its size. When a raw mailbox command is sent with an undersized payload (ie: 1 byte for CXL_MBOX_OP_CLEAR_LOG, which expects a 16-byte UUID), uuid_equal() reads past the allocated buffer, triggering a KASAN splat: BUG: KASAN: slab-out-of-bounds in memcmp+0x176/0x1d0 lib/string.c:683 Read of size 8 at addr ffff88810130f5c0 by task syz.1.62/2258 CPU: 2 UID: 0 PID: 2258 Comm: syz.1.62 Not tainted 6.19.0-dirty #3 PREEMPT(voluntary) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0xab/0xe0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xce/0x650 mm/kasan/report.c:482 kasan_report+0xce/0x100 mm/kasan/report.c:595 memcmp+0x176/0x1d0 lib/string.c:683 uuid_equal include/linux/uuid.h:73 [inline] cxl_payload_from_user_allowed drivers/cxl/core/mbox.c:345 [inline] cxl_mbox_cmd_ctor drivers/cxl/core/mbox.c:368 [inline] cxl_validate_cmd_from_user drivers/cxl/core/mbox.c:522 [inline] cxl_send_cmd+0x9c0/0xb50 drivers/cxl/core/mbox.c:643 __cxl_memdev_ioctl drivers/cxl/core/memdev.c:698 [inline] cxl_memdev_ioctl+0x14f/0x190 drivers/cxl/core/memdev.c:713 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl fs/ioctl.c:583 [inline] __x64_sys_ioctl+0x18e/0x210 fs/ioctl.c:583 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xa8/0x330 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fdaf331ba79 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fdaf1d77038 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fdaf3585fa0 RCX: 00007fdaf331ba79 RDX: 00002000000001c0 RSI: 00000000c030ce02 RDI: 0000000000000003 RBP: 00007fdaf33749df R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fdaf3586038 R14: 00007fdaf3585fa0 R15: 00007ffced2af768 Add 'in_size' parameter to cxl_payload_from_user_allowed() and validate the payload is large enough. Fixes: 6179045ccc0c ("cxl/mbox: Block immediate mode in SET_PARTITION_INFO command") Fixes: 206f9fa9d555 ("cxl/mbox: Add Clear Log mailbox command") Signed-off-by: Davidlohr Bueso Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260220001618.963490-2-dave@stgolabs.net Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index fa6dd0c94656..e7a6452bf544 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -311,6 +311,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) * cxl_payload_from_user_allowed() - Check contents of in_payload. * @opcode: The mailbox command opcode. * @payload_in: Pointer to the input payload passed in from user space. + * @in_size: Size of @payload_in in bytes. * * Return: * * true - payload_in passes check for @opcode. @@ -325,12 +326,15 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) * * The specific checks are determined by the opcode. */ -static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) +static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in, + size_t in_size) { switch (opcode) { case CXL_MBOX_OP_SET_PARTITION_INFO: { struct cxl_mbox_set_partition_info *pi = payload_in; + if (in_size < sizeof(*pi)) + return false; if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG) return false; break; @@ -338,6 +342,8 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) case CXL_MBOX_OP_CLEAR_LOG: { const uuid_t *uuid = (uuid_t *)payload_in; + if (in_size < sizeof(uuid_t)) + return false; /* * Restrict the ‘Clear log’ action to only apply to * Vendor debug logs. @@ -365,7 +371,8 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd, if (IS_ERR(mbox_cmd->payload_in)) return PTR_ERR(mbox_cmd->payload_in); - if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) { + if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in, + in_size)) { dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n", cxl_mem_opcode_to_name(opcode)); kvfree(mbox_cmd->payload_in); -- cgit v1.2.3 From 0a70b7cd397e545e926c93715ff6366b67c716f6 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 23 Feb 2026 11:13:39 -0800 Subject: cxl: Test CXL_DECODER_F_LOCK as a bitmask The CXL decoder flags are defined as bitmasks, not bit indices. Using test_bit() to check them interprets the mask value as a bit index, which is the wrong test. For CXL_DECODER_F_LOCK the test reads beyond the defined bits, causing the test to always return false and allowing resets that should have been blocked. Replace test_bit() with a bitmask check. Fixes: 2230c4bdc412 ("cxl: Add handling of locked CXL decoder") Signed-off-by: Alison Schofield Reviewed-by: Gregory Price Tested-by: Gregory Price Link: https://patch.msgid.link/98851c4770e4631753cf9f75b58a3a6daeca2ea2.1771873256.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/hdm.c | 2 +- drivers/cxl/core/region.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e3f0c39e6812..c222e98ae736 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -904,7 +904,7 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld) if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) return; - if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) + if (cxld->flags & CXL_DECODER_F_LOCK) return; if (port->commit_end == id) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index fec37af1dfbf..780ec947ecf2 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1100,7 +1100,7 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr, static void cxl_region_setup_flags(struct cxl_region *cxlr, struct cxl_decoder *cxld) { - if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) { + if (cxld->flags & CXL_DECODER_F_LOCK) { set_bit(CXL_REGION_F_LOCK, &cxlr->flags); clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); } -- cgit v1.2.3 From e46f25f5a81f6f1a9ab93bcda80d5dfaea9f4897 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 23 Feb 2026 11:13:40 -0800 Subject: cxl/region: Test CXL_DECODER_F_NORMALIZED_ADDRESSING as a bitmask The CXL decoder flags are defined as bitmasks, not bit indices. Using test_bit() to check them interprets the mask value as a bit index, which is the wrong test. For CXL_DECODER_F_NORMALIZED_ADDRESSING the test reads beyond the flags word, making the flag sometimes appear set and blocking creation of CXL region debugfs attributes that support poison operations. Replace test_bit() with a bitmask check. Found with cxl-test. Fixes: 208f432406b7 ("cxl: Disable HPA/SPA translation handlers for Normalized Addressing") Signed-off-by: Alison Schofield Reviewed-by: Gregory Price Tested-by: Gregory Price Link: https://patch.msgid.link/63fe4a6203e40e404347f1cdc7a1c55cb4959b86.1771873256.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 780ec947ecf2..42874948b589 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1105,7 +1105,7 @@ static void cxl_region_setup_flags(struct cxl_region *cxlr, clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); } - if (test_bit(CXL_DECODER_F_NORMALIZED_ADDRESSING, &cxld->flags)) + if (cxld->flags & CXL_DECODER_F_NORMALIZED_ADDRESSING) set_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags); } -- cgit v1.2.3 From 30df81f2228d65bddf492db3929d9fcaffd38fc5 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 23 Feb 2026 14:56:09 +0800 Subject: scsi: ufs: core: Fix possible NULL pointer dereference in ufshcd_add_command_trace() The kernel log indicates a crash in ufshcd_add_command_trace, due to a NULL pointer dereference when accessing hwq->id. This can happen if ufshcd_mcq_req_to_hwq() returns NULL. This patch adds a NULL check for hwq before accessing its id field to prevent a kernel crash. Kernel log excerpt: [] notify_die+0x4c/0x8c [] __die+0x60/0xb0 [] die+0x4c/0xe0 [] die_kernel_fault+0x74/0x88 [] __do_kernel_fault+0x314/0x318 [] do_page_fault+0xa4/0x5f8 [] do_translation_fault+0x34/0x54 [] do_mem_abort+0x50/0xa8 [] el1_abort+0x3c/0x64 [] el1h_64_sync_handler+0x44/0xcc [] el1h_64_sync+0x80/0x88 [] ufshcd_add_command_trace+0x23c/0x320 [] ufshcd_compl_one_cqe+0xa4/0x404 [] ufshcd_mcq_poll_cqe_lock+0xac/0x104 [] ufs_mtk_mcq_intr+0x54/0x74 [ufs_mediatek_mod] [] __handle_irq_event_percpu+0xc8/0x348 [] handle_irq_event+0x3c/0xa8 [] handle_fasteoi_irq+0xf8/0x294 [] generic_handle_domain_irq+0x54/0x80 [] gic_handle_irq+0x1d4/0x330 [] call_on_irq_stack+0x44/0x68 [] do_interrupt_handler+0x78/0xd8 [] el1_interrupt+0x48/0xa8 [] el1h_64_irq_handler+0x14/0x24 [] el1h_64_irq+0x80/0x88 [] arch_local_irq_enable+0x4/0x1c [] cpuidle_enter+0x34/0x54 [] do_idle+0x1dc/0x2f8 [] cpu_startup_entry+0x30/0x3c [] secondary_start_kernel+0x134/0x1ac [] __secondary_switched+0xc4/0xcc Signed-off-by: Peter Wang Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260223065657.2432447-1-peter.wang@mediatek.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 571a73860561..4dc7fbc2a6db 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -518,8 +518,8 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, struct scsi_cmnd *cmd, if (hba->mcq_enabled) { struct ufs_hw_queue *hwq = ufshcd_mcq_req_to_hwq(hba, rq); - - hwq_id = hwq->id; + if (hwq) + hwq_id = hwq->id; } else { doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); } -- cgit v1.2.3 From 62c015373e1cdb1cdca824bd2dbce2dac0819467 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 23 Feb 2026 18:37:57 +0800 Subject: scsi: ufs: core: Move link recovery for hibern8 exit failure to wl_resume Move the link recovery trigger from ufshcd_uic_pwr_ctrl() to __ufshcd_wl_resume(). Ensure link recovery is only attempted when hibern8 exit fails during resume, not during hibern8 enter in suspend. Improve error handling and prevent unnecessary link recovery attempts. Fixes: 35dabf4503b9 ("scsi: ufs: core: Use link recovery when h8 exit fails during runtime resume") Signed-off-by: Peter Wang Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260223103906.2533654-1-peter.wang@mediatek.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 4dc7fbc2a6db..85987373b961 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4390,14 +4390,6 @@ out_unlock: spin_unlock_irqrestore(hba->host->host_lock, flags); mutex_unlock(&hba->uic_cmd_mutex); - /* - * If the h8 exit fails during the runtime resume process, it becomes - * stuck and cannot be recovered through the error handler. To fix - * this, use link recovery instead of the error handler. - */ - if (ret && hba->pm_op_in_progress) - ret = ufshcd_link_recovery(hba); - return ret; } @@ -10200,7 +10192,15 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) } else { dev_err(hba->dev, "%s: hibern8 exit failed %d\n", __func__, ret); - goto vendor_suspend; + /* + * If the h8 exit fails during the runtime resume + * process, it becomes stuck and cannot be recovered + * through the error handler. To fix this, use link + * recovery instead of the error handler. + */ + ret = ufshcd_link_recovery(hba); + if (ret) + goto vendor_suspend; } } else if (ufshcd_is_link_off(hba)) { /* -- cgit v1.2.3 From 74b6e83942dcc9f3cca9e561b205a5b19940a344 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 19 Feb 2026 12:50:29 -0800 Subject: drm/gpusvm: Fix drm_gpusvm_pages_valid_unlocked() kernel-doc The kernel-doc for drm_gpusvm_pages_valid_unlocked() was stale and still referenced old range-based arguments and naming. Update the documentation to match the current function arguments and signature. Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/20260219205029.1011336-1-matthew.brost@intel.com --- drivers/gpu/drm/drm_gpusvm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 24180bfdf5a2..9ef9e52c0547 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1338,14 +1338,14 @@ bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm, EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid); /** - * drm_gpusvm_range_pages_valid_unlocked() - GPU SVM range pages valid unlocked + * drm_gpusvm_pages_valid_unlocked() - GPU SVM pages valid unlocked * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure + * @svm_pages: Pointer to the GPU SVM pages structure * - * This function determines if a GPU SVM range pages are valid. Expected be - * called without holding gpusvm->notifier_lock. + * This function determines if a GPU SVM pages are valid. Expected be called + * without holding gpusvm->notifier_lock. * - * Return: True if GPU SVM range has valid pages, False otherwise + * Return: True if GPU SVM pages are valid, False otherwise */ static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm, struct drm_gpusvm_pages *svm_pages) -- cgit v1.2.3 From 0902010c8d163f7b62e655efda1a843529152c7c Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 24 Feb 2026 18:07:59 +0800 Subject: regulator: fp9931: Fix PM runtime reference leak in fp9931_hwmon_read() In fp9931_hwmon_read(), if regmap_read() failed, the function returned the error code without calling pm_runtime_put_autosuspend(), causing a PM reference leak. Fixes: 12d821bd13d4 ("regulator: Add FP9931/JD9930 driver") Signed-off-by: Felix Gu Reviewed-by: Andreas Kemnade Link: https://patch.msgid.link/20260224-fp9931-v1-1-1cf05cabef4a@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/fp9931.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/regulator/fp9931.c b/drivers/regulator/fp9931.c index 7fbcc6327cc6..abea3b69d8a0 100644 --- a/drivers/regulator/fp9931.c +++ b/drivers/regulator/fp9931.c @@ -144,13 +144,12 @@ static int fp9931_hwmon_read(struct device *dev, enum hwmon_sensor_types type, return ret; ret = regmap_read(data->regmap, FP9931_REG_TMST_VALUE, &val); - if (ret) - return ret; + if (!ret) + *temp = (s8)val * 1000; pm_runtime_put_autosuspend(data->dev); - *temp = (s8)val * 1000; - return 0; + return ret; } static umode_t fp9931_hwmon_is_visible(const void *data, -- cgit v1.2.3 From 4baaddaa44af01cd4ce239493060738fd0881835 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 24 Feb 2026 19:19:03 +0800 Subject: regulator: bq257xx: Fix device node reference leak in bq257xx_reg_dt_parse_gpio() In bq257xx_reg_dt_parse_gpio(), if fails to get subchild, it returns without calling of_node_put(child), causing the device node reference leak. Fixes: 981dd162b635 ("regulator: bq257xx: Add bq257xx boost regulator driver") Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260224-bq257-v1-1-8ebbc731c1c3@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/bq257xx-regulator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/regulator/bq257xx-regulator.c b/drivers/regulator/bq257xx-regulator.c index fc1ccede4468..dab8f1ab4450 100644 --- a/drivers/regulator/bq257xx-regulator.c +++ b/drivers/regulator/bq257xx-regulator.c @@ -115,11 +115,10 @@ static void bq257xx_reg_dt_parse_gpio(struct platform_device *pdev) return; subchild = of_get_child_by_name(child, pdata->desc.of_match); + of_node_put(child); if (!subchild) return; - of_node_put(child); - pdata->otg_en_gpio = devm_fwnode_gpiod_get_index(&pdev->dev, of_fwnode_handle(subchild), "enable", 0, -- cgit v1.2.3 From bfd7db781e2e7a99b086d645a104d16e368f58ff Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 24 Feb 2026 20:23:30 +0800 Subject: regulator: Kconfig: fix a typo Fixes a typo in Kconfig, HWWON -> HWMON Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260224-kconfig-v1-1-b0c5459ed7a0@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index a708fc63f581..d10b6f9243d5 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -508,7 +508,7 @@ config REGULATOR_FP9931 This driver supports the FP9931/JD9930 voltage regulator chip which is used to provide power to Electronic Paper Displays so it is found in E-Book readers. - If HWWON is enabled, it also provides temperature measurement. + If HWMON is enabled, it also provides temperature measurement. config REGULATOR_LM363X tristate "TI LM363X voltage regulators" -- cgit v1.2.3 From e08f2adcf990b8cf272e90898401a9e481c1c667 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 24 Feb 2026 13:36:09 +0200 Subject: Revert "irqchip/ls-extirq: Use for_each_of_imap_item iterator" This reverts commit 3ac6dfe3d7a2396602b67667249b146504dfbd2a. The ls-extirq uses interrupt-map but it's a non-standard use documented in fsl,ls-extirq.yaml: # The driver(drivers/irqchip/irq-ls-extirq.c) have not use standard DT # function to parser interrupt-map. So it doesn't consider '#address-size' # in parent interrupt controller, such as GIC. # # When dt-binding verify interrupt-map, item data matrix is spitted at # incorrect position. Remove interrupt-map restriction because it always # wrong. This means that by using for_each_of_imap_item and the underlying of_irq_parse_imap_parent() on its interrupt-map property will effectively break its functionality Revert the patch making use of for_each_of_imap_item() in ls-extirq. Fixes: 3ac6dfe3d7a2 ("irqchip/ls-extirq: Use for_each_of_imap_item iterator") Signed-off-by: Ioana Ciornei Signed-off-by: Thomas Gleixner Acked-by: Herve Codina Link: https://patch.msgid.link/20260224113610.1129022-2-ioana.ciornei@nxp.com --- drivers/irqchip/irq-ls-extirq.c | 47 ++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c index a7e9c3885b09..96f9c20621cf 100644 --- a/drivers/irqchip/irq-ls-extirq.c +++ b/drivers/irqchip/irq-ls-extirq.c @@ -125,32 +125,45 @@ static const struct irq_domain_ops extirq_domain_ops = { static int ls_extirq_parse_map(struct ls_extirq_data *priv, struct device_node *node) { - struct of_imap_parser imap_parser; - struct of_imap_item imap_item; + const __be32 *map; + u32 mapsize; int ret; - ret = of_imap_parser_init(&imap_parser, node, &imap_item); - if (ret) - return ret; + map = of_get_property(node, "interrupt-map", &mapsize); + if (!map) + return -ENOENT; + if (mapsize % sizeof(*map)) + return -EINVAL; + mapsize /= sizeof(*map); - for_each_of_imap_item(&imap_parser, &imap_item) { + while (mapsize) { struct device_node *ipar; - u32 hwirq; - int i; + u32 hwirq, intsize, j; - hwirq = imap_item.child_imap[0]; - if (hwirq >= MAXIRQ) { - of_node_put(imap_item.parent_args.np); + if (mapsize < 3) + return -EINVAL; + hwirq = be32_to_cpup(map); + if (hwirq >= MAXIRQ) return -EINVAL; - } priv->nirq = max(priv->nirq, hwirq + 1); - ipar = of_node_get(imap_item.parent_args.np); - priv->map[hwirq].fwnode = of_fwnode_handle(ipar); + ipar = of_find_node_by_phandle(be32_to_cpup(map + 2)); + map += 3; + mapsize -= 3; + if (!ipar) + return -EINVAL; + priv->map[hwirq].fwnode = &ipar->fwnode; + ret = of_property_read_u32(ipar, "#interrupt-cells", &intsize); + if (ret) + return ret; + + if (intsize > mapsize) + return -EINVAL; - priv->map[hwirq].param_count = imap_item.parent_args.args_count; - for (i = 0; i < priv->map[hwirq].param_count; i++) - priv->map[hwirq].param[i] = imap_item.parent_args.args[i]; + priv->map[hwirq].param_count = intsize; + for (j = 0; j < intsize; ++j) + priv->map[hwirq].param[j] = be32_to_cpup(map++); + mapsize -= intsize; } return 0; } -- cgit v1.2.3 From fe5669e363b129cde285bfb4d45abb72d1d77cfc Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 24 Feb 2026 13:36:10 +0200 Subject: irqchip/ls-extirq: Fix devm_of_iomap() error check The devm_of_iomap() function returns an ERR_PTR() encoded error code on failure. Replace the incorrect check against NULL with IS_ERR(). Fixes: 05cd654829dd ("irqchip/ls-extirq: Convert to a platform driver to make it work again") Reported-by: Dan Carpenter Signed-off-by: Ioana Ciornei Signed-off-by: Thomas Gleixner Reviewed-by: Herve Codina Link: https://patch.msgid.link/20260224113610.1129022-3-ioana.ciornei@nxp.com Closes: https://lore.kernel.org/all/aYXvfbfT6w0TMsXS@stanley.mountain/ --- drivers/irqchip/irq-ls-extirq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c index 96f9c20621cf..d724fe843980 100644 --- a/drivers/irqchip/irq-ls-extirq.c +++ b/drivers/irqchip/irq-ls-extirq.c @@ -190,8 +190,10 @@ static int ls_extirq_probe(struct platform_device *pdev) return dev_err_probe(dev, -ENOMEM, "Failed to allocate memory\n"); priv->intpcr = devm_of_iomap(dev, node, 0, NULL); - if (!priv->intpcr) - return dev_err_probe(dev, -ENOMEM, "Cannot ioremap OF node %pOF\n", node); + if (IS_ERR(priv->intpcr)) { + return dev_err_probe(dev, PTR_ERR(priv->intpcr), + "Cannot ioremap OF node %pOF\n", node); + } ret = ls_extirq_parse_map(priv, node); if (ret) -- cgit v1.2.3 From 2f38fd99c0004676d835ae96ac4f3b54edc02c82 Mon Sep 17 00:00:00 2001 From: wangshuaiwei Date: Tue, 24 Feb 2026 14:32:28 +0800 Subject: scsi: ufs: core: Fix shift out of bounds when MAXQ=32 According to JESD223F, the maximum number of queues (MAXQ) is 32. When MCQ is enabled and ESI is disabled, nr_hw_queues=32 causes a shift overflow problem. Fix this by using 64-bit intermediate values to handle the nr_hw_queues=32 case safely. Signed-off-by: wangshuaiwei Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260224063228.50112-1-wangshuaiwei1@xiaomi.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 85987373b961..899e663fea6e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7110,7 +7110,7 @@ static irqreturn_t ufshcd_handle_mcq_cq_events(struct ufs_hba *hba) ret = ufshcd_vops_get_outstanding_cqs(hba, &outstanding_cqs); if (ret) - outstanding_cqs = (1U << hba->nr_hw_queues) - 1; + outstanding_cqs = (1ULL << hba->nr_hw_queues) - 1; /* Exclude the poll queues */ nr_queues = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL]; -- cgit v1.2.3 From bfbc0b5b32a8f28ce284add619bf226716a59bc0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 24 Feb 2026 11:51:16 -0700 Subject: media: dvb-core: fix wrong reinitialization of ringbuffer on reopen dvb_dvr_open() calls dvb_ringbuffer_init() when a new reader opens the DVR device. dvb_ringbuffer_init() calls init_waitqueue_head(), which reinitializes the waitqueue list head to empty. Since dmxdev->dvr_buffer.queue is a shared waitqueue (all opens of the same DVR device share it), this orphans any existing waitqueue entries from io_uring poll or epoll, leaving them with stale prev/next pointers while the list head is reset to {self, self}. The waitqueue and spinlock in dvr_buffer are already properly initialized once in dvb_dmxdev_init(). The open path only needs to reset the buffer data pointer, size, and read/write positions. Replace the dvb_ringbuffer_init() call in dvb_dvr_open() with direct assignment of data/size and a call to dvb_ringbuffer_reset(), which properly resets pread, pwrite, and error with correct memory ordering without touching the waitqueue or spinlock. Cc: stable@vger.kernel.org Fixes: 34731df288a5f ("V4L/DVB (3501): Dmxdev: use dvb_ringbuffer") Reported-by: syzbot+ab12f0c08dd7ab8d057c@syzkaller.appspotmail.com Tested-by: syzbot+ab12f0c08dd7ab8d057c@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/698a26d3.050a0220.3b3015.007d.GAE@google.com/ Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- drivers/media/dvb-core/dmxdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index 7cfed24054d0..3c8bc75e4d6c 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -168,7 +168,9 @@ static int dvb_dvr_open(struct inode *inode, struct file *file) mutex_unlock(&dmxdev->mutex); return -ENOMEM; } - dvb_ringbuffer_init(&dmxdev->dvr_buffer, mem, DVR_BUFFER_SIZE); + dmxdev->dvr_buffer.data = mem; + dmxdev->dvr_buffer.size = DVR_BUFFER_SIZE; + dvb_ringbuffer_reset(&dmxdev->dvr_buffer); if (dmxdev->may_do_mmap) dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr", &dmxdev->mutex, -- cgit v1.2.3 From 6acf7860dcc79ed045cc9e6a79c8a8bb6959dba7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Feb 2026 06:21:44 -0800 Subject: zloop: advertise a volatile write cache Zloop is file system backed and thus needs to sync the underlying file system to persist data. Set BLK_FEAT_WRITE_CACHE so that the block layer actually send flush commands, and fix the flush implementation as sync_filesystem requires s_umount to be held and the code currently misses that. Fixes: eb0570c7df23 ("block: new zoned loop block device driver") Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/zloop.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index 8e334f5025fc..ae9bf2a85c21 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -542,6 +542,21 @@ out: zloop_put_cmd(cmd); } +/* + * Sync the entire FS containing the zone files instead of walking all files. + */ +static int zloop_flush(struct zloop_device *zlo) +{ + struct super_block *sb = file_inode(zlo->data_dir)->i_sb; + int ret; + + down_read(&sb->s_umount); + ret = sync_filesystem(sb); + up_read(&sb->s_umount); + + return ret; +} + static void zloop_handle_cmd(struct zloop_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); @@ -562,11 +577,7 @@ static void zloop_handle_cmd(struct zloop_cmd *cmd) zloop_rw(cmd); return; case REQ_OP_FLUSH: - /* - * Sync the entire FS containing the zone files instead of - * walking all files - */ - cmd->ret = sync_filesystem(file_inode(zlo->data_dir)->i_sb); + cmd->ret = zloop_flush(zlo); break; case REQ_OP_ZONE_RESET: cmd->ret = zloop_reset_zone(zlo, rq_zone_no(rq)); @@ -981,7 +992,8 @@ static int zloop_ctl_add(struct zloop_options *opts) struct queue_limits lim = { .max_hw_sectors = SZ_1M >> SECTOR_SHIFT, .chunk_sectors = opts->zone_size, - .features = BLK_FEAT_ZONED, + .features = BLK_FEAT_ZONED | BLK_FEAT_WRITE_CACHE, + }; unsigned int nr_zones, i, j; struct zloop_device *zlo; -- cgit v1.2.3 From 3c4617117a2b7682cf037be5e5533e379707f050 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Feb 2026 06:21:45 -0800 Subject: zloop: check for spurious options passed to remove Zloop uses a command option parser for all control commands, but most options are only valid for adding a new device. Check for incorrectly specified options in the remove handler. Fixes: eb0570c7df23 ("block: new zoned loop block device driver") Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/zloop.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index ae9bf2a85c21..9e3bb538d5fc 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -1174,7 +1174,12 @@ static int zloop_ctl_remove(struct zloop_options *opts) int ret; if (!(opts->mask & ZLOOP_OPT_ID)) { - pr_err("No ID specified\n"); + pr_err("No ID specified for remove\n"); + return -EINVAL; + } + + if (opts->mask & ~ZLOOP_OPT_ID) { + pr_err("Invalid option specified for remove\n"); return -EINVAL; } -- cgit v1.2.3 From 2f61f38a217462411fed950e843b82bc119884cf Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 12:19:08 +0000 Subject: net: stmmac: fix timestamping configuration after suspend/resume When stmmac_init_timestamping() is called, it clears the receive and transmit path booleans that allow timestamps to be read. These are never re-initialised until after userspace requests timestamping features to be enabled. However, our copy of the timestamp configuration is not cleared, which means we return the old configuration to userspace when requested. This is inconsistent. Fix this by clearing the timestamp configuration. Fixes: d6228b7cdd6e ("net: stmmac: implement the SIOCGHWTSTAMP ioctl") Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Link: https://patch.msgid.link/E1vuUu4-0000000Afea-0j9B@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 82375d34ad57..91f638ce132e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -853,6 +853,7 @@ static int stmmac_init_timestamping(struct stmmac_priv *priv) netdev_info(priv->dev, "IEEE 1588-2008 Advanced Timestamp supported\n"); + memset(&priv->tstamp_config, 0, sizeof(priv->tstamp_config)); priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; -- cgit v1.2.3 From c601fd5414315fc515f746b499110e46272e7243 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 24 Feb 2026 22:12:28 +0000 Subject: drm/client: Do not destroy NULL modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'modes' in drm_client_modeset_probe may fail to kcalloc. If this occurs, we jump to 'out', calling modes_destroy on it, which dereferences it. This may result in a NULL pointer dereference in the error case. Prevent that. Fixes: 3039cc0c0653 ("drm/client: Make copies of modes") Signed-off-by: Jonathan Cavitt Cc: Ville Syrjälä Signed-off-by: Ville Syrjälä Link: https://patch.msgid.link/20260224221227.69126-2-jonathan.cavitt@intel.com --- drivers/gpu/drm/drm_client_modeset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 262b1b8773c5..bb49b8361271 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -930,7 +930,8 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, mutex_unlock(&client->modeset_mutex); out: kfree(crtcs); - modes_destroy(dev, modes, connector_count); + if (modes) + modes_destroy(dev, modes, connector_count); kfree(modes); kfree(offsets); kfree(enabled); -- cgit v1.2.3 From 29c8b85adb47daefc213381bc1831787f512d89b Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 25 Feb 2026 08:31:40 +0000 Subject: irqchip/gic-v5: Fix inversion of IRS_IDR0.virt flag It appears that a !! became ! during a cleanup, resulting in inverted logic when detecting if a host GICv5 implementation is capable of virtualization. Re-add the missing !, fixing the behaviour. Fixes: 3227c3a89d65f ("irqchip/gic-v5: Check if impl is virt capable") Signed-off-by: Sascha Bischoff Link: https://patch.msgid.link/20260225083130.3378490-1-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v5-irs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v5-irs.c b/drivers/irqchip/irq-gic-v5-irs.c index eeeb40fb0eaa..4ed2bfa17c01 100644 --- a/drivers/irqchip/irq-gic-v5-irs.c +++ b/drivers/irqchip/irq-gic-v5-irs.c @@ -744,7 +744,7 @@ static int __init gicv5_irs_init(struct device_node *node) */ if (list_empty(&irs_nodes)) { idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR0); - gicv5_global_data.virt_capable = !FIELD_GET(GICV5_IRS_IDR0_VIRT, idr); + gicv5_global_data.virt_capable = !!FIELD_GET(GICV5_IRS_IDR0_VIRT, idr); idr = irs_readl_relaxed(irs_data, GICV5_IRS_IDR1); irs_setup_pri_bits(idr); -- cgit v1.2.3 From 93a4a9b732fbb479f95d327aa867d094aed3f712 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 24 Feb 2026 17:59:52 +0100 Subject: RDMA/core: Check id_priv->restricted_node_type in cma_listen_on_dev() When listening on wildcard addresses we have a global list for the application layer rdma_cm_id and for any existing device or any device added in future we try to listen on any wildcard listener. When the listener has a restricted_node_type we should prevent listening on devices with a different node type. While there fix the documentation comment of rdma_restrict_node_type() to include rdma_resolve_addr() instead of having rdma_bind_addr() twice. Fixes: a760e80e90f5 ("RDMA/core: introduce rdma_restrict_node_type()") Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Steve French Cc: Namjae Jeon Cc: Tom Talpey Cc: Long Li Cc: linux-rdma@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Link: https://patch.msgid.link/20260224165951.3582093-2-metze@samba.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e54c07c74575..9480d1a51c11 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2729,6 +2729,9 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, *to_destroy = NULL; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return 0; + if (id_priv->restricted_node_type != RDMA_NODE_UNSPECIFIED && + id_priv->restricted_node_type != cma_dev->device->node_type) + return 0; dev_id_priv = __rdma_create_id(net, cma_listen_handler, id_priv, @@ -2736,6 +2739,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, if (IS_ERR(dev_id_priv)) return PTR_ERR(dev_id_priv); + dev_id_priv->restricted_node_type = id_priv->restricted_node_type; dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); @@ -4194,7 +4198,7 @@ int rdma_restrict_node_type(struct rdma_cm_id *id, u8 node_type) } mutex_lock(&lock); - if (id_priv->cma_dev) + if (READ_ONCE(id_priv->state) != RDMA_CM_IDLE) ret = -EALREADY; else id_priv->restricted_node_type = node_type; -- cgit v1.2.3 From 104016eb671e19709721c1b0048dd912dc2e96be Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Tue, 24 Feb 2026 23:41:53 +0000 Subject: RDMA/umem: Fix double dma_buf_unpin in failure path In ib_umem_dmabuf_get_pinned_with_dma_device(), the call to ib_umem_dmabuf_map_pages() can fail. If this occurs, the dmabuf is immediately unpinned but the umem_dmabuf->pinned flag is still set. Then, when ib_umem_release() is called, it calls ib_umem_dmabuf_revoke() which will call dma_buf_unpin() again. Fix this by removing the immediate unpin upon failure and just let the ib_umem_release/revoke path handle it. This also ensures the proper unmap-unpin unwind ordering if the dmabuf_map_pages call happened to fail due to dma_resv_wait_timeout (and therefore has a non-NULL umem_dmabuf->sgt). Fixes: 1e4df4a21c5a ("RDMA/umem: Allow pinned dmabuf umem usage") Signed-off-by: Jacob Moroni Link: https://patch.msgid.link/20260224234153.1207849-1-jmoroni@google.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem_dmabuf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index f5298c33e581..d30f24b90bca 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -218,13 +218,11 @@ ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, err = ib_umem_dmabuf_map_pages(umem_dmabuf); if (err) - goto err_unpin; + goto err_release; dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); return umem_dmabuf; -err_unpin: - dma_buf_unpin(umem_dmabuf->attach); err_release: dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); ib_umem_release(&umem_dmabuf->umem); -- cgit v1.2.3 From 6b050482ec40569429d963ac52afa878691b04c9 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 24 Feb 2026 16:17:52 -0800 Subject: cpufreq: intel_pstate: Fix crash during turbo disable When the system is booted with kernel command line argument "nosmt" or "maxcpus" to limit the number of CPUs, disabling turbo via: echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo results in a crash: PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP PTI ... RIP: 0010:store_no_turbo+0x100/0x1f0 ... This occurs because for_each_possible_cpu() returns CPUs even if they are not online. For those CPUs, all_cpu_data[] will be NULL. Since commit 973207ae3d7c ("cpufreq: intel_pstate: Rearrange max frequency updates handling code"), all_cpu_data[] is dereferenced even for CPUs which are not online, causing the NULL pointer dereference. To fix that, pass CPU number to intel_pstate_update_max_freq() and use all_cpu_data[] for those CPUs for which there is a valid cpufreq policy. Fixes: 973207ae3d7c ("cpufreq: intel_pstate: Rearrange max frequency updates handling code") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221068 Signed-off-by: Srinivas Pandruvada Cc: 6.16+ # 6.16+ Link: https://patch.msgid.link/20260225001752.890164-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index bdc37080d319..11c58af41900 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1476,13 +1476,13 @@ static void __intel_pstate_update_max_freq(struct cpufreq_policy *policy, refresh_frequency_limits(policy); } -static bool intel_pstate_update_max_freq(struct cpudata *cpudata) +static bool intel_pstate_update_max_freq(int cpu) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); if (!policy) return false; - __intel_pstate_update_max_freq(policy, cpudata); + __intel_pstate_update_max_freq(policy, all_cpu_data[cpu]); return true; } @@ -1501,7 +1501,7 @@ static void intel_pstate_update_limits_for_all(void) int cpu; for_each_possible_cpu(cpu) - intel_pstate_update_max_freq(all_cpu_data[cpu]); + intel_pstate_update_max_freq(cpu); mutex_lock(&hybrid_capacity_lock); @@ -1910,7 +1910,7 @@ static void intel_pstate_notify_work(struct work_struct *work) struct cpudata *cpudata = container_of(to_delayed_work(work), struct cpudata, hwp_notify_work); - if (intel_pstate_update_max_freq(cpudata)) { + if (intel_pstate_update_max_freq(cpudata->cpu)) { /* * The driver will not be unregistered while this function is * running, so update the capacity without acquiring the driver -- cgit v1.2.3 From 468711a40d5dfc01bf0a24c1981246a2c93ac405 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 10 Feb 2026 19:12:25 +0100 Subject: PCI: dwc: ep: Refresh MSI Message Address cache on change Endpoint drivers use dw_pcie_ep_raise_msi_irq() to raise MSI interrupts to the host. After 8719c64e76bf ("PCI: dwc: ep: Cache MSI outbound iATU mapping"), dw_pcie_ep_raise_msi_irq() caches the Message Address from the MSI Capability in ep->msi_msg_addr. But that Message Address is controlled by the host, and it may change. For example, if: - firmware on the host configures the Message Address and triggers an MSI, - a driver on the Endpoint raises the MSI via dw_pcie_ep_raise_msi_irq(), which caches the Message Address, - a kernel on the host reconfigures the Message Address and the host kernel driver triggers another MSI, dw_pcie_ep_raise_msi_irq() notices that the Message Address no longer matches the cached ep->msi_msg_addr, warns about it, and returns error instead of raising the MSI. The host kernel may hang because it never receives the MSI. This was seen with the nvmet_pci_epf_driver: the host UEFI performs NVMe commands, e.g. Identify Controller to get the name of the controller, nvmet-pci-epf posts the completion queue entry and raises an IRQ using dw_pcie_ep_raise_msi_irq(). When the host boots Linux, we see a WARN_ON_ONCE() from dw_pcie_ep_raise_msi_irq(), and the host kernel hangs because the nvme driver never gets an IRQ. Remove the warning when dw_pcie_ep_raise_msi_irq() notices that Message Address has changed, remap using the new address, and update the ep->msi_msg_addr cache. Fixes: 8719c64e76bf ("PCI: dwc: ep: Cache MSI outbound iATU mapping") Signed-off-by: Niklas Cassel [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Tested-by: Shin'ichiro Kawasaki Tested-by: Koichiro Den Acked-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260210181225.3926165-2-cassel@kernel.org --- drivers/pci/controller/dwc/pcie-designware-ep.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 295076cf70de..35d8fc5e1d20 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -905,6 +905,19 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, * supported, so we avoid reprogramming the region on every MSI, * specifically unmapping immediately after writel(). */ + if (ep->msi_iatu_mapped && (ep->msi_msg_addr != msg_addr || + ep->msi_map_size != map_size)) { + /* + * The host changed the MSI target address or the required + * mapping size changed. Reprogramming the iATU when there are + * operations in flight is unsafe on this controller. However, + * there is no unified way to check if we have operations in + * flight, thus we don't know if we should WARN() or not. + */ + dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys); + ep->msi_iatu_mapped = false; + } + if (!ep->msi_iatu_mapped) { ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, @@ -915,15 +928,6 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, ep->msi_iatu_mapped = true; ep->msi_msg_addr = msg_addr; ep->msi_map_size = map_size; - } else if (WARN_ON_ONCE(ep->msi_msg_addr != msg_addr || - ep->msi_map_size != map_size)) { - /* - * The host changed the MSI target address or the required - * mapping size changed. Reprogramming the iATU at runtime is - * unsafe on this controller, so bail out instead of trying to - * update the existing region. - */ - return -EINVAL; } writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset); -- cgit v1.2.3 From c22533c66ccae10511ad6a7afc34bb26c47577e3 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 11 Feb 2026 18:55:41 +0100 Subject: PCI: dwc: ep: Flush MSI-X write before unmapping its ATU entry Endpoint drivers use dw_pcie_ep_raise_msix_irq() to raise an MSI-X interrupt to the host using a writel(), which generates a PCI posted write transaction. There's no completion for posted writes, so the writel() may return before the PCI write completes. dw_pcie_ep_raise_msix_irq() also unmaps the outbound ATU entry used for the PCI write, so the write races with the unmap. If the PCI write loses the race with the ATU unmap, the write may corrupt host memory or cause IOMMU errors, e.g., these when running fio with a larger queue depth against nvmet-pci-epf: arm-smmu-v3 fc900000.iommu: 0x0000010000000010 arm-smmu-v3 fc900000.iommu: 0x0000020000000000 arm-smmu-v3 fc900000.iommu: 0x000000090000f040 arm-smmu-v3 fc900000.iommu: 0x0000000000000000 arm-smmu-v3 fc900000.iommu: event: F_TRANSLATION client: 0000:01:00.0 sid: 0x100 ssid: 0x0 iova: 0x90000f040 ipa: 0x0 arm-smmu-v3 fc900000.iommu: unpriv data write s1 "Input address caused fault" stag: 0x0 Flush the write by performing a readl() of the same address to ensure that the write has reached the destination before the ATU entry is unmapped. The same problem was solved for dw_pcie_ep_raise_msi_irq() in commit 8719c64e76bf ("PCI: dwc: ep: Cache MSI outbound iATU mapping"), but there it was solved by dedicating an outbound iATU only for MSI. We can't do the same for MSI-X because each vector can have a different msg_addr and the msg_addr may be changed while the vector is masked. Fixes: beb4641a787d ("PCI: dwc: Add MSI-X callbacks handler") Signed-off-by: Niklas Cassel [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Frank Li Link: https://patch.msgid.link/20260211175540.105677-2-cassel@kernel.org --- drivers/pci/controller/dwc/pcie-designware-ep.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 35d8fc5e1d20..c57ae4d6c5c0 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -1014,6 +1014,9 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, writel(msg_data, ep->msi_mem + offset); + /* flush posted write before unmap */ + readl(ep->msi_mem + offset); + dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys); return 0; -- cgit v1.2.3 From 75c151ceaacf5ca8f2f34ebf863d88002fb12587 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 25 Feb 2026 12:47:52 -0800 Subject: accel/amdxdna: Use a different name for latest firmware Using legacy driver with latest firmware causes a power off issue. Fix this by assigning a different filename (npu_7.sbin) to the latest firmware. The driver attempts to load the latest firmware first and falls back to the previous firmware version if loading fails. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5009 Fixes: f1eac46fe5f7 ("accel/amdxdna: Update firmware version check for latest firmware") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260225204752.2711734-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_pci.c | 20 +++++++++++++++++++- drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 +++ drivers/accel/amdxdna/npu1_regs.c | 2 +- drivers/accel/amdxdna/npu4_regs.c | 2 +- drivers/accel/amdxdna/npu5_regs.c | 2 +- drivers/accel/amdxdna/npu6_regs.c | 2 +- 6 files changed, 26 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 4b3e6bb97bd2..85079b6fc5d9 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -32,6 +32,11 @@ static int aie2_max_col = XRS_MAX_COL; module_param(aie2_max_col, uint, 0600); MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used"); +static char *npu_fw[] = { + "npu_7.sbin", + "npu.sbin" +}; + /* * The management mailbox channel is allocated by firmware. * The related register and ring buffer information is on SRAM BAR. @@ -489,6 +494,7 @@ static int aie2_init(struct amdxdna_dev *xdna) struct psp_config psp_conf; const struct firmware *fw; unsigned long bars = 0; + char *fw_full_path; int i, nvec, ret; if (!hypervisor_is_type(X86_HYPER_NATIVE)) { @@ -503,7 +509,19 @@ static int aie2_init(struct amdxdna_dev *xdna) ndev->priv = xdna->dev_info->dev_priv; ndev->xdna = xdna; - ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev); + for (i = 0; i < ARRAY_SIZE(npu_fw); i++) { + fw_full_path = kasprintf(GFP_KERNEL, "%s%s", ndev->priv->fw_path, npu_fw[i]); + if (!fw_full_path) + return -ENOMEM; + + ret = firmware_request_nowarn(&fw, fw_full_path, &pdev->dev); + kfree(fw_full_path); + if (!ret) { + XDNA_INFO(xdna, "Load firmware %s%s", ndev->priv->fw_path, npu_fw[i]); + break; + } + } + if (ret) { XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", ndev->priv->fw_path, ret); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 4ada45d06fcf..a4384593bdcc 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -23,6 +23,9 @@ MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin"); MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); +MODULE_FIRMWARE("amdnpu/1502_00/npu_7.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_10/npu_7.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin"); /* * 0.0: Initial version diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c index 6f36a27b5a02..6e3d3ca69c04 100644 --- a/drivers/accel/amdxdna/npu1_regs.c +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -72,7 +72,7 @@ static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = { }; static const struct amdxdna_dev_priv npu1_dev_priv = { - .fw_path = "amdnpu/1502_00/npu.sbin", + .fw_path = "amdnpu/1502_00/", .rt_config = npu1_default_rt_cfg, .dpm_clk_tbl = npu1_dpm_clk_table, .fw_feature_tbl = npu1_fw_feature_table, diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index a8d6f76dde5f..ce25eef5fc34 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -98,7 +98,7 @@ const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { }; static const struct amdxdna_dev_priv npu4_dev_priv = { - .fw_path = "amdnpu/17f0_10/npu.sbin", + .fw_path = "amdnpu/17f0_10/", .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table, diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c index c0a35cfd886c..c0ac5daf32ee 100644 --- a/drivers/accel/amdxdna/npu5_regs.c +++ b/drivers/accel/amdxdna/npu5_regs.c @@ -63,7 +63,7 @@ #define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE static const struct amdxdna_dev_priv npu5_dev_priv = { - .fw_path = "amdnpu/17f0_11/npu.sbin", + .fw_path = "amdnpu/17f0_11/", .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table, diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c index 1fb07df99186..ce591ed0d483 100644 --- a/drivers/accel/amdxdna/npu6_regs.c +++ b/drivers/accel/amdxdna/npu6_regs.c @@ -63,7 +63,7 @@ #define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE static const struct amdxdna_dev_priv npu6_dev_priv = { - .fw_path = "amdnpu/17f0_10/npu.sbin", + .fw_path = "amdnpu/17f0_10/", .rt_config = npu4_default_rt_cfg, .dpm_clk_tbl = npu4_dpm_clk_table, .fw_feature_tbl = npu4_fw_feature_table, -- cgit v1.2.3 From 49abfa812617a7f2d0132c70d23ac98b389c6ec1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Feb 2026 12:41:30 +0000 Subject: drm/amdgpu/userq: Fix reference leak in amdgpu_userq_wait_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop reference to syncobj and timeline fence when aborting the ioctl due output array being too small. Reviewed-by: Alex Deucher Signed-off-by: Tvrtko Ursulin Fixes: a292fdecd728 ("drm/amdgpu: Implement userqueue signal/wait IOCTL") Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 68951e9c3e6bb22396bc42ef2359751c8315dd27) Cc: # v6.16+ --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 8013260e29dc..9b9947b94b89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -876,6 +876,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, dma_fence_unwrap_for_each(f, &iter, fence) { if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { r = -EINVAL; + dma_fence_put(fence); goto free_fences; } @@ -900,6 +901,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { r = -EINVAL; + dma_fence_put(fence); goto free_fences; } -- cgit v1.2.3 From 7b7d7693a55d606d700beb9549c9f7f0e5d9c24f Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Feb 2026 12:41:31 +0000 Subject: drm/amdgpu/userq: Do not allow userspace to trivially triger kernel warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace can either deliberately pass in the too small num_fences, or the required number can legitimately grow between the two calls to the userq wait ioctl. In both cases we do not want the emit the kernel warning backtrace since nothing is wrong with the kernel and userspace will simply get an errno reported back. So lets simply drop the WARN_ONs. Reviewed-by: Alex Deucher Signed-off-by: Tvrtko Ursulin Fixes: a292fdecd728 ("drm/amdgpu: Implement userqueue signal/wait IOCTL") Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 2c333ea579de6cc20ea7bc50e9595ef72863e65c) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 9b9947b94b89..d972dc46f5a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -833,7 +833,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, DMA_RESV_USAGE_READ, fence) { - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; goto free_fences; } @@ -850,7 +850,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, DMA_RESV_USAGE_WRITE, fence) { - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; goto free_fences; } @@ -874,7 +874,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, goto free_fences; dma_fence_unwrap_for_each(f, &iter, fence) { - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; dma_fence_put(fence); goto free_fences; @@ -899,7 +899,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (r) goto free_fences; - if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + if (num_fences >= wait_info->num_fences) { r = -EINVAL; dma_fence_put(fence); goto free_fences; -- cgit v1.2.3 From ea78f8c68f4f6211c557df49174c54d167821962 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 20 Feb 2026 13:47:58 +0530 Subject: drm/amdgpu: add upper bound check on user inputs in signal ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Huge input values in amdgpu_userq_signal_ioctl can lead to a OOM and could be exploited. So check these input value against AMDGPU_USERQ_MAX_HANDLES which is big enough value for genuine use cases and could potentially avoid OOM. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit be267e15f99bc97cbe202cd556717797cdcf79a5) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index d972dc46f5a8..c5f5af20af75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -35,6 +35,8 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops; static struct kmem_cache *amdgpu_userq_fence_slab; +#define AMDGPU_USERQ_MAX_HANDLES (1U << 16) + int amdgpu_userq_fence_slab_init(void) { amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", @@ -478,6 +480,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; + if (args->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || + args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) + return -EINVAL; + num_syncobj_handles = args->num_syncobj_handles; syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), size_mul(sizeof(u32), num_syncobj_handles)); -- cgit v1.2.3 From 64ac7c09fc44985ec9bb6a9db740899fa40ca613 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 24 Feb 2026 12:13:09 +0530 Subject: drm/amdgpu: add upper bound check on user inputs in wait ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Huge input values in amdgpu_userq_wait_ioctl can lead to a OOM and could be exploited. So check these input value against AMDGPU_USERQ_MAX_HANDLES which is big enough value for genuine use cases and could potentially avoid OOM. v2: squash in Srini's fix Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit fcec012c664247531aed3e662f4280ff804d1476) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index c5f5af20af75..7e9cf1868cc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -671,6 +671,11 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; + if (wait_info->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || + wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) + return -EINVAL; + num_read_bo_handles = wait_info->num_bo_read_handles; bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), size_mul(sizeof(u32), num_read_bo_handles)); -- cgit v1.2.3 From 28dfe4317541e57fe52f9a290394cd29c348228b Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Mon, 23 Feb 2026 12:45:37 +0100 Subject: drm/amd/display: Use GFP_ATOMIC in dc_create_stream_for_sink This can be called while preemption is disabled, for example by dcn32_internal_validate_bw which is called with the FPU active. Fixes "BUG: scheduling while atomic" messages I encounter on my Navi31 machine. Signed-off-by: Natalie Vock Signed-off-by: Alex Deucher (cherry picked from commit b42dae2ebc5c84a68de63ec4ffdfec49362d53f1) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 246893d80f1f..baf820e6eae8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -170,11 +170,11 @@ struct dc_stream_state *dc_create_stream_for_sink( if (sink == NULL) goto fail; - stream = kzalloc_obj(struct dc_stream_state); + stream = kzalloc_obj(struct dc_stream_state, GFP_ATOMIC); if (stream == NULL) goto fail; - stream->update_scratch = kzalloc((int32_t) dc_update_scratch_space_size(), GFP_KERNEL); + stream->update_scratch = kzalloc((int32_t) dc_update_scratch_space_size(), GFP_ATOMIC); if (stream->update_scratch == NULL) goto fail; -- cgit v1.2.3 From 5e0bcc7b88bcd081aaae6f481b10d9ab294fcb69 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Feb 2026 14:00:07 -0800 Subject: drm/amdgpu: Unlock a mutex before destroying it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mutexes must be unlocked before these are destroyed. This has been detected by the Clang thread-safety analyzer. Cc: Alex Deucher Cc: Christian König Cc: Yang Wang Cc: Hawking Zhang Cc: amd-gfx@lists.freedesktop.org Fixes: f5e4cc8461c4 ("drm/amdgpu: implement RAS ACA driver framework") Reviewed-by: Yang Wang Acked-by: Christian König Signed-off-by: Bart Van Assche Signed-off-by: Alex Deucher (cherry picked from commit 270258ba320beb99648dceffb67e86ac76786e55) --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index afe5ca81beec..db7858fe0c3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -641,6 +641,7 @@ static void aca_error_fini(struct aca_error *aerr) aca_bank_error_remove(aerr, bank_error); out_unlock: + mutex_unlock(&aerr->lock); mutex_destroy(&aerr->lock); } -- cgit v1.2.3 From 480ad5f6ead4a47b969aab6618573cd6822bb6a4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 23 Feb 2026 13:50:23 -0800 Subject: drm/amdgpu: Fix locking bugs in error paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not unlock psp->ras_context.mutex if it has not been locked. This has been detected by the Clang thread-safety analyzer. Cc: Alex Deucher Cc: Christian König Cc: YiPeng Chai Cc: Hawking Zhang Cc: amd-gfx@lists.freedesktop.org Fixes: b3fb79cda568 ("drm/amdgpu: add mutex to protect ras shared memory") Acked-by: Christian König Signed-off-by: Bart Van Assche Signed-off-by: Alex Deucher (cherry picked from commit 6fa01b4335978051d2cd80841728fd63cc597970) --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 6e8aad91bcd3..0d3c18f04ac3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -332,13 +332,13 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size if (!context || !context->initialized) { dev_err(adev->dev, "TA is not initialized\n"); ret = -EINVAL; - goto err_free_shared_buf; + goto free_shared_buf; } if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) { dev_err(adev->dev, "Unsupported function to invoke TA\n"); ret = -EOPNOTSUPP; - goto err_free_shared_buf; + goto free_shared_buf; } context->session_id = ta_id; @@ -346,7 +346,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size mutex_lock(&psp->ras_context.mutex); ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); if (ret) - goto err_free_shared_buf; + goto unlock; ret = psp_fn_ta_invoke(psp, cmd_id); if (ret || context->resp_status) { @@ -354,15 +354,17 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size ret, context->resp_status); if (!ret) { ret = -EINVAL; - goto err_free_shared_buf; + goto unlock; } } if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len)) ret = -EFAULT; -err_free_shared_buf: +unlock: mutex_unlock(&psp->ras_context.mutex); + +free_shared_buf: kfree(shared_buf); return ret; -- cgit v1.2.3 From a5fe1a54513196e4bc8f9170006057dc31e7155e Mon Sep 17 00:00:00 2001 From: sguttula Date: Sat, 21 Feb 2026 10:03:32 +0530 Subject: drm/amdgpu/vcn5: Add SMU dpm interface type This will set AMDGPU_VCN_SMU_DPM_INTERFACE_* smu_type based on soc type and fixing ring timeout issue seen for DPM enabled case. Signed-off-by: sguttula Reviewed-by: Pratik Vishwakarma Signed-off-by: Alex Deucher (cherry picked from commit f0f23c315b38c55e8ce9484cf59b65811f350630) --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 0202df5db1e1..6109124f852e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -174,6 +174,10 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); -- cgit v1.2.3 From b57c4ec98c17789136a4db948aec6daadceb5024 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 24 Feb 2026 10:18:51 +0530 Subject: drm/amdgpu: Fix error handling in slot reset If the device has not recovered after slot reset is called, it goes to out label for error handling. There it could make decision based on uninitialized hive pointer and could result in accessing an uninitialized list. Initialize the list and hive properly so that it handles the error situation and also releases the reset domain lock which is acquired during error_detected callback. Fixes: 732c6cefc1ec ("drm/amdgpu: Replace tmp_adev with hive in amdgpu_pci_slot_reset") Signed-off-by: Lijo Lazar Reviewed-by: Ce Sun Signed-off-by: Alex Deucher (cherry picked from commit bb71362182e59caa227e4192da5a612b09349696) --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d9789e0b5201..3e19b51a2763 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -7059,6 +7059,15 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); + INIT_LIST_HEAD(&device_list); + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + list_add_tail(&tmp_adev->reset_list, &device_list); + } else { + list_add_tail(&adev->reset_list, &device_list); + } if (adev->pcie_reset_ctx.swus) link_dev = adev->pcie_reset_ctx.swus; @@ -7099,19 +7108,13 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); - INIT_LIST_HEAD(&device_list); - hive = amdgpu_get_xgmi_hive(adev); if (hive) { - mutex_lock(&hive->hive_lock); reset_context.hive = hive; - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) tmp_adev->pcie_reset_ctx.in_link_reset = true; - list_add_tail(&tmp_adev->reset_list, &device_list); - } } else { set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - list_add_tail(&adev->reset_list, &device_list); } r = amdgpu_device_asic_reset(adev, &device_list, &reset_context); -- cgit v1.2.3 From 6b0d812971370c64b837a2db4275410f478272fe Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 25 Feb 2026 10:51:16 -0600 Subject: drm/amd: Disable MES LR compute W/A A workaround was introduced in commit 1fb710793ce2 ("drm/amdgpu: Enable MES lr_compute_wa by default") to help with some hangs observed in gfx1151. This WA didn't fully fix the issue. It was actually fixed by adjusting the VGPR size to the correct value that matched the hardware in commit b42f3bf9536c ("drm/amdkfd: bump minimum vgpr size for gfx1151"). There are reports of instability on other products with newer GC microcode versions, and I believe they're caused by this workaround. As we don't need the workaround any more, remove it. Fixes: b42f3bf9536c ("drm/amdkfd: bump minimum vgpr size for gfx1151") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 9973e64bd6ee7642860a6f3b6958cbf14e89cabd) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 5 ----- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 5 ----- 2 files changed, 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 09ebb13ca5e8..a926a330700e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -720,11 +720,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f) - mes_set_hw_res_pkt.enable_lr_compute_wa = 1; - else - dev_info_once(mes->adev->dev, - "MES FW version must be >= 0x7f to enable LR compute workaround.\n"); if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index b1c864dc79a8..5bfa5d1d0b36 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -779,11 +779,6 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; - if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82) - mes_set_hw_res_pkt.enable_lr_compute_wa = 1; - else - dev_info_once(adev->dev, - "MES FW version must be >= 0x82 to enable LR compute workaround.\n"); /* * Keep oversubscribe timer for sdma . When we have unmapped doorbell -- cgit v1.2.3 From 12133a483dfa832241fbbf09321109a0ea8a520e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 12:28:30 +0100 Subject: nfc: pn533: properly drop the usb interface reference on disconnect When the device is disconnected from the driver, there is a "dangling" reference count on the usb interface that was grabbed in the probe callback. Fix this up by properly dropping the reference after we are done with it. Cc: stable Signed-off-by: Greg Kroah-Hartman Reviewed-by: Simon Horman Fixes: c46ee38620a2 ("NFC: pn533: add NXP pn533 nfc device driver") Link: https://patch.msgid.link/2026022329-flashing-ought-7573@gregkh Signed-off-by: Jakub Kicinski --- drivers/nfc/pn533/usb.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 018a80674f06..0f12f86ebb02 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -628,6 +628,7 @@ static void pn533_usb_disconnect(struct usb_interface *interface) usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); kfree(phy->ack_buffer); + usb_put_dev(phy->udev); nfc_info(&interface->dev, "NXP PN533 NFC device disconnected\n"); } -- cgit v1.2.3 From 11de1d3ae5565ed22ef1f89d73d8f2d00322c699 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 13:58:48 +0100 Subject: net: usb: pegasus: validate USB endpoints The pegasus driver should validate that the device it is probing has the proper number and types of USB endpoints it is expecting before it binds to it. If a malicious device were to not have the same urbs the driver will crash later on when it blindly accesses these endpoints. Cc: Petko Manolov Cc: stable Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026022347-legibly-attest-cc5c@gregkh Signed-off-by: Jakub Kicinski --- drivers/net/usb/pegasus.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index b84968c5f074..c497202b78e8 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -812,8 +812,19 @@ static void unlink_all_urbs(pegasus_t *pegasus) static int alloc_urbs(pegasus_t *pegasus) { + static const u8 bulk_ep_addr[] = { + 1 | USB_DIR_IN, + 2 | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + 3 | USB_DIR_IN, + 0}; int res = -ENOMEM; + if (!usb_check_bulk_endpoints(pegasus->intf, bulk_ep_addr) || + !usb_check_int_endpoints(pegasus->intf, int_ep_addr)) + return -ENODEV; + pegasus->rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!pegasus->rx_urb) { return res; @@ -1168,6 +1179,7 @@ static int pegasus_probe(struct usb_interface *intf, pegasus = netdev_priv(net); pegasus->dev_index = dev_index; + pegasus->intf = intf; res = alloc_urbs(pegasus); if (res < 0) { @@ -1179,7 +1191,6 @@ static int pegasus_probe(struct usb_interface *intf, INIT_DELAYED_WORK(&pegasus->carrier_check, check_carrier); - pegasus->intf = intf; pegasus->usb = dev; pegasus->net = net; -- cgit v1.2.3 From c58b6c29a4c9b8125e8ad3bca0637e00b71e2693 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 13:59:26 +0100 Subject: net: usb: kalmia: validate USB endpoints The kalmia driver should validate that the device it is probing has the proper number and types of USB endpoints it is expecting before it binds to it. If a malicious device were to not have the same urbs the driver will crash later on when it blindly accesses these endpoints. Cc: stable Signed-off-by: Greg Kroah-Hartman Reviewed-by: Simon Horman Fixes: d40261236e8e ("net/usb: Add Samsung Kalmia driver for Samsung GT-B3730") Link: https://patch.msgid.link/2026022326-shack-headstone-ef6f@gregkh Signed-off-by: Jakub Kicinski --- drivers/net/usb/kalmia.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c index 613fc6910f14..ee9c48f7f68f 100644 --- a/drivers/net/usb/kalmia.c +++ b/drivers/net/usb/kalmia.c @@ -132,11 +132,18 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf) { int status; u8 ethernet_addr[ETH_ALEN]; + static const u8 ep_addr[] = { + 1 | USB_DIR_IN, + 2 | USB_DIR_OUT, + 0}; /* Don't bind to AT command interface */ if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) return -EINVAL; + if (!usb_check_bulk_endpoints(intf, ep_addr)) + return -ENODEV; + dev->in = usb_rcvbulkpipe(dev->udev, 0x81 & USB_ENDPOINT_NUMBER_MASK); dev->out = usb_sndbulkpipe(dev->udev, 0x02 & USB_ENDPOINT_NUMBER_MASK); dev->status = NULL; -- cgit v1.2.3 From 4b063c002ca759d1b299988ee23f564c9609c875 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Feb 2026 14:00:06 +0100 Subject: net: usb: kaweth: validate USB endpoints The kaweth driver should validate that the device it is probing has the proper number and types of USB endpoints it is expecting before it binds to it. If a malicious device were to not have the same urbs the driver will crash later on when it blindly accesses these endpoints. Cc: stable Signed-off-by: Greg Kroah-Hartman Reviewed-by: Simon Horman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://patch.msgid.link/2026022305-substance-virtual-c728@gregkh Signed-off-by: Jakub Kicinski --- drivers/net/usb/kaweth.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers') diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index e01d14f6c366..cb2472b59e10 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -883,6 +883,13 @@ static int kaweth_probe( const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; int result = 0; int rv = -EIO; + static const u8 bulk_ep_addr[] = { + 1 | USB_DIR_IN, + 2 | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + 3 | USB_DIR_IN, + 0}; dev_dbg(dev, "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n", @@ -896,6 +903,12 @@ static int kaweth_probe( (int)udev->descriptor.bLength, (int)udev->descriptor.bDescriptorType); + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) { + dev_err(dev, "couldn't find required endpoints\n"); + return -ENODEV; + } + netdev = alloc_etherdev(sizeof(*kaweth)); if (!netdev) return -ENOMEM; -- cgit v1.2.3 From 676c7af91fcd740d34e7cb788cbc58e3bcafde39 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 24 Feb 2026 19:04:04 +0800 Subject: dpll: zl3073x: Remove redundant cleanup in devm_dpll_init() The devm_add_action_or_reset() function already executes the cleanup action on failure before returning an error, so the explicit goto error and subsequent zl3073x_dev_dpll_fini() call causes double cleanup. Fixes: ebb1031c5137 ("dpll: zl3073x: Refactor DPLL initialization") Reviewed-by: Ivan Vecera Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260224-dpll-v2-1-d7786414a830@gmail.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/core.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 8c5ee28e02f4..37f3c33570ee 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -981,11 +981,7 @@ zl3073x_devm_dpll_init(struct zl3073x_dev *zldev, u8 num_dplls) } /* Add devres action to release DPLL related resources */ - rc = devm_add_action_or_reset(zldev->dev, zl3073x_dev_dpll_fini, zldev); - if (rc) - goto error; - - return 0; + return devm_add_action_or_reset(zldev->dev, zl3073x_dev_dpll_fini, zldev); error: zl3073x_dev_dpll_fini(zldev); -- cgit v1.2.3 From f975a0955276579e2176a134366ed586071c7c6a Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Tue, 24 Feb 2026 04:38:36 -0800 Subject: net: mana: Fix double destroy_workqueue on service rescan PCI path While testing corner cases in the driver, a use-after-free crash was found on the service rescan PCI path. When mana_serv_reset() calls mana_gd_suspend(), mana_gd_cleanup() destroys gc->service_wq. If the subsequent mana_gd_resume() fails with -ETIMEDOUT or -EPROTO, the code falls through to mana_serv_rescan() which triggers pci_stop_and_remove_bus_device(). This invokes the PCI .remove callback (mana_gd_remove), which calls mana_gd_cleanup() a second time, attempting to destroy the already- freed workqueue. Fix this by NULL-checking gc->service_wq in mana_gd_cleanup() and setting it to NULL after destruction. Call stack of issue for reference: [Sat Feb 21 18:53:48 2026] Call Trace: [Sat Feb 21 18:53:48 2026] [Sat Feb 21 18:53:48 2026] mana_gd_cleanup+0x33/0x70 [mana] [Sat Feb 21 18:53:48 2026] mana_gd_remove+0x3a/0xc0 [mana] [Sat Feb 21 18:53:48 2026] pci_device_remove+0x41/0xb0 [Sat Feb 21 18:53:48 2026] device_remove+0x46/0x70 [Sat Feb 21 18:53:48 2026] device_release_driver_internal+0x1e3/0x250 [Sat Feb 21 18:53:48 2026] device_release_driver+0x12/0x20 [Sat Feb 21 18:53:48 2026] pci_stop_bus_device+0x6a/0x90 [Sat Feb 21 18:53:48 2026] pci_stop_and_remove_bus_device+0x13/0x30 [Sat Feb 21 18:53:48 2026] mana_do_service+0x180/0x290 [mana] [Sat Feb 21 18:53:48 2026] mana_serv_func+0x24/0x50 [mana] [Sat Feb 21 18:53:48 2026] process_one_work+0x190/0x3d0 [Sat Feb 21 18:53:48 2026] worker_thread+0x16e/0x2e0 [Sat Feb 21 18:53:48 2026] kthread+0xf7/0x130 [Sat Feb 21 18:53:48 2026] ? __pfx_worker_thread+0x10/0x10 [Sat Feb 21 18:53:48 2026] ? __pfx_kthread+0x10/0x10 [Sat Feb 21 18:53:48 2026] ret_from_fork+0x269/0x350 [Sat Feb 21 18:53:48 2026] ? __pfx_kthread+0x10/0x10 [Sat Feb 21 18:53:48 2026] ret_from_fork_asm+0x1a/0x30 [Sat Feb 21 18:53:48 2026] Fixes: 505cc26bcae0 ("net: mana: Add support for auxiliary device servicing events") Reviewed-by: Haiyang Zhang Signed-off-by: Dipayaan Roy Reviewed-by: Simon Horman Link: https://patch.msgid.link/aZ2bzL64NagfyHpg@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 5 ++++- drivers/net/ethernet/microsoft/mana/mana_en.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 0055c231acf6..3926d18f1840 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1946,7 +1946,10 @@ static void mana_gd_cleanup(struct pci_dev *pdev) mana_gd_remove_irqs(pdev); - destroy_workqueue(gc->service_wq); + if (gc->service_wq) { + destroy_workqueue(gc->service_wq); + gc->service_wq = NULL; + } dev_dbg(&pdev->dev, "mana gdma cleanup successful\n"); } diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9b5a72ada5c4..f69e42651359 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3762,7 +3762,9 @@ void mana_rdma_remove(struct gdma_dev *gd) } WRITE_ONCE(gd->rdma_teardown, true); - flush_workqueue(gc->service_wq); + + if (gc->service_wq) + flush_workqueue(gc->service_wq); if (gd->adev) remove_adev(gd); -- cgit v1.2.3 From bb4c698633c0e19717586a6524a33196cff01a32 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 24 Feb 2026 14:57:08 +0200 Subject: team: avoid NETDEV_CHANGEMTU event when unregistering slave syzbot is reporting unregister_netdevice: waiting for netdevsim0 to become free. Usage count = 3 ref_tracker: netdev@ffff88807dcf8618 has 1/2 users at __netdev_tracker_alloc include/linux/netdevice.h:4400 [inline] netdev_hold include/linux/netdevice.h:4429 [inline] inetdev_init+0x201/0x4e0 net/ipv4/devinet.c:286 inetdev_event+0x251/0x1610 net/ipv4/devinet.c:1600 notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85 call_netdevice_notifiers_mtu net/core/dev.c:2318 [inline] netif_set_mtu_ext+0x5aa/0x800 net/core/dev.c:9886 netif_set_mtu+0xd7/0x1b0 net/core/dev.c:9907 dev_set_mtu+0x126/0x260 net/core/dev_api.c:248 team_port_del+0xb07/0xcb0 drivers/net/team/team_core.c:1333 team_del_slave drivers/net/team/team_core.c:1936 [inline] team_device_event+0x207/0x5b0 drivers/net/team/team_core.c:2929 notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2281 [inline] call_netdevice_notifiers net/core/dev.c:2295 [inline] __dev_change_net_namespace+0xcb7/0x2050 net/core/dev.c:12592 do_setlink+0x2ce/0x4590 net/core/rtnetlink.c:3060 rtnl_changelink net/core/rtnetlink.c:3776 [inline] __rtnl_newlink net/core/rtnetlink.c:3935 [inline] rtnl_newlink+0x15a9/0x1be0 net/core/rtnetlink.c:4072 rtnetlink_rcv_msg+0x7d5/0xbe0 net/core/rtnetlink.c:6958 netlink_rcv_skb+0x232/0x4b0 net/netlink/af_netlink.c:2550 netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline] netlink_unicast+0x80f/0x9b0 net/netlink/af_netlink.c:1344 netlink_sendmsg+0x813/0xb40 net/netlink/af_netlink.c:1894 problem. Ido Schimmel found steps to reproduce ip link add name team1 type team ip link add name dummy1 mtu 1499 master team1 type dummy ip netns add ns1 ip link set dev dummy1 netns ns1 ip -n ns1 link del dev dummy1 and also found that the same issue was fixed in the bond driver in commit f51048c3e07b ("bonding: avoid NETDEV_CHANGEMTU event when unregistering slave"). Let's do similar thing for the team driver, with commit ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") and commit 303a8487a657 ("net: s/__dev_set_mtu/__netif_set_mtu/") also applied. Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=881d65229ca4f9ae8c84 Suggested-by: Ido Schimmel Reviewed-by: Jiri Pirko Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Tetsuo Handa Signed-off-by: Ido Schimmel Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260224125709.317574-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/team/team_core.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index c08a5c1bd6e4..a0fe998cc055 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1292,7 +1292,7 @@ err_set_mtu: static void __team_port_change_port_removed(struct team_port *port); -static int team_port_del(struct team *team, struct net_device *port_dev) +static int team_port_del(struct team *team, struct net_device *port_dev, bool unregister) { struct net_device *dev = team->dev; struct team_port *port; @@ -1330,7 +1330,13 @@ static int team_port_del(struct team *team, struct net_device *port_dev) __team_port_change_port_removed(port); team_port_set_orig_dev_addr(port); - dev_set_mtu(port_dev, port->orig.mtu); + if (unregister) { + netdev_lock_ops(port_dev); + __netif_set_mtu(port_dev, port->orig.mtu); + netdev_unlock_ops(port_dev); + } else { + dev_set_mtu(port_dev, port->orig.mtu); + } kfree_rcu(port, rcu); netdev_info(dev, "Port device %s removed\n", portname); netdev_compute_master_upper_features(team->dev, true); @@ -1634,7 +1640,7 @@ static void team_uninit(struct net_device *dev) ASSERT_RTNL(); list_for_each_entry_safe(port, tmp, &team->port_list, list) - team_port_del(team, port->dev); + team_port_del(team, port->dev, false); __team_change_mode(team, NULL); /* cleanup */ __team_options_unregister(team, team_options, ARRAY_SIZE(team_options)); @@ -1933,7 +1939,16 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev) ASSERT_RTNL(); - return team_port_del(team, port_dev); + return team_port_del(team, port_dev, false); +} + +static int team_del_slave_on_unregister(struct net_device *dev, struct net_device *port_dev) +{ + struct team *team = netdev_priv(dev); + + ASSERT_RTNL(); + + return team_port_del(team, port_dev, true); } static netdev_features_t team_fix_features(struct net_device *dev, @@ -2926,7 +2941,7 @@ static int team_device_event(struct notifier_block *unused, !!netif_oper_up(port->dev)); break; case NETDEV_UNREGISTER: - team_del_slave(port->team->dev, dev); + team_del_slave_on_unregister(port->team->dev, dev); break; case NETDEV_FEAT_CHANGE: if (!port->team->notifier_ctx) { @@ -2999,3 +3014,4 @@ MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Jiri Pirko "); MODULE_DESCRIPTION("Ethernet team device driver"); MODULE_ALIAS_RTNL_LINK(DRV_NAME); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); -- cgit v1.2.3 From 2700b7e603af39ca55fe9fc876ca123efd44680f Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 24 Feb 2026 13:46:48 +0200 Subject: net/mlx5: DR, Fix circular locking dependency in dump Fix a circular locking dependency between dbg_mutex and the domain rx/tx mutexes that could lead to a deadlock. The dump path in dr_dump_domain_all() was acquiring locks in the order: dbg_mutex -> rx.mutex -> tx.mutex While the table/matcher creation paths acquire locks in the order: rx.mutex -> tx.mutex -> dbg_mutex This inverted lock ordering creates a circular dependency. Fix this by changing dr_dump_domain_all() to acquire the domain lock before dbg_mutex, matching the order used in mlx5dr_table_create() and mlx5dr_matcher_create(). Lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 6.19.0-rc6net_next_e817c4e #1 Not tainted ------------------------------------------------------ sos/30721 is trying to acquire lock: ffff888102df5900 (&dmn->info.rx.mutex){+.+.}-{4:4}, at: dr_dump_start+0x131/0x450 [mlx5_core] but task is already holding lock: ffff888102df5bc0 (&dmn->dump_info.dbg_mutex){+.+.}-{4:4}, at: dr_dump_start+0x10b/0x450 [mlx5_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&dmn->dump_info.dbg_mutex){+.+.}-{4:4}: __mutex_lock+0x91/0x1060 mlx5dr_matcher_create+0x377/0x5e0 [mlx5_core] mlx5_cmd_dr_create_flow_group+0x62/0xd0 [mlx5_core] mlx5_create_flow_group+0x113/0x1c0 [mlx5_core] mlx5_chains_create_prio+0x453/0x2290 [mlx5_core] mlx5_chains_get_table+0x2e2/0x980 [mlx5_core] esw_chains_create+0x1e6/0x3b0 [mlx5_core] esw_create_offloads_fdb_tables.cold+0x62/0x63f [mlx5_core] esw_offloads_enable+0x76f/0xd20 [mlx5_core] mlx5_eswitch_enable_locked+0x35a/0x500 [mlx5_core] mlx5_devlink_eswitch_mode_set+0x561/0x950 [mlx5_core] devlink_nl_eswitch_set_doit+0x67/0xe0 genl_family_rcv_msg_doit+0xe0/0x130 genl_rcv_msg+0x188/0x290 netlink_rcv_skb+0x4b/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x1ed/0x2c0 netlink_sendmsg+0x210/0x450 __sock_sendmsg+0x38/0x60 __sys_sendto+0x119/0x180 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x70/0xd00 entry_SYSCALL_64_after_hwframe+0x4b/0x53 -> #1 (&dmn->info.tx.mutex){+.+.}-{4:4}: __mutex_lock+0x91/0x1060 mlx5dr_table_create+0x11d/0x530 [mlx5_core] mlx5_cmd_dr_create_flow_table+0x62/0x140 [mlx5_core] __mlx5_create_flow_table+0x46f/0x960 [mlx5_core] mlx5_create_flow_table+0x16/0x20 [mlx5_core] esw_create_offloads_fdb_tables+0x136/0x240 [mlx5_core] esw_offloads_enable+0x76f/0xd20 [mlx5_core] mlx5_eswitch_enable_locked+0x35a/0x500 [mlx5_core] mlx5_devlink_eswitch_mode_set+0x561/0x950 [mlx5_core] devlink_nl_eswitch_set_doit+0x67/0xe0 genl_family_rcv_msg_doit+0xe0/0x130 genl_rcv_msg+0x188/0x290 netlink_rcv_skb+0x4b/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x1ed/0x2c0 netlink_sendmsg+0x210/0x450 __sock_sendmsg+0x38/0x60 __sys_sendto+0x119/0x180 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x70/0xd00 entry_SYSCALL_64_after_hwframe+0x4b/0x53 -> #0 (&dmn->info.rx.mutex){+.+.}-{4:4}: __lock_acquire+0x18b6/0x2eb0 lock_acquire+0xd3/0x2c0 __mutex_lock+0x91/0x1060 dr_dump_start+0x131/0x450 [mlx5_core] seq_read_iter+0xe3/0x410 seq_read+0xfb/0x130 full_proxy_read+0x53/0x80 vfs_read+0xba/0x330 ksys_read+0x65/0xe0 do_syscall_64+0x70/0xd00 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dmn->dump_info.dbg_mutex); lock(&dmn->info.tx.mutex); lock(&dmn->dump_info.dbg_mutex); lock(&dmn->info.rx.mutex); *** DEADLOCK *** Fixes: 9222f0b27da2 ("net/mlx5: DR, Add support for dumping steering info") Signed-off-by: Shay Drory Reviewed-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c index 8803fa071c50..18362e9c3314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c @@ -1051,8 +1051,8 @@ static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) struct mlx5dr_table *tbl; int ret; - mutex_lock(&dmn->dump_info.dbg_mutex); mlx5dr_domain_lock(dmn); + mutex_lock(&dmn->dump_info.dbg_mutex); ret = dr_dump_domain(file, dmn); if (ret < 0) @@ -1065,8 +1065,8 @@ static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) } unlock_mutex: - mlx5dr_domain_unlock(dmn); mutex_unlock(&dmn->dump_info.dbg_mutex); + mlx5dr_domain_unlock(dmn); return ret; } -- cgit v1.2.3 From bd7b9f83fb9f85228c3ac9748d9cba9fab7fb5a2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 24 Feb 2026 13:46:49 +0200 Subject: net/mlx5: LAG, disable MPESW in lag_disable_change() mlx5_lag_disable_change() unconditionally called mlx5_disable_lag() when LAG was active, which is incorrect for MLX5_LAG_MODE_MPESW. Hnece, call mlx5_disable_mpesw() when running in MPESW mode. Fixes: a32327a3a02c ("net/mlx5: Lag, Control MultiPort E-Switch single FDB mode") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 8 ++++++-- drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h | 5 +++++ 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 9fe47c836ebd..859f042caf79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1869,8 +1869,12 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev) mutex_lock(&ldev->lock); ldev->mode_changes_in_progress++; - if (__mlx5_lag_is_active(ldev)) - mlx5_disable_lag(ldev); + if (__mlx5_lag_is_active(ldev)) { + if (ldev->mode == MLX5_LAG_MODE_MPESW) + mlx5_lag_disable_mpesw(ldev); + else + mlx5_disable_lag(ldev); + } mutex_unlock(&ldev->lock); mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 04762562d7d9..a63d48d18878 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -65,7 +65,7 @@ err_metadata: return err; } -static int enable_mpesw(struct mlx5_lag *ldev) +static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0; int err; @@ -126,7 +126,7 @@ err_add_devices: return err; } -static void disable_mpesw(struct mlx5_lag *ldev) +void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev) { if (ldev->mode == MLX5_LAG_MODE_MPESW) { mlx5_mpesw_metadata_cleanup(ldev); @@ -152,9 +152,9 @@ static void mlx5_mpesw_work(struct work_struct *work) } if (mpesww->op == MLX5_MPESW_OP_ENABLE) - mpesww->result = enable_mpesw(ldev); + mpesww->result = mlx5_lag_enable_mpesw(ldev); else if (mpesww->op == MLX5_MPESW_OP_DISABLE) - disable_mpesw(ldev); + mlx5_lag_disable_mpesw(ldev); unlock: mutex_unlock(&ldev->lock); mlx5_devcom_comp_unlock(devcom); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index f5d9b5c97b0d..b767dbb4f457 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -31,6 +31,11 @@ int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev, bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev); int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev); +#ifdef CONFIG_MLX5_ESWITCH +void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev); +#else +static inline void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev) {} +#endif /* CONFIG_MLX5_ESWITCH */ #ifdef CONFIG_MLX5_ESWITCH void mlx5_mpesw_speed_update_work(struct work_struct *work); -- cgit v1.2.3 From d7073e8b978ae925f1f0f08754f33f84d8547ea7 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 24 Feb 2026 13:46:50 +0200 Subject: net/mlx5: E-switch, Clear legacy flag when moving to switchdev The cited commit introduced MLX5_PRIV_FLAGS_SWITCH_LEGACY to identify when a transition to legacy mode is requested via devlink. However, the logic failed to clear this flag if the mode was subsequently changed back to MLX5_ESWITCH_OFFLOADS (switchdev). Consequently, if a user toggled from legacy to switchdev, the flag remained set, leaving the driver with wrong state indicating Fix this by explicitly clearing the MLX5_PRIV_FLAGS_SWITCH_LEGACY bit when the requested mode is MLX5_ESWITCH_OFFLOADS. Fixes: 2a4f56fbcc47 ("net/mlx5e: Keep netdev when leave switchdev for devlink set legacy only") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1b439cef3719..9d51d030596c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4068,6 +4068,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (mlx5_mode == MLX5_ESWITCH_LEGACY) esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; + if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) + esw->dev->priv.flags &= ~MLX5_PRIV_FLAGS_SWITCH_LEGACY; mlx5_eswitch_disable_locked(esw); if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { -- cgit v1.2.3 From 60253042c0b87b61596368489c44d12ba720d11c Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 24 Feb 2026 13:46:51 +0200 Subject: net/mlx5: Fix missing devlink lock in SRIOV enable error path The cited commit miss to add locking in the error path of mlx5_sriov_enable(). When pci_enable_sriov() fails, mlx5_device_disable_sriov() is called to clean up. This cleanup function now expects to be called with the devlink instance lock held. Add the missing devl_lock(devlink) and devl_unlock(devlink) Fixes: 84a433a40d0e ("net/mlx5: Lock mlx5 devlink reload callbacks") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a2fc937d5461..172862a70c70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -193,7 +193,9 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); + devl_lock(devlink); mlx5_device_disable_sriov(dev, num_vfs, true, true); + devl_unlock(devlink); } return err; } -- cgit v1.2.3 From 859380694f434597407632c29f30fdb5e763e6cc Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 24 Feb 2026 13:46:52 +0200 Subject: net/mlx5e: Fix "scheduling while atomic" in IPsec MAC address query Fix a "scheduling while atomic" bug in mlx5e_ipsec_init_macs() by replacing mlx5_query_mac_address() with ether_addr_copy() to get the local MAC address directly from netdev->dev_addr. The issue occurs because mlx5_query_mac_address() queries the hardware which involves mlx5_cmd_exec() that can sleep, but it is called from the mlx5e_ipsec_handle_event workqueue which runs in atomic context. The MAC address is already available in netdev->dev_addr, so no need to query hardware. This avoids the sleeping call and resolves the bug. Call trace: BUG: scheduling while atomic: kworker/u112:2/69344/0x00000200 __schedule+0x7ab/0xa20 schedule+0x1c/0xb0 schedule_timeout+0x6e/0xf0 __wait_for_common+0x91/0x1b0 cmd_exec+0xa85/0xff0 [mlx5_core] mlx5_cmd_exec+0x1f/0x50 [mlx5_core] mlx5_query_nic_vport_mac_address+0x7b/0xd0 [mlx5_core] mlx5_query_mac_address+0x19/0x30 [mlx5_core] mlx5e_ipsec_init_macs+0xc1/0x720 [mlx5_core] mlx5e_ipsec_build_accel_xfrm_attrs+0x422/0x670 [mlx5_core] mlx5e_ipsec_handle_event+0x2b9/0x460 [mlx5_core] process_one_work+0x178/0x2e0 worker_thread+0x2ea/0x430 Fixes: cee137a63431 ("net/mlx5e: Handle ESN update events") Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224114652.1787431-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 9c7064187ed0..f03507a522b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -259,7 +259,6 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry, static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { - struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); struct mlx5e_ipsec_addr *addrs = &attrs->addrs; struct net_device *netdev = sa_entry->dev; struct xfrm_state *x = sa_entry->x; @@ -276,7 +275,7 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->type != XFRM_DEV_OFFLOAD_PACKET) return; - mlx5_query_mac_address(mdev, addr); + ether_addr_copy(addr, netdev->dev_addr); switch (attrs->dir) { case XFRM_DEV_OFFLOAD_IN: src = attrs->dmac; -- cgit v1.2.3 From 7c2889af823340d1d410939b9d547bf184d5fa54 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Feb 2026 15:48:59 +0200 Subject: RDMA/uverbs: Import DMA-BUF module in uverbs_std_types_dmabuf file Fix the following compilation error: ERROR: modpost: module ib_uverbs uses symbol dma_buf_move_notify from namespace DMA_BUF, but does not import it. Fixes: 0ac6f4056c4a ("RDMA/uverbs: Add DMABUF object type and operations") Link: https://patch.msgid.link/20260225-fix-uverbs-compilation-v1-1-acf7b3d0f9fa@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_dmabuf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_std_types_dmabuf.c b/drivers/infiniband/core/uverbs_std_types_dmabuf.c index dfdfcd1d1a44..4a7f8b6f9dc8 100644 --- a/drivers/infiniband/core/uverbs_std_types_dmabuf.c +++ b/drivers/infiniband/core/uverbs_std_types_dmabuf.c @@ -10,6 +10,8 @@ #include "rdma_core.h" #include "uverbs.h" +MODULE_IMPORT_NS("DMA_BUF"); + static int uverbs_dmabuf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attachment) { -- cgit v1.2.3 From 8a5752c6dcc085a3bfc78589925182e4e98468c5 Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Tue, 24 Feb 2026 19:05:56 +0800 Subject: dpaa2-switch: validate num_ifs to prevent out-of-bounds write The driver obtains sw_attr.num_ifs from firmware via dpsw_get_attributes() but never validates it against DPSW_MAX_IF (64). This value controls iteration in dpaa2_switch_fdb_get_flood_cfg(), which writes port indices into the fixed-size cfg->if_id[DPSW_MAX_IF] array. When firmware reports num_ifs >= 64, the loop can write past the array bounds. Add a bound check for num_ifs in dpaa2_switch_init(). dpaa2_switch_fdb_get_flood_cfg() appends the control interface (port num_ifs) after all matched ports. When num_ifs == DPSW_MAX_IF and all ports match the flood filter, the loop fills all 64 slots and the control interface write overflows by one entry. The check uses >= because num_ifs == DPSW_MAX_IF is also functionally broken. build_if_id_bitmap() silently drops any ID >= 64: if (id[i] < DPSW_MAX_IF) bmap[id[i] / 64] |= ... Fixes: 539dda3c5d19 ("staging: dpaa2-switch: properly setup switching domains") Signed-off-by: Junrui Luo Reviewed-by: Ioana Ciornei Link: https://patch.msgid.link/SYBPR01MB78812B47B7F0470B617C408AAF74A@SYBPR01MB7881.ausprd01.prod.outlook.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 66240c340492..78e21b46a5ba 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -3034,6 +3034,13 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev) goto err_close; } + if (ethsw->sw_attr.num_ifs >= DPSW_MAX_IF) { + dev_err(dev, "DPSW num_ifs %u exceeds max %u\n", + ethsw->sw_attr.num_ifs, DPSW_MAX_IF); + err = -EINVAL; + goto err_close; + } + err = dpsw_get_api_version(ethsw->mc_io, 0, ðsw->major, ðsw->minor); -- cgit v1.2.3 From e96493229a6399e902062213c6381162464cdd50 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Tue, 24 Feb 2026 16:09:22 +0100 Subject: spi: stm32: fix missing pointer assignment in case of dma chaining Commit c4f2c05ab029 ("spi: stm32: fix pointer-to-pointer variables usage") introduced a regression since dma descriptors generated as part of the stm32_spi_prepare_rx_dma_mdma_chaining function are not well propagated to the caller function, leading to mdma-dma chaining being no more functional. Fixes: c4f2c05ab029 ("spi: stm32: fix pointer-to-pointer variables usage") Signed-off-by: Alain Volmat Acked-by: Antonio Quartulli Link: https://patch.msgid.link/20260224-spi-stm32-chaining-fix-v1-1-5da7a4851b66@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index b99de8c4cc99..33f211e159ef 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -1625,6 +1625,9 @@ static int stm32_spi_prepare_rx_dma_mdma_chaining(struct stm32_spi *spi, return -EINVAL; } + *rx_mdma_desc = _mdma_desc; + *rx_dma_desc = _dma_desc; + return 0; } -- cgit v1.2.3 From 9197e5949a41cfb5d44a6b8a860766266340d558 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sat, 28 Feb 2026 11:56:03 +0900 Subject: firewire: ohci: initialize page array to use alloc_pages_bulk() correctly The call of alloc_pages_bulk() skips to fill entries of page array when the entries already have values. While, 1394 OHCI PCI driver passes the page array without initializing. It could cause invalid state at PFN validation in vmap(). Fixes: f2ae92780ab9 ("firewire: ohci: split page allocation from dma mapping") Reported-by: John Ogness Reported-and-tested-by: Harald Arnesen Reported-and-tested-by: David Gow Closes: https://lore.kernel.org/lkml/87tsv1vig5.fsf@jogness.linutronix.de/ Signed-off-by: Takashi Sakamoto Signed-off-by: Linus Torvalds --- drivers/firewire/ohci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 1c868c1e4a49..8153d62c58f0 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -848,7 +848,7 @@ static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, { struct device *dev = ohci->card.device; unsigned int i; - struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES]; + struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES] = { NULL }; dma_addr_t dma_addrs[AR_BUFFERS]; void *vaddr; struct descriptor *d; -- cgit v1.2.3 From 23942b71f07cc99e39d9216a5b370df494759d8c Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Mon, 23 Feb 2026 02:24:34 +0800 Subject: regulator: mt6363: Fix incorrect and redundant IRQ disposal in probe In mt6363_regulator_probe(), devm_add_action_or_reset() is used to automatically dispose of the IRQ mapping if the probe fails or the device is removed. The manual call to irq_dispose_mapping() in the error path was redundant as the reset action already triggers mt6363_irq_remove(). Furthermore, the manual call incorrectly passed the hardware IRQ number (info->hwirq) instead of the virtual IRQ mapping (info->virq). Remove the redundant and incorrect manual disposal. Fixes: 3c36965df808 ("regulator: Add support for MediaTek MT6363 SPMI PMIC Regulators") Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260223-mt6363-v1-1-c99a2e8ac621@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/mt6363-regulator.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/regulator/mt6363-regulator.c b/drivers/regulator/mt6363-regulator.c index 03af5fa53600..0aebcbda0a19 100644 --- a/drivers/regulator/mt6363-regulator.c +++ b/drivers/regulator/mt6363-regulator.c @@ -899,10 +899,8 @@ static int mt6363_regulator_probe(struct platform_device *pdev) "Failed to map IRQ%d\n", info->hwirq); ret = devm_add_action_or_reset(dev, mt6363_irq_remove, &info->virq); - if (ret) { - irq_dispose_mapping(info->hwirq); + if (ret) return ret; - } config.driver_data = info; INIT_DELAYED_WORK(&info->oc_work, mt6363_oc_irq_enable_work); -- cgit v1.2.3 From 2d85ecd6fb0eb2fee0ffa040ec1ddea57b09bc38 Mon Sep 17 00:00:00 2001 From: Franz Schnyder Date: Wed, 18 Feb 2026 11:25:14 +0100 Subject: regulator: pf9453: Respect IRQ trigger settings from firmware The datasheet specifies, that the IRQ_B pin is pulled low when any unmasked interrupt bit status is changed, and it is released high once the application processor reads the INT1 register. As it specifies a level-low behavior, it should not force a falling-edge interrupt. Remove the IRQF_TRIGGER_FALLING to not force the falling-edge interrupt and instead rely on the flag from the device tree. Fixes: 0959b6706325 ("regulator: pf9453: add PMIC PF9453 support") Cc: stable@vger.kernel.org Signed-off-by: Franz Schnyder Link: https://patch.msgid.link/20260218102518.238943-2-fra.schnyder@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/pf9453-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/regulator/pf9453-regulator.c b/drivers/regulator/pf9453-regulator.c index 779a6fdb0574..eed3055d1c1c 100644 --- a/drivers/regulator/pf9453-regulator.c +++ b/drivers/regulator/pf9453-regulator.c @@ -809,7 +809,7 @@ static int pf9453_i2c_probe(struct i2c_client *i2c) } ret = devm_request_threaded_irq(pf9453->dev, pf9453->irq, NULL, pf9453_irq_handler, - (IRQF_TRIGGER_FALLING | IRQF_ONESHOT), + IRQF_ONESHOT, "pf9453-irq", pf9453); if (ret) return dev_err_probe(pf9453->dev, ret, "Failed to request IRQ: %d\n", pf9453->irq); -- cgit v1.2.3 From e84141846decb77d2826e553318a608b256804e5 Mon Sep 17 00:00:00 2001 From: Franz Schnyder Date: Mon, 2 Mar 2026 17:53:56 +0100 Subject: regulator: pf9453: Allow shared IRQ The PF9453 datasheet specifies the IRQ_B pin as an open drain output with level-low behavior. This makes it capable to share the interrupt line. To allow shared interrupts, the driver must handle the case if the interrupt has been triggered by another device. Set IRQF_SHARED to be able to share the interrupt line. If the interrupt has not been triggered by the PMIC, return IRQ_NONE. Signed-off-by: Franz Schnyder Link: https://patch.msgid.link/20260302165357.1797803-3-fra.schnyder@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/pf9453-regulator.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/regulator/pf9453-regulator.c b/drivers/regulator/pf9453-regulator.c index eed3055d1c1c..07cbcd312653 100644 --- a/drivers/regulator/pf9453-regulator.c +++ b/drivers/regulator/pf9453-regulator.c @@ -732,6 +732,9 @@ static irqreturn_t pf9453_irq_handler(int irq, void *data) return IRQ_NONE; } + if (!status) + return IRQ_NONE; + if (status & IRQ_RSTB) dev_warn(pf9453->dev, "IRQ_RSTB interrupt.\n"); @@ -809,7 +812,7 @@ static int pf9453_i2c_probe(struct i2c_client *i2c) } ret = devm_request_threaded_irq(pf9453->dev, pf9453->irq, NULL, pf9453_irq_handler, - IRQF_ONESHOT, + (IRQF_ONESHOT | IRQF_SHARED), "pf9453-irq", pf9453); if (ret) return dev_err_probe(pf9453->dev, ret, "Failed to request IRQ: %d\n", pf9453->irq); -- cgit v1.2.3