From 4c46a471be12216347ba707f8eadadbf5d68e698 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 3 Jun 2025 16:38:53 +0530 Subject: firmware: arm_ffa: Fix the missing entry in struct ffa_indirect_msg_hdr As per the spec, one 32 bit reserved entry is missing here, add it. Signed-off-by: Viresh Kumar Fixes: 910cc1acc9b4 ("firmware: arm_ffa: Add support for passing UUID in FFA_MSG_SEND2") Reviewed-by: Bertrand Marquis Message-Id: <28a624fbf416975de4fbe08cfbf7c2db89cb630e.1748948911.git.viresh.kumar@linaro.org> Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 5bded24dc24f..e1634897e159 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -283,6 +283,7 @@ struct ffa_indirect_msg_hdr { u32 offset; u32 send_recv_id; u32 size; + u32 res1; uuid_t uuid; }; -- cgit v1.2.3 From cf0233491b3a15933234a26efd9ecbc1c0764674 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 5 Jun 2025 14:25:49 +0300 Subject: phy: use per-PHY lockdep keys If the PHY driver uses another PHY internally (e.g. in case of eUSB2, repeaters are represented as PHYs), then it would trigger the following lockdep splat because all PHYs use a single static lockdep key and thus lockdep can not identify whether there is a dependency or not and reports a false positive. Make PHY subsystem use dynamic lockdep keys, assigning each driver a separate key. This way lockdep can correctly identify dependency graph between mutexes. ============================================ WARNING: possible recursive locking detected 6.15.0-rc7-next-20250522-12896-g3932f283970c #3455 Not tainted -------------------------------------------- kworker/u51:0/78 is trying to acquire lock: ffff0008116554f0 (&phy->mutex){+.+.}-{4:4}, at: phy_init+0x4c/0x12c but task is already holding lock: ffff000813c10cf0 (&phy->mutex){+.+.}-{4:4}, at: phy_init+0x4c/0x12c other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&phy->mutex); lock(&phy->mutex); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by kworker/u51:0/78: #0: ffff000800010948 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x18c/0x5ec #1: ffff80008036bdb0 (deferred_probe_work){+.+.}-{0:0}, at: process_one_work+0x1b4/0x5ec #2: ffff0008094ac8f8 (&dev->mutex){....}-{4:4}, at: __device_attach+0x38/0x188 #3: ffff000813c10cf0 (&phy->mutex){+.+.}-{4:4}, at: phy_init+0x4c/0x12c stack backtrace: CPU: 0 UID: 0 PID: 78 Comm: kworker/u51:0 Not tainted 6.15.0-rc7-next-20250522-12896-g3932f283970c #3455 PREEMPT Hardware name: Qualcomm CRD, BIOS 6.0.240904.BOOT.MXF.2.4-00528.1-HAMOA-1 09/ 4/2024 Workqueue: events_unbound deferred_probe_work_func Call trace: show_stack+0x18/0x24 (C) dump_stack_lvl+0x90/0xd0 dump_stack+0x18/0x24 print_deadlock_bug+0x258/0x348 __lock_acquire+0x10fc/0x1f84 lock_acquire+0x1c8/0x338 __mutex_lock+0xb8/0x59c mutex_lock_nested+0x24/0x30 phy_init+0x4c/0x12c snps_eusb2_hsphy_init+0x54/0x1a0 phy_init+0xe0/0x12c dwc3_core_init+0x450/0x10b4 dwc3_core_probe+0xce4/0x15fc dwc3_probe+0x64/0xb0 platform_probe+0x68/0xc4 really_probe+0xbc/0x298 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x160 __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x208/0x5ec worker_thread+0x1c0/0x368 kthread+0x14c/0x20c ret_from_fork+0x10/0x20 Fixes: 3584f6392f09 ("phy: qcom: phy-qcom-snps-eusb2: Add support for eUSB2 repeater") Fixes: e2463559ff1d ("phy: amlogic: Add Amlogic AXG PCIE PHY Driver") Reviewed-by: Neil Armstrong Reviewed-by: Abel Vesa Reported-by: Johan Hovold Link: https://lore.kernel.org/lkml/ZnpoAVGJMG4Zu-Jw@hovoldconsulting.com/ Reviewed-by: Johan Hovold Tested-by: Johan Hovold Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250605-phy-subinit-v3-1-1e1e849e10cd@oss.qualcomm.com Signed-off-by: Vinod Koul --- include/linux/phy/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 437769e061b7..13add0c2c407 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -154,6 +154,7 @@ struct phy_attrs { * @id: id of the phy device * @ops: function pointers for performing phy operations * @mutex: mutex to protect phy_ops + * @lockdep_key: lockdep information for this mutex * @init_count: used to protect when the PHY is used by multiple consumers * @power_count: used to protect when the PHY is used by multiple consumers * @attrs: used to specify PHY specific attributes @@ -165,6 +166,7 @@ struct phy { int id; const struct phy_ops *ops; struct mutex mutex; + struct lock_class_key lockdep_key; int init_count; int power_count; struct phy_attrs attrs; -- cgit v1.2.3 From d8010d4ba43e9f790925375a7de100604a5e2dba Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 11 Sep 2024 10:53:08 +0200 Subject: x86/bugs: Add a Transient Scheduler Attacks mitigation Add the required features detection glue to bugs.c et all in order to support the TSA mitigation. Co-developed-by: Kim Phillips Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Pawan Gupta --- include/linux/cpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 96a3a0d6a60e..6378370a952f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -82,6 +82,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From 3b18405763c1ebb1efc15feef5563c9cdb2cc3a7 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 11 Jun 2025 14:14:15 +0300 Subject: usb: acpi: fix device link removal The device link to the USB4 host interface has to be removed manually since it's no longer auto removed. Fixes: 623dae3e7084 ("usb: acpi: fix boot hang due to early incorrect 'tunneled' USB3 device links") Cc: stable Signed-off-by: Heikki Krogerus Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20250611111415.2707865-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 1b2545b4363b..92c752f5446f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -614,6 +614,7 @@ struct usb3_lpm_parameters { * FIXME -- complete doc * @authenticated: Crypto authentication passed * @tunnel_mode: Connection native or tunneled over USB4 + * @usb4_link: device link to the USB4 host interface * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -724,6 +725,7 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; enum usb_link_tunnel_mode tunnel_mode; + struct device_link *usb4_link; int slot_id; struct usb2_lpm_parameters l1_params; -- cgit v1.2.3 From 4580dbef5ce0f95a4bd8ac2d007bc4fbf1539332 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 20 Jun 2025 13:28:08 -0400 Subject: KVM: TDX: Exit to userspace for SetupEventNotifyInterrupt Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37891580d05d..7a4c35ff03fe 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -467,6 +467,10 @@ struct kvm_run { __u64 leaf; __u64 r11, r12, r13, r14; } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; }; } tdx; /* Fix the size of the union. */ -- cgit v1.2.3 From 64f7548aad63d2fbca2eeb6eb33361c218ebd5a5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 21:19:40 +0200 Subject: lib/crypto: sha256: Mark sha256_choose_blocks as __always_inline When the compiler chooses to not inline sha256_choose_blocks() in the purgatory code, it fails to link against the missing CPU specific version: x86_64-linux-ld: arch/x86/purgatory/purgatory.ro: in function `sha256_choose_blocks.part.0': sha256.c:(.text+0x6a6): undefined reference to `irq_fpu_usable' sha256.c:(.text+0x6c7): undefined reference to `sha256_blocks_arch' sha256.c:(.text+0x6cc): undefined reference to `sha256_blocks_simd' Mark this function as __always_inline to prevent this, same as sha256_finup(). Fixes: 5b90a779bc54 ("crypto: lib/sha256 - Add helpers for block-based shash") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250620191952.1867578-1-arnd@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/sha2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index b9bccd3ff57f..21a27fd5e198 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -25,7 +25,7 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); -static inline void sha256_choose_blocks( +static __always_inline void sha256_choose_blocks( u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, bool force_generic, bool force_simd) { -- cgit v1.2.3 From cbe4134ea4bc493239786220bd69cb8a13493190 Mon Sep 17 00:00:00 2001 From: Shivank Garg Date: Fri, 20 Jun 2025 07:03:30 +0000 Subject: fs: export anon_inode_make_secure_inode() and fix secretmem LSM bypass Export anon_inode_make_secure_inode() to allow KVM guest_memfd to create anonymous inodes with proper security context. This replaces the current pattern of calling alloc_anon_inode() followed by inode_init_security_anon() for creating security context manually. This change also fixes a security regression in secretmem where the S_PRIVATE flag was not cleared after alloc_anon_inode(), causing LSM/SELinux checks to be bypassed for secretmem file descriptors. As guest_memfd currently resides in the KVM module, we need to export this symbol for use outside the core kernel. In the future, guest_memfd might be moved to core-mm, at which point the symbols no longer would have to be exported. When/if that happens is still unclear. Fixes: 2bfe15c52612 ("mm: create security context for memfd_secret inodes") Suggested-by: David Hildenbrand Suggested-by: Mike Rapoport Signed-off-by: Shivank Garg Link: https://lore.kernel.org/20250620070328.803704-3-shivankg@amd.com Acked-by: "Mike Rapoport (Microsoft)" Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4ec77da65f14..3a8e643c4279 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3606,6 +3606,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; -- cgit v1.2.3 From a24cc6ce1933eade12aa2b9859de0fcd2dac2c06 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 23 Jun 2025 10:34:08 +0200 Subject: futex: Initialize futex_phash_new during fork(). During a hash resize operation the new private hash is stored in mm_struct::futex_phash_new if the current hash can not be immediately replaced. The new hash must not be copied during fork() into the new task. Doing so will lead to a double-free of the memory by the two tasks. Initialize the mm_struct::futex_phash_new during fork(). Closes: https://lore.kernel.org/all/aFBQ8CBKmRzEqIfS@mozart.vkv.me/ Fixes: bd54df5ea7cad ("futex: Allow to resize the private local hash") Reported-by: Calvin Owens Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Tested-by: Calvin Owens Link: https://lkml.kernel.org/r/20250623083408.jTiJiC6_@linutronix.de --- include/linux/futex.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 005b040c4791..b37193653e6b 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -89,6 +89,7 @@ void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) { RCU_INIT_POINTER(mm->futex_phash, NULL); + mm->futex_phash_new = NULL; mutex_init(&mm->futex_hash_lock); } -- cgit v1.2.3 From af4db5a35a4ef7a68046883bfd12468007db38f1 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Wed, 18 Jun 2025 22:49:42 +0000 Subject: usb: typec: altmodes/displayport: do not index invalid pin_assignments A poorly implemented DisplayPort Alt Mode port partner can indicate that its pin assignment capabilities are greater than the maximum value, DP_PIN_ASSIGN_F. In this case, calls to pin_assignment_show will cause a BRK exception due to an out of bounds array access. Prevent for loop in pin_assignment_show from accessing invalid values in pin_assignments by adding DP_PIN_ASSIGN_MAX value in typec_dp.h and using i < DP_PIN_ASSIGN_MAX as a loop condition. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable Signed-off-by: RD Babiera Reviewed-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250618224943.3263103-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_dp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index f2da264d9c14..acb0ad03bdac 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -57,6 +57,7 @@ enum { DP_PIN_ASSIGN_D, DP_PIN_ASSIGN_E, DP_PIN_ASSIGN_F, /* Not supported after v1.0b */ + DP_PIN_ASSIGN_MAX, }; /* DisplayPort alt mode specific commands */ -- cgit v1.2.3 From 67caa528ae08cd05e485c0ea6aea0baaf6579b06 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Sat, 21 Jun 2025 10:28:41 -0600 Subject: ublk: fix narrowing warnings in UAPI header When a C++ file compiled with -Wc++11-narrowing includes the UAPI header linux/ublk_cmd.h, ublk_sqe_addr_to_auto_buf_reg()'s assignments of u64 values to u8, u16, and u32 fields result in compiler warnings. Add explicit casts to the intended types to avoid these warnings. Drop the unnecessary bitmasks. Reported-by: Uday Shankar Signed-off-by: Caleb Sander Mateos Fixes: 99c1e4eb6a3f ("ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG") Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250621162842.337452-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 77d9d6af46da..c062109cb686 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -450,10 +450,10 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( __u64 sqe_addr) { struct ublk_auto_buf_reg reg = { - .index = sqe_addr & 0xffff, - .flags = (sqe_addr >> 16) & 0xff, - .reserved0 = (sqe_addr >> 24) & 0xff, - .reserved1 = sqe_addr >> 32, + .index = (__u16)sqe_addr, + .flags = (__u8)(sqe_addr >> 16), + .reserved0 = (__u8)(sqe_addr >> 24), + .reserved1 = (__u32)(sqe_addr >> 32), }; return reg; -- cgit v1.2.3 From 81b4d1a1d03301dcca8af5c58eded9e535f1f6ed Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Sat, 21 Jun 2025 11:10:14 -0600 Subject: ublk: update UBLK_F_SUPPORT_ZERO_COPY comment in UAPI header UBLK_F_SUPPORT_ZERO_COPY has a very old comment describing the initial idea for how zero-copy would be implemented. The actual implementation added in commit 1f6540e2aabb ("ublk: zc register/unregister bvec") uses io_uring registered buffers rather than shared memory mapping. Remove the inaccurate remarks about mapping ublk request memory into the ublk server's address space and requiring 4K block size. Replace them with a description of the current zero-copy mechanism. Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250621171015.354932-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index c062109cb686..c9751bdfd937 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -135,8 +135,28 @@ #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) /* - * zero copy requires 4k block size, and can remap ublk driver's io - * request into ublksrv's vm space + * ublk server can register data buffers for incoming I/O requests with a sparse + * io_uring buffer table. The request buffer can then be used as the data buffer + * for io_uring operations via the fixed buffer index. + * Note that the ublk server can never directly access the request data memory. + * + * To use this feature, the ublk server must first register a sparse buffer + * table on an io_uring instance. + * When an incoming ublk request is received, the ublk server submits a + * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The + * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register + * and addr is the index in the io_uring's buffer table to install the buffer. + * SQEs can now be submitted to the io_uring to read/write the request's buffer + * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or + * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index. + * Once the last io_uring operation using the request's buffer has completed, + * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag, + * and addr again specifying the request buffer to unregister. + * The ublk request is completed when its buffer is unregistered from all + * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ. + * + * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak + * uninitialized kernel memory by not reading into the full request buffer. */ #define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) -- cgit v1.2.3 From 51a4273dcab39dd1e850870945ccec664352d383 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Tue, 8 Apr 2025 15:02:11 +0530 Subject: KVM: SVM: Add missing member in SNP_LAUNCH_START command structure The sev_data_snp_launch_start structure should include a 4-byte desired_tsc_khz field before the gosvw field, which was missed in the initial implementation. As a result, the structure is 4 bytes shorter than expected by the firmware, causing the gosvw field to start 4 bytes early. Fix this by adding the missing 4-byte member for the desired TSC frequency. Fixes: 3a45dc2b419e ("crypto: ccp: Define the SEV-SNP commands") Cc: stable@vger.kernel.org Suggested-by: Tom Lendacky Reviewed-by: Tom Lendacky Tested-by: Vaishali Thakkar Signed-off-by: Nikunj A Dadhania Link: https://lore.kernel.org/r/20250408093213.57962-3-nikunj@amd.com Signed-off-by: Sean Christopherson --- include/linux/psp-sev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0b3a36bdaa90..0f5f94137f6d 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -594,6 +594,7 @@ struct sev_data_snp_addr { * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the * purpose of guest-assisted migration. * @rsvd: reserved + * @desired_tsc_khz: hypervisor desired mean TSC freq in kHz of the guest * @gosvw: guest OS-visible workarounds, as defined by hypervisor */ struct sev_data_snp_launch_start { @@ -603,6 +604,7 @@ struct sev_data_snp_launch_start { u32 ma_en:1; /* In */ u32 imi_en:1; /* In */ u32 rsvd:30; + u32 desired_tsc_khz; /* In */ u8 gosvw[16]; /* In */ } __packed; -- cgit v1.2.3 From c8dc579169738a3546f57ecb38e62d3872a3cc04 Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Mon, 9 Jun 2025 11:53:56 -0400 Subject: i2c: amd-isp: Initialize unique adapter name Initialize unique name for amdisp i2c adapter, which is used in the platform driver to detect the matching adapter for i2c_client creation. Add definition of amdisp i2c adapter name in a new header file (include/linux/soc/amd/isp4_misc.h) as it is referred in different driver modules. Tested-by: Randy Dunlap Signed-off-by: Pratap Nirujogi Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250609155601.1477055-3-pratap.nirujogi@amd.com --- include/linux/soc/amd/isp4_misc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/soc/amd/isp4_misc.h (limited to 'include') diff --git a/include/linux/soc/amd/isp4_misc.h b/include/linux/soc/amd/isp4_misc.h new file mode 100644 index 000000000000..6738796986a7 --- /dev/null +++ b/include/linux/soc/amd/isp4_misc.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef __SOC_ISP4_MISC_H +#define __SOC_ISP4_MISC_H + +#define AMDISP_I2C_ADAP_NAME "AMDISP DesignWare I2C adapter" + +#endif -- cgit v1.2.3 From f5769359c5b241978e6933672bb78b3adc36aa18 Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Fri, 20 Jun 2025 02:31:54 +0800 Subject: mm/alloc_tag: fix the kmemleak false positive issue in the allocation of the percpu variable tag->counters When loading a module, as long as the module has memory allocation operations, kmemleak produces a false positive report that resembles the following: unreferenced object (percpu) 0x7dfd232a1650 (size 16): comm "modprobe", pid 1301, jiffies 4294940249 hex dump (first 16 bytes on cpu 2): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): kmemleak_alloc_percpu+0xb4/0xd0 pcpu_alloc_noprof+0x700/0x1098 load_module+0xd4/0x348 codetag_module_init+0x20c/0x450 codetag_load_module+0x70/0xb8 load_module+0xef8/0x1608 init_module_from_file+0xec/0x158 idempotent_init_module+0x354/0x608 __arm64_sys_finit_module+0xbc/0x150 invoke_syscall+0xd4/0x258 el0_svc_common.constprop.0+0xb4/0x240 do_el0_svc+0x48/0x68 el0_svc+0x40/0xf8 el0t_64_sync_handler+0x10c/0x138 el0t_64_sync+0x1ac/0x1b0 This is because the module can only indirectly reference alloc_tag_counters through the alloc_tag section, which misleads kmemleak. However, we don't have a kmemleak ignore interface for percpu allocations yet. So let's create one and invoke it for tag->counters. [gehao@kylinos.cn: fix build error when CONFIG_DEBUG_KMEMLEAK=n, s/igonore/ignore/] Link: https://lkml.kernel.org/r/20250620093102.2416767-1-hao.ge@linux.dev Link: https://lkml.kernel.org/r/20250619183154.2122608-1-hao.ge@linux.dev Fixes: 12ca42c23775 ("alloc_tag: allocate percpu counters for module tags dynamically") Signed-off-by: Hao Ge Reviewed-by: Catalin Marinas Acked-by: Suren Baghdasaryan [lib/alloc_tag.c] Cc: Kent Overstreet Signed-off-by: Andrew Morton --- include/linux/kmemleak.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 93a73c076d16..fbd424b2abb1 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,6 +28,7 @@ extern void kmemleak_update_trace(const void *ptr) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_transient_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; +extern void kmemleak_ignore_percpu(const void __percpu *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, @@ -97,6 +98,9 @@ static inline void kmemleak_not_leak(const void *ptr) static inline void kmemleak_transient_leak(const void *ptr) { } +static inline void kmemleak_ignore_percpu(const void __percpu *ptr) +{ +} static inline void kmemleak_ignore(const void *ptr) { } -- cgit v1.2.3 From 12ffc3b1513ebc1f11ae77d053948504a94a68a6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 13 Jun 2025 16:43:44 -0500 Subject: PM: Restrict swap use to later in the suspend sequence Currently swap is restricted before drivers have had a chance to do their prepare() PM callbacks. Restricting swap this early means that if a driver needs to evict some content from memory into sawp in it's prepare callback, it won't be able to. On AMD dGPUs this can lead to failed suspends under memory pressure situations as all VRAM must be evicted to system memory or swap. Move the swap restriction to right after all devices have had a chance to do the prepare() callback. If there is any problem with the sequence, restore swap in the appropriate dpm resume callbacks or error handling paths. Closes: https://github.com/ROCm/ROCK-Kernel-Driver/issues/174 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 Signed-off-by: Mario Limonciello Tested-by: Nat Wittstock Tested-by: Lucian Langa Link: https://patch.msgid.link/20250613214413.4127087-1-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1c76c8f2c82..6a3f92098872 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -446,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); extern void pm_report_hw_sleep_time(u64 t); extern void pm_report_max_hw_sleep(u64 t); +void pm_restrict_gfp_mask(void); +void pm_restore_gfp_mask(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -492,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline void pm_report_hw_sleep_time(u64 t) {}; static inline void pm_report_max_hw_sleep(u64 t) {}; +static inline void pm_restrict_gfp_mask(void) {} +static inline void pm_restore_gfp_mask(void) {} + static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) -- cgit v1.2.3 From 2d22b63f3a5aae2088708941d08cf0f01f430a58 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 26 Jun 2025 12:04:59 +0200 Subject: drm/mipi-dsi: Add dev_is_mipi_dsi function This will be especially useful for generic panels (like panel-simple) which can take different code path depending on if they are MIPI-DSI devices or platform devices. Reviewed-by: Javier Martinez Canillas Tested-by: Francesco Dolcini # Toradex Colibri iMX6 Link: https://lore.kernel.org/r/20250626-drm-panel-simple-fixes-v2-1-5afcaa608bdc@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_mipi_dsi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b37860f4a895..6d2c08e81101 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -223,6 +223,9 @@ struct mipi_dsi_multi_context { #define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev) +extern const struct bus_type mipi_dsi_bus_type; +#define dev_is_mipi_dsi(dev) ((dev)->bus == &mipi_dsi_bus_type) + /** * mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any * given pixel format defined by the MIPI DSI -- cgit v1.2.3 From ddaad4ad774d4ae02047ef873a8e38f62a4b7b01 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 18 Jun 2025 22:22:50 +0200 Subject: mtd: nand: qpic_common: prevent out of bounds access of BAM arrays The common QPIC code does not do any boundary checking when it handles the command elements and scatter gater list arrays of a BAM transaction, thus it allows to access out of bounds elements in those. Although it is the responsibility of the given driver to allocate enough space for all possible BAM transaction variations, however there can be mistakes in the driver code which can lead to hidden memory corruption issues which are hard to debug. This kind of problem has been observed during testing the 'spi-qpic-snand' driver. Although the driver has been fixed with a preceding patch, but it still makes sense to reduce the chance of having such errors again later. In order to prevent such errors, change the qcom_alloc_bam_transaction() function to store the number of elements of the arrays in the 'bam_transaction' strucutre during allocation. Also, add sanity checks to the qcom_prep_bam_dma_desc_{cmd,data}() functions to avoid using out of bounds indices for the arrays. Tested-by: Lakshmi Sowjanya D # on SDX75 Acked-by: Miquel Raynal Signed-off-by: Gabor Juhos Link: https://patch.msgid.link/20250618-qpic-snand-avoid-mem-corruption-v3-2-319c71296cda@gmail.com Signed-off-by: Mark Brown --- include/linux/mtd/nand-qpic-common.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index e8462deda6db..f0aa098a395f 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -237,6 +237,9 @@ * @last_data_desc - last DMA desc in data channel (tx/rx). * @last_cmd_desc - last DMA desc in command channel. * @txn_done - completion for NAND transfer. + * @bam_ce_nitems - the number of elements in the @bam_ce array + * @cmd_sgl_nitems - the number of elements in the @cmd_sgl array + * @data_sgl_nitems - the number of elements in the @data_sgl array * @bam_ce_pos - the index in bam_ce which is available for next sgl * @bam_ce_start - the index in bam_ce which marks the start position ce * for current sgl. It will be used for size calculation @@ -255,6 +258,11 @@ struct bam_transaction { struct dma_async_tx_descriptor *last_data_desc; struct dma_async_tx_descriptor *last_cmd_desc; struct completion txn_done; + + unsigned int bam_ce_nitems; + unsigned int cmd_sgl_nitems; + unsigned int data_sgl_nitems; + struct_group(bam_positions, u32 bam_ce_pos; u32 bam_ce_start; -- cgit v1.2.3 From 96893cdd4760ad94a438c1523cc5ca2470e04670 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 29 Jun 2025 18:15:32 +0200 Subject: spi: Raise limit on number of chip selects to 24 We have a system which uses 24 SPI chip selects, raise the hard coded limit accordingly. Signed-off-by: Marc Kleine-Budde Link: https://patch.msgid.link/20250629-spi-increase-number-of-cs-v2-1-85a0a09bab32@pengutronix.de Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4789f91dae94..e9ea43234d9a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 16 +#define SPI_CS_CNT_MAX 24 struct dma_chan; struct software_node; -- cgit v1.2.3 From b1c26e059536d8acbf9d508374f4b76537e20fb7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 10 Jun 2025 15:58:16 -0500 Subject: Move FCH header to a location accessible by all archs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new header fch.h was created to store registers used by different AMD drivers. This header was included by i2c-piix4 in commit 624b0d5696a8 ("i2c: piix4, x86/platform: Move the SB800 PIIX4 FCH definitions to "). To prevent compile failures on non-x86 archs i2c-piix4 was set to only compile on x86 by commit 7e173eb82ae9717 ("i2c: piix4: Make CONFIG_I2C_PIIX4 dependent on CONFIG_X86"). This was not a good decision because loongarch and mips both actually support i2c-piix4 and set it enabled in the defconfig. Move the header to a location accessible by all architectures. Fixes: 624b0d5696a89 ("i2c: piix4, x86/platform: Move the SB800 PIIX4 FCH definitions to ") Suggested-by: Hans de Goede Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250610205817.3912944-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/amd-fch.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/platform_data/x86/amd-fch.h (limited to 'include') diff --git a/include/linux/platform_data/x86/amd-fch.h b/include/linux/platform_data/x86/amd-fch.h new file mode 100644 index 000000000000..2cf5153edbc2 --- /dev/null +++ b/include/linux/platform_data/x86/amd-fch.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_FCH_H_ +#define _ASM_X86_AMD_FCH_H_ + +#define FCH_PM_BASE 0xFED80300 + +/* Register offsets from PM base: */ +#define FCH_PM_DECODEEN 0x00 +#define FCH_PM_DECODEEN_SMBUS0SEL GENMASK(20, 19) +#define FCH_PM_SCRATCH 0x80 +#define FCH_PM_S5_RESET_STATUS 0xC0 + +#endif /* _ASM_X86_AMD_FCH_H_ */ -- cgit v1.2.3 From 9df7b5ebead649b00bf9a53a798e4bf83a1318fd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:37 +0100 Subject: netfs: Fix double put of request If a netfs request finishes during the pause loop, it will have the ref that belongs to the IN_PROGRESS flag removed at that point - however, if it then goes to the final wait loop, that will *also* put the ref because it sees that the IN_PROGRESS flag is clear and incorrectly assumes that this happened when it called the collector. In fact, since IN_PROGRESS is clear, we shouldn't call the collector again since it's done all the cleanup, such as calling ->ki_complete(). Fix this by making netfs_collect_in_app() just return, indicating that we're done if IN_PROGRESS is removed. Fixes: 2b1424cd131c ("netfs: Fix wait/wake to be consistent about the waitqueue used") Signed-off-by: David Howells Link: https://lore.kernel.org/20250701163852.2171681-3-dhowells@redhat.com Tested-by: Steve French Reviewed-by: Paulo Alcantara cc: Steve French cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 333d2e38dd2c..ba35dc66e986 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -56,6 +56,7 @@ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \ -- cgit v1.2.3 From 5e1e6ec2e346c0850f304c31abdef4158007474e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:46 +0100 Subject: netfs: Merge i_size update functions Netfslib has two functions for updating the i_size after a write: one for buffered writes into the pagecache and one for direct/unbuffered writes. However, what needs to be done is much the same in both cases, so merge them together. This does raise one question, though: should updating the i_size after a direct write do the same estimated update of i_blocks as is done for buffered writes. Also get rid of the cleanup function pointer from netfs_io_request as it's only used for direct write to update i_size; instead do the i_size setting directly from write collection. Signed-off-by: David Howells Link: https://lore.kernel.org/20250701163852.2171681-12-dhowells@redhat.com cc: Steve French cc: Paulo Alcantara cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 065c17385e53..d8186b90fb38 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -279,7 +279,6 @@ struct netfs_io_request { #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; - void (*cleanup)(struct netfs_io_request *req); }; /* -- cgit v1.2.3 From 4e32541076833f5ce2e23523c9faa25f7b2cc96f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:47 +0100 Subject: netfs: Renumber the NETFS_RREQ_* flags to make traces easier to read Renumber the NETFS_RREQ_* flags to put the most useful status bits in the bottom nibble - and therefore the last hex digit in the trace output - making it easier to grasp the state at a glance. In particular, put the IN_PROGRESS flag in bit 0 and ALL_QUEUED at bit 1. Also make the flags field in /proc/fs/netfs/requests larger to accommodate all the flags. Also make the flags field in the netfs_sreq tracepoint larger to accommodate all the NETFS_SREQ_* flags. Signed-off-by: David Howells Link: https://lore.kernel.org/20250701163852.2171681-13-dhowells@redhat.com Reviewed-by: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 20 ++++++++++---------- include/trace/events/netfs.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index d8186b90fb38..f43f075852c0 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -265,17 +265,17 @@ struct netfs_io_request { bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ -#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ -#define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ -#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ -#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ -#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ +#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */ +#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */ +#define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */ +#define NETFS_RREQ_FAILED 3 /* The request failed */ +#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */ +#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */ +#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */ +#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ -#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ -#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index ba35dc66e986..c2d581429a7b 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -367,7 +367,7 @@ TRACE_EVENT(netfs_sreq, __entry->slot = sreq->io_iter.folioq_slot; ), - TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d", + TP_printk("R=%08x[%x] %s %s f=%03x s=%llx %zx/%zx s=%u e=%d", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __print_symbolic(__entry->what, netfs_sreq_traces), -- cgit v1.2.3 From 90b3ccf514578ca3a6ac25db51a29a48e34e0f1b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2025 17:38:48 +0100 Subject: netfs: Update tracepoints in a number of ways Make a number of updates to the netfs tracepoints: (1) Remove a duplicate trace from netfs_unbuffered_write_iter_locked(). (2) Move the trace in netfs_wake_rreq_flag() to after the flag is cleared so that the change appears in the trace. (3) Differentiate the use of netfs_rreq_trace_wait/woke_queue symbols. (4) Don't do so many trace emissions in the wait functions as some of them are redundant. (5) In netfs_collect_read_results(), differentiate a subreq that's being abandoned vs one that has been consumed in a regular way. (6) Add a tracepoint to indicate the call to ->ki_complete(). (7) Don't double-increment the subreq_counter when retrying a write. (8) Move the netfs_sreq_trace_io_progress tracepoint within cifs code to just MID_RESPONSE_RECEIVED and add different tracepoints for other MID states and note check failure. Signed-off-by: David Howells Co-developed-by: Paulo Alcantara Signed-off-by: Paulo Alcantara Link: https://lore.kernel.org/20250701163852.2171681-14-dhowells@redhat.com cc: Steve French cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner --- include/trace/events/netfs.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index c2d581429a7b..73e96ccbe830 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -50,12 +50,13 @@ #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ - EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_collect, "COLLECT") \ EM(netfs_rreq_trace_complete, "COMPLET") \ + EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ @@ -64,13 +65,15 @@ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ + EM(netfs_rreq_trace_unpause, "UNPAUSE") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ - EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ - EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \ + EM(netfs_rreq_trace_wait_pause, "--PAUSED--") \ + EM(netfs_rreq_trace_wait_quiesce, "WAIT-QUIESCE") \ + EM(netfs_rreq_trace_waited_ip, "DONE-IP") \ + EM(netfs_rreq_trace_waited_pause, "--UNPAUSED--") \ + EM(netfs_rreq_trace_waited_quiesce, "DONE-QUIESCE") \ EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \ - EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \ - EM(netfs_rreq_trace_unpause, "UNPAUSE") \ E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ @@ -83,6 +86,7 @@ E_(NETFS_WRITE_TO_CACHE, "WRIT") #define netfs_sreq_traces \ + EM(netfs_sreq_trace_abandoned, "ABNDN") \ EM(netfs_sreq_trace_add_donations, "+DON ") \ EM(netfs_sreq_trace_added, "ADD ") \ EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \ @@ -90,6 +94,7 @@ EM(netfs_sreq_trace_cache_write, "CA-WR") \ EM(netfs_sreq_trace_cancel, "CANCL") \ EM(netfs_sreq_trace_clear, "CLEAR") \ + EM(netfs_sreq_trace_consumed, "CONSM") \ EM(netfs_sreq_trace_discard, "DSCRD") \ EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ EM(netfs_sreq_trace_donate_to_next, "DON-N") \ @@ -97,7 +102,12 @@ EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_free, "FREE ") \ EM(netfs_sreq_trace_hit_eof, "EOF ") \ - EM(netfs_sreq_trace_io_progress, "IO ") \ + EM(netfs_sreq_trace_io_bad, "I-BAD") \ + EM(netfs_sreq_trace_io_malformed, "I-MLF") \ + EM(netfs_sreq_trace_io_unknown, "I-UNK") \ + EM(netfs_sreq_trace_io_progress, "I-OK ") \ + EM(netfs_sreq_trace_io_req_submitted, "I-RSB") \ + EM(netfs_sreq_trace_io_retry_needed, "I-RTR") \ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_need_clear, "N-CLR") \ EM(netfs_sreq_trace_partial_read, "PARTR") \ @@ -143,8 +153,8 @@ #define netfs_sreq_ref_traces \ EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \ - EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \ - EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \ + EM(netfs_sreq_trace_get_resubmit, "GET RESUBMT") \ + EM(netfs_sreq_trace_get_submit, "GET SUBMIT ") \ EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \ -- cgit v1.2.3 From 8af39ec5cf2be522c8eb43a3d8005ed59e4daaee Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 13 Jun 2025 14:20:13 -0700 Subject: drm/sched: Increment job count before swapping tail spsc queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A small race exists between spsc_queue_push and the run-job worker, in which spsc_queue_push may return not-first while the run-job worker has already idled due to the job count being zero. If this race occurs, job scheduling stops, leading to hangs while waiting on the job’s DMA fences. Seal this race by incrementing the job count before appending to the SPSC queue. This race was observed on a drm-tip 6.16-rc1 build with the Xe driver in an SVM test case. Fixes: 1b1f42d8fde4 ("drm: move amd_gpu_scheduler into common location") Fixes: 27105db6c63a ("drm/amdgpu: Add SPSC queue to scheduler.") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250613212013.719312-1-matthew.brost@intel.com --- include/drm/spsc_queue.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/spsc_queue.h b/include/drm/spsc_queue.h index 125f096c88cb..ee9df8cc67b7 100644 --- a/include/drm/spsc_queue.h +++ b/include/drm/spsc_queue.h @@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struct spsc_queue *queue, struct spsc_node *n preempt_disable(); + atomic_inc(&queue->job_count); + smp_mb__after_atomic(); + tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next); WRITE_ONCE(*tail, node); - atomic_inc(&queue->job_count); /* * In case of first element verify new node will be visible to the consumer -- cgit v1.2.3 From 59710a26a289ad4e7ef227d22063e964930928b0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 30 Jun 2025 15:37:46 -0400 Subject: Bluetooth: hci_core: Remove check of BDADDR_ANY in hci_conn_hash_lookup_big_state The check for destination to be BDADDR_ANY is no longer necessary with the introduction of BIS_LINK. Fixes: 23205562ffc8 ("Bluetooth: separate CIS_LINK and BIS_LINK link types") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9fc8f544e20e..0da011fc8146 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1350,8 +1350,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || - c->state != state) + if (c->type != BIS_LINK || c->state != state) continue; if (handle == c->iso_qos.bcast.big) { -- cgit v1.2.3 From c5fd399a24c8e2865524361f7dc4d4a6899be4f4 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 1 Jul 2025 17:55:41 +1000 Subject: wifi: mac80211: correctly identify S1G short beacon mac80211 identifies a short beacon by the presence of the next TBTT field, however the standard actually doesn't explicitly state that the next TBTT can't be in a long beacon or even that it is required in a short beacon - and as a result this validation does not work for all vendor implementations. The standard explicitly states that an S1G long beacon shall contain the S1G beacon compatibility element as the first element in a beacon transmitted at a TBTT that is not a TSBTT (Target Short Beacon Transmission Time) as per IEEE80211-2024 11.1.3.10.1. This is validated by 9.3.4.3 Table 9-76 which states that the S1G beacon compatibility element is only allowed in the full set and is not allowed in the minimum set of elements permitted for use within short beacons. Correctly identify short beacons by the lack of an S1G beacon compatibility element as the first element in an S1G beacon frame. Fixes: 9eaffe5078ca ("cfg80211: convert S1G beacon to scan results") Signed-off-by: Simon Wadsworth Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250701075541.162619-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 22f39e5e2ff1..996be3c2cff0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -662,18 +662,6 @@ static inline bool ieee80211_s1g_has_cssid(__le16 fc) (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID)); } -/** - * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon - * @fc: frame control bytes in little-endian byteorder - * Return: whether or not the frame is an S1G short beacon, - * i.e. it is an S1G beacon with 'next TBTT' flag set - */ -static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) -{ - return ieee80211_is_s1g_beacon(fc) && - (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); -} - /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder @@ -4901,6 +4889,39 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) return false; } +/** + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon + * @fc: frame control bytes in little-endian byteorder + * @variable: pointer to the beacon frame elements + * @variable_len: length of the frame elements + * Return: whether or not the frame is an S1G short beacon. As per + * IEEE80211-2024 11.1.3.10.1, The S1G beacon compatibility element shall + * always be present as the first element in beacon frames generated at a + * TBTT (Target Beacon Transmission Time), so any frame not containing + * this element must have been generated at a TSBTT (Target Short Beacon + * Transmission Time) that is not a TBTT. Additionally, short beacons are + * prohibited from containing the S1G beacon compatibility element as per + * IEEE80211-2024 9.3.4.3 Table 9-76, so if we have an S1G beacon with + * either no elements or the first element is not the beacon compatibility + * element, we have a short beacon. + */ +static inline bool ieee80211_is_s1g_short_beacon(__le16 fc, const u8 *variable, + size_t variable_len) +{ + if (!ieee80211_is_s1g_beacon(fc)) + return false; + + /* + * If the frame does not contain at least 1 element (this is perfectly + * valid in a short beacon) and is an S1G beacon, we have a short + * beacon. + */ + if (variable_len < 2) + return true; + + return variable[0] != WLAN_EID_S1G_BCN_COMPAT; +} + struct element { u8 id; u8 datalen; -- cgit v1.2.3 From 4cdf1bdd45ac78a088773722f009883af30ad318 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Fri, 4 Jul 2025 11:21:34 +0200 Subject: block: reject bs > ps block devices when THP is disabled If THP is disabled and when a block device with logical block size > page size is present, the following null ptr deref panic happens during boot: [ [13.2 mK AOSAN: null-ptr-deref in range [0x0000000000000000-0x0000000000K0 0 0[07] [ 13.017749] RIP: 0010:create_empty_buffers+0x3b/0x380 [ 13.025448] Call Trace: [ 13.025692] [ 13.025895] block_read_full_folio+0x610/0x780 [ 13.026379] ? __pfx_blkdev_get_block+0x10/0x10 [ 13.027008] ? __folio_batch_add_and_move+0x1fa/0x2b0 [ 13.027548] ? __pfx_blkdev_read_folio+0x10/0x10 [ 13.028080] filemap_read_folio+0x9b/0x200 [ 13.028526] ? __pfx_filemap_read_folio+0x10/0x10 [ 13.029030] ? __filemap_get_folio+0x43/0x620 [ 13.029497] do_read_cache_folio+0x155/0x3b0 [ 13.029962] ? __pfx_blkdev_read_folio+0x10/0x10 [ 13.030381] read_part_sector+0xb7/0x2a0 [ 13.030805] read_lba+0x174/0x2c0 [ 13.045348] nvme_scan_ns+0x684/0x850 [nvme_core] [ 13.045858] ? __pfx_nvme_scan_ns+0x10/0x10 [nvme_core] [ 13.046414] ? _raw_spin_unlock+0x15/0x40 [ 13.046843] ? __switch_to+0x523/0x10a0 [ 13.047253] ? kvm_clock_get_cycles+0x14/0x30 [ 13.047742] ? __pfx_nvme_scan_ns_async+0x10/0x10 [nvme_core] [ 13.048353] async_run_entry_fn+0x96/0x4f0 [ 13.048787] process_one_work+0x667/0x10a0 [ 13.049219] worker_thread+0x63c/0xf60 As large folio support depends on THP, only allow bs > ps block devices if THP is enabled. Fixes: 47dd67532303 ("block/bdev: lift block size restrictions to 64k") Signed-off-by: Pankaj Raghav Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20250704092134.289491-1-p.raghav@samsung.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 332b56f323d9..369a8e63c865 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -269,11 +269,16 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) * however we constrain this to what we can validate and test. */ #define BLK_MAX_BLOCK_SIZE SZ_64K +#else +#define BLK_MAX_BLOCK_SIZE PAGE_SIZE +#endif + /* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) -- cgit v1.2.3 From 1e3b66e326015f77bc4b36976bebeedc2ac0f588 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 3 Jul 2025 13:23:29 +0200 Subject: vsock: fix `vsock_proto` declaration From commit 634f1a7110b4 ("vsock: support sockmap"), `struct proto vsock_proto`, defined in af_vsock.c, is not static anymore, since it's used by vsock_bpf.c. If CONFIG_BPF_SYSCALL is not defined, `make C=2` will print a warning: $ make O=build C=2 W=1 net/vmw_vsock/ ... CC [M] net/vmw_vsock/af_vsock.o CHECK ../net/vmw_vsock/af_vsock.c ../net/vmw_vsock/af_vsock.c:123:14: warning: symbol 'vsock_proto' was not declared. Should it be static? Declare `vsock_proto` regardless of CONFIG_BPF_SYSCALL, since it's defined in af_vsock.c, which is built regardless of CONFIG_BPF_SYSCALL. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20250703112329.28365-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- include/net/af_vsock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d56e6e135158..d40e978126e3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -243,8 +243,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -#ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; +#ifdef CONFIG_BPF_SYSCALL int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init vsock_bpf_build_proto(void); #else -- cgit v1.2.3 From 70b9c0c11e55167b9552ef395bc00f4920299177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 30 Jun 2025 15:02:18 +0200 Subject: uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again (2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BITS_PER_LONG does not exist in UAPI headers, so can't be used by the UAPI __GENMASK(). Instead __BITS_PER_LONG needs to be used. When __GENMASK() was introduced in commit 3c7a8e190bc5 ("uapi: introduce uapi-friendly macros for GENMASK"), the code was fine. A broken revert in 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") introduced the incorrect usage of BITS_PER_LONG. That was fixed in commit 11fcf368506d ("uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again"). But a broken sync of the kernel headers with the tools/ headers in commit fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources") undid the fix. Reapply the fix and while at it also fix the tools header. Fixes: fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources") Signed-off-by: Thomas Weißschuh Acked-by: Yury Norov (NVIDIA) Signed-off-by: Yury Norov (NVIDIA) --- include/uapi/linux/bits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) -- cgit v1.2.3 From fc582cd26e888b0652bc1494f252329453fd3b23 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Jul 2025 11:00:32 -0600 Subject: io_uring/msg_ring: ensure io_kiocb freeing is deferred for RCU syzbot reports that defer/local task_work adding via msg_ring can hit a request that has been freed: CPU: 1 UID: 0 PID: 19356 Comm: iou-wrk-19354 Not tainted 6.16.0-rc4-syzkaller-00108-g17bbde2e1716 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xd2/0x2b0 mm/kasan/report.c:521 kasan_report+0x118/0x150 mm/kasan/report.c:634 io_req_local_work_add io_uring/io_uring.c:1184 [inline] __io_req_task_work_add+0x589/0x950 io_uring/io_uring.c:1252 io_msg_remote_post io_uring/msg_ring.c:103 [inline] io_msg_data_remote io_uring/msg_ring.c:133 [inline] __io_msg_ring_data+0x820/0xaa0 io_uring/msg_ring.c:151 io_msg_ring_data io_uring/msg_ring.c:173 [inline] io_msg_ring+0x134/0xa00 io_uring/msg_ring.c:314 __io_issue_sqe+0x17e/0x4b0 io_uring/io_uring.c:1739 io_issue_sqe+0x165/0xfd0 io_uring/io_uring.c:1762 io_wq_submit_work+0x6e9/0xb90 io_uring/io_uring.c:1874 io_worker_handle_work+0x7cd/0x1180 io_uring/io-wq.c:642 io_wq_worker+0x42f/0xeb0 io_uring/io-wq.c:696 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 which is supposed to be safe with how requests are allocated. But msg ring requests alloc and free on their own, and hence must defer freeing to a sane time. Add an rcu_head and use kfree_rcu() in both spots where requests are freed. Only the one in io_msg_tw_complete() is strictly required as it has been visible on the other ring, but use it consistently in the other spot as well. This should not cause any other issues outside of KASAN rightfully complaining about it. Link: https://lore.kernel.org/io-uring/686cd2ea.a00a0220.338033.0007.GAE@google.com/ Reported-by: syzbot+54cbbfb4db9145d26fc2@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Fixes: 0617bb500bfa ("io_uring/msg_ring: improve handling of target CQE posting") Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 2922635986f5..a7efcec2e3d0 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -698,6 +698,8 @@ struct io_kiocb { struct hlist_node hash_node; /* For IOPOLL setup queues, with hybrid polling */ u64 iopoll_start; + /* for private io_kiocb freeing */ + struct rcu_head rcu_head; }; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; -- cgit v1.2.3 From f6bfc9afc7510cb5e6fbe0a17c507917b0120280 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 7 Jul 2025 15:11:55 +0200 Subject: drm/framebuffer: Acquire internal references on GEM handles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acquire GEM handles in drm_framebuffer_init() and release them in the corresponding drm_framebuffer_cleanup(). Ties the handle's lifetime to the framebuffer. Not all GEM buffer objects have GEM handles. If not set, no refcounting takes place. This is the case for some fbdev emulation. This is not a problem as these GEM objects do not use dma-bufs and drivers will not release them while fbdev emulation is running. Framebuffer flags keep a bit per color plane of which the framebuffer holds a GEM handle reference. As all drivers use drm_framebuffer_init(), they will now all hold dma-buf references as fixed in commit 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers"). In the GEM framebuffer helpers, restore the original ref counting on buffer objects. As the helpers for handle refcounting are now no longer called from outside the DRM core, unexport the symbols. v3: - don't mix internal flags with mode flags (Christian) v2: - track framebuffer handle refs by flag - drop gma500 cleanup (Christian) Signed-off-by: Thomas Zimmermann Fixes: 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers") Reported-by: Bert Karwatzki Closes: https://lore.kernel.org/dri-devel/20250703115915.3096-1-spasswolf@web.de/ Tested-by: Bert Karwatzki Tested-by: Mario Limonciello Tested-by: Borislav Petkov (AMD) Cc: Thomas Zimmermann Cc: Anusha Srivatsa Cc: Christian König Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sumit Semwal Cc: "Christian König" Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250707131224.249496-1-tzimmermann@suse.de --- include/drm/drm_framebuffer.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 668077009fce..38b24fc8978d 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -23,6 +23,7 @@ #ifndef __DRM_FRAMEBUFFER_H__ #define __DRM_FRAMEBUFFER_H__ +#include #include #include #include @@ -100,6 +101,8 @@ struct drm_framebuffer_funcs { unsigned num_clips); }; +#define DRM_FRAMEBUFFER_HAS_HANDLE_REF(_i) BIT(0u + (_i)) + /** * struct drm_framebuffer - frame buffer object * @@ -188,6 +191,10 @@ struct drm_framebuffer { * DRM_MODE_FB_MODIFIERS. */ int flags; + /** + * @internal_flags: Framebuffer flags like DRM_FRAMEBUFFER_HAS_HANDLE_REF. + */ + unsigned int internal_flags; /** * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ -- cgit v1.2.3 From bd46cece51a36ef088f22ef0416ac13b0a46d5b0 Mon Sep 17 00:00:00 2001 From: Simona Vetter Date: Mon, 7 Jul 2025 17:18:13 +0200 Subject: drm/gem: Fix race in drm_gem_handle_create_tail() Object creation is a careful dance where we must guarantee that the object is fully constructed before it is visible to other threads, and GEM buffer objects are no difference. Final publishing happens by calling drm_gem_handle_create(). After that the only allowed thing to do is call drm_gem_object_put() because a concurrent call to the GEM_CLOSE ioctl with a correctly guessed id (which is trivial since we have a linear allocator) can already tear down the object again. Luckily most drivers get this right, the very few exceptions I've pinged the relevant maintainers for. Unfortunately we also need drm_gem_handle_create() when creating additional handles for an already existing object (e.g. GETFB ioctl or the various bo import ioctl), and hence we cannot have a drm_gem_handle_create_and_put() as the only exported function to stop these issues from happening. Now unfortunately the implementation of drm_gem_handle_create() isn't living up to standards: It does correctly finishe object initialization at the global level, and hence is safe against a concurrent tear down. But it also sets up the file-private aspects of the handle, and that part goes wrong: We fully register the object in the drm_file.object_idr before calling drm_vma_node_allow() or obj->funcs->open, which opens up races against concurrent removal of that handle in drm_gem_handle_delete(). Fix this with the usual two-stage approach of first reserving the handle id, and then only registering the object after we've completed the file-private setup. Jacek reported this with a testcase of concurrently calling GEM_CLOSE on a freshly-created object (which also destroys the object), but it should be possible to hit this with just additional handles created through import or GETFB without completed destroying the underlying object with the concurrent GEM_CLOSE ioctl calls. Note that the close-side of this race was fixed in f6cd7daecff5 ("drm: Release driver references to handle before making it available again"), which means a cool 9 years have passed until someone noticed that we need to make this symmetry or there's still gaps left :-/ Without the 2-stage close approach we'd still have a race, therefore that's an integral part of this bugfix. More importantly, this means we can have NULL pointers behind allocated id in our drm_file.object_idr. We need to check for that now: - drm_gem_handle_delete() checks for ERR_OR_NULL already - drm_gem.c:object_lookup() also chekcs for NULL - drm_gem_release() should never be called if there's another thread still existing that could call into an IOCTL that creates a new handle, so cannot race. For paranoia I added a NULL check to drm_gem_object_release_handle() though. - most drivers (etnaviv, i915, msm) are find because they use idr_find(), which maps both ENOENT and NULL to NULL. - drivers using idr_for_each_entry() should also be fine, because idr_get_next does filter out NULL entries and continues the iteration. - The same holds for drm_show_memory_stats(). v2: Use drm_WARN_ON (Thomas) Reported-by: Jacek Lawrynowicz Tested-by: Jacek Lawrynowicz Reviewed-by: Thomas Zimmermann Cc: stable@vger.kernel.org Cc: Jacek Lawrynowicz Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Simona Vetter Signed-off-by: Simona Vetter Signed-off-by: Simona Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20250707151814.603897-1-simona.vetter@ffwll.ch --- include/drm/drm_file.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 5c3b2aa3e69d..d344d41e6cfe 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -300,6 +300,9 @@ struct drm_file { * * Mapping of mm object handles to object pointers. Used by the GEM * subsystem. Protected by @table_lock. + * + * Note that allocated entries might be NULL as a transient state when + * creating or deleting a handle. */ struct idr object_idr; -- cgit v1.2.3 From 82241a83cd15aaaf28200a40ad1a8b480012edaf Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 5 Jun 2025 20:58:29 +0800 Subject: mm: fix the inaccurate memory statistics issue for users On some large machines with a high number of CPUs running a 64K pagesize kernel, we found that the 'RES' field is always 0 displayed by the top command for some processes, which will cause a lot of confusion for users. PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 875525 root 20 0 12480 0 0 R 0.3 0.0 0:00.08 top 1 root 20 0 172800 0 0 S 0.0 0.0 0:04.52 systemd The main reason is that the batch size of the percpu counter is quite large on these machines, caching a significant percpu value, since converting mm's rss stats into percpu_counter by commit f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter"). Intuitively, the batch number should be optimized, but on some paths, performance may take precedence over statistical accuracy. Therefore, introducing a new interface to add the percpu statistical count and display it to users, which can remove the confusion. In addition, this change is not expected to be on a performance-critical path, so the modification should be acceptable. In addition, the 'mm->rss_stat' is updated by using add_mm_counter() and dec/inc_mm_counter(), which are all wrappers around percpu_counter_add_batch(). In percpu_counter_add_batch(), there is percpu batch caching to avoid 'fbc->lock' contention. This patch changes task_mem() and task_statm() to get the accurate mm counters under the 'fbc->lock', but this should not exacerbate kernel 'mm->rss_stat' lock contention due to the percpu batch caching of the mm counters. The following test also confirm the theoretical analysis. I run the stress-ng that stresses anon page faults in 32 threads on my 32 cores machine, while simultaneously running a script that starts 32 threads to busy-loop pread each stress-ng thread's /proc/pid/status interface. From the following data, I did not observe any obvious impact of this patch on the stress-ng tests. w/o patch: stress-ng: info: [6848] 4,399,219,085,152 CPU Cycles 67.327 B/sec stress-ng: info: [6848] 1,616,524,844,832 Instructions 24.740 B/sec (0.367 instr. per cycle) stress-ng: info: [6848] 39,529,792 Page Faults Total 0.605 M/sec stress-ng: info: [6848] 39,529,792 Page Faults Minor 0.605 M/sec w/patch: stress-ng: info: [2485] 4,462,440,381,856 CPU Cycles 68.382 B/sec stress-ng: info: [2485] 1,615,101,503,296 Instructions 24.750 B/sec (0.362 instr. per cycle) stress-ng: info: [2485] 39,439,232 Page Faults Total 0.604 M/sec stress-ng: info: [2485] 39,439,232 Page Faults Minor 0.604 M/sec On comparing a very simple app which just allocates & touches some memory against v6.1 (which doesn't have f1a7941243c1) and latest Linus tree (4c06e63b9203) I can see that on latest Linus tree the values for VmRSS, RssAnon and RssFile from /proc/self/status are all zeroes while they do report values on v6.1 and a Linus tree with this patch. Link: https://lkml.kernel.org/r/f4586b17f66f97c174f7fd1f8647374fdb53de1c.1749119050.git.baolin.wang@linux.alibaba.com Fixes: f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter") Signed-off-by: Baolin Wang Reviewed-by: Aboorva Devarajan Tested-by: Aboorva Devarajan Tested-by Donet Tom Acked-by: Shakeel Butt Acked-by: SeongJae Park Acked-by: Michal Hocko Reviewed-by: Vlastimil Babka Cc: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ef2ba0c667a..fa538feaa8d9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2568,6 +2568,11 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return percpu_counter_read_positive(&mm->rss_stat[member]); } +static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member) +{ + return percpu_counter_sum_positive(&mm->rss_stat[member]); +} + void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) -- cgit v1.2.3 From db6cc3f4ac2e6cdc898fc9cbc8b32ae1bf56bdad Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 4 Jul 2025 21:56:20 +0800 Subject: Revert "sched/numa: add statistics of numa balance task" This reverts commit ad6b26b6a0a79166b53209df2ca1cf8636296382. This commit introduces per-memcg/task NUMA balance statistics, but unfortunately it introduced a NULL pointer exception due to the following race condition: After a swap task candidate was chosen, its mm_struct pointer was set to NULL due to task exit. Later, when performing the actual task swapping, the p->mm caused the problem. CPU0 CPU1 : ... task_numa_migrate task_numa_find_cpu task_numa_compare # a normal task p is chosen env->best_task = p # p exit: exit_signals(p); p->flags |= PF_EXITING exit_mm p->mm = NULL; migrate_swap_stop __migrate_swap_task((arg->src_task, arg->dst_cpu) count_memcg_event_mm(p->mm, NUMA_TASK_SWAP)# p->mm is NULL task_lock() should be held and the PF_EXITING flag needs to be checked to prevent this from happening. After discussion, the conclusion was that adding a lock is not worthwhile for some statistics calculations. Revert the change and rely on the tracepoint for this purpose. Link: https://lkml.kernel.org/r/20250704135620.685752-1-yu.c.chen@intel.com Link: https://lkml.kernel.org/r/20250708064917.BBD13C4CEED@smtp.kernel.org Fixes: ad6b26b6a0a7 ("sched/numa: add statistics of numa balance task") Signed-off-by: Chen Yu Reported-by: Jirka Hladky Closes: https://lore.kernel.org/all/CAE4VaGBLJxpd=NeRJXpSCuw=REhC5LWJpC29kDy-Zh2ZDyzQZA@mail.gmail.com/ Reported-by: Srikanth Aithal Reported-by: Suneeth D Acked-by: Michal Hocko Cc: Borislav Petkov Cc: Ingo Molnar Cc: Jiri Hladky Cc: Libo Chen Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/sched.h | 4 ---- include/linux/vm_event_item.h | 2 -- 2 files changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..aa9c5be7a632 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -548,10 +548,6 @@ struct sched_statistics { u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; -#ifdef CONFIG_NUMA_BALANCING - u64 numa_task_migrated; - u64 numa_task_swapped; -#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 91a3ce9a2687..9e15a088ba38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, -- cgit v1.2.3 From dd831ac8221e691e9e918585b1003c7071df0379 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Sat, 5 Jul 2025 14:21:43 -0700 Subject: net/sched: sch_qfq: Fix null-deref in agg_dequeue To prevent a potential crash in agg_dequeue (net/sched/sch_qfq.c) when cl->qdisc->ops->peek(cl->qdisc) returns NULL, we check the return value before using it, similar to the existing approach in sch_hfsc.c. To avoid code duplication, the following changes are made: 1. Changed qdisc_warn_nonwc(include/net/pkt_sched.h) into a static inline function. 2. Moved qdisc_peek_len from net/sched/sch_hfsc.c to include/net/pkt_sched.h so that sch_qfq can reuse it. 3. Applied qdisc_peek_len in agg_dequeue to avoid crashing. Signed-off-by: Xiang Mei Reviewed-by: Cong Wang Link: https://patch.msgid.link/20250705212143.3982664-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- include/net/pkt_sched.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index d7b7b6cd4aa1..8a75c73fc555 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -114,7 +114,6 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq, spinlock_t *root_lock, bool validate); @@ -290,4 +289,28 @@ static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, return true; } +static inline void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) +{ + if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { + pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); + qdisc->flags |= TCQ_F_WARN_NONWC; + } +} + +static inline unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + unsigned int len; + + skb = sch->ops->peek(sch); + if (unlikely(skb == NULL)) { + qdisc_warn_nonwc("qdisc_peek_len", sch); + return 0; + } + len = qdisc_pkt_len(skb); + + return len; +} + #endif -- cgit v1.2.3 From a8b289f0f2dcbadd8c207ad8f33cf7ba2b4eb088 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Jul 2025 10:00:12 +0200 Subject: irqchip/irq-msi-lib: Fix build with PCI disabled The armada-370-xp irqchip fails in some randconfig builds because of a missing declaration: In file included from drivers/irqchip/irq-armada-370-xp.c:23: include/linux/irqchip/irq-msi-lib.h:25:39: error: 'struct msi_domain_info' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] Add a forward declaration for the msi_domain_info structure. [ tglx: Fixed up the subsystem prefix. Is it really that hard to get right? ] Fixes: e51b27438a10 ("irqchip: Make irq-msi-lib.h globally available") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/all/20250710080021.2303640-1-arnd@kernel.org --- include/linux/irqchip/irq-msi-lib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h index dd8d1d138544..224ac28e88d7 100644 --- a/include/linux/irqchip/irq-msi-lib.h +++ b/include/linux/irqchip/irq-msi-lib.h @@ -17,6 +17,7 @@ #define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) +struct msi_domain_info; int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); -- cgit v1.2.3 From 18cdb3d982da8976b28d57691eb256ec5688fad2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jul 2025 12:45:17 +0000 Subject: netfilter: flowtable: account for Ethernet header in nf_flow_pppoe_proto() syzbot found a potential access to uninit-value in nf_flow_pppoe_proto() Blamed commit forgot the Ethernet header. BUG: KMSAN: uninit-value in nf_flow_offload_inet_hook+0x7e4/0x940 net/netfilter/nf_flow_table_inet.c:27 nf_flow_offload_inet_hook+0x7e4/0x940 net/netfilter/nf_flow_table_inet.c:27 nf_hook_entry_hookfn include/linux/netfilter.h:157 [inline] nf_hook_slow+0xe1/0x3d0 net/netfilter/core.c:623 nf_hook_ingress include/linux/netfilter_netdev.h:34 [inline] nf_ingress net/core/dev.c:5742 [inline] __netif_receive_skb_core+0x4aff/0x70c0 net/core/dev.c:5837 __netif_receive_skb_one_core net/core/dev.c:5975 [inline] __netif_receive_skb+0xcc/0xac0 net/core/dev.c:6090 netif_receive_skb_internal net/core/dev.c:6176 [inline] netif_receive_skb+0x57/0x630 net/core/dev.c:6235 tun_rx_batched+0x1df/0x980 drivers/net/tun.c:1485 tun_get_user+0x4ee0/0x6b40 drivers/net/tun.c:1938 tun_chr_write_iter+0x3e9/0x5c0 drivers/net/tun.c:1984 new_sync_write fs/read_write.c:593 [inline] vfs_write+0xb4b/0x1580 fs/read_write.c:686 ksys_write fs/read_write.c:738 [inline] __do_sys_write fs/read_write.c:749 [inline] Reported-by: syzbot+bf6ed459397e307c3ad2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/686bc073.a00a0220.c7b3.0086.GAE@google.com/T/#u Fixes: 87b3593bed18 ("netfilter: flowtable: validate pppoe header") Signed-off-by: Eric Dumazet Reviewed-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20250707124517.614489-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_flow_table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d711642e78b5..c003cd194fa2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -370,7 +370,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) { - if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN)) return false; *inner_proto = __nf_flow_pppoe_proto(skb); -- cgit v1.2.3 From 36a686c0784fcccdaa4f38b498a9ef0d42ea7cb8 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 10 Jul 2025 18:43:42 +0200 Subject: Revert "netfilter: nf_tables: Add notifications for hook changes" This reverts commit 465b9ee0ee7bc268d7f261356afd6c4262e48d82. Such notifications fit better into core or nfnetlink_hook code, following the NFNL_MSG_HOOK_GET message format. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 ----- include/uapi/linux/netfilter/nf_tables.h | 10 ---------- include/uapi/linux/netfilter/nfnetlink.h | 2 -- 3 files changed, 17 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e4d8e451e935..5e49619ae49c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1142,11 +1142,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); -struct nft_hook; -void nf_tables_chain_device_notify(const struct nft_chain *chain, - const struct nft_hook *hook, - const struct net_device *dev, int event); - enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_ROUTE, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 518ba144544c..2beb30be2c5f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -142,8 +142,6 @@ enum nf_tables_msg_types { NFT_MSG_DESTROYOBJ, NFT_MSG_DESTROYFLOWTABLE, NFT_MSG_GETSETELEM_RESET, - NFT_MSG_NEWDEV, - NFT_MSG_DELDEV, NFT_MSG_MAX, }; @@ -1786,18 +1784,10 @@ enum nft_synproxy_attributes { * enum nft_device_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) - * @NFTA_DEVICE_TABLE: table containing the flowtable or chain hooking into the device (NLA_STRING) - * @NFTA_DEVICE_FLOWTABLE: flowtable hooking into the device (NLA_STRING) - * @NFTA_DEVICE_CHAIN: chain hooking into the device (NLA_STRING) - * @NFTA_DEVICE_SPEC: hook spec matching the device (NLA_STRING) */ enum nft_devices_attributes { NFTA_DEVICE_UNSPEC, NFTA_DEVICE_NAME, - NFTA_DEVICE_TABLE, - NFTA_DEVICE_FLOWTABLE, - NFTA_DEVICE_CHAIN, - NFTA_DEVICE_SPEC, __NFTA_DEVICE_MAX }; #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 50d807af2649..6cd58cd2a6f0 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -25,8 +25,6 @@ enum nfnetlink_groups { #define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA NFNLGRP_NFTRACE, #define NFNLGRP_NFTRACE NFNLGRP_NFTRACE - NFNLGRP_NFT_DEV, -#define NFNLGRP_NFT_DEV NFNLGRP_NFT_DEV __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) -- cgit v1.2.3 From 444020f4bf06fb86805ee7e7ceec0375485fd94d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 14 Jul 2025 14:21:30 +0200 Subject: wifi: cfg80211: remove scan request n_channels counted_by This reverts commit e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by"). This really has been a completely failed experiment. There were no actual bugs found, and yet at this point we already have four "fixes" to it, with nothing to show for but code churn, and it never even made the code any safer. In all of the cases that ended up getting "fixed", the structure is also internally inconsistent after the n_channels setting as the channel list isn't actually filled yet. You cannot scan with such a structure, that's just wrong. In mac80211, the struct is also reused multiple times, so initializing it once is no good. Some previous "fixes" (e.g. one in brcm80211) are also just setting n_channels before accessing the array, under the assumption that the code is correct and the array can be accessed, further showing that the whole thing is just pointless when the allocation count and use count are not separate. If we really wanted to fix it, we'd need to separately track the number of channels allocated and the number of channels currently used, but given that no bugs were found despite the numerous syzbot reports, that'd just be a waste of time. Remove the __counted_by() annotation. We really should also remove a number of the n_channels settings that are setting up a structure that's inconsistent, but that can wait. Reported-by: syzbot+e834e757bd9b3d3e1251@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e834e757bd9b3d3e1251 Fixes: e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by") Link: https://patch.msgid.link/20250714142130.9b0bbb7e1f07.I09112ccde72d445e11348fc2bef68942cb2ffc94@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d1848dc8ec99..10248d527616 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2690,7 +2690,7 @@ struct cfg80211_scan_request { s8 tsf_report_link_id; /* keep last */ - struct ieee80211_channel *channels[] __counted_by(n_channels); + struct ieee80211_channel *channels[]; }; static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) -- cgit v1.2.3 From dfef8d87a031ac1a46dde3de804e0fcf3c3a6afd Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 14 Jul 2025 22:27:43 +0200 Subject: Bluetooth: hci_core: fix typos in macros The provided macro parameter is named 'dev' (rather than 'hdev', which may be a variable on the stack where the macro is used). Fixes: a9a830a676a9 ("Bluetooth: hci_event: Fix sending HCI_OP_READ_ENC_KEY_SIZE") Fixes: 6126ffabba6b ("Bluetooth: Introduce HCI_CONN_FLAG_DEVICE_PRIVACY device flag") Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0da011fc8146..052c91613bb9 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1940,11 +1940,11 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) #define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \ - (hdev->commands[39] & 0x04)) + ((dev)->commands[39] & 0x04)) #define read_key_size_capable(dev) \ ((dev)->commands[20] & 0x10 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) + !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &(dev)->quirks)) #define read_voice_setting_capable(dev) \ ((dev)->commands[9] & 0x04 && \ -- cgit v1.2.3 From cdee6a4416b2a57c89082929cc60e2275bb32a3a Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 14 Jul 2025 22:27:44 +0200 Subject: Bluetooth: hci_core: add missing braces when using macro parameters Macro parameters should always be put into braces when accessing it. Fixes: 4fc9857ab8c6 ("Bluetooth: hci_sync: Add check simultaneous roles support") Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 052c91613bb9..367ca43f45d1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -829,20 +829,20 @@ extern struct mutex hci_cb_list_lock; #define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags) -#define hci_dev_clear_volatile_flags(hdev) \ - do { \ - hci_dev_clear_flag(hdev, HCI_LE_SCAN); \ - hci_dev_clear_flag(hdev, HCI_LE_ADV); \ - hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\ - hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \ - hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ +#define hci_dev_clear_volatile_flags(hdev) \ + do { \ + hci_dev_clear_flag((hdev), HCI_LE_SCAN); \ + hci_dev_clear_flag((hdev), HCI_LE_ADV); \ + hci_dev_clear_flag((hdev), HCI_LL_RPA_RESOLUTION); \ + hci_dev_clear_flag((hdev), HCI_PERIODIC_INQ); \ + hci_dev_clear_flag((hdev), HCI_QUALITY_REPORT); \ } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ - (hdev->le_states[4] & 0x08) && /* Central */ \ - (hdev->le_states[4] & 0x40) && /* Peripheral */ \ - (hdev->le_states[3] & 0x10)) /* Simultaneous */ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &(hdev)->quirks) && \ + ((hdev)->le_states[4] & 0x08) && /* Central */ \ + ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \ + ((hdev)->le_states[3] & 0x10)) /* Simultaneous */ /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); -- cgit v1.2.3 From 6851a0c228fc040dce8e4c393004209e7372e0a3 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 14 Jul 2025 22:27:45 +0200 Subject: Bluetooth: hci_dev: replace 'quirks' integer by 'quirk_flags' bitmap The 'quirks' member already ran out of bits on some platforms some time ago. Replace the integer member by a bitmap in order to have enough bits in future. Replace raw bit operations by accessor macros. Fixes: ff26b2dd6568 ("Bluetooth: Add quirk for broken READ_VOICE_SETTING") Fixes: 127881334eaa ("Bluetooth: Add quirk for broken READ_PAGE_SCAN_TYPE") Suggested-by: Pauli Virtanen Tested-by: Ivan Pravdin Signed-off-by: Kiran K Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 2 ++ include/net/bluetooth/hci_core.h | 28 ++++++++++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 82cbd54443ac..c79901f2dc2a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -377,6 +377,8 @@ enum { * This quirk must be set before hci_register_dev is called. */ HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, + + __HCI_NUM_QUIRKS, }; /* HCI device flags */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 367ca43f45d1..f79f59e67114 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -464,7 +464,7 @@ struct hci_dev { unsigned int auto_accept_delay; - unsigned long quirks; + DECLARE_BITMAP(quirk_flags, __HCI_NUM_QUIRKS); atomic_t cmd_cnt; unsigned int acl_cnt; @@ -656,6 +656,10 @@ struct hci_dev { u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb); }; +#define hci_set_quirk(hdev, nr) set_bit((nr), (hdev)->quirk_flags) +#define hci_clear_quirk(hdev, nr) clear_bit((nr), (hdev)->quirk_flags) +#define hci_test_quirk(hdev, nr) test_bit((nr), (hdev)->quirk_flags) + #define HCI_PHY_HANDLE(handle) (handle & 0xff) enum conn_reasons { @@ -839,7 +843,7 @@ extern struct mutex hci_cb_list_lock; } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &(hdev)->quirks) && \ + (!hci_test_quirk((hdev), HCI_QUIRK_BROKEN_LE_STATES) && \ ((hdev)->le_states[4] & 0x08) && /* Central */ \ ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \ ((hdev)->le_states[3] & 0x10)) /* Simultaneous */ @@ -1931,8 +1935,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M)) #define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED) && \ - !test_bit(HCI_QUIRK_BROKEN_LE_CODED, \ - &(dev)->quirks)) + !hci_test_quirk((dev), \ + HCI_QUIRK_BROKEN_LE_CODED)) #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) @@ -1944,27 +1948,27 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define read_key_size_capable(dev) \ ((dev)->commands[20] & 0x10 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE)) #define read_voice_setting_capable(dev) \ ((dev)->commands[9] & 0x04 && \ - !test_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_VOICE_SETTING)) /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. */ #define enhanced_sync_conn_capable(dev) \ (((dev)->commands[29] & 0x08) && \ - !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN)) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ ((dev)->commands[37] & 0x40) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_SCAN)) /* Use ext create connection if command is supported */ #define use_ext_conn(dev) (((dev)->commands[37] & 0x80) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &(dev)->quirks)) + !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_CREATE_CONN)) /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) @@ -1979,8 +1983,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); */ #define use_enhanced_conn_complete(dev) ((ll_privacy_capable(dev) || \ ext_adv_capable(dev)) && \ - !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, \ - &(dev)->quirks)) + !hci_test_quirk((dev), \ + HCI_QUIRK_BROKEN_EXT_CREATE_CONN)) /* Periodic advertising support */ #define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV)) @@ -1997,7 +2001,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ - (!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks))) + (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG))) /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 -- cgit v1.2.3 From 2d72afb340657f03f7261e9243b44457a9228ac7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 16 Jul 2025 20:39:14 +0200 Subject: netfilter: nf_conntrack: fix crash due to removal of uninitialised entry A crash in conntrack was reported while trying to unlink the conntrack entry from the hash bucket list: [exception RIP: __nf_ct_delete_from_lists+172] [..] #7 [ff539b5a2b043aa0] nf_ct_delete at ffffffffc124d421 [nf_conntrack] #8 [ff539b5a2b043ad0] nf_ct_gc_expired at ffffffffc124d999 [nf_conntrack] #9 [ff539b5a2b043ae0] __nf_conntrack_find_get at ffffffffc124efbc [nf_conntrack] [..] The nf_conn struct is marked as allocated from slab but appears to be in a partially initialised state: ct hlist pointer is garbage; looks like the ct hash value (hence crash). ct->status is equal to IPS_CONFIRMED|IPS_DYING, which is expected ct->timeout is 30000 (=30s), which is unexpected. Everything else looks like normal udp conntrack entry. If we ignore ct->status and pretend its 0, the entry matches those that are newly allocated but not yet inserted into the hash: - ct hlist pointers are overloaded and store/cache the raw tuple hash - ct->timeout matches the relative time expected for a new udp flow rather than the absolute 'jiffies' value. If it were not for the presence of IPS_CONFIRMED, __nf_conntrack_find_get() would have skipped the entry. Theory is that we did hit following race: cpu x cpu y cpu z found entry E found entry E E is expired nf_ct_delete() return E to rcu slab init_conntrack E is re-inited, ct->status set to 0 reply tuplehash hnnode.pprev stores hash value. cpu y found E right before it was deleted on cpu x. E is now re-inited on cpu z. cpu y was preempted before checking for expiry and/or confirm bit. ->refcnt set to 1 E now owned by skb ->timeout set to 30000 If cpu y were to resume now, it would observe E as expired but would skip E due to missing CONFIRMED bit. nf_conntrack_confirm gets called sets: ct->status |= CONFIRMED This is wrong: E is not yet added to hashtable. cpu y resumes, it observes E as expired but CONFIRMED: nf_ct_expired() -> yes (ct->timeout is 30s) confirmed bit set. cpu y will try to delete E from the hashtable: nf_ct_delete() -> set DYING bit __nf_ct_delete_from_lists Even this scenario doesn't guarantee a crash: cpu z still holds the table bucket lock(s) so y blocks: wait for spinlock held by z CONFIRMED is set but there is no guarantee ct will be added to hash: "chaintoolong" or "clash resolution" logic both skip the insert step. reply hnnode.pprev still stores the hash value. unlocks spinlock return NF_DROP In case CPU z does insert the entry into the hashtable, cpu y will unlink E again right away but no crash occurs. Without 'cpu y' race, 'garbage' hlist is of no consequence: ct refcnt remains at 1, eventually skb will be free'd and E gets destroyed via: nf_conntrack_put -> nf_conntrack_destroy -> nf_ct_destroy. To resolve this, move the IPS_CONFIRMED assignment after the table insertion but before the unlock. Pablo points out that the confirm-bit-store could be reordered to happen before hlist add resp. the timeout fixup, so switch to set_bit and before_atomic memory barrier to prevent this. It doesn't matter if other CPUs can observe a newly inserted entry right before the CONFIRMED bit was set: Such event cannot be distinguished from above "E is the old incarnation" case: the entry will be skipped. Also change nf_ct_should_gc() to first check the confirmed bit. The gc sequence is: 1. Check if entry has expired, if not skip to next entry 2. Obtain a reference to the expired entry. 3. Call nf_ct_should_gc() to double-check step 1. nf_ct_should_gc() is thus called only for entries that already failed an expiry check. After this patch, once the confirmed bit check passes ct->timeout has been altered to reflect the absolute 'best before' date instead of a relative time. Step 3 will therefore not remove the entry. Without this change to nf_ct_should_gc() we could still get this sequence: 1. Check if entry has expired. 2. Obtain a reference. 3. Call nf_ct_should_gc() to double-check step 1: 4 - entry is still observed as expired 5 - meanwhile, ct->timeout is corrected to absolute value on other CPU and confirm bit gets set 6 - confirm bit is seen 7 - valid entry is removed again First do check 6), then 4) so the gc expiry check always picks up either confirmed bit unset (entry gets skipped) or expiry re-check failure for re-inited conntrack objects. This change cannot be backported to releases before 5.19. Without commit 8a75a2c17410 ("netfilter: conntrack: remove unconfirmed list") |= IPS_CONFIRMED line cannot be moved without further changes. Cc: Razvan Cojocaru Link: https://lore.kernel.org/netfilter-devel/20250627142758.25664-1-fw@strlen.de/ Link: https://lore.kernel.org/netfilter-devel/4239da15-83ff-4ca4-939d-faef283471bb@gmail.com/ Fixes: 1397af5bfd7d ("netfilter: conntrack: remove the percpu dying list") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 3f02a45773e8..ca26274196b9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -306,8 +306,19 @@ static inline bool nf_ct_is_expired(const struct nf_conn *ct) /* use after obtaining a reference count */ static inline bool nf_ct_should_gc(const struct nf_conn *ct) { - return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && - !nf_ct_is_dying(ct); + if (!nf_ct_is_confirmed(ct)) + return false; + + /* load ct->timeout after is_confirmed() test. + * Pairs with __nf_conntrack_confirm() which: + * 1. Increases ct->timeout value + * 2. Inserts ct into rcu hlist + * 3. Sets the confirmed bit + * 4. Unlocks the hlist lock + */ + smp_acquire__after_ctrl_dep(); + + return nf_ct_is_expired(ct) && !nf_ct_is_dying(ct); } #define NF_CT_DAY (86400 * HZ) -- cgit v1.2.3 From 962fb1f651c2cf2083e0c3ef53ba69e3b96d3fbc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Jul 2025 08:43:42 +0100 Subject: rxrpc: Fix recv-recv race of completed call If a call receives an event (such as incoming data), the call gets placed on the socket's queue and a thread in recvmsg can be awakened to go and process it. Once the thread has picked up the call off of the queue, further events will cause it to be requeued, and once the socket lock is dropped (recvmsg uses call->user_mutex to allow the socket to be used in parallel), a second thread can come in and its recvmsg can pop the call off the socket queue again. In such a case, the first thread will be receiving stuff from the call and the second thread will be blocked on call->user_mutex. The first thread can, at this point, process both the event that it picked call for and the event that the second thread picked the call for and may see the call terminate - in which case the call will be "released", decoupling the call from the user call ID assigned to it (RXRPC_USER_CALL_ID in the control message). The first thread will return okay, but then the second thread will wake up holding the user_mutex and, if it sees that the call has been released by the first thread, it will BUG thusly: kernel BUG at net/rxrpc/recvmsg.c:474! Fix this by just dequeuing the call and ignoring it if it is seen to be already released. We can't tell userspace about it anyway as the user call ID has become stale. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Reported-by: Junvyyang, Tencent Zhuque Lab Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: LePremierHomme cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250717074350.3767366-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 378d2dfc7392..e7dcfb1369b6 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -330,12 +330,15 @@ EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ + EM(rxrpc_call_see_already_released, "SEE alrdy-rl") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ EM(rxrpc_call_see_connected, "SEE connect ") \ EM(rxrpc_call_see_conn_abort, "SEE conn-abt") \ + EM(rxrpc_call_see_discard, "SEE discard ") \ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ + EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ -- cgit v1.2.3 From 2fd895842d49c23137ae48252dd211e5d6d8a3ed Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Jul 2025 08:43:43 +0100 Subject: rxrpc: Fix notification vs call-release vs recvmsg When a call is released, rxrpc takes the spinlock and removes it from ->recvmsg_q in an effort to prevent racing recvmsg() invocations from seeing the same call. Now, rxrpc_recvmsg() only takes the spinlock when actually removing a call from the queue; it doesn't, however, take it in the lead up to that when it checks to see if the queue is empty. It *does* hold the socket lock, which prevents a recvmsg/recvmsg race - but this doesn't prevent sendmsg from ending the call because sendmsg() drops the socket lock and relies on the call->user_mutex. Fix this by firstly removing the bit in rxrpc_release_call() that dequeues the released call and, instead, rely on recvmsg() to simply discard released calls (done in a preceding fix). Secondly, rxrpc_notify_socket() is abandoned if the call is already marked as released rather than trying to be clever by setting both pointers in call->recvmsg_link to NULL to trick list_empty(). This isn't perfect and can still race, resulting in a released call on the queue, but recvmsg() will now clean that up. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: Junvyyang, Tencent Zhuque Lab cc: LePremierHomme cc: Simon Horman cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20250717074350.3767366-4-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e7dcfb1369b6..de6f6d25767c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -322,10 +322,10 @@ EM(rxrpc_call_put_kernel, "PUT kernel ") \ EM(rxrpc_call_put_poke, "PUT poke ") \ EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \ + EM(rxrpc_call_put_release_recvmsg_q, "PUT rls-rcmq") \ EM(rxrpc_call_put_release_sock, "PUT rls-sock") \ EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \ - EM(rxrpc_call_put_unnotify, "PUT unnotify") \ EM(rxrpc_call_put_userid_exists, "PUT u-exists") \ EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ @@ -338,6 +338,7 @@ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ + EM(rxrpc_call_see_notify_released, "SEE nfy-rlsd") \ EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ -- cgit v1.2.3