From 45e1be5ddec98db71e7481fa7a3005673200d85c Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 2 Dec 2025 18:36:20 +0100 Subject: dt-bindings: power: qcom,rpmpd: Add SC8280XP_MXC_AO Not sure how useful it's gonna be in practice, but the definition is missing (unlike the previously-unused SC8280XP_MXC-non-_AO), so add it to allow the driver to create the corresponding pmdomain. Fixes: dbfb5f94e084 ("dt-bindings: power: rpmpd: Add sc8280xp RPMh power-domains") Acked-by: Rob Herring (Arm) Signed-off-by: Konrad Dybcio Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20251202-topic-8280_mxc-v2-1-46cdf47a829e@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/power/qcom,rpmhpd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/power/qcom,rpmhpd.h b/include/dt-bindings/power/qcom,rpmhpd.h index 50e7c886709d..06851363ae0e 100644 --- a/include/dt-bindings/power/qcom,rpmhpd.h +++ b/include/dt-bindings/power/qcom,rpmhpd.h @@ -264,5 +264,6 @@ #define SC8280XP_NSP 13 #define SC8280XP_QPHY 14 #define SC8280XP_XO 15 +#define SC8280XP_MXC_AO 16 #endif -- cgit v1.2.3 From 9910159f06590c17df4fbddedaabb4c0201cc4cb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 15 Dec 2025 14:17:23 +0100 Subject: iio: core: add separate lockdep class for info_exist_lock When one iio device is a consumer of another, it is possible that the ->info_exist_lock of both ends up being taken when reading the value of the consumer device. Since they currently belong to the same lockdep class (being initialized in a single location with mutex_init()), that results in a lockdep warning

 CPU0
 ----
 lock(&iio_dev_opaque->info_exist_lock);
 lock(&iio_dev_opaque->info_exist_lock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

4 locks held by sensors/414:
 #0: c31fd6dc (&p->lock){+.+.}-{3:3}, at: seq_read_iter+0x44/0x4e4
 #1: c4f5a1c4 (&of->mutex){+.+.}-{3:3}, at: kernfs_seq_start+0x1c/0xac
 #2: c2827548 (kn->active#34){.+.+}-{0:0}, at: kernfs_seq_start+0x30/0xac
 #3: c1dd2b68 (&iio_dev_opaque->info_exist_lock){+.+.}-{3:3}, at: iio_read_channel_processed_scale+0x24/0xd8

stack backtrace:
CPU: 0 UID: 0 PID: 414 Comm: sensors Not tainted 6.17.11 #5 NONE
Hardware name: Generic AM33XX (Flattened Device Tree)
Call trace:
 unwind_backtrace from show_stack+0x10/0x14
 show_stack from dump_stack_lvl+0x44/0x60
 dump_stack_lvl from print_deadlock_bug+0x2b8/0x334
 print_deadlock_bug from __lock_acquire+0x13a4/0x2ab0
 __lock_acquire from lock_acquire+0xd0/0x2c0
 lock_acquire from __mutex_lock+0xa0/0xe8c
 __mutex_lock from mutex_lock_nested+0x1c/0x24
 mutex_lock_nested from iio_read_channel_raw+0x20/0x6c
 iio_read_channel_raw from rescale_read_raw+0x128/0x1c4
 rescale_read_raw from iio_channel_read+0xe4/0xf4
 iio_channel_read from iio_read_channel_processed_scale+0x6c/0xd8
 iio_read_channel_processed_scale from iio_hwmon_read_val+0x68/0xbc
 iio_hwmon_read_val from dev_attr_show+0x18/0x48
 dev_attr_show from sysfs_kf_seq_show+0x80/0x110
 sysfs_kf_seq_show from seq_read_iter+0xdc/0x4e4
 seq_read_iter from vfs_read+0x238/0x2e4
 vfs_read from ksys_read+0x6c/0xec
 ksys_read from ret_fast_syscall+0x0/0x1c

Just as the mlock_key already has its own lockdep class, add a lock_class_key for the info_exist mutex. Note that this has in theory been a problem since before IIO first left staging, but it only occurs when a chain of consumers is in use and that is not often done.
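The fix follows the pattern the IIO core already uses for mlock/mlock_key; a minimal sketch of that pattern (the init helper name is made up for illustration, this is not the exact hunk from industrialio-core.c):

  #include <linux/lockdep.h>
  #include <linux/mutex.h>

  static void example_init_info_exist_lock(struct iio_dev_opaque *iio_dev_opaque)
  {
          mutex_init(&iio_dev_opaque->info_exist_lock);
          /*
           * Registering a per-device key gives each mutex its own lockdep
           * class, so a consumer taking its own info_exist_lock nested
           * inside the producer's is no longer reported as recursion on a
           * single class.
           */
          lockdep_register_key(&iio_dev_opaque->info_exist_key);
          lockdep_set_class(&iio_dev_opaque->info_exist_lock,
                            &iio_dev_opaque->info_exist_key);
  }

The matching lockdep_unregister_key() belongs in the device-free path, as is already done for mlock_key.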
Fixes: ac917a81117c ("staging:iio:core set the iio_dev.info pointer to null on unregister under lock.") Signed-off-by: Rasmus Villemoes Reviewed-by: Peter Rosin Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-opaque.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 4247497f3f8b..b87841a355f8 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -14,6 +14,7 @@ * @mlock: lock used to prevent simultaneous device state changes * @mlock_key: lockdep class for iio_dev lock * @info_exist_lock: lock to prevent use during removal + * @info_exist_key: lockdep class for info_exist lock * @trig_readonly: mark the current trigger immutable * @event_interface: event chrdevs associated with interrupt lines * @attached_buffers: array of buffers statically attached by the driver @@ -47,6 +48,7 @@ struct iio_dev_opaque { struct mutex mlock; struct lock_class_key mlock_key; struct mutex info_exist_lock; + struct lock_class_key info_exist_key; bool trig_readonly; struct iio_event_interface *event_interface; struct iio_buffer **attached_buffers; -- cgit v1.2.3 From 6626734dd2b151753e134730e27d17e64784c345 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2026 15:46:37 +0000 Subject: mm_zone: Generalise has_managed_dma() It would be useful to be able to check for potential DMA pages beyond just ZONE_DMA - generalise the existing has_managed_dma() function to allow checking other zones too. Signed-off-by: Robin Murphy Acked-by: David Hildenbrand (Red Hat) Acked-by: Mike Rapoport (Microsoft) Tested-by: Vladimir Kondratiev Reviewed-by: Baoquan He Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/bd002d2351074e57be1ca08f03f333debac658fb.1768230104.git.robin.murphy@arm.com --- include/linux/mmzone.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 75ef7c9f9307..fc5d6c88d2f0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1648,14 +1648,15 @@ static inline int is_highmem(const struct zone *zone) return is_highmem_idx(zone_idx(zone)); } -#ifdef CONFIG_ZONE_DMA -bool has_managed_dma(void); -#else +bool has_managed_zone(enum zone_type zone); static inline bool has_managed_dma(void) { +#ifdef CONFIG_ZONE_DMA + return has_managed_zone(ZONE_DMA); +#else return false; -} #endif +} #ifndef CONFIG_NUMA -- cgit v1.2.3 From 69c88a6a49cfe1fd6bd5c1166d02a7dd29de9569 Mon Sep 17 00:00:00 2001 From: "Anirudh Rayabharam (Microsoft)" Date: Mon, 5 Jan 2026 12:28:36 +0000 Subject: mshv: add definitions for arm64 gpa intercepts Add definitions required for handling GPA intercepts on arm64. 
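For orientation, a sketch of how a consumer of these definitions might pull the interesting fields out of a GPA intercept message (the handler and its flow are illustrative, not part of this patch):

  #include <hyperv/hvhdk.h>
  #include <linux/printk.h>

  static void example_handle_gpa_intercept(
                  const struct hv_arm64_memory_intercept_message *msg)
  {
          /* gva_gpa_valid indicates the GVA/GPA fields are usable. */
          if (!msg->memory_access_info.gva_gpa_valid)
                  return;

          pr_debug("GPA intercept: pc=%#llx gpa=%#llx access=%u\n",
                   msg->header.pc, msg->guest_physical_address,
                   msg->header.intercept_access_type);
  }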
Signed-off-by: Anirudh Rayabharam (Microsoft) Reviewed-by: Stanislav Kinsburskii Signed-off-by: Wei Liu --- include/hyperv/hvhdk.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include') diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 469186df7826..08965970c17d 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -800,6 +800,53 @@ struct hv_x64_memory_intercept_message { u8 instruction_bytes[16]; } __packed; +#if IS_ENABLED(CONFIG_ARM64) +union hv_arm64_vp_execution_state { + u16 as_uint16; + struct { + u16 cpl:2; /* Exception Level (EL) */ + u16 debug_active:1; + u16 interruption_pending:1; + u16 vtl:4; + u16 virtualization_fault_active:1; + u16 reserved:7; + } __packed; +}; + +struct hv_arm64_intercept_message_header { + u32 vp_index; + u8 instruction_length; + u8 intercept_access_type; + union hv_arm64_vp_execution_state execution_state; + u64 pc; + u64 cpsr; +} __packed; + +union hv_arm64_memory_access_info { + u8 as_uint8; + struct { + u8 gva_valid:1; + u8 gva_gpa_valid:1; + u8 hypercall_output_pending:1; + u8 reserved:5; + } __packed; +}; + +struct hv_arm64_memory_intercept_message { + struct hv_arm64_intercept_message_header header; + u32 cache_type; /* enum hv_cache_type */ + u8 instruction_byte_count; + union hv_arm64_memory_access_info memory_access_info; + u16 reserved1; + u8 instruction_bytes[4]; + u32 reserved2; + u64 guest_virtual_address; + u64 guest_physical_address; + u64 syndrome; +} __packed; + +#endif /* CONFIG_ARM64 */ + /* * Dispatch state for the VP communicated by the hypervisor to the * VP-dispatching thread in the root on return from HVCALL_DISPATCH_VP. -- cgit v1.2.3 From a995fe1a3aa78b7d06cc1cc7b6b8436c5e93b07f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 7 Jan 2026 11:35:05 +0100 Subject: rust: driver: drop device private data post unbind Currently, the driver's device private data is allocated and initialized from driver core code called from bus abstractions after the driver's probe() callback returned the corresponding initializer. Similarly, the driver's device private data is dropped within the remove() callback of bus abstractions after calling the remove() callback of the corresponding driver. However, commit 6f61a2637abe ("rust: device: introduce Device::drvdata()") introduced an accessor for the driver's device private data for a Device, i.e. a device that is currently bound to a driver. Obviously, this is in conflict with dropping the driver's device private data in remove(), since a device can not be considered to be fully unbound after remove() has finished: We also have to consider registrations guarded by devres - such as IRQ or class device registrations - which are torn down after remove() in devres_release_all(). Thus, it can happen that, for instance, a class device or IRQ callback still calls Device::drvdata(), which then runs concurrently to remove() (which sets dev->driver_data to NULL and drops the driver's device private data), before devres_release_all() started to tear down the corresponding registration. This is because devres guarded registrations can, as expected, access the corresponding Device that defines their scope. In C it simply is the driver's responsibility to ensure that its device private data is freed after e.g. an IRQ registration is unregistered. Typically, C drivers achieve this by allocating their device private data with e.g. devm_kzalloc() before doing anything else, i.e. before e.g. 
registering an IRQ with devm_request_threaded_irq(), relying on the reverse order cleanup of devres. Technically, we could do something similar in Rust. However, the resulting code would be pretty messy: In Rust we have to differentiate between allocated but uninitialized memory and initialized memory in the type system. Thus, we would need to somehow keep track of whether the driver's device private data object has been initialized (i.e. probe() was successful and returned a valid initializer for this memory) and conditionally call the destructor of the corresponding object when it is freed. This is because we'd need to allocate and register the memory of the driver's device private data *before* it is initialized by the initializer returned by the driver's probe() callback, because the driver could already register devres guarded registrations within probe() outside of the driver's device private data initializer. Luckily there is a much simpler solution: Instead of dropping the driver's device private data at the end of remove(), we just drop it after the device has been fully unbound, i.e. after all devres callbacks have been processed. For this, we introduce a new post_unbind() callback private to the driver-core, i.e. the callback is neither exposed to drivers, nor to bus abstractions. This way, the driver-core code can simply continue to conditionally allocate the memory for the driver's device private data when the driver's initializer is returned from probe() - no change needed - and drop it when the driver-core code receives the post_unbind() callback. Closes: https://lore.kernel.org/all/DEZMS6Y4A7XE.XE7EUBT5SJFJ@kernel.org/ Fixes: 6f61a2637abe ("rust: device: introduce Device::drvdata()") Acked-by: Alice Ryhl Acked-by: Greg Kroah-Hartman Acked-by: Igor Korotin Link: https://patch.msgid.link/20260107103511.570525-7-dakr@kernel.org [ Remove #ifdef CONFIG_RUST, rename post_unbind() to post_unbind_rust(). - Danilo] Signed-off-by: Danilo Krummrich --- include/linux/device/driver.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index cd8e0f0a634b..bbc67ec513ed 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -85,6 +85,8 @@ enum probe_type { * uevent. * @p: Driver core's private data, no one other than the driver * core can touch this. + * @p_cb: Callbacks private to the driver core; no one other than the + * driver core is allowed to touch this. * * The device driver-model tracks all of the drivers known to the system. * The main reason for this tracking is to enable the driver core to match @@ -119,6 +121,13 @@ struct device_driver { void (*coredump) (struct device *dev); struct driver_private *p; + struct { + /* + * Called after remove() and after all devres entries have been + * processed. This is a Rust only callback. + */ + void (*post_unbind_rust)(struct device *dev); + } p_cb; }; -- cgit v1.2.3 From 10d28cffb3f6ec7ad67f0a4cd32c2afa92909452 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 3 Dec 2025 16:24:38 +0000 Subject: comedi: Fix getting range information for subdevices 16 to 255 The `COMEDI_RANGEINFO` ioctl does not work properly for subdevice indices above 15. Currently, the only in-tree COMEDI drivers that support more than 16 subdevices are the "8255" driver and the "comedi_bond" driver. Making the ioctl work for subdevice indices up to 255 is achievable. 
It needs minor changes to the handling of the `COMEDI_RANGEINFO` and `COMEDI_CHANINFO` ioctls that should be mostly harmless to user-space, apart from making them less broken. Details follow... The `COMEDI_RANGEINFO` ioctl command gets the list of supported ranges (usually with units of volts or milliamps) for a COMEDI subdevice or channel. (Only some subdevices have per-channel range tables, indicated by the `SDF_RANGETYPE` flag in the subdevice information.) It uses a `range_type` value and a user-space pointer, both supplied by user-space, but the `range_type` value should match what was obtained using the `COMEDI_CHANINFO` ioctl (if the subdevice has per-channel range tables) or `COMEDI_SUBDINFO` ioctl (if the subdevice uses a single range table for all channels). Bits 15 to 0 of the `range_type` value contain the length of the range table, which is the only part that user-space should care about (so it can use a suitably sized buffer to fetch the range table). Bits 23 to 16 store the channel index, which is assumed to be no more than 255 if the subdevice has per-channel range tables, and is set to 0 if the subdevice has a single range table. For `range_type` values produced by the `COMEDI_SUBDINFO` ioctl, bits 31 to 24 contain the subdevice index, which is assumed to be no more than 255. But for `range_type` values produced by the `COMEDI_CHANINFO` ioctl, bits 27 to 24 contain the subdevice index, which is assumed to be no more than 15, and bits 31 to 28 contain the COMEDI device's minor device number for some unknown reason lost in the mists of time. The `COMEDI_RANGEINFO` ioctl extracts the length from bits 15 to 0 of the user-supplied `range_type` value, extracts the channel index from bits 23 to 16 (only used if the subdevice has per-channel range tables), extracts the subdevice index from bits 27 to 24, and ignores bits 31 to 28. So for subdevice indices 16 to 255, the `COMEDI_SUBDINFO` or `COMEDI_CHANINFO` ioctl will report a `range_type` value that doesn't work with the `COMEDI_RANGEINFO` ioctl. It will either get the range table for the subdevice index modulo 16, or will fail with `-EINVAL`. To fix this, always use bits 31 to 24 of the `range_type` value to hold the subdevice index (assumed to be no more than 255). This affects the `COMEDI_CHANINFO` and `COMEDI_RANGEINFO` ioctls. There should not be anything in user-space that depends on the old, broken usage, although it may now see different values in bits 31 to 28 of the `range_type` values reported by the `COMEDI_CHANINFO` ioctl for subdevices that have per-channel range tables. User-space should not be trying to decode bits 31 to 16 of the `range_type` values anyway. Fixes: ed9eccbe8970 ("Staging: add comedi core") Cc: stable@vger.kernel.org #5.17+ Signed-off-by: Ian Abbott Link: https://patch.msgid.link/20251203162438.176841-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/comedi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/comedi.h b/include/uapi/linux/comedi.h index 7314e5ee0a1e..798ec9a39e12 100644 --- a/include/uapi/linux/comedi.h +++ b/include/uapi/linux/comedi.h @@ -640,7 +640,7 @@ struct comedi_chaninfo { /** * struct comedi_rangeinfo - used to retrieve the range table for a channel - * @range_type: Encodes subdevice index (bits 27:24), channel index + * @range_type: Encodes subdevice index (bits 31:24), channel index * (bits 23:16) and range table length (bits 15:0).
* @range_ptr: Pointer to array of @struct comedi_krange to be filled * in with the range table for the channel or subdevice. -- cgit v1.2.3 From 2c28769a51deb6022d7fbd499987e237a01dd63a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 14 Jan 2026 22:03:23 +0000 Subject: rxrpc: Fix recvmsg() unconditional requeue If rxrpc_recvmsg() fails because MSG_DONTWAIT was specified but the call at the front of the recvmsg queue already has its mutex locked, it requeues the call - whether or not the call is already queued. The call may be on the queue because MSG_PEEK was also passed and so the call was not dequeued or because the I/O thread requeued it. The unconditional requeue may then corrupt the recvmsg queue, leading to things like UAFs or refcount underruns. Fix this by only requeuing the call if it isn't already on the queue - and moving it to the front if it is already queued. If we don't queue it, we have to put the ref we obtained by dequeuing it. Also, MSG_PEEK doesn't dequeue the call so shouldn't call rxrpc_notify_socket() for the call if we didn't use up all the data on the queue, so fix that also. Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg") Reported-by: Faith Reported-by: Pumpkin Chang Signed-off-by: David Howells Acked-by: Marc Dionne cc: Nir Ohfeld cc: Willy Tarreau cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/95163.1768428203@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index de6f6d25767c..869f97c9bf73 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -322,6 +322,7 @@ EM(rxrpc_call_put_kernel, "PUT kernel ") \ EM(rxrpc_call_put_poke, "PUT poke ") \ EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \ + EM(rxrpc_call_put_recvmsg_peek_nowait, "PUT peek-nwt") \ EM(rxrpc_call_put_release_recvmsg_q, "PUT rls-rcmq") \ EM(rxrpc_call_put_release_sock, "PUT rls-sock") \ EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \ @@ -340,6 +341,9 @@ EM(rxrpc_call_see_input, "SEE input ") \ EM(rxrpc_call_see_notify_released, "SEE nfy-rlsd") \ EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ + EM(rxrpc_call_see_recvmsg_requeue, "SEE recv-rqu") \ + EM(rxrpc_call_see_recvmsg_requeue_first, "SEE recv-rqF") \ + EM(rxrpc_call_see_recvmsg_requeue_move, "SEE recv-rqM") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ -- cgit v1.2.3 From 6ac433f8b2590b09ca00863d218665729ac985f7 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 24 Dec 2025 12:33:57 -0500 Subject: mm: rename cpu_bitmap field to flexible_array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cpu_bitmap flexible array now contains more than just the cpu_bitmap. In preparation for changing the static mm_struct definitions to cover for the additional space required, change the cpu_bitmap type from "unsigned long" to "char", require an unsigned long alignment of the flexible array, and rename the field from "cpu_bitmap" to "flexible_array". Introduce the MM_STRUCT_FLEXIBLE_ARRAY_INIT macro to statically initialize the flexible array. This covers the init_mm and efi_mm static definitions. This is a preparation step for fixing the missing mm_cid size for static mm_struct definitions. 
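The renamed field remains plain byte storage that the accessors carve regions out of; condensed, the pointer arithmetic looks like this (the mm_flex_region() helper is made up for illustration, the offsets mirror mm_cpumask() and mm_cpus_allowed() in the patch):

  #include <linux/cpumask.h>
  #include <linux/mm_types.h>

  static inline void *mm_flex_region(struct mm_struct *mm, size_t offset)
  {
          return (char *)mm + offsetof(struct mm_struct, flexible_array) + offset;
  }

  /* The cpu_bitmap is carved out at offset 0 ... */
  static inline struct cpumask *sketch_mm_cpumask(struct mm_struct *mm)
  {
          return mm_flex_region(mm, 0);
  }

  /* ... and the mm_cid masks follow it. */
  static inline struct cpumask *sketch_mm_cpus_allowed(struct mm_struct *mm)
  {
          return mm_flex_region(mm, cpumask_size());
  }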
Link: https://lkml.kernel.org/r/20251224173358.647691-3-mathieu.desnoyers@efficios.com Fixes: af7f588d8f73 ("sched: Introduce per-memory-map concurrency ID") Signed-off-by: Mathieu Desnoyers Reviewed-by: Thomas Gleixner Cc: Mark Brown Cc: Aboorva Devarajan Cc: Al Viro Cc: Baolin Wang Cc: Christan König Cc: Christian Brauner Cc: Christoph Lameter Cc: David Hildenbrand Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: "Liam R . Howlett" Cc: Lorenzo Stoakes Cc: Martin Liu Cc: Masami Hiramatsu Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Michal Hocko Cc: Mike Rapoport Cc: "Paul E. McKenney" Cc: Roman Gushchin Cc: SeongJae Park Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sweet Tea Dorminy Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wei Yang Cc: Yu Zhao Cc: Peter Zijlstra (Intel) Cc: Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 42af2292951d..110b319a2ffb 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1329,7 +1329,7 @@ struct mm_struct { * The mm_cpumask needs to be at the end of mm_struct, because it * is dynamically sized based on nr_cpu_ids. */ - unsigned long cpu_bitmap[]; + char flexible_array[] __aligned(__alignof__(unsigned long)); }; /* Copy value to the first system word of mm flags, non-atomically. */ @@ -1366,19 +1366,24 @@ static inline void __mm_flags_set_mask_bits_word(struct mm_struct *mm, MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; +#define MM_STRUCT_FLEXIBLE_ARRAY_INIT \ +{ \ + [0 ... sizeof(cpumask_t)-1] = 0 \ +} + /* Pointer magic because the dynamic array size confuses some compilers. */ static inline void mm_init_cpumask(struct mm_struct *mm) { unsigned long cpu_bitmap = (unsigned long)mm; - cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap); + cpu_bitmap += offsetof(struct mm_struct, flexible_array); cpumask_clear((struct cpumask *)cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { - return (struct cpumask *)&mm->cpu_bitmap; + return (struct cpumask *)&mm->flexible_array; } #ifdef CONFIG_LRU_GEN @@ -1469,7 +1474,7 @@ static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm) { unsigned long bitmap = (unsigned long)mm; - bitmap += offsetof(struct mm_struct, cpu_bitmap); + bitmap += offsetof(struct mm_struct, flexible_array); /* Skip cpu_bitmap */ bitmap += cpumask_size(); return (struct cpumask *)bitmap; -- cgit v1.2.3 From be31340a4cc259340044b7fc4f7e97f58c74ee8e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 24 Dec 2025 12:33:58 -0500 Subject: mm: take into account mm_cid size for mm_struct static definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both init_mm and efi_mm static definitions need to make room for the 2 mm_cid cpumasks. This fixes possible out-of-bounds accesses to init_mm and efi_mm. Add a space between # and define for the mm_alloc_cid() definition to make it consistent with the coding style used in the rest of this header file. 
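Why 2 * sizeof(cpumask_t) is always enough: with CONFIG_SCHED_MM_CID enabled, the runtime layout appends cpumask_size() + bitmap_size(num_possible_cpus()) bytes, and each term is bounded by sizeof(cpumask_t) (an NR_CPUS-bit map). A sketch of that invariant as a runtime check (the check function itself is illustrative, not in the patch):

  #include <linux/bitmap.h>
  #include <linux/bug.h>
  #include <linux/cpumask.h>

  static void sketch_check_mm_cid_static_size(void)
  {
          /* Space carved out of flexible_array for mm_cid at runtime: */
          unsigned int runtime = cpumask_size() +
                                 bitmap_size(num_possible_cpus());

          /* num_possible_cpus() <= NR_CPUS, so this can never fire. */
          WARN_ON(runtime > MM_CID_STATIC_SIZE);
  }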
Link: https://lkml.kernel.org/r/20251224173358.647691-4-mathieu.desnoyers@efficios.com Fixes: af7f588d8f73 ("sched: Introduce per-memory-map concurrency ID") Signed-off-by: Mathieu Desnoyers Reviewed-by: Thomas Gleixner Cc: Mark Brown Cc: Aboorva Devarajan Cc: Al Viro Cc: Baolin Wang Cc: Christan König Cc: Christian Brauner Cc: Christoph Lameter Cc: David Hildenbrand Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: "Liam R . Howlett" Cc: Lorenzo Stoakes Cc: Martin Liu Cc: Masami Hiramatsu Cc: Mateusz Guzik Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Michal Hocko Cc: Mike Rapoport Cc: "Paul E. McKenney" Cc: Roman Gushchin Cc: SeongJae Park Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sweet Tea Dorminy Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wei Yang Cc: Yu Zhao Cc: Peter Zijlstra (Intel) Cc: Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 110b319a2ffb..aa4639888f89 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1368,7 +1368,7 @@ extern struct mm_struct init_mm; #define MM_STRUCT_FLEXIBLE_ARRAY_INIT \ { \ - [0 ... sizeof(cpumask_t)-1] = 0 \ + [0 ... sizeof(cpumask_t) + MM_CID_STATIC_SIZE - 1] = 0 \ } /* Pointer magic because the dynamic array size confuses some compilers. */ @@ -1500,7 +1500,7 @@ static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct * mm_init_cid(mm, p); return 0; } -#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__)) +# define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__)) static inline void mm_destroy_cid(struct mm_struct *mm) { @@ -1514,6 +1514,8 @@ static inline unsigned int mm_cid_size(void) return cpumask_size() + bitmap_size(num_possible_cpus()); } +/* Use 2 * NR_CPUS as worse case for static allocation. */ +# define MM_CID_STATIC_SIZE (2 * sizeof(cpumask_t)) #else /* CONFIG_SCHED_MM_CID */ static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { } static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; } @@ -1522,6 +1524,7 @@ static inline unsigned int mm_cid_size(void) { return 0; } +# define MM_CID_STATIC_SIZE 0 #endif /* CONFIG_SCHED_MM_CID */ struct mmu_gather; -- cgit v1.2.3 From f9a49aa302a05e91ca01f69031cb79a0ea33031f Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 5 Jan 2026 13:17:27 -0800 Subject: fs/writeback: skip AS_NO_DATA_INTEGRITY mappings in wait_sb_inodes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Above the while() loop in wait_sb_inodes(), we document that we must wait for all pages under writeback for data integrity. Consequently, if a mapping, like fuse, traditionally does not have data integrity semantics, there is no need to wait at all; we can simply skip these inodes. This restores fuse back to prior behavior where syncs are no-ops. This fixes a user regression where if a system is running a faulty fuse server that does not reply to issued write requests, this causes wait_sb_inodes() to wait forever. Link: https://lkml.kernel.org/r/20260105211737.4105620-2-joannelkoong@gmail.com Fixes: 0c58a97f919c ("fuse: remove tmp folio for writebacks and internal rb tree") Signed-off-by: Joanne Koong Reported-by: Athul Krishna Reported-by: J. Neuschäfer Reviewed-by: Bernd Schubert Tested-by: J. 
Neuschäfer Cc: Alexander Viro Cc: Bernd Schubert Cc: Bonaccorso Salvatore Cc: Christian Brauner Cc: David Hildenbrand Cc: Jan Kara Cc: "Liam R. Howlett" Cc: Lorenzo Stoakes Cc: "Matthew Wilcox (Oracle)" Cc: Michal Hocko Cc: Mike Rapoport Cc: Miklos Szeredi Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 31a848485ad9..ec442af3f886 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -210,6 +210,7 @@ enum mapping_flags { AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't account usage to user cgroups */ + AS_NO_DATA_INTEGRITY = 11, /* no data integrity guarantees */ /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, @@ -345,6 +346,16 @@ static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct addres return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } +static inline void mapping_set_no_data_integrity(struct address_space *mapping) +{ + set_bit(AS_NO_DATA_INTEGRITY, &mapping->flags); +} + +static inline bool mapping_no_data_integrity(const struct address_space *mapping) +{ + return test_bit(AS_NO_DATA_INTEGRITY, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(const struct address_space *mapping) { return mapping->gfp_mask; -- cgit v1.2.3 From 50b359896fe55d0443ed550e1fabba71d242031a Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 18 Jan 2026 09:51:15 +0200 Subject: wifi: cfg80211: ignore link disabled flag from userspace When the AP has an advertised TID to Link Mapping (TTLM) it shall include the element in the association response. As such, when this element is present it needs to be used for the currently dormant links. See Draft P802.11REVmf_D1.0 section 35.3.7.2.3 ("Negotiation of TTLM") for the details. The flag is also not usable in case userspace wants to specify a negotiated TTLM during association. Note that for the link reconfiguration case, mac80211 did not use the information. Draft P802.11REVmf_D1.0 states in section 35.3.6.4 ("Link reconfiguration to the setup links) that we "shall operate with all the TIDs mapped to the newly added links ..." All this means that the flag is not needed. The implementation should parse the information from the association response. Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260118093904.754e057896a5.Ifd06f5ef839a93bfd54d0593dc932870f95f3242@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 --- include/uapi/linux/nl80211.h | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 899f267b7cf9..2900202588a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3221,8 +3221,6 @@ struct cfg80211_auth_request { * if this is %NULL for a link, that link is not requested * @elems: extra elements for the per-STA profile for this link * @elems_len: length of the elements - * @disabled: If set this link should be included during association etc. but it - * should not be used until enabled by the AP MLD. * @error: per-link error code, must be <= 0. If there is an error, then the * operation as a whole must fail. 
*/ @@ -3230,7 +3228,6 @@ struct cfg80211_assoc_link { struct cfg80211_bss *bss; const u8 *elems; size_t elems_len; - bool disabled; int error; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8134f10e4e6c..8433bac48112 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2880,8 +2880,9 @@ enum nl80211_commands { * index. If the userspace includes more RNR elements than number of * MBSSID elements then these will be added in every EMA beacon. * - * @NL80211_ATTR_MLO_LINK_DISABLED: Flag attribute indicating that the link is - * disabled. + * @NL80211_ATTR_MLO_LINK_DISABLED: Unused. It was used to indicate that a link + * is disabled during association. However, the AP will send the + * information by including a TTLM in the association response. * * @NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA: Include BSS usage data, i.e. * include BSSes that can only be used in restricted scenarios and/or -- cgit v1.2.3 From ca1a47cd3f5f4c46ca188b1c9a27af87d1ab2216 Mon Sep 17 00:00:00 2001 From: "David Hildenbrand (Red Hat)" Date: Tue, 23 Dec 2025 22:40:34 +0100 Subject: mm/hugetlb: fix hugetlb_pmd_shared() Patch series "mm/hugetlb: fixes for PMD table sharing (incl. using mmu_gather)", v3. One functional fix, one performance regression fix, and two related comment fixes. I cleaned up my prototype I recently shared [1] for the performance fix, deferring most of the cleanups I had in the prototype to a later point. While doing that I identified the other things. The goal of this patch set is to be backported to stable trees "fairly" easily. At least patch #1 and #4. Patch #1 fixes hugetlb_pmd_shared() not detecting any sharing Patch #2 + #3 are simple comment fixes that patch #4 interacts with. Patch #4 is a fix for the reported performance regression due to excessive IPI broadcasts during fork()+exit(). The last patch is all about TLB flushes, IPIs and mmu_gather. Read: complicated There are plenty of cleanups in the future to be had + one reasonable optimization on x86. But that's all out of scope for this series. Runtime tested, with a focus on fixing the performance regression using the original reproducer [2] on x86. This patch (of 4): We switched from (wrongly) using the page count to an independent shared count. Now, shared page tables have a refcount of 1 (excluding speculative references) and instead use ptdesc->pt_share_count to identify sharing. We didn't convert hugetlb_pmd_shared(), so right now, we would never detect a shared PMD table as such, because sharing/unsharing no longer touches the refcount of a PMD table. Page migration, like mbind() or migrate_pages() would allow for migrating folios mapped into such shared PMD tables, even though the folios are not exclusive. In smaps we would account them as "private" although they are "shared", and we would be wrongly setting the PM_MMAP_EXCLUSIVE in the pagemap interface. Fix it by properly using ptdesc_pmd_is_shared() in hugetlb_pmd_shared(). 
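A condensed sketch of the bookkeeping this relies on (the lifecycle is compressed into one function for illustration; the helpers are the real ones from the shared-count scheme):

  #include <linux/mm.h>

  static void sketch_pmd_share_lifecycle(struct ptdesc *pt)
  {
          ptdesc_pmd_pts_inc(pt);                 /* huge_pmd_share(): gained a sharer */
          WARN_ON(!ptdesc_pmd_is_shared(pt));     /* pt_share_count is now non-zero */
          ptdesc_pmd_pts_dec(pt);                 /* huge_pmd_unshare(): sharer gone */
          /*
           * The refcount of the table page stayed at 1 throughout, which
           * is why the old page_count() > 1 test could never report
           * sharing.
           */
  }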
Link: https://lkml.kernel.org/r/20251223214037.580860-1-david@kernel.org Link: https://lkml.kernel.org/r/20251223214037.580860-2-david@kernel.org Link: https://lore.kernel.org/all/8cab934d-4a56-44aa-b641-bfd7e23bd673@kernel.org/ [1] Link: https://lore.kernel.org/all/8cab934d-4a56-44aa-b641-bfd7e23bd673@kernel.org/ [2] Fixes: 59d9094df3d7 ("mm: hugetlb: independent PMD page table shared count") Signed-off-by: David Hildenbrand (Red Hat) Reviewed-by: Rik van Riel Reviewed-by: Lance Yang Tested-by: Lance Yang Reviewed-by: Harry Yoo Tested-by: Laurence Oberman Reviewed-by: Lorenzo Stoakes Acked-by: Oscar Salvador Cc: Liu Shixin Cc: "Uschakow, Stanislav" Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 019a1c5281e4..03c8725efa28 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1326,7 +1326,7 @@ static inline __init void hugetlb_cma_reserve(int order) #ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING static inline bool hugetlb_pmd_shared(pte_t *pte) { - return page_count(virt_to_page(pte)) > 1; + return ptdesc_pmd_is_shared(virt_to_ptdesc(pte)); } #else static inline bool hugetlb_pmd_shared(pte_t *pte) -- cgit v1.2.3 From 8ce720d5bd91e9dc16db3604aa4b1bf76770a9a1 Mon Sep 17 00:00:00 2001 From: "David Hildenbrand (Red Hat)" Date: Tue, 23 Dec 2025 22:40:37 +0100 Subject: mm/hugetlb: fix excessive IPI broadcasts when unsharing PMD tables using mmu_gather As reported, ever since commit 1013af4f585f ("mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race") we can end up in some situations where we perform so many IPI broadcasts when unsharing hugetlb PMD page tables that it severely regresses some workloads. In particular, when we fork()+exit(), or when we munmap() a large area backed by many shared PMD tables, we perform one IPI broadcast per unshared PMD table. There are two optimizations to be had: (1) When we process (unshare) multiple such PMD tables, such as during exit(), it is sufficient to send a single IPI broadcast (as long as we respect locking rules) instead of one per PMD table. Locking prevents that any of these PMD tables could get reused before we drop the lock. (2) When we are not the last sharer (> 2 users including us), there is no need to send the IPI broadcast. The shared PMD tables cannot become exclusive (fully unshared) before an IPI will be broadcasted by the last sharer. Concurrent GUP-fast could walk into a PMD table just before we unshared it. It could then succeed in grabbing a page from the shared page table even after munmap() etc succeeded (and suppressed an IPI). But there is no difference compared to GUP-fast just sleeping for a while after grabbing the page and re-enabling IRQs. Most importantly, GUP-fast will never walk into page tables that are no-longer shared, because the last sharer will issue an IPI broadcast. (if ever required, checking whether the PUD changed in GUP-fast after grabbing the page like we do in the PTE case could handle this) So let's rework PMD sharing TLB flushing + IPI sync to use the mmu_gather infrastructure so we can implement these optimizations and demystify the code at least a bit. Extend the mmu_gather infrastructure to be able to deal with our special hugetlb PMD table sharing implementation. To make initialization of the mmu_gather easier when working on a single VMA (in particular, when dealing with hugetlb), provide tlb_gather_mmu_vma().
We'll consolidate the handling for (full) unsharing of PMD tables in tlb_unshare_pmd_ptdesc() and tlb_flush_unshared_tables(), and track in "struct mmu_gather" whether we had (full) unsharing of PMD tables. Because locking is very special (concurrent unsharing+reuse must be prevented), we disallow deferring flushing to tlb_finish_mmu() and instead require an explicit earlier call to tlb_flush_unshared_tables(). From hugetlb code, we call huge_pmd_unshare_flush() where we make sure that the expected lock protecting us from concurrent unsharing+reuse is still held. Check with a VM_WARN_ON_ONCE() in tlb_finish_mmu() that tlb_flush_unshared_tables() was properly called earlier. Document it all properly. Notes about tlb_remove_table_sync_one() interaction with unsharing: There are two fairly tricky things: (1) tlb_remove_table_sync_one() is a NOP on architectures without CONFIG_MMU_GATHER_RCU_TABLE_FREE. Here, the assumption is that the previous TLB flush would send an IPI to all relevant CPUs. Careful: some architectures like x86 only send IPIs to all relevant CPUs when tlb->freed_tables is set. The relevant architectures should be selecting MMU_GATHER_RCU_TABLE_FREE, but x86 might not do that in stable kernels and it might have been problematic before this patch. Also, the arch flushing behavior (independent of IPIs) is different when tlb->freed_tables is set. Do we have to enlighten them to also take care of tlb->unshared_tables? So far we didn't care, so hopefully we are fine. Of course, we could be setting tlb->freed_tables as well, but that might then unnecessarily flush too much, because the semantics of tlb->freed_tables are a bit fuzzy. This patch changes nothing in this regard. (2) tlb_remove_table_sync_one() is not a NOP on architectures with CONFIG_MMU_GATHER_RCU_TABLE_FREE that actually don't need a sync. Take x86 as an example: in the common case (!pv, !X86_FEATURE_INVLPGB) we still issue IPIs during TLB flushes and don't actually need the second tlb_remove_table_sync_one(). This optimization can be implemented on top of this, by checking e.g., in tlb_remove_table_sync_one() whether we really need IPIs. But as described in (1), it really must honor tlb->freed_tables then to send IPIs to all relevant CPUs. Notes on TLB flushing changes: (1) Flushing for non-shared PMD tables We're converting from flush_hugetlb_tlb_range() to tlb_remove_huge_tlb_entry(). Given that we properly initialize the MMU gather in tlb_gather_mmu_vma() to be hugetlb aware, similar to __unmap_hugepage_range(), that should be fine. (2) Flushing for shared PMD tables We're converting from various things (flush_hugetlb_tlb_range(), tlb_flush_pmd_range(), flush_tlb_range()) to tlb_flush_pmd_range(). tlb_flush_pmd_range() achieves the same thing that tlb_remove_huge_tlb_entry() would achieve in these scenarios. Note that tlb_remove_huge_tlb_entry() also calls __tlb_remove_tlb_entry(), however that is only implemented on powerpc, which does not support PMD table sharing. Similar to (1), tlb_gather_mmu_vma() should make sure that TLB flushing keeps on working as expected. Further, note that the ptdesc_pmd_pts_dec() in huge_pmd_share() is not a concern, as we are holding the i_mmap_lock the whole time, preventing concurrent unsharing. That ptdesc_pmd_pts_dec() usage will be removed separately as a cleanup later. There are plenty more cleanups to be had, but they have to wait until this is fixed.
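The resulting caller pattern, condensed (locking and error handling reduced to the essentials; this is a sketch, not a verbatim excerpt from mm/hugetlb.c):

  static void sketch_unshare_one(struct vm_area_struct *vma,
                                 unsigned long addr, pte_t *ptep)
  {
          struct address_space *mapping = vma->vm_file->f_mapping;
          struct mmu_gather tlb;

          tlb_gather_mmu_vma(&tlb, vma);          /* hugetlb-aware init */

          i_mmap_lock_write(mapping);
          huge_pmd_unshare(&tlb, vma, addr, ptep);        /* only batches state */
          /*
           * One TLB flush for the whole batch, plus one IPI broadcast only
           * if we were the last sharer - and it must happen before the
           * lock is dropped and a table could be reused.
           */
          huge_pmd_unshare_flush(&tlb, vma);
          i_mmap_unlock_write(mapping);

          tlb_finish_mmu(&tlb);
  }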
[david@kernel.org: fix kerneldoc] Link: https://lkml.kernel.org/r/f223dd74-331c-412d-93fc-69e360a5006c@kernel.org Link: https://lkml.kernel.org/r/20251223214037.580860-5-david@kernel.org Fixes: 1013af4f585f ("mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race") Signed-off-by: David Hildenbrand (Red Hat) Reported-by: "Uschakow, Stanislav" Closes: https://lore.kernel.org/all/4d3878531c76479d9f8ca9789dc6485d@amazon.de/ Tested-by: Laurence Oberman Acked-by: Harry Yoo Reviewed-by: Lorenzo Stoakes Cc: Lance Yang Cc: Liu Shixin Cc: Oscar Salvador Cc: Rik van Riel Cc: Signed-off-by: Andrew Morton --- include/asm-generic/tlb.h | 77 +++++++++++++++++++++++++++++++++++++++++++++-- include/linux/hugetlb.h | 15 ++++++--- include/linux/mm_types.h | 1 + 3 files changed, 86 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 1fff717cae51..4d679d2a206b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -46,7 +46,8 @@ * * The mmu_gather API consists of: * - * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu() + * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_gather_mmu_vma() / + * tlb_finish_mmu() * * start and finish a mmu_gather * @@ -364,6 +365,20 @@ struct mmu_gather { unsigned int vma_huge : 1; unsigned int vma_pfn : 1; + /* + * Did we unshare (unmap) any shared page tables? For now only + * used for hugetlb PMD table sharing. + */ + unsigned int unshared_tables : 1; + + /* + * Did we unshare any page tables such that they are now exclusive + * and could get reused+modified by the new owner? When setting this + * flag, "unshared_tables" will be set as well. For now only used + * for hugetlb PMD table sharing. + */ + unsigned int fully_unshared_tables : 1; + unsigned int batch_count; #ifndef CONFIG_MMU_GATHER_NO_GATHER @@ -400,6 +415,7 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->cleared_pmds = 0; tlb->cleared_puds = 0; tlb->cleared_p4ds = 0; + tlb->unshared_tables = 0; /* * Do not reset mmu_gather::vma_* fields here, we do not * call into tlb_start_vma() again to set them if there is an @@ -484,7 +500,7 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) * these bits. */ if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds || - tlb->cleared_puds || tlb->cleared_p4ds)) + tlb->cleared_puds || tlb->cleared_p4ds || tlb->unshared_tables)) return; tlb_flush(tlb); @@ -773,6 +789,63 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) } #endif +#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING +static inline void tlb_unshare_pmd_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt, + unsigned long addr) +{ + /* + * The caller must make sure that concurrent unsharing + exclusive + * reuse is impossible until tlb_flush_unshared_tables() was called. + */ + VM_WARN_ON_ONCE(!ptdesc_pmd_is_shared(pt)); + ptdesc_pmd_pts_dec(pt); + + /* Clearing a PUD pointing at a PMD table with PMD leaves. */ + tlb_flush_pmd_range(tlb, addr & PUD_MASK, PUD_SIZE); + + /* + * If the page table is now exclusively owned, we fully unshared + * a page table.
+ */ + if (!ptdesc_pmd_is_shared(pt)) + tlb->fully_unshared_tables = true; + tlb->unshared_tables = true; +} + +static inline void tlb_flush_unshared_tables(struct mmu_gather *tlb) +{ + /* + * As soon as the caller drops locks to allow for reuse of + * previously-shared tables, these tables could get modified and + * even reused outside of hugetlb context, so we have to make sure that + * any page table walkers (incl. TLB, GUP-fast) are aware of that + * change. + * + * Even if we are not fully unsharing a PMD table, we must + * flush the TLB for the unsharer now. + */ + if (tlb->unshared_tables) + tlb_flush_mmu_tlbonly(tlb); + + /* + * Similarly, we must make sure that concurrent GUP-fast will not + * walk previously-shared page tables that are getting modified+reused + * elsewhere. So broadcast an IPI to wait for any concurrent GUP-fast. + * + * We only perform this when we are the last sharer of a page table, + * as the IPI will reach all CPUs: any GUP-fast. + * + * Note that on configs where tlb_remove_table_sync_one() is a NOP, + * the expectation is that the tlb_flush_mmu_tlbonly() would have issued + * required IPIs already for us. + */ + if (tlb->fully_unshared_tables) { + tlb_remove_table_sync_one(); + tlb->fully_unshared_tables = false; + } +} +#endif /* CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */ + #endif /* CONFIG_MMU */ #endif /* _ASM_GENERIC__TLB_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 03c8725efa28..e51b8ef0cebd 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -240,8 +240,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); unsigned long hugetlb_mask_last_page(struct hstate *h); -int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep); +int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); +void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); @@ -300,13 +301,17 @@ static inline struct address_space *hugetlb_folio_mapping_lock_write( return NULL; } -static inline int huge_pmd_unshare(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +static inline int huge_pmd_unshare(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return 0; } +static inline void huge_pmd_unshare_flush(struct mmu_gather *tlb, + struct vm_area_struct *vma) +{ +} + static inline void adjust_range_if_pmd_sharing_possible( struct vm_area_struct *vma, unsigned long *start, unsigned long *end) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index aa4639888f89..78950eb8926d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1530,6 +1530,7 @@ static inline unsigned int mm_cid_size(void) struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); +void tlb_gather_mmu_vma(struct mmu_gather *tlb, struct vm_area_struct *vma); extern void tlb_finish_mmu(struct mmu_gather *tlb); struct vm_fault; -- cgit v1.2.3 From 35e247032606f06c2f19d90a6562bc315206b7a7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 14 Jan 2026 11:00:06 +0000 Subject: mm: do not copy page tables unnecessarily for 
VM_UFFD_WP Commit ab04b530e7e8 ("mm: introduce copy-on-fork VMAs and make VM_MAYBE_GUARD one") aggregates flags checks in vma_needs_copy(), including VM_UFFD_WP. However in doing so, it incorrectly performed this check against src_vma. This check was done on the assumption that all relevant flags are copied upon fork. However the userfaultfd logic is very innovative in that it implements custom logic on fork in dup_userfaultfd(), including a rather well hidden case where lacking UFFD_FEATURE_EVENT_FORK causes VM_UFFD_WP to not be propagated to the destination VMA. And indeed, vma_needs_copy(), prior to this patch, did check this property on dst_vma, not src_vma. Since all the other relevant flags are copied on fork, we can simply fix this by checking against dst_vma. While we're here, we fix a comment against VM_COPY_ON_FORK (noting that it did indeed already reference dst_vma) to make it abundantly clear that we must check against the destination VMA. Link: https://lkml.kernel.org/r/20260114110006.1047071-1-lorenzo.stoakes@oracle.com Fixes: ab04b530e7e8 ("mm: introduce copy-on-fork VMAs and make VM_MAYBE_GUARD one") Signed-off-by: Lorenzo Stoakes Reported-by: Chris Mason Closes: https://lore.kernel.org/all/20260113231257.3002271-1-clm@meta.com/ Acked-by: David Hildenbrand (Red Hat) Acked-by: Pedro Falcato Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6f959d8ca4b4..f0d5be9dc736 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -608,7 +608,11 @@ enum { /* * Flags which should result in page tables being copied on fork. These are * flags which indicate that the VMA maps page tables which cannot be - * reconsistuted upon page fault, so necessitate page table copying upon + * reconsistuted upon page fault, so necessitate page table copying upon fork. + * + * Note that these flags should be compared with the DESTINATION VMA not the + * source, as VM_UFFD_WP may not be propagated to destination, while all other + * flags will be. * * VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be * reasonably reconstructed on page fault. -- cgit v1.2.3 From f5f2bad67a45cd1ef6f5b727da104694a81b3666 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jan 2026 08:31:49 +0100 Subject: block: make the new blkzoned UAPI constants discoverable The Linux 6.19 merge window added the new BLKREPORTZONESV2 ioctl, and with it the new BLK_ZONE_REP_CACHED and BLK_ZONE_COND_ACTIVE constants. The two constants are defined as part of enums, which makes it very painful for userspace to discover if they are present in the installed system headers. Use the #define to the same name trick to make them trivially discoverable using CPP directives. 
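Userspace can then gate its use of the v2 interface on the macro; an illustrative fragment:

  #include <linux/blkzoned.h>

  static __u32 zone_report_flags(void)
  {
  #ifdef BLK_ZONE_REP_CACHED      /* visible since the Linux 6.19 UAPI headers */
          return BLK_ZONE_REP_CACHED;
  #else
          return 0;               /* old headers: plain reports only */
  #endif
  }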
Fixes: 0bf0e2e46668 ("block: track zone conditions") Fixes: b30ffcdc0c15 ("block: introduce BLKREPORTZONESV2 ioctl") Reported-by: Andrey Albershteyn Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blkzoned.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index e33f02703350..663836120966 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -81,7 +81,8 @@ enum blk_zone_cond { BLK_ZONE_COND_FULL = 0xE, BLK_ZONE_COND_OFFLINE = 0xF, - BLK_ZONE_COND_ACTIVE = 0xFF, + BLK_ZONE_COND_ACTIVE = 0xFF, /* added in Linux 6.19 */ +#define BLK_ZONE_COND_ACTIVE BLK_ZONE_COND_ACTIVE }; /** @@ -100,7 +101,8 @@ enum blk_zone_report_flags { BLK_ZONE_REP_CAPACITY = (1U << 0), /* Input flags */ - BLK_ZONE_REP_CACHED = (1U << 31), + BLK_ZONE_REP_CACHED = (1U << 31), /* added in Linux 6.19 */ +#define BLK_ZONE_REP_CACHED BLK_ZONE_REP_CACHED }; /** -- cgit v1.2.3 From bdcdf968be314b6fc8835b99fb4519e7619671e6 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 21 Jan 2026 10:10:47 +0100 Subject: drm, drm/xe: Fix xe userptr in the absence of CONFIG_DEVICE_PRIVATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_DEVICE_PRIVATE is not selected by default by some distros, for example Fedora, and that leads to a regression in the xe driver since userptr support gets compiled out. It turns out that DRM_GPUSVM, which is needed for xe userptr support, also compiles without CONFIG_DEVICE_PRIVATE but doesn't compile without CONFIG_ZONE_DEVICE. Exclude the drm_pagemap files from compilation with !CONFIG_ZONE_DEVICE, and remove the CONFIG_DEVICE_PRIVATE dependency from CONFIG_DRM_GPUSVM and the xe driver's selection of it, re-enabling xe userptr for those configs. v2: - Don't compile the drm_pagemap files unless CONFIG_ZONE_DEVICE is set. - Adjust the drm_pagemap.h header accordingly.
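Keeping static-inline stubs for the !CONFIG_ZONE_DEVICE case means callers need no #ifdefs of their own; a sketch with a hypothetical caller:

  static int sketch_lookup_dpagemap(struct page *page)
  {
          /* The stub returns NULL when CONFIG_ZONE_DEVICE is off. */
          struct drm_pagemap *dpagemap = drm_pagemap_page_to_dpagemap(page);

          if (!dpagemap)
                  return -EOPNOTSUPP;
          return 0;
  }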
Fixes: 9e9787414882 ("drm/xe/userptr: replace xe_hmm with gpusvm") Cc: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Thomas Hellström Cc: Matthew Brost Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: dri-devel@lists.freedesktop.org Cc: # v6.18+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Maarten Lankhorst Link: https://patch.msgid.link/20260121091048.41371-2-thomas.hellstrom@linux.intel.com (cherry picked from commit 1e372b246199ca7a35f930177fea91b557dac16e) Signed-off-by: Thomas Hellström --- include/drm/drm_pagemap.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index 70a7991f784f..eb29e5309f0a 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -209,6 +209,19 @@ struct drm_pagemap_devmem_ops { struct dma_fence *pre_migrate_fence); }; +#if IS_ENABLED(CONFIG_ZONE_DEVICE) + +struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page); + +#else + +static inline struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page) +{ + return NULL; +} + +#endif /* IS_ENABLED(CONFIG_ZONE_DEVICE) */ + /** * struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation * @@ -233,6 +246,8 @@ struct drm_pagemap_devmem { struct dma_fence *pre_migrate_fence; }; +#if IS_ENABLED(CONFIG_ZONE_DEVICE) + int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, struct mm_struct *mm, unsigned long start, unsigned long end, @@ -243,8 +258,6 @@ int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation); const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void); -struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page); - void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, struct device *dev, struct mm_struct *mm, const struct drm_pagemap_devmem_ops *ops, @@ -256,4 +269,6 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, struct mm_struct *mm, unsigned long timeslice_ms); +#endif /* IS_ENABLED(CONFIG_ZONE_DEVICE) */ + #endif -- cgit v1.2.3 From d7e1f9e84af460c5f1e5352eda8f036000cfcf0a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 22 Jan 2026 20:44:57 +0800 Subject: ASoC: codec: Remove ak4641 Since commit d6df7df7ae5a0 ("ARM: pxa: remove unused board files"), there has been no in-tree user of the AK4641 codec driver. The last user (HP iPAQ hx4700) was a non-DT PXA board file that instantiated the device via I2C board data; that code was removed as part of the PXA board-file purge. The AK4641 driver was introduced ~2011 and still probes only via the I2C device-ID table ('.id_table'), without an 'of_match_table', so there are no upstream Devicetree users to retain. With no in-tree users left, remove the driver. 
Signed-off-by: Peng Fan Reviewed-by: Bartosz Golaszewski Acked-by: Andy Shevchenko Link: https://patch.msgid.link/20260122-sound-cleanup-v1-1-0a91901609b8@nxp.com Signed-off-by: Mark Brown --- include/sound/ak4641.h | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 include/sound/ak4641.h (limited to 'include') diff --git a/include/sound/ak4641.h b/include/sound/ak4641.h deleted file mode 100644 index 8b1941bbde52..000000000000 --- a/include/sound/ak4641.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AK4641 ALSA SoC Codec driver - * - * Copyright 2009 Philipp Zabel - */ - -#ifndef __AK4641_H -#define __AK4641_H - -/** - * struct ak4641_platform_data - platform specific AK4641 configuration - * @gpio_power: GPIO to control external power to AK4641 - * @gpio_npdn: GPIO connected to AK4641 nPDN pin - * - * Both GPIO parameters are optional. - */ -struct ak4641_platform_data { - int gpio_power; - int gpio_npdn; -}; - -#endif /* __AK4641_H */ -- cgit v1.2.3