From 20b97acc4cafa2be8ac91a777de135110e58a90b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Mar 2025 15:51:50 -0700 Subject: scsi: ufs: core: Fix a race condition related to device commands There is a TOCTOU race in ufshcd_compl_one_cqe(): hba->dev_cmd.complete may be cleared from another thread after it has been checked and before it is used. Fix this race by moving the device command completion from the stack of the device command submitter into struct ufs_hba. This patch fixes the following kernel crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: _raw_spin_lock_irqsave+0x34/0x80 complete+0x24/0xb8 ufshcd_compl_one_cqe+0x13c/0x4f0 ufshcd_mcq_poll_cqe_lock+0xb4/0x108 ufshcd_intr+0x2f4/0x444 __handle_irq_event_percpu+0xbc/0x250 handle_irq_event+0x48/0xb0 Fixes: 5a0b0cb9bee7 ("[SCSI] ufs: Add support for sending NOP OUT UPIU") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250314225206.1487838-1-bvanassche@acm.org Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index e3909cc691b2..f56050ce9445 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -246,7 +246,7 @@ struct ufs_query { struct ufs_dev_cmd { enum dev_cmd_type type; struct mutex lock; - struct completion *complete; + struct completion complete; struct ufs_query query; }; -- cgit v1.2.3 From c25951eb7518844fcb7fc9ec58e888731e8c46d0 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 15 Nov 2024 15:20:55 +0000 Subject: bus: fsl-mc: Remove deadcode fsl_mc_allocator_driver_exit() was added explicitly by commit 1e8ac83b6caf ("bus: fsl-mc: add fsl_mc_allocator cleanup function") but was never used. Remove it. fsl_mc_portal_reset() was added in 2015 by commit 197f4d6a4a00 ("staging: fsl-mc: fsl-mc object allocator driver") but was never used. Remove it. fsl_mc_portal_reset() was the only caller of dpmcp_reset(). Remove it. Signed-off-by: Dr. David Alan Gilbert Acked-by: Ioana Ciornei Acked-by: Christophe Leroy Link: https://lore.kernel.org/r/20241115152055.279732-1-linux@treblig.org Signed-off-by: Christophe Leroy --- include/linux/fsl/mc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 99f30c7d6208..897d6211c163 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -417,8 +417,6 @@ int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev, void fsl_mc_portal_free(struct fsl_mc_io *mc_io); -int fsl_mc_portal_reset(struct fsl_mc_io *mc_io); - int __must_check fsl_mc_object_allocate(struct fsl_mc_device *mc_dev, enum fsl_mc_pool_type pool_type, struct fsl_mc_device **new_mc_adev); -- cgit v1.2.3 From d073a571e68f42414f8f06f01b59f52224538a83 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Oct 2024 10:13:56 -0700 Subject: asm-generic: Always define Elf_Rel and Elf_Rela These definitions are useful for relocating the kernel image as well, regardless of the type of relocations used for modules. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241026171441.3047904-5-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- include/asm-generic/module.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h index 98e1541b72b7..a8622501b975 100644 --- a/include/asm-generic/module.h +++ b/include/asm-generic/module.h @@ -19,12 +19,8 @@ struct mod_arch_specific #define Elf_Dyn Elf64_Dyn #define Elf_Ehdr Elf64_Ehdr #define Elf_Addr Elf64_Addr -#ifdef CONFIG_MODULES_USE_ELF_REL #define Elf_Rel Elf64_Rel -#endif -#ifdef CONFIG_MODULES_USE_ELF_RELA #define Elf_Rela Elf64_Rela -#endif #define ELF_R_TYPE(X) ELF64_R_TYPE(X) #define ELF_R_SYM(X) ELF64_R_SYM(X) @@ -36,12 +32,8 @@ struct mod_arch_specific #define Elf_Dyn Elf32_Dyn #define Elf_Ehdr Elf32_Ehdr #define Elf_Addr Elf32_Addr -#ifdef CONFIG_MODULES_USE_ELF_REL #define Elf_Rel Elf32_Rel -#endif -#ifdef CONFIG_MODULES_USE_ELF_RELA #define Elf_Rela Elf32_Rela -#endif #define ELF_R_TYPE(X) ELF32_R_TYPE(X) #define ELF_R_SYM(X) ELF32_R_SYM(X) #endif -- cgit v1.2.3 From a22b3d54de94f82ca057cc2ebf9496fa91ebf698 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:39 -0400 Subject: cgroup/cpuset: Fix race between newly created partition and dying one There is a possible race between removing a cgroup diectory that is a partition root and the creation of a new partition. The partition to be removed can be dying but still online, it doesn't not currently participate in checking for exclusive CPUs conflict, but the exclusive CPUs are still there in subpartitions_cpus and isolated_cpus. These two cpumasks are global states that affect the operation of cpuset partitions. The exclusive CPUs in dying cpusets will only be removed when cpuset_css_offline() function is called after an RCU delay. As a result, it is possible that a new partition can be created with exclusive CPUs that overlap with those of a dying one. When that dying partition is finally offlined, it removes those overlapping exclusive CPUs from subpartitions_cpus and maybe isolated_cpus resulting in an incorrect CPU configuration. This bug was found when a warning was triggered in remote_partition_disable() during testing because the subpartitions_cpus mask was empty. One possible way to fix this is to iterate the dying cpusets as well and avoid using the exclusive CPUs in those dying cpusets. However, this can still cause random partition creation failures or other anomalies due to racing. A better way to fix this race is to reset the partition state at the moment when a cpuset is being killed. Introduce a new css_killed() CSS function pointer and call it, if defined, before setting CSS_DYING flag in kill_css(). Also update the css_is_dying() helper to use the CSS_DYING flag introduced by commit 33c35aa48178 ("cgroup: Prevent kill_css() from being called more than once") for proper synchronization. Add a new cpuset_css_killed() function to reset the partition state of a valid partition root if it is being killed. Fixes: ee8dde0cd2ce ("cpuset: Add new v2 cpuset.sched.partition flag") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 + include/linux/cgroup.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 485b651869d9..5bc8f55c8cca 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -710,6 +710,7 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_killed)(struct cgroup_subsys_state *css); void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); int (*css_extra_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 28e999f2c642..e7da3c3b098b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -344,7 +344,7 @@ static inline u64 cgroup_id(const struct cgroup *cgrp) */ static inline bool css_is_dying(struct cgroup_subsys_state *css) { - return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); + return css->flags & CSS_DYING; } static inline void cgroup_get(struct cgroup *cgrp) -- cgit v1.2.3 From 1fd2e77b889761d9bde0c580518689d1d8e83117 Mon Sep 17 00:00:00 2001 From: "Bao D. Nguyen" Date: Fri, 28 Mar 2025 14:46:13 -0700 Subject: scsi: ufs: core: Add device level exception support The ufs device JEDEC specification version 4.1 adds support for the device level exception events. To support this new device level exception feature, expose two new sysfs nodes below to provide the user space access to the device level exception information. /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_count /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_id The device_lvl_exception_count sysfs node reports the number of device level exceptions that have occurred since the last time this variable is reset. Writing a value of 0 will reset it. The device_lvl_exception_id reports the exception ID which is the qDeviceLevelExceptionID attribute of the device JEDEC specifications version 4.1 and later. The user space application can query these sysfs nodes to get more information about the device level exception. Signed-off-by: Bao D. Nguyen Link: https://lore.kernel.org/r/6278d7c125b2f0cf5056f4a647a4b9c1fdd24fc7.1743198325.git.quic_nguyenb@quicinc.com Reviewed-by: Peter Wang Reviewed-by: Bart Van Assche Reviewed-by: Arthur Simchaev Signed-off-by: Martin K. Petersen --- include/ufs/ufs.h | 5 ++++- include/ufs/ufshcd.h | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 8a24ed59ec46..1c47136d8715 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -180,7 +180,8 @@ enum attr_idn { QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D, QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E, QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE = 0x1F, - QUERY_ATTR_IDN_TIMESTAMP = 0x30 + QUERY_ATTR_IDN_TIMESTAMP = 0x30, + QUERY_ATTR_IDN_DEV_LVL_EXCEPTION_ID = 0x34, }; /* Descriptor idn for Query requests */ @@ -390,6 +391,7 @@ enum { UFS_DEV_EXT_TEMP_NOTIF = BIT(6), UFS_DEV_HPB_SUPPORT = BIT(7), UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), + UFS_DEV_LVL_EXCEPTION_SUP = BIT(12), }; #define UFS_DEV_HPB_SUPPORT_VERSION 0x310 @@ -419,6 +421,7 @@ enum { MASK_EE_TOO_LOW_TEMP = BIT(4), MASK_EE_WRITEBOOSTER_EVENT = BIT(5), MASK_EE_PERFORMANCE_THROTTLING = BIT(6), + MASK_EE_DEV_LVL_EXCEPTION = BIT(7), MASK_EE_HEALTH_CRITICAL = BIT(9), }; #define MASK_EE_URGENT_TEMP (MASK_EE_TOO_HIGH_TEMP | MASK_EE_TOO_LOW_TEMP) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index f56050ce9445..e928ed0265ff 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -968,6 +968,9 @@ enum ufshcd_mcq_opr { * @pm_qos_req: PM QoS request handle * @pm_qos_enabled: flag to check if pm qos is enabled * @critical_health_count: count of critical health exceptions + * @dev_lvl_exception_count: count of device level exceptions since last reset + * @dev_lvl_exception_id: vendor specific information about the + * device level exception event. */ struct ufs_hba { void __iomem *mmio_base; @@ -1138,6 +1141,8 @@ struct ufs_hba { bool pm_qos_enabled; int critical_health_count; + atomic_t dev_lvl_exception_count; + u64 dev_lvl_exception_id; }; /** -- cgit v1.2.3 From 4c975fd700022c90e61a46326e3444e08317876e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:43 -0700 Subject: net: hold instance lock during NETDEV_REGISTER/UP Callers of inetdev_init can come from several places with inconsistent expectation about netdev instance lock. Grab instance lock during REGISTER (plus UP). Also solve the inconsistency with UNREGISTER where it was locked only during move netns path. WARNING: CPU: 10 PID: 1479 at ./include/net/netdev_lock.h:54 __netdev_update_features+0x65f/0xca0 __warn+0x81/0x180 __netdev_update_features+0x65f/0xca0 report_bug+0x156/0x180 handle_bug+0x4f/0x90 exc_invalid_op+0x13/0x60 asm_exc_invalid_op+0x16/0x20 __netdev_update_features+0x65f/0xca0 netif_disable_lro+0x30/0x1d0 inetdev_init+0x12f/0x1f0 inetdev_event+0x48b/0x870 notifier_call_chain+0x38/0xf0 register_netdevice+0x741/0x8b0 register_netdev+0x1f/0x40 mlx5e_probe+0x4e3/0x8e0 [mlx5_core] auxiliary_bus_probe+0x3f/0x90 really_probe+0xc3/0x3a0 __driver_probe_device+0x80/0x150 driver_probe_device+0x1f/0x90 __device_attach_driver+0x7d/0x100 bus_for_each_drv+0x80/0xd0 __device_attach+0xb4/0x1c0 bus_probe_device+0x91/0xa0 device_add+0x657/0x870 Reviewed-by: Jakub Kicinski Reported-by: Cosmin Ratiu Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-3-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fa79145518d1..cf3b6445817b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4192,7 +4192,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, int netif_set_alias(struct net_device *dev, const char *alias, size_t len); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); -int netif_change_net_namespace(struct net_device *dev, struct net *net, +int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack); int dev_change_net_namespace(struct net_device *dev, struct net *net, -- cgit v1.2.3 From 8965c160b8f7333df895321c8aa6bad4a7175f2b Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:44 -0700 Subject: net: use netif_disable_lro in ipv6_add_dev ipv6_add_dev might call dev_disable_lro which unconditionally grabs instance lock, so it will deadlock during NETDEV_REGISTER. Switch to netif_disable_lro. Make sure all callers hold the instance lock as well. Cc: Cosmin Ratiu Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-4-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/net/ip.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 8a48ade24620..47ed6d23853d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -667,14 +667,6 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast, memcpy(buf, &naddr, sizeof(naddr)); } -#if IS_MODULE(CONFIG_IPV6) -#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) -#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) -#else -#define EXPORT_IPV6_MOD(X) -#define EXPORT_IPV6_MOD_GPL(X) -#endif - #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -694,6 +686,14 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif +#if IS_MODULE(CONFIG_IPV6) +#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) +#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) +#else +#define EXPORT_IPV6_MOD(X) +#define EXPORT_IPV6_MOD_GPL(X) +#endif + static inline unsigned int ipv4_addr_hash(__be32 ip) { return (__force unsigned int) ip; -- cgit v1.2.3 From 1901066aab7654f4a225ac29354a564d891d0c1a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 1 Apr 2025 09:34:46 -0700 Subject: netdevsim: add dummy device notifiers In order to exercise and verify notifiers' locking assumptions, register dummy notifiers (via register_netdevice_notifier_dev_net). Share notifier event handler that enforces the assumptions with lock_debug.c (rename and export rtnl_net_debug_event as netdev_debug_event). Add ops lock asserts to netdev_debug_event. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250401163452.622454-6-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/net/netdev_lock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index 1c0c9a94cc22..c316b551df8d 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -98,4 +98,7 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, &qdisc_xmit_lock_key); \ } +int netdev_debug_event(struct notifier_block *nb, unsigned long event, + void *ptr); + #endif -- cgit v1.2.3 From 459a35111b0a890172a78d51c01b204e13a34a18 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 14 Mar 2025 19:46:23 -0700 Subject: KVM: Allow building irqbypass.ko as as module when kvm.ko is a module Convert HAVE_KVM_IRQ_BYPASS into a tristate so that selecting IRQ_BYPASS_MANAGER follows KVM={m,y}, i.e. doesn't force irqbypass.ko to be built-in. Note, PPC allows building KVM as a module, but selects HAVE_KVM_IRQ_BYPASS from a boolean Kconfig, i.e. KVM PPC unnecessarily forces irqbpass.ko to be built-in. But that flaw is a longstanding PPC specific issue. Fixes: 61df71ee992d ("kvm: move "select IRQ_BYPASS_MANAGER" to common code") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20250315024623.2363994-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5438a1b446a6..291d49b9bf05 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2382,7 +2382,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); -- cgit v1.2.3 From ec304b70d46bd2ed66541c5b57b63276529e05b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:34:04 -0700 Subject: net: move mp dev config validation to __net_mp_open_rxq() devmem code performs a number of safety checks to avoid having to reimplement all of them in the drivers. Move those to __net_mp_open_rxq() and reuse that function for binding to make sure that io_uring ZC also benefits from them. While at it rename the queue ID variable to rxq_idx in __net_mp_open_rxq(), we touch most of the relevant lines. The XArray insertion is reordered after the netdev_rx_queue_restart() call, otherwise we'd need to duplicate the queue index check or risk inserting an invalid pointer. The XArray allocation failures should be extremely rare. Reviewed-by: Mina Almasry Acked-by: Stanislav Fomichev Fixes: 6e18ed929d3b ("net: add helpers for setting a memory provider on an rx queue") Link: https://patch.msgid.link/20250403013405.2827250-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/memory_provider.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h index b3e665897767..ada4f968960a 100644 --- a/include/net/page_pool/memory_provider.h +++ b/include/net/page_pool/memory_provider.h @@ -6,6 +6,7 @@ #include struct netdev_rx_queue; +struct netlink_ext_ack; struct sk_buff; struct memory_provider_ops { @@ -24,8 +25,13 @@ void net_mp_niov_clear_page_pool(struct net_iov *niov); int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *p); +int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack); void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *old_p); +void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *old_p); /** * net_mp_netmem_place_in_cache() - give a netmem to a page pool -- cgit v1.2.3 From 825dfab23bca520629a9e5a21ba5b03aaccc75f2 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:28:55 +0100 Subject: irqdomain: Rename irq_set_default_host() to irq_set_default_domain() Naming interrupt domains host is confusing at best and the irqdomain code uses both domain and host inconsistently. Therefore rename irq_set_default_host() to irq_set_default_domain(). Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-3-jirislaby@kernel.org --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 33ff41eef8f7..4b5c495b5710 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -352,7 +352,7 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, void *host_data); struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); -void irq_set_default_host(struct irq_domain *host); +void irq_set_default_domain(struct irq_domain *domain); struct irq_domain *irq_get_default_host(void); int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, -- cgit v1.2.3 From 0a27ea384c82e70d16e40adbaebeb3725f7e6342 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:28:56 +0100 Subject: irqdomain: Rename irq_get_default_host() to irq_get_default_domain() Naming interrupt domains host is confusing at best and the irqdomain code uses both domain and host inconsistently. Therefore rename irq_get_default_host() to irq_get_default_domain(). Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-4-jirislaby@kernel.org --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 4b5c495b5710..e9ab95fbc5a9 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -353,7 +353,7 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); void irq_set_default_domain(struct irq_domain *domain); -struct irq_domain *irq_get_default_host(void); +struct irq_domain *irq_get_default_domain(void); int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity); -- cgit v1.2.3 From d2705d33885e3a19f727dff2521fb7d5b1fc5cda Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 19 Mar 2025 10:28:57 +0100 Subject: irqdomain: Stop using 'host' for domain It is confusing to see 'host' and 'domain' to be used as 'domain'. Given this header is all about domains, switch the remaining 'host' uses to 'domain'. Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250319092951.37667-5-jirislaby@kernel.org --- include/linux/irqdomain.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e9ab95fbc5a9..bb7111105296 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -72,7 +72,7 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, /** * struct irq_domain_ops - Methods for irq_domain objects - * @match: Match an interrupt controller device node to a host, returns + * @match: Match an interrupt controller device node to a domain, returns * 1 on a match * @select: Match an interrupt controller fw specification. It is more generic * than @match as it receives a complete struct irq_fwspec. Therefore, @@ -454,7 +454,7 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod return IS_ERR(d) ? NULL : d; } -unsigned int irq_create_direct_mapping(struct irq_domain *host); +unsigned int irq_create_direct_mapping(struct irq_domain *domain); #endif static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, @@ -507,7 +507,7 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw return IS_ERR(d) ? NULL : d; } -void irq_domain_remove(struct irq_domain *host); +void irq_domain_remove(struct irq_domain *domain); int irq_domain_associate(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq); @@ -515,16 +515,16 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -unsigned int irq_create_mapping_affinity(struct irq_domain *host, +unsigned int irq_create_mapping_affinity(struct irq_domain *domain, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); void irq_dispose_mapping(unsigned int virq); -static inline unsigned int irq_create_mapping(struct irq_domain *host, +static inline unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { - return irq_create_mapping_affinity(host, hwirq, NULL); + return irq_create_mapping_affinity(domain, hwirq, NULL); } struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, -- cgit v1.2.3 From 8fa7292fee5c5240402371ea89ab285ec856c916 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 5 Apr 2025 10:17:26 +0200 Subject: treewide: Switch/rename to timer_delete[_sync]() timer_delete[_sync]() replaces del_timer[_sync](). Convert the whole tree over and remove the historical wrapper inlines. Conversion was done with coccinelle plus manual fixups where necessary. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/timer.h | 36 +----------------------------------- include/net/sctp/sctp.h | 2 +- 2 files changed, 2 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index e67ecd1cbc97..10596d7c3a34 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -30,7 +30,7 @@ * * @TIMER_IRQSAFE: An irqsafe timer is executed with IRQ disabled and * it's safe to wait for the completion of the running instance from - * IRQ handlers, for example, by calling del_timer_sync(). + * IRQ handlers, for example, by calling timer_delete_sync(). * * Note: The irq disabled callback execution is a special case for * workqueue locking issues. It's not meant for executing random crap @@ -168,40 +168,6 @@ extern int timer_delete(struct timer_list *timer); extern int timer_shutdown_sync(struct timer_list *timer); extern int timer_shutdown(struct timer_list *timer); -/** - * del_timer_sync - Delete a pending timer and wait for a running callback - * @timer: The timer to be deleted - * - * See timer_delete_sync() for detailed explanation. - * - * Do not use in new code. Use timer_delete_sync() instead. - * - * Returns: - * * %0 - The timer was not pending - * * %1 - The timer was pending and deactivated - */ -static inline int del_timer_sync(struct timer_list *timer) -{ - return timer_delete_sync(timer); -} - -/** - * del_timer - Delete a pending timer - * @timer: The timer to be deleted - * - * See timer_delete() for detailed explanation. - * - * Do not use in new code. Use timer_delete() instead. - * - * Returns: - * * %0 - The timer was not pending - * * %1 - The timer was pending and deactivated - */ -static inline int del_timer(struct timer_list *timer) -{ - return timer_delete(timer); -} - extern void init_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 84e6b9fd5610..d8da764cf6de 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -636,7 +636,7 @@ static inline void sctp_transport_pl_reset(struct sctp_transport *t) } } else { if (t->pl.state != SCTP_PL_DISABLED) { - if (del_timer(&t->probe_timer)) + if (timer_delete(&t->probe_timer)) sctp_transport_put(t); t->pl.state = SCTP_PL_DISABLED; } -- cgit v1.2.3 From 9779489a31d77a7b9cb6f20d2d2caced4e29dbe6 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:10 +0100 Subject: hrtimers: Delete hrtimer_init() hrtimer_init() is now unused. Delete it. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/003722f60c7a2a4f8d4ed24fb741aa313b7e5136.1738746927.git.namcao@linutronix.de --- include/linux/hrtimer.h | 2 -- include/linux/hrtimer_types.h | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 88e078871158..1adcba3ddd76 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -231,8 +231,6 @@ static inline enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused) /* Exported timer functions: */ /* Initialize timers: */ -extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, - enum hrtimer_mode mode); extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode); extern void hrtimer_setup_on_stack(struct hrtimer *timer, diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h index ad66a3081735..7c5b27daa89d 100644 --- a/include/linux/hrtimer_types.h +++ b/include/linux/hrtimer_types.h @@ -34,7 +34,7 @@ enum hrtimer_restart { * @is_hard: Set if hrtimer will be expired in hard interrupt context * even on RT. * - * The hrtimer structure must be initialized by hrtimer_init() + * The hrtimer structure must be initialized by hrtimer_setup() */ struct hrtimer { struct timerqueue_node node; -- cgit v1.2.3 From 04257da0c99c9d4ff7c5bb93046482e1f7d34938 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:16 +0100 Subject: hrtimers: Make callback function pointer private Make the struct hrtimer::function field private, to prevent users from changing this field in an unsafe way. hrtimer_update_function() should be used if the callback function needs to be changed. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/7d0e6e0c5c59a64a9bea940051aac05d750bc0c2.1738746927.git.namcao@linutronix.de --- include/linux/hrtimer_types.h | 2 +- include/trace/events/timer.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h index 7c5b27daa89d..8fbbb6bdf7a1 100644 --- a/include/linux/hrtimer_types.h +++ b/include/linux/hrtimer_types.h @@ -39,7 +39,7 @@ enum hrtimer_restart { struct hrtimer { struct timerqueue_node node; ktime_t _softexpires; - enum hrtimer_restart (*function)(struct hrtimer *); + enum hrtimer_restart (*__private function)(struct hrtimer *); struct hrtimer_clock_base *base; u8 state; u8 is_rel; diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 1ef58a04fc57..f8c906be4cd0 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -235,7 +235,7 @@ TRACE_EVENT(hrtimer_start, TP_fast_assign( __entry->hrtimer = hrtimer; - __entry->function = hrtimer->function; + __entry->function = ACCESS_PRIVATE(hrtimer, function); __entry->expires = hrtimer_get_expires(hrtimer); __entry->softexpires = hrtimer_get_softexpires(hrtimer); __entry->mode = mode; @@ -271,7 +271,7 @@ TRACE_EVENT(hrtimer_expire_entry, TP_fast_assign( __entry->hrtimer = hrtimer; __entry->now = *now; - __entry->function = hrtimer->function; + __entry->function = ACCESS_PRIVATE(hrtimer, function); ), TP_printk("hrtimer=%p function=%ps now=%llu", -- cgit v1.2.3 From 244132c4e5777fe0a4544ef23afba0d9a50e5ec5 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 5 Feb 2025 11:55:21 +0100 Subject: tracing/timers: Rename the hrtimer_init event to hrtimer_setup The function hrtimer_init() doesn't exist anymore. It was replaced by hrtimer_setup(). Thus, rename the hrtimer_init trace event to hrtimer_setup to keep it consistent. Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/cba84c3d853c5258aa3a262363a6eac08e2c7afc.1738746927.git.namcao@linutronix.de --- include/trace/events/timer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index f8c906be4cd0..1641ae3e6ca0 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -185,12 +185,12 @@ TRACE_EVENT(timer_base_idle, { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" }) /** - * hrtimer_init - called when the hrtimer is initialized + * hrtimer_setup - called when the hrtimer is initialized * @hrtimer: pointer to struct hrtimer * @clockid: the hrtimers clock * @mode: the hrtimers mode */ -TRACE_EVENT(hrtimer_init, +TRACE_EVENT(hrtimer_setup, TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), -- cgit v1.2.3 From 1c1fd374a2fe72b8a6dde62d3c3a9fd153e7581c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 1 Apr 2025 15:36:37 +0200 Subject: mtd: spinand: Fix build with gcc < 7.5 __VA_OPT__ is a macro that is useful when some arguments can be present or not to entirely skip some part of a definition. Unfortunately, it is a too recent addition that some of the still supported old GCC versions do not know about, and is anyway not part of C11 that is the version used in the kernel. Find a trick to remove this macro, typically '__VA_ARGS__ + 0' is a workaround used in netlink.h which works very well here, as we either expect: - 0 - A positive value - No value, which means the field should be 0. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503181330.YcDXGy7F-lkp@intel.com/ Fixes: 7ce0d16d5802 ("mtd: spinand: Add an optional frequency to read from cache macros") Cc: stable@vger.kernel.org Tested-by: Jean Delvare Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 1e748958dad4..311f145eb4e8 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -67,7 +67,7 @@ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1), \ - __VA_OPT__(SPI_MEM_OP_MAX_FREQ(__VA_ARGS__))) + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_FAST_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ -- cgit v1.2.3 From 75f8c87555e6ddeff2c49bd47460a71a940edc48 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 6 Mar 2025 09:45:52 +0100 Subject: irqchip/davinci: Remove leftover header Commit fa8dede4d0a0 ("irqchip: remove davinci aintc driver") removed the davinci aintc driver but left behind the associated header. Remove it now. Fixes: fa8dede4d0a0 ("irqchip: remove davinci aintc driver") Signed-off-by: Bartosz Golaszewski Signed-off-by: Thomas Gleixner Acked-by: Arnd Bergmann Link: https://lore.kernel.org/all/20250306084552.15894-1-brgl@bgdev.pl --- include/linux/irqchip/irq-davinci-aintc.h | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 include/linux/irqchip/irq-davinci-aintc.h (limited to 'include') diff --git a/include/linux/irqchip/irq-davinci-aintc.h b/include/linux/irqchip/irq-davinci-aintc.h deleted file mode 100644 index ea4e087fac98..000000000000 --- a/include/linux/irqchip/irq-davinci-aintc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2019 Texas Instruments - */ - -#ifndef _LINUX_IRQ_DAVINCI_AINTC_ -#define _LINUX_IRQ_DAVINCI_AINTC_ - -#include - -/** - * struct davinci_aintc_config - configuration data for davinci-aintc driver. - * - * @reg: register range to map - * @num_irqs: number of HW interrupts supported by the controller - * @prios: an array of size num_irqs containing priority settings for - * each interrupt - */ -struct davinci_aintc_config { - struct resource reg; - unsigned int num_irqs; - u8 *prios; -}; - -void davinci_aintc_init(const struct davinci_aintc_config *config); - -#endif /* _LINUX_IRQ_DAVINCI_AINTC_ */ -- cgit v1.2.3 From d8455a63f731b4f585acc4d49fd7ad78db63b3d0 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Thu, 13 Mar 2025 16:55:26 +0800 Subject: platform/x86: intel_pmc_ipc: add option to build without ACPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a configuration option that allows users to build the intel_pmc_ipc driver without ACPI support. This is useful for systems where ACPI is not available or desired. Based on the discussion from the patch [1], it was necessary to provide this option to accommodate specific use cases. Link: https://patchwork.kernel.org/project/netdevbpf/patch/20250227121522.1802832-6-yong.liang.choong@linux.intel.com/#26280764 [1] Signed-off-by: David E. Box Co-developed-by: Choong Yong Liang Signed-off-by: Choong Yong Liang Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250313085526.1439092-1-yong.liang.choong@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/intel_pmc_ipc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/intel_pmc_ipc.h b/include/linux/platform_data/x86/intel_pmc_ipc.h index 6e603a8c075f..1d34435b7001 100644 --- a/include/linux/platform_data/x86/intel_pmc_ipc.h +++ b/include/linux/platform_data/x86/intel_pmc_ipc.h @@ -36,6 +36,7 @@ struct pmc_ipc_rbuf { */ static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf *rbuf) { +#ifdef CONFIG_ACPI struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object params[PMC_IPCS_PARAM_COUNT] = { {.type = ACPI_TYPE_INTEGER,}, @@ -89,6 +90,9 @@ static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf } return 0; +#else + return -ENODEV; +#endif /* CONFIG_ACPI */ } #endif /* INTEL_PMC_IPC_H */ -- cgit v1.2.3 From 04efcee6ef8d0f01eef495db047e7216d6e6e38f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 4 Apr 2025 09:11:22 -0700 Subject: net: hold instance lock during NETDEV_CHANGE Cosmin reports an issue with ipv6_add_dev being called from NETDEV_CHANGE notifier: [ 3455.008776] ? ipv6_add_dev+0x370/0x620 [ 3455.010097] ipv6_find_idev+0x96/0xe0 [ 3455.010725] addrconf_add_dev+0x1e/0xa0 [ 3455.011382] addrconf_init_auto_addrs+0xb0/0x720 [ 3455.013537] addrconf_notify+0x35f/0x8d0 [ 3455.014214] notifier_call_chain+0x38/0xf0 [ 3455.014903] netdev_state_change+0x65/0x90 [ 3455.015586] linkwatch_do_dev+0x5a/0x70 [ 3455.016238] rtnl_getlink+0x241/0x3e0 [ 3455.019046] rtnetlink_rcv_msg+0x177/0x5e0 Similarly, linkwatch might get to ipv6_add_dev without ops lock: [ 3456.656261] ? ipv6_add_dev+0x370/0x620 [ 3456.660039] ipv6_find_idev+0x96/0xe0 [ 3456.660445] addrconf_add_dev+0x1e/0xa0 [ 3456.660861] addrconf_init_auto_addrs+0xb0/0x720 [ 3456.661803] addrconf_notify+0x35f/0x8d0 [ 3456.662236] notifier_call_chain+0x38/0xf0 [ 3456.662676] netdev_state_change+0x65/0x90 [ 3456.663112] linkwatch_do_dev+0x5a/0x70 [ 3456.663529] __linkwatch_run_queue+0xeb/0x200 [ 3456.663990] linkwatch_event+0x21/0x30 [ 3456.664399] process_one_work+0x211/0x610 [ 3456.664828] worker_thread+0x1cc/0x380 [ 3456.665691] kthread+0xf4/0x210 Reclassify NETDEV_CHANGE as a notifier that consistently runs under the instance lock. Link: https://lore.kernel.org/netdev/aac073de8beec3e531c86c101b274d434741c28e.camel@nvidia.com/ Reported-by: Cosmin Ratiu Tested-by: Cosmin Ratiu Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250404161122.3907628-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ include/linux/rtnetlink.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cf3b6445817b..2d11d013cabe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4429,6 +4429,7 @@ void linkwatch_fire_event(struct net_device *dev); * pending work list (if queued). */ void linkwatch_sync_dev(struct net_device *dev); +void __linkwatch_sync_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4974,6 +4975,7 @@ void dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); +void netif_state_change(struct net_device *dev); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ccaaf4c7d5f6..ea39dd23a197 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -240,6 +240,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } -void netdev_set_operstate(struct net_device *dev, int newstate); +void netif_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */ -- cgit v1.2.3 From d247667ecd6411ec5bec9a38db7feaa599ce3ee2 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 2 Apr 2025 10:09:44 +0300 Subject: RDMA/mlx5: Fix compilation warning when USER_ACCESS isn't set The cited commit made fs.c always compile, even when INFINIBAND_USER_ACCESS isn't set. This results in a compilation warning about an unused object when compiling with W=1 and USER_ACCESS is unset. Fix this by defining uverbs_destroy_def_handler() even when USER_ACCESS isn't set. Fixes: 36e0d433672f ("RDMA/mlx5: Compile fs.c regardless of INFINIBAND_USER_ACCESS config") Link: https://patch.msgid.link/r/20250402070944.1022093-1-mbloch@nvidia.com Signed-off-by: Mark Bloch Tested-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d42eae69d9a8..901353796fbb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4790,7 +4790,14 @@ void roce_del_all_netdev_gids(struct ib_device *ib_dev, struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +#else +static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) +{ + return 0; +} +#endif struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, -- cgit v1.2.3 From 5529df92b8e8cbb4b14a226665888f74648260ad Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 25 Mar 2025 15:47:10 -0700 Subject: drm/xe/bmg: Add one additional PCI ID One additional BMG PCI ID has been added to the spec; make sure our driver recognizes devices with this ID properly. Bspec: 68090 Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Clint Taylor Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250325224709.4073080-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper (cherry picked from commit cca9734ebe55f6af11ce8d57ca1afdc4d158c808) Signed-off-by: Lucas De Marchi --- include/drm/intel/pciids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 4736ea525048..d212848d07f3 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -850,6 +850,7 @@ MACRO__(0xE20C, ## __VA_ARGS__), \ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE210, ## __VA_ARGS__), \ + MACRO__(0xE211, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__), \ MACRO__(0xE215, ## __VA_ARGS__), \ MACRO__(0xE216, ## __VA_ARGS__) -- cgit v1.2.3 From 6deb8435f6bfcc9b6c7efe3b8a941ae2fb731495 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 1 Apr 2025 14:46:42 +0200 Subject: gpio: deprecate the GPIOD_FLAGS_BIT_NONEXCLUSIVE flag The non-exclusive GPIO request flag looks like a functional feature but is in fact a workaround for a corner-case that got out of hand. It should be removed so deprecate it officially so that nobody uses it anymore. Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20250401-gpio-todo-remove-nonexclusive-v2-1-7c1380797b0d@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/consumer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 45b651c05b9c..8adc8e9cb4a7 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -31,6 +31,7 @@ struct gpio_descs { #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) #define GPIOD_FLAGS_BIT_OPEN_DRAIN BIT(3) +/* GPIOD_FLAGS_BIT_NONEXCLUSIVE is DEPRECATED, don't use in new code. */ #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4) /** -- cgit v1.2.3 From f1a69a940de58b16e8249dff26f74c8cc59b32be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= Date: Fri, 4 Apr 2025 16:53:21 +0200 Subject: sctp: detect and prevent references to a freed transport in sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sctp_sendmsg() re-uses associations and transports when possible by doing a lookup based on the socket endpoint and the message destination address, and then sctp_sendmsg_to_asoc() sets the selected transport in all the message chunks to be sent. There's a possible race condition if another thread triggers the removal of that selected transport, for instance, by explicitly unbinding an address with setsockopt(SCTP_SOCKOPT_BINDX_REM), after the chunks have been set up and before the message is sent. This can happen if the send buffer is full, during the period when the sender thread temporarily releases the socket lock in sctp_wait_for_sndbuf(). This causes the access to the transport data in sctp_outq_select_transport(), when the association outqueue is flushed, to result in a use-after-free read. This change avoids this scenario by having sctp_transport_free() signal the freeing of the transport, tagging it as "dead". In order to do this, the patch restores the "dead" bit in struct sctp_transport, which was removed in commit 47faa1e4c50e ("sctp: remove the dead field of sctp_transport"). Then, in the scenario where the sender thread has released the socket lock in sctp_wait_for_sndbuf(), the bit is checked again after re-acquiring the socket lock to detect the deletion. This is done while holding a reference to the transport to prevent it from being freed in the process. If the transport was deleted while the socket lock was relinquished, sctp_sendmsg_to_asoc() will return -EAGAIN to let userspace retry the send. The bug was found by a private syzbot instance (see the error report [1] and the C reproducer that triggers it [2]). Link: https://people.igalia.com/rcn/kernel_logs/20250402__KASAN_slab-use-after-free_Read_in_sctp_outq_select_transport.txt [1] Link: https://people.igalia.com/rcn/kernel_logs/20250402__KASAN_slab-use-after-free_Read_in_sctp_outq_select_transport__repro.c [2] Cc: stable@vger.kernel.org Fixes: df132eff4638 ("sctp: clear the transport of some out_chunk_list chunks in sctp_assoc_rm_peer") Suggested-by: Xin Long Signed-off-by: Ricardo Cañuelo Navarro Acked-by: Xin Long Link: https://patch.msgid.link/20250404-kasan_slab-use-after-free_read_in_sctp_outq_select_transport__20250404-v1-1-5ce4a0b78ef2@igalia.com Signed-off-by: Paolo Abeni --- include/net/sctp/structs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 31248cfdfb23..dcd288fa1bb6 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -775,6 +775,7 @@ struct sctp_transport { /* Reference counting. */ refcount_t refcnt; + __u32 dead:1, /* RTO-Pending : A flag used to track if one of the DATA * chunks sent to this address is currently being * used to compute a RTT. If this flag is 0, @@ -784,7 +785,7 @@ struct sctp_transport { * calculation completes (i.e. the DATA chunk * is SACK'd) clear this flag. */ - __u32 rto_pending:1, + rto_pending:1, /* * hb_sent : a flag that signals that we have a pending -- cgit v1.2.3 From 13c1d5f3a7fa7b55a26e73bb9e95342374a489b2 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 09:34:07 +0200 Subject: drm/tests: helpers: Create kunit helper to destroy a drm_display_mode A number of test suites call functions that expect the returned drm_display_mode to be destroyed eventually. However, none of the tests called drm_mode_destroy, which results in a memory leak. Since drm_mode_destroy takes two pointers as argument, we can't use a kunit wrapper. Let's just create a helper every test suite can use. Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-1-996305a2e75a@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_kunit_helpers.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_kunit_helpers.h b/include/drm/drm_kunit_helpers.h index 11d59ce0bac0..1c62d1d4458c 100644 --- a/include/drm/drm_kunit_helpers.h +++ b/include/drm/drm_kunit_helpers.h @@ -118,6 +118,9 @@ drm_kunit_helper_create_crtc(struct kunit *test, const struct drm_crtc_funcs *funcs, const struct drm_crtc_helper_funcs *helper_funcs); +int drm_kunit_add_mode_destroy_action(struct kunit *test, + struct drm_display_mode *mode); + struct drm_display_mode * drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev, u8 video_code); -- cgit v1.2.3 From 56799bc035658738f362acec3e7647bb84e68933 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 4 Mar 2025 14:54:46 +0100 Subject: perf: Fix hang while freeing sigtrap event Perf can hang while freeing a sigtrap event if a related deferred signal hadn't managed to be sent before the file got closed: perf_event_overflow() task_work_add(perf_pending_task) fput() task_work_add(____fput()) task_work_run() ____fput() perf_release() perf_event_release_kernel() _free_event() perf_pending_task_sync() task_work_cancel() -> FAILED rcuwait_wait_event() Once task_work_run() is running, the list of pending callbacks is removed from the task_struct and from this point on task_work_cancel() can't remove any pending and not yet started work items, hence the task_work_cancel() failure and the hang on rcuwait_wait_event(). Task work could be changed to remove one work at a time, so a work running on the current task can always cancel a pending one, however the wait / wake design is still subject to inverted dependencies when remote targets are involved, as pictured by Oleg: T1 T2 fd = perf_event_open(pid => T2->pid); fd = perf_event_open(pid => T1->pid); close(fd) close(fd) perf_event_overflow() perf_event_overflow() task_work_add(perf_pending_task) task_work_add(perf_pending_task) fput() fput() task_work_add(____fput()) task_work_add(____fput()) task_work_run() task_work_run() ____fput() ____fput() perf_release() perf_release() perf_event_release_kernel() perf_event_release_kernel() _free_event() _free_event() perf_pending_task_sync() perf_pending_task_sync() rcuwait_wait_event() rcuwait_wait_event() Therefore the only option left is to acquire the event reference count upon queueing the perf task work and release it from the task work, just like it was done before 3a5465418f5f ("perf: Fix event leak upon exec and file release") but without the leaks it fixed. Some adjustments are necessary to make it work: * A child event might dereference its parent upon freeing. Care must be taken to release the parent last. * Some places assuming the event doesn't have any reference held and therefore can be freed right away must instead put the reference and let the reference counting to its job. Reported-by: "Yi Lai" Closes: https://lore.kernel.org/all/Zx9Losv4YcJowaP%2F@ly-workstation/ Reported-by: syzbot+3c4321e10eea460eb606@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/673adf75.050a0220.87769.0024.GAE@google.com/ Fixes: 3a5465418f5f ("perf: Fix event leak upon exec and file release") Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250304135446.18905-1-frederic@kernel.org --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5a9bf15d4461..0069ba6866a4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -823,7 +823,6 @@ struct perf_event { struct irq_work pending_disable_irq; struct callback_head pending_task; unsigned int pending_work; - struct rcuwait pending_work_wait; atomic_t event_limit; -- cgit v1.2.3 From d1be0cf3b8aeae75bc8fff5b7a3e01ebfe276008 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 27 Mar 2025 16:33:01 +0100 Subject: kunit: Spelling s/slowm/slow/ Fix a misspelling of "slow". Link: https://lore.kernel.org/r/1f7ebf98598418914ec9f5b6d5cb8583d24a4bf0.1743089563.git.geert@linux-m68k.org Signed-off-by: Geert Uytterhoeven Reviewed-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 0ffb97c78566..39c768f87dc9 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -67,7 +67,7 @@ enum kunit_status { /* * Speed Attribute is stored as an enum and separated into categories of - * speed: very_slowm, slow, and normal. These speeds are relative to + * speed: very_slow, slow, and normal. These speeds are relative to * other KUnit tests. * * Note: unset speed attribute acts as default of KUNIT_SPEED_NORMAL. -- cgit v1.2.3 From 2424e146bee00ddb4d4f79d3224f54634ca8d2bc Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 8 Apr 2025 12:38:54 +0200 Subject: hrtimer: Add missing ACCESS_PRIVATE() for hrtimer::function The "function" field of struct hrtimer has been changed to private, but two instances have not been converted to use ACCESS_PRIVATE(). Convert them to use ACCESS_PRIVATE(). Fixes: 04257da0c99c ("hrtimers: Make callback function pointer private") Reported-by: kernel test robot Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250408103854.1851093-1-namcao@linutronix.de Closes: https://lore.kernel.org/oe-kbuild-all/202504071931.vOVl13tt-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202504072155.5UAZjYGU-lkp@intel.com/ --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1adcba3ddd76..1ef867bb8c44 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -345,7 +345,7 @@ static inline void hrtimer_update_function(struct hrtimer *timer, if (WARN_ON_ONCE(!function)) return; #endif - timer->function = function; + ACCESS_PRIVATE(timer, function) = function; } /* Forward a hrtimer so it expires after now: */ -- cgit v1.2.3 From 0bb2f7a1ad1f11d861f58e5ee5051c8974ff9569 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 7 Apr 2025 09:33:11 -0700 Subject: net: Fix null-ptr-deref by sock_lock_init_class_and_name() and rmmod. When I ran the repro [0] and waited a few seconds, I observed two LOCKDEP splats: a warning immediately followed by a null-ptr-deref. [1] Reproduction Steps: 1) Mount CIFS 2) Add an iptables rule to drop incoming FIN packets for CIFS 3) Unmount CIFS 4) Unload the CIFS module 5) Remove the iptables rule At step 3), the CIFS module calls sock_release() for the underlying TCP socket, and it returns quickly. However, the socket remains in FIN_WAIT_1 because incoming FIN packets are dropped. At this point, the module's refcnt is 0 while the socket is still alive, so the following rmmod command succeeds. # ss -tan State Recv-Q Send-Q Local Address:Port Peer Address:Port FIN-WAIT-1 0 477 10.0.2.15:51062 10.0.0.137:445 # lsmod | grep cifs cifs 1159168 0 This highlights a discrepancy between the lifetime of the CIFS module and the underlying TCP socket. Even after CIFS calls sock_release() and it returns, the TCP socket does not die immediately in order to close the connection gracefully. While this is generally fine, it causes an issue with LOCKDEP because CIFS assigns a different lock class to the TCP socket's sk->sk_lock using sock_lock_init_class_and_name(). Once an incoming packet is processed for the socket or a timer fires, sk->sk_lock is acquired. Then, LOCKDEP checks the lock context in check_wait_context(), where hlock_class() is called to retrieve the lock class. However, since the module has already been unloaded, hlock_class() logs a warning and returns NULL, triggering the null-ptr-deref. If LOCKDEP is enabled, we must ensure that a module calling sock_lock_init_class_and_name() (CIFS, NFS, etc) cannot be unloaded while such a socket is still alive to prevent this issue. Let's hold the module reference in sock_lock_init_class_and_name() and release it when the socket is freed in sk_prot_free(). Note that sock_lock_init() clears sk->sk_owner for svc_create_socket() that calls sock_lock_init_class_and_name() for a listening socket, which clones a socket by sk_clone_lock() without GFP_ZERO. [0]: CIFS_SERVER="10.0.0.137" CIFS_PATH="//${CIFS_SERVER}/Users/Administrator/Desktop/CIFS_TEST" DEV="enp0s3" CRED="/root/WindowsCredential.txt" MNT=$(mktemp -d /tmp/XXXXXX) mount -t cifs ${CIFS_PATH} ${MNT} -o vers=3.0,credentials=${CRED},cache=none,echo_interval=1 iptables -A INPUT -s ${CIFS_SERVER} -j DROP for i in $(seq 10); do umount ${MNT} rmmod cifs sleep 1 done rm -r ${MNT} iptables -D INPUT -s ${CIFS_SERVER} -j DROP [1]: DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 10 PID: 0 at kernel/locking/lockdep.c:234 hlock_class (kernel/locking/lockdep.c:234 kernel/locking/lockdep.c:223) Modules linked in: cifs_arc4 nls_ucs2_utils cifs_md4 [last unloaded: cifs] CPU: 10 UID: 0 PID: 0 Comm: swapper/10 Not tainted 6.14.0 #36 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:hlock_class (kernel/locking/lockdep.c:234 kernel/locking/lockdep.c:223) ... Call Trace: __lock_acquire (kernel/locking/lockdep.c:4853 kernel/locking/lockdep.c:5178) lock_acquire (kernel/locking/lockdep.c:469 kernel/locking/lockdep.c:5853 kernel/locking/lockdep.c:5816) _raw_spin_lock_nested (kernel/locking/spinlock.c:379) tcp_v4_rcv (./include/linux/skbuff.h:1678 ./include/net/tcp.h:2547 net/ipv4/tcp_ipv4.c:2350) ... BUG: kernel NULL pointer dereference, address: 00000000000000c4 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 10 UID: 0 PID: 0 Comm: swapper/10 Tainted: G W 6.14.0 #36 Tainted: [W]=WARN Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__lock_acquire (kernel/locking/lockdep.c:4852 kernel/locking/lockdep.c:5178) Code: 15 41 09 c7 41 8b 44 24 20 25 ff 1f 00 00 41 09 c7 8b 84 24 a0 00 00 00 45 89 7c 24 20 41 89 44 24 24 e8 e1 bc ff ff 4c 89 e7 <44> 0f b6 b8 c4 00 00 00 e8 d1 bc ff ff 0f b6 80 c5 00 00 00 88 44 RSP: 0018:ffa0000000468a10 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ff1100010091cc38 RCX: 0000000000000027 RDX: ff1100081f09ca48 RSI: 0000000000000001 RDI: ff1100010091cc88 RBP: ff1100010091c200 R08: ff1100083fe6e228 R09: 00000000ffffbfff R10: ff1100081eca0000 R11: ff1100083fe10dc0 R12: ff1100010091cc88 R13: 0000000000000001 R14: 0000000000000000 R15: 00000000000424b1 FS: 0000000000000000(0000) GS:ff1100081f080000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000c4 CR3: 0000000002c4a003 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: lock_acquire (kernel/locking/lockdep.c:469 kernel/locking/lockdep.c:5853 kernel/locking/lockdep.c:5816) _raw_spin_lock_nested (kernel/locking/spinlock.c:379) tcp_v4_rcv (./include/linux/skbuff.h:1678 ./include/net/tcp.h:2547 net/ipv4/tcp_ipv4.c:2350) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205 (discriminator 1)) ip_local_deliver_finish (./include/linux/rcupdate.h:878 net/ipv4/ip_input.c:234) ip_sublist_rcv_finish (net/ipv4/ip_input.c:576) ip_list_rcv_finish (net/ipv4/ip_input.c:628) ip_list_rcv (net/ipv4/ip_input.c:670) __netif_receive_skb_list_core (net/core/dev.c:5939 net/core/dev.c:5986) netif_receive_skb_list_internal (net/core/dev.c:6040 net/core/dev.c:6129) napi_complete_done (./include/linux/list.h:37 ./include/net/gro.h:519 ./include/net/gro.h:514 net/core/dev.c:6496) e1000_clean (drivers/net/ethernet/intel/e1000/e1000_main.c:3815) __napi_poll.constprop.0 (net/core/dev.c:7191) net_rx_action (net/core/dev.c:7262 net/core/dev.c:7382) handle_softirqs (kernel/softirq.c:561) __irq_exit_rcu (kernel/softirq.c:596 kernel/softirq.c:435 kernel/softirq.c:662) irq_exit_rcu (kernel/softirq.c:680) common_interrupt (arch/x86/kernel/irq.c:280 (discriminator 14)) asm_common_interrupt (./arch/x86/include/asm/idtentry.h:693) RIP: 0010:default_idle (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:92 arch/x86/kernel/process.c:744) Code: 4c 01 c7 4c 29 c2 e9 72 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa eb 07 0f 00 2d c3 2b 15 00 fb f4 c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 RSP: 0018:ffa00000000ffee8 EFLAGS: 00000202 RAX: 000000000000640b RBX: ff1100010091c200 RCX: 0000000000061aa4 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff812f30c5 RBP: 000000000000000a R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000002 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? do_idle (kernel/sched/idle.c:186 kernel/sched/idle.c:325) default_idle_call (./include/linux/cpuidle.h:143 kernel/sched/idle.c:118) do_idle (kernel/sched/idle.c:186 kernel/sched/idle.c:325) cpu_startup_entry (kernel/sched/idle.c:422 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:315) common_startup_64 (arch/x86/kernel/head_64.S:421) Modules linked in: cifs_arc4 nls_ucs2_utils cifs_md4 [last unloaded: cifs] CR2: 00000000000000c4 Fixes: ed07536ed673 ("[PATCH] lockdep: annotate nfs/nfsd in-kernel sockets") Signed-off-by: Kuniyuki Iwashima Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250407163313.22682-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 8daf1b3b12c6..694f954258d4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -339,6 +339,8 @@ struct sk_filter; * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. + * @sk_owner: reference to the real owner of the socket that calls + * sock_lock_init_class_and_name(). */ struct sock { /* @@ -547,6 +549,10 @@ struct sock { struct rcu_head sk_rcu; netns_tracker ns_tracker; struct xarray sk_user_frags; + +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) + struct module *sk_owner; +#endif }; struct sock_bh_locked { @@ -1583,6 +1589,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) sk_mem_reclaim(sk); } +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ + __module_get(owner); + sk->sk_owner = owner; +} + +static inline void sk_owner_clear(struct sock *sk) +{ + sk->sk_owner = NULL; +} + +static inline void sk_owner_put(struct sock *sk) +{ + module_put(sk->sk_owner); +} +#else +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ +} + +static inline void sk_owner_clear(struct sock *sk) +{ +} + +static inline void sk_owner_put(struct sock *sk) +{ +} +#endif /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. @@ -1592,13 +1627,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ + sk_owner_set(sk, THIS_MODULE); \ sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ - sizeof((sk)->sk_lock)); \ + sizeof((sk)->sk_lock)); \ lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ - (skey), (sname)); \ + (skey), (sname)); \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) -- cgit v1.2.3 From acea9943271b62905033f2f8ca571cdd52d6ea7b Mon Sep 17 00:00:00 2001 From: Peng Jiang Date: Mon, 24 Mar 2025 19:12:30 +0800 Subject: vdso: Address variable shadowing in macros Compiling the kernel with gcc12.3 W=2 results in shadowing warnings: warning: declaration of '__pptr' shadows a previous local [-Wshadow] const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); note: in definition of macro '__put_unaligned_t' __pptr->x = (val); note: in expansion of macro '__get_unaligned_t' __put_unaligned_t(type, __get_unaligned_t(type, src), dst); __get_unaligned_t() and __put_unaligned_t() use a local variable named '__pptr', which can lead to variable shadowing when these macros are used in the same scope. This results in a -Wshadow warning during compilation. To address this issue, rename the local variables within the macros to ensure uniqueness. Signed-off-by: Peng Jiang Signed-off-by: Shao Mingyin Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250324191230477zpGtgIRSH4mEHdtxGtgx9@zte.com.cn --- include/vdso/unaligned.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/vdso/unaligned.h b/include/vdso/unaligned.h index eee3d2a4dbe4..ff0c06b6513e 100644 --- a/include/vdso/unaligned.h +++ b/include/vdso/unaligned.h @@ -2,14 +2,14 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __get_pptr = (typeof(__get_pptr))(ptr); \ + __get_pptr->x; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed * __put_pptr = (typeof(__put_pptr))(ptr); \ + __put_pptr->x = (val); \ } while (0) #endif /* __VDSO_UNALIGNED_H */ -- cgit v1.2.3 From 285b2c74cf9982e873ef82a2cb1328d9e9406f65 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 10 Apr 2025 14:21:29 +0100 Subject: firmware: cs_dsp: test_bin_error: Fix uninitialized data used as fw version Call cs_dsp_mock_xm_header_get_fw_version() to get the firmware version from the dummy XM header data in cs_dsp_bin_err_test_common_init(). Make the same change to cs_dsp_bin_test_common_init() and remove the cs_dsp_mock_xm_header_get_fw_version_from_regmap() function. The code in cs_dsp_test_bin.c was correctly calling cs_dsp_mock_xm_header_get_fw_version_from_regmap() to fetch the fw version from a dummy header it wrote to XM registers. However in cs_dsp_test_bin_error.c the test doesn't stuff a dummy header into XM, it populates it the normal way using a wmfw file. It should have called cs_dsp_mock_xm_header_get_fw_version() to get the data from its blob buffer, but was calling cs_dsp_mock_xm_header_get_fw_version_from_regmap(). As nothing had been written to the registers this returned the value of uninitialized data. The only other use of cs_dsp_mock_xm_header_get_fw_version_from_regmap() was cs_dsp_test_bin.c, but it doesn't need to use it. It already has a blob buffer containing the dummy XM header so it can use cs_dsp_mock_xm_header_get_fw_version() to read from that. Fixes: cd8c058499b6 ("firmware: cs_dsp: Add KUnit testing of bin error cases") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250410132129.1312541-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp_test_utils.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h index 4f87a908ab4f..ecd821ed8064 100644 --- a/include/linux/firmware/cirrus/cs_dsp_test_utils.h +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -104,7 +104,6 @@ unsigned int cs_dsp_mock_num_dsp_words_to_num_packed_regs(unsigned int num_dsp_w unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *priv, unsigned int alg_id, int mem_type); -unsigned int cs_dsp_mock_xm_header_get_fw_version_from_regmap(struct cs_dsp_test *priv); unsigned int cs_dsp_mock_xm_header_get_fw_version(struct cs_dsp_mock_xm_header *header); void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv); int cs_dsp_mock_xm_header_write_to_regmap(struct cs_dsp_mock_xm_header *header); -- cgit v1.2.3 From 6c2b75404d33caa46a582f2791a70f92232adb71 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Tue, 1 Apr 2025 17:59:11 +0200 Subject: accel/ivpu: Fix the NPU's DPU frequency calculation Fix the frequency returned to the user space by the DRM_IVPU_PARAM_CORE_CLOCK_RATE GET_PARAM IOCTL. The kernel driver returned CPU frequency for MTL and bare PLL frequency for LNL - this was inconsistent and incorrect for both platforms. With this fix the driver returns maximum frequency of the NPU data processing unit (DPU) for all HW generations. This is what user space always expected. Also do not set CPU frequency in boot params - the firmware does not use frequency passed from the driver, it was only used by the early pre-production firmware. With that we can remove CPU frequency calculation code. Show NPU frequency in FREQ_CHANGE interrupt when frequency tracking is enabled. Fixes: 8a27ad81f7d3 ("accel/ivpu: Split IP and buttress code") Cc: stable@vger.kernel.org # v6.11+ Signed-off-by: Andrzej Kacprowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250401155912.4049340-2-maciej.falkowski@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 746c43bd3eb6..2f24103f4533 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __UAPI_IVPU_DRM_H__ @@ -147,7 +147,7 @@ struct drm_ivpu_param { * platform type when executing on a simulator or emulator (read-only) * * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: - * Current PLL frequency (read-only) + * Maximum frequency of the NPU data processing unit clock (read-only) * * %DRM_IVPU_PARAM_NUM_CONTEXTS: * Maximum number of simultaneously existing contexts (read-only) -- cgit v1.2.3 From b2b4483b5d05026218127fc8f38c69adf69c235b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 13:00:53 -0700 Subject: dcache: convert dentry flag macros to enum Commit 9748cb2dc393 ("VFS: repack DENTRY_ flags.") changed the value of DCACHE_MOUNTED, which broke drgn's path_lookup() helper. drgn is forced to hard-code it because it's a macro, and macros aren't preserved in debugging information by default. Enums, on the other hand, are included in debugging information. Convert the DCACHE_* flag macros to an enum so that debugging tools like drgn and bpftrace can make use of them. Link: https://github.com/osandov/drgn/blob/2027d0fea84d74b835e77392f7040c2a333180c6/drgn/helpers/linux/fs.py#L43-L46 Signed-off-by: Omar Sandoval Link: https://lore.kernel.org/177665a082f048cf536b9cd6af467b3be6b6e6ed.1744141838.git.osandov@fb.com Signed-off-by: Christian Brauner --- include/linux/dcache.h | 106 +++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8d1395f945bf..e9f07e37dd6f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -173,65 +173,59 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH BIT(0) -#define DCACHE_OP_COMPARE BIT(1) -#define DCACHE_OP_REVALIDATE BIT(2) -#define DCACHE_OP_DELETE BIT(3) -#define DCACHE_OP_PRUNE BIT(4) - -#define DCACHE_DISCONNECTED BIT(5) - /* This dentry is possibly not currently connected to the dcache tree, in - * which case its parent will either be itself, or will have this flag as - * well. nfsd will not use a dentry with this bit set, but will first - * endeavour to clear the bit either by discovering that it is connected, - * or by performing lookup operations. Any filesystem which supports - * nfsd_operations MUST have a lookup function which, if it finds a - * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that - * dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. */ - -#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ - -#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ - -#define DCACHE_CANT_MOUNT BIT(8) -#define DCACHE_GENOCIDE BIT(9) -#define DCACHE_SHRINK_LIST BIT(10) - -#define DCACHE_OP_WEAK_REVALIDATE BIT(11) - -#define DCACHE_NFSFS_RENAMED BIT(12) - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(13) - /* Parent inode is watched by some fsnotify listener */ - -#define DCACHE_DENTRY_KILLED BIT(14) - -#define DCACHE_MOUNTED BIT(15) /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT BIT(16) /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT BIT(17) /* manage transit from this dirent */ +enum dentry_flags { + DCACHE_OP_HASH = BIT(0), + DCACHE_OP_COMPARE = BIT(1), + DCACHE_OP_REVALIDATE = BIT(2), + DCACHE_OP_DELETE = BIT(3), + DCACHE_OP_PRUNE = BIT(4), + /* + * This dentry is possibly not currently connected to the dcache tree, + * in which case its parent will either be itself, or will have this + * flag as well. nfsd will not use a dentry with this bit set, but will + * first endeavour to clear the bit either by discovering that it is + * connected, or by performing lookup operations. Any filesystem which + * supports nfsd_operations MUST have a lookup function which, if it + * finds a directory inode with a DCACHE_DISCONNECTED dentry, will + * d_move that dentry into place and return that dentry rather than the + * passed one, typically using d_splice_alias. + */ + DCACHE_DISCONNECTED = BIT(5), + DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */ + DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ + DCACHE_CANT_MOUNT = BIT(8), + DCACHE_GENOCIDE = BIT(9), + DCACHE_SHRINK_LIST = BIT(10), + DCACHE_OP_WEAK_REVALIDATE = BIT(11), + /* + * this dentry has been "silly renamed" and has to be deleted on the + * last dput() + */ + DCACHE_NFSFS_RENAMED = BIT(12), + DCACHE_FSNOTIFY_PARENT_WATCHED = BIT(13), /* Parent inode is watched by some fsnotify listener */ + DCACHE_DENTRY_KILLED = BIT(14), + DCACHE_MOUNTED = BIT(15), /* is a mountpoint */ + DCACHE_NEED_AUTOMOUNT = BIT(16), /* handle automount on this dir */ + DCACHE_MANAGE_TRANSIT = BIT(17), /* manage transit from this dirent */ + DCACHE_LRU_LIST = BIT(18), + DCACHE_ENTRY_TYPE = (7 << 19), /* bits 19..21 are for storing type: */ + DCACHE_MISS_TYPE = (0 << 19), /* Negative dentry */ + DCACHE_WHITEOUT_TYPE = (1 << 19), /* Whiteout dentry (stop pathwalk) */ + DCACHE_DIRECTORY_TYPE = (2 << 19), /* Normal directory */ + DCACHE_AUTODIR_TYPE = (3 << 19), /* Lookupless directory (presumed automount) */ + DCACHE_REGULAR_TYPE = (4 << 19), /* Regular file type */ + DCACHE_SPECIAL_TYPE = (5 << 19), /* Other file type */ + DCACHE_SYMLINK_TYPE = (6 << 19), /* Symlink */ + DCACHE_NOKEY_NAME = BIT(22), /* Encrypted name encoded without key */ + DCACHE_OP_REAL = BIT(23), + DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ + DCACHE_DENTRY_CURSOR = BIT(25), + DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ +}; + #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST BIT(18) - -#define DCACHE_ENTRY_TYPE (7 << 19) /* bits 19..21 are for storing type: */ -#define DCACHE_MISS_TYPE (0 << 19) /* Negative dentry */ -#define DCACHE_WHITEOUT_TYPE (1 << 19) /* Whiteout dentry (stop pathwalk) */ -#define DCACHE_DIRECTORY_TYPE (2 << 19) /* Normal directory */ -#define DCACHE_AUTODIR_TYPE (3 << 19) /* Lookupless directory (presumed automount) */ -#define DCACHE_REGULAR_TYPE (4 << 19) /* Regular file type */ -#define DCACHE_SPECIAL_TYPE (5 << 19) /* Other file type */ -#define DCACHE_SYMLINK_TYPE (6 << 19) /* Symlink */ - -#define DCACHE_NOKEY_NAME BIT(22) /* Encrypted name encoded without key */ -#define DCACHE_OP_REAL BIT(23) - -#define DCACHE_PAR_LOOKUP BIT(24) /* being looked up (with parent locked shared) */ -#define DCACHE_DENTRY_CURSOR BIT(25) -#define DCACHE_NORCU BIT(26) /* No RCU delay for freeing */ - extern seqlock_t rename_lock; /* -- cgit v1.2.3 From 7bdd8f75d16557ee4111c7b2678463cabf0f04c3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 10 Apr 2025 11:27:40 -0700 Subject: fwctl/cxl: Fix uuid_t usage in uapi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uuid_t type is kernel internal, and Paul reports the following build error when it is used in a uapi header: usr/include/cxl/features.h:59:9: error: unknown type name ‘uuid_t’ Create a uuid type (__uapi_uuid_t) compatible with the longstanding definition uuid/uuid.h for userspace builds, and use uuid_t directly for kernel builds. Fixes: 9b8e73cdb141 ("cxl: Move cxl feature command structs to user header") Link: https://patch.msgid.link/r/174430961702.617339.13963021112051029933.stgit@dwillia2-xfh.jf.intel.com Suggested-by: Jason Gunthorpe Reported-by: Paul E. McKenney Closes: http://lore.kernel.org/f6489337-67c7-48c8-b48a-58603ec15328@paulmck-laptop Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504050434.Eb4vugh5-lkp@intel.com/ Signed-off-by: Dan Williams Reviewed-by: Dave Jiang Tested-by: Paul E. McKenney Signed-off-by: Jason Gunthorpe --- include/uapi/cxl/features.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/cxl/features.h b/include/uapi/cxl/features.h index d6db8984889f..490606d7694b 100644 --- a/include/uapi/cxl/features.h +++ b/include/uapi/cxl/features.h @@ -8,10 +8,19 @@ #define _UAPI_CXL_FEATURES_H_ #include -#ifndef __KERNEL__ -#include -#else + +typedef unsigned char __uapi_uuid_t[16]; + +#ifdef __KERNEL__ #include +/* + * Note, __uapi_uuid_t is 1-byte aligned on modern compilers and 4-byte + * aligned on others. Ensure that __uapi_uuid_t in a struct is placed at + * a 4-byte aligned offset, or the structure is packed, to ensure + * consistent padding. + */ +static_assert(sizeof(__uapi_uuid_t) == sizeof(uuid_t)); +#define __uapi_uuid_t uuid_t #endif /* @@ -60,7 +69,7 @@ struct cxl_mbox_get_sup_feats_in { * Get Supported Features Supported Feature Entry */ struct cxl_feat_entry { - uuid_t uuid; + __uapi_uuid_t uuid; __le16 id; __le16 get_feat_size; __le16 set_feat_size; @@ -110,7 +119,7 @@ struct cxl_mbox_get_sup_feats_out { * CXL spec r3.2 section 8.2.9.6.2 Table 8-99 */ struct cxl_mbox_get_feat_in { - uuid_t uuid; + __uapi_uuid_t uuid; __le16 offset; __le16 count; __u8 selection; @@ -143,7 +152,7 @@ enum cxl_get_feat_selection { */ struct cxl_mbox_set_feat_in { __struct_group(cxl_mbox_set_feat_hdr, hdr, /* no attrs */, - uuid_t uuid; + __uapi_uuid_t uuid; __le32 flags; __le16 offset; __u8 version; -- cgit v1.2.3 From 51339d99c0131bc0d16d378e9b05bc498d2967e2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 2 Apr 2025 19:55:14 -0700 Subject: locking/local_lock, mm: replace localtry_ helpers with local_trylock_t type Partially revert commit 0aaddfb06882 ("locking/local_lock: Introduce localtry_lock_t"). Remove localtry_*() helpers, since localtry_lock() name might be misinterpreted as "try lock". Introduce local_trylock[_irqsave]() helpers that only work with newly introduced local_trylock_t type. Note that attempt to use local_trylock[_irqsave]() with local_lock_t will cause compilation failure. Usage and behavior in !PREEMPT_RT: local_lock_t lock; // sizeof(lock) == 0 local_lock(&lock); // preempt disable local_lock_irqsave(&lock, ...); // irq save if (local_trylock_irqsave(&lock, ...)) // compilation error local_trylock_t lock; // sizeof(lock) == 4 local_lock(&lock); // preempt disable, acquired = 1 local_lock_irqsave(&lock, ...); // irq save, acquired = 1 if (local_trylock(&lock)) // if (!acquired) preempt disable, acquired = 1 if (local_trylock_irqsave(&lock, ...)) // if (!acquired) irq save, acquired = 1 The existing local_lock_*() macros can be used either with local_lock_t or local_trylock_t. With local_trylock_t they set acquired = 1 while local_unlock_*() clears it. In !PREEMPT_RT local_lock_irqsave(local_lock_t *) disables interrupts to protect critical section, but it doesn't prevent NMI, so the fully reentrant code cannot use local_lock_irqsave(local_lock_t *) for exclusive access. The local_lock_irqsave(local_trylock_t *) helper disables interrupts and sets acquired=1, so local_trylock_irqsave(local_trylock_t *) from NMI attempting to acquire the same lock will return false. In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock(). Map local_trylock_irqsave() to preemptible spin_trylock(). When in hard IRQ or NMI return false right away, since spin_trylock() is not safe due to explicit locking in the underneath rt_spin_trylock() implementation. Removing this explicit locking and attempting only "trylock" is undesired due to PI implications. The local_trylock() without _irqsave can be used to avoid the cost of disabling/enabling interrupts by only disabling preemption, so local_trylock() in an interrupt attempting to acquire the same lock will return false. Note there is no need to use local_inc for acquired variable, since it's a percpu variable with strict nesting scopes. Note that guard(local_lock)(&lock) works only for "local_lock_t lock". The patch also makes sure that local_lock_release(l) is called before WRITE_ONCE(l->acquired, 0). Though IRQs are disabled at this point the local_trylock() from NMI will succeed and local_lock_acquire(l) will warn. Link: https://lkml.kernel.org/r/20250403025514.41186-1-alexei.starovoitov@gmail.com Fixes: 0aaddfb06882 ("locking/local_lock: Introduce localtry_lock_t") Signed-off-by: Alexei Starovoitov Acked-by: Vlastimil Babka Acked-by: Sebastian Andrzej Siewior Reviewed-by: Shakeel Butt Cc: Daniel Borkman Cc: Linus Torvalds Cc: Martin KaFai Lau Cc: Michal Hocko Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/local_lock.h | 58 ++-------- include/linux/local_lock_internal.h | 207 +++++++++++++++--------------------- 2 files changed, 95 insertions(+), 170 deletions(-) (limited to 'include') diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 1a0bc35839e3..16a2ee4f8310 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -52,44 +52,23 @@ __local_unlock_irqrestore(lock, flags) /** - * localtry_lock_init - Runtime initialize a lock instance - */ -#define localtry_lock_init(lock) __localtry_lock_init(lock) - -/** - * localtry_lock - Acquire a per CPU local lock - * @lock: The lock variable - */ -#define localtry_lock(lock) __localtry_lock(lock) - -/** - * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts - * @lock: The lock variable - */ -#define localtry_lock_irq(lock) __localtry_lock_irq(lock) - -/** - * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable - * interrupts - * @lock: The lock variable - * @flags: Storage for interrupt flags + * local_lock_init - Runtime initialize a lock instance */ -#define localtry_lock_irqsave(lock, flags) \ - __localtry_lock_irqsave(lock, flags) +#define local_trylock_init(lock) __local_trylock_init(lock) /** - * localtry_trylock - Try to acquire a per CPU local lock. + * local_trylock - Try to acquire a per CPU local lock * @lock: The lock variable * * The function can be used in any context such as NMI or HARDIRQ. Due to * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define localtry_trylock(lock) __localtry_trylock(lock) +#define local_trylock(lock) __local_trylock(lock) /** - * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable - * interrupts if acquired + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable + * interrupts if acquired * @lock: The lock variable * @flags: Storage for interrupt flags * @@ -97,29 +76,8 @@ * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define localtry_trylock_irqsave(lock, flags) \ - __localtry_trylock_irqsave(lock, flags) - -/** - * local_unlock - Release a per CPU local lock - * @lock: The lock variable - */ -#define localtry_unlock(lock) __localtry_unlock(lock) - -/** - * local_unlock_irq - Release a per CPU local lock and enable interrupts - * @lock: The lock variable - */ -#define localtry_unlock_irq(lock) __localtry_unlock_irq(lock) - -/** - * localtry_unlock_irqrestore - Release a per CPU local lock and restore - * interrupt flags - * @lock: The lock variable - * @flags: Interrupt flags to restore - */ -#define localtry_unlock_irqrestore(lock, flags) \ - __localtry_unlock_irqrestore(lock, flags) +#define local_trylock_irqsave(lock, flags) \ + __local_trylock_irqsave(lock, flags) DEFINE_GUARD(local_lock, local_lock_t __percpu*, local_lock(_T), diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 67bd13d142fa..bf2bf40d7b18 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -15,10 +15,11 @@ typedef struct { #endif } local_lock_t; +/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ typedef struct { local_lock_t llock; - unsigned int acquired; -} localtry_lock_t; + u8 acquired; +} local_trylock_t; #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ @@ -29,6 +30,9 @@ typedef struct { }, \ .owner = NULL, +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ + .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); @@ -56,6 +60,7 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } static inline void local_trylock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } @@ -63,7 +68,7 @@ static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } -#define INIT_LOCALTRY_LOCK(lockname) { .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }} +#define INIT_LOCAL_TRYLOCK(lockname) { LOCAL_TRYLOCK_DEBUG_INIT(lockname) } #define __local_lock_init(lock) \ do { \ @@ -76,6 +81,8 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_trylock_init(lock) __local_lock_init(lock.llock) + #define __spinlock_nested_bh_init(lock) \ do { \ static struct lock_class_key __key; \ @@ -87,149 +94,117 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_lock_acquire(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + _Generic((lock), \ + local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 0); \ + WRITE_ONCE(tl->acquired, 1); \ + }), \ + default:(void)0); \ + local_lock_acquire(l); \ + } while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - preempt_enable(); \ + __local_lock_acquire(lock); \ } while (0) -#define __local_unlock_irq(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_enable(); \ - } while (0) - -#define __local_unlock_irqrestore(lock, flags) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_restore(flags); \ - } while (0) - -#define __local_lock_nested_bh(lock) \ - do { \ - lockdep_assert_in_softirq(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock_nested_bh(lock) \ - local_lock_release(this_cpu_ptr(lock)) - -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) \ -do { \ - __local_lock_init(&(lock)->llock); \ - WRITE_ONCE((lock)->acquired, 0); \ -} while (0) - -#define __localtry_lock(lock) \ - do { \ - localtry_lock_t *lt; \ - preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irq(lock) \ - do { \ - localtry_lock_t *lt; \ - local_irq_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irqsave(lock, flags) \ - do { \ - localtry_lock_t *lt; \ - local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ - localtry_lock_t *lt; \ - bool _ret; \ + local_trylock_t *tl; \ \ preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(<->llock); \ - _ret = true; \ - } else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ preempt_enable(); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ - localtry_lock_t *lt; \ - bool _ret; \ + local_trylock_t *tl; \ \ local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(<->llock); \ - _ret = true; \ - } else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ local_irq_restore(flags); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_unlock(lock) \ +#define __local_lock_release(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + local_lock_release(l); \ + _Generic((lock), \ + local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 1); \ + WRITE_ONCE(tl->acquired, 0); \ + }), \ + default:(void)0); \ + } while (0) + +#define __local_unlock(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ preempt_enable(); \ } while (0) -#define __localtry_unlock_irq(lock) \ +#define __local_unlock_irq(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ local_irq_enable(); \ } while (0) -#define __localtry_unlock_irqrestore(lock, flags) \ +#define __local_unlock_irqrestore(lock, flags) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ local_irq_restore(flags); \ } while (0) +#define __local_lock_nested_bh(lock) \ + do { \ + lockdep_assert_in_softirq(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_unlock_nested_bh(lock) \ + local_lock_release(this_cpu_ptr(lock)) + #else /* !CONFIG_PREEMPT_RT */ /* @@ -237,16 +212,18 @@ do { \ * critical section while staying preemptible. */ typedef spinlock_t local_lock_t; -typedef spinlock_t localtry_lock_t; +typedef spinlock_t local_trylock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) -#define INIT_LOCALTRY_LOCK(lockname) INIT_LOCAL_LOCK(lockname) +#define INIT_LOCAL_TRYLOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) #define __local_lock_init(l) \ do { \ local_spin_lock_init((l)); \ } while (0) +#define __local_trylock_init(l) __local_lock_init(l) + #define __local_lock(__lock) \ do { \ migrate_disable(); \ @@ -283,17 +260,7 @@ do { \ spin_unlock(this_cpu_ptr((lock))); \ } while (0) -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) __local_lock_init(lock) -#define __localtry_lock(lock) __local_lock(lock) -#define __localtry_lock_irq(lock) __local_lock(lock) -#define __localtry_lock_irqsave(lock, flags) __local_lock_irqsave(lock, flags) -#define __localtry_unlock(lock) __local_unlock(lock) -#define __localtry_unlock_irq(lock) __local_unlock(lock) -#define __localtry_unlock_irqrestore(lock, flags) __local_unlock_irqrestore(lock, flags) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ int __locked; \ \ @@ -308,11 +275,11 @@ do { \ __locked; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ typecheck(unsigned long, flags); \ flags = 0; \ - __localtry_trylock(lock); \ + __local_trylock(lock); \ }) #endif /* CONFIG_PREEMPT_RT */ -- cgit v1.2.3 From 8c56c5dbcf52220cc9be7a36e7f21ebd5939e0b9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 8 Apr 2025 10:59:50 +0200 Subject: mm: (un)track_pfn_copy() fix + doc improvements We got a late smatch warning and some additional review feedback. smatch warnings: mm/memory.c:1428 copy_page_range() error: uninitialized symbol 'pfn'. We actually use the pfn only when it is properly initialized; however, we may pass an uninitialized value to a function -- although it will not use it that likely still is UB in C. So let's just fix it by always initializing pfn in the caller of track_pfn_copy(), and improving the documentation of track_pfn_copy(). While at it, clarify the doc of untrack_pfn_copy(), that internal checks make sure if we actually have to untrack anything. Link: https://lkml.kernel.org/r/20250408085950.976103-1-david@redhat.com Fixes: dc84bc2aba85 ("x86/mm/pat: Fix VM_PAT handling when fork() fails in copy_page_range()") Signed-off-by: David Hildenbrand Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202503270941.IFILyNCX-lkp@intel.com/ Reviewed-by: Lorenzo Stoakes Acked-by: Ingo Molnar Cc: Andrew Morton Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Rik van Riel Cc: "H. Peter Anvin" Cc: Linus Torvalds Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e2b705c14945..b50447ef1c92 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1511,8 +1511,9 @@ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, /* * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page - * tables copied during copy_page_range(). On success, stores the pfn to be - * passed to untrack_pfn_copy(). + * tables copied during copy_page_range(). Will store the pfn to be + * passed to untrack_pfn_copy() only if there is something to be untracked. + * Callers should initialize the pfn to 0. */ static inline int track_pfn_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, unsigned long *pfn) @@ -1522,7 +1523,9 @@ static inline int track_pfn_copy(struct vm_area_struct *dst_vma, /* * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during - * copy_page_range(), but after track_pfn_copy() was already called. + * copy_page_range(), but after track_pfn_copy() was already called. Can + * be called even if track_pfn_copy() did not actually track anything: + * handled internally. */ static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn) -- cgit v1.2.3 From b2e689baf220408aff8ee5dfb4edb0817e1632bb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 11 Apr 2025 15:14:18 +0800 Subject: crypto: ahash - Disable request chaining Disable hash request chaining in case a driver that copies an ahash_request object by hand accidentally triggers chaining. Reported-by: Manorit Chawdhry Fixes: f2ffe5a9183d ("crypto: hash - Add request chaining API") Signed-off-by: Herbert Xu Tested-by: Manorit Chawdhry Signed-off-by: Herbert Xu --- include/crypto/hash.h | 6 +++++- include/crypto/internal/hash.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 2aa83ee0ec98..a67988316d06 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -10,6 +10,7 @@ #include #include +#include #include /* Set this bit for virtual address instead of SG list. */ @@ -581,7 +582,10 @@ static inline struct ahash_request *ahash_request_alloc_noprof( * ahash_request_free() - zeroize and free the request data structure * @req: request data structure cipher handle to be freed */ -void ahash_request_free(struct ahash_request *req); +static inline void ahash_request_free(struct ahash_request *req) +{ + kfree_sensitive(req); +} static inline struct ahash_request *ahash_request_cast( struct crypto_async_request *req) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 485e22cf517e..052ac7924af3 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -249,7 +249,7 @@ static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) static inline bool ahash_request_chained(struct ahash_request *req) { - return crypto_request_chained(&req->base); + return false; } static inline bool ahash_request_isvirt(struct ahash_request *req) -- cgit v1.2.3 From cd35b6cb46649750b7dbd0df0e2d767415d8917b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:02:21 -0700 Subject: nfs: add missing selections of CONFIG_CRC32 nfs.ko, nfsd.ko, and lockd.ko all use crc32_le(), which is available only when CONFIG_CRC32 is enabled. But the only NFS kconfig option that selected CONFIG_CRC32 was CONFIG_NFS_DEBUG, which is client-specific and did not actually guard the use of crc32_le() even on the client. The code worked around this bug by only actually calling crc32_le() when CONFIG_CRC32 is built-in, instead hard-coding '0' in other cases. This avoided randconfig build errors, and in real kernels the fallback code was unlikely to be reached since CONFIG_CRC32 is 'default y'. But, this really needs to just be done properly, especially now that I'm planning to update CONFIG_CRC32 to not be 'default y'. Therefore, make CONFIG_NFS_FS, CONFIG_NFSD, and CONFIG_LOCKD select CONFIG_CRC32. Then remove the fallback code that becomes unnecessary, as well as the selection of CONFIG_CRC32 from CONFIG_NFS_DEBUG. Fixes: 1264a2f053a3 ("NFS: refactor code for calculating the crc32 hash of a filehandle") Signed-off-by: Eric Biggers Acked-by: Anna Schumaker Signed-off-by: Chuck Lever --- include/linux/nfs.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 9ad727ddfedb..0906a0b40c6a 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -55,7 +55,6 @@ enum nfs3_stable_how { NFS_INVALID_STABLE_HOW = -1 }; -#ifdef CONFIG_CRC32 /** * nfs_fhandle_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -67,10 +66,4 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) { return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); } -#else /* CONFIG_CRC32 */ -static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) -{ - return 0; -} -#endif /* CONFIG_CRC32 */ #endif /* _LINUX_NFS_H */ -- cgit v1.2.3 From cf761e3dacc6ad5f65a4886d00da1f9681e6805a Mon Sep 17 00:00:00 2001 From: Jonathan Currier Date: Sun, 17 Nov 2024 17:48:42 -0600 Subject: PCI/MSI: Add an option to write MSIX ENTRY_DATA before any reads Commit 7d5ec3d36123 ("PCI/MSI: Mask all unused MSI-X entries") introduced a readl() from ENTRY_VECTOR_CTRL before the writel() to ENTRY_DATA. This is correct, however some hardware, like the Sun Neptune chips, the NIU module, will cause an error and/or fatal trap if any MSIX table entry is read before the corresponding ENTRY_DATA field is written to. Add an optional early writel() in msix_prepare_msi_desc(). Fixes: 7d5ec3d36123 ("PCI/MSI: Mask all unused MSI-X entries") Signed-off-by: Jonathan Currier Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241117234843.19236-2-dullfire@yahoo.com --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..51e2bd6405cd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -245,6 +245,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), /* Device does honor MSI masking despite saying otherwise */ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), + /* Device requires write to PCI_MSIX_ENTRY_DATA before any MSIX reads */ + PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST = (__force pci_dev_flags_t) (1 << 13), }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From c86b300b1ea35959a6e2a63a6497226a6ea90b67 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 14 Apr 2025 22:13:33 +0200 Subject: fs: add kern_path_locked_negative() The audit code relies on the fact that kern_path_locked() returned a path even for a negative dentry. If it doesn't find a valid dentry it immediately calls: audit_find_parent(d_backing_inode(parent_path.dentry)); which assumes that parent_path.dentry is still valid. But it isn't since kern_path_locked() has been changed to path_put() also for a negative dentry. Fix this by adding a helper that implements the required audit semantics and allows us to fix the immediate bleeding. We can find a unified solution for this afterwards. Link: https://lore.kernel.org/20250414-rennt-wimmeln-f186c3a780f1@brauner Fixes: 1c3cb50b58c3 ("VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry") Reported-and-tested-by: Vlastimil Babka Signed-off-by: Christian Brauner --- include/linux/namei.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index e3042176cdf4..bbaf55fb3101 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -62,6 +62,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); +extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, -- cgit v1.2.3 From 25744f849524e806a13ade17c4fb83f6888fe954 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 15 Apr 2025 14:09:45 +0100 Subject: io_uring/zcrx: return ifq id to the user IORING_OP_RECV_ZC requests take a zcrx object id via sqe::zcrx_ifq_idx, which binds it to the corresponding if / queue. However, we don't return that id back to the user. It's fine as currently there can be only one zcrx and the user assumes that its id should be 0, but as we'll need multiple zcrx objects in the future let's explicitly pass it back on registration. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8714667d370651962f7d1a169032e5f02682a73e.1744722517.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ed2beb4def3f..8f1fc12bac46 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1010,7 +1010,9 @@ struct io_uring_zcrx_ifq_reg { __u64 region_ptr; /* struct io_uring_region_desc * */ struct io_uring_zcrx_offsets offsets; - __u64 __resv[4]; + __u32 zcrx_id; + __u32 __resv2; + __u64 __resv[3]; }; #ifdef __cplusplus -- cgit v1.2.3 From 2d300ce0b783ba74420052ed3a12010ac93bdb08 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Apr 2025 20:20:21 +0300 Subject: net: fib_rules: Fix iif / oif matching on L3 master device Before commit 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") it was possible to use FIB rules to match on a L3 domain. This was done by having a FIB rule match on iif / oif being a L3 master device. It worked because prior to the FIB rule lookup the iif / oif fields in the flow structure were reset to the index of the L3 master device to which the input / output device was enslaved to. The above scheme made it impossible to match on the original input / output device. Therefore, cited commit stopped overwriting the iif / oif fields in the flow structure and instead stored the index of the enslaving L3 master device in a new field ('flowi_l3mdev') in the flow structure. While the change enabled new use cases, it broke the original use case of matching on a L3 domain. Fix this by interpreting the iif / oif matching on a L3 master device as a match against the L3 domain. In other words, if the iif / oif in the FIB rule points to a L3 master device, compare the provided index against 'flowi_l3mdev' rather than 'flowi_{i,o}if'. Before cited commit, a FIB rule that matched on 'iif vrf1' would only match incoming traffic from devices enslaved to 'vrf1'. With the proposed change (i.e., comparing against 'flowi_l3mdev'), the rule would also match traffic originating from a socket bound to 'vrf1'. Avoid that by adding a new flow flag ('FLOWI_FLAG_L3MDEV_OIF') that indicates if the L3 domain was derived from the output interface or the input interface (when not set) and take this flag into account when evaluating the FIB rule against the flow structure. Avoid unnecessary checks in the data path by detecting that a rule matches on a L3 master device when the rule is installed and marking it as such. Tested using the following script [1]. Output before 40867d74c374 (v5.4.291): default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link Output after 40867d74c374: default dev dummy1 table 300 scope link default dev dummy1 table 300 scope link Output with this patch: default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link [1] #!/bin/bash ip link add name vrf1 up type vrf table 10 ip link add name dummy1 up master vrf1 type dummy sysctl -wq net.ipv4.conf.all.forwarding=1 sysctl -wq net.ipv4.conf.all.rp_filter=0 ip route add table 100 default dev dummy1 ip route add table 200 default dev dummy1 ip route add table 300 default dev dummy1 ip rule add prio 0 oif vrf1 table 100 ip rule add prio 1 iif vrf1 table 200 ip rule add prio 2 table 300 ip route get 192.0.2.1 oif dummy1 fibmatch ip route get 192.0.2.1 iif dummy1 from 198.51.100.1 fibmatch Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") Reported-by: hanhuihui Closes: https://lore.kernel.org/netdev/ec671c4f821a4d63904d0da15d604b75@huawei.com/ Signed-off-by: Ido Schimmel Acked-by: David Ahern Link: https://patch.msgid.link/20250414172022.242991-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/fib_rules.h | 2 ++ include/net/flow.h | 1 + include/net/l3mdev.h | 27 +++++++++++++++++++++++++++ 3 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 5927910ec06e..6e68e359ad18 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -45,6 +45,8 @@ struct fib_rule { struct fib_rule_port_range dport_range; u16 sport_mask; u16 dport_mask; + u8 iif_is_l3_master; + u8 oif_is_l3_master; struct rcu_head rcu; }; diff --git a/include/net/flow.h b/include/net/flow.h index 335bbc52171c..2a3f0c42f092 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -38,6 +38,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 +#define FLOWI_FLAG_L3MDEV_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; __u32 flowic_multipath_hash; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index f7fe796e8429..1eb8dad18f7e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -59,6 +59,20 @@ int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) && + fl->flowi_l3mdev == iifindex; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF && + fl->flowi_l3mdev == oifindex; +} + void l3mdev_update_flow(struct net *net, struct flowi *fl); int l3mdev_master_ifindex_rcu(const struct net_device *dev); @@ -327,6 +341,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, { return 1; } + +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return false; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return false; +} + static inline void l3mdev_update_flow(struct net *net, struct flowi *fl) { -- cgit v1.2.3 From 8260731ccad0451207b45844bb66eb161a209218 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 16 Apr 2025 08:57:45 +0200 Subject: drm/gem: Internally test import_attach for imported objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test struct drm_gem_object.import_attach to detect imported objects. During object clenanup, the dma_buf field might be NULL. Testing it in an object's free callback then incorrectly does a cleanup as for native objects. Happens for calls to drm_mode_destroy_dumb_ioctl() that clears the dma_buf field in drm_gem_object_exported_dma_buf_free(). v3: - only test for import_attach (Boris) v2: - use import_attach.dmabuf instead of dma_buf (Christian) Signed-off-by: Thomas Zimmermann Fixes: b57aa47d39e9 ("drm/gem: Test for imported GEM buffers with helper") Reported-by: Andy Yan Closes: https://lore.kernel.org/dri-devel/38d09d34.4354.196379aa560.Coremail.andyshrk@163.com/ Tested-by: Andy Yan Cc: Thomas Zimmermann Cc: Anusha Srivatsa Cc: Christian König Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Simona Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Boris Brezillon Reviewed-by: Simona Vetter Link: https://lore.kernel.org/r/20250416065820.26076-1-tzimmermann@suse.de --- include/drm/drm_gem.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 2bf893eabb4b..bcd54020d6ba 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -585,8 +585,7 @@ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_obje */ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) { - /* The dma-buf's priv field points to the original GEM object. */ - return obj->dma_buf && (obj->dma_buf->priv != obj); + return !!obj->import_attach; } #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 777d0961ff95b26d5887fdae69900374364976f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Apr 2025 08:40:42 +0200 Subject: fs: move the bdex_statx call to vfs_getattr_nosec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently bdex_statx is only called from the very high-level vfs_statx_path function, and thus bypassing it for in-kernel calls to vfs_getattr or vfs_getattr_nosec. This breaks querying the block ѕize of the underlying device in the loop driver and also is a pitfall for any other new kernel caller. Move the call into the lowest level helper to ensure all callers get the right results. Fixes: 2d985f8c6b91 ("vfs: support STATX_DIOALIGN on block devices") Fixes: f4774e92aab8 ("loop: take the file system minimum dio alignment into account") Reported-by: "Darrick J. Wong" Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250417064042.712140-1-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/blkdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e39c45bc0a97..678dc38442bf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1685,7 +1685,7 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); -void bdev_statx(struct path *, struct kstat *, u32); +void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask); void printk_all_partitions(void); int __init early_lookup_bdev(const char *pathname, dev_t *dev); #else @@ -1703,8 +1703,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -static inline void bdev_statx(struct path *path, struct kstat *stat, - u32 request_mask) +static inline void bdev_statx(const struct path *path, struct kstat *stat, + u32 request_mask) { } static inline void printk_all_partitions(void) -- cgit v1.2.3 From 4067196a52278156d18d8d6fa7f43970611b1b49 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Sat, 29 Mar 2025 19:10:29 +0200 Subject: mm/page_alloc: fix deadlock on cpu_hotplug_lock in __accept_page() When the last page in the zone is accepted, __accept_page() calls static_branch_dec(). This function takes cpu_hotplug_lock, which can lead to a deadlock if the allocation occurs during CPU bringup path as _cpu_up() also takes the lock. To prevent this deadlock, defer static_branch_dec() to a workqueue. Call static_branch_dec() only when the workqueue is not yet initialized. Workqueues are initialized before CPU bring up, so this will not conflict with the first scenario. Link: https://lkml.kernel.org/r/20250329171030.3942298-1-kirill.shutemov@linux.intel.com Fixes: 55ad43e8ba0f ("mm: add a helper to accept page") Signed-off-by: Kirill A. Shutemov Reported-by: Srikanth Aithal Tested-by: Srikanth Aithal Cc: Dave Hansen Cc: Ashish Kalra Cc: David Hildenbrand Cc: "Edgecombe, Rick P" Cc: Mel Gorman Cc: "Mike Rapoport (IBM)" Cc: Thomas Lendacky Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 25e80b2ca7f4..4c95fcc9e9df 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -967,6 +967,9 @@ struct zone { #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; + + /* To be called once the last page in the zone is accepted */ + struct work_struct unaccepted_cleanup; #endif /* zone flags, see below */ -- cgit v1.2.3 From 98b1917cdef92c29fc9a14060d5606c619050c2c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 10 Apr 2025 11:10:20 +0200 Subject: fs/dax: fix folio splitting issue by resetting old folio order + _nr_pages Alison reports an issue with fsdax when large extends end up using large ZONE_DEVICE folios: [ 417.796271] BUG: kernel NULL pointer dereference, address: 0000000000000b00 [ 417.796982] #PF: supervisor read access in kernel mode [ 417.797540] #PF: error_code(0x0000) - not-present page [ 417.798123] PGD 2a5c5067 P4D 2a5c5067 PUD 2a5c6067 PMD 0 [ 417.798690] Oops: Oops: 0000 [#1] SMP NOPTI [ 417.799178] CPU: 5 UID: 0 PID: 1515 Comm: mmap Tainted: ... [ 417.800150] Tainted: [O]=OOT_MODULE [ 417.800583] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [ 417.801358] RIP: 0010:__lruvec_stat_mod_folio+0x7e/0x250 [ 417.801948] Code: ... [ 417.803662] RSP: 0000:ffffc90002be3a08 EFLAGS: 00010206 [ 417.804234] RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000000002 [ 417.804984] RDX: ffffffff815652d7 RSI: 0000000000000000 RDI: ffffffff82a2beae [ 417.805689] RBP: ffffc90002be3a28 R08: 0000000000000000 R09: 0000000000000000 [ 417.806384] R10: ffffea0007000040 R11: ffff888376ffe000 R12: 0000000000000001 [ 417.807099] R13: 0000000000000012 R14: ffff88807fe4ab40 R15: ffff888029210580 [ 417.807801] FS: 00007f339fa7a740(0000) GS:ffff8881fa9b9000(0000) knlGS:0000000000000000 [ 417.808570] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 417.809193] CR2: 0000000000000b00 CR3: 000000002a4f0004 CR4: 0000000000370ef0 [ 417.809925] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 417.810622] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 417.811353] Call Trace: [ 417.811709] [ 417.812038] folio_add_file_rmap_ptes+0x143/0x230 [ 417.812566] insert_page_into_pte_locked+0x1ee/0x3c0 [ 417.813132] insert_page+0x78/0xf0 [ 417.813558] vmf_insert_page_mkwrite+0x55/0xa0 [ 417.814088] dax_fault_iter+0x484/0x7b0 [ 417.814542] dax_iomap_pte_fault+0x1ca/0x620 [ 417.815055] dax_iomap_fault+0x39/0x40 [ 417.815499] __xfs_write_fault+0x139/0x380 [ 417.815995] ? __handle_mm_fault+0x5e5/0x1a60 [ 417.816483] xfs_write_fault+0x41/0x50 [ 417.816966] xfs_filemap_fault+0x3b/0xe0 [ 417.817424] __do_fault+0x31/0x180 [ 417.817859] __handle_mm_fault+0xee1/0x1a60 [ 417.818325] ? debug_smp_processor_id+0x17/0x20 [ 417.818844] handle_mm_fault+0xe1/0x2b0 [...] The issue is that when we split a large ZONE_DEVICE folio to order-0 ones, we don't reset the order/_nr_pages. As folio->_nr_pages overlays page[1]->memcg_data, once page[1] is a folio, it suddenly looks like it has folio->memcg_data set. And we never manually initialize folio->memcg_data in fsdax code, because we never expect it to be set at all. When __lruvec_stat_mod_folio() then stumbles over such a folio, it tries to use folio->memcg_data (because it's non-NULL) but it does not actually point at a memcg, resulting in the problem. Alison also observed that these folios sometimes have "locked" set, which is rather concerning (folios locked from the beginning ...). The reason is that the order for large folios is stored in page[1]->flags, which become the folio->flags of a new small folio. Let's fix it by adding a folio helper to clear order/_nr_pages for splitting purposes. Maybe we should reinitialize other large folio flags / folio members as well when splitting, because they might similarly cause harm once page[1] becomes a folio? At least other flags in PAGE_FLAGS_SECOND should not be set for fsdax, so at least page[1]->flags might be as expected with this fix. From a quick glimpse, initializing ->mapping, ->pgmap and ->share should re-initialize most things from a previous page[1] used by large folios that fsdax cares about. For example folio->private might not get reinitialized, but maybe that's not relevant -- no traces of it's use in fsdax code. Needs a closer look. Another thing that should be considered in the future is performing similar checks as we perform in free_tail_page_prepare() -- checking pincount etc. -- when freeing a large fsdax folio. Link: https://lkml.kernel.org/r/20250410091020.119116-1-david@redhat.com Fixes: 4996fc547f5b ("mm: let _folio_nr_pages overlay memcg_data in first tail page") Fixes: 38607c62b34b ("fs/dax: properly refcount fs dax pages") Signed-off-by: David Hildenbrand Reported-by: Alison Schofield Closes: https://lkml.kernel.org/r/Z_W9Oeg-D9FhImf3@aschofie-mobl2.lan Tested-by: Alison Schofield Reviewed-by: Dan Williams Tested-by: "Darrick J. Wong" Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: Matthew Wilcox Cc: Alistair Popple Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- include/linux/mm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b7f13f087954..bf55206935c4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1218,6 +1218,23 @@ static inline unsigned int folio_order(const struct folio *folio) return folio_large_order(folio); } +/** + * folio_reset_order - Reset the folio order and derived _nr_pages + * @folio: The folio. + * + * Reset the order and derived _nr_pages to 0. Must only be used in the + * process of splitting large folios. + */ +static inline void folio_reset_order(struct folio *folio) +{ + if (WARN_ON_ONCE(!folio_test_large(folio))) + return; + folio->_flags_1 &= ~0xffUL; +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 0; +#endif +} + #include /* -- cgit v1.2.3 From 9e888998ea4d22257b07ce911576509486fa0667 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 12 Apr 2025 18:39:12 +0200 Subject: writeback: fix false warning in inode_to_wb() inode_to_wb() is used also for filesystems that don't support cgroup writeback. For these filesystems inode->i_wb is stable during the lifetime of the inode (it points to bdi->wb) and there's no need to hold locks protecting the inode->i_wb dereference. Improve the warning in inode_to_wb() to not trigger for these filesystems. Link: https://lkml.kernel.org/r/20250412163914.3773459-3-agruenba@redhat.com Fixes: aaa2cacf8184 ("writeback: add lockdep annotation to inode_to_wb()") Signed-off-by: Jan Kara Signed-off-by: Andreas Gruenbacher Reviewed-by: Andreas Gruenbacher Cc: Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 8e7af9a03b41..e721148c95d0 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -249,6 +249,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && + (inode->i_sb->s_iflags & SB_I_CGROUPWB) && (!lockdep_is_held(&inode->i_lock) && !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) && !lockdep_is_held(&inode->i_wb->list_lock))); -- cgit v1.2.3 From 38448181459e24257b40d5258afdbaa3565e8cfc Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 16 Apr 2025 09:45:39 -0400 Subject: mm: vmscan: restore high-cpu watermark safety in kswapd Vlastimil points out that commit a211c6550efc ("mm: page_alloc: defrag_mode kswapd/kcompactd watermarks") switched kswapd from zone_watermark_ok_safe() to the standard, percpu-cached version of reading free pages, thus dropping the watermark safety precautions for systems with high CPU counts (e.g. >212 cpus on 64G). Restore them. Since zone_watermark_ok_safe() is no longer the right interface, and this was the last caller of the function anyway, open-code the zone_page_state_snapshot() conditional and delete the function. Link: https://lkml.kernel.org/r/20250416135142.778933-2-hannes@cmpxchg.org Fixes: a211c6550efc ("mm: page_alloc: defrag_mode kswapd/kcompactd watermarks") Signed-off-by: Johannes Weiner Reported-by: Vlastimil Babka Reviewed-by: Vlastimil Babka Cc: Brendan Jackman Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4c95fcc9e9df..6ccec1bf2896 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1502,8 +1502,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. -- cgit v1.2.3 From 4b4bd8c50f4836ba7d3fcfd6c90f96d2605779fe Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:02:18 -0700 Subject: gcc-15: acpi: sprinkle random '__nonstring' crumbles around This is not great: I'd much rather introduce a typedef that is a "ACPI name byte buffer", and use that to mark these special 4-byte ACPI names that do not use NUL termination. But as noted in the previous commit ("gcc-15: make 'unterminated string initialization' just a warning") gcc doesn't actually seem to support that notion, so instead you have to just mark every single array declaration individually. So this is not pretty, but this gets rid of the bulk of the annoying warnings during an allmodconfig build for me. Signed-off-by: Linus Torvalds --- include/acpi/actbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 451f6276da49..2fc89704be17 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -66,7 +66,7 @@ ******************************************************************************/ struct acpi_table_header { - char signature[ACPI_NAMESEG_SIZE]; /* ASCII table signature */ + char signature[ACPI_NAMESEG_SIZE] __nonstring; /* ASCII table signature */ u32 length; /* Length of table in bytes, including this header */ u8 revision; /* ACPI Specification minor version number */ u8 checksum; /* To make sum of entire table == 0 */ -- cgit v1.2.3