Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/verifier.c                   5
-rw-r--r--  kernel/cgroup/cgroup.c                  7
-rw-r--r--  kernel/cgroup/cpuset.c                 26
-rw-r--r--  kernel/cgroup/legacy_freezer.c          9
-rw-r--r--  kernel/dma/pool.c                      27
-rw-r--r--  kernel/events/callchain.c               2
-rw-r--r--  kernel/events/core.c                   17
-rw-r--r--  kernel/events/ring_buffer.c             2
-rw-r--r--  kernel/irq/debugfs.c                    2
-rw-r--r--  kernel/irq/matrix.c                     2
-rw-r--r--  kernel/kexec_core.c                    16
-rw-r--r--  kernel/liveupdate/kexec_handover.c     49
-rw-r--r--  kernel/module/kmod.c                    1
-rw-r--r--  kernel/panic.c                          4
-rw-r--r--  kernel/power/em_netlink.c             213
-rw-r--r--  kernel/power/em_netlink_autogen.c      58
-rw-r--r--  kernel/power/em_netlink_autogen.h      22
-rw-r--r--  kernel/power/energy_model.c             6
-rw-r--r--  kernel/power/suspend.c                  9
-rw-r--r--  kernel/power/swap.c                    14
-rw-r--r--  kernel/printk/nbcon.c                  38
-rw-r--r--  kernel/sched/core.c                    23
-rw-r--r--  kernel/sched/deadline.c                36
-rw-r--r--  kernel/sched/ext.c                     24
-rw-r--r--  kernel/sched/fair.c                    18
-rw-r--r--  kernel/sched/features.h                 2
-rw-r--r--  kernel/sched/idle.c                     6
-rw-r--r--  kernel/sched/pelt.c                     2
-rw-r--r--  kernel/sched/sched.h                   27
-rw-r--r--  kernel/sched/syscalls.c                32
-rw-r--r--  kernel/time/clockevents.c               2
-rw-r--r--  kernel/time/clocksource.c               2
-rw-r--r--  kernel/time/hrtimer.c                   4
-rw-r--r--  kernel/time/tick-broadcast.c            2
-rw-r--r--  kernel/time/tick-common.c               2
-rw-r--r--  kernel/time/tick-oneshot.c              2
-rw-r--r--  kernel/time/tick-sched.c                2
-rw-r--r--  kernel/time/timekeeping.c               2
-rw-r--r--  kernel/trace/ftrace.c                  29
-rw-r--r--  kernel/trace/ring_buffer.c              2
-rw-r--r--  kernel/trace/trace.c                   16
-rw-r--r--  kernel/trace/trace_events.c             7
-rw-r--r--  kernel/trace/trace_events_hist.c        9
-rw-r--r--  kernel/trace/trace_events_synth.c       8
-rw-r--r--  kernel/trace/trace_functions_graph.c    2
-rw-r--r--  kernel/vmcore_info.c                    6
-rw-r--r--  kernel/watchdog.c                       2
47 files changed, 478 insertions, 320 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f0ca69f888fa..3135643d5695 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9609,6 +9609,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
if (reg->type != PTR_TO_MAP_VALUE)
return -EINVAL;
+ if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
+ verbose(env, "R%d points to insn_array map which cannot be used as const string\n", regno);
+ return -EACCES;
+ }
+
if (!bpf_map_is_rdonly(map)) {
verbose(env, "R%d does not point to a readonly map'\n", regno);
return -EACCES;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e717208cfb18..5f0d33b04910 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Generic process-grouping system.
*
@@ -20,10 +21,6 @@
* 2003-10-22 Updates by Stephen Hemminger.
* 2004 May-July Rework by Paul Jackson.
* ---------------------------------------------------
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of the Linux
- * distribution for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -5847,7 +5844,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
int ret;
/* allocate the cgroup and its ID, 0 is reserved for the root */
- cgrp = kzalloc(struct_size(cgrp, ancestors, (level + 1)), GFP_KERNEL);
+ cgrp = kzalloc(struct_size(cgrp, _low_ancestors, level), GFP_KERNEL);
if (!cgrp)
return ERR_PTR(-ENOMEM);
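Note: the cgroup allocation above sizes a structure with a flexible array member via struct_size(). A minimal userspace sketch of that sizing pattern, assuming a made-up struct and a simplified macro that, unlike the kernel helper, does no overflow checking:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical struct with a flexible array member, for illustration only. */
struct node {
	int level;
	int id;
	struct node *ancestors[];	/* flexible array member */
};

/* Simplified analog of struct_size(): header plus n trailing elements. */
#define STRUCT_SIZE(type, member, n) \
	(sizeof(type) + sizeof(((type *)0)->member[0]) * (n))

int main(void)
{
	int level = 3;
	struct node *n = calloc(1, STRUCT_SIZE(struct node, ancestors, level));

	if (!n)
		return 1;
	n->level = level;
	printf("allocated %zu bytes for %d ancestors\n",
	       STRUCT_SIZE(struct node, ancestors, level), level);
	free(n);
	return 0;
}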
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 6e6eb09b8db6..c06e2e96f79d 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* kernel/cpuset.c
*
@@ -16,10 +17,6 @@
* 2006 Rework by Paul Menage to use generic cgroups
* 2008 Rework of the scheduler domains and CPU hotplug handling
* by Max Krasnyansky
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of the Linux
- * distribution for more details.
*/
#include "cpuset-internal.h"
@@ -1668,7 +1665,14 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
{
WARN_ON_ONCE(!is_remote_partition(cs));
- WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus));
+ /*
+ * When a CPU is offlined, top_cpuset may end up with no available CPUs,
+ * which should clear subpartitions_cpus. We should not emit a warning for this
+ * scenario: the hierarchy is updated from top to bottom, so subpartitions_cpus
+ * may already be cleared when disabling the partition.
+ */
+ WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus) &&
+ !cpumask_empty(subpartitions_cpus));
spin_lock_irq(&callback_lock);
cs->remote_partition = false;
@@ -3976,8 +3980,9 @@ retry:
if (remote || (is_partition_valid(cs) && is_partition_valid(parent)))
compute_partition_effective_cpumask(cs, &new_cpus);
- if (remote && cpumask_empty(&new_cpus) &&
- partition_is_populated(cs, NULL)) {
+ if (remote && (cpumask_empty(subpartitions_cpus) ||
+ (cpumask_empty(&new_cpus) &&
+ partition_is_populated(cs, NULL)))) {
cs->prs_err = PERR_HOTPLUG;
remote_partition_disable(cs, tmp);
compute_effective_cpumask(&new_cpus, cs, parent);
@@ -3990,9 +3995,12 @@ retry:
* 1) empty effective cpus but not valid empty partition.
* 2) parent is invalid or doesn't grant any cpus to child
* partitions.
+ * 3) subpartitions_cpus is empty.
*/
- if (is_local_partition(cs) && (!is_partition_valid(parent) ||
- tasks_nocpu_error(parent, cs, &new_cpus)))
+ if (is_local_partition(cs) &&
+ (!is_partition_valid(parent) ||
+ tasks_nocpu_error(parent, cs, &new_cpus) ||
+ cpumask_empty(subpartitions_cpus)))
partcmd = partcmd_invalidate;
/*
* On the other hand, an invalid partition root may be transitioned
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
index 915b02f65980..817c33450fee 100644
--- a/kernel/cgroup/legacy_freezer.c
+++ b/kernel/cgroup/legacy_freezer.c
@@ -1,17 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* cgroup_freezer.c - control group freezer subsystem
*
* Copyright IBM Corporation, 2007
*
* Author : Cedric Le Goater <clg@fr.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/export.h>
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 26392badc36b..c5da29ad010c 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -184,6 +184,12 @@ static __init struct gen_pool *__dma_atomic_pool_init(size_t pool_size,
return pool;
}
+#ifdef CONFIG_ZONE_DMA32
+#define has_managed_dma32 has_managed_zone(ZONE_DMA32)
+#else
+#define has_managed_dma32 false
+#endif
+
static int __init dma_atomic_pool_init(void)
{
int ret = 0;
@@ -199,17 +205,20 @@ static int __init dma_atomic_pool_init(void)
}
INIT_WORK(&atomic_pool_work, atomic_pool_work_fn);
- atomic_pool_kernel = __dma_atomic_pool_init(atomic_pool_size,
+ /* All memory might be in the DMA zone(s) to begin with */
+ if (has_managed_zone(ZONE_NORMAL)) {
+ atomic_pool_kernel = __dma_atomic_pool_init(atomic_pool_size,
GFP_KERNEL);
- if (!atomic_pool_kernel)
- ret = -ENOMEM;
+ if (!atomic_pool_kernel)
+ ret = -ENOMEM;
+ }
if (has_managed_dma()) {
atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
GFP_KERNEL | GFP_DMA);
if (!atomic_pool_dma)
ret = -ENOMEM;
}
- if (IS_ENABLED(CONFIG_ZONE_DMA32)) {
+ if (has_managed_dma32) {
atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size,
GFP_KERNEL | GFP_DMA32);
if (!atomic_pool_dma32)
@@ -224,11 +233,11 @@ postcore_initcall(dma_atomic_pool_init);
static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp)
{
if (prev == NULL) {
- if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
- return atomic_pool_dma32;
- if (atomic_pool_dma && (gfp & GFP_DMA))
- return atomic_pool_dma;
- return atomic_pool_kernel;
+ if (gfp & GFP_DMA)
+ return atomic_pool_dma ?: atomic_pool_dma32 ?: atomic_pool_kernel;
+ if (gfp & GFP_DMA32)
+ return atomic_pool_dma32 ?: atomic_pool_dma ?: atomic_pool_kernel;
+ return atomic_pool_kernel ?: atomic_pool_dma32 ?: atomic_pool_dma;
}
if (prev == atomic_pool_kernel)
return atomic_pool_dma32 ? atomic_pool_dma32 : atomic_pool_dma;
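Note: the rewritten dma_guess_pool() above picks a first-choice pool from the GFP flags and falls back through the remaining pools when one was never created (for example because its zone has no managed pages). A minimal userspace sketch of that fallback idea, using the same GNU ?: shorthand the kernel code uses; the pool variables and the enum are stand-ins, not the kernel symbols:

#include <stdio.h>

/* Stand-in "pools"; NULL means the pool was never created. */
static const char *pool_kernel = "kernel";
static const char *pool_dma;		/* e.g. no managed ZONE_DMA pages */
static const char *pool_dma32 = "dma32";

enum req { REQ_DEFAULT, REQ_DMA, REQ_DMA32 };	/* simplified GFP stand-in */

/* Pick a first choice by request type, then fall back (GNU a ?: b). */
static const char *guess_pool(enum req req)
{
	if (req == REQ_DMA)
		return pool_dma ?: pool_dma32 ?: pool_kernel;
	if (req == REQ_DMA32)
		return pool_dma32 ?: pool_dma ?: pool_kernel;
	return pool_kernel ?: pool_dma32 ?: pool_dma;
}

int main(void)
{
	printf("REQ_DMA     -> %s\n", guess_pool(REQ_DMA));	/* dma32 */
	printf("REQ_DMA32   -> %s\n", guess_pool(REQ_DMA32));	/* dma32 */
	printf("REQ_DEFAULT -> %s\n", guess_pool(REQ_DEFAULT));	/* kernel */
	return 0;
}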
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index b9c7e00725d6..1f6589578703 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -2,7 +2,7 @@
/*
* Performance events callchain code, extracted from core.c:
*
- * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dad0d3d2e85f..a0fa488bce84 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2,7 +2,7 @@
/*
* Performance events core code:
*
- * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
@@ -6997,6 +6997,15 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
if (data_page_nr(event->rb) != nr_pages)
return -EINVAL;
+ /*
+ * If this event doesn't have mmap_count, we're attempting to
+ * create an alias of another event's mmap(); this would mean
+ * both events will end up scribbling the same user_page;
+ * which makes no sense.
+ */
+ if (!refcount_read(&event->mmap_count))
+ return -EBUSY;
+
if (refcount_inc_not_zero(&event->rb->mmap_count)) {
/*
* Success -- managed to mmap() the same buffer
@@ -11906,6 +11915,11 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
}
}
+static void perf_swevent_destroy_hrtimer(struct perf_event *event)
+{
+ hrtimer_cancel(&event->hw.hrtimer);
+}
+
static void perf_swevent_init_hrtimer(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -11914,6 +11928,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
return;
hrtimer_setup(&hwc->hrtimer, perf_swevent_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+ event->destroy = perf_swevent_destroy_hrtimer;
/*
* Since hrtimers have a fixed rate, we can do a static freq->period
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 20a905023736..3e7de2661417 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -2,7 +2,7 @@
/*
* Performance events ring-buffer code:
*
- * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008 Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 3527defd2890..5c5ebaee35f2 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-// Copyright 2017 Thomas Gleixner <tglx@linutronix.de>
+// Copyright 2017 Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
#include <linux/irqdomain.h>
#include <linux/irq.h>
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 8f222d1cccec..a50f2305a8dc 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2017 Thomas Gleixner <tglx@linutronix.de>
+// Copyright (C) 2017 Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
#include <linux/spinlock.h>
#include <linux/seq_file.h>
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 0f92acdd354d..95c585c6ddc3 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -953,17 +953,24 @@ int kimage_load_segment(struct kimage *image, int idx)
return result;
}
-void *kimage_map_segment(struct kimage *image,
- unsigned long addr, unsigned long size)
+void *kimage_map_segment(struct kimage *image, int idx)
{
+ unsigned long addr, size, eaddr;
unsigned long src_page_addr, dest_page_addr = 0;
- unsigned long eaddr = addr + size;
kimage_entry_t *ptr, entry;
struct page **src_pages;
unsigned int npages;
+ struct page *cma;
void *vaddr = NULL;
int i;
+ cma = image->segment_cma[idx];
+ if (cma)
+ return page_address(cma);
+
+ addr = image->segment[idx].mem;
+ size = image->segment[idx].memsz;
+ eaddr = addr + size;
/*
* Collect the source pages and map them in a contiguous VA range.
*/
@@ -1004,7 +1011,8 @@ void *kimage_map_segment(struct kimage *image,
void kimage_unmap_segment(void *segment_buffer)
{
- vunmap(segment_buffer);
+ if (is_vmalloc_addr(segment_buffer))
+ vunmap(segment_buffer);
}
struct kexec_load_limit {
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 9dc51fab604f..90d411a59f76 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -255,6 +255,14 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio)
if (is_folio && info.order)
prep_compound_page(page, info.order);
+ /* Always mark headpage's codetag as empty to avoid accounting mismatch */
+ clear_page_tag_ref(page);
+ if (!is_folio) {
+ /* Also do that for the non-compound tail pages */
+ for (unsigned int i = 1; i < nr_pages; i++)
+ clear_page_tag_ref(page + i);
+ }
+
adjust_managed_page_count(page, nr_pages);
return page;
}
@@ -460,27 +468,23 @@ static void __init deserialize_bitmap(unsigned int order,
}
}
-/* Return true if memory was deserizlied */
-static bool __init kho_mem_deserialize(const void *fdt)
+/* Returns physical address of the preserved memory map from FDT */
+static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
{
- struct khoser_mem_chunk *chunk;
const void *mem_ptr;
- u64 mem;
int len;
mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
if (!mem_ptr || len != sizeof(u64)) {
pr_err("failed to get preserved memory bitmaps\n");
- return false;
+ return 0;
}
- mem = get_unaligned((const u64 *)mem_ptr);
- chunk = mem ? phys_to_virt(mem) : NULL;
-
- /* No preserved physical pages were passed, no deserialization */
- if (!chunk)
- return false;
+ return get_unaligned((const u64 *)mem_ptr);
+}
+static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
+{
while (chunk) {
unsigned int i;
@@ -489,8 +493,6 @@ static bool __init kho_mem_deserialize(const void *fdt)
&chunk->bitmaps[i]);
chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
}
-
- return true;
}
/*
@@ -1012,8 +1014,10 @@ int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation)
chunk->phys[idx++] = phys;
if (idx == ARRAY_SIZE(chunk->phys)) {
chunk = new_vmalloc_chunk(chunk);
- if (!chunk)
+ if (!chunk) {
+ err = -ENOMEM;
goto err_free;
+ }
idx = 0;
}
}
@@ -1253,6 +1257,7 @@ bool kho_finalized(void)
struct kho_in {
phys_addr_t fdt_phys;
phys_addr_t scratch_phys;
+ phys_addr_t mem_map_phys;
struct kho_debugfs dbg;
};
@@ -1434,12 +1439,10 @@ static void __init kho_release_scratch(void)
void __init kho_memory_init(void)
{
- if (kho_in.scratch_phys) {
+ if (kho_in.mem_map_phys) {
kho_scratch = phys_to_virt(kho_in.scratch_phys);
kho_release_scratch();
-
- if (!kho_mem_deserialize(kho_get_fdt()))
- kho_in.fdt_phys = 0;
+ kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys));
} else {
kho_reserve_scratch();
}
@@ -1448,8 +1451,9 @@ void __init kho_memory_init(void)
void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
phys_addr_t scratch_phys, u64 scratch_len)
{
- void *fdt = NULL;
struct kho_scratch *scratch = NULL;
+ phys_addr_t mem_map_phys;
+ void *fdt = NULL;
int err = 0;
unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
@@ -1475,6 +1479,12 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
goto out;
}
+ mem_map_phys = kho_get_mem_map_phys(fdt);
+ if (!mem_map_phys) {
+ err = -ENOENT;
+ goto out;
+ }
+
scratch = early_memremap(scratch_phys, scratch_len);
if (!scratch) {
pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n",
@@ -1515,6 +1525,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
kho_in.fdt_phys = fdt_phys;
kho_in.scratch_phys = scratch_phys;
+ kho_in.mem_map_phys = mem_map_phys;
kho_scratch_cnt = scratch_cnt;
pr_info("found kexec handover data.\n");
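Note: kho_get_mem_map_phys() above reads a u64 property out of the FDT blob with get_unaligned(), since data in the blob need not be naturally aligned. A minimal userspace sketch of the same idea, assuming memcpy() as the portable stand-in for get_unaligned():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Load a u64 from a possibly misaligned buffer; memcpy() lets the
 * compiler generate a safe access on any architecture. */
static uint64_t load_unaligned_u64(const void *p)
{
	uint64_t v;

	memcpy(&v, p, sizeof(v));
	return v;
}

int main(void)
{
	unsigned char blob[16] = { 0 };
	uint64_t phys = 0x12345678ULL;

	memcpy(blob + 3, &phys, sizeof(phys));	/* misaligned on purpose */
	printf("0x%llx\n",
	       (unsigned long long)load_unaligned_u64(blob + 3));
	return 0;
}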
diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c
index 25f253812512..a25dccdf7aa7 100644
--- a/kernel/module/kmod.c
+++ b/kernel/module/kmod.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* kmod - the kernel module loader
*
diff --git a/kernel/panic.c b/kernel/panic.c
index 0d52210a9e2b..0c20fcaae98a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -131,7 +131,8 @@ static int proc_taint(const struct ctl_table *table, int write,
static int sysctl_panic_print_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- panic_print_deprecated();
+ if (write)
+ panic_print_deprecated();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
@@ -1014,7 +1015,6 @@ static int panic_print_set(const char *val, const struct kernel_param *kp)
static int panic_print_get(char *val, const struct kernel_param *kp)
{
- panic_print_deprecated();
return param_get_ulong(val, kp);
}
diff --git a/kernel/power/em_netlink.c b/kernel/power/em_netlink.c
index 4b85da138a06..5a611d3950fd 100644
--- a/kernel/power/em_netlink.c
+++ b/kernel/power/em_netlink.c
@@ -12,27 +12,35 @@
#include <linux/energy_model.h>
#include <net/sock.h>
#include <net/genetlink.h>
-#include <uapi/linux/energy_model.h>
+#include <uapi/linux/dev_energymodel.h>
#include "em_netlink.h"
#include "em_netlink_autogen.h"
-#define EM_A_PD_CPUS_LEN 256
-
/*************************** Command encoding ********************************/
+struct dump_ctx {
+ int idx;
+ int start;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+};
+
static int __em_nl_get_pd_size(struct em_perf_domain *pd, void *data)
{
- char cpus_buf[EM_A_PD_CPUS_LEN];
+ int nr_cpus, msg_sz, cpus_sz;
int *tot_msg_sz = data;
- int msg_sz, cpus_sz;
- cpus_sz = snprintf(cpus_buf, sizeof(cpus_buf), "%*pb",
- cpumask_pr_args(to_cpumask(pd->cpus)));
+ nr_cpus = cpumask_weight(to_cpumask(pd->cpus));
+ cpus_sz = nla_total_size_64bit(sizeof(u64)) * nr_cpus;
- msg_sz = nla_total_size(0) + /* EM_A_PDS_PD */
- nla_total_size(sizeof(u32)) + /* EM_A_PD_PD_ID */
- nla_total_size_64bit(sizeof(u64)) + /* EM_A_PD_FLAGS */
- nla_total_size(cpus_sz); /* EM_A_PD_CPUS */
+ msg_sz = nla_total_size(0) +
+ /* DEV_ENERGYMODEL_A_PERF_DOMAINS_PERF_DOMAIN */
+ nla_total_size(sizeof(u32)) +
+ /* DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID */
+ nla_total_size_64bit(sizeof(u64)) +
+ /* DEV_ENERGYMODEL_A_PERF_DOMAIN_FLAGS */
+ nla_total_size(cpus_sz);
+ /* DEV_ENERGYMODEL_A_PERF_DOMAIN_CPUS */
*tot_msg_sz += nlmsg_total_size(genlmsg_msg_size(msg_sz));
return 0;
@@ -40,56 +48,80 @@ static int __em_nl_get_pd_size(struct em_perf_domain *pd, void *data)
static int __em_nl_get_pd(struct em_perf_domain *pd, void *data)
{
- char cpus_buf[EM_A_PD_CPUS_LEN];
struct sk_buff *msg = data;
- struct nlattr *entry;
-
- entry = nla_nest_start(msg, EM_A_PDS_PD);
- if (!entry)
- goto out_cancel_nest;
+ struct cpumask *cpumask;
+ int cpu;
- if (nla_put_u32(msg, EM_A_PD_PD_ID, pd->id))
+ if (nla_put_u32(msg, DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID,
+ pd->id))
goto out_cancel_nest;
- if (nla_put_u64_64bit(msg, EM_A_PD_FLAGS, pd->flags, EM_A_PD_PAD))
+ if (nla_put_u64_64bit(msg, DEV_ENERGYMODEL_A_PERF_DOMAIN_FLAGS,
+ pd->flags, DEV_ENERGYMODEL_A_PERF_DOMAIN_PAD))
goto out_cancel_nest;
- snprintf(cpus_buf, sizeof(cpus_buf), "%*pb",
- cpumask_pr_args(to_cpumask(pd->cpus)));
- if (nla_put_string(msg, EM_A_PD_CPUS, cpus_buf))
- goto out_cancel_nest;
-
- nla_nest_end(msg, entry);
+ cpumask = to_cpumask(pd->cpus);
+ for_each_cpu(cpu, cpumask) {
+ if (nla_put_u64_64bit(msg, DEV_ENERGYMODEL_A_PERF_DOMAIN_CPUS,
+ cpu, DEV_ENERGYMODEL_A_PERF_DOMAIN_PAD))
+ goto out_cancel_nest;
+ }
return 0;
out_cancel_nest:
- nla_nest_cancel(msg, entry);
-
return -EMSGSIZE;
}
-int em_nl_get_pds_doit(struct sk_buff *skb, struct genl_info *info)
+static int __em_nl_get_pd_for_dump(struct em_perf_domain *pd, void *data)
{
- struct sk_buff *msg;
+ const struct genl_info *info;
+ struct dump_ctx *ctx = data;
void *hdr;
+ int ret;
+
+ if (ctx->idx++ < ctx->start)
+ return 0;
+
+ info = genl_info_dump(ctx->cb);
+ hdr = genlmsg_iput(ctx->skb, info);
+ if (!hdr) {
+ genlmsg_cancel(ctx->skb, hdr);
+ return -EMSGSIZE;
+ }
+
+ ret = __em_nl_get_pd(pd, ctx->skb);
+ genlmsg_end(ctx->skb, hdr);
+ return ret;
+}
+
+int dev_energymodel_nl_get_perf_domains_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ int id, ret = -EMSGSIZE, msg_sz = 0;
int cmd = info->genlhdr->cmd;
- int ret = -EMSGSIZE, msg_sz = 0;
+ struct em_perf_domain *pd;
+ struct sk_buff *msg;
+ void *hdr;
+
+ if (!info->attrs[DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID])
+ return -EINVAL;
- for_each_em_perf_domain(__em_nl_get_pd_size, &msg_sz);
+ id = nla_get_u32(info->attrs[DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID]);
+ pd = em_perf_domain_get_by_id(id);
+ __em_nl_get_pd_size(pd, &msg_sz);
msg = genlmsg_new(msg_sz, GFP_KERNEL);
if (!msg)
return -ENOMEM;
- hdr = genlmsg_put_reply(msg, info, &em_nl_family, 0, cmd);
+ hdr = genlmsg_put_reply(msg, info, &dev_energymodel_nl_family, 0, cmd);
if (!hdr)
goto out_free_msg;
- ret = for_each_em_perf_domain(__em_nl_get_pd, msg);
+ ret = __em_nl_get_pd(pd, msg);
if (ret)
goto out_cancel_msg;
-
genlmsg_end(msg, hdr);
return genlmsg_reply(msg, info);
@@ -98,19 +130,31 @@ out_cancel_msg:
genlmsg_cancel(msg, hdr);
out_free_msg:
nlmsg_free(msg);
-
return ret;
}
+int dev_energymodel_nl_get_perf_domains_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct dump_ctx ctx = {
+ .idx = 0,
+ .start = cb->args[0],
+ .skb = skb,
+ .cb = cb,
+ };
+
+ return for_each_em_perf_domain(__em_nl_get_pd_for_dump, &ctx);
+}
+
static struct em_perf_domain *__em_nl_get_pd_table_id(struct nlattr **attrs)
{
struct em_perf_domain *pd;
int id;
- if (!attrs[EM_A_PD_TABLE_PD_ID])
+ if (!attrs[DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID])
return NULL;
- id = nla_get_u32(attrs[EM_A_PD_TABLE_PD_ID]);
+ id = nla_get_u32(attrs[DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID]);
pd = em_perf_domain_get_by_id(id);
return pd;
}
@@ -119,25 +163,34 @@ static int __em_nl_get_pd_table_size(const struct em_perf_domain *pd)
{
int id_sz, ps_sz;
- id_sz = nla_total_size(sizeof(u32)); /* EM_A_PD_TABLE_PD_ID */
- ps_sz = nla_total_size(0) + /* EM_A_PD_TABLE_PS */
- nla_total_size_64bit(sizeof(u64)) + /* EM_A_PS_PERFORMANCE */
- nla_total_size_64bit(sizeof(u64)) + /* EM_A_PS_FREQUENCY */
- nla_total_size_64bit(sizeof(u64)) + /* EM_A_PS_POWER */
- nla_total_size_64bit(sizeof(u64)) + /* EM_A_PS_COST */
- nla_total_size_64bit(sizeof(u64)); /* EM_A_PS_FLAGS */
+ id_sz = nla_total_size(sizeof(u32));
+ /* DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID */
+ ps_sz = nla_total_size(0) +
+ /* DEV_ENERGYMODEL_A_PERF_TABLE_PERF_STATE */
+ nla_total_size_64bit(sizeof(u64)) +
+ /* DEV_ENERGYMODEL_A_PERF_STATE_PERFORMANCE */
+ nla_total_size_64bit(sizeof(u64)) +
+ /* DEV_ENERGYMODEL_A_PERF_STATE_FREQUENCY */
+ nla_total_size_64bit(sizeof(u64)) +
+ /* DEV_ENERGYMODEL_A_PERF_STATE_POWER */
+ nla_total_size_64bit(sizeof(u64)) +
+ /* DEV_ENERGYMODEL_A_PERF_STATE_COST */
+ nla_total_size_64bit(sizeof(u64));
+ /* DEV_ENERGYMODEL_A_PERF_STATE_FLAGS */
ps_sz *= pd->nr_perf_states;
return nlmsg_total_size(genlmsg_msg_size(id_sz + ps_sz));
}
-static int __em_nl_get_pd_table(struct sk_buff *msg, const struct em_perf_domain *pd)
+static
+int __em_nl_get_pd_table(struct sk_buff *msg, const struct em_perf_domain *pd)
{
struct em_perf_state *table, *ps;
struct nlattr *entry;
int i;
- if (nla_put_u32(msg, EM_A_PD_TABLE_PD_ID, pd->id))
+ if (nla_put_u32(msg, DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID,
+ pd->id))
goto out_err;
rcu_read_lock();
@@ -146,24 +199,35 @@ static int __em_nl_get_pd_table(struct sk_buff *msg, const struct em_perf_domain
for (i = 0; i < pd->nr_perf_states; i++) {
ps = &table[i];
- entry = nla_nest_start(msg, EM_A_PD_TABLE_PS);
+ entry = nla_nest_start(msg,
+ DEV_ENERGYMODEL_A_PERF_TABLE_PERF_STATE);
if (!entry)
goto out_unlock_ps;
- if (nla_put_u64_64bit(msg, EM_A_PS_PERFORMANCE,
- ps->performance, EM_A_PS_PAD))
+ if (nla_put_u64_64bit(msg,
+ DEV_ENERGYMODEL_A_PERF_STATE_PERFORMANCE,
+ ps->performance,
+ DEV_ENERGYMODEL_A_PERF_STATE_PAD))
goto out_cancel_ps_nest;
- if (nla_put_u64_64bit(msg, EM_A_PS_FREQUENCY,
- ps->frequency, EM_A_PS_PAD))
+ if (nla_put_u64_64bit(msg,
+ DEV_ENERGYMODEL_A_PERF_STATE_FREQUENCY,
+ ps->frequency,
+ DEV_ENERGYMODEL_A_PERF_STATE_PAD))
goto out_cancel_ps_nest;
- if (nla_put_u64_64bit(msg, EM_A_PS_POWER,
- ps->power, EM_A_PS_PAD))
+ if (nla_put_u64_64bit(msg,
+ DEV_ENERGYMODEL_A_PERF_STATE_POWER,
+ ps->power,
+ DEV_ENERGYMODEL_A_PERF_STATE_PAD))
goto out_cancel_ps_nest;
- if (nla_put_u64_64bit(msg, EM_A_PS_COST,
- ps->cost, EM_A_PS_PAD))
+ if (nla_put_u64_64bit(msg,
+ DEV_ENERGYMODEL_A_PERF_STATE_COST,
+ ps->cost,
+ DEV_ENERGYMODEL_A_PERF_STATE_PAD))
goto out_cancel_ps_nest;
- if (nla_put_u64_64bit(msg, EM_A_PS_FLAGS,
- ps->flags, EM_A_PS_PAD))
+ if (nla_put_u64_64bit(msg,
+ DEV_ENERGYMODEL_A_PERF_STATE_FLAGS,
+ ps->flags,
+ DEV_ENERGYMODEL_A_PERF_STATE_PAD))
goto out_cancel_ps_nest;
nla_nest_end(msg, entry);
@@ -179,7 +243,8 @@ out_err:
return -EMSGSIZE;
}
-int em_nl_get_pd_table_doit(struct sk_buff *skb, struct genl_info *info)
+int dev_energymodel_nl_get_perf_table_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
int cmd = info->genlhdr->cmd;
int msg_sz, ret = -EMSGSIZE;
@@ -197,7 +262,7 @@ int em_nl_get_pd_table_doit(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- hdr = genlmsg_put_reply(msg, info, &em_nl_family, 0, cmd);
+ hdr = genlmsg_put_reply(msg, info, &dev_energymodel_nl_family, 0, cmd);
if (!hdr)
goto out_free_msg;
@@ -221,7 +286,7 @@ static void __em_notify_pd_table(const struct em_perf_domain *pd, int ntf_type)
int msg_sz, ret = -EMSGSIZE;
void *hdr;
- if (!genl_has_listeners(&em_nl_family, &init_net, EM_NLGRP_EVENT))
+ if (!genl_has_listeners(&dev_energymodel_nl_family, &init_net, DEV_ENERGYMODEL_NLGRP_EVENT))
return;
msg_sz = __em_nl_get_pd_table_size(pd);
@@ -230,7 +295,7 @@ static void __em_notify_pd_table(const struct em_perf_domain *pd, int ntf_type)
if (!msg)
return;
- hdr = genlmsg_put(msg, 0, 0, &em_nl_family, 0, ntf_type);
+ hdr = genlmsg_put(msg, 0, 0, &dev_energymodel_nl_family, 0, ntf_type);
if (!hdr)
goto out_free_msg;
@@ -240,28 +305,28 @@ static void __em_notify_pd_table(const struct em_perf_domain *pd, int ntf_type)
genlmsg_end(msg, hdr);
- genlmsg_multicast(&em_nl_family, msg, 0, EM_NLGRP_EVENT, GFP_KERNEL);
+ genlmsg_multicast(&dev_energymodel_nl_family, msg, 0,
+ DEV_ENERGYMODEL_NLGRP_EVENT, GFP_KERNEL);
return;
out_free_msg:
nlmsg_free(msg);
- return;
}
void em_notify_pd_created(const struct em_perf_domain *pd)
{
- __em_notify_pd_table(pd, EM_CMD_PD_CREATED);
+ __em_notify_pd_table(pd, DEV_ENERGYMODEL_CMD_PERF_DOMAIN_CREATED);
}
void em_notify_pd_updated(const struct em_perf_domain *pd)
{
- __em_notify_pd_table(pd, EM_CMD_PD_UPDATED);
+ __em_notify_pd_table(pd, DEV_ENERGYMODEL_CMD_PERF_DOMAIN_UPDATED);
}
static int __em_notify_pd_deleted_size(const struct em_perf_domain *pd)
{
- int id_sz = nla_total_size(sizeof(u32)); /* EM_A_PD_TABLE_PD_ID */
+ int id_sz = nla_total_size(sizeof(u32)); /* DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID */
return nlmsg_total_size(genlmsg_msg_size(id_sz));
}
@@ -272,7 +337,8 @@ void em_notify_pd_deleted(const struct em_perf_domain *pd)
void *hdr;
int msg_sz;
- if (!genl_has_listeners(&em_nl_family, &init_net, EM_NLGRP_EVENT))
+ if (!genl_has_listeners(&dev_energymodel_nl_family, &init_net,
+ DEV_ENERGYMODEL_NLGRP_EVENT))
return;
msg_sz = __em_notify_pd_deleted_size(pd);
@@ -281,28 +347,29 @@ void em_notify_pd_deleted(const struct em_perf_domain *pd)
if (!msg)
return;
- hdr = genlmsg_put(msg, 0, 0, &em_nl_family, 0, EM_CMD_PD_DELETED);
+ hdr = genlmsg_put(msg, 0, 0, &dev_energymodel_nl_family, 0,
+ DEV_ENERGYMODEL_CMD_PERF_DOMAIN_DELETED);
if (!hdr)
goto out_free_msg;
- if (nla_put_u32(msg, EM_A_PD_TABLE_PD_ID, pd->id)) {
+ if (nla_put_u32(msg, DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID,
+ pd->id))
goto out_free_msg;
- }
genlmsg_end(msg, hdr);
- genlmsg_multicast(&em_nl_family, msg, 0, EM_NLGRP_EVENT, GFP_KERNEL);
+ genlmsg_multicast(&dev_energymodel_nl_family, msg, 0,
+ DEV_ENERGYMODEL_NLGRP_EVENT, GFP_KERNEL);
return;
out_free_msg:
nlmsg_free(msg);
- return;
}
/**************************** Initialization *********************************/
static int __init em_netlink_init(void)
{
- return genl_register_family(&em_nl_family);
+ return genl_register_family(&dev_energymodel_nl_family);
}
postcore_initcall(em_netlink_init);
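Note: the new dumpit path above walks every perf domain with a dump_ctx whose idx/start pair skips entries already emitted in an earlier dump pass. A minimal userspace sketch of that resumable-cursor pattern over a plain array; the names and the printf() call are stand-ins for the real netlink attribute emission:

#include <stdio.h>

struct dump_ctx {
	int idx;	/* running position in the walk */
	int start;	/* entries below this were sent in a previous pass */
	int budget;	/* how many entries fit in one "message" */
	int sent;
};

/* Emit one entry unless it was already covered; stop when the message is full. */
static int dump_one(int value, struct dump_ctx *ctx)
{
	if (ctx->idx++ < ctx->start)
		return 0;		/* already sent in an earlier pass */
	if (ctx->sent == ctx->budget)
		return -1;		/* message full, resume later */
	printf("entry %d\n", value);
	ctx->sent++;
	return 0;
}

int main(void)
{
	int data[] = { 10, 20, 30, 40, 50 };
	struct dump_ctx ctx = { .start = 0, .budget = 2 };

	for (int pass = 0; pass < 3; pass++) {
		ctx.idx = 0;
		ctx.sent = 0;
		for (unsigned int i = 0; i < 5; i++)
			if (dump_one(data[i], &ctx))
				break;
		printf("pass %d sent %d entries\n", pass, ctx.sent);
		ctx.start += ctx.sent;	/* resume after what was sent */
		if (!ctx.sent)
			break;		/* nothing left to dump */
	}
	return 0;
}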
diff --git a/kernel/power/em_netlink_autogen.c b/kernel/power/em_netlink_autogen.c
index ceb3b2bb6ebe..fedd473e4244 100644
--- a/kernel/power/em_netlink_autogen.c
+++ b/kernel/power/em_netlink_autogen.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
/* Do not edit directly, auto-generated from: */
-/* Documentation/netlink/specs/em.yaml */
+/* Documentation/netlink/specs/dev-energymodel.yaml */
/* YNL-GEN kernel source */
/* To regenerate run: tools/net/ynl/ynl-regen.sh */
@@ -9,41 +9,53 @@
#include "em_netlink_autogen.h"
-#include <uapi/linux/energy_model.h>
+#include <uapi/linux/dev_energymodel.h>
-/* EM_CMD_GET_PD_TABLE - do */
-static const struct nla_policy em_get_pd_table_nl_policy[EM_A_PD_TABLE_PD_ID + 1] = {
- [EM_A_PD_TABLE_PD_ID] = { .type = NLA_U32, },
+/* DEV_ENERGYMODEL_CMD_GET_PERF_DOMAINS - do */
+static const struct nla_policy dev_energymodel_get_perf_domains_nl_policy[DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID + 1] = {
+ [DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID] = { .type = NLA_U32, },
};
-/* Ops table for em */
-static const struct genl_split_ops em_nl_ops[] = {
+/* DEV_ENERGYMODEL_CMD_GET_PERF_TABLE - do */
+static const struct nla_policy dev_energymodel_get_perf_table_nl_policy[DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID + 1] = {
+ [DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID] = { .type = NLA_U32, },
+};
+
+/* Ops table for dev_energymodel */
+static const struct genl_split_ops dev_energymodel_nl_ops[] = {
+ {
+ .cmd = DEV_ENERGYMODEL_CMD_GET_PERF_DOMAINS,
+ .doit = dev_energymodel_nl_get_perf_domains_doit,
+ .policy = dev_energymodel_get_perf_domains_nl_policy,
+ .maxattr = DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
{
- .cmd = EM_CMD_GET_PDS,
- .doit = em_nl_get_pds_doit,
- .flags = GENL_CMD_CAP_DO,
+ .cmd = DEV_ENERGYMODEL_CMD_GET_PERF_DOMAINS,
+ .dumpit = dev_energymodel_nl_get_perf_domains_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
},
{
- .cmd = EM_CMD_GET_PD_TABLE,
- .doit = em_nl_get_pd_table_doit,
- .policy = em_get_pd_table_nl_policy,
- .maxattr = EM_A_PD_TABLE_PD_ID,
+ .cmd = DEV_ENERGYMODEL_CMD_GET_PERF_TABLE,
+ .doit = dev_energymodel_nl_get_perf_table_doit,
+ .policy = dev_energymodel_get_perf_table_nl_policy,
+ .maxattr = DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID,
.flags = GENL_CMD_CAP_DO,
},
};
-static const struct genl_multicast_group em_nl_mcgrps[] = {
- [EM_NLGRP_EVENT] = { "event", },
+static const struct genl_multicast_group dev_energymodel_nl_mcgrps[] = {
+ [DEV_ENERGYMODEL_NLGRP_EVENT] = { "event", },
};
-struct genl_family em_nl_family __ro_after_init = {
- .name = EM_FAMILY_NAME,
- .version = EM_FAMILY_VERSION,
+struct genl_family dev_energymodel_nl_family __ro_after_init = {
+ .name = DEV_ENERGYMODEL_FAMILY_NAME,
+ .version = DEV_ENERGYMODEL_FAMILY_VERSION,
.netnsok = true,
.parallel_ops = true,
.module = THIS_MODULE,
- .split_ops = em_nl_ops,
- .n_split_ops = ARRAY_SIZE(em_nl_ops),
- .mcgrps = em_nl_mcgrps,
- .n_mcgrps = ARRAY_SIZE(em_nl_mcgrps),
+ .split_ops = dev_energymodel_nl_ops,
+ .n_split_ops = ARRAY_SIZE(dev_energymodel_nl_ops),
+ .mcgrps = dev_energymodel_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(dev_energymodel_nl_mcgrps),
};
diff --git a/kernel/power/em_netlink_autogen.h b/kernel/power/em_netlink_autogen.h
index 140ab548103c..5caf2f7e18a5 100644
--- a/kernel/power/em_netlink_autogen.h
+++ b/kernel/power/em_netlink_autogen.h
@@ -1,24 +1,28 @@
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/* Do not edit directly, auto-generated from: */
-/* Documentation/netlink/specs/em.yaml */
+/* Documentation/netlink/specs/dev-energymodel.yaml */
/* YNL-GEN kernel header */
/* To regenerate run: tools/net/ynl/ynl-regen.sh */
-#ifndef _LINUX_EM_GEN_H
-#define _LINUX_EM_GEN_H
+#ifndef _LINUX_DEV_ENERGYMODEL_GEN_H
+#define _LINUX_DEV_ENERGYMODEL_GEN_H
#include <net/netlink.h>
#include <net/genetlink.h>
-#include <uapi/linux/energy_model.h>
+#include <uapi/linux/dev_energymodel.h>
-int em_nl_get_pds_doit(struct sk_buff *skb, struct genl_info *info);
-int em_nl_get_pd_table_doit(struct sk_buff *skb, struct genl_info *info);
+int dev_energymodel_nl_get_perf_domains_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int dev_energymodel_nl_get_perf_domains_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int dev_energymodel_nl_get_perf_table_doit(struct sk_buff *skb,
+ struct genl_info *info);
enum {
- EM_NLGRP_EVENT,
+ DEV_ENERGYMODEL_NLGRP_EVENT,
};
-extern struct genl_family em_nl_family;
+extern struct genl_family dev_energymodel_nl_family;
-#endif /* _LINUX_EM_GEN_H */
+#endif /* _LINUX_DEV_ENERGYMODEL_GEN_H */
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 11af9f64aa82..5b055cbe5341 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -449,8 +449,10 @@ static int em_create_pd(struct device *dev, int nr_states,
INIT_LIST_HEAD(&pd->node);
id = ida_alloc(&em_pd_ida, GFP_KERNEL);
- if (id < 0)
- return -ENOMEM;
+ if (id < 0) {
+ kfree(pd);
+ return id;
+ }
pd->id = id;
em_table = em_table_alloc(pd);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 2da4482bb6eb..57c44268698f 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -349,9 +349,12 @@ static int suspend_test(int level)
if (pm_test_level == level) {
pr_info("suspend debug: Waiting for %d second(s).\n",
pm_test_delay);
- for (i = 0; i < pm_test_delay && !pm_wakeup_pending(); i++)
- msleep(1000);
-
+ for (i = 0; i < pm_test_delay && !pm_wakeup_pending(); i++) {
+ if (level > TEST_CORE)
+ msleep(1000);
+ else
+ mdelay(1000);
+ }
return 1;
}
#endif /* !CONFIG_PM_DEBUG */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 33a186373bef..8050e5182835 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -902,8 +902,11 @@ out_clean:
for (thr = 0; thr < nr_threads; thr++) {
if (data[thr].thr)
kthread_stop(data[thr].thr);
- acomp_request_free(data[thr].cr);
- crypto_free_acomp(data[thr].cc);
+ if (data[thr].cr)
+ acomp_request_free(data[thr].cr);
+
+ if (!IS_ERR_OR_NULL(data[thr].cc))
+ crypto_free_acomp(data[thr].cc);
}
vfree(data);
}
@@ -1499,8 +1502,11 @@ out_clean:
for (thr = 0; thr < nr_threads; thr++) {
if (data[thr].thr)
kthread_stop(data[thr].thr);
- acomp_request_free(data[thr].cr);
- crypto_free_acomp(data[thr].cc);
+ if (data[thr].cr)
+ acomp_request_free(data[thr].cr);
+
+ if (!IS_ERR_OR_NULL(data[thr].cc))
+ crypto_free_acomp(data[thr].cc);
}
vfree(data);
}
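Note: the error paths above now free only what was actually set up: a NULL request means it was never allocated, and IS_ERR_OR_NULL() also skips transform pointers that hold an encoded error rather than a real allocation. A minimal userspace sketch of that partial-cleanup pattern, with simplified stand-ins for ERR_PTR()/IS_ERR_OR_NULL():

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified userspace stand-ins for the kernel's error-pointer helpers. */
#define ERR_PTR(err)		((void *)(intptr_t)(err))
#define IS_ERR_OR_NULL(p)	((p) == NULL || (uintptr_t)(p) >= (uintptr_t)-4095)

struct worker {
	void *req;	/* NULL until allocated */
	void *tfm;	/* NULL, a valid pointer, or ERR_PTR(-ENOMEM) */
};

static void cleanup(struct worker *w, int n)
{
	for (int i = 0; i < n; i++) {
		if (w[i].req)			/* only free what exists */
			free(w[i].req);
		if (!IS_ERR_OR_NULL(w[i].tfm))	/* skip NULL and error codes */
			free(w[i].tfm);
	}
}

int main(void)
{
	struct worker w[3] = {
		{ .req = malloc(8), .tfm = malloc(8) },		/* fully set up */
		{ .req = NULL, .tfm = ERR_PTR(-ENOMEM) },	/* allocation failed */
		{ 0 },						/* never started */
	};

	cleanup(w, 3);
	puts("cleanup handled partial initialization safely");
	return 0;
}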
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 3fa403f9831f..32fc12e53675 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1557,18 +1557,27 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq)
ctxt->allow_unsafe_takeover = nbcon_allow_unsafe_takeover();
while (nbcon_seq_read(con) < stop_seq) {
- if (!nbcon_context_try_acquire(ctxt, false))
- return -EPERM;
-
/*
- * nbcon_emit_next_record() returns false when the console was
- * handed over or taken over. In both cases the context is no
- * longer valid.
+ * Atomic flushing does not use console driver synchronization
+ * (i.e. it does not hold the port lock for uart consoles).
+ * Therefore IRQs must be disabled to avoid being interrupted
+ * and then calling into a driver that will deadlock trying
+ * to acquire console ownership.
*/
- if (!nbcon_emit_next_record(&wctxt, true))
- return -EAGAIN;
+ scoped_guard(irqsave) {
+ if (!nbcon_context_try_acquire(ctxt, false))
+ return -EPERM;
- nbcon_context_release(ctxt);
+ /*
+ * nbcon_emit_next_record() returns false when
+ * the console was handed over or taken over.
+ * In both cases the context is no longer valid.
+ */
+ if (!nbcon_emit_next_record(&wctxt, true))
+ return -EAGAIN;
+
+ nbcon_context_release(ctxt);
+ }
if (!ctxt->backlog) {
/* Are there reserved but not yet finalized records? */
@@ -1595,22 +1604,11 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq)
static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq)
{
struct console_flush_type ft;
- unsigned long flags;
int err;
again:
- /*
- * Atomic flushing does not use console driver synchronization (i.e.
- * it does not hold the port lock for uart consoles). Therefore IRQs
- * must be disabled to avoid being interrupted and then calling into
- * a driver that will deadlock trying to acquire console ownership.
- */
- local_irq_save(flags);
-
err = __nbcon_atomic_flush_pending_con(con, stop_seq);
- local_irq_restore(flags);
-
/*
* If there was a new owner (-EPERM, -EAGAIN), that context is
* responsible for completing.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 41ba0be16911..045f83ad261e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4950,9 +4950,13 @@ struct balance_callback *splice_balance_callbacks(struct rq *rq)
return __splice_balance_callbacks(rq, true);
}
-static void __balance_callbacks(struct rq *rq)
+void __balance_callbacks(struct rq *rq, struct rq_flags *rf)
{
+ if (rf)
+ rq_unpin_lock(rq, rf);
do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
+ if (rf)
+ rq_repin_lock(rq, rf);
}
void balance_callbacks(struct rq *rq, struct balance_callback *head)
@@ -4991,7 +4995,7 @@ static inline void finish_lock_switch(struct rq *rq)
* prev into current:
*/
spin_acquire(&__rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_);
- __balance_callbacks(rq);
+ __balance_callbacks(rq, NULL);
raw_spin_rq_unlock_irq(rq);
}
@@ -6867,7 +6871,7 @@ keep_resched:
proxy_tag_curr(rq, next);
rq_unpin_lock(rq, &rf);
- __balance_callbacks(rq);
+ __balance_callbacks(rq, NULL);
raw_spin_rq_unlock_irq(rq);
}
trace_sched_exit_tp(is_switch);
@@ -7316,7 +7320,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
trace_sched_pi_setprio(p, pi_task);
oldprio = p->prio;
- if (oldprio == prio)
+ if (oldprio == prio && !dl_prio(prio))
queue_flag &= ~DEQUEUE_MOVE;
prev_class = p->sched_class;
@@ -7362,9 +7366,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
out_unlock:
/* Caller holds task_struct::pi_lock, IRQs are still disabled */
- rq_unpin_lock(rq, &rf);
- __balance_callbacks(rq);
- rq_repin_lock(rq, &rf);
+ __balance_callbacks(rq, &rf);
__task_rq_unlock(rq, p, &rf);
}
#endif /* CONFIG_RT_MUTEXES */
@@ -9124,6 +9126,8 @@ void sched_move_task(struct task_struct *tsk, bool for_autogroup)
if (resched)
resched_curr(rq);
+
+ __balance_callbacks(rq, &rq_guard.rf);
}
static struct cgroup_subsys_state *
@@ -10694,10 +10698,11 @@ void sched_mm_cid_before_execve(struct task_struct *t)
sched_mm_cid_exit(t);
}
-/* Reactivate MM CID after successful execve() */
+/* Reactivate MM CID after execve() */
void sched_mm_cid_after_execve(struct task_struct *t)
{
- sched_mm_cid_fork(t);
+ if (t->mm)
+ sched_mm_cid_fork(t);
}
static void mm_cid_work_fn(struct work_struct *work)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 319439fe1870..c509f2e7d69d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -752,8 +752,6 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
- update_rq_clock(rq);
-
WARN_ON(is_dl_boosted(dl_se));
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
@@ -1420,7 +1418,7 @@ update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, int
static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
{
- bool idle = rq->curr == rq->idle;
+ bool idle = idle_rq(rq);
s64 scaled_delta_exec;
if (unlikely(delta_exec <= 0)) {
@@ -1603,8 +1601,8 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
* | 8 | B:zero_laxity-wait | | |
* | | | <---+ |
* | +--------------------------------+ |
- * | | ^ ^ 2 |
- * | | 7 | 2 +--------------------+
+ * | | ^ ^ 2 |
+ * | | 7 | 2, 1 +----------------+
* | v |
* | +-------------+ |
* +-- | C:idle-wait | -+
@@ -1649,8 +1647,11 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
* dl_defer_idle = 0
*
*
- * [1] A->B, A->D
+ * [1] A->B, A->D, C->B
* dl_server_start()
+ * dl_defer_idle = 0;
+ * if (dl_server_active)
+ * return; // [B]
* dl_server_active = 1;
* enqueue_dl_entity()
* update_dl_entity(WAKEUP)
@@ -1759,6 +1760,7 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
* "B:zero_laxity-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
* "B:zero_laxity-wait" -> "D:running" [label="3:dl_server_timer"]
* "C:idle-wait" -> "A:init" [label="8:dl_server_timer"]
+ * "C:idle-wait" -> "B:zero_laxity-wait" [label="1:dl_server_start"]
* "C:idle-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
* "C:idle-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
* "D:running" -> "A:init" [label="4:pick_task_dl"]
@@ -1784,6 +1786,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
{
struct rq *rq = dl_se->rq;
+ dl_se->dl_defer_idle = 0;
if (!dl_server(dl_se) || dl_se->dl_server_active)
return;
@@ -1834,6 +1837,7 @@ void sched_init_dl_servers(void)
rq = cpu_rq(cpu);
guard(rq_lock_irq)(rq);
+ update_rq_clock(rq);
dl_se = &rq->fair_server;
@@ -2210,7 +2214,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
update_dl_entity(dl_se);
} else if (flags & ENQUEUE_REPLENISH) {
replenish_dl_entity(dl_se);
- } else if ((flags & ENQUEUE_RESTORE) &&
+ } else if ((flags & ENQUEUE_MOVE) &&
!is_dl_boosted(dl_se) &&
dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_se(dl_se)))) {
setup_new_dl_entity(dl_se);
@@ -3154,7 +3158,7 @@ void dl_add_task_root_domain(struct task_struct *p)
struct rq *rq;
struct dl_bw *dl_b;
unsigned int cpu;
- struct cpumask *msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
+ struct cpumask *msk;
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
if (!dl_task(p) || dl_entity_is_special(&p->dl)) {
@@ -3162,20 +3166,12 @@ void dl_add_task_root_domain(struct task_struct *p)
return;
}
- /*
- * Get an active rq, whose rq->rd traces the correct root
- * domain.
- * Ideally this would be under cpuset reader lock until rq->rd is
- * fetched. However, sleepable locks cannot nest inside pi_lock, so we
- * rely on the caller of dl_add_task_root_domain() holds 'cpuset_mutex'
- * to guarantee the CPU stays in the cpuset.
- */
+ msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
dl_get_task_effective_cpus(p, msk);
cpu = cpumask_first_and(cpu_active_mask, msk);
BUG_ON(cpu >= nr_cpu_ids);
rq = cpu_rq(cpu);
dl_b = &rq->rd->dl_bw;
- /* End of fetching rd */
raw_spin_lock(&dl_b->lock);
__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
@@ -3299,6 +3295,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
static u64 get_prio_dl(struct rq *rq, struct task_struct *p)
{
+ /*
+ * Make sure to update current so we don't return a stale value.
+ */
+ if (task_current_donor(rq, p))
+ update_curr_dl(rq);
+
return p->dl.deadline;
}
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 94164f2dec6d..afe28c04d5aa 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -545,6 +545,7 @@ static void scx_task_iter_start(struct scx_task_iter *iter)
static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
{
if (iter->locked_task) {
+ __balance_callbacks(iter->rq, &iter->rf);
task_rq_unlock(iter->rq, iter->locked_task, &iter->rf);
iter->locked_task = NULL;
}
@@ -1577,7 +1578,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
*
* @p may go through multiple stopping <-> running transitions between
* here and put_prev_task_scx() if task attribute changes occur while
- * balance_scx() leaves @rq unlocked. However, they don't contain any
+ * balance_one() leaves @rq unlocked. However, they don't contain any
* information meaningful to the BPF scheduler and can be suppressed by
* skipping the callbacks if the task is !QUEUED.
*/
@@ -2372,7 +2373,7 @@ static void switch_class(struct rq *rq, struct task_struct *next)
* preempted, and it regaining control of the CPU.
*
* ->cpu_release() complements ->cpu_acquire(), which is emitted the
- * next time that balance_scx() is invoked.
+ * next time that balance_one() is invoked.
*/
if (!rq->scx.cpu_released) {
if (SCX_HAS_OP(sch, cpu_release)) {
@@ -2478,7 +2479,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
}
/*
- * If balance_scx() is telling us to keep running @prev, replenish slice
+ * If balance_one() is telling us to keep running @prev, replenish slice
* if necessary and keep running @prev. Otherwise, pop the first one
* from the local DSQ.
*/
@@ -3956,13 +3957,8 @@ static void bypass_lb_node(struct scx_sched *sch, int node)
nr_donor_target, nr_target);
}
- for_each_cpu(cpu, resched_mask) {
- struct rq *rq = cpu_rq(cpu);
-
- raw_spin_rq_lock_irq(rq);
- resched_curr(rq);
- raw_spin_rq_unlock_irq(rq);
- }
+ for_each_cpu(cpu, resched_mask)
+ resched_cpu(cpu);
for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
u32 nr = READ_ONCE(cpu_rq(cpu)->scx.bypass_dsq.nr);
@@ -4025,7 +4021,7 @@ static DEFINE_TIMER(scx_bypass_lb_timer, scx_bypass_lb_timerfn);
*
* - ops.dispatch() is ignored.
*
- * - balance_scx() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
+ * - balance_one() does not set %SCX_RQ_BAL_KEEP on non-zero slice as slice
* can't be trusted. Whenever a tick triggers, the running task is rotated to
* the tail of the queue with core_sched_at touched.
*
@@ -4783,8 +4779,10 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
}
sch->pcpu = alloc_percpu(struct scx_sched_pcpu);
- if (!sch->pcpu)
+ if (!sch->pcpu) {
+ ret = -ENOMEM;
goto err_free_gdsqs;
+ }
sch->helper = kthread_run_worker(0, "sched_ext_helper");
if (IS_ERR(sch->helper)) {
@@ -6067,7 +6065,7 @@ __bpf_kfunc bool scx_bpf_dsq_move_to_local(u64 dsq_id)
/*
* A successfully consumed task can be dequeued before it starts
* running while the CPU is trying to migrate other dispatched
- * tasks. Bump nr_tasks to tell balance_scx() to retry on empty
+ * tasks. Bump nr_tasks to tell balance_one() to retry on empty
* local DSQ.
*/
dspc->nr_tasks++;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index da46c3164537..3eaeceda71b0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -15,7 +15,7 @@
* Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
*
* Scaled math optimizations by Thomas Gleixner
- * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2007, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
*
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
@@ -8828,16 +8828,6 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
if ((wake_flags & WF_FORK) || pse->sched_delayed)
return;
- /*
- * If @p potentially is completing work required by current then
- * consider preemption.
- *
- * Reschedule if waker is no longer eligible. */
- if (in_task() && !entity_eligible(cfs_rq, se)) {
- preempt_action = PREEMPT_WAKEUP_RESCHED;
- goto preempt;
- }
-
/* Prefer picking wakee soon if appropriate. */
if (sched_feat(NEXT_BUDDY) &&
set_preempt_buddy(cfs_rq, wake_flags, pse, se)) {
@@ -8995,12 +8985,6 @@ idle:
goto again;
}
- /*
- * rq is about to be idle, check if we need to update the
- * lost_idle_time of clock_pelt
- */
- update_idle_rq_clock_pelt(rq);
-
return NULL;
}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 980d92bab8ab..136a6584be79 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -29,7 +29,7 @@ SCHED_FEAT(PREEMPT_SHORT, true)
* wakeup-preemption), since its likely going to consume data we
* touched, increases cache locality.
*/
-SCHED_FEAT(NEXT_BUDDY, true)
+SCHED_FEAT(NEXT_BUDDY, false)
/*
* Allow completely ignoring cfs_rq->next; which can be set from various
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c174afe1dd17..abf8f15d60c9 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -468,6 +468,12 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir
scx_update_idle(rq, true, true);
schedstat_inc(rq->sched_goidle);
next->se.exec_start = rq_clock_task(rq);
+
+ /*
+ * rq is about to be idle, check if we need to update the
+ * lost_idle_time of clock_pelt
+ */
+ update_idle_rq_clock_pelt(rq);
}
struct task_struct *pick_task_idle(struct rq *rq, struct rq_flags *rf)
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index fa83bbaf4f3e..897790889ba3 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -15,7 +15,7 @@
* Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
*
* Scaled math optimizations by Thomas Gleixner
- * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2007, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
*
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d30cca6870f5..93fce4bbff5e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1364,6 +1364,28 @@ static inline u32 sched_rng(void)
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
+static inline bool idle_rq(struct rq *rq)
+{
+ return rq->curr == rq->idle && !rq->nr_running && !rq->ttwu_pending;
+}
+
+/**
+ * available_idle_cpu - is a given CPU idle for enqueuing work.
+ * @cpu: the CPU in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
+ */
+static inline bool available_idle_cpu(int cpu)
+{
+ if (!idle_rq(cpu_rq(cpu)))
+ return 0;
+
+ if (vcpu_is_preempted(cpu))
+ return 0;
+
+ return 1;
+}
+
#ifdef CONFIG_SCHED_PROXY_EXEC
static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
{
@@ -2366,7 +2388,8 @@ extern const u32 sched_prio_to_wmult[40];
* should preserve as much state as possible.
*
* MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
- * in the runqueue.
+ * in the runqueue. IOW the priority is allowed to change. Callers
+ * must expect to deal with balance callbacks.
*
* NOCLOCK - skip the update_rq_clock() (avoids double updates)
*
@@ -3947,6 +3970,8 @@ extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
+
+extern void __balance_callbacks(struct rq *rq, struct rq_flags *rf);
extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
/*
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 0496dc29ed0f..6f10db3646e7 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -180,35 +180,7 @@ int task_prio(const struct task_struct *p)
*/
int idle_cpu(int cpu)
{
- struct rq *rq = cpu_rq(cpu);
-
- if (rq->curr != rq->idle)
- return 0;
-
- if (rq->nr_running)
- return 0;
-
- if (rq->ttwu_pending)
- return 0;
-
- return 1;
-}
-
-/**
- * available_idle_cpu - is a given CPU idle for enqueuing work.
- * @cpu: the CPU in question.
- *
- * Return: 1 if the CPU is currently idle. 0 otherwise.
- */
-int available_idle_cpu(int cpu)
-{
- if (!idle_cpu(cpu))
- return 0;
-
- if (vcpu_is_preempted(cpu))
- return 0;
-
- return 1;
+ return idle_rq(cpu_rq(cpu));
}
/**
@@ -667,7 +639,7 @@ change:
* itself.
*/
newprio = rt_effective_prio(p, newprio);
- if (newprio == oldprio)
+ if (newprio == oldprio && !dl_prio(newprio))
queue_flags &= ~DEQUEUE_MOVE;
}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index a59bc75ab7c5..eaae1ce9f060 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -2,7 +2,7 @@
/*
* This file contains functions which manage clock event devices.
*
- * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index a1890a073196..df7194961658 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -252,7 +252,7 @@ enum wd_read_status {
static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
{
- int64_t md = 2 * watchdog->uncertainty_margin;
+ int64_t md = watchdog->uncertainty_margin;
unsigned int nretries, max_retries;
int64_t wd_delay, wd_seq_delay;
u64 wd_end, wd_end2;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index f8ea8c8fc895..0e4bc1ca15ff 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
*
@@ -913,7 +913,7 @@ static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
return true;
/* Extra check for softirq clock bases */
- if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT)
+ if (base->index < HRTIMER_BASE_MONOTONIC_SOFT)
continue;
if (cpu_base->softirq_activated)
continue;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 0207868c8b4d..f63c65881364 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -3,7 +3,7 @@
* This file contains functions which emulate a local clock-event
* device via a broadcast event source.
*
- * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 7e33d3f2e889..d305d8521896 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -3,7 +3,7 @@
* This file contains the base functions to manage periodic tick
* related events.
*
- * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index ffee943d796d..7472597f3225 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -3,7 +3,7 @@
* This file contains functions which manage high resolution tick
* related events.
*
- * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 8ddf74e705d3..2f8a7923fa27 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3ec3daa4acab..91fa2003351c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2735,7 +2735,7 @@ static int __do_adjtimex(struct tk_data *tkd, struct __kernel_timex *txc,
timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET);
result->clock_set = true;
} else {
- tk_update_leap_state_all(&tk_core);
+ tk_update_leap_state_all(tkd);
}
/* Update the multiplier immediately if frequency was set directly */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ef2d5dca6f70..aa758efc3731 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1148,7 +1148,6 @@ struct ftrace_page {
};
#define ENTRY_SIZE sizeof(struct dyn_ftrace)
-#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
static struct ftrace_page *ftrace_pages_start;
static struct ftrace_page *ftrace_pages;
@@ -3834,7 +3833,8 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
return 0;
}
-static int ftrace_allocate_records(struct ftrace_page *pg, int count)
+static int ftrace_allocate_records(struct ftrace_page *pg, int count,
+ unsigned long *num_pages)
{
int order;
int pages;
@@ -3844,7 +3844,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
return -EINVAL;
/* We want to fill as much as possible, with no empty pages */
- pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE);
+ pages = DIV_ROUND_UP(count * ENTRY_SIZE, PAGE_SIZE);
order = fls(pages) - 1;
again:
@@ -3859,6 +3859,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
}
ftrace_number_of_pages += 1 << order;
+ *num_pages += 1 << order;
ftrace_number_of_groups++;
cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
@@ -3887,12 +3888,14 @@ static void ftrace_free_pages(struct ftrace_page *pages)
}
static struct ftrace_page *
-ftrace_allocate_pages(unsigned long num_to_init)
+ftrace_allocate_pages(unsigned long num_to_init, unsigned long *num_pages)
{
struct ftrace_page *start_pg;
struct ftrace_page *pg;
int cnt;
+ *num_pages = 0;
+
if (!num_to_init)
return NULL;
@@ -3906,7 +3909,7 @@ ftrace_allocate_pages(unsigned long num_to_init)
* waste as little space as possible.
*/
for (;;) {
- cnt = ftrace_allocate_records(pg, num_to_init);
+ cnt = ftrace_allocate_records(pg, num_to_init, num_pages);
if (cnt < 0)
goto free_pages;
@@ -7192,8 +7195,6 @@ static int ftrace_process_locs(struct module *mod,
if (!count)
return 0;
- pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE);
-
/*
* Sorting mcount in vmlinux at build time depends on
* CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in
@@ -7206,7 +7207,7 @@ static int ftrace_process_locs(struct module *mod,
test_is_sorted(start, count);
}
- start_pg = ftrace_allocate_pages(count);
+ start_pg = ftrace_allocate_pages(count, &pages);
if (!start_pg)
return -ENOMEM;
@@ -7305,27 +7306,27 @@ static int ftrace_process_locs(struct module *mod,
/* We should have used all pages unless we skipped some */
if (pg_unuse) {
unsigned long pg_remaining, remaining = 0;
- unsigned long skip;
+ long skip;
/* Count the number of entries unused and compare it to skipped. */
- pg_remaining = (ENTRIES_PER_PAGE << pg->order) - pg->index;
+ pg_remaining = (PAGE_SIZE << pg->order) / ENTRY_SIZE - pg->index;
if (!WARN(skipped < pg_remaining, "Extra allocated pages for ftrace")) {
skip = skipped - pg_remaining;
- for (pg = pg_unuse; pg; pg = pg->next)
+ for (pg = pg_unuse; pg && skip > 0; pg = pg->next) {
remaining += 1 << pg->order;
+ skip -= (PAGE_SIZE << pg->order) / ENTRY_SIZE;
+ }
pages -= remaining;
- skip = DIV_ROUND_UP(skip, ENTRIES_PER_PAGE);
-
/*
* Check to see if the number of pages remaining would
* just fit the number of entries skipped.
*/
- WARN(skip != remaining, "Extra allocated pages for ftrace: %lu with %lu skipped",
+ WARN(pg || skip > 0, "Extra allocated pages for ftrace: %lu with %lu skipped",
remaining, skipped);
}
/* Need to synchronize with ftrace_location_range() */
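[Illustrative note, not part of the patch] With ENTRIES_PER_PAGE removed, the group size is derived straight from the record footprint: round count * ENTRY_SIZE up to whole pages, pick the largest power-of-two order not exceeding that page count, then recompute how many records the resulting block holds. A small userspace sketch of that arithmetic (PAGE_SIZE, ENTRY_SIZE and the fls() fallback are stand-ins for the kernel definitions):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define ENTRY_SIZE	40UL	/* stand-in for sizeof(struct dyn_ftrace) */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* fls(): 1-based index of the highest set bit, 0 for 0 (mirrors the kernel helper). */
static int fls(unsigned long x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long count = 50000;	/* mcount records to place */
	unsigned long pages = DIV_ROUND_UP(count * ENTRY_SIZE, PAGE_SIZE);
	int order = fls(pages) - 1;	/* 2^order = largest power-of-two page count <= pages */
	unsigned long cnt = (PAGE_SIZE << order) / ENTRY_SIZE;

	/*
	 * The kernel keeps allocating groups (retrying with smaller orders on
	 * allocation failure) until the per-group entry counts cover count.
	 */
	printf("pages=%lu order=%d entries in first group=%lu\n", pages, order, cnt);
	return 0;
}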
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 41c9f5d079be..630221b00838 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3137,6 +3137,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
list) {
list_del_init(&bpage->list);
free_buffer_page(bpage);
+
+ cond_resched();
}
}
out_err_unlock:
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6f2148df14d9..8bd4ec08fb36 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -138,7 +138,7 @@ cpumask_var_t __read_mostly tracing_buffer_mask;
* by commas.
*/
/* Set to string format zero to disable by default */
-char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
+static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
/* When set, tracing will stop when a WARN*() is hit */
static int __disable_trace_on_warning;
@@ -3012,6 +3012,11 @@ static void __ftrace_trace_stack(struct trace_array *tr,
struct ftrace_stack *fstack;
struct stack_entry *entry;
int stackidx;
+ int bit;
+
+ bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
+ if (bit < 0)
+ return;
/*
* Add one, for this function and the call to save_stack_trace()
@@ -3080,6 +3085,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
/* Again, don't let gcc optimize things here */
barrier();
__this_cpu_dec(ftrace_stack_reserve);
+ trace_clear_recursion(bit);
}
static inline void ftrace_trace_stack(struct trace_array *tr,
@@ -6109,10 +6115,10 @@ static int cmp_mod_entry(const void *key, const void *pivot)
unsigned long addr = (unsigned long)key;
const struct trace_mod_entry *ent = pivot;
- if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
- return 0;
- else
- return addr - ent->mod_addr;
+ if (addr < ent[0].mod_addr)
+ return -1;
+
+ return addr >= ent[1].mod_addr;
}
/**
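[Illustrative note, not part of the patch] The rewritten comparator gives bsearch() a proper three-way result: -1 when the address is below the entry's start, 0 when it falls inside [ent[0].mod_addr, ent[1].mod_addr), and 1 once it reaches the next entry, instead of returning a raw address difference that can truncate when narrowed to int. A standalone sketch of the same range-lookup pattern (the local struct, sample addresses and the ULONG_MAX sentinel are illustrative, not the kernel's data):

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

struct mod_entry {
	unsigned long mod_addr;	/* start address of one module's range */
};

/* bsearch() comparator: 0 means addr lies in [ent[0].mod_addr, ent[1].mod_addr). */
static int cmp_mod_entry(const void *key, const void *pivot)
{
	unsigned long addr = (unsigned long)key;
	const struct mod_entry *ent = pivot;

	if (addr < ent[0].mod_addr)
		return -1;

	return addr >= ent[1].mod_addr;
}

int main(void)
{
	/* Sorted range starts plus a sentinel so ent[1] is always valid. */
	struct mod_entry mods[] = {
		{ 0x1000 }, { 0x4000 }, { 0x9000 }, { ULONG_MAX },
	};
	size_t nr = sizeof(mods) / sizeof(mods[0]) - 1;	/* exclude the sentinel */
	unsigned long addr = 0x4200;

	struct mod_entry *hit = bsearch((void *)addr, mods, nr,
					sizeof(mods[0]), cmp_mod_entry);
	if (hit)
		printf("0x%lx falls in the range starting at 0x%lx\n",
		       addr, hit->mod_addr);
	return 0;
}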
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 76067529db61..137b4d9bb116 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -826,16 +826,15 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
* When soft_disable is set and enable is set, we want to
* register the tracepoint for the event, but leave the event
* as is. That means, if the event was already enabled, we do
- * nothing (but set soft_mode). If the event is disabled, we
- * set SOFT_DISABLED before enabling the event tracepoint, so
- * it still seems to be disabled.
+ * nothing. If the event is disabled, we set SOFT_DISABLED
+ * before enabling the event tracepoint, so it still seems
+ * to be disabled.
*/
if (!soft_disable)
clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
else {
if (atomic_inc_return(&file->sm_ref) > 1)
break;
- soft_mode = true;
/* Enable use of trace_buffered_event */
trace_buffered_event_enable();
}
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 5e6e70540eef..c97bb2fda5c0 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2057,6 +2057,15 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
hist_field->fn_num = HIST_FIELD_FN_RELDYNSTRING;
else
hist_field->fn_num = HIST_FIELD_FN_PSTRING;
+ } else if (field->filter_type == FILTER_STACKTRACE) {
+ flags |= HIST_FIELD_FL_STACKTRACE;
+
+ hist_field->size = MAX_FILTER_STR_VAL;
+ hist_field->type = kstrdup_const(field->type, GFP_KERNEL);
+ if (!hist_field->type)
+ goto free;
+
+ hist_field->fn_num = HIST_FIELD_FN_STACK;
} else {
hist_field->size = field->size;
hist_field->is_signed = field->is_signed;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 4554c458b78c..45c187e77e21 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -130,7 +130,9 @@ static int synth_event_define_fields(struct trace_event_call *call)
struct synth_event *event = call->data;
unsigned int i, size, n_u64;
char *name, *type;
+ int filter_type;
bool is_signed;
+ bool is_stack;
int ret = 0;
for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
@@ -138,8 +140,12 @@ static int synth_event_define_fields(struct trace_event_call *call)
is_signed = event->fields[i]->is_signed;
type = event->fields[i]->type;
name = event->fields[i]->name;
+ is_stack = event->fields[i]->is_stack;
+
+ filter_type = is_stack ? FILTER_STACKTRACE : FILTER_OTHER;
+
ret = trace_define_field(call, type, name, offset, size,
- is_signed, FILTER_OTHER);
+ is_signed, filter_type);
if (ret)
break;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b1e9c9913309..1de6f1573621 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -901,7 +901,7 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr
trace_seq_printf(s, "%ps", func);
if (args_size >= FTRACE_REGS_MAX_ARGS * sizeof(long)) {
- print_function_args(s, entry->args, (unsigned long)func);
+ print_function_args(s, FGRAPH_ENTRY_ARGS(entry), (unsigned long)func);
trace_seq_putc(s, ';');
} else
trace_seq_puts(s, "();");
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index fe9bf8db1922..e2784038bbed 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -36,7 +36,11 @@ struct hwerr_info {
time64_t timestamp;
};
-static struct hwerr_info hwerr_data[HWERR_RECOV_MAX];
+/*
+ * The hwerr_data[] array is declared with global scope so that it remains
+ * accessible to vmcoreinfo even when Link Time Optimization (LTO) is enabled.
+ */
+struct hwerr_info hwerr_data[HWERR_RECOV_MAX];
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 0685e3a8aa0a..366122f4a0f8 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -71,7 +71,7 @@ unsigned int __read_mostly hardlockup_panic =
* hard lockup is detected, it could be task, memory, lock etc.
* Refer include/linux/sys_info.h for detailed bit definition.
*/
-static unsigned long hardlockup_si_mask;
+unsigned long hardlockup_si_mask;
#ifdef CONFIG_SYSFS