author    Linus Torvalds <torvalds@linux-foundation.org>  2026-02-18 21:40:16 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2026-02-18 21:40:16 -0800
commit    2b7a25df823dc7d8f56f8ce7c2d2dac391cea9c2 (patch)
tree      6af407ea7335baf0f682e29f9c4b68eebbe5ac5c
parent    eeccf287a2a517954b57cf9d733b3cf5d47afa34 (diff)
parent    90627a1e08e602b8b7bea970d7d5007626be7527 (diff)
Merge tag 'mm-nonmm-stable-2026-02-18-19-56' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull more non-MM updates from Andrew Morton:

 - "two fixes in kho_populate()" fixes a couple of not-major issues in
   the kexec handover code (Ran Xiaokai)

 - misc singletons

* tag 'mm-nonmm-stable-2026-02-18-19-56' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  lib/group_cpus: handle const qualifier from clusters allocation type
  kho: remove unnecessary WARN_ON(err) in kho_populate()
  kho: fix missing early_memunmap() call in kho_populate()
  scripts/gdb: implement x86_page_ops in mm.py
  objpool: fix the overestimation of object pooling metadata size
  selftests/memfd: use IPC semaphore instead of SIGSTOP/SIGCONT
  delayacct: fix build regression on accounting tool
-rw-r--r--  kernel/liveupdate/kexec_handover.c          |  28
-rw-r--r--  lib/group_cpus.c                            |   2
-rw-r--r--  lib/objpool.c                               |   2
-rw-r--r--  scripts/gdb/linux/constants.py.in           |   2
-rw-r--r--  scripts/gdb/linux/mm.py                     | 173
-rw-r--r--  tools/accounting/getdelays.c                |  12
-rw-r--r--  tools/testing/selftests/memfd/memfd_test.c  | 113

7 files changed, 301 insertions, 31 deletions
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index fb3a7b67676e..95601623b4d6 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -1463,36 +1463,37 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
struct kho_scratch *scratch = NULL;
phys_addr_t mem_map_phys;
void *fdt = NULL;
+ bool populated = false;
int err;
/* Validate the input FDT */
fdt = early_memremap(fdt_phys, fdt_len);
if (!fdt) {
pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys);
- goto err_report;
+ goto report;
}
err = fdt_check_header(fdt);
if (err) {
pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n",
fdt_phys, err);
- goto err_unmap_fdt;
+ goto unmap_fdt;
}
err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE);
if (err) {
pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n",
fdt_phys, KHO_FDT_COMPATIBLE, err);
- goto err_unmap_fdt;
+ goto unmap_fdt;
}
mem_map_phys = kho_get_mem_map_phys(fdt);
if (!mem_map_phys)
- goto err_unmap_fdt;
+ goto unmap_fdt;
scratch = early_memremap(scratch_phys, scratch_len);
if (!scratch) {
pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n",
scratch_phys, scratch_len);
- goto err_unmap_fdt;
+ goto unmap_fdt;
}
/*
@@ -1506,10 +1507,10 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
memblock_add(area->addr, size);
err = memblock_mark_kho_scratch(area->addr, size);
- if (WARN_ON(err)) {
+ if (err) {
pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %pe",
&area->addr, &size, ERR_PTR(err));
- goto err_unmap_scratch;
+ goto unmap_scratch;
}
pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size);
}
@@ -1529,16 +1530,17 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
kho_in.scratch_phys = scratch_phys;
kho_in.mem_map_phys = mem_map_phys;
kho_scratch_cnt = scratch_cnt;
- pr_info("found kexec handover data.\n");
- return;
+ populated = true;
+ pr_info("found kexec handover data.\n");
-err_unmap_scratch:
+unmap_scratch:
early_memunmap(scratch, scratch_len);
-err_unmap_fdt:
+unmap_fdt:
early_memunmap(fdt, fdt_len);
-err_report:
- pr_warn("disabling KHO revival\n");
+report:
+ if (!populated)
+ pr_warn("disabling KHO revival\n");
}
/* Helper functions for kexec_file_load */
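Taken together, the two KHO fixes above make kho_populate() converge on a single exit path: success now falls through the same early_memunmap() calls the error paths use (previously the temporary mappings leaked on success), and a populated flag decides whether the "disabling KHO revival" warning is printed. A minimal, self-contained sketch of that pattern, with illustrative names standing in for early_memremap()/early_memunmap():

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* map_region()/unmap_region() stand in for early_memremap()/early_memunmap() */
static void *map_region(size_t len) { return malloc(len); }
static void unmap_region(void *p)   { free(p); }

static void populate(size_t len, bool valid)
{
	bool populated = false;
	void *fdt;

	fdt = map_region(len);
	if (!fdt)
		goto report;
	if (!valid)		/* stands in for fdt_check_header() etc. */
		goto unmap_fdt;

	/* ... record the handed-over state ... */
	populated = true;

	/* success falls through: the temporary mapping is torn down on
	 * every path, which is the leak the early_memunmap() fix plugs */
unmap_fdt:
	unmap_region(fdt);
report:
	if (!populated)
		fprintf(stderr, "disabling revival\n");
}

int main(void)
{
	populate(64, true);	/* success: unmapped, no warning */
	populate(64, false);	/* failure: unmapped, warning printed */
	return 0;
}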
diff --git a/lib/group_cpus.c b/lib/group_cpus.c
index a93df70919df..d496c5001961 100644
--- a/lib/group_cpus.c
+++ b/lib/group_cpus.c
@@ -320,7 +320,7 @@ static int alloc_cluster_groups(unsigned int ncpus,
goto no_cluster;
/* Allocate memory based on cluster number. */
- clusters = kcalloc(ncluster, sizeof(struct cpumask *), GFP_KERNEL);
+ clusters = kcalloc(ncluster, sizeof(*clusters), GFP_KERNEL);
if (!clusters)
goto no_cluster;
cluster_groups = kcalloc(ncluster, sizeof(struct node_groups), GFP_KERNEL);
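The one-liner above applies the standard sizeof(*ptr) allocation idiom: sizing the allocation from the variable being assigned keeps the element type, qualifiers included, in a single place, so the call stays correct if the declaration of clusters changes. A small sketch of the idiom, with made-up types:

#include <stdlib.h>

struct mask { unsigned long bits[4]; };

int main(void)
{
	const struct mask **clusters;	/* element type lives in one place */
	size_t ncluster = 8;

	/* sizeof(*clusters) is one element (a pointer) and follows the
	 * declaration automatically; spelling out sizeof(struct mask *)
	 * happens to give the same size here but silently drifts out of
	 * sync if the declared type ever changes */
	clusters = calloc(ncluster, sizeof(*clusters));
	if (!clusters)
		return 1;
	free(clusters);
	return 0;
}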
diff --git a/lib/objpool.c b/lib/objpool.c
index b998b720c732..d98fadf1de16 100644
--- a/lib/objpool.c
+++ b/lib/objpool.c
@@ -142,7 +142,7 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
pool->gfp = gfp & ~__GFP_ZERO;
pool->context = context;
pool->release = release;
- slot_size = nr_cpu_ids * sizeof(struct objpool_slot);
+ slot_size = nr_cpu_ids * sizeof(struct objpool_slot *);
pool->cpu_slots = kzalloc(slot_size, pool->gfp);
if (!pool->cpu_slots)
return -ENOMEM;
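pool->cpu_slots is an array of per-CPU slot pointers (struct objpool_slot **), so each element is pointer-sized; the old sizeof(struct objpool_slot) allocated a whole slot structure per possible CPU, which is the overestimation the commit title refers to. A self-contained sketch of the difference, with an illustrative slot layout:

#include <stdio.h>
#include <stdlib.h>

struct slot { unsigned int head, tail, last, mask; };

int main(void)
{
	size_t ncpus = 64;
	struct slot **cpu_slots;

	printf("array of structs:  %zu bytes\n", ncpus * sizeof(struct slot));
	printf("array of pointers: %zu bytes\n", ncpus * sizeof(struct slot *));

	/* only the pointer array is needed here; the slots themselves
	 * are allocated separately, one per possible CPU */
	cpu_slots = calloc(ncpus, sizeof(*cpu_slots));
	if (!cpu_slots)
		return 1;
	free(cpu_slots);
	return 0;
}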
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index 6d475540c6ba..dab8b80bed69 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -150,8 +150,8 @@ LX_CONFIG(CONFIG_ARM64_64K_PAGES)
if IS_BUILTIN(CONFIG_ARM64):
LX_VALUE(CONFIG_ARM64_PA_BITS)
LX_VALUE(CONFIG_ARM64_VA_BITS)
- LX_VALUE(CONFIG_PAGE_SHIFT)
LX_VALUE(CONFIG_ARCH_FORCE_MAX_ORDER)
+LX_VALUE(CONFIG_PAGE_SHIFT)
LX_CONFIG(CONFIG_SPARSEMEM)
LX_CONFIG(CONFIG_SPARSEMEM_EXTREME)
LX_CONFIG(CONFIG_SPARSEMEM_VMEMMAP)
diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py
index 7571aebbe650..d78908f6664d 100644
--- a/scripts/gdb/linux/mm.py
+++ b/scripts/gdb/linux/mm.py
@@ -26,8 +26,179 @@ class page_ops():
raise gdb.GdbError('Only support CONFIG_SPARSEMEM_VMEMMAP now')
if constants.LX_CONFIG_ARM64 and utils.is_target_arch('aarch64'):
self.ops = aarch64_page_ops()
+ elif utils.is_target_arch('x86_64') or utils.is_target_arch('x86-64'):
+ self.ops = x86_page_ops()
else:
- raise gdb.GdbError('Only support aarch64 now')
+ raise gdb.GdbError('Only support aarch64 and x86_64 now')
+
+class x86_page_ops():
+ def __init__(self):
+ self.struct_page_size = utils.get_page_type().sizeof
+ self.PAGE_SHIFT = constants.LX_CONFIG_PAGE_SHIFT
+ self.PAGE_SIZE = 1 << self.PAGE_SHIFT
+ self.PAGE_MASK = (~(self.PAGE_SIZE - 1)) & ((1 << 64) - 1)
+
+ self.PAGE_OFFSET = int(gdb.parse_and_eval("page_offset_base"))
+ self.VMEMMAP_START = int(gdb.parse_and_eval("vmemmap_base"))
+ self.PHYS_BASE = int(gdb.parse_and_eval("phys_base"))
+ self.START_KERNEL_map = 0xffffffff80000000
+
+ self.KERNEL_START = gdb.parse_and_eval("_text")
+ self.KERNEL_END = gdb.parse_and_eval("_end")
+
+ self.VMALLOC_START = int(gdb.parse_and_eval("vmalloc_base"))
+ if self.VMALLOC_START == 0xffffc90000000000:
+ self.VMALLOC_END = self.VMALLOC_START + (32 * 1024 * 1024 * 1024 * 1024) - 1
+ elif self.VMALLOC_START == 0xffa0000000000000:
+ self.VMALLOC_END = self.VMALLOC_START + (12800 * 1024 * 1024 * 1024 * 1024) - 1
+ else:
+ self.VMALLOC_END = self.VMALLOC_START + (12800 * 1024 * 1024 * 1024 * 1024) - 1
+
+ self.MAX_PHYSMEM_BITS = 46
+ self.SECTION_SIZE_BITS = 27
+ self.MAX_ORDER = 10
+
+ self.SECTIONS_SHIFT = self.MAX_PHYSMEM_BITS - self.SECTION_SIZE_BITS
+ self.NR_MEM_SECTIONS = 1 << self.SECTIONS_SHIFT
+ self.PFN_SECTION_SHIFT = self.SECTION_SIZE_BITS - self.PAGE_SHIFT
+ self.PAGES_PER_SECTION = 1 << self.PFN_SECTION_SHIFT
+ self.PAGE_SECTION_MASK = (~(self.PAGES_PER_SECTION - 1)) & ((1 << 64) - 1)
+
+ if constants.LX_CONFIG_SPARSEMEM_EXTREME:
+ self.SECTIONS_PER_ROOT = self.PAGE_SIZE // gdb.lookup_type("struct mem_section").sizeof
+ else:
+ self.SECTIONS_PER_ROOT = 1
+
+ self.NR_SECTION_ROOTS = DIV_ROUND_UP(self.NR_MEM_SECTIONS, self.SECTIONS_PER_ROOT)
+ self.SECTION_ROOT_MASK = self.SECTIONS_PER_ROOT - 1
+
+ try:
+ self.SECTION_HAS_MEM_MAP = 1 << int(gdb.parse_and_eval('SECTION_HAS_MEM_MAP_BIT'))
+ self.SECTION_IS_EARLY = 1 << int(gdb.parse_and_eval('SECTION_IS_EARLY_BIT'))
+ except:
+ self.SECTION_HAS_MEM_MAP = 1 << 0
+ self.SECTION_IS_EARLY = 1 << 3
+
+ self.SUBSECTION_SHIFT = 21
+ self.PAGES_PER_SUBSECTION = 1 << (self.SUBSECTION_SHIFT - self.PAGE_SHIFT)
+
+ if constants.LX_CONFIG_NUMA and constants.LX_CONFIG_NODES_SHIFT:
+ self.NODE_SHIFT = constants.LX_CONFIG_NODES_SHIFT
+ else:
+ self.NODE_SHIFT = 0
+
+ self.MAX_NUMNODES = 1 << self.NODE_SHIFT
+
+ self.vmemmap = gdb.Value(self.VMEMMAP_START).cast(utils.get_page_type().pointer())
+
+ def kasan_reset_tag(self, addr):
+ return addr
+
+ def SECTION_NR_TO_ROOT(self, sec):
+ return sec // self.SECTIONS_PER_ROOT
+
+ def __nr_to_section(self, nr):
+ root = self.SECTION_NR_TO_ROOT(nr)
+ mem_section = gdb.parse_and_eval("mem_section")
+ return mem_section[root][nr & self.SECTION_ROOT_MASK]
+
+ def pfn_to_section_nr(self, pfn):
+ return pfn >> self.PFN_SECTION_SHIFT
+
+ def section_nr_to_pfn(self, sec):
+ return sec << self.PFN_SECTION_SHIFT
+
+ def __pfn_to_section(self, pfn):
+ return self.__nr_to_section(self.pfn_to_section_nr(pfn))
+
+ def pfn_to_section(self, pfn):
+ return self.__pfn_to_section(pfn)
+
+ def subsection_map_index(self, pfn):
+ return (pfn & ~(self.PAGE_SECTION_MASK)) // self.PAGES_PER_SUBSECTION
+
+ def pfn_section_valid(self, ms, pfn):
+ if constants.LX_CONFIG_SPARSEMEM_VMEMMAP:
+ idx = self.subsection_map_index(pfn)
+ return test_bit(idx, ms['usage']['subsection_map'])
+ else:
+ return True
+
+ def valid_section(self, mem_section):
+ if mem_section != None and (mem_section['section_mem_map'] & self.SECTION_HAS_MEM_MAP):
+ return True
+ return False
+
+ def early_section(self, mem_section):
+ if mem_section != None and (mem_section['section_mem_map'] & self.SECTION_IS_EARLY):
+ return True
+ return False
+
+ def pfn_valid(self, pfn):
+ ms = None
+ if self.PHYS_PFN(self.PFN_PHYS(pfn)) != pfn:
+ return False
+ if self.pfn_to_section_nr(pfn) >= self.NR_MEM_SECTIONS:
+ return False
+ ms = self.__pfn_to_section(pfn)
+
+ if not self.valid_section(ms):
+ return False
+ return self.early_section(ms) or self.pfn_section_valid(ms, pfn)
+
+ def PFN_PHYS(self, pfn):
+ return pfn << self.PAGE_SHIFT
+
+ def PHYS_PFN(self, phys):
+ return phys >> self.PAGE_SHIFT
+
+ def __phys_to_virt(self, pa):
+ return pa + self.PAGE_OFFSET
+
+ def __virt_to_phys(self, va):
+ if va >= self.START_KERNEL_map:
+ return va - self.START_KERNEL_map + self.PHYS_BASE
+ else:
+ return va - self.PAGE_OFFSET
+
+ def virt_to_phys(self, va):
+ return self.__virt_to_phys(va)
+
+ def virt_to_page(self, va):
+ return self.pfn_to_page(self.virt_to_pfn(va))
+
+ def __pa(self, va):
+ return self.__virt_to_phys(va)
+
+ def __va(self, pa):
+ return self.__phys_to_virt(pa)
+
+ def pfn_to_kaddr(self, pfn):
+ return self.__va(pfn << self.PAGE_SHIFT)
+
+ def virt_to_pfn(self, va):
+ return self.PHYS_PFN(self.__virt_to_phys(va))
+
+ def sym_to_pfn(self, x):
+ return self.PHYS_PFN(self.__virt_to_phys(x))
+
+ def page_to_pfn(self, page):
+ return int(page.cast(utils.get_page_type().pointer()) - self.vmemmap)
+
+ def pfn_to_page(self, pfn):
+ return self.vmemmap + pfn
+
+ def page_to_phys(self, page):
+ return self.PFN_PHYS(self.page_to_pfn(page))
+
+ def page_to_virt(self, page):
+ return self.__va(self.page_to_phys(page))
+
+ def page_address(self, page):
+ return self.page_to_virt(page)
+
+ def folio_address(self, folio):
+ return self.page_address(folio['page'].address)
class aarch64_page_ops():
def __init__(self):
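The new backend reads the kernel's x86-64 layout globals (page_offset_base, vmemmap_base, phys_base) from the debuggee, so the lx-* page translation helpers that mm.py provides should now work against x86-64 targets as well as aarch64 ones. A self-contained Python sketch of the core vmemmap arithmetic, using the documented pre-KASLR 4-level defaults as illustrative constants:

# Sketch of the pfn<->page translation above; the bases are the default
# x86-64 4-level values before KASLR randomization, and 64 bytes is a
# typical (config-dependent) sizeof(struct page).
PAGE_SHIFT = 12
STRUCT_PAGE_SIZE = 64
VMEMMAP_START = 0xffffea0000000000   # default vmemmap_base
PAGE_OFFSET = 0xffff888000000000     # default page_offset_base (direct map)

def pfn_to_page(pfn):
    # one struct page per pfn, laid out linearly from vmemmap_base
    return VMEMMAP_START + pfn * STRUCT_PAGE_SIZE

def page_to_pfn(page):
    return (page - VMEMMAP_START) // STRUCT_PAGE_SIZE

def virt_to_pfn(va):
    # direct-map addresses only; kernel-text addresses need phys_base,
    # as in __virt_to_phys() above
    return (va - PAGE_OFFSET) >> PAGE_SHIFT

assert page_to_pfn(pfn_to_page(0x1234)) == 0x1234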
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 64796c0223be..50792df27707 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -196,20 +196,20 @@ static int get_family_id(int sd)
#define delay_ms(t) (t / 1000000ULL)
/*
- * Format timespec64 to human readable string (YYYY-MM-DD HH:MM:SS)
+ * Format __kernel_timespec to human readable string (YYYY-MM-DD HH:MM:SS)
* Returns formatted string or "N/A" if timestamp is zero
*/
-static const char *format_timespec64(struct timespec64 *ts)
+static const char *format_timespec(struct __kernel_timespec *ts)
{
static char buffer[32];
struct tm tm_info;
- time_t time_sec;
+ __kernel_time_t time_sec;
/* Check if timestamp is zero (not set) */
if (ts->tv_sec == 0 && ts->tv_nsec == 0)
return "N/A";
- time_sec = (time_t)ts->tv_sec;
+ time_sec = ts->tv_sec;
/* Use thread-safe localtime_r */
if (localtime_r(&time_sec, &tm_info) == NULL)
@@ -257,7 +257,7 @@ static const char *format_timespec64(struct timespec64 *ts)
average_ms((double)(t)->cpu_delay_total, (t)->cpu_count), \
delay_ms((double)(t)->cpu_delay_max), \
delay_ms((double)(t)->cpu_delay_min), \
- format_timespec64(&(t)->cpu_delay_max_ts)); \
+ format_timespec(&(t)->cpu_delay_max_ts)); \
} else if (version >= 16) { \
printf("%-10s%15s%15s%15s%15s%15s%15s%15s\n", \
"CPU", "count", "real total", "virtual total", \
@@ -316,7 +316,7 @@ static const char *format_timespec64(struct timespec64 *ts)
average_ms((double)(t)->total, (t)->count), \
delay_ms((double)(t)->max), \
delay_ms((double)(t)->min), \
- format_timespec64(&(t)->max_ts)); \
+ format_timespec(&(t)->max_ts)); \
} else if (version >= 16) { \
printf("%-10s%15s%15s%15s%15s%15s\n", \
name, "count", "delay total", "delay average", \
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 5b993924cc3f..2ca07ea7202a 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -18,6 +18,9 @@
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
#include <unistd.h>
#include <ctype.h>
@@ -39,6 +42,20 @@
F_SEAL_EXEC)
#define MFD_NOEXEC_SEAL 0x0008U
+union semun {
+ int val;
+ struct semid_ds *buf;
+ unsigned short int *array;
+ struct seminfo *__buf;
+};
+
+/*
+ * We use semaphores for the nested wait tasks because of CLONE_NEWPID: the
+ * child will be PID 1 and can't send SIGSTOP to itself due to the special
+ * treatment of the init task, so the SIGSTOP/SIGCONT synchronization
+ * approach can't be used here.
+ */
+#define SEM_KEY 0xdeadbeef
/*
* Default is not to test hugetlbfs
@@ -1333,8 +1350,22 @@ static int sysctl_nested(void *arg)
static int sysctl_nested_wait(void *arg)
{
- /* Wait for a SIGCONT. */
- kill(getpid(), SIGSTOP);
+ int sem = semget(SEM_KEY, 1, 0600);
+ struct sembuf sembuf;
+
+ if (sem < 0) {
+ perror("semget:");
+ abort();
+ }
+ sembuf.sem_num = 0;
+ sembuf.sem_flg = 0;
+ sembuf.sem_op = 0;
+
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ abort();
+ }
+
return sysctl_nested(arg);
}
@@ -1355,7 +1386,9 @@ static void test_sysctl_sysctl2_failset(void)
static int sysctl_nested_child(void *arg)
{
- int pid;
+ int pid, sem;
+ union semun semun;
+ struct sembuf sembuf;
printf("%s nested sysctl 0\n", memfd_str);
sysctl_assert_write("0");
@@ -1389,23 +1422,53 @@ static int sysctl_nested_child(void *arg)
test_sysctl_sysctl2_failset);
join_thread(pid);
+ sem = semget(SEM_KEY, 1, IPC_CREAT | 0600);
+ if (sem < 0) {
+ perror("semget:");
+ return 1;
+ }
+ semun.val = 1;
+ sembuf.sem_op = -1;
+ sembuf.sem_flg = 0;
+ sembuf.sem_num = 0;
+
/* Verify that the rules are actually inherited after fork. */
printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
sysctl_assert_write("0");
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl1_failset);
sysctl_assert_write("1");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
sysctl_assert_write("0");
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl2_failset);
sysctl_assert_write("2");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
/*
@@ -1415,28 +1478,62 @@ static int sysctl_nested_child(void *arg)
*/
printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
sysctl_assert_write("2");
+
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl2);
sysctl_assert_write("1");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
sysctl_assert_write("2");
+
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl2);
sysctl_assert_write("0");
- kill(pid, SIGCONT);
+
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
sysctl_assert_write("1");
+
+ if (semctl(sem, 0, SETVAL, semun) < 0) {
+ perror("semctl:");
+ return 1;
+ }
+
pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
test_sysctl_sysctl1);
sysctl_assert_write("0");
- kill(pid, SIGCONT);
+ /* Allow child to continue */
+ if (semop(sem, &sembuf, 1) < 0) {
+ perror("semop:");
+ return 1;
+ }
join_thread(pid);
+ semctl(sem, 0, IPC_RMID);
+
return 0;
}
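The synchronization the test adopts is the SysV "wait for zero" handshake: the parent sets the semaphore to 1 before spawning, the child's sem_op = 0 blocks until the value drops to zero, and the parent's sem_op = -1 releases it. Unlike kill(getpid(), SIGSTOP), this also works when the waiter is PID 1 of a new pid namespace. A minimal, self-contained sketch of the same handshake across fork():

#include <stdio.h>
#include <stdlib.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/wait.h>
#include <unistd.h>

union semun { int val; struct semid_ds *buf; unsigned short *array; };

int main(void)
{
	int sem = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	union semun arg = { .val = 1 };
	struct sembuf op = { .sem_num = 0, .sem_op = 0, .sem_flg = 0 };

	if (sem < 0 || semctl(sem, 0, SETVAL, arg) < 0) {
		perror("sem setup");
		exit(1);
	}

	if (fork() == 0) {
		op.sem_op = 0;			/* block until value == 0 */
		if (semop(sem, &op, 1) < 0)
			exit(1);
		puts("child released");
		exit(0);
	}

	op.sem_op = -1;				/* 1 -> 0 wakes the child */
	if (semop(sem, &op, 1) < 0)
		perror("semop");
	wait(NULL);
	semctl(sem, 0, IPC_RMID);
	return 0;
}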