diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-18 21:40:16 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-18 21:40:16 -0800 |
| commit | 2b7a25df823dc7d8f56f8ce7c2d2dac391cea9c2 (patch) | |
| tree | 6af407ea7335baf0f682e29f9c4b68eebbe5ac5c | |
| parent | eeccf287a2a517954b57cf9d733b3cf5d47afa34 (diff) | |
| parent | 90627a1e08e602b8b7bea970d7d5007626be7527 (diff) | |
Merge tag 'mm-nonmm-stable-2026-02-18-19-56' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull more non-MM updates from Andrew Morton:
- "two fixes in kho_populate()" fixes a couple of not-major issues in
the kexec handover code (Ran Xiaokai)
- misc singletons
* tag 'mm-nonmm-stable-2026-02-18-19-56' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
lib/group_cpus: handle const qualifier from clusters allocation type
kho: remove unnecessary WARN_ON(err) in kho_populate()
kho: fix missing early_memunmap() call in kho_populate()
scripts/gdb: implement x86_page_ops in mm.py
objpool: fix the overestimation of object pooling metadata size
selftests/memfd: use IPC semaphore instead of SIGSTOP/SIGCONT
delayacct: fix build regression on accounting tool
| -rw-r--r-- | kernel/liveupdate/kexec_handover.c | 28 | ||||
| -rw-r--r-- | lib/group_cpus.c | 2 | ||||
| -rw-r--r-- | lib/objpool.c | 2 | ||||
| -rw-r--r-- | scripts/gdb/linux/constants.py.in | 2 | ||||
| -rw-r--r-- | scripts/gdb/linux/mm.py | 173 | ||||
| -rw-r--r-- | tools/accounting/getdelays.c | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/memfd/memfd_test.c | 113 |
7 files changed, 301 insertions, 31 deletions
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index fb3a7b67676e..95601623b4d6 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -1463,36 +1463,37 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, struct kho_scratch *scratch = NULL; phys_addr_t mem_map_phys; void *fdt = NULL; + bool populated = false; int err; /* Validate the input FDT */ fdt = early_memremap(fdt_phys, fdt_len); if (!fdt) { pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys); - goto err_report; + goto report; } err = fdt_check_header(fdt); if (err) { pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n", fdt_phys, err); - goto err_unmap_fdt; + goto unmap_fdt; } err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE); if (err) { pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n", fdt_phys, KHO_FDT_COMPATIBLE, err); - goto err_unmap_fdt; + goto unmap_fdt; } mem_map_phys = kho_get_mem_map_phys(fdt); if (!mem_map_phys) - goto err_unmap_fdt; + goto unmap_fdt; scratch = early_memremap(scratch_phys, scratch_len); if (!scratch) { pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n", scratch_phys, scratch_len); - goto err_unmap_fdt; + goto unmap_fdt; } /* @@ -1506,10 +1507,10 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, memblock_add(area->addr, size); err = memblock_mark_kho_scratch(area->addr, size); - if (WARN_ON(err)) { + if (err) { pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %pe", &area->addr, &size, ERR_PTR(err)); - goto err_unmap_scratch; + goto unmap_scratch; } pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size); } @@ -1529,16 +1530,17 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, kho_in.scratch_phys = scratch_phys; kho_in.mem_map_phys = mem_map_phys; kho_scratch_cnt = scratch_cnt; - pr_info("found kexec handover data.\n"); - return; + populated = true; + pr_info("found kexec handover data.\n"); -err_unmap_scratch: +unmap_scratch: early_memunmap(scratch, scratch_len); -err_unmap_fdt: +unmap_fdt: early_memunmap(fdt, fdt_len); -err_report: - pr_warn("disabling KHO revival\n"); +report: + if (!populated) + pr_warn("disabling KHO revival\n"); } /* Helper functions for kexec_file_load */ diff --git a/lib/group_cpus.c b/lib/group_cpus.c index a93df70919df..d496c5001961 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -320,7 +320,7 @@ static int alloc_cluster_groups(unsigned int ncpus, goto no_cluster; /* Allocate memory based on cluster number. */ - clusters = kcalloc(ncluster, sizeof(struct cpumask *), GFP_KERNEL); + clusters = kcalloc(ncluster, sizeof(*clusters), GFP_KERNEL); if (!clusters) goto no_cluster; cluster_groups = kcalloc(ncluster, sizeof(struct node_groups), GFP_KERNEL); diff --git a/lib/objpool.c b/lib/objpool.c index b998b720c732..d98fadf1de16 100644 --- a/lib/objpool.c +++ b/lib/objpool.c @@ -142,7 +142,7 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size, pool->gfp = gfp & ~__GFP_ZERO; pool->context = context; pool->release = release; - slot_size = nr_cpu_ids * sizeof(struct objpool_slot); + slot_size = nr_cpu_ids * sizeof(struct objpool_slot *); pool->cpu_slots = kzalloc(slot_size, pool->gfp); if (!pool->cpu_slots) return -ENOMEM; diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index 6d475540c6ba..dab8b80bed69 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -150,8 +150,8 @@ LX_CONFIG(CONFIG_ARM64_64K_PAGES) if IS_BUILTIN(CONFIG_ARM64): LX_VALUE(CONFIG_ARM64_PA_BITS) LX_VALUE(CONFIG_ARM64_VA_BITS) - LX_VALUE(CONFIG_PAGE_SHIFT) LX_VALUE(CONFIG_ARCH_FORCE_MAX_ORDER) +LX_VALUE(CONFIG_PAGE_SHIFT) LX_CONFIG(CONFIG_SPARSEMEM) LX_CONFIG(CONFIG_SPARSEMEM_EXTREME) LX_CONFIG(CONFIG_SPARSEMEM_VMEMMAP) diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py index 7571aebbe650..d78908f6664d 100644 --- a/scripts/gdb/linux/mm.py +++ b/scripts/gdb/linux/mm.py @@ -26,8 +26,179 @@ class page_ops(): raise gdb.GdbError('Only support CONFIG_SPARSEMEM_VMEMMAP now') if constants.LX_CONFIG_ARM64 and utils.is_target_arch('aarch64'): self.ops = aarch64_page_ops() + elif utils.is_target_arch('x86_64') or utils.is_target_arch('x86-64'): + self.ops = x86_page_ops() else: - raise gdb.GdbError('Only support aarch64 now') + raise gdb.GdbError('Only support aarch64 and x86_64 now') + +class x86_page_ops(): + def __init__(self): + self.struct_page_size = utils.get_page_type().sizeof + self.PAGE_SHIFT = constants.LX_CONFIG_PAGE_SHIFT + self.PAGE_SIZE = 1 << self.PAGE_SHIFT + self.PAGE_MASK = (~(self.PAGE_SIZE - 1)) & ((1 << 64) - 1) + + self.PAGE_OFFSET = int(gdb.parse_and_eval("page_offset_base")) + self.VMEMMAP_START = int(gdb.parse_and_eval("vmemmap_base")) + self.PHYS_BASE = int(gdb.parse_and_eval("phys_base")) + self.START_KERNEL_map = 0xffffffff80000000 + + self.KERNEL_START = gdb.parse_and_eval("_text") + self.KERNEL_END = gdb.parse_and_eval("_end") + + self.VMALLOC_START = int(gdb.parse_and_eval("vmalloc_base")) + if self.VMALLOC_START == 0xffffc90000000000: + self.VMALLOC_END = self.VMALLOC_START + (32 * 1024 * 1024 * 1024 * 1024) - 1 + elif self.VMALLOC_START == 0xffa0000000000000: + self.VMALLOC_END = self.VMALLOC_START + (12800 * 1024 * 1024 * 1024 * 1024) - 1 + else: + self.VMALLOC_END = self.VMALLOC_START + (12800 * 1024 * 1024 * 1024 * 1024) - 1 + + self.MAX_PHYSMEM_BITS = 46 + self.SECTION_SIZE_BITS = 27 + self.MAX_ORDER = 10 + + self.SECTIONS_SHIFT = self.MAX_PHYSMEM_BITS - self.SECTION_SIZE_BITS + self.NR_MEM_SECTIONS = 1 << self.SECTIONS_SHIFT + self.PFN_SECTION_SHIFT = self.SECTION_SIZE_BITS - self.PAGE_SHIFT + self.PAGES_PER_SECTION = 1 << self.PFN_SECTION_SHIFT + self.PAGE_SECTION_MASK = (~(self.PAGES_PER_SECTION - 1)) & ((1 << 64) - 1) + + if constants.LX_CONFIG_SPARSEMEM_EXTREME: + self.SECTIONS_PER_ROOT = self.PAGE_SIZE // gdb.lookup_type("struct mem_section").sizeof + else: + self.SECTIONS_PER_ROOT = 1 + + self.NR_SECTION_ROOTS = DIV_ROUND_UP(self.NR_MEM_SECTIONS, self.SECTIONS_PER_ROOT) + self.SECTION_ROOT_MASK = self.SECTIONS_PER_ROOT - 1 + + try: + self.SECTION_HAS_MEM_MAP = 1 << int(gdb.parse_and_eval('SECTION_HAS_MEM_MAP_BIT')) + self.SECTION_IS_EARLY = 1 << int(gdb.parse_and_eval('SECTION_IS_EARLY_BIT')) + except: + self.SECTION_HAS_MEM_MAP = 1 << 0 + self.SECTION_IS_EARLY = 1 << 3 + + self.SUBSECTION_SHIFT = 21 + self.PAGES_PER_SUBSECTION = 1 << (self.SUBSECTION_SHIFT - self.PAGE_SHIFT) + + if constants.LX_CONFIG_NUMA and constants.LX_CONFIG_NODES_SHIFT: + self.NODE_SHIFT = constants.LX_CONFIG_NODES_SHIFT + else: + self.NODE_SHIFT = 0 + + self.MAX_NUMNODES = 1 << self.NODE_SHIFT + + self.vmemmap = gdb.Value(self.VMEMMAP_START).cast(utils.get_page_type().pointer()) + + def kasan_reset_tag(self, addr): + return addr + + def SECTION_NR_TO_ROOT(self, sec): + return sec // self.SECTIONS_PER_ROOT + + def __nr_to_section(self, nr): + root = self.SECTION_NR_TO_ROOT(nr) + mem_section = gdb.parse_and_eval("mem_section") + return mem_section[root][nr & self.SECTION_ROOT_MASK] + + def pfn_to_section_nr(self, pfn): + return pfn >> self.PFN_SECTION_SHIFT + + def section_nr_to_pfn(self, sec): + return sec << self.PFN_SECTION_SHIFT + + def __pfn_to_section(self, pfn): + return self.__nr_to_section(self.pfn_to_section_nr(pfn)) + + def pfn_to_section(self, pfn): + return self.__pfn_to_section(pfn) + + def subsection_map_index(self, pfn): + return (pfn & ~(self.PAGE_SECTION_MASK)) // self.PAGES_PER_SUBSECTION + + def pfn_section_valid(self, ms, pfn): + if constants.LX_CONFIG_SPARSEMEM_VMEMMAP: + idx = self.subsection_map_index(pfn) + return test_bit(idx, ms['usage']['subsection_map']) + else: + return True + + def valid_section(self, mem_section): + if mem_section != None and (mem_section['section_mem_map'] & self.SECTION_HAS_MEM_MAP): + return True + return False + + def early_section(self, mem_section): + if mem_section != None and (mem_section['section_mem_map'] & self.SECTION_IS_EARLY): + return True + return False + + def pfn_valid(self, pfn): + ms = None + if self.PHYS_PFN(self.PFN_PHYS(pfn)) != pfn: + return False + if self.pfn_to_section_nr(pfn) >= self.NR_MEM_SECTIONS: + return False + ms = self.__pfn_to_section(pfn) + + if not self.valid_section(ms): + return False + return self.early_section(ms) or self.pfn_section_valid(ms, pfn) + + def PFN_PHYS(self, pfn): + return pfn << self.PAGE_SHIFT + + def PHYS_PFN(self, phys): + return phys >> self.PAGE_SHIFT + + def __phys_to_virt(self, pa): + return pa + self.PAGE_OFFSET + + def __virt_to_phys(self, va): + if va >= self.START_KERNEL_map: + return va - self.START_KERNEL_map + self.PHYS_BASE + else: + return va - self.PAGE_OFFSET + + def virt_to_phys(self, va): + return self.__virt_to_phys(va) + + def virt_to_page(self, va): + return self.pfn_to_page(self.virt_to_pfn(va)) + + def __pa(self, va): + return self.__virt_to_phys(va) + + def __va(self, pa): + return self.__phys_to_virt(pa) + + def pfn_to_kaddr(self, pfn): + return self.__va(pfn << self.PAGE_SHIFT) + + def virt_to_pfn(self, va): + return self.PHYS_PFN(self.__virt_to_phys(va)) + + def sym_to_pfn(self, x): + return self.PHYS_PFN(self.__virt_to_phys(x)) + + def page_to_pfn(self, page): + return int(page.cast(utils.get_page_type().pointer()) - self.vmemmap) + + def pfn_to_page(self, pfn): + return self.vmemmap + pfn + + def page_to_phys(self, page): + return self.PFN_PHYS(self.page_to_pfn(page)) + + def page_to_virt(self, page): + return self.__va(self.page_to_phys(page)) + + def page_address(self, page): + return self.page_to_virt(page) + + def folio_address(self, folio): + return self.page_address(folio['page'].address) class aarch64_page_ops(): def __init__(self): diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 64796c0223be..50792df27707 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -196,20 +196,20 @@ static int get_family_id(int sd) #define delay_ms(t) (t / 1000000ULL) /* - * Format timespec64 to human readable string (YYYY-MM-DD HH:MM:SS) + * Format __kernel_timespec to human readable string (YYYY-MM-DD HH:MM:SS) * Returns formatted string or "N/A" if timestamp is zero */ -static const char *format_timespec64(struct timespec64 *ts) +static const char *format_timespec(struct __kernel_timespec *ts) { static char buffer[32]; struct tm tm_info; - time_t time_sec; + __kernel_time_t time_sec; /* Check if timestamp is zero (not set) */ if (ts->tv_sec == 0 && ts->tv_nsec == 0) return "N/A"; - time_sec = (time_t)ts->tv_sec; + time_sec = ts->tv_sec; /* Use thread-safe localtime_r */ if (localtime_r(&time_sec, &tm_info) == NULL) @@ -257,7 +257,7 @@ static const char *format_timespec64(struct timespec64 *ts) average_ms((double)(t)->cpu_delay_total, (t)->cpu_count), \ delay_ms((double)(t)->cpu_delay_max), \ delay_ms((double)(t)->cpu_delay_min), \ - format_timespec64(&(t)->cpu_delay_max_ts)); \ + format_timespec(&(t)->cpu_delay_max_ts)); \ } else if (version >= 16) { \ printf("%-10s%15s%15s%15s%15s%15s%15s%15s\n", \ "CPU", "count", "real total", "virtual total", \ @@ -316,7 +316,7 @@ static const char *format_timespec64(struct timespec64 *ts) average_ms((double)(t)->total, (t)->count), \ delay_ms((double)(t)->max), \ delay_ms((double)(t)->min), \ - format_timespec64(&(t)->max_ts)); \ + format_timespec(&(t)->max_ts)); \ } else if (version >= 16) { \ printf("%-10s%15s%15s%15s%15s%15s\n", \ name, "count", "delay total", "delay average", \ diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 5b993924cc3f..2ca07ea7202a 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -18,6 +18,9 @@ #include <sys/stat.h> #include <sys/syscall.h> #include <sys/wait.h> +#include <sys/types.h> +#include <sys/ipc.h> +#include <sys/sem.h> #include <unistd.h> #include <ctype.h> @@ -39,6 +42,20 @@ F_SEAL_EXEC) #define MFD_NOEXEC_SEAL 0x0008U +union semun { + int val; + struct semid_ds *buf; + unsigned short int *array; + struct seminfo *__buf; +}; + +/* + * we use semaphores on nested wait tasks due the use of CLONE_NEWPID: the + * child will be PID 1 and can't send SIGSTOP to themselves due special + * treatment of the init task, so the SIGSTOP/SIGCONT synchronization + * approach can't be used here. + */ +#define SEM_KEY 0xdeadbeef /* * Default is not to test hugetlbfs @@ -1333,8 +1350,22 @@ static int sysctl_nested(void *arg) static int sysctl_nested_wait(void *arg) { - /* Wait for a SIGCONT. */ - kill(getpid(), SIGSTOP); + int sem = semget(SEM_KEY, 1, 0600); + struct sembuf sembuf; + + if (sem < 0) { + perror("semget:"); + abort(); + } + sembuf.sem_num = 0; + sembuf.sem_flg = 0; + sembuf.sem_op = 0; + + if (semop(sem, &sembuf, 1) < 0) { + perror("semop:"); + abort(); + } + return sysctl_nested(arg); } @@ -1355,7 +1386,9 @@ static void test_sysctl_sysctl2_failset(void) static int sysctl_nested_child(void *arg) { - int pid; + int pid, sem; + union semun semun; + struct sembuf sembuf; printf("%s nested sysctl 0\n", memfd_str); sysctl_assert_write("0"); @@ -1389,23 +1422,53 @@ static int sysctl_nested_child(void *arg) test_sysctl_sysctl2_failset); join_thread(pid); + sem = semget(SEM_KEY, 1, IPC_CREAT | 0600); + if (sem < 0) { + perror("semget:"); + return 1; + } + semun.val = 1; + sembuf.sem_op = -1; + sembuf.sem_flg = 0; + sembuf.sem_num = 0; + /* Verify that the rules are actually inherited after fork. */ printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str); sysctl_assert_write("0"); + if (semctl(sem, 0, SETVAL, semun) < 0) { + perror("semctl:"); + return 1; + } + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, test_sysctl_sysctl1_failset); sysctl_assert_write("1"); - kill(pid, SIGCONT); + + /* Allow child to continue */ + if (semop(sem, &sembuf, 1) < 0) { + perror("semop:"); + return 1; + } join_thread(pid); printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str); sysctl_assert_write("0"); + if (semctl(sem, 0, SETVAL, semun) < 0) { + perror("semctl:"); + return 1; + } + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, test_sysctl_sysctl2_failset); sysctl_assert_write("2"); - kill(pid, SIGCONT); + + /* Allow child to continue */ + if (semop(sem, &sembuf, 1) < 0) { + perror("semop:"); + return 1; + } join_thread(pid); /* @@ -1415,28 +1478,62 @@ static int sysctl_nested_child(void *arg) */ printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str); sysctl_assert_write("2"); + + if (semctl(sem, 0, SETVAL, semun) < 0) { + perror("semctl:"); + return 1; + } + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, test_sysctl_sysctl2); sysctl_assert_write("1"); - kill(pid, SIGCONT); + + /* Allow child to continue */ + if (semop(sem, &sembuf, 1) < 0) { + perror("semop:"); + return 1; + } join_thread(pid); printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str); sysctl_assert_write("2"); + + if (semctl(sem, 0, SETVAL, semun) < 0) { + perror("semctl:"); + return 1; + } + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, test_sysctl_sysctl2); sysctl_assert_write("0"); - kill(pid, SIGCONT); + + /* Allow child to continue */ + if (semop(sem, &sembuf, 1) < 0) { + perror("semop:"); + return 1; + } join_thread(pid); printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str); sysctl_assert_write("1"); + + if (semctl(sem, 0, SETVAL, semun) < 0) { + perror("semctl:"); + return 1; + } + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, test_sysctl_sysctl1); sysctl_assert_write("0"); - kill(pid, SIGCONT); + /* Allow child to continue */ + if (semop(sem, &sembuf, 1) < 0) { + perror("semop:"); + return 1; + } join_thread(pid); + semctl(sem, 0, IPC_RMID); + return 0; } |
