| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-01-29 11:09:13 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-01-29 11:09:13 -0800 |
| commit | bcb6058a4b4596f12065276faeb9363dc4887ea9 (patch) | |
| tree | f0a5232fb0dd8dbba888720e1a0740c764e42d26 | |
| parent | 1cac38910ecb881b09f61f57545a771bbe57ba68 (diff) | |
| parent | bd58782995a2e6a07fd07255f3cc319f40b131c9 (diff) | |
Merge tag 'mm-hotfixes-stable-2026-01-29-09-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
"16 hotfixes. 9 are cc:stable, 12 are for MM.
There's a patch series from Pratyush Yadav which fixes a few things in
the new-in-6.19 LUO memfd code.
Plus the usual shower of singletons - please see the changelogs for
details"
* tag 'mm-hotfixes-stable-2026-01-29-09-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
vmcoreinfo: make hwerr_data visible for debugging
mm/zone_device: reinitialize large zone device private folios
mm/mm_init: don't cond_resched() in deferred_init_memmap_chunk() if called from deferred_grow_zone()
mm/kfence: randomize the freelist on initialization
kho: kho_preserve_vmalloc(): don't return 0 when ENOMEM
kho: init alloc tags when restoring pages from reserved memory
mm: memfd_luo: restore and free memfd_luo_ser on failure
mm: memfd_luo: use memfd_alloc_file() instead of shmem_file_setup()
memfd: export alloc_file()
flex_proportions: make fprop_new_period() hardirq safe
mailmap: add entry for Viacheslav Bocharov
mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn
mm/memory-failure: fix missing ->mf_stats count in hugetlb poison
mm, swap: restore swap_space attr aviod kernel panic
mm/kasan: fix KASAN poisoning in vrealloc()
mm/shmem, swap: fix race of truncate and swap entry split
| -rw-r--r-- | .mailmap | 1 |
| -rw-r--r-- | arch/powerpc/kvm/book3s_hv_uvmem.c | 2 |
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 |
| -rw-r--r-- | drivers/gpu/drm/drm_pagemap.c | 2 |
| -rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 |
| -rw-r--r-- | include/linux/kasan.h | 14 |
| -rw-r--r-- | include/linux/memfd.h | 6 |
| -rw-r--r-- | include/linux/memremap.h | 9 |
| -rw-r--r-- | kernel/liveupdate/kexec_handover.c | 12 |
| -rw-r--r-- | kernel/vmcore_info.c | 6 |
| -rw-r--r-- | lib/flex_proportions.c | 5 |
| -rw-r--r-- | lib/test_hmm.c | 4 |
| -rw-r--r-- | mm/kasan/common.c | 21 |
| -rw-r--r-- | mm/kfence/core.c | 23 |
| -rw-r--r-- | mm/memfd.c | 4 |
| -rw-r--r-- | mm/memfd_luo.c | 10 |
| -rw-r--r-- | mm/memory-failure.c | 99 |
| -rw-r--r-- | mm/memremap.c | 35 |
| -rw-r--r-- | mm/mm_init.c | 12 |
| -rw-r--r-- | mm/shmem.c | 45 |
| -rw-r--r-- | mm/swap.h | 2 |
| -rw-r--r-- | mm/swap_state.c | 3 |
| -rw-r--r-- | mm/vmalloc.c | 7 |
23 files changed, 239 insertions, 87 deletions
diff --git a/.mailmap b/.mailmap
--- a/.mailmap
+++ b/.mailmap
@@ -851,6 +851,7 @@ Valentin Schneider <vschneid@redhat.com> <valentin.schneider@arm.com>
 Veera Sundaram Sankaran <quic_veeras@quicinc.com> <veeras@codeaurora.org>
 Veerabhadrarao Badiganti <quic_vbadigan@quicinc.com> <vbadigan@codeaurora.org>
 Venkateswara Naralasetty <quic_vnaralas@quicinc.com> <vnaralas@codeaurora.org>
+Viacheslav Bocharov <v@baodeep.com> <adeep@lexina.in>
 Vikash Garodia <vikash.garodia@oss.qualcomm.com> <vgarodia@codeaurora.org>
 Vikash Garodia <vikash.garodia@oss.qualcomm.com> <quic_vgarodia@quicinc.com>
 Vincent Mailhol <mailhol@kernel.org> <mailhol.vincent@wanadoo.fr>
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index e5000bef90f2..7cf9310de0ec 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -723,7 +723,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
 
 	dpage = pfn_to_page(uvmem_pfn);
 	dpage->zone_device_data = pvt;
-	zone_device_page_init(dpage, 0);
+	zone_device_page_init(dpage, &kvmppc_uvmem_pgmap, 0);
 	return dpage;
 out_clear:
 	spin_lock(&kvmppc_uvmem_bitmap_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index af53e796ea1b..6ada7b4af7c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -217,7 +217,7 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
 	page = pfn_to_page(pfn);
 	svm_range_bo_ref(prange->svm_bo);
 	page->zone_device_data = prange->svm_bo;
-	zone_device_page_init(page, 0);
+	zone_device_page_init(page, page_pgmap(page), 0);
 }
 
 static void
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 06c1bd8fc4d1..704f2f945019 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -197,7 +197,7 @@ static void drm_pagemap_get_devmem_page(struct page *page,
 				     struct drm_pagemap_zdd *zdd)
 {
 	page->zone_device_data = drm_pagemap_zdd_get(zdd);
-	zone_device_page_init(page, 0);
+	zone_device_page_init(page, page_pgmap(page), 0);
 }
 
 /**
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 58071652679d..3d8031296eed 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -425,7 +425,7 @@ nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm, bool is_large)
 		order = ilog2(DMEM_CHUNK_NPAGES);
 	}
 
-	zone_device_folio_init(folio, order);
+	zone_device_folio_init(folio, page_pgmap(folio_page(folio, 0)), order);
 	return page;
 }
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 9c6ac4b62eb9..338a1921a50a 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -641,6 +641,17 @@ kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms,
 	__kasan_unpoison_vmap_areas(vms, nr_vms, flags);
 }
 
+void __kasan_vrealloc(const void *start, unsigned long old_size,
+		      unsigned long new_size);
+
+static __always_inline void kasan_vrealloc(const void *start,
+					   unsigned long old_size,
+					   unsigned long new_size)
+{
+	if (kasan_enabled())
+		__kasan_vrealloc(start, old_size, new_size);
+}
+
 #else /* CONFIG_KASAN_VMALLOC */
 
 static inline void kasan_populate_early_vm_area_shadow(void *start,
@@ -670,6 +681,9 @@ kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms,
 				  kasan_vmalloc_flags_t flags)
 { }
 
+static inline void kasan_vrealloc(const void *start, unsigned long old_size,
+				  unsigned long new_size) { }
+
 #endif /* CONFIG_KASAN_VMALLOC */
 
 #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
diff --git a/include/linux/memfd.h b/include/linux/memfd.h
index cc74de3dbcfe..c328a7b356d0 100644
--- a/include/linux/memfd.h
+++ b/include/linux/memfd.h
@@ -17,6 +17,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx);
  * to by vm_flags_ptr.
  */
 int memfd_check_seals_mmap(struct file *file, vm_flags_t *vm_flags_ptr);
+struct file *memfd_alloc_file(const char *name, unsigned int flags);
 #else
 static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a)
 {
@@ -31,6 +32,11 @@ static inline int memfd_check_seals_mmap(struct file *file,
 {
 	return 0;
 }
+
+static inline struct file *memfd_alloc_file(const char *name, unsigned int flags)
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif
 
 #endif /* __LINUX_MEMFD_H */
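The memfd.h hunk above exports the previously static memfd file constructor so that other kernel code can create a memfd-backed file directly. A minimal sketch of an in-kernel caller, assuming only the prototype added by this patch (example_get_memfd_file() is a made-up name; the "no name, no flags" call mirrors the memfd_luo_retrieve() change later in this series):

```c
#include <linux/memfd.h>
#include <linux/err.h>
#include <linux/file.h>

/* Illustrative only: create an unnamed, unsealed memfd-backed file. */
static struct file *example_get_memfd_file(void)
{
	struct file *file = memfd_alloc_file("", 0);

	/* When memfd support is compiled out, the header stub returns ERR_PTR(-EINVAL). */
	if (IS_ERR(file))
		return file;

	/* ... populate the file ... */
	return file;	/* caller drops the reference with fput() */
}
```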
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 713ec0435b48..e3c2ccf872a8 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -224,7 +224,8 @@ static inline bool is_fsdax_page(const struct page *page)
 }
 
 #ifdef CONFIG_ZONE_DEVICE
-void zone_device_page_init(struct page *page, unsigned int order);
+void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap,
+			   unsigned int order);
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
 void memunmap_pages(struct dev_pagemap *pgmap);
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
@@ -234,9 +235,11 @@ bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
 
 unsigned long memremap_compat_align(void);
 
-static inline void zone_device_folio_init(struct folio *folio, unsigned int order)
+static inline void zone_device_folio_init(struct folio *folio,
+					  struct dev_pagemap *pgmap,
+					  unsigned int order)
 {
-	zone_device_page_init(&folio->page, order);
+	zone_device_page_init(&folio->page, pgmap, order);
 	if (order)
 		folio_set_large_rmappable(folio);
 }
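With the new prototypes above, a caller has to hand the owning dev_pagemap to the initializer instead of relying on whatever state was left in the page from a previous use. A hedged sketch of a device-private driver's allocation path after this change; example_pgmap and example_alloc_device_folio() are hypothetical, while zone_device_folio_init() and page_pgmap() are the helpers used by the call sites patched above:

```c
#include <linux/memremap.h>
#include <linux/mm.h>

/* Hypothetical pagemap the driver registered earlier with memremap_pages(). */
static struct dev_pagemap example_pgmap;

static struct folio *example_alloc_device_folio(struct page *page, unsigned int order)
{
	struct folio *folio = page_folio(page);

	/*
	 * Pass the driver's own pgmap (as book3s_hv_uvmem now does), or derive
	 * it from the page itself (as the DRM call sites above do):
	 *	zone_device_folio_init(folio, page_pgmap(page), order);
	 */
	zone_device_folio_init(folio, &example_pgmap, order);
	return folio;
}
```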
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index d4482b6e3cae..90d411a59f76 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -255,6 +255,14 @@ static struct page *kho_restore_page(phys_addr_t phys, bool is_folio)
 	if (is_folio && info.order)
 		prep_compound_page(page, info.order);
 
+	/* Always mark headpage's codetag as empty to avoid accounting mismatch */
+	clear_page_tag_ref(page);
+	if (!is_folio) {
+		/* Also do that for the non-compound tail pages */
+		for (unsigned int i = 1; i < nr_pages; i++)
+			clear_page_tag_ref(page + i);
+	}
+
 	adjust_managed_page_count(page, nr_pages);
 	return page;
 }
@@ -1006,8 +1014,10 @@ int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation)
 		chunk->phys[idx++] = phys;
 		if (idx == ARRAY_SIZE(chunk->phys)) {
 			chunk = new_vmalloc_chunk(chunk);
-			if (!chunk)
+			if (!chunk) {
+				err = -ENOMEM;
 				goto err_free;
+			}
 			idx = 0;
 		}
 	}
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index fe9bf8db1922..e2784038bbed 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -36,7 +36,11 @@ struct hwerr_info {
 	time64_t timestamp;
 };
 
-static struct hwerr_info hwerr_data[HWERR_RECOV_MAX];
+/*
+ * The hwerr_data[] array is declared with global scope so that it remains
+ * accessible to vmcoreinfo even when Link Time Optimization (LTO) is enabled.
+ */
+struct hwerr_info hwerr_data[HWERR_RECOV_MAX];
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 			  void *data, size_t data_len)
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
index 84ecccddc771..012d5614efb9 100644
--- a/lib/flex_proportions.c
+++ b/lib/flex_proportions.c
@@ -64,13 +64,14 @@ void fprop_global_destroy(struct fprop_global *p)
 bool fprop_new_period(struct fprop_global *p, int periods)
 {
 	s64 events = percpu_counter_sum(&p->events);
+	unsigned long flags;
 
 	/*
 	 * Don't do anything if there are no events.
 	 */
 	if (events <= 1)
 		return false;
-	preempt_disable_nested();
+	local_irq_save(flags);
 	write_seqcount_begin(&p->sequence);
 	if (periods < 64)
 		events -= events >> periods;
@@ -78,7 +79,7 @@ bool fprop_new_period(struct fprop_global *p, int periods)
 	percpu_counter_add(&p->events, -events);
 	p->period += periods;
 	write_seqcount_end(&p->sequence);
-	preempt_enable_nested();
+	local_irq_restore(flags);
 	return true;
 }
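The flex_proportions fix swaps preempt_disable_nested() for local_irq_save() around the seqcount write section: if a reader can run in hardirq context on the same CPU, merely disabling preemption still lets the interrupt land inside an open write section and spin forever on an odd sequence count. The resulting writer/reader pattern, as a self-contained sketch (example_seq, example_value and the function names are made up):

```c
#include <linux/seqlock.h>
#include <linux/irqflags.h>

static seqcount_t example_seq = SEQCNT_ZERO(example_seq);
static unsigned long example_value;

/* Writer: hardirqs are off, so no reader can nest on top of an open write. */
static void example_update(unsigned long v)
{
	unsigned long flags;

	local_irq_save(flags);
	write_seqcount_begin(&example_seq);
	example_value = v;
	write_seqcount_end(&example_seq);
	local_irq_restore(flags);
}

/* Reader: may run in interrupt context; retries while a write is in flight. */
static unsigned long example_read(void)
{
	unsigned int seq;
	unsigned long v;

	do {
		seq = read_seqcount_begin(&example_seq);
		v = example_value;
	} while (read_seqcount_retry(&example_seq, seq));

	return v;
}
```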
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 8af169d3873a..455a6862ae50 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -662,7 +662,9 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror *dmirror,
 			goto error;
 	}
 
-	zone_device_folio_init(page_folio(dpage), order);
+	zone_device_folio_init(page_folio(dpage),
+			       page_pgmap(folio_page(page_folio(dpage), 0)),
+			       order);
 	dpage->zone_device_data = rpage;
 	return dpage;
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index ed489a14dddf..b7d05c2a6d93 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -606,4 +606,25 @@ void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms,
 		__kasan_unpoison_vmalloc(addr, size, flags | KASAN_VMALLOC_KEEP_TAG);
 	}
 }
+
+void __kasan_vrealloc(const void *addr, unsigned long old_size,
+		      unsigned long new_size)
+{
+	if (new_size < old_size) {
+		kasan_poison_last_granule(addr, new_size);
+
+		new_size = round_up(new_size, KASAN_GRANULE_SIZE);
+		old_size = round_up(old_size, KASAN_GRANULE_SIZE);
+		if (new_size < old_size)
+			__kasan_poison_vmalloc(addr + new_size,
+					       old_size - new_size);
+	} else if (new_size > old_size) {
+		old_size = round_down(old_size, KASAN_GRANULE_SIZE);
+		__kasan_unpoison_vmalloc(addr + old_size,
+					 new_size - old_size,
+					 KASAN_VMALLOC_PROT_NORMAL |
+					 KASAN_VMALLOC_VM_ALLOC |
+					 KASAN_VMALLOC_KEEP_TAG);
+	}
+}
 #endif
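__kasan_vrealloc() above is the single helper that keeps the vmalloc shadow in sync with an in-place vrealloc(): shrinking re-poisons the dropped tail (including the partial granule at the new end), growing unpoisons the added range while keeping the existing tag, and equal sizes are a no-op. The mm/vmalloc.c hunks at the end of this patch switch both the shrink and the grow-within-the-allocation paths to it; a rough sketch of that call pattern, with everything except kasan_vrealloc() hypothetical:

```c
#include <linux/kasan.h>

/*
 * Illustrative in-place resize: the caller has already checked that the
 * vmalloc area backing @p is at least @new_size bytes.
 */
static void *example_resize_in_place(void *p, unsigned long old_size,
				     unsigned long new_size)
{
	/*
	 * new_size < old_size: poison   [p + new_size, p + old_size)
	 * new_size > old_size: unpoison [p + old_size, p + new_size)
	 */
	kasan_vrealloc(p, old_size, new_size);
	return p;
}
```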
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index da0f5b6f5744..4f79ec720752 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -596,7 +596,7 @@ static void rcu_guarded_free(struct rcu_head *h)
 static unsigned long kfence_init_pool(void)
 {
 	unsigned long addr, start_pfn;
-	int i;
+	int i, rand;
 
 	if (!arch_kfence_init_pool())
 		return (unsigned long)__kfence_pool;
@@ -647,13 +647,27 @@ static unsigned long kfence_init_pool(void)
 		INIT_LIST_HEAD(&meta->list);
 		raw_spin_lock_init(&meta->lock);
 		meta->state = KFENCE_OBJECT_UNUSED;
-		meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */
-		list_add_tail(&meta->list, &kfence_freelist);
+		/* Use addr to randomize the freelist. */
+		meta->addr = i;
 
 		/* Protect the right redzone. */
-		if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
+		if (unlikely(!kfence_protect(addr + 2 * i * PAGE_SIZE + PAGE_SIZE)))
 			goto reset_slab;
+	}
+
+	for (i = CONFIG_KFENCE_NUM_OBJECTS; i > 0; i--) {
+		rand = get_random_u32_below(i);
+		swap(kfence_metadata_init[i - 1].addr, kfence_metadata_init[rand].addr);
+	}
+	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+		struct kfence_metadata *meta_1 = &kfence_metadata_init[i];
+		struct kfence_metadata *meta_2 = &kfence_metadata_init[meta_1->addr];
+
+		list_add_tail(&meta_2->list, &kfence_freelist);
+	}
+	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+		kfence_metadata_init[i].addr = addr;
 		addr += 2 * PAGE_SIZE;
 	}
@@ -666,6 +680,7 @@ static unsigned long kfence_init_pool(void)
 	return 0;
 
 reset_slab:
+	addr += 2 * i * PAGE_SIZE;
 	for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
 		struct page *page;
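The KFENCE change above randomizes the freelist in three steps: the init loop stashes the object index in meta->addr, a Fisher-Yates pass shuffles those indices, and two final loops link the metadata into kfence_freelist in shuffled order and then assign the real guarded addresses. The shuffle itself, reduced to a standalone hedged sketch (the array name and size are illustrative; get_random_u32_below() and swap() are the helpers the patch uses):

```c
#include <linux/random.h>
#include <linux/kernel.h>	/* swap() */

#define EXAMPLE_NR_OBJECTS	128

static unsigned long example_index[EXAMPLE_NR_OBJECTS];

/* Fisher-Yates: every permutation of the indices is equally likely. */
static void example_shuffle(void)
{
	int i, rand;

	for (i = EXAMPLE_NR_OBJECTS; i > 0; i--) {
		rand = get_random_u32_below(i);
		swap(example_index[i - 1], example_index[rand]);
	}
}
```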
diff --git a/mm/memfd.c b/mm/memfd.c
index ab5312aff14b..f032c6052926 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -456,7 +456,7 @@ err_name:
 	return ERR_PTR(error);
 }
 
-static struct file *alloc_file(const char *name, unsigned int flags)
+struct file *memfd_alloc_file(const char *name, unsigned int flags)
 {
 	unsigned int *file_seals;
 	struct file *file;
@@ -520,5 +520,5 @@ SYSCALL_DEFINE2(memfd_create,
 		return PTR_ERR(name);
 
 	fd_flags = (flags & MFD_CLOEXEC) ? O_CLOEXEC : 0;
-	return FD_ADD(fd_flags, alloc_file(name, flags));
+	return FD_ADD(fd_flags, memfd_alloc_file(name, flags));
 }
diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
index 4f6ba63b4310..a34fccc23b6a 100644
--- a/mm/memfd_luo.c
+++ b/mm/memfd_luo.c
@@ -78,6 +78,7 @@
 #include <linux/liveupdate.h>
 #include <linux/shmem_fs.h>
 #include <linux/vmalloc.h>
+#include <linux/memfd.h>
 #include "internal.h"
 
 static int memfd_luo_preserve_folios(struct file *file,
@@ -443,11 +444,11 @@ static int memfd_luo_retrieve(struct liveupdate_file_op_args *args)
 	if (!ser)
 		return -EINVAL;
 
-	file = shmem_file_setup("", 0, VM_NORESERVE);
-
+	file = memfd_alloc_file("", 0);
 	if (IS_ERR(file)) {
 		pr_err("failed to setup file: %pe\n", file);
-		return PTR_ERR(file);
+		err = PTR_ERR(file);
+		goto free_ser;
 	}
 
 	vfs_setpos(file, ser->pos, MAX_LFS_FILESIZE);
@@ -473,7 +474,8 @@ static int memfd_luo_retrieve(struct liveupdate_file_op_args *args)
 
 put_file:
 	fput(file);
-
+free_ser:
+	kho_restore_free(ser);
 	return err;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c80c2907da33..cf0d526e6d41 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -692,6 +692,8 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
 		unsigned long poisoned_pfn, struct to_kill *tk)
 {
 	unsigned long pfn = 0;
+	unsigned long hwpoison_vaddr;
+	unsigned long mask;
 
 	if (pte_present(pte)) {
 		pfn = pte_pfn(pte);
@@ -702,10 +704,12 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
 		pfn = softleaf_to_pfn(entry);
 	}
 
-	if (!pfn || pfn != poisoned_pfn)
+	mask = ~((1UL << (shift - PAGE_SHIFT)) - 1);
+	if (!pfn || pfn != (poisoned_pfn & mask))
 		return 0;
 
-	set_to_kill(tk, addr, shift);
+	hwpoison_vaddr = addr + ((poisoned_pfn - pfn) << PAGE_SHIFT);
+	set_to_kill(tk, hwpoison_vaddr, shift);
 	return 1;
 }
 
@@ -1883,12 +1887,22 @@ static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
 	return count;
 }
 
-static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
+#define MF_HUGETLB_FREED		0	/* freed hugepage */
+#define MF_HUGETLB_IN_USED		1	/* in-use hugepage */
+#define MF_HUGETLB_NON_HUGEPAGE		2	/* not a hugepage */
+#define MF_HUGETLB_FOLIO_PRE_POISONED	3	/* folio already poisoned */
+#define MF_HUGETLB_PAGE_PRE_POISONED	4	/* exact page already poisoned */
+#define MF_HUGETLB_RETRY		5	/* hugepage is busy, retry */
+/*
+ * Set hugetlb folio as hwpoisoned, update folio private raw hwpoison list
+ * to keep track of the poisoned pages.
+ */
+static int hugetlb_update_hwpoison(struct folio *folio, struct page *page)
 {
 	struct llist_head *head;
 	struct raw_hwp_page *raw_hwp;
 	struct raw_hwp_page *p;
-	int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0;
+	int ret = folio_test_set_hwpoison(folio) ? MF_HUGETLB_FOLIO_PRE_POISONED : 0;
 
 	/*
 	 * Once the hwpoison hugepage has lost reliable raw error info,
@@ -1896,20 +1910,17 @@ static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
 	 * so skip to add additional raw error info.
 	 */
 	if (folio_test_hugetlb_raw_hwp_unreliable(folio))
-		return -EHWPOISON;
+		return MF_HUGETLB_FOLIO_PRE_POISONED;
 	head = raw_hwp_list_head(folio);
 	llist_for_each_entry(p, head->first, node) {
 		if (p->page == page)
-			return -EHWPOISON;
+			return MF_HUGETLB_PAGE_PRE_POISONED;
 	}
 
 	raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC);
 	if (raw_hwp) {
 		raw_hwp->page = page;
 		llist_add(&raw_hwp->node, head);
-		/* the first error event will be counted in action_result(). */
-		if (ret)
-			num_poisoned_pages_inc(page_to_pfn(page));
 	} else {
 		/*
 		 * Failed to save raw error info. We no longer trace all
@@ -1957,42 +1968,39 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
 
 /*
  * Called from hugetlb code with hugetlb_lock held.
- *
- * Return values:
- *   0  - free hugepage
- *   1  - in-use hugepage
- *   2  - not a hugepage
- *   -EBUSY  - the hugepage is busy (try to retry)
- *   -EHWPOISON  - the hugepage is already hwpoisoned
 */
 int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 				 bool *migratable_cleared)
 {
 	struct page *page = pfn_to_page(pfn);
 	struct folio *folio = page_folio(page);
-	int ret = 2;	/* fallback to normal page handling */
 	bool count_increased = false;
+	int ret, rc;
 
-	if (!folio_test_hugetlb(folio))
+	if (!folio_test_hugetlb(folio)) {
+		ret = MF_HUGETLB_NON_HUGEPAGE;
 		goto out;
-
-	if (flags & MF_COUNT_INCREASED) {
-		ret = 1;
+	} else if (flags & MF_COUNT_INCREASED) {
+		ret = MF_HUGETLB_IN_USED;
 		count_increased = true;
 	} else if (folio_test_hugetlb_freed(folio)) {
-		ret = 0;
+		ret = MF_HUGETLB_FREED;
 	} else if (folio_test_hugetlb_migratable(folio)) {
-		ret = folio_try_get(folio);
-		if (ret)
+		if (folio_try_get(folio)) {
+			ret = MF_HUGETLB_IN_USED;
 			count_increased = true;
+		} else {
+			ret = MF_HUGETLB_FREED;
+		}
 	} else {
-		ret = -EBUSY;
+		ret = MF_HUGETLB_RETRY;
 		if (!(flags & MF_NO_RETRY))
 			goto out;
 	}
 
-	if (folio_set_hugetlb_hwpoison(folio, page)) {
-		ret = -EHWPOISON;
+	rc = hugetlb_update_hwpoison(folio, page);
+	if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) {
+		ret = rc;
 		goto out;
 	}
 
@@ -2017,10 +2025,16 @@ out:
  * with basic operations like hugepage allocation/free/demotion.
 * So some of prechecks for hwpoison (pinning, and testing/setting
 * PageHWPoison) should be done in single hugetlb_lock range.
+ * Returns:
+ *   0           - not hugetlb, or recovered
+ *   -EBUSY      - not recovered
+ *   -EOPNOTSUPP - hwpoison_filter'ed
+ *   -EHWPOISON  - folio or exact page already poisoned
+ *   -EFAULT     - kill_accessing_process finds current->mm null
 */
 static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
 {
-	int res;
+	int res, rv;
 	struct page *p = pfn_to_page(pfn);
 	struct folio *folio;
 	unsigned long page_flags;
@@ -2029,22 +2043,29 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 	*hugetlb = 1;
 retry:
 	res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
-	if (res == 2) { /* fallback to normal page handling */
+	switch (res) {
+	case MF_HUGETLB_NON_HUGEPAGE: /* fallback to normal page handling */
 		*hugetlb = 0;
 		return 0;
-	} else if (res == -EHWPOISON) {
-		if (flags & MF_ACTION_REQUIRED) {
-			folio = page_folio(p);
-			res = kill_accessing_process(current, folio_pfn(folio), flags);
-		}
-		action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
-		return res;
-	} else if (res == -EBUSY) {
+	case MF_HUGETLB_RETRY:
 		if (!(flags & MF_NO_RETRY)) {
 			flags |= MF_NO_RETRY;
 			goto retry;
 		}
 		return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
+	case MF_HUGETLB_FOLIO_PRE_POISONED:
+	case MF_HUGETLB_PAGE_PRE_POISONED:
+		rv = -EHWPOISON;
+		if (flags & MF_ACTION_REQUIRED)
+			rv = kill_accessing_process(current, pfn, flags);
+		if (res == MF_HUGETLB_PAGE_PRE_POISONED)
+			action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
+		else
+			action_result(pfn, MF_MSG_HUGE, MF_FAILED);
+		return rv;
+	default:
+		WARN_ON((res != MF_HUGETLB_FREED) && (res != MF_HUGETLB_IN_USED));
+		break;
 	}
 
 	folio = page_folio(p);
@@ -2055,7 +2076,7 @@ retry:
 		if (migratable_cleared)
 			folio_set_hugetlb_migratable(folio);
 		folio_unlock(folio);
-		if (res == 1)
+		if (res == MF_HUGETLB_IN_USED)
 			folio_put(folio);
 		return -EOPNOTSUPP;
 	}
@@ -2064,7 +2085,7 @@ retry:
 	 * Handling free hugepage. The possible race with hugepage allocation
 	 * or demotion can be prevented by PageHWPoison flag.
 	 */
-	if (res == 0) {
+	if (res == MF_HUGETLB_FREED) {
 		folio_unlock(folio);
 		if (__page_handle_poison(p) > 0) {
 			page_ref_inc(p);
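The check_hwpoisoned_entry() change lets a hugetlb mapping match when the poisoned pfn is any base page inside the mapped huge page (the mapping's pfn is compared against the poisoned pfn masked down to the mapping's alignment), and then reports the virtual address of the exact poisoned base page instead of the start of the mapping. A worked example of that arithmetic with made-up numbers, mirroring the patched logic for a 2M PMD mapping (shift = 21, PAGE_SHIFT = 12):

```c
#include <linux/mm.h>	/* PAGE_SHIFT */

/*
 * Example values: the mapping's pfn is 0x10000 and the poisoned pfn is
 * 0x10042, a tail page of the same 2M huge page; shift is 21.
 */
static unsigned long example_hwpoison_vaddr(unsigned long addr, short shift,
					    unsigned long pfn,
					    unsigned long poisoned_pfn)
{
	/* shift = 21: mask = ~((1UL << 9) - 1) = ~0x1ff */
	unsigned long mask = ~((1UL << (shift - PAGE_SHIFT)) - 1);

	/* 0x10042 & ~0x1ff == 0x10000 == pfn, so this mapping matches. */
	if (!pfn || pfn != (poisoned_pfn & mask))
		return 0;

	/* addr + ((0x10042 - 0x10000) << 12) == addr + 0x42000 */
	return addr + ((poisoned_pfn - pfn) << PAGE_SHIFT);
}
```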
diff --git a/mm/memremap.c b/mm/memremap.c
index 63c6ab4fdf08..ac7be07e3361 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -477,10 +477,43 @@ void free_zone_device_folio(struct folio *folio)
 	}
 }
 
-void zone_device_page_init(struct page *page, unsigned int order)
+void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap,
+			   unsigned int order)
 {
+	struct page *new_page = page;
+	unsigned int i;
+
+	VM_WARN_ON_ONCE(order > MAX_ORDER_NR_PAGES);
+
+	for (i = 0; i < (1UL << order); ++i, ++new_page) {
+		struct folio *new_folio = (struct folio *)new_page;
+
+		/*
+		 * new_page could have been part of previous higher order folio
+		 * which encodes the order, in page + 1, in the flags bits. We
+		 * blindly clear bits which could have set my order field here,
+		 * including page head.
+		 */
+		new_page->flags.f &= ~0xffUL; /* Clear possible order, page head */
+
+#ifdef NR_PAGES_IN_LARGE_FOLIO
+		/*
+		 * This pointer math looks odd, but new_page could have been
+		 * part of a previous higher order folio, which sets _nr_pages
+		 * in page + 1 (new_page). Therefore, we use pointer casting to
+		 * correctly locate the _nr_pages bits within new_page which
+		 * could have modified by previous higher order folio.
+		 */
+		((struct folio *)(new_page - 1))->_nr_pages = 0;
+#endif
+
+		new_folio->mapping = NULL;
+		new_folio->pgmap = pgmap;	/* Also clear compound head */
+		new_folio->share = 0;		/* fsdax only, unused for device private */
+		VM_WARN_ON_FOLIO(folio_ref_count(new_folio), new_folio);
+		VM_WARN_ON_FOLIO(!folio_is_zone_device(new_folio), new_folio);
+	}
+
 	/*
 	 * Drivers shouldn't be allocating pages after calling
 	 * memunmap_pages().
diff --git a/mm/mm_init.c b/mm/mm_init.c
index fc2a6f1e518f..2a809cd8e7fa 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2059,7 +2059,7 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
  */
 static unsigned long __init
 deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
-			   struct zone *zone)
+			   struct zone *zone, bool can_resched)
 {
 	int nid = zone_to_nid(zone);
 	unsigned long nr_pages = 0;
@@ -2085,10 +2085,10 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
 
 		spfn = chunk_end;
 
-		if (irqs_disabled())
-			touch_nmi_watchdog();
-		else
+		if (can_resched)
 			cond_resched();
+		else
+			touch_nmi_watchdog();
 	}
 }
 
@@ -2101,7 +2101,7 @@ deferred_init_memmap_job(unsigned long start_pfn, unsigned long end_pfn,
 {
 	struct zone *zone = arg;
 
-	deferred_init_memmap_chunk(start_pfn, end_pfn, zone);
+	deferred_init_memmap_chunk(start_pfn, end_pfn, zone, true);
 }
 
 static unsigned int __init
@@ -2216,7 +2216,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
 	for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
 	     nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone);
 	     spfn = epfn, epfn += PAGES_PER_SECTION) {
-		nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone);
+		nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone, false);
 	}
 
 	/*
diff --git a/mm/shmem.c b/mm/shmem.c
index ec6c01378e9d..6c3485d24d66 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -962,17 +962,29 @@ static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
  * being freed).
  */
 static long shmem_free_swap(struct address_space *mapping,
-			    pgoff_t index, void *radswap)
+			    pgoff_t index, pgoff_t end, void *radswap)
 {
-	int order = xa_get_order(&mapping->i_pages, index);
-	void *old;
+	XA_STATE(xas, &mapping->i_pages, index);
+	unsigned int nr_pages = 0;
+	pgoff_t base;
+	void *entry;
 
-	old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
-	if (old != radswap)
-		return 0;
-	free_swap_and_cache_nr(radix_to_swp_entry(radswap), 1 << order);
+	xas_lock_irq(&xas);
+	entry = xas_load(&xas);
+	if (entry == radswap) {
+		nr_pages = 1 << xas_get_order(&xas);
+		base = round_down(xas.xa_index, nr_pages);
+		if (base < index || base + nr_pages - 1 > end)
+			nr_pages = 0;
+		else
+			xas_store(&xas, NULL);
+	}
+	xas_unlock_irq(&xas);
+
+	if (nr_pages)
+		free_swap_and_cache_nr(radix_to_swp_entry(radswap), nr_pages);
 
-	return 1 << order;
+	return nr_pages;
 }
 
 /*
@@ -1124,8 +1136,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			if (xa_is_value(folio)) {
 				if (unfalloc)
 					continue;
-				nr_swaps_freed += shmem_free_swap(mapping,
-							indices[i], folio);
+				nr_swaps_freed += shmem_free_swap(mapping, indices[i],
+								  end - 1, folio);
 				continue;
 			}
 
@@ -1191,12 +1203,23 @@ whole_folios:
 			folio = fbatch.folios[i];
 
 			if (xa_is_value(folio)) {
+				int order;
 				long swaps_freed;
 
 				if (unfalloc)
 					continue;
-				swaps_freed = shmem_free_swap(mapping, indices[i], folio);
+				swaps_freed = shmem_free_swap(mapping, indices[i],
+							      end - 1, folio);
 				if (!swaps_freed) {
+					/*
+					 * If found a large swap entry cross the end border,
+					 * skip it as the truncate_inode_partial_folio above
+					 * should have at least zerod its content once.
+					 */
+					order = shmem_confirm_swap(mapping, indices[i],
+								   radix_to_swp_entry(folio));
+					if (order > 0 && indices[i] + (1 << order) > end)
+						continue;
 					/* Swap was replaced by page: retry */
 					index = indices[i];
 					break;
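shmem_free_swap() now takes the last index being truncated and only removes a swap entry when the whole (possibly large) entry lies inside [index, end]; if a large entry straddles the boundary, nr_pages is forced to 0 and the shmem_confirm_swap() check added in shmem_undo_range() skips the entry instead of restarting the scan. A small sketch of just the bounds test, with illustrative numbers in the comment (the helper name is made up):

```c
#include <linux/kernel.h>	/* round_down() */
#include <linux/types.h>

/* True only when the whole large entry fits inside [index, end]. */
static bool example_entry_fully_inside(pgoff_t entry_index, unsigned int nr_pages,
				       pgoff_t index, pgoff_t end)
{
	pgoff_t base = round_down(entry_index, nr_pages);

	/*
	 * e.g. a 16-page entry found at index 96 while truncating [96, 103]:
	 * base = 96, base + 15 = 111 > 103, so the entry is left in place.
	 */
	return base >= index && base + nr_pages - 1 <= end;
}
```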
diff --git a/mm/swap.h b/mm/swap.h
index d034c13d8dd2..1bd466da3039 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -198,7 +198,7 @@ int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
 void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
 
 /* linux/mm/swap_state.c */
-extern struct address_space swap_space __ro_after_init;
+extern struct address_space swap_space __read_mostly;
 static inline struct address_space *swap_address_space(swp_entry_t entry)
 {
 	return &swap_space;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5f97c6ae70a2..44d228982521 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -37,8 +37,7 @@ static const struct address_space_operations swap_aops = {
 #endif
 };
 
-/* Set swap_space as read only as swap cache is handled by swap table */
-struct address_space swap_space __ro_after_init = {
+struct address_space swap_space __read_mostly = {
 	.a_ops = &swap_aops,
 };
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 628f96e83b11..e286c2d2068c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4322,7 +4322,7 @@ void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align
 		if (want_init_on_free() || want_init_on_alloc(flags))
 			memset((void *)p + size, 0, old_size - size);
 		vm->requested_size = size;
-		kasan_poison_vmalloc(p + size, old_size - size);
+		kasan_vrealloc(p, old_size, size);
 		return (void *)p;
 	}
 
@@ -4330,16 +4330,13 @@ void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align
 	 * We already have the bytes available in the allocation; use them.
 	 */
 	if (size <= alloced_size) {
-		kasan_unpoison_vmalloc(p + old_size, size - old_size,
-				       KASAN_VMALLOC_PROT_NORMAL |
-				       KASAN_VMALLOC_VM_ALLOC |
-				       KASAN_VMALLOC_KEEP_TAG);
 		/*
 		 * No need to zero memory here, as unused memory will have
 		 * already been zeroed at initial allocation time or during
 		 * realloc shrink time.
 		 */
 		vm->requested_size = size;
+		kasan_vrealloc(p, old_size, size);
 		return (void *)p;
 	}
