author		Linus Torvalds <torvalds@linux-foundation.org>	2026-01-20 13:32:16 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2026-01-20 13:32:16 -0800
commit		c25f2fb1f469deaed2df8db524d91f3321a0f816 (patch)
tree		e07a4343b060d8dc081b6a772430c2d8e2f0bd3b
parent		c03e9c42ae8f9be76a0cf55ef3f88663f0f6a63a (diff)
parent		16aca2c98a6fdf071e5a1a765a295995d7c7e346 (diff)
Merge tag 'mm-hotfixes-stable-2026-01-20-13-09' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:

 - A patch series from David Hildenbrand which fixes a few things
   related to hugetlb PMD sharing

 - The remainder are singletons, please see their changelogs for
   details

* tag 'mm-hotfixes-stable-2026-01-20-13-09' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm: restore per-memcg proactive reclaim with !CONFIG_NUMA
  mm/kfence: fix potential deadlock in reboot notifier
  Docs/mm/allocation-profiling: describe sysctrl limitations in debug mode
  mm: do not copy page tables unnecessarily for VM_UFFD_WP
  mm/hugetlb: fix excessive IPI broadcasts when unsharing PMD tables using mmu_gather
  mm/rmap: fix two comments related to huge_pmd_unshare()
  mm/hugetlb: fix two comments related to huge_pmd_unshare()
  mm/hugetlb: fix hugetlb_pmd_shared()
  mm: remove unnecessary and incorrect mmap lock assert
  x86/kfence: avoid writing L1TF-vulnerable PTEs
  mm/vma: do not leak memory when .mmap_prepare swaps the file
  migrate: correct lock ordering for hugetlb file folios
  panic: only warn about deprecated panic_print on write access
  fs/writeback: skip AS_NO_DATA_INTEGRITY mappings in wait_sb_inodes()
  mm: take into account mm_cid size for mm_struct static definitions
  mm: rename cpu_bitmap field to flexible_array
  mm: add missing static initializer for init_mm::mm_cid.lock
-rw-r--r--  Documentation/admin-guide/sysctl/vm.rst      |   4
-rw-r--r--  Documentation/mm/allocation-profiling.rst    |  10
-rw-r--r--  arch/x86/include/asm/kfence.h                |  29
-rw-r--r--  drivers/firmware/efi/efi.c                   |   2
-rw-r--r--  fs/fs-writeback.c                            |   7
-rw-r--r--  fs/fuse/file.c                               |   4
-rw-r--r--  include/asm-generic/tlb.h                    |  77
-rw-r--r--  include/linux/hugetlb.h                      |  17
-rw-r--r--  include/linux/mm.h                           |   6
-rw-r--r--  include/linux/mm_types.h                     |  19
-rw-r--r--  include/linux/pagemap.h                      |  11
-rw-r--r--  kernel/panic.c                               |   4
-rw-r--r--  mm/hugetlb.c                                 | 131
-rw-r--r--  mm/init-mm.c                                 |   5
-rw-r--r--  mm/internal.h                                |   8
-rw-r--r--  mm/kfence/core.c                             |  17
-rw-r--r--  mm/memory.c                                  |  11
-rw-r--r--  mm/migrate.c                                 |  12
-rw-r--r--  mm/mmu_gather.c                              |  33
-rw-r--r--  mm/rmap.c                                    |  45
-rw-r--r--  mm/vma.c                                     |  11
-rw-r--r--  mm/vmscan.c                                  |  13
22 files changed, 341 insertions, 135 deletions
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 4d71211fdad8..245bf6394935 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -494,6 +494,10 @@ memory allocations.
The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
+When CONFIG_MEM_ALLOC_PROFILING_DEBUG=y, this control is read-only to avoid
+warnings produced by allocations made while profiling is disabled and freed
+when it's enabled.
+
memory_failure_early_kill
=========================
diff --git a/Documentation/mm/allocation-profiling.rst b/Documentation/mm/allocation-profiling.rst
index 316311240e6a..5389d241176a 100644
--- a/Documentation/mm/allocation-profiling.rst
+++ b/Documentation/mm/allocation-profiling.rst
@@ -33,6 +33,16 @@ Boot parameter:
sysctl:
/proc/sys/vm/mem_profiling
+ 1: Enable memory profiling.
+
+ 0: Disable memory profiling.
+
+ The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
+
+ When CONFIG_MEM_ALLOC_PROFILING_DEBUG=y, this control is read-only to avoid
+ warnings produced by allocations made while profiling is disabled and freed
+ when it's enabled.
+
Runtime info:
/proc/allocinfo
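
[Editor's note] For reference, a minimal userspace sketch of toggling the sysctl described by both documentation hunks above. It is not part of this series; the exact error returned when the control is read-only under CONFIG_MEM_ALLOC_PROFILING_DEBUG=y is an assumption and is only reported, not interpreted.

/* Hypothetical standalone helper, not from the patch. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *path = "/proc/sys/vm/mem_profiling";
	FILE *f = fopen(path, "w");

	if (!f || fputs("1\n", f) == EOF || fflush(f) == EOF) {
		fprintf(stderr, "%s: %s (control may be read-only)\n",
			path, strerror(errno));
		if (f)
			fclose(f);
		return 1;
	}
	fclose(f);
	printf("memory profiling enabled\n");
	return 0;
}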
diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h
index ff5c7134a37a..acf9ffa1a171 100644
--- a/arch/x86/include/asm/kfence.h
+++ b/arch/x86/include/asm/kfence.h
@@ -42,10 +42,34 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
{
unsigned int level;
pte_t *pte = lookup_address(addr, &level);
+ pteval_t val;
if (WARN_ON(!pte || level != PG_LEVEL_4K))
return false;
+ val = pte_val(*pte);
+
+ /*
+ * protect requires making the page not-present. If the PTE is
+ * already in the right state, there's nothing to do.
+ */
+ if (protect != !!(val & _PAGE_PRESENT))
+ return true;
+
+ /*
+ * Otherwise, invert the entire PTE. This avoids writing out an
+ * L1TF-vulnerable PTE (not present, without the high address bits
+ * set).
+ */
+ set_pte(pte, __pte(~val));
+
+ /*
+ * If the page was protected (non-present) and we're making it
+ * present, there is no need to flush the TLB at all.
+ */
+ if (!protect)
+ return true;
+
/*
* We need to avoid IPIs, as we may get KFENCE allocations or faults
* with interrupts disabled. Therefore, the below is best-effort, and
@@ -53,11 +77,6 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
* lazy fault handling takes care of faults after the page is PRESENT.
*/
- if (protect)
- set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
- else
- set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
-
/*
* Flush this CPU's TLB, assuming whoever did the allocation/free is
* likely to continue running on this CPU.
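
[Editor's note] The inversion trick above can be modelled in isolation. The sketch below assumes a simplified 64-bit PTE with bit 0 as the present bit; it illustrates the idea only and is not the kernel implementation.

/* Standalone model of "invert the whole PTE instead of clearing present". */
#include <stdint.h>
#include <stdio.h>

#define PAGE_PRESENT 0x1ULL

static uint64_t toggle_present(uint64_t pteval, int protect)
{
	/* Already in the requested state: nothing to do. */
	if (protect != !!(pteval & PAGE_PRESENT))
		return pteval;
	/*
	 * Flip every bit rather than only PAGE_PRESENT, so a not-present
	 * entry never carries address bits that look like a valid physical
	 * address (the L1TF concern).
	 */
	return ~pteval;
}

int main(void)
{
	uint64_t pte = 0x1234000ULL | PAGE_PRESENT;

	pte = toggle_present(pte, 1);	/* protect: becomes not-present */
	printf("protected:   %#llx\n", (unsigned long long)pte);
	pte = toggle_present(pte, 0);	/* unprotect: original value restored */
	printf("unprotected: %#llx\n", (unsigned long long)pte);
	return 0;
}

Unprotecting is then just a second inversion, so no L1TF-vulnerable intermediate value is ever written out.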
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index f5ff6e84a9b7..17b5f3415465 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -74,10 +74,10 @@ struct mm_struct efi_mm = {
.page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
.user_ns = &init_user_ns,
- .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
#ifdef CONFIG_SCHED_MM_CID
.mm_cid.lock = __RAW_SPIN_LOCK_UNLOCKED(efi_mm.mm_cid.lock),
#endif
+ .flexible_array = MM_STRUCT_FLEXIBLE_ARRAY_INIT,
};
struct workqueue_struct *efi_rts_wq;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6800886c4d10..baa2f2141146 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2750,8 +2750,13 @@ static void wait_sb_inodes(struct super_block *sb)
* The mapping can appear untagged while still on-list since we
* do not have the mapping lock. Skip it here, wb completion
* will remove it.
+ *
+ * If the mapping does not have data integrity semantics,
+ * there's no need to wait for the writeout to complete, as the
+ * mapping cannot guarantee that data is persistently stored.
*/
- if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK) ||
+ mapping_no_data_integrity(mapping))
continue;
spin_unlock_irq(&sb->s_inode_wblist_lock);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 01bc894e9c2b..3b2a171e652f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3200,8 +3200,10 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
inode->i_fop = &fuse_file_operations;
inode->i_data.a_ops = &fuse_file_aops;
- if (fc->writeback_cache)
+ if (fc->writeback_cache) {
mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data);
+ mapping_set_no_data_integrity(&inode->i_data);
+ }
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 1fff717cae51..4d679d2a206b 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -46,7 +46,8 @@
*
* The mmu_gather API consists of:
*
- * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu()
+ * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_gather_mmu_vma() /
+ * tlb_finish_mmu()
*
* start and finish a mmu_gather
*
@@ -364,6 +365,20 @@ struct mmu_gather {
unsigned int vma_huge : 1;
unsigned int vma_pfn : 1;
+ /*
+ * Did we unshare (unmap) any shared page tables? For now only
+ * used for hugetlb PMD table sharing.
+ */
+ unsigned int unshared_tables : 1;
+
+ /*
+ * Did we unshare any page tables such that they are now exclusive
+ * and could get reused+modified by the new owner? When setting this
+ * flag, "unshared_tables" will be set as well. For now only used
+ * for hugetlb PMD table sharing.
+ */
+ unsigned int fully_unshared_tables : 1;
+
unsigned int batch_count;
#ifndef CONFIG_MMU_GATHER_NO_GATHER
@@ -400,6 +415,7 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
tlb->cleared_pmds = 0;
tlb->cleared_puds = 0;
tlb->cleared_p4ds = 0;
+ tlb->unshared_tables = 0;
/*
* Do not reset mmu_gather::vma_* fields here, we do not
* call into tlb_start_vma() again to set them if there is an
@@ -484,7 +500,7 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
* these bits.
*/
if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
- tlb->cleared_puds || tlb->cleared_p4ds))
+ tlb->cleared_puds || tlb->cleared_p4ds || tlb->unshared_tables))
return;
tlb_flush(tlb);
@@ -773,6 +789,63 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
}
#endif
+#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
+static inline void tlb_unshare_pmd_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt,
+ unsigned long addr)
+{
+ /*
+ * The caller must make sure that concurrent unsharing + exclusive
+ * reuse is impossible until tlb_flush_unshared_tables() was called.
+ */
+ VM_WARN_ON_ONCE(!ptdesc_pmd_is_shared(pt));
+ ptdesc_pmd_pts_dec(pt);
+
+ /* Clearing a PUD pointing at a PMD table with PMD leaves. */
+ tlb_flush_pmd_range(tlb, addr & PUD_MASK, PUD_SIZE);
+
+ /*
+ * If the page table is now exclusively owned, we fully unshared
+ * a page table.
+ */
+ if (!ptdesc_pmd_is_shared(pt))
+ tlb->fully_unshared_tables = true;
+ tlb->unshared_tables = true;
+}
+
+static inline void tlb_flush_unshared_tables(struct mmu_gather *tlb)
+{
+ /*
+ * As soon as the caller drops locks to allow for reuse of
+ * previously-shared tables, these tables could get modified and
+ * even reused outside of hugetlb context, so we have to make sure that
+ * any page table walkers (incl. TLB, GUP-fast) are aware of that
+ * change.
+ *
+ * Even if we are not fully unsharing a PMD table, we must
+ * flush the TLB for the unsharer now.
+ */
+ if (tlb->unshared_tables)
+ tlb_flush_mmu_tlbonly(tlb);
+
+ /*
+ * Similarly, we must make sure that concurrent GUP-fast will not
+ * walk previously-shared page tables that are getting modified+reused
+ * elsewhere. So broadcast an IPI to wait for any concurrent GUP-fast.
+ *
+ * We only perform this when we are the last sharer of a page table,
+ * as the IPI will reach all CPUs: any GUP-fast.
+ *
+ * Note that on configs where tlb_remove_table_sync_one() is a NOP,
+ * the expectation is that the tlb_flush_mmu_tlbonly() would have issued
+ * required IPIs already for us.
+ */
+ if (tlb->fully_unshared_tables) {
+ tlb_remove_table_sync_one();
+ tlb->fully_unshared_tables = false;
+ }
+}
+#endif /* CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */
+
#endif /* CONFIG_MMU */
#endif /* _ASM_GENERIC__TLB_H */
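
[Editor's note] As a rough illustration of the bookkeeping these helpers introduce, here is a small userspace model (all names invented, share counting heavily simplified) of why a TLB flush is recorded on every unshare but the expensive broadcast only happens once a table has lost its last other sharer:

#include <stdbool.h>
#include <stdio.h>

struct model_gather {
	bool unshared_tables;
	bool fully_unshared_tables;
};

static void model_unshare(struct model_gather *tlb, int *sharers)
{
	(*sharers)--;				/* ptdesc_pmd_pts_dec() analogue */
	tlb->unshared_tables = true;
	if (*sharers == 1)			/* table is now exclusively owned */
		tlb->fully_unshared_tables = true;
}

static void model_flush(struct model_gather *tlb)
{
	if (tlb->unshared_tables)
		puts("flush TLB for the unsharing mm");
	if (tlb->fully_unshared_tables) {
		puts("broadcast (IPI) to wait out concurrent GUP-fast");
		tlb->fully_unshared_tables = false;
	}
}

int main(void)
{
	struct model_gather tlb = { false, false };
	int sharers = 3;			/* three mms map the PMD table */

	model_unshare(&tlb, &sharers);		/* 3 -> 2: flush only */
	model_unshare(&tlb, &sharers);		/* 2 -> 1: last other sharer gone */
	model_flush(&tlb);
	return 0;
}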
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 019a1c5281e4..e51b8ef0cebd 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -240,8 +240,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
unsigned long hugetlb_mask_last_page(struct hstate *h);
-int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep);
+int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
@@ -300,13 +301,17 @@ static inline struct address_space *hugetlb_folio_mapping_lock_write(
return NULL;
}
-static inline int huge_pmd_unshare(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+static inline int huge_pmd_unshare(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
return 0;
}
+static inline void huge_pmd_unshare_flush(struct mmu_gather *tlb,
+ struct vm_area_struct *vma)
+{
+}
+
static inline void adjust_range_if_pmd_sharing_possible(
struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
@@ -1326,7 +1331,7 @@ static inline __init void hugetlb_cma_reserve(int order)
#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
- return page_count(virt_to_page(pte)) > 1;
+ return ptdesc_pmd_is_shared(virt_to_ptdesc(pte));
}
#else
static inline bool hugetlb_pmd_shared(pte_t *pte)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6f959d8ca4b4..f0d5be9dc736 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -608,7 +608,11 @@ enum {
/*
* Flags which should result in page tables being copied on fork. These are
* flags which indicate that the VMA maps page tables which cannot be
- * reconsistuted upon page fault, so necessitate page table copying upon
+ * reconsistuted upon page fault, so necessitate page table copying upon fork.
+ *
+ * Note that these flags should be compared with the DESTINATION VMA not the
+ * source, as VM_UFFD_WP may not be propagated to destination, while all other
+ * flags will be.
*
* VM_PFNMAP / VM_MIXEDMAP - These contain kernel-mapped data which cannot be
* reasonably reconstructed on page fault.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 42af2292951d..78950eb8926d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1329,7 +1329,7 @@ struct mm_struct {
* The mm_cpumask needs to be at the end of mm_struct, because it
* is dynamically sized based on nr_cpu_ids.
*/
- unsigned long cpu_bitmap[];
+ char flexible_array[] __aligned(__alignof__(unsigned long));
};
/* Copy value to the first system word of mm flags, non-atomically. */
@@ -1366,19 +1366,24 @@ static inline void __mm_flags_set_mask_bits_word(struct mm_struct *mm,
MT_FLAGS_USE_RCU)
extern struct mm_struct init_mm;
+#define MM_STRUCT_FLEXIBLE_ARRAY_INIT \
+{ \
+ [0 ... sizeof(cpumask_t) + MM_CID_STATIC_SIZE - 1] = 0 \
+}
+
/* Pointer magic because the dynamic array size confuses some compilers. */
static inline void mm_init_cpumask(struct mm_struct *mm)
{
unsigned long cpu_bitmap = (unsigned long)mm;
- cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap);
+ cpu_bitmap += offsetof(struct mm_struct, flexible_array);
cpumask_clear((struct cpumask *)cpu_bitmap);
}
/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
{
- return (struct cpumask *)&mm->cpu_bitmap;
+ return (struct cpumask *)&mm->flexible_array;
}
#ifdef CONFIG_LRU_GEN
@@ -1469,7 +1474,7 @@ static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm)
{
unsigned long bitmap = (unsigned long)mm;
- bitmap += offsetof(struct mm_struct, cpu_bitmap);
+ bitmap += offsetof(struct mm_struct, flexible_array);
/* Skip cpu_bitmap */
bitmap += cpumask_size();
return (struct cpumask *)bitmap;
@@ -1495,7 +1500,7 @@ static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *
mm_init_cid(mm, p);
return 0;
}
-#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))
+# define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))
static inline void mm_destroy_cid(struct mm_struct *mm)
{
@@ -1509,6 +1514,8 @@ static inline unsigned int mm_cid_size(void)
return cpumask_size() + bitmap_size(num_possible_cpus());
}
+/* Use 2 * NR_CPUS as worse case for static allocation. */
+# define MM_CID_STATIC_SIZE (2 * sizeof(cpumask_t))
#else /* CONFIG_SCHED_MM_CID */
static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { }
static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; }
@@ -1517,11 +1524,13 @@ static inline unsigned int mm_cid_size(void)
{
return 0;
}
+# define MM_CID_STATIC_SIZE 0
#endif /* CONFIG_SCHED_MM_CID */
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
+void tlb_gather_mmu_vma(struct mmu_gather *tlb, struct vm_area_struct *vma);
extern void tlb_finish_mmu(struct mmu_gather *tlb);
struct vm_fault;
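
[Editor's note] A small userspace model (struct and helper names invented) of the layout behind this change: one trailing flexible array backs the CPU bitmap and, with CONFIG_SCHED_MM_CID, the mm_cid storage right behind it, which is why static definitions must reserve the worst-case size up front.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS		64
#define CPUMASK_BYTES	(NR_CPUS / 8)

struct model_mm {
	long fixed_fields;	/* stands in for everything before the array */
	char flexible_array[] __attribute__((aligned(sizeof(unsigned long))));
};

static void *model_mm_cpumask(struct model_mm *mm)
{
	return (char *)mm + offsetof(struct model_mm, flexible_array);
}

static void *model_mm_cpus_allowed(struct model_mm *mm)
{
	/* The mm_cid storage starts right after the CPU bitmap. */
	return (char *)model_mm_cpumask(mm) + CPUMASK_BYTES;
}

int main(void)
{
	/* Worst case: CPU bitmap plus twice a cpumask for the CID storage. */
	struct model_mm *mm = calloc(1, sizeof(*mm) + 3 * CPUMASK_BYTES);

	if (!mm)
		return 1;
	printf("cpumask at offset %zu, cpus_allowed at offset %zu\n",
	       (size_t)((char *)model_mm_cpumask(mm) - (char *)mm),
	       (size_t)((char *)model_mm_cpus_allowed(mm) - (char *)mm));
	free(mm);
	return 0;
}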
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 31a848485ad9..ec442af3f886 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -210,6 +210,7 @@ enum mapping_flags {
AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9,
AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't
account usage to user cgroups */
+ AS_NO_DATA_INTEGRITY = 11, /* no data integrity guarantees */
/* Bits 16-25 are used for FOLIO_ORDER */
AS_FOLIO_ORDER_BITS = 5,
AS_FOLIO_ORDER_MIN = 16,
@@ -345,6 +346,16 @@ static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct addres
return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
}
+static inline void mapping_set_no_data_integrity(struct address_space *mapping)
+{
+ set_bit(AS_NO_DATA_INTEGRITY, &mapping->flags);
+}
+
+static inline bool mapping_no_data_integrity(const struct address_space *mapping)
+{
+ return test_bit(AS_NO_DATA_INTEGRITY, &mapping->flags);
+}
+
static inline gfp_t mapping_gfp_mask(const struct address_space *mapping)
{
return mapping->gfp_mask;
diff --git a/kernel/panic.c b/kernel/panic.c
index 0d52210a9e2b..0c20fcaae98a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -131,7 +131,8 @@ static int proc_taint(const struct ctl_table *table, int write,
static int sysctl_panic_print_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- panic_print_deprecated();
+ if (write)
+ panic_print_deprecated();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
@@ -1014,7 +1015,6 @@ static int panic_print_set(const char *val, const struct kernel_param *kp)
static int panic_print_get(char *val, const struct kernel_param *kp)
{
- panic_print_deprecated();
return param_get_ulong(val, kp);
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e0ab14020513..a1832da0f623 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5112,7 +5112,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
unsigned long last_addr_mask;
pte_t *src_pte, *dst_pte;
struct mmu_notifier_range range;
- bool shared_pmd = false;
+ struct mmu_gather tlb;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, old_addr,
old_end);
@@ -5122,6 +5122,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
* range.
*/
flush_cache_range(vma, range.start, range.end);
+ tlb_gather_mmu_vma(&tlb, vma);
mmu_notifier_invalidate_range_start(&range);
last_addr_mask = hugetlb_mask_last_page(h);
@@ -5138,8 +5139,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
if (huge_pte_none(huge_ptep_get(mm, old_addr, src_pte)))
continue;
- if (huge_pmd_unshare(mm, vma, old_addr, src_pte)) {
- shared_pmd = true;
+ if (huge_pmd_unshare(&tlb, vma, old_addr, src_pte)) {
old_addr |= last_addr_mask;
new_addr |= last_addr_mask;
continue;
@@ -5150,15 +5150,16 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
break;
move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte, sz);
+ tlb_remove_huge_tlb_entry(h, &tlb, src_pte, old_addr);
}
- if (shared_pmd)
- flush_hugetlb_tlb_range(vma, range.start, range.end);
- else
- flush_hugetlb_tlb_range(vma, old_end - len, old_end);
+ tlb_flush_mmu_tlbonly(&tlb);
+ huge_pmd_unshare_flush(&tlb, vma);
+
mmu_notifier_invalidate_range_end(&range);
i_mmap_unlock_write(mapping);
hugetlb_vma_unlock_write(vma);
+ tlb_finish_mmu(&tlb);
return len + old_addr - old_end;
}
@@ -5177,7 +5178,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long sz = huge_page_size(h);
bool adjust_reservation;
unsigned long last_addr_mask;
- bool force_flush = false;
WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
@@ -5200,10 +5200,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, vma, address, ptep)) {
+ if (huge_pmd_unshare(tlb, vma, address, ptep)) {
spin_unlock(ptl);
- tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
- force_flush = true;
address |= last_addr_mask;
continue;
}
@@ -5319,21 +5317,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
tlb_end_vma(tlb, vma);
- /*
- * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
- * could defer the flush until now, since by holding i_mmap_rwsem we
- * guaranteed that the last reference would not be dropped. But we must
- * do the flushing before we return, as otherwise i_mmap_rwsem will be
- * dropped and the last reference to the shared PMDs page might be
- * dropped as well.
- *
- * In theory we could defer the freeing of the PMD pages as well, but
- * huge_pmd_unshare() relies on the exact page_count for the PMD page to
- * detect sharing, so we cannot defer the release of the page either.
- * Instead, do flush now.
- */
- if (force_flush)
- tlb_flush_mmu_tlbonly(tlb);
+ huge_pmd_unshare_flush(tlb, vma);
}
void __hugetlb_zap_begin(struct vm_area_struct *vma,
@@ -6432,11 +6416,11 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
pte_t pte;
struct hstate *h = hstate_vma(vma);
long pages = 0, psize = huge_page_size(h);
- bool shared_pmd = false;
struct mmu_notifier_range range;
unsigned long last_addr_mask;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+ struct mmu_gather tlb;
/*
* In the case of shared PMDs, the area to flush could be beyond
@@ -6449,6 +6433,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
BUG_ON(address >= end);
flush_cache_range(vma, range.start, range.end);
+ tlb_gather_mmu_vma(&tlb, vma);
mmu_notifier_invalidate_range_start(&range);
hugetlb_vma_lock_write(vma);
@@ -6475,7 +6460,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
}
}
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, vma, address, ptep)) {
+ if (huge_pmd_unshare(&tlb, vma, address, ptep)) {
/*
* When uffd-wp is enabled on the vma, unshare
* shouldn't happen at all. Warn about it if it
@@ -6484,7 +6469,6 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
WARN_ON_ONCE(uffd_wp || uffd_wp_resolve);
pages++;
spin_unlock(ptl);
- shared_pmd = true;
address |= last_addr_mask;
continue;
}
@@ -6545,23 +6529,16 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
pte = huge_pte_clear_uffd_wp(pte);
huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
pages++;
+ tlb_remove_huge_tlb_entry(h, &tlb, ptep, address);
}
next:
spin_unlock(ptl);
cond_resched();
}
- /*
- * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
- * may have cleared our pud entry and done put_page on the page table:
- * once we release i_mmap_rwsem, another task can do the final put_page
- * and that page table be reused and filled with junk. If we actually
- * did unshare a page of pmds, flush the range corresponding to the pud.
- */
- if (shared_pmd)
- flush_hugetlb_tlb_range(vma, range.start, range.end);
- else
- flush_hugetlb_tlb_range(vma, start, end);
+
+ tlb_flush_mmu_tlbonly(&tlb);
+ huge_pmd_unshare_flush(&tlb, vma);
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs() we are
* downgrading page table protection not changing it to point to a new
@@ -6572,6 +6549,7 @@ next:
i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma);
mmu_notifier_invalidate_range_end(&range);
+ tlb_finish_mmu(&tlb);
return pages > 0 ? (pages << h->order) : pages;
}
@@ -6928,18 +6906,27 @@ out:
return pte;
}
-/*
- * unmap huge page backed by shared pte.
+/**
+ * huge_pmd_unshare - Unmap a pmd table if it is shared by multiple users
+ * @tlb: the current mmu_gather.
+ * @vma: the vma covering the pmd table.
+ * @addr: the address we are trying to unshare.
+ * @ptep: pointer into the (pmd) page table.
*
- * Called with page table lock held.
+ * Called with the page table lock held, the i_mmap_rwsem held in write mode
+ * and the hugetlb vma lock held in write mode.
*
- * returns: 1 successfully unmapped a shared pte page
- * 0 the underlying pte page is not shared, or it is the last user
+ * Note: The caller must call huge_pmd_unshare_flush() before dropping the
+ * i_mmap_rwsem.
+ *
+ * Returns: 1 if it was a shared PMD table and it got unmapped, or 0 if it
+ * was not a shared PMD table.
*/
-int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
{
unsigned long sz = huge_page_size(hstate_vma(vma));
+ struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd = pgd_offset(mm, addr);
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
@@ -6951,18 +6938,36 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
i_mmap_assert_write_locked(vma->vm_file->f_mapping);
hugetlb_vma_assert_locked(vma);
pud_clear(pud);
- /*
- * Once our caller drops the rmap lock, some other process might be
- * using this page table as a normal, non-hugetlb page table.
- * Wait for pending gup_fast() in other threads to finish before letting
- * that happen.
- */
- tlb_remove_table_sync_one();
- ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
+
+ tlb_unshare_pmd_ptdesc(tlb, virt_to_ptdesc(ptep), addr);
+
mm_dec_nr_pmds(mm);
return 1;
}
+/*
+ * huge_pmd_unshare_flush - Complete a sequence of huge_pmd_unshare() calls
+ * @tlb: the current mmu_gather.
+ * @vma: the vma covering the pmd table.
+ *
+ * Perform necessary TLB flushes or IPI broadcasts to synchronize PMD table
+ * unsharing with concurrent page table walkers.
+ *
+ * This function must be called after a sequence of huge_pmd_unshare()
+ * calls while still holding the i_mmap_rwsem.
+ */
+void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ /*
+ * We must synchronize page table unsharing such that nobody will
+ * try reusing a previously-shared page table while it might still
+ * be in use by previous sharers (TLB, GUP_fast).
+ */
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
+ tlb_flush_unshared_tables(tlb);
+}
+
#else /* !CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -6971,12 +6976,16 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
return NULL;
}
-int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
{
return 0;
}
+void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+}
+
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
@@ -7243,6 +7252,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long sz = huge_page_size(h);
struct mm_struct *mm = vma->vm_mm;
struct mmu_notifier_range range;
+ struct mmu_gather tlb;
unsigned long address;
spinlock_t *ptl;
pte_t *ptep;
@@ -7254,6 +7264,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
return;
flush_cache_range(vma, start, end);
+ tlb_gather_mmu_vma(&tlb, vma);
+
/*
* No need to call adjust_range_if_pmd_sharing_possible(), because
* we have already done the PUD_SIZE alignment.
@@ -7272,10 +7284,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
- huge_pmd_unshare(mm, vma, address, ptep);
+ huge_pmd_unshare(&tlb, vma, address, ptep);
spin_unlock(ptl);
}
- flush_hugetlb_tlb_range(vma, start, end);
+ huge_pmd_unshare_flush(&tlb, vma);
if (take_locks) {
i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma);
@@ -7285,6 +7297,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
* Documentation/mm/mmu_notifier.rst.
*/
mmu_notifier_invalidate_range_end(&range);
+ tlb_finish_mmu(&tlb);
}
/*
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 4600e7605cab..c5556bb9d5f0 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -44,7 +44,10 @@ struct mm_struct init_mm = {
.mm_lock_seq = SEQCNT_ZERO(init_mm.mm_lock_seq),
#endif
.user_ns = &init_user_ns,
- .cpu_bitmap = CPU_BITS_NONE,
+#ifdef CONFIG_SCHED_MM_CID
+ .mm_cid.lock = __RAW_SPIN_LOCK_UNLOCKED(init_mm.mm_cid.lock),
+#endif
+ .flexible_array = MM_STRUCT_FLEXIBLE_ARRAY_INIT,
INIT_MM_CONTEXT(init_mm)
};
diff --git a/mm/internal.h b/mm/internal.h
index e430da900430..f35dbcf99a86 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -538,16 +538,8 @@ extern unsigned long highest_memmap_pfn;
bool folio_isolate_lru(struct folio *folio);
void folio_putback_lru(struct folio *folio);
extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
-#ifdef CONFIG_NUMA
int user_proactive_reclaim(char *buf,
struct mem_cgroup *memcg, pg_data_t *pgdat);
-#else
-static inline int user_proactive_reclaim(char *buf,
- struct mem_cgroup *memcg, pg_data_t *pgdat)
-{
- return 0;
-}
-#endif
/*
* in mm/rmap.c:
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 577a1699c553..da0f5b6f5744 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -823,6 +823,9 @@ static struct notifier_block kfence_check_canary_notifier = {
static struct delayed_work kfence_timer;
#ifdef CONFIG_KFENCE_STATIC_KEYS
+/* Wait queue to wake up allocation-gate timer task. */
+static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);
+
static int kfence_reboot_callback(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -832,7 +835,12 @@ static int kfence_reboot_callback(struct notifier_block *nb,
*/
WRITE_ONCE(kfence_enabled, false);
/* Cancel any pending timer work */
- cancel_delayed_work_sync(&kfence_timer);
+ cancel_delayed_work(&kfence_timer);
+ /*
+ * Wake up any blocked toggle_allocation_gate() so it can complete
+ * early while the system is still able to handle IPIs.
+ */
+ wake_up(&allocation_wait);
return NOTIFY_OK;
}
@@ -842,9 +850,6 @@ static struct notifier_block kfence_reboot_notifier = {
.priority = INT_MAX, /* Run early to stop timers ASAP */
};
-/* Wait queue to wake up allocation-gate timer task. */
-static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);
-
static void wake_up_kfence_timer(struct irq_work *work)
{
wake_up(&allocation_wait);
@@ -873,7 +878,9 @@ static void toggle_allocation_gate(struct work_struct *work)
/* Enable static key, and await allocation to happen. */
static_branch_enable(&kfence_allocation_key);
- wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate) > 0);
+ wait_event_idle(allocation_wait,
+ atomic_read(&kfence_allocation_gate) > 0 ||
+ !READ_ONCE(kfence_enabled));
/* Disable static key and reset timer. */
static_branch_disable(&kfence_allocation_key);
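
[Editor's note] The shape of this fix is the familiar "include the shutdown flag in the wait condition" pattern: cancel without blocking, then wake the waiter so it can observe that it is no longer enabled. A self-contained pthreads sketch of the same idea (illustrative names, not kernel code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool enabled = true;
static int allocation_gate;

static void *gate_waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	/* Analogue of: wait_event(gate > 0 || !enabled) */
	while (allocation_gate == 0 && enabled)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	puts("waiter finished");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, gate_waiter, NULL);

	/* Shutdown path: flip the flag and wake, never block behind the waiter. */
	pthread_mutex_lock(&lock);
	enabled = false;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	return 0;
}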
diff --git a/mm/memory.c b/mm/memory.c
index 2a55edc48a65..da360a6eb8a4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1465,7 +1465,11 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
static bool
vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
{
- if (src_vma->vm_flags & VM_COPY_ON_FORK)
+ /*
+ * We check against dst_vma as while sane VMA flags will have been
+ * copied, VM_UFFD_WP may be set only on dst_vma.
+ */
+ if (dst_vma->vm_flags & VM_COPY_ON_FORK)
return true;
/*
* The presence of an anon_vma indicates an anonymous VMA has page
@@ -1963,10 +1967,9 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
do {
next = pud_addr_end(addr, end);
if (pud_trans_huge(*pud)) {
- if (next - addr != HPAGE_PUD_SIZE) {
- mmap_assert_locked(tlb->mm);
+ if (next - addr != HPAGE_PUD_SIZE)
split_huge_pud(vma, pud, addr);
- } else if (zap_huge_pud(tlb, vma, pud, addr))
+ else if (zap_huge_pud(tlb, vma, pud, addr))
goto next;
/* fall through */
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 5169f9717f60..4688b9e38cd2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1458,6 +1458,7 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
int page_was_mapped = 0;
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
+ enum ttu_flags ttu = 0;
if (folio_ref_count(src) == 1) {
/* page was freed from under us. So we are done. */
@@ -1498,8 +1499,6 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
goto put_anon;
if (folio_mapped(src)) {
- enum ttu_flags ttu = 0;
-
if (!folio_test_anon(src)) {
/*
* In shared mappings, try_to_unmap could potentially
@@ -1516,16 +1515,17 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
try_to_migrate(src, ttu);
page_was_mapped = 1;
-
- if (ttu & TTU_RMAP_LOCKED)
- i_mmap_unlock_write(mapping);
}
if (!folio_mapped(src))
rc = move_to_new_folio(dst, src, mode);
if (page_was_mapped)
- remove_migration_ptes(src, !rc ? dst : src, 0);
+ remove_migration_ptes(src, !rc ? dst : src,
+ ttu ? RMP_LOCKED : 0);
+
+ if (ttu & TTU_RMAP_LOCKED)
+ i_mmap_unlock_write(mapping);
unlock_put_anon:
folio_unlock(dst);
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 247e3f9db6c7..7468ec388455 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -10,6 +10,7 @@
#include <linux/swap.h>
#include <linux/rmap.h>
#include <linux/pgalloc.h>
+#include <linux/hugetlb.h>
#include <asm/tlb.h>
@@ -426,6 +427,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
#endif
tlb->vma_pfn = 0;
+ tlb->fully_unshared_tables = 0;
__tlb_reset_range(tlb);
inc_tlb_flush_pending(tlb->mm);
}
@@ -460,6 +462,31 @@ void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
}
/**
+ * tlb_gather_mmu_vma - initialize an mmu_gather structure for operating on a
+ * single VMA
+ * @tlb: the mmu_gather structure to initialize
+ * @vma: the vm_area_struct
+ *
+ * Called to initialize an (on-stack) mmu_gather structure for operating on
+ * a single VMA. In contrast to tlb_gather_mmu(), calling this function will
+ * not require another call to tlb_start_vma(). In contrast to tlb_start_vma(),
+ * this function will *not* call flush_cache_range().
+ *
+ * For hugetlb VMAs, this function will also initialize the mmu_gather
+ * page_size accordingly, not requiring a separate call to
+ * tlb_change_page_size().
+ *
+ */
+void tlb_gather_mmu_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ tlb_gather_mmu(tlb, vma->vm_mm);
+ tlb_update_vma_flags(tlb, vma);
+ if (is_vm_hugetlb_page(vma))
+ /* All entries have the same size. */
+ tlb_change_page_size(tlb, huge_page_size(hstate_vma(vma)));
+}
+
+/**
* tlb_finish_mmu - finish an mmu_gather structure
* @tlb: the mmu_gather structure to finish
*
@@ -469,6 +496,12 @@ void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
void tlb_finish_mmu(struct mmu_gather *tlb)
{
/*
+ * We expect an earlier huge_pmd_unshare_flush() call to sort this out,
+ * due to complicated locking requirements with page table unsharing.
+ */
+ VM_WARN_ON_ONCE(tlb->fully_unshared_tables);
+
+ /*
* If there are parallel threads are doing PTE changes on same range
* under non-exclusive lock (e.g., mmap_lock read-side) but defer TLB
* flush by batching, one thread may end up seeing inconsistent PTEs
diff --git a/mm/rmap.c b/mm/rmap.c
index f955f02d570e..7b9879ef442d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -76,7 +76,7 @@
#include <linux/mm_inline.h>
#include <linux/oom.h>
-#include <asm/tlbflush.h>
+#include <asm/tlb.h>
#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>
@@ -2008,26 +2008,25 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* if unsuccessful.
*/
if (!anon) {
+ struct mmu_gather tlb;
+
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
if (!hugetlb_vma_trylock_write(vma))
goto walk_abort;
- if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
+
+ tlb_gather_mmu_vma(&tlb, vma);
+ if (huge_pmd_unshare(&tlb, vma, address, pvmw.pte)) {
hugetlb_vma_unlock_write(vma);
- flush_tlb_range(vma,
- range.start, range.end);
+ huge_pmd_unshare_flush(&tlb, vma);
+ tlb_finish_mmu(&tlb);
/*
- * The ref count of the PMD page was
- * dropped which is part of the way map
- * counting is done for shared PMDs.
- * Return 'true' here. When there is
- * no other sharing, huge_pmd_unshare
- * returns false and we will unmap the
- * actual page and drop map count
- * to zero.
+ * The PMD table was unmapped,
+ * consequently unmapping the folio.
*/
goto walk_done;
}
hugetlb_vma_unlock_write(vma);
+ tlb_finish_mmu(&tlb);
}
pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
if (pte_dirty(pteval))
@@ -2404,31 +2403,29 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
* fail if unsuccessful.
*/
if (!anon) {
+ struct mmu_gather tlb;
+
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
if (!hugetlb_vma_trylock_write(vma)) {
page_vma_mapped_walk_done(&pvmw);
ret = false;
break;
}
- if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
- hugetlb_vma_unlock_write(vma);
- flush_tlb_range(vma,
- range.start, range.end);
+ tlb_gather_mmu_vma(&tlb, vma);
+ if (huge_pmd_unshare(&tlb, vma, address, pvmw.pte)) {
+ hugetlb_vma_unlock_write(vma);
+ huge_pmd_unshare_flush(&tlb, vma);
+ tlb_finish_mmu(&tlb);
/*
- * The ref count of the PMD page was
- * dropped which is part of the way map
- * counting is done for shared PMDs.
- * Return 'true' here. When there is
- * no other sharing, huge_pmd_unshare
- * returns false and we will unmap the
- * actual page and drop map count
- * to zero.
+ * The PMD table was unmapped,
+ * consequently unmapping the folio.
*/
page_vma_mapped_walk_done(&pvmw);
break;
}
hugetlb_vma_unlock_write(vma);
+ tlb_finish_mmu(&tlb);
}
/* Nuke the hugetlb page table entry */
pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
diff --git a/mm/vma.c b/mm/vma.c
index dc92f3dd8514..7a908a964d18 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -37,6 +37,8 @@ struct mmap_state {
bool check_ksm_early :1;
/* If we map new, hold the file rmap lock on mapping. */
bool hold_file_rmap_lock :1;
+ /* If .mmap_prepare changed the file, we don't need to pin. */
+ bool file_doesnt_need_get :1;
};
#define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, vm_flags_, file_) \
@@ -2450,7 +2452,9 @@ static int __mmap_new_file_vma(struct mmap_state *map,
struct vma_iterator *vmi = map->vmi;
int error;
- vma->vm_file = get_file(map->file);
+ vma->vm_file = map->file;
+ if (!map->file_doesnt_need_get)
+ get_file(map->file);
if (!map->file->f_op->mmap)
return 0;
@@ -2638,7 +2642,10 @@ static int call_mmap_prepare(struct mmap_state *map,
/* Update fields permitted to be changed. */
map->pgoff = desc->pgoff;
- map->file = desc->vm_file;
+ if (desc->vm_file != map->file) {
+ map->file_doesnt_need_get = true;
+ map->file = desc->vm_file;
+ }
map->vm_flags = desc->vm_flags;
map->page_prot = desc->page_prot;
/* User-defined fields. */
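
[Editor's note] A toy refcount model (all names invented) of the ownership rule this fix enforces: a file swapped in by .mmap_prepare already carries its own reference, so taking another get_file() on it would leak it.

#include <assert.h>
#include <stdio.h>

struct model_file {
	int refcount;
};

static void model_get(struct model_file *f) { f->refcount++; }
static void model_put(struct model_file *f) { f->refcount--; }

int main(void)
{
	struct model_file orig = { .refcount = 1 };	/* caller's reference */
	struct model_file swapped = { .refcount = 1 };	/* ref handed over by the hook */
	struct model_file *vm_file;
	int file_swapped = 1;				/* hook replaced the file */

	vm_file = file_swapped ? &swapped : &orig;
	if (!file_swapped)				/* only pin when reusing the original */
		model_get(vm_file);

	/* ... VMA lifetime ... */
	model_put(vm_file);				/* the fput() at unmap time */

	assert(swapped.refcount == 0 && orig.refcount == 1);
	puts("no leaked reference");
	return 0;
}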
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 670fe9fae5ba..614ccf39fe3f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7707,6 +7707,17 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
return ret;
}
+#else
+
+static unsigned long __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask,
+ unsigned long nr_pages,
+ struct scan_control *sc)
+{
+ return 0;
+}
+
+#endif
+
enum {
MEMORY_RECLAIM_SWAPPINESS = 0,
MEMORY_RECLAIM_SWAPPINESS_MAX,
@@ -7814,8 +7825,6 @@ int user_proactive_reclaim(char *buf,
return 0;
}
-#endif
-
/**
* check_move_unevictable_folios - Move evictable folios to appropriate zone
* lru list