summaryrefslogtreecommitdiff
path: root/arch/x86/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-12 11:32:37 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-12 11:32:37 -0800
commit4cff5c05e076d2ee4e34122aa956b84a2eaac587 (patch)
tree6717207240b3881d1b48ff7cd86b193506756e6c /arch/x86/include
parent541c43310e85dbf35368b43b720c6724bc8ad8ec (diff)
parentfb4ddf2085115ed28dedc427d9491707b476bbfe (diff)
Merge tag 'mm-stable-2026-02-11-19-22' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - "powerpc/64s: do not re-activate batched TLB flush" makes arch_{enter|leave}_lazy_mmu_mode() nest properly (Alexander Gordeev) It adds a generic enter/leave layer and switches architectures to use it. Various hacks were removed in the process. - "zram: introduce compressed data writeback" implements data compression for zram writeback (Richard Chang and Sergey Senozhatsky) - "mm: folio_zero_user: clear page ranges" adds clearing of contiguous page ranges for hugepages. Large improvements during demand faulting are demonstrated (David Hildenbrand) - "memcg cleanups" tidies up some memcg code (Chen Ridong) - "mm/damon: introduce {,max_}nr_snapshots and tracepoint for damos stats" improves DAMOS stat's provided information, deterministic control, and readability (SeongJae Park) - "selftests/mm: hugetlb cgroup charging: robustness fixes" fixes a few issues in the hugetlb cgroup charging selftests (Li Wang) - "Fix va_high_addr_switch.sh test failure - again" addresses several issues in the va_high_addr_switch test (Chunyu Hu) - "mm/damon/tests/core-kunit: extend existing test scenarios" improves the KUnit test coverage for DAMON (Shu Anzai) - "mm/khugepaged: fix dirty page handling for MADV_COLLAPSE" fixes a glitch in khugepaged which was causing madvise(MADV_COLLAPSE) to transiently return -EAGAIN (Shivank Garg) - "arch, mm: consolidate hugetlb early reservation" reworks and consolidates a pile of straggly code related to reservation of hugetlb memory from bootmem and creation of CMA areas for hugetlb (Mike Rapoport) - "mm: clean up anon_vma implementation" cleans up the anon_vma implementation in various ways (Lorenzo Stoakes) - "tweaks for __alloc_pages_slowpath()" does a little streamlining of the page allocator's slowpath code (Vlastimil Babka) - "memcg: separate private and public ID namespaces" cleans up the memcg ID code and prevents the internal-only private IDs from being exposed to userspace (Shakeel Butt) - "mm: hugetlb: allocate frozen gigantic folio" cleans up the allocation of frozen folios and avoids some atomic refcount operations (Kefeng Wang) - "mm/damon: advance DAMOS-based LRU sorting" improves DAMOS's movement of memory betewwn the active and inactive LRUs and adds auto-tuning of the ratio-based quotas and of monitoring intervals (SeongJae Park) - "Support page table check on PowerPC" makes CONFIG_PAGE_TABLE_CHECK_ENFORCED work on powerpc (Andrew Donnellan) - "nodemask: align nodes_and{,not} with underlying bitmap ops" makes nodes_and() and nodes_andnot() propagate the return values from the underlying bit operations, enabling some cleanup in calling code (Yury Norov) - "mm/damon: hide kdamond and kdamond_lock from API callers" cleans up some DAMON internal interfaces (SeongJae Park) - "mm/khugepaged: cleanups and scan limit fix" does some cleanup work in khupaged and fixes a scan limit accounting issue (Shivank Garg) - "mm: balloon infrastructure cleanups" goes to town on the balloon infrastructure and its page migration function. Mainly cleanups, also some locking simplification (David Hildenbrand) - "mm/vmscan: add tracepoint and reason for kswapd_failures reset" adds additional tracepoints to the page reclaim code (Jiayuan Chen) - "Replace wq users and add WQ_PERCPU to alloc_workqueue() users" is part of Marco's kernel-wide migration from the legacy workqueue APIs over to the preferred unbound workqueues (Marco Crivellari) - "Various mm kselftests improvements/fixes" provides various unrelated improvements/fixes for the mm kselftests (Kevin Brodsky) - "mm: accelerate gigantic folio allocation" greatly speeds up gigantic folio allocation, mainly by avoiding unnecessary work in pfn_range_valid_contig() (Kefeng Wang) - "selftests/damon: improve leak detection and wss estimation reliability" improves the reliability of two of the DAMON selftests (SeongJae Park) - "mm/damon: cleanup kdamond, damon_call(), damos filter and DAMON_MIN_REGION" does some cleanup work in the core DAMON code (SeongJae Park) - "Docs/mm/damon: update intro, modules, maintainer profile, and misc" performs maintenance work on the DAMON documentation (SeongJae Park) - "mm: add and use vma_assert_stabilised() helper" refactors and cleans up the core VMA code. The main aim here is to be able to use the mmap write lock's lockdep state to perform various assertions regarding the locking which the VMA code requires (Lorenzo Stoakes) - "mm, swap: swap table phase II: unify swapin use" removes some old swap code (swap cache bypassing and swap synchronization) which wasn't working very well. Various other cleanups and simplifications were made. The end result is a 20% speedup in one benchmark (Kairui Song) - "enable PT_RECLAIM on more 64-bit architectures" makes PT_RECLAIM available on 64-bit alpha, loongarch, mips, parisc, and um. Various cleanups were performed along the way (Qi Zheng) * tag 'mm-stable-2026-02-11-19-22' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (325 commits) mm/memory: handle non-split locks correctly in zap_empty_pte_table() mm: move pte table reclaim code to memory.c mm: make PT_RECLAIM depends on MMU_GATHER_RCU_TABLE_FREE mm: convert __HAVE_ARCH_TLB_REMOVE_TABLE to CONFIG_HAVE_ARCH_TLB_REMOVE_TABLE config um: mm: enable MMU_GATHER_RCU_TABLE_FREE parisc: mm: enable MMU_GATHER_RCU_TABLE_FREE mips: mm: enable MMU_GATHER_RCU_TABLE_FREE LoongArch: mm: enable MMU_GATHER_RCU_TABLE_FREE alpha: mm: enable MMU_GATHER_RCU_TABLE_FREE mm: change mm/pt_reclaim.c to use asm/tlb.h instead of asm-generic/tlb.h mm/damon/stat: remove __read_mostly from memory_idle_ms_percentiles zsmalloc: make common caches global mm: add SPDX id lines to some mm source files mm/zswap: use %pe to print error pointers mm/vmscan: use %pe to print error pointers mm/readahead: fix typo in comment mm: khugepaged: fix NR_FILE_PAGES and NR_SHMEM in collapse_file() mm: refactor vma_map_pages to use vm_insert_pages mm/damon: unify address range representation with damon_addr_range mm/cma: replace snprintf with strscpy in cma_new_area ...
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/page.h6
-rw-r--r--arch/x86/include/asm/page_32.h6
-rw-r--r--arch/x86/include/asm/page_64.h78
-rw-r--r--arch/x86/include/asm/paravirt.h1
-rw-r--r--arch/x86/include/asm/pgtable.h23
-rw-r--r--arch/x86/include/asm/thread_info.h4
6 files changed, 80 insertions, 38 deletions
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 9265f2fca99a..416dc88e35c1 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -22,12 +22,6 @@ struct page;
extern struct range pfn_mapped[];
extern int nr_pfn_mapped;
-static inline void clear_user_page(void *page, unsigned long vaddr,
- struct page *pg)
-{
- clear_page(page);
-}
-
static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
struct page *topage)
{
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 0c623706cb7e..19fddb002cc9 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -17,6 +17,12 @@ extern unsigned long __phys_addr(unsigned long);
#include <linux/string.h>
+/**
+ * clear_page() - clear a page using a kernel virtual address.
+ * @page: address of kernel page
+ *
+ * Does absolutely no exception handling.
+ */
static inline void clear_page(void *page)
{
memset(page, 0, PAGE_SIZE);
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 2f0e47be79a4..1895c207f629 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -48,26 +48,70 @@ static inline unsigned long __phys_addr_symbol(unsigned long x)
#define __phys_reloc_hide(x) (x)
-void clear_page_orig(void *page);
-void clear_page_rep(void *page);
-void clear_page_erms(void *page);
-KCFI_REFERENCE(clear_page_orig);
-KCFI_REFERENCE(clear_page_rep);
-KCFI_REFERENCE(clear_page_erms);
-
-static inline void clear_page(void *page)
+void __clear_pages_unrolled(void *page);
+KCFI_REFERENCE(__clear_pages_unrolled);
+
+/**
+ * clear_pages() - clear a page range using a kernel virtual address.
+ * @addr: start address of kernel page range
+ * @npages: number of pages
+ *
+ * Switch between three implementations of page clearing based on CPU
+ * capabilities:
+ *
+ * - __clear_pages_unrolled(): the oldest, slowest and universally
+ * supported method. Zeroes via 8-byte MOV instructions unrolled 8x
+ * to write a 64-byte cacheline in each loop iteration.
+ *
+ * - "REP; STOSQ": really old CPUs had crummy REP implementations.
+ * Vendor CPU setup code sets 'REP_GOOD' on CPUs where REP can be
+ * trusted. The instruction writes 8-byte per REP iteration but
+ * CPUs can internally batch these together and do larger writes.
+ *
+ * - "REP; STOSB": used on CPUs with "enhanced REP MOVSB/STOSB",
+ * which enumerate 'ERMS' and provide an implementation which
+ * unlike "REP; STOSQ" above wasn't overly picky about alignment.
+ * The instruction writes 1-byte per REP iteration with CPUs
+ * internally batching these together into larger writes and is
+ * generally fastest of the three.
+ *
+ * Note that when running as a guest, features exposed by the CPU
+ * might be mediated by the hypervisor. So, the STOSQ variant might
+ * be in active use on some systems even when the hardware enumerates
+ * ERMS.
+ *
+ * Does absolutely no exception handling.
+ */
+static inline void clear_pages(void *addr, unsigned int npages)
{
+ u64 len = npages * PAGE_SIZE;
+ /*
+ * Clean up KMSAN metadata for the pages being cleared. The assembly call
+ * below clobbers @addr, so perform unpoisoning before it.
+ */
+ kmsan_unpoison_memory(addr, len);
+
/*
- * Clean up KMSAN metadata for the page being cleared. The assembly call
- * below clobbers @page, so we perform unpoisoning before it.
+ * The inline asm embeds a CALL instruction and usually that is a no-no
+ * due to the compiler not knowing that and thus being unable to track
+ * callee-clobbered registers.
+ *
+ * In this case that is fine because the registers clobbered by
+ * __clear_pages_unrolled() are part of the inline asm register
+ * specification.
*/
- kmsan_unpoison_memory(page, PAGE_SIZE);
- alternative_call_2(clear_page_orig,
- clear_page_rep, X86_FEATURE_REP_GOOD,
- clear_page_erms, X86_FEATURE_ERMS,
- "=D" (page),
- "D" (page),
- "cc", "memory", "rax", "rcx");
+ asm volatile(ALTERNATIVE_2("call __clear_pages_unrolled",
+ "shrq $3, %%rcx; rep stosq", X86_FEATURE_REP_GOOD,
+ "rep stosb", X86_FEATURE_ERMS)
+ : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
+ : "a" (0)
+ : "cc", "memory");
+}
+#define clear_pages clear_pages
+
+static inline void clear_page(void *addr)
+{
+ clear_pages(addr, 1);
}
void copy_page(void *to, void *from);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 3d0b92a8a557..fcf8ab50948a 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -492,7 +492,6 @@ static inline void arch_end_context_switch(struct task_struct *next)
PVOP_VCALL1(pv_ops, cpu.end_context_switch, next);
}
-#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
PVOP_VCALL0(pv_ops, mmu.lazy_mode.enter);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e33df3da6980..1662c5a8f445 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -118,6 +118,7 @@ extern pmdval_t early_pmd_flags;
#define __pte(x) native_make_pte(x)
#define arch_end_context_switch(prev) do {} while(0)
+static inline void arch_flush_lazy_mmu_mode(void) {}
#endif /* CONFIG_PARAVIRT_XXL */
static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
@@ -1213,14 +1214,14 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(mm, pmdp, pmd);
+ page_table_check_pmd_set(mm, addr, pmdp, pmd);
set_pmd(pmdp, pmd);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
- page_table_check_pud_set(mm, pudp, pud);
+ page_table_check_pud_set(mm, addr, pudp, pud);
native_set_pud(pudp, pud);
}
@@ -1251,7 +1252,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pte_t pte = native_ptep_get_and_clear(ptep);
- page_table_check_pte_clear(mm, pte);
+ page_table_check_pte_clear(mm, addr, pte);
return pte;
}
@@ -1267,7 +1268,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
* care about updates and native needs no locking
*/
pte = native_local_ptep_get_and_clear(ptep);
- page_table_check_pte_clear(mm, pte);
+ page_table_check_pte_clear(mm, addr, pte);
} else {
pte = ptep_get_and_clear(mm, addr, ptep);
}
@@ -1318,7 +1319,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long
{
pmd_t pmd = native_pmdp_get_and_clear(pmdp);
- page_table_check_pmd_clear(mm, pmd);
+ page_table_check_pmd_clear(mm, addr, pmd);
return pmd;
}
@@ -1329,7 +1330,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
{
pud_t pud = native_pudp_get_and_clear(pudp);
- page_table_check_pud_clear(mm, pud);
+ page_table_check_pud_clear(mm, addr, pud);
return pud;
}
@@ -1356,7 +1357,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
- page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
if (IS_ENABLED(CONFIG_SMP)) {
return xchg(pmdp, pmd);
} else {
@@ -1371,7 +1372,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
static inline pud_t pudp_establish(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp, pud_t pud)
{
- page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ page_table_check_pud_set(vma->vm_mm, address, pudp, pud);
if (IS_ENABLED(CONFIG_SMP)) {
return xchg(pudp, pud);
} else {
@@ -1679,17 +1680,17 @@ static inline bool arch_has_hw_nonleaf_pmd_young(void)
#endif
#ifdef CONFIG_PAGE_TABLE_CHECK
-static inline bool pte_user_accessible_page(pte_t pte)
+static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
{
return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER);
}
-static inline bool pmd_user_accessible_page(pmd_t pmd)
+static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr)
{
return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER);
}
-static inline bool pud_user_accessible_page(pud_t pud)
+static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr)
{
return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER);
}
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e71e0e8362ed..0067684afb5b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -100,8 +100,7 @@ struct thread_info {
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_SINGLESTEP 25 /* reenable singlestep on user return*/
#define TIF_BLOCKSTEP 26 /* set when we want DEBUGCTLMSR_BTF */
-#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
-#define TIF_ADDR32 28 /* 32-bit address space on 64 bits */
+#define TIF_ADDR32 27 /* 32-bit address space on 64 bits */
#define _TIF_SSBD BIT(TIF_SSBD)
#define _TIF_SPEC_IB BIT(TIF_SPEC_IB)
@@ -114,7 +113,6 @@ struct thread_info {
#define _TIF_FORCED_TF BIT(TIF_FORCED_TF)
#define _TIF_BLOCKSTEP BIT(TIF_BLOCKSTEP)
#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP)
-#define _TIF_LAZY_MMU_UPDATES BIT(TIF_LAZY_MMU_UPDATES)
#define _TIF_ADDR32 BIT(TIF_ADDR32)
/* flags to check in __switch_to() */