| author | Ankur Arora <ankur.a.arora@oracle.com> | 2026-01-06 23:20:05 -0800 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2026-01-20 19:24:40 -0800 |
| commit | 54a6b89a3db2ecb4462abcd6e6e52dfebaa7e6c4 | |
| tree | e8cd387894809d74c67ea6547310eba03461b9c1 /arch | |
| parent | 8d846b723e5723d98d859df9feeab89c2c889fb2 | |
x86/mm: simplify clear_page_*
clear_page_rep() and clear_page_erms() are wrappers around "REP; STOS"
variations. Inlining gets rid of an unnecessary CALL/RET (which isn't
free when using RETHUNK speculative execution mitigations). Fix up and
rename clear_page_orig() to adapt to the changed calling convention.

Also add a comment from Dave Hansen detailing the various clearing
mechanisms used in clear_page().
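[Editor's note: as a rough illustration of what the inlined "REP; STOS" variants boil down to, here is a minimal user-space sketch. It is not kernel code: the function names and the test harness are made up, and it assumes GCC/Clang inline asm on x86-64. It uses the same register constraints as the patch's new inline asm — count in %rcx, destination in %rdi, zero in %rax.]

```c
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define MY_PAGE_SIZE 4096UL

/* "REP; STOSQ": stores 8 bytes per iteration, so the byte count is
 * pre-shifted by 3 — exactly the X86_FEATURE_REP_GOOD alternative. */
static void clear_page_stosq(void *addr)
{
	uint64_t len = MY_PAGE_SIZE;

	asm volatile("shrq $3, %%rcx; rep stosq"
		     : "+c" (len), "+D" (addr)
		     : "a" (0)
		     : "cc", "memory");
}

/* "REP; STOSB": stores 1 byte per iteration; the byte count goes into
 * %rcx unmodified — the X86_FEATURE_ERMS alternative. */
static void clear_page_stosb(void *addr)
{
	uint64_t len = MY_PAGE_SIZE;

	asm volatile("rep stosb"
		     : "+c" (len), "+D" (addr)
		     : "a" (0)
		     : "cc", "memory");
}

int main(void)
{
	unsigned char *page = aligned_alloc(MY_PAGE_SIZE, MY_PAGE_SIZE);

	assert(page);

	memset(page, 0xff, MY_PAGE_SIZE);
	clear_page_stosq(page);
	for (size_t i = 0; i < MY_PAGE_SIZE; i++)
		assert(page[i] == 0);

	memset(page, 0xff, MY_PAGE_SIZE);
	clear_page_stosb(page);
	for (size_t i = 0; i < MY_PAGE_SIZE; i++)
		assert(page[i] == 0);

	free(page);
	return 0;
}
```

Because both sequences are a handful of bytes, emitting them inline removes the CALL/RET pair entirely; only the unrolled fallback still needs an out-of-line call.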
Link: https://lkml.kernel.org/r/20260107072009.1615991-5-ankur.a.arora@oracle.com
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
Tested-by: Raghavendra K T <raghavendra.kt@amd.com>
Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Li Zhe <lizhe.67@bytedance.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'arch')

| -rw-r--r-- | arch/x86/include/asm/page_32.h | 6 |
| -rw-r--r-- | arch/x86/include/asm/page_64.h | 69 |
| -rw-r--r-- | arch/x86/lib/clear_page_64.S | 39 |

3 files changed, 67 insertions(+), 47 deletions(-)
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 0c623706cb7e..19fddb002cc9 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -17,6 +17,12 @@ extern unsigned long __phys_addr(unsigned long);
 
 #include <linux/string.h>
 
+/**
+ * clear_page() - clear a page using a kernel virtual address.
+ * @page: address of kernel page
+ *
+ * Does absolutely no exception handling.
+ */
 static inline void clear_page(void *page)
 {
 	memset(page, 0, PAGE_SIZE);
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 2f0e47be79a4..ec3307234a17 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -48,26 +48,63 @@ static inline unsigned long __phys_addr_symbol(unsigned long x)
 
 #define __phys_reloc_hide(x)	(x)
 
-void clear_page_orig(void *page);
-void clear_page_rep(void *page);
-void clear_page_erms(void *page);
-KCFI_REFERENCE(clear_page_orig);
-KCFI_REFERENCE(clear_page_rep);
-KCFI_REFERENCE(clear_page_erms);
-
-static inline void clear_page(void *page)
+void __clear_pages_unrolled(void *page);
+KCFI_REFERENCE(__clear_pages_unrolled);
+
+/**
+ * clear_page() - clear a page using a kernel virtual address.
+ * @addr: address of kernel page
+ *
+ * Switch between three implementations of page clearing based on CPU
+ * capabilities:
+ *
+ * - __clear_pages_unrolled(): the oldest, slowest and universally
+ *   supported method. Zeroes via 8-byte MOV instructions unrolled 8x
+ *   to write a 64-byte cacheline in each loop iteration.
+ *
+ * - "REP; STOSQ": really old CPUs had crummy REP implementations.
+ *   Vendor CPU setup code sets 'REP_GOOD' on CPUs where REP can be
+ *   trusted. The instruction writes 8 bytes per REP iteration but
+ *   CPUs can internally batch these together and do larger writes.
+ *
+ * - "REP; STOSB": used on CPUs with "enhanced REP MOVSB/STOSB",
+ *   which enumerate 'ERMS' and provide an implementation which
+ *   unlike "REP; STOSQ" above wasn't overly picky about alignment.
+ *   The instruction writes 1 byte per REP iteration with CPUs
+ *   internally batching these together into larger writes and is
+ *   generally fastest of the three.
+ *
+ * Note that when running as a guest, features exposed by the CPU
+ * might be mediated by the hypervisor. So, the STOSQ variant might
+ * be in active use on some systems even when the hardware enumerates
+ * ERMS.
+ *
+ * Does absolutely no exception handling.
+ */
+static inline void clear_page(void *addr)
 {
+	u64 len = PAGE_SIZE;
 	/*
 	 * Clean up KMSAN metadata for the page being cleared. The assembly call
-	 * below clobbers @page, so we perform unpoisoning before it.
+	 * below clobbers @addr, so perform unpoisoning before it.
+	 */
+	kmsan_unpoison_memory(addr, len);
+
+	/*
+	 * The inline asm embeds a CALL instruction and usually that is a no-no
+	 * due to the compiler not knowing that and thus being unable to track
+	 * callee-clobbered registers.
+	 *
+	 * In this case that is fine because the registers clobbered by
+	 * __clear_pages_unrolled() are part of the inline asm register
+	 * specification.
 	 */
-	kmsan_unpoison_memory(page, PAGE_SIZE);
-	alternative_call_2(clear_page_orig,
-			   clear_page_rep, X86_FEATURE_REP_GOOD,
-			   clear_page_erms, X86_FEATURE_ERMS,
-			   "=D" (page),
-			   "D" (page),
-			   "cc", "memory", "rax", "rcx");
+	asm volatile(ALTERNATIVE_2("call __clear_pages_unrolled",
+				   "shrq $3, %%rcx; rep stosq", X86_FEATURE_REP_GOOD,
+				   "rep stosb", X86_FEATURE_ERMS)
+		     : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
+		     : "a" (0)
+		     : "cc", "memory");
 }
 
 void copy_page(void *to, void *from);
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index a508e4a8c66a..f7f356e7218b 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -6,30 +6,15 @@
 #include <asm/asm.h>
 
 /*
- * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
- * recommended to use this when possible and we do use them by default.
- * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
- * Otherwise, use original.
+ * Zero page aligned region.
+ * %rdi	- dest
+ * %rcx	- length
  */
-
-/*
- * Zero a page.
- * %rdi	- page
- */
-SYM_TYPED_FUNC_START(clear_page_rep)
-	movl $4096/8,%ecx
-	xorl %eax,%eax
-	rep stosq
-	RET
-SYM_FUNC_END(clear_page_rep)
-EXPORT_SYMBOL_GPL(clear_page_rep)
-
-SYM_TYPED_FUNC_START(clear_page_orig)
-	xorl %eax,%eax
-	movl $4096/64,%ecx
+SYM_TYPED_FUNC_START(__clear_pages_unrolled)
+	shrq $6, %rcx
 	.p2align 4
 .Lloop:
-	decl	%ecx
+	decq	%rcx
 #define PUT(x) movq %rax,x*8(%rdi)
 	movq %rax,(%rdi)
 	PUT(1)
@@ -43,16 +28,8 @@ SYM_TYPED_FUNC_START(clear_page_orig)
 	jnz	.Lloop
 	nop
 	RET
-SYM_FUNC_END(clear_page_orig)
-EXPORT_SYMBOL_GPL(clear_page_orig)
-
-SYM_TYPED_FUNC_START(clear_page_erms)
-	movl $4096,%ecx
-	xorl %eax,%eax
-	rep stosb
-	RET
-SYM_FUNC_END(clear_page_erms)
-EXPORT_SYMBOL_GPL(clear_page_erms)
+SYM_FUNC_END(__clear_pages_unrolled)
+EXPORT_SYMBOL_GPL(__clear_pages_unrolled)
 
 /*
  * Default clear user-space.
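[Editor's note: for readers more comfortable in C than in assembly, here is a C-level analogue of what __clear_pages_unrolled() does per loop pass — eight 8-byte stores, i.e. one 64-byte cacheline per iteration. This is an illustrative sketch only; the function name and harness are hypothetical, and the kernel implements this in the assembly above.]

```c
#include <stddef.h>
#include <stdint.h>

static void clear_pages_unrolled(void *addr, size_t len)
{
	uint64_t *p = addr;

	/* len is assumed to be a multiple of 64, mirroring the
	 * 'shrq $6, %rcx' that converts bytes to cacheline count. */
	for (size_t i = 0; i < len / 64; i++) {
		p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;
		p[4] = 0; p[5] = 0; p[6] = 0; p[7] = 0;
		p += 8;
	}
}

int main(void)
{
	uint64_t page[4096 / sizeof(uint64_t)];

	clear_pages_unrolled(page, sizeof(page));
	return (int)page[0];	/* always 0 */
}
```

Note how the new calling convention (destination in %rdi, byte length in %rcx) lets the same routine clear multi-page extents, which is why the function is now named __clear_pages_unrolled rather than clear_page_orig.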
