 arch/arm64/include/asm/mmu.h   |   2
 arch/arm64/kernel/cpufeature.c |   3
 arch/arm64/mm/mmu.c            | 182
 arch/arm64/mm/proc.S           |  27
 4 files changed, 187 insertions, 27 deletions
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 56fca81f60ad..2acfa7801d02 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -72,6 +72,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
 extern void mark_linear_text_alias_ro(void);
 extern int split_kernel_leaf_mapping(unsigned long start, unsigned long end);
+extern void init_idmap_kpti_bbml2_flag(void);
+extern void linear_map_maybe_split_to_ptes(void);
 
 /*
  * This check is triggered during the early boot before the cpufeature
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e15472beff3f..3780f343fd2c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -85,6 +85,7 @@
 #include <asm/insn.h>
 #include <asm/kvm_host.h>
 #include <asm/mmu_context.h>
+#include <asm/mmu.h>
 #include <asm/mte.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -2027,6 +2028,7 @@ static void __init kpti_install_ng_mappings(void)
 	if (arm64_use_ng_mappings)
 		return;
 
+	init_idmap_kpti_bbml2_flag();
 	stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
 }
 
@@ -3928,6 +3930,7 @@ void __init setup_system_features(void)
 {
 	setup_system_capabilities();
 
+	linear_map_maybe_split_to_ptes();
 	kpti_install_ng_mappings();
 
 	sve_setup();
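Taken together, the cpufeature.c hunks order the boot sequence so that the linear map is split (if required) before the KPTI G-to-nG rewrite, and both stop_machine() users prime the same synchronization flag beforehand. A minimal stand-alone sketch of that ordering (stub functions only, not kernel code; the kernel's init_idmap_kpti_bbml2_flag() additionally publishes the store with smp_mb()):

#include <stdio.h>

static unsigned int idmap_kpti_bbml2_flag;

static void init_idmap_kpti_bbml2_flag(void)
{
	idmap_kpti_bbml2_flag = 1;	/* kernel version also issues smp_mb() */
}

static void linear_map_maybe_split_to_ptes(void)
{
	init_idmap_kpti_bbml2_flag();
	puts("stop_machine(linear_map_split_to_ptes)");
}

static void kpti_install_ng_mappings(void)
{
	init_idmap_kpti_bbml2_flag();
	puts("stop_machine(__kpti_install_ng_mappings)");
}

int main(void)	/* setup_system_features() analogue: split first, KPTI second */
{
	linear_map_maybe_split_to_ptes();
	kpti_install_ng_mappings();
	return 0;
}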
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index a7b29daf1a38..431ed90914bb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -27,6 +27,8 @@
 #include <linux/kfence.h>
 #include <linux/pkeys.h>
 #include <linux/mm_inline.h>
+#include <linux/pagewalk.h>
+#include <linux/stop_machine.h>
 
 #include <asm/barrier.h>
 #include <asm/cputype.h>
@@ -483,11 +485,11 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
 
 #define INVALID_PHYS_ADDR	(-1ULL)
 
-static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
+static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp,
 				       enum pgtable_type pgtable_type)
 {
 	/* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
-	struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0);
+	struct ptdesc *ptdesc = pagetable_alloc(gfp & ~__GFP_ZERO, 0);
 	phys_addr_t pa;
 
 	if (!ptdesc)
@@ -514,9 +516,9 @@ static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
 }
 
 static phys_addr_t
-try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
+try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp)
 {
-	return __pgd_pgtable_alloc(&init_mm, pgtable_type);
+	return __pgd_pgtable_alloc(&init_mm, gfp, pgtable_type);
 }
 
 static phys_addr_t __maybe_unused
@@ -524,7 +526,7 @@ pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
 {
 	phys_addr_t pa;
 
-	pa = __pgd_pgtable_alloc(&init_mm, pgtable_type);
+	pa = __pgd_pgtable_alloc(&init_mm, GFP_PGTABLE_KERNEL, pgtable_type);
 	BUG_ON(pa == INVALID_PHYS_ADDR);
 	return pa;
 }
@@ -534,7 +536,7 @@ pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
 {
 	phys_addr_t pa;
 
-	pa = __pgd_pgtable_alloc(NULL, pgtable_type);
+	pa = __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type);
 	BUG_ON(pa == INVALID_PHYS_ADDR);
 	return pa;
 }
@@ -548,7 +550,7 @@ static void split_contpte(pte_t *ptep)
 		__set_pte(ptep, pte_mknoncont(__ptep_get(ptep)));
 }
 
-static int split_pmd(pmd_t *pmdp, pmd_t pmd)
+static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
 {
 	pmdval_t tableprot = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
 	unsigned long pfn = pmd_pfn(pmd);
@@ -557,7 +559,7 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd)
 	pte_t *ptep;
 	int i;
 
-	pte_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PTE);
+	pte_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PTE, gfp);
 	if (pte_phys == INVALID_PHYS_ADDR)
 		return -ENOMEM;
 	ptep = (pte_t *)phys_to_virt(pte_phys);
@@ -566,7 +568,9 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd)
 		tableprot |= PMD_TABLE_PXN;
 
 	prot = __pgprot((pgprot_val(prot) & ~PTE_TYPE_MASK) | PTE_TYPE_PAGE);
-	prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+	prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
+	if (to_cont)
+		prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
 	for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++)
 		__set_pte(ptep, pfn_pte(pfn, prot));
@@ -590,7 +594,7 @@ static void split_contpmd(pmd_t *pmdp)
 		set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp)));
 }
 
-static int split_pud(pud_t *pudp, pud_t pud)
+static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
 {
 	pudval_t tableprot = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF;
 	unsigned int step = PMD_SIZE >> PAGE_SHIFT;
@@ -600,7 +604,7 @@ static int split_pud(pud_t *pudp, pud_t pud)
 	pmd_t *pmdp;
 	int i;
 
-	pmd_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PMD);
+	pmd_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PMD, gfp);
 	if (pmd_phys == INVALID_PHYS_ADDR)
 		return -ENOMEM;
 	pmdp = (pmd_t *)phys_to_virt(pmd_phys);
@@ -609,7 +613,9 @@ static int split_pud(pud_t *pudp, pud_t pud)
 		tableprot |= PUD_TABLE_PXN;
 
 	prot = __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
-	prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+	prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
+	if (to_cont)
+		prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
 	for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step)
 		set_pmd(pmdp, pfn_pmd(pfn, prot));
@@ -667,7 +673,7 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
 	if (!pud_present(pud))
 		goto out;
 	if (pud_leaf(pud)) {
-		ret = split_pud(pudp, pud);
+		ret = split_pud(pudp, pud, GFP_PGTABLE_KERNEL, true);
 		if (ret)
 			goto out;
 	}
@@ -692,7 +698,7 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
 		 */
 		if (ALIGN_DOWN(addr, PMD_SIZE) == addr)
 			goto out;
-		ret = split_pmd(pmdp, pmd);
+		ret = split_pmd(pmdp, pmd, GFP_PGTABLE_KERNEL, true);
 		if (ret)
 			goto out;
 	}
@@ -765,6 +771,138 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
 	return ret;
 }
 
+static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
+					  unsigned long next,
+					  struct mm_walk *walk)
+{
+	pud_t pud = pudp_get(pudp);
+	int ret = 0;
+
+	if (pud_leaf(pud))
+		ret = split_pud(pudp, pud, GFP_ATOMIC, false);
+
+	return ret;
+}
+
+static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
+					  unsigned long next,
+					  struct mm_walk *walk)
+{
+	pmd_t pmd = pmdp_get(pmdp);
+	int ret = 0;
+
+	if (pmd_leaf(pmd)) {
+		if (pmd_cont(pmd))
+			split_contpmd(pmdp);
+		ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false);
+
+		/*
+		 * We have split the pmd directly to ptes so there is no need to
+		 * visit each pte to check if they are contpte.
+		 */
+		walk->action = ACTION_CONTINUE;
+	}
+
+	return ret;
+}
+
+static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
+					  unsigned long next,
+					  struct mm_walk *walk)
+{
+	pte_t pte = __ptep_get(ptep);
+
+	if (pte_cont(pte))
+		split_contpte(ptep);
+
+	return 0;
+}
+
+static const struct mm_walk_ops split_to_ptes_ops __initconst = {
+	.pud_entry	= split_to_ptes_pud_entry,
+	.pmd_entry	= split_to_ptes_pmd_entry,
+	.pte_entry	= split_to_ptes_pte_entry,
+};
+
+static bool linear_map_requires_bbml2 __initdata;
+
+u32 idmap_kpti_bbml2_flag;
+
+void __init init_idmap_kpti_bbml2_flag(void)
+{
+	WRITE_ONCE(idmap_kpti_bbml2_flag, 1);
+	/* Must be visible to other CPUs before stop_machine() is called. */
+	smp_mb();
+}
+
+static int __init linear_map_split_to_ptes(void *__unused)
+{
+	/*
+	 * Repainting the linear map must be done by CPU0 (the boot CPU) because
+	 * that's the only CPU that we know supports BBML2. The other CPUs will
+	 * be held in a waiting area with the idmap active.
+	 */
+	if (!smp_processor_id()) {
+		unsigned long lstart = _PAGE_OFFSET(vabits_actual);
+		unsigned long lend = PAGE_END;
+		unsigned long kstart = (unsigned long)lm_alias(_stext);
+		unsigned long kend = (unsigned long)lm_alias(__init_begin);
+		int ret;
+
+		/*
+		 * Wait for all secondary CPUs to be put into the waiting area.
+		 */
+		smp_cond_load_acquire(&idmap_kpti_bbml2_flag, VAL == num_online_cpus());
+
+		/*
+		 * Walk all of the linear map [lstart, lend), except the kernel
+		 * linear map alias [kstart, kend), and split all mappings to
+		 * PTE. The kernel alias remains static throughout runtime so
+		 * can continue to be safely mapped with large mappings.
+		 */
+		ret = walk_kernel_page_table_range_lockless(lstart, kstart,
+						&split_to_ptes_ops, NULL, NULL);
+		if (!ret)
+			ret = walk_kernel_page_table_range_lockless(kend, lend,
+						&split_to_ptes_ops, NULL, NULL);
+		if (ret)
+			panic("Failed to split linear map\n");
+		flush_tlb_kernel_range(lstart, lend);
+
+		/*
+		 * Relies on dsb in flush_tlb_kernel_range() to avoid reordering
+		 * before any page table split operations.
+		 */
+		WRITE_ONCE(idmap_kpti_bbml2_flag, 0);
+	} else {
+		typedef void (wait_split_fn)(void);
+		extern wait_split_fn wait_linear_map_split_to_ptes;
+		wait_split_fn *wait_fn;
+
+		wait_fn = (void *)__pa_symbol(wait_linear_map_split_to_ptes);
+
+		/*
+		 * At least one secondary CPU doesn't support BBML2 so cannot
+		 * tolerate the size of the live mappings changing. So have the
+		 * secondary CPUs wait for the boot CPU to make the changes
+		 * with the idmap active and init_mm inactive.
+		 */
+		cpu_install_idmap();
+		wait_fn();
+		cpu_uninstall_idmap();
+	}
+
+	return 0;
+}
+
+void __init linear_map_maybe_split_to_ptes(void)
+{
+	if (linear_map_requires_bbml2 && !system_supports_bbml2_noabort()) {
+		init_idmap_kpti_bbml2_flag();
+		stop_machine(linear_map_split_to_ptes, NULL, cpu_online_mask);
+	}
+}
+
 /*
  * This function can only be used to modify existing table entries,
  * without allocating new levels of table. Note that this permits the
@@ -919,6 +1057,8 @@ static void __init map_mem(pgd_t *pgdp)
 
 	early_kfence_pool = arm64_kfence_alloc_pool();
 
+	linear_map_requires_bbml2 = !force_pte_mapping() && can_set_direct_map();
+
 	if (force_pte_mapping())
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
@@ -1053,7 +1193,7 @@ void __pi_map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
 		    u64 va_offset);
 
 static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
-	  kpti_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
+	  kpti_bbml2_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
 
 static void __init create_idmap(void)
 {
@@ -1065,15 +1205,17 @@ static void __init create_idmap(void)
 		     IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
 		     __phys_to_virt(ptep) - ptep);
 
-	if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings) {
-		extern u32 __idmap_kpti_flag;
-		phys_addr_t pa = __pa_symbol(&__idmap_kpti_flag);
+	if (linear_map_requires_bbml2 ||
+	    (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings)) {
+		phys_addr_t pa = __pa_symbol(&idmap_kpti_bbml2_flag);
 
 		/*
 		 * The KPTI G-to-nG conversion code needs a read-write mapping
-		 * of its synchronization flag in the ID map.
+		 * of its synchronization flag in the ID map. This is also used
+		 * when splitting the linear map to ptes if a secondary CPU
+		 * doesn't support bbml2.
 		 */
-		ptep = __pa_symbol(kpti_ptes);
+		ptep = __pa_symbol(kpti_bbml2_ptes);
 		__pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
 			       IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
 			       __phys_to_virt(ptep) - ptep);
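A note on the split itself: split_pud() rewrites a 1G leaf as a full table of 512 PMD entries that step through the same physical frames 2M at a time (split_pmd() does the same one level down with 512 PTEs). A stand-alone sketch of just that pfn arithmetic, with illustrative constants for 4K pages and a hypothetical starting pfn, not kernel code:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PTRS_PER_PMD	512
#define PMD_SIZE	(1UL << (PAGE_SHIFT + 9))	/* 2M with 4K pages */

int main(void)
{
	unsigned long pfn = 0x80000;			/* hypothetical 1G-aligned block */
	unsigned long step = PMD_SIZE >> PAGE_SHIFT;	/* 512 pfns per 2M slice */

	/* Each new PMD entry maps the next 2M slice of the old 1G leaf. */
	for (int i = 0; i < PTRS_PER_PMD; i++, pfn += step)
		if (i < 3 || i == PTRS_PER_PMD - 1)
			printf("pmd[%3d] -> pfn 0x%lx\n", i, pfn);
	return 0;
}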
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 8c75965afc9e..86818511962b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -245,10 +245,6 @@ SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
  *
  * Called exactly once from stop_machine context by each CPU found during boot.
  */
-	.pushsection	".data", "aw", %progbits
-SYM_DATA(__idmap_kpti_flag, .long 1)
-	.popsection
-
 SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
 	cpu		.req	w0
 	temp_pte	.req	x0
@@ -273,7 +269,7 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
 	mov	x5, x3				// preserve temp_pte arg
 
 	mrs	swapper_ttb, ttbr1_el1
-	adr_l	flag_ptr, __idmap_kpti_flag
+	adr_l	flag_ptr, idmap_kpti_bbml2_flag
 
 	cbnz	cpu, __idmap_kpti_secondary
@@ -416,7 +412,25 @@ alternative_else_nop_endif
 __idmap_kpti_secondary:
 	/* Uninstall swapper before surgery begins */
 	__idmap_cpu_set_reserved_ttbr1 x16, x17
+	b	secondary_cpu_wait
+
+	.unreq	swapper_ttb
+	.unreq	flag_ptr
+SYM_FUNC_END(idmap_kpti_install_ng_mappings)
+	.popsection
+#endif
+
+	.pushsection ".idmap.text", "a"
+SYM_TYPED_FUNC_START(wait_linear_map_split_to_ptes)
+	/* Must be same registers as in idmap_kpti_install_ng_mappings */
+	swapper_ttb	.req	x3
+	flag_ptr	.req	x4
+
+	mrs	swapper_ttb, ttbr1_el1
+	adr_l	flag_ptr, idmap_kpti_bbml2_flag
+	__idmap_cpu_set_reserved_ttbr1 x16, x17
+secondary_cpu_wait:
 	/* Increment the flag to let the boot CPU know we're ready */
 1:	ldxr	w16, [flag_ptr]
 	add	w16, w16, #1
 	stxr	w17, w16, [flag_ptr]
 	cbnz	w17, 1b
@@ -436,9 +450,8 @@ __idmap_kpti_secondary:
 
 	.unreq	swapper_ttb
 	.unreq	flag_ptr
-SYM_FUNC_END(idmap_kpti_install_ng_mappings)
+SYM_FUNC_END(wait_linear_map_split_to_ptes)
 	.popsection
-#endif
 
 /*
  * __cpu_setup
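The flag protocol implemented by the ldxr/stxr loop above and by smp_cond_load_acquire() in linear_map_split_to_ptes() is: the caller primes the flag to 1; each secondary atomically increments it on entering the wait area; the boot CPU proceeds once the flag equals num_online_cpus(), then writes 0 to release the secondaries. A userspace C11 analogue of that handshake (a sketch only, with a hypothetical CPU count; the kernel version additionally relies on barriers and runs the secondaries on the idmap):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_SECONDARIES 3		/* hypothetical: 4 "CPUs" total */

static atomic_uint flag;

static void *secondary(void *arg)
{
	(void)arg;
	atomic_fetch_add(&flag, 1);	/* "parked in the wait area" */
	while (atomic_load(&flag) != 0)	/* spin until the boot CPU is done */
		;
	return NULL;
}

int main(void)
{
	pthread_t t[NR_SECONDARIES];

	atomic_store(&flag, 1);		/* init_idmap_kpti_bbml2_flag() analogue */
	for (int i = 0; i < NR_SECONDARIES; i++)
		pthread_create(&t[i], NULL, secondary, NULL);

	/* smp_cond_load_acquire(..., VAL == num_online_cpus()) analogue */
	while (atomic_load(&flag) != 1 + NR_SECONDARIES)
		;
	puts("all secondaries parked; boot CPU repaints the linear map");
	atomic_store(&flag, 0);		/* release the secondaries */

	for (int i = 0; i < NR_SECONDARIES; i++)
		pthread_join(t[i], NULL);
	return 0;
}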
