From 113fe88eed53af08800f54a03e463636105831e0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 11 Jun 2022 18:55:15 +0200 Subject: powerpc: Don't include asm/setup.h in asm/machdep.h asm/machdep.h doesn't need asm/setup.h Remove it. Add it directly in files that needs it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3b1dfb19a2c3265fb4abc2bfc7b6eae9261a998b.1654966508.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 52b77684acda..2cf6748755e1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -25,6 +25,7 @@ #include #include #include +#include #include -- cgit v1.2.3 From 46d60bdb1283bb0f22d9480e2d6c972623cb4182 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 6 May 2022 11:14:24 +0200 Subject: powerpc: Include asm/firmware.h in all users of firmware_has_feature() Trying to remove asm/ppc_asm.h from all places that don't need it leads to several failures linked to firmware_has_feature(). To fix it, include asm/firmware.h in all files using firmware_has_feature() All users found with: git grep -L "firmware\.h" ` git grep -l "firmware_has_feature("` Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/11956ec181a034b51a881ac9c059eea72c679a73.1651828453.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s64/hash_pgtable.c | 1 + arch/powerpc/mm/book3s64/pkeys.c | 1 + arch/powerpc/mm/hugetlbpage.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 2e0cad5817ba..ae008b9df0e6 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -13,6 +13,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 753e62ba67af..1d2675ab6711 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b282af39fcf6..bc84a594ca62 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -24,6 +24,7 @@ #include #include #include +#include bool hugetlb_disabled = false; -- cgit v1.2.3 From 1ce844973bb516e95d3f2bcb001a3992548def9d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:32:23 +0200 Subject: powerpc/32: Remove the 'nobats' kernel parameter Mapping without BATs doesn't bring any added value to the user. Remove that option. 
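For context on what mapping "with BATs" means here: a Block Address
Translation register covers one large, power-of-two aligned region with a
single MMU entry instead of page-table entries. The setbat() helper whose
prototype appears in the mmu_decl.h hunk below takes the region and
protection directly; a hypothetical call mapping the first 256MB of RAM
executable at PAGE_OFFSET could look like the line below (the index, size
and protection are made up for illustration, the real calls live in
arch/powerpc/mm/book3s32/mmu.c):

    /* illustrative only: map 256MB of low RAM with BAT 0 */
    setbat(0, PAGE_OFFSET, 0, SZ_256M, PAGE_KERNEL_X);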
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6977314c823cfb728bc0273cea634b41807bfb64.1655202721.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/init_32.c | 11 ----------- arch/powerpc/mm/mmu_decl.h | 1 - 3 files changed, 1 insertion(+), 13 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 49a737fbbd18..1794132db31e 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) { + if (debug_pagealloc_enabled_or_kfence()) { pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 693a3a7a9463..321794747ea1 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -69,12 +69,6 @@ EXPORT_SYMBOL(agp_special_page); void MMU_init(void); -/* - * this tells the system to map all of ram with the segregs - * (i.e. page tables) instead of the bats. - * -- Cort - */ -int __map_without_bats; int __map_without_ltlbs; /* max amount of low RAM to map in */ @@ -85,11 +79,6 @@ unsigned long __max_low_memory = MAX_LOW_MEM; */ static void __init MMU_setup(void) { - /* Check for nobats option (used in mapin_ram). */ - if (strstr(boot_command_line, "nobats")) { - __map_without_bats = 1; - } - if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 63c4b1a4d435..229c72e49198 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -92,7 +92,6 @@ extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); -extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; -- cgit v1.2.3 From 56e54b4e6c477b2a7df43f9a320ae5f9a5bfb16c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:32:24 +0200 Subject: powerpc/32: Remove 'noltlbs' kernel parameter Mapping without large TLBs has no added value on the 8xx. Mapping without large TLBs is still necessary on 40x when selecting CONFIG_KFENCE or CONFIG_DEBUG_PAGEALLOC or CONFIG_STRICT_KERNEL_RWX, but this is done automatically and doesn't require user selection. Remove 'noltlbs' kernel parameter, the user has no reason to use it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/80ca17bd39cf608a8ebd0764d7064a498e131199.1655202721.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/init_32.c | 3 --- arch/powerpc/mm/nohash/8xx.c | 9 --------- 2 files changed, 12 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 321794747ea1..6f2e6210c273 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -79,9 +79,6 @@ unsigned long __max_low_memory = MAX_LOW_MEM; */ static void __init MMU_setup(void) { - if (strstr(boot_command_line, "noltlbs")) { - __map_without_ltlbs = 1; - } if (IS_ENABLED(CONFIG_PPC_8xx)) return; diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 27f9186ae374..6b668ccef836 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -14,8 +14,6 @@ #define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) -extern int __map_without_ltlbs; - static unsigned long block_mapped_ram; /* @@ -28,8 +26,6 @@ phys_addr_t v_block_mapped(unsigned long va) if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) return p + va - VIRT_IMMR_BASE; - if (__map_without_ltlbs) - return 0; if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram) return __pa(va); return 0; @@ -45,8 +41,6 @@ unsigned long p_block_mapped(phys_addr_t pa) if (pa >= p && pa < p + IMMR_SIZE) return VIRT_IMMR_BASE + pa - p; - if (__map_without_ltlbs) - return 0; if (pa < block_mapped_ram) return (unsigned long)__va(pa); return 0; @@ -153,9 +147,6 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) mmu_mapin_immr(); - if (__map_without_ltlbs) - return 0; - mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); if (debug_pagealloc_enabled_or_kfence()) { top = boundary; -- cgit v1.2.3 From 513f5bbac7b9ca7046bc350dd6eb39b957e338a7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:32:25 +0200 Subject: powerpc/32: Remove __map_without_ltlbs __map_without_ltlbs is used only for 40x, and only when STRICT_KERNEL_RWX, KFENCE or DEBUG_PAGEALLOC is active. Do the verification directly in 40x version of mmu_mapin_ram() and remove __map_without_ltlbs from core ppc32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3422094db965d218c4c3d8580f526963a9ac897f.1655202721.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/init_32.c | 23 ----------------------- arch/powerpc/mm/nohash/40x.c | 9 +++++++-- 2 files changed, 7 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6f2e6210c273..62d9af6606cd 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -69,29 +69,9 @@ EXPORT_SYMBOL(agp_special_page); void MMU_init(void); -int __map_without_ltlbs; - /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; -/* - * Check for command-line options that affect what MMU_init will do. 
- */ -static void __init MMU_setup(void) -{ - if (IS_ENABLED(CONFIG_PPC_8xx)) - return; - - if (IS_ENABLED(CONFIG_KFENCE)) - __map_without_ltlbs = 1; - - if (debug_pagealloc_enabled()) - __map_without_ltlbs = 1; - - if (strict_kernel_rwx_enabled()) - __map_without_ltlbs = 1; -} - /* * MMU_init sets up the basic memory mappings for the kernel, * including both RAM and possibly some I/O regions, @@ -102,9 +82,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); - /* parse args from command line */ - MMU_setup(); - /* * Reserve gigantic pages for hugetlb. This MUST occur before * lowmem_end_addr is initialized below. diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index b32e465a3d52..3684d6e570fb 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -43,7 +43,6 @@ #include -extern int __map_without_ltlbs; /* * MMU_init_hw does the chip-specific initialization of the MMU hardware. */ @@ -94,7 +93,13 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) p = 0; s = total_lowmem; - if (__map_without_ltlbs) + if (IS_ENABLED(CONFIG_KFENCE)) + return 0; + + if (debug_pagealloc_enabled()) + return 0; + + if (strict_kernel_rwx_enabled()) return 0; while (s >= LARGE_PAGE_SIZE_16M) { -- cgit v1.2.3 From dd8de84b57b02ba9c1fe530a6d916c0853f136bd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:43:35 +0200 Subject: powerpc/ptdump: Fix display of RW pages on FSL_BOOK3E On FSL_BOOK3E, _PAGE_RW is defined with two bits, one for user and one for supervisor. As soon as one of the two bits is set, the page has to be display as RW. But the way it is implemented today requires both bits to be set in order to display it as RW. Instead of display RW when _PAGE_RW bits are set and R otherwise, reverse the logic and display R when _PAGE_RW bits are all 0 and RW otherwise. This change has no impact on other platforms as _PAGE_RW is a single bit on all of them. Fixes: 8eb07b187000 ("powerpc/mm: Dump linux pagetables") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c33b96317811edf691e81698aaee8fa45ec3449.1656427391.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/shared.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index 03607ab90c66..f884760ca5cf 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -17,9 +17,9 @@ static const struct flag_info flag_array[] = { .clear = " ", }, { .mask = _PAGE_RW, - .val = _PAGE_RW, - .set = "rw", - .clear = "r ", + .val = 0, + .set = "r ", + .clear = "rw", }, { .mask = _PAGE_EXEC, .val = _PAGE_EXEC, -- cgit v1.2.3 From 09317643117ade87c03158341e87466413fa8f1a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:54 +0200 Subject: powerpc/64e: Fix early TLB miss with KUAP With KUAP, the TLB miss handler bails out when an access to user memory is performed with a nul TID. But the normal TLB miss routine which is only used early during boot does the check regardless for all memory areas, not only user memory. By chance there is no early IO or vmalloc access, but when KASAN come we will start having early TLB misses. Fix it by creating a special branch for user accesses similar to the one in the 'bolted' TLB miss handlers. 
Unfortunately SPRN_MAS1 is now read too early and there are no registers available to preserve it so it will be read a second time. Fixes: 57bc963837f5 ("powerpc/kuap: Wire-up KUAP on book3e/64") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8d6c5859a45935d6e1a336da4dc20be421e8cea7.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/tlb_low_64e.S | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 8b97c4acfebf..9e9ab3803fb2 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -583,7 +583,7 @@ itlb_miss_fault_e6500: */ rlwimi r11,r14,32-19,27,27 rlwimi r11,r14,32-16,19,19 - beq normal_tlb_miss + beq normal_tlb_miss_user /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 cmpldi cr0,r15,8 /* Check for vmalloc region */ @@ -626,7 +626,7 @@ itlb_miss_fault_e6500: cmpldi cr0,r15,0 /* Check for user region */ std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ - beq normal_tlb_miss + beq normal_tlb_miss_user li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ oris r11,r11,_PAGE_ACCESSED@h @@ -653,6 +653,12 @@ itlb_miss_fault_e6500: * r11 = PTE permission mask * r10 = crap (free to use) */ +normal_tlb_miss_user: +#ifdef CONFIG_PPC_KUAP + mfspr r14,SPRN_MAS1 + rlwinm. r14,r14,0,0x3fff0000 + beq- normal_tlb_miss_access_fault /* KUAP fault */ +#endif normal_tlb_miss: /* So we first construct the page table address. We do that by * shifting the bottom of the address (not the region ID) by @@ -683,11 +689,6 @@ finish_normal_tlb_miss: /* Check if required permissions are met */ andc. r15,r11,r14 bne- normal_tlb_miss_access_fault -#ifdef CONFIG_PPC_KUAP - mfspr r11,SPRN_MAS1 - rlwinm. r10,r11,0,0x3fff0000 - beq- normal_tlb_miss_access_fault /* KUAP fault */ -#endif /* Now we build the MAS: * @@ -709,9 +710,7 @@ finish_normal_tlb_miss: rldicl r10,r14,64-8,64-8 cmpldi cr0,r10,BOOK3E_PAGESZ_4K beq- 1f -#ifndef CONFIG_PPC_KUAP mfspr r11,SPRN_MAS1 -#endif rlwimi r11,r14,31,21,24 rlwinm r11,r11,0,21,19 mtspr SPRN_MAS1,r11 -- cgit v1.2.3 From 3adfb457b84bd6de4e78a99814038fbd7205f253 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:55 +0200 Subject: powerpc/64e: Remove MMU_FTR_USE_TLBRSRV and MMU_FTR_USE_PAIRED_MAS Commit fb5a515704d7 ("powerpc: Remove platforms/wsp and associated pieces") removed the last CPU having features MMU_FTRS_A2 and commit cd68098bcedd ("powerpc: Clean up MMU_FTRS_A2 and MMU_FTR_TYPE_3E") removed MMU_FTRS_A2 which was the last user of MMU_FTR_USE_TLBRSRV and MMU_FTR_USE_PAIRED_MAS. Remove all code that relies on MMU_FTR_USE_TLBRSRV and MMU_FTR_USE_PAIRED_MAS. With this change done, TLB miss can happen before the mmu feature fixups. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cfd5a0ecdb1598da968832e1bddf7431ec267200.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/book3e_hugetlbpage.c | 30 ++++--------- arch/powerpc/mm/nohash/tlb_low_64e.S | 66 ----------------------------- 2 files changed, 8 insertions(+), 88 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c index 307ca919d393..c7d4b317a823 100644 --- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c +++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c @@ -103,21 +103,11 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) int found = 0; mtspr(SPRN_MAS6, pid << 16); - if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { - asm volatile( - "li %0,0\n" - "tlbsx. 0,%1\n" - "bne 1f\n" - "li %0,1\n" - "1:\n" - : "=&r"(found) : "r"(ea)); - } else { - asm volatile( - "tlbsx 0,%1\n" - "mfspr %0,0x271\n" - "srwi %0,%0,31\n" - : "=&r"(found) : "r"(ea)); - } + asm volatile( + "tlbsx 0,%1\n" + "mfspr %0,0x271\n" + "srwi %0,%0,31\n" + : "=&r"(found) : "r"(ea)); return found; } @@ -169,13 +159,9 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte) mtspr(SPRN_MAS1, mas1); mtspr(SPRN_MAS2, mas2); - if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { - mtspr(SPRN_MAS7_MAS3, mas7_3); - } else { - if (mmu_has_feature(MMU_FTR_BIG_PHYS)) - mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); - mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); - } + if (mmu_has_feature(MMU_FTR_BIG_PHYS)) + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); + mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); asm volatile ("tlbwe"); diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 9e9ab3803fb2..a59485c549a7 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -152,16 +152,7 @@ tlb_miss_common_bolted: clrrdi r15,r15,3 beq tlb_miss_fault_bolted /* No PGDIR, bail */ -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) - ldx r14,r14,r15 /* grab pgd entry */ - beq tlb_miss_done_bolted /* tlb exists already, bail */ -MMU_FTR_SECTION_ELSE ldx r14,r14,r15 /* grab pgd entry */ -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 @@ -674,16 +665,7 @@ normal_tlb_miss: clrrdi r14,r14,3 or r10,r15,r14 -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) ld r14,0(r10) - beq normal_tlb_miss_done -MMU_FTR_SECTION_ELSE - ld r14,0(r10) -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) finish_normal_tlb_miss: /* Check if required permissions are met */ @@ -727,13 +709,9 @@ finish_normal_tlb_miss: li r11,MAS3_SW|MAS3_UW andc r15,r15,r11 1: -BEGIN_MMU_FTR_SECTION srdi r16,r15,32 mtspr SPRN_MAS3,r15 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r15 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe @@ -809,13 +787,6 @@ virt_page_table_tlb_miss: #else 1: #endif -BEGIN_MMU_FTR_SECTION - /* Search if we already have a TLB entry for that virtual address, and - * if we do, bail out. - */ - PPC_TLBSRX_DOT(0,R16) - beq virt_page_table_tlb_miss_done -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Now, we need to walk the page tables. First check if we are in * range. 
@@ -866,41 +837,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) clrldi r11,r15,4 /* remove region ID from RPN */ ori r10,r11,1 /* Or-in SR */ -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe -BEGIN_MMU_FTR_SECTION -virt_page_table_tlb_miss_done: - - /* We have overridden MAS2:EPN but currently our primary TLB miss - * handler will always restore it so that should not be an issue, - * if we ever optimize the primary handler to not write MAS2 on - * some cases, we'll have to restore MAS2:EPN here based on the - * original fault's DEAR. If we do that we have to modify the - * ITLB miss handler to also store SRR0 in the exception frame - * as DEAR. - * - * However, one nasty thing we did is we cleared the reservation - * (well, potentially we did). We do a trick here thus if we - * are not a level 0 exception (we interrupted the TLB miss) we - * offset the return address by -4 in order to replay the tlbsrx - * instruction there - */ - subf r10,r13,r12 - cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE - bne- 1f - ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) - addi r10,r11,-4 - std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) -1: -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Return to caller, normal case */ TLB_MISS_EPILOG_SUCCESS rfi @@ -1115,13 +1057,9 @@ htw_tlb_miss: */ ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe @@ -1202,13 +1140,9 @@ tlb_load_linear: clrldi r10,r10,4 /* clear region bits */ ori r10,r10,MAS3_SR|MAS3_SW|MAS3_SX -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe -- cgit v1.2.3 From 128c1ea2f838d3031a1c475607860e4271a8e9dc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:57 +0200 Subject: powerpc/64e: Move virtual memory closer to linear memory Today nohash/64 have linear memory based at 0xc000000000000000 and virtual memory based at 0x8000000000000000. In order to implement KASAN, we need to regroup both areas. Move virtual memmory at 0xc000100000000000. This complicates a bit TLB miss handlers. Until now, memory region was easily identified with the 4 higher bits of address: - 0 ==> User - c ==> Linear Memory - 8 ==> Virtual Memory Now we need to rely on the 20 higher bits, with: - 0xxxx ==> User - c0000 ==> Linear Memory - c0001 ==> Virtual Memory Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4b225168031449fc34fc7132f3923cc8dc54af60.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/tlb_low_64e.S | 64 +++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 27 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index a59485c549a7..68ffbfdba894 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -213,10 +213,11 @@ itlb_miss_kernel_bolted: tlb_miss_kernel_bolted: mfspr r10,SPRN_MAS1 ld r14,PACA_KERNELPGD(r13) - cmpldi cr0,r15,8 /* Check for vmalloc region */ + srdi r15,r16,44 /* get kernel region */ + andi. 
r15,r15,1 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 - beq+ tlb_miss_common_bolted + bne+ tlb_miss_common_bolted tlb_miss_fault_bolted: /* We need to check if it was an instruction miss */ @@ -498,7 +499,9 @@ tlb_miss_huge_e6500: tlb_miss_kernel_e6500: ld r14,PACA_KERNELPGD(r13) - cmpldi cr1,r15,8 /* Check for vmalloc region */ + srdi r15,r16,44 /* get kernel region */ + xoris r15,r15,0xc /* Check for vmalloc region */ + cmplwi cr1,r15,1 beq+ cr1,tlb_miss_common_e6500 tlb_miss_fault_e6500: @@ -532,16 +535,18 @@ itlb_miss_fault_e6500: */ mfspr r14,SPRN_ESR mfspr r16,SPRN_DEAR /* get faulting address */ - srdi r15,r16,60 /* get region */ - cmpldi cr0,r15,0xc /* linear mapping ? */ + srdi r15,r16,44 /* get region */ + xoris r15,r15,0xc + cmpldi cr0,r15,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r15,1 /* vmalloc mapping ? */ /* The page tables are mapped virtually linear. At this point, though, * we don't know whether we are trying to fault in a first level * virtual address or a virtual page table address. We can get that * from bit 0x1 of the region ID which we have set for a page table */ - andi. r10,r15,0x1 + andis. r10,r15,0x1 bne- virt_page_table_tlb_miss std r14,EX_TLB_ESR(r12); /* save ESR */ @@ -553,7 +558,7 @@ itlb_miss_fault_e6500: /* We do the user/kernel test for the PID here along with the RW test */ - cmpldi cr0,r15,0 /* Check for user region */ + srdi. r15,r16,60 /* Check for user region */ /* We pre-test some combination of permissions to avoid double * faults: @@ -577,10 +582,9 @@ itlb_miss_fault_e6500: beq normal_tlb_miss_user /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r15,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 - beq+ normal_tlb_miss + beq+ cr1,normal_tlb_miss /* We got a crappy address, just fault with whatever DEAR and ESR * are here @@ -606,16 +610,18 @@ itlb_miss_fault_e6500: * * Faulting address is SRR0 which is already in r16 */ - srdi r15,r16,60 /* get region */ - cmpldi cr0,r15,0xc /* linear mapping ? */ + srdi r15,r16,44 /* get region */ + xoris r15,r15,0xc + cmpldi cr0,r15,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r15,1 /* vmalloc mapping ? */ /* We do the user/kernel test for the PID here along with the RW test */ li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */ oris r11,r11,_PAGE_ACCESSED@h - cmpldi cr0,r15,0 /* Check for user region */ + srdi. r15,r16,60 /* Check for user region */ std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ beq normal_tlb_miss_user @@ -623,10 +629,9 @@ itlb_miss_fault_e6500: oris r11,r11,_PAGE_ACCESSED@h /* XXX replace the RMW cycles with immediate loads + writes */ mfspr r10,SPRN_MAS1 - cmpldi cr0,r15,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 - beq+ normal_tlb_miss + beq+ cr1,normal_tlb_miss /* We got a crappy address, just fault */ TLB_MISS_EPILOG_ERROR @@ -659,10 +664,11 @@ normal_tlb_miss: * NOTE: For 64K pages, we do things slightly differently in * order to handle the weird page table format used by linux */ - ori r10,r15,0x1 + srdi r15,r16,44 + oris r10,r15,0x1 rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4 - sldi r15,r10,60 - clrrdi r14,r14,3 + sldi r15,r10,44 + clrrdi r14,r14,19 or r10,r15,r14 ld r14,0(r10) @@ -763,6 +769,7 @@ normal_tlb_miss_access_fault: */ virt_page_table_tlb_miss: /* Are we hitting a kernel page table ? 
*/ + srdi r15,r16,60 andi. r10,r15,0x8 /* The cool thing now is that r10 contains 0 for user and 8 for kernel, @@ -791,7 +798,8 @@ virt_page_table_tlb_miss: /* Now, we need to walk the page tables. First check if we are in * range. */ - rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4 + rldicl r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4 + cmpldi r10,0x80 bne- virt_page_table_tlb_miss_fault /* Get the PGD pointer */ @@ -910,23 +918,24 @@ virt_page_table_tlb_miss_whacko_fault: */ mfspr r14,SPRN_ESR mfspr r16,SPRN_DEAR /* get faulting address */ - srdi r11,r16,60 /* get region */ - cmpldi cr0,r11,0xc /* linear mapping ? */ + srdi r11,r16,44 /* get region */ + xoris r11,r11,0xc + cmpldi cr0,r11,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r11,1 /* vmalloc mapping ? */ /* We do the user/kernel test for the PID here along with the RW test */ - cmpldi cr0,r11,0 /* Check for user region */ + srdi. r11,r16,60 /* Check for user region */ ld r15,PACAPGD(r13) /* Load user pgdir */ beq htw_tlb_miss /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r11,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ - beq+ htw_tlb_miss + beq+ cr1,htw_tlb_miss /* We got a crappy address, just fault with whatever DEAR and ESR * are here @@ -952,19 +961,20 @@ virt_page_table_tlb_miss_whacko_fault: * * Faulting address is SRR0 which is already in r16 */ - srdi r11,r16,60 /* get region */ - cmpldi cr0,r11,0xc /* linear mapping ? */ + srdi r11,r16,44 /* get region */ + xoris r11,r11,0xc + cmpldi cr0,r11,0 /* linear mapping ? */ beq tlb_load_linear /* yes -> go to linear map load */ + cmpldi cr1,r11,1 /* vmalloc mapping ? */ /* We do the user/kernel test for the PID here along with the RW test */ - cmpldi cr0,r11,0 /* Check for user region */ + srdi. r11,r16,60 /* Check for user region */ ld r15,PACAPGD(r13) /* Load user pgdir */ beq htw_tlb_miss /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 - cmpldi cr0,r11,8 /* Check for vmalloc region */ rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ -- cgit v1.2.3 From c7b9ed7c34a9f5dbf8222d63e3e313cef9f3150b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:59 +0200 Subject: powerpc/64e: KASAN Full support for BOOK3E/64 We now have memory organised in a way that allows implementing KASAN. Unlike book3s/64, book3e always has translation active so the only thing needed to use KASAN is to setup an early zero shadow mapping just after setting a stack pointer and before calling early_setup(). The memory layout is now as follows +------------------------+ Kernel virtual map end (0xc000200000000000) | | | 16TB of KASAN map | | | +------------------------+ Kernel KASAN shadow map start | | | 16TB of IO map | | | +------------------------+ Kernel IO map start | | | 16TB of vmemmap | | | +------------------------+ Kernel vmemmap start | | | 16TB of vmap | | | +------------------------+ Kernel virt start (0xc000100000000000) | | | 64TB of linear mem | | | +------------------------+ Kernel linear (0xc.....) 
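As a plausibility check on the layout above: with generic KASAN, each 8
bytes of address space need 1 byte of shadow, so the 64TB linear map needs
8TB of shadow and the three 16TB areas (vmap, vmemmap, IO) need 2TB each,
which fits comfortably in the 16TB reserved at the top. The
address-to-shadow translation is the generic one (reproduced from memory
from include/linux/kasan.h for reference; KASAN_SHADOW_OFFSET is a
platform-chosen constant, not part of this patch):

    static inline void *kasan_mem_to_shadow(const void *addr)
    {
            return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
                    + KASAN_SHADOW_OFFSET;
    }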
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0bef8beda27baf71e3b9e8b13e620fba6e19499b.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/kasan/Makefile | 1 + arch/powerpc/mm/kasan/init_book3e_64.c | 133 +++++++++++++++++++++++++++++++++ arch/powerpc/mm/kasan/init_book3s_64.c | 2 + 3 files changed, 136 insertions(+) create mode 100644 arch/powerpc/mm/kasan/init_book3e_64.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile index 4999aadb1867..699eeffd9f55 100644 --- a/arch/powerpc/mm/kasan/Makefile +++ b/arch/powerpc/mm/kasan/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_PPC32) += init_32.o obj-$(CONFIG_PPC_8xx) += 8xx.o obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o obj-$(CONFIG_PPC_BOOK3S_64) += init_book3s_64.o +obj-$(CONFIG_PPC_BOOK3E_64) += init_book3e_64.o diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c new file mode 100644 index 000000000000..11519e88dc6b --- /dev/null +++ b/arch/powerpc/mm/kasan/init_book3e_64.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KASAN for 64-bit Book3e powerpc + * + * Copyright 2022, Christophe Leroy, CS GROUP France + */ + +#define DISABLE_BRANCH_PROFILING + +#include +#include +#include +#include + +#include + +static inline bool kasan_pud_table(p4d_t p4d) +{ + return p4d_page(p4d) == virt_to_page(lm_alias(kasan_early_shadow_pud)); +} + +static inline bool kasan_pmd_table(pud_t pud) +{ + return pud_page(pud) == virt_to_page(lm_alias(kasan_early_shadow_pmd)); +} + +static inline bool kasan_pte_table(pmd_t pmd) +{ + return pmd_page(pmd) == virt_to_page(lm_alias(kasan_early_shadow_pte)); +} + +static int __init kasan_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = pgd_offset_k(ea); + p4dp = p4d_offset(pgdp, ea); + if (kasan_pud_table(*p4dp)) { + pudp = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); + memcpy(pudp, kasan_early_shadow_pud, PUD_TABLE_SIZE); + p4d_populate(&init_mm, p4dp, pudp); + } + pudp = pud_offset(p4dp, ea); + if (kasan_pmd_table(*pudp)) { + pmdp = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); + memcpy(pmdp, kasan_early_shadow_pmd, PMD_TABLE_SIZE); + pud_populate(&init_mm, pudp, pmdp); + } + pmdp = pmd_offset(pudp, ea); + if (kasan_pte_table(*pmdp)) { + ptep = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); + memcpy(ptep, kasan_early_shadow_pte, PTE_TABLE_SIZE); + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + ptep = pte_offset_kernel(pmdp, ea); + + __set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot), 0); + + return 0; +} + +static void __init kasan_init_phys_region(void *start, void *end) +{ + unsigned long k_start, k_end, k_cur; + void *va; + + if (start >= end) + return; + + k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE); + k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE); + + va = memblock_alloc(k_end - k_start, PAGE_SIZE); + for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE) + kasan_map_kernel_page(k_cur, __pa(va), PAGE_KERNEL); +} + +void __init kasan_early_init(void) +{ + int i; + unsigned long addr; + pgd_t *pgd = pgd_offset_k(KASAN_SHADOW_START); + pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL); + + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); + + for (i = 0; i < 
PTRS_PER_PTE; i++) + __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, + &kasan_early_shadow_pte[i], zero_pte, 0); + + for (i = 0; i < PTRS_PER_PMD; i++) + pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i], + kasan_early_shadow_pte); + + for (i = 0; i < PTRS_PER_PUD; i++) + pud_populate(&init_mm, &kasan_early_shadow_pud[i], + kasan_early_shadow_pmd); + + for (addr = KASAN_SHADOW_START; addr != KASAN_SHADOW_END; addr += PGDIR_SIZE) + p4d_populate(&init_mm, p4d_offset(pgd++, addr), kasan_early_shadow_pud); +} + +void __init kasan_init(void) +{ + phys_addr_t start, end; + u64 i; + pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO); + + for_each_mem_range(i, &start, &end) + kasan_init_phys_region((void *)start, (void *)end); + + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) + kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE); + + for (i = 0; i < PTRS_PER_PTE; i++) + __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, + &kasan_early_shadow_pte[i], zero_pte, 0); + + flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + + /* Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KASAN init done\n"); +} + +void __init kasan_late_init(void) { } diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c index 0da5566d6b84..9300d641cf9a 100644 --- a/arch/powerpc/mm/kasan/init_book3s_64.c +++ b/arch/powerpc/mm/kasan/init_book3s_64.c @@ -99,4 +99,6 @@ void __init kasan_init(void) pr_info("KASAN init done\n"); } +void __init kasan_early_init(void) { } + void __init kasan_late_init(void) { } -- cgit v1.2.3 From 2b461880c20777d317b4ad24ef040918860133ca Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 18 Jul 2022 19:51:58 +1000 Subject: powerpc: Fix all occurences of duplicate words Since commit 87c78b612f4f ("powerpc: Fix all occurences of "the the"") fixed "the the", there's now a steady stream of patches fixing other duplicate words. Just fix them all at once, to save the overhead of dealing with individual patches for each case. This leaves a few cases of "that that", which in some contexts is correct. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220718095158.326606-1-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/hash_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index fc92613dc2bf..363a9447d63d 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -408,7 +408,7 @@ repeat: ssize); if (ret == -1) { /* - * Try to to keep bolted entries in primary. + * Try to keep bolted entries in primary. * Remove non bolted entries and try insert again */ ret = mmu_hash_ops.hpte_remove(hpteg); -- cgit v1.2.3 From abf0878ce95f8a9b47d8ecf2de1d4617bec21711 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 May 2022 12:23:56 +1000 Subject: powerpc/64s: POWER10 nest MMU does not require flush escalation workaround Per (non-public) Nest MMU Workbook, POWER10 and POWER9P NMMU does not cache PTEs in PWC, so does not require PWC flush to invalidate these translations. Skip the workaround on POWER10 and later. 
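In practice the escalation happens at the tlbie call sites in the same
file: when a coprocessor (nest MMU) context is attached on an affected
CPU, a PID-wide RIC=0 (TLB only) invalidation is upgraded to RIC=2 (flush
everything, including the page walk cache). Approximately, and quoted from
memory rather than from the hunk below:

    if (mm_needs_flush_escalation(mm))
            _tlbie_pid(pid, RIC_FLUSH_ALL);  /* RIC=2: TLB + PWC */
    else
            _tlbie_pid(pid, RIC_FLUSH_TLB);  /* RIC=0: TLB only */

With this patch, POWER10 and P9P take the cheaper RIC_FLUSH_TLB path even
when coprocessors are attached.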
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525022358.780745-2-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index dda51fef2d2e..4e29b619578c 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -755,10 +755,18 @@ EXPORT_SYMBOL(radix__local_flush_tlb_page); static bool mm_needs_flush_escalation(struct mm_struct *mm) { /* - * P9 nest MMU has issues with the page walk cache - * caching PTEs and not flushing them properly when - * RIC = 0 for a PID/LPID invalidate + * The P9 nest MMU has issues with the page walk cache caching PTEs + * and not flushing them when RIC = 0 for a PID/LPID invalidate. + * + * This may have been fixed in shipping firmware (by disabling PWC + * or preventing it from caching PTEs), but until that is confirmed, + * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes + * to RIC=2. + * + * POWER10 (and P9P) does not have this problem. */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return false; if (atomic_read(&mm->context.copros) > 0) return true; return false; -- cgit v1.2.3 From 2a8a0f420f74425bf5f80760fd14d7a2c3abb87d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 May 2022 12:23:57 +1000 Subject: powerpc/64s: POWER10 nest MMU can upgrade PTE access authority without TLB flush The nest MMU in POWER9 does not re-fetch the PTE in response to permission mismatch, contrary to the architecture[*] and unlike the core MMU. This requires a TLB flush before upgrading permissions of valid PTEs, for any address space with a coprocessor attached. Per (non-public) Nest MMU Workbook, POWER10 nest MMU conforms to the architecture in this regard, so skip the workaround. [*] See: Power ISA Version 3.1B, 6.10.1.2 Modifying a Translation Table Entry, Setting a Reference or Change Bit or Upgrading Access Authority (PTE Subject to Atomic Hardware Updates): "If the only change being made to a valid PTE that is subject to atomic hardware updates is to set the Reference or Change bit to 1 or to upgrade access authority, a simpler sequence suffices because the translation hardware will refetch the PTE if an access is attempted for which the only problems were reference and/or change bits needing to be set or insufficient access authority." Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525022358.780745-3-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 10 ++++---- arch/powerpc/mm/book3s64/radix_pgtable.c | 35 +++++++++++++++++----------- 2 files changed, 28 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index d2fb776febb4..5e3195568525 100644 --- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c @@ -48,11 +48,13 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; /* - * To avoid NMMU hang while relaxing access we need to flush the tlb before - * we set the new value. + * POWER9 NMMU must flush the TLB after clearing the PTE before + * installing a PTE with more relaxed access permissions, see + * radix__ptep_set_access_flags. 
*/ - if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && - (atomic_read(&mm->context.copros) > 0)) + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + atomic_read(&mm->context.copros) > 0) radix__flush_hugetlb_page(vma, addr); set_huge_pte_at(vma->vm_mm, addr, ptep, pte); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index db2f3d193448..698274109c91 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1018,16 +1018,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, unsigned long change = pte_val(entry) ^ pte_val(*ptep); /* - * To avoid NMMU hang while relaxing access, we need mark - * the pte invalid in between. + * On POWER9, the NMMU is not able to relax PTE access permissions + * for a translation with a TLB. The PTE must be invalidated, TLB + * flushed before the new PTE is installed. + * + * This only needs to be done for radix, because hash translation does + * flush when updating the linux pte (and we don't support NMMU + * accelerators on HPT on POWER9 anyway XXX: do we?). + * + * POWER10 (and P9P) NMMU does behave as per ISA. */ - if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) { + if (!cpu_has_feature(CPU_FTR_ARCH_31) && (change & _PAGE_RW) && + atomic_read(&mm->context.copros) > 0) { unsigned long old_pte, new_pte; old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID); - /* - * new value of pte - */ new_pte = old_pte | set; radix__flush_tlb_page_psize(mm, address, psize); __radix_pte_update(ptep, _PAGE_INVALID, new_pte); @@ -1035,9 +1040,12 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, __radix_pte_update(ptep, 0, set); /* * Book3S does not require a TLB flush when relaxing access - * restrictions when the address space is not attached to a - * NMMU, because the core MMU will reload the pte after taking - * an access fault, which is defined by the architecture. + * restrictions when the address space (modulo the POWER9 nest + * MMU issue above) because the MMU will reload the PTE after + * taking an access fault, as defined by the architecture. See + * "Setting a Reference or Change Bit or Upgrading Access + * Authority (PTE Subject to Atomic Hardware Updates)" in + * Power ISA Version 3.1B. */ } /* See ptesync comment in radix__set_pte_at */ @@ -1050,11 +1058,12 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; /* - * To avoid NMMU hang while relaxing access we need to flush the tlb before - * we set the new value. We need to do this only for radix, because hash - * translation does flush when updating the linux pte. + * POWER9 NMMU must flush the TLB after clearing the PTE before + * installing a PTE with more relaxed access permissions, see + * radix__ptep_set_access_flags. */ - if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && (atomic_read(&mm->context.copros) > 0)) radix__flush_tlb_page(vma, addr); -- cgit v1.2.3 From 980bbf7ca72012d317617fcdbfabe8708e4cef29 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:34:08 +0200 Subject: powerpc/32: Call mmu_mark_initmem_nx() regardless of data block mapping. mark_initmem_nx() calls either mmu_mark_initmem_nx() or set_memory_attr() based on return from v_block_mapped() of _sinittext. 
But we can now handle text and data independently, so that text may be mapped by block even when data is mapped by pages. On the 8xx for instance, at startup 32Mbytes of memory are pinned in TLB. So the pinned entries need to go away for sinittext. In next patch a BAT will be set to also covers sinittext on book3s/32. So it will also be needed to call mmu_mark_initmem_nx() even when data above sinittext is not mapped with BATs. As this is highly dependent on the platform, call mmu_mark_initmem_nx() regardless of data block mapping. Then the platform will know what to do. Modify 8xx mmu_mark_initmem_nx() so that inittext mapping is modified only when pagealloc debug and kfence are not active, otherwise inittext is mapped with standard pages. And don't do anything on kernel text which is already mapped with PAGE_KERNEL_TEXT. Fixes: da1adea07576 ("powerpc/8xx: Allow STRICT_KERNEL_RwX with pinned TLB") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/db3fc14f3bfa6215b0786ef58a6e2bc1e1f964d7.1655202804.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/8xx.c | 4 ++-- arch/powerpc/mm/pgtable_32.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 6b668ccef836..dbbfe897455d 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -170,8 +170,8 @@ void mmu_mark_initmem_nx(void) unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); - mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + if (!debug_pagealloc_enabled_or_kfence()) + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); mmu_pin_tlb(block_mapped_ram, false); } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index a56ade39dc68..3ac73f9fb5d5 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,9 +135,9 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) { - mmu_mark_initmem_nx(); - } else { + mmu_mark_initmem_nx(); + + if (!v_block_mapped((unsigned long)_sinittext)) { set_memory_nx((unsigned long)_sinittext, numpages); set_memory_rw((unsigned long)_sinittext, numpages); } -- cgit v1.2.3 From 2a0fb3c155c97c75176e557d61f8e66c1bd9b735 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:34:09 +0200 Subject: powerpc/32: Set an IBAT covering up to _einittext during init Always set an IBAT covering up to _einittext during init because when CONFIG_MODULES is not selected there is no reason to have an exception handler for kernel instruction TLB misses. It implies DBAT and IBAT are now totaly independent, IBATs are set by setibat() and DBAT by setbat(). 
This allows to revert commit 9bb162fa26ed ("powerpc/603: Fix boot failure with DEBUG_PAGEALLOC and KFENCE") Reported-by: Maxime Bizon Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ce7f04a39593934d9b1ee68c69144ccd3d4da4a1.1655202804.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 1794132db31e..a96b73006dfb 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -159,7 +159,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; + unsigned long size; + size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET); + setibat(0, PAGE_OFFSET, 0, size, PAGE_KERNEL_X); if (debug_pagealloc_enabled_or_kfence()) { pr_debug_once("Read-Write memory mapped without BATs\n"); @@ -245,10 +248,9 @@ void mmu_mark_rodata_ro(void) } /* - * Set up one of the I/D BAT (block address translation) register pairs. + * Set up one of the D BAT (block address translation) register pairs. * The parameters are not checked; in particular size must be a power * of 2 between 128k and 256M. - * On 603+, only set IBAT when _PAGE_EXEC is set */ void __init setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot) @@ -284,10 +286,6 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* G bit must be zero in IBATs */ flags &= ~_PAGE_EXEC; } - if (flags & _PAGE_EXEC) - bat[0] = bat[1]; - else - bat[0].batu = bat[0].batl = 0; bat_addrs[index].start = virt; bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; -- cgit v1.2.3 From 6042a1652d643d1d34fa89bb314cb102960c0800 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 1 Jul 2022 08:06:15 +0200 Subject: powerpc/32s: Fix boot failure with KASAN + SMP + JUMP_LABEL_FEATURE_CHECK_DEBUG Since commit 4291d085b0b0 ("powerpc/32s: Make pte_update() non atomic on 603 core"), pte_update() has been using mmu_has_feature(MMU_FTR_HPTE_TABLE) to avoid a useless atomic operation on 603 cores. When kasan_early_init() sets up the early zero shadow, it uses __set_pte_at(). On book3s/32, __set_pte_at() calls pte_update() when CONFIG_SMP is selected in order to ensure the preservation of _PAGE_HASHPTE in case of concurrent update of the PTE. But that's too early for mmu_has_feature(), so when CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG is selected, mmu_has_feature() calls printk(). That's too early to call printk() because KASAN early zero shadow page is not set up yet. It leads to a deadlock. However, when kasan_early_init() is called, there is only one CPU running and no risk of concurrent PTE update. So __set_pte_at() can be called with the 'percpu' flag. With that flag set, the PTE is written directly instead of being written via pte_update(). 
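A simplified sketch of the distinction the fix relies on, for book3s/32
with CONFIG_SMP (the real __set_pte_at() lives in
arch/powerpc/include/asm/book3s/32/pgtable.h; the pte_update() arguments
below are paraphrased, not quoted):

    if (percpu) {
            /* Only one CPU can touch this PTE (early boot): store it
             * directly, so no pte_update(), no mmu_has_feature(), and
             * no too-early printk(). */
            *ptep = pte;
    } else {
            /* Normal SMP case: atomic update preserving _PAGE_HASHPTE;
             * this path goes through pte_update() and therefore
             * mmu_has_feature(MMU_FTR_HPTE_TABLE). */
            pte_update(mm, addr, ptep, ~_PAGE_HASHPTE,
                       pte_val(pte) & ~_PAGE_HASHPTE, 0);
    }

Passing percpu=1 from kasan_early_init(), as the one-character change
below does, selects the direct-store branch.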
Fixes: 4291d085b0b0 ("powerpc/32s: Make pte_update() non atomic on 603 core") Reported-by: Erhard Furtner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2ee707512b8b212b079b877f4ceb525a1606a3fb.1656655567.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/kasan/init_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index f3e4d069e0ba..a70828a6d935 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -25,7 +25,7 @@ static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot) int i; for (i = 0; i < PTRS_PER_PTE; i++, ptep++) - __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); + __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 1); } int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) -- cgit v1.2.3 From 2255411d1d0f0661d1e5acd5f6edf4e6652a345a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Jul 2022 16:19:32 +0200 Subject: powerpc/44x: Fix build failure with GCC 12 (unrecognized opcode: `wrteei') Building ppc40x_defconfig leads to following error CC arch/powerpc/kernel/idle.o {standard input}: Assembler messages: {standard input}:67: Error: unrecognized opcode: `wrteei' {standard input}:78: Error: unrecognized opcode: `wrteei' Add -mcpu=440 by default and alternatively 464 and 476. Once that's done, -mcpu=powerpc is only for book3s/32 now. But then comes CC arch/powerpc/kernel/io.o {standard input}: Assembler messages: {standard input}:198: Error: unrecognized opcode: `eieio' {standard input}:230: Error: unrecognized opcode: `eieio' {standard input}:245: Error: unrecognized opcode: `eieio' {standard input}:254: Error: unrecognized opcode: `eieio' {standard input}:273: Error: unrecognized opcode: `eieio' {standard input}:396: Error: unrecognized opcode: `eieio' {standard input}:404: Error: unrecognized opcode: `eieio' {standard input}:423: Error: unrecognized opcode: `eieio' {standard input}:512: Error: unrecognized opcode: `eieio' {standard input}:520: Error: unrecognized opcode: `eieio' {standard input}:539: Error: unrecognized opcode: `eieio' {standard input}:628: Error: unrecognized opcode: `eieio' {standard input}:636: Error: unrecognized opcode: `eieio' {standard input}:655: Error: unrecognized opcode: `eieio' Fix it by replacing eieio by mbar on booke. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b0d982e223314ed82ab959f5d4ad2c4c00bedb99.1657549153.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/tlb_low.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index dd39074de9af..d62b613a0d5d 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -186,7 +186,7 @@ _GLOBAL(_tlbivax_bcast) isync PPC_TLBIVAX(0, R3) isync - eieio + mbar tlbsync BEGIN_FTR_SECTION b 1f @@ -355,7 +355,7 @@ _GLOBAL(_tlbivax_bcast) rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND 1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */ PPC_TLBIVAX(0,R3) - eieio + mbar tlbsync sync wrtee r10 -- cgit v1.2.3
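A closing note on why the eieio to mbar substitution above is safe: both
mnemonics assemble to the same instruction word (primary opcode 31,
extended opcode 854; mbar's MO field sits in bits that are reserved as
zero in eieio), so "mbar" with no operand is the Book E spelling of the
same barrier, and the 440/464/476 assembler simply rejects the classic
spelling. A minimal inline-asm sketch of the substitution, with a
hypothetical helper name:

    static inline void io_barrier_sketch(void)
    {
            __asm__ __volatile__("mbar" : : : "memory");    /* was "eieio" */
    }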