From 88ec6b93c8e7d6d4ffaf6ad6395ceb3bf552de15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 10 Apr 2019 19:04:33 +0200 Subject: powerpc/xive: add OPAL extensions for the XIVE native exploitation support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The support for XIVE native exploitation mode in Linux/KVM needs a couple more OPAL calls to get and set the state of the XIVE internal structures being used by a sPAPR guest. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 7 ++++--- arch/powerpc/include/asm/opal.h | 7 +++++++ arch/powerpc/include/asm/xive.h | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 870fb7b239ea..e1d118ac61dc 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -186,8 +186,8 @@ #define OPAL_XIVE_FREE_IRQ 140 #define OPAL_XIVE_SYNC 141 #define OPAL_XIVE_DUMP 142 -#define OPAL_XIVE_RESERVED3 143 -#define OPAL_XIVE_RESERVED4 144 +#define OPAL_XIVE_GET_QUEUE_STATE 143 +#define OPAL_XIVE_SET_QUEUE_STATE 144 #define OPAL_SIGNAL_SYSTEM_RESET 145 #define OPAL_NPU_INIT_CONTEXT 146 #define OPAL_NPU_DESTROY_CONTEXT 147 @@ -210,7 +210,8 @@ #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 #define OPAL_NX_COPROC_INIT 167 -#define OPAL_LAST 167 +#define OPAL_XIVE_GET_VP_STATE 170 +#define OPAL_LAST 170 #define QUIESCE_HOLD 1 /* Spin all calls at entry */ #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a55b01c90bb1..4e978d4dea5c 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -279,6 +279,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id); int64_t opal_xive_free_irq(uint32_t girq); int64_t opal_xive_sync(uint32_t type, uint32_t id); int64_t opal_xive_dump(uint32_t type, uint32_t id); +int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio, + __be32 *out_qtoggle, + __be32 *out_qindex); +int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio, + uint32_t qtoggle, + uint32_t qindex); +int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01); int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, uint64_t desc, uint16_t pe_number); diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 3c704f5dd3ae..b579a943407b 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -109,12 +109,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); extern void xive_native_sync_source(u32 hw_irq); +extern void xive_native_sync_queue(u32 hw_irq); extern bool is_xive_irq(struct irq_chip *chip); extern int xive_native_enable_vp(u32 vp_id, bool single_escalation); extern int xive_native_disable_vp(u32 vp_id); extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); extern bool xive_native_has_single_escalation(void); +extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio, + u64 *out_qpage, + u64 *out_qsize, + u64 *out_qeoi_page, + u32 *out_escalate_irq, + u64 *out_qflags); + +extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle, + u32 *qindex); 
+extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, + u32 qindex); +extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state); + #else static inline bool xive_enabled(void) { return false; } -- cgit v1.2.3 From eea86aa4171d4960f0fcdc99dab358c224d53ffe Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 14 Mar 2019 23:54:53 +1100 Subject: powerpc/mm/64: Document the sizes of/sizes mapped by Pxx_INDEX_SIZE Add comments describing the size in bytes of the various levels of the page table tree, and the size of the virtual address space mapped by each level, to make it clear what the sizes are without having to also look up other definitions. The code that calculates the sizes actually uses sizeof(pgd_t) etc., so in theory these comments could skew vs the code, but the size of pgd_t etc. is unlikely to change very often. Signed-off-by: Michael Ellerman Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 8 ++++---- arch/powerpc/include/asm/book3s/64/hash-64k.h | 9 +++++---- arch/powerpc/include/asm/book3s/64/radix-4k.h | 9 +++++---- arch/powerpc/include/asm/book3s/64/radix-64k.h | 8 ++++---- 4 files changed, 18 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index cf5ba5254299..54fab723a8c7 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -2,10 +2,10 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H #define _ASM_POWERPC_BOOK3S_64_HASH_4K_H -#define H_PTE_INDEX_SIZE 9 -#define H_PMD_INDEX_SIZE 7 -#define H_PUD_INDEX_SIZE 9 -#define H_PGD_INDEX_SIZE 9 +#define H_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 4KB = 2MB +#define H_PMD_INDEX_SIZE 7 // size: 8B << 7 = 1KB, maps: 2^7 x 2MB = 256MB +#define H_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB +#define H_PGD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 128GB = 64TB /* * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index f82ee8a3b561..81f4eb6e7da4 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -2,10 +2,11 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H -#define H_PTE_INDEX_SIZE 8 -#define H_PMD_INDEX_SIZE 10 -#define H_PUD_INDEX_SIZE 10 -#define H_PGD_INDEX_SIZE 8 +#define H_PTE_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 64KB = 16MB +#define H_PMD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB +#define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB +#define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB + /* * Each context is 512TB size. 
SLB miss for first context/default context diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h index 863c3e8286fb..d5f5ab73dc7f 100644 --- a/arch/powerpc/include/asm/book3s/64/radix-4k.h +++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h @@ -5,10 +5,11 @@ /* * For 4K page size supported index is 13/9/9/9 */ -#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */ -#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ -#define RADIX_PUD_INDEX_SIZE 9 -#define RADIX_PGD_INDEX_SIZE 13 +#define RADIX_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 4K = 2MB +#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB +#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB +#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB + /* * One fragment per per page */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h index ccb78ca9d0c5..54e33828b0fb 100644 --- a/arch/powerpc/include/asm/book3s/64/radix-64k.h +++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h @@ -5,10 +5,10 @@ /* * For 64K page size supported index is 13/9/9/5 */ -#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */ -#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ -#define RADIX_PUD_INDEX_SIZE 9 -#define RADIX_PGD_INDEX_SIZE 13 +#define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB +#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB +#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB +#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB /* * We use a 256 byte PTE page fragment in radix -- cgit v1.2.3 From f172acbfae1a78b1a3c775f78e8d0dcd15b9d768 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 13 Mar 2019 11:25:28 +0100 Subject: powerpc/mm: move warning from resize_hpt_for_hotplug() resize_hpt_for_hotplug() reports a warning when it cannot resize the hash page table ("Unable to resize hash page table to target order") but in some cases it's not a problem and can make the user think something has not worked properly. This patch moves the warning to arch_remove_memory() to only report the problem when it is needed.
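An illustrative sketch (hypothetical caller, not part of this patch; the warning text placement and error handling of the real hotplug path are assumptions) of how the int return type introduced below lets arch_remove_memory() decide when the failure is actually worth reporting:

#include <linux/memblock.h>
#include <linux/printk.h>
#include <asm/sparsemem.h>

static void example_remove_memory(void)
{
	/* Warn in the hotplug path instead of inside resize_hpt_for_hotplug(). */
	if (resize_hpt_for_hotplug(memblock_phys_mem_size()) != 0)
		pr_warn("Unable to resize hash page table to target order\n");
}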
Reviewed-by: David Gibson Signed-off-by: Laurent Vivier Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sparsemem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 68da49320592..3192d454a733 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -17,9 +17,9 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int ni extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 -extern void resize_hpt_for_hotplug(unsigned long new_mem_size); +extern int resize_hpt_for_hotplug(unsigned long new_mem_size); #else -static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { } +static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; } #endif #ifdef CONFIG_NUMA -- cgit v1.2.3 From bff25143da0d623a1765bf78dbc82044e46da5a4 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 7 Mar 2019 09:40:31 -0500 Subject: powerpc/mm: Silence unused-but-set-variable warnings pte_unmap() compiles away on some powerpc platforms, so silence the warnings below by making it a static inline function. mm/memory.c: In function 'copy_pte_range': mm/memory.c:820:24: warning: variable 'orig_dst_pte' set but not used mm/memory.c:820:9: warning: variable 'orig_src_pte' set but not used mm/madvise.c: In function 'madvise_free_pte_range': mm/madvise.c:318:9: warning: variable 'orig_pte' set but not used mm/swap_state.c: In function 'swap_ra_info': mm/swap_state.c:634:15: warning: variable 'orig_pte' set but not used Suggested-by: Christophe Leroy Signed-off-by: Qian Cai Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 3 ++- arch/powerpc/include/asm/nohash/64/pgtable.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 581f91be9dd4..e3d18b3f6e5d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -992,7 +992,8 @@ extern struct page *pgd_page(pgd_t pgd); (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while(0) + +static inline void pte_unmap(pte_t *pte) { } /* to find an entry in a kernel page-table-directory */ /* This now only contains the vmalloc pages */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index e77ed9761632..0384a3302fb6 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -205,7 +205,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while(0) + +static inline void pte_unmap(pte_t *pte) { } /* to find an entry in a kernel page-table-directory */ /* This now only contains the vmalloc pages */ -- cgit v1.2.3 From 7f177f9810ada8ec2e8b378eddbe2d91fda79c9b Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Mon, 15 Apr 2019 15:35:44 +0530 Subject: powerpc/pseries: hwpoison the pages upon hitting UE Add support to 
hwpoison the pages upon hitting machine check exception. This patch queues the address where UE is hit to percpu array and schedules work to plumb it into memory poison infrastructure. Reviewed-by: Mahesh Salgaonkar Signed-off-by: Ganesh Goudar [mpe: Combine #ifdefs, drop PPC_BIT8(), and empty inline stub] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mce.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 17996bc9382b..ad47fa865324 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -210,6 +210,7 @@ extern void release_mce_event(void); extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest); +unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ -- cgit v1.2.3 From c1fe190c06723322f2dfac31d3b982c581e434ef Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 1 Apr 2019 17:03:12 +1100 Subject: powerpc: Add force enable of DAWR on P9 option This adds a flag so that the DAWR can be enabled on P9 via: echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous The DAWR was previously force disabled on POWER9 in: 9654153158 powerpc: Disable DAWR in the base POWER9 CPU features Also see Documentation/powerpc/DAWR-POWER9.txt This is a dangerous setting, USE AT YOUR OWN RISK. Some users may not care about a bad user crashing their box (ie. single user/desktop systems) and really want the DAWR. This allows them to force enable DAWR. This flag can also be used to disable DAWR access. Once this is cleared, all DAWR access should be cleared immediately and your machine once again safe from crashing. Userspace may get confused by toggling this. If DAWR is force enabled/disabled between getting the number of breakpoints (via PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an inconsistent view of what's available. Similarly for guests. For the DAWR to be enabled in a KVM guest, the DAWR needs to be force enabled in the host AND the guest. For this reason, this won't work on POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the dawr_enable_dangerous file will fail if the hypervisor doesn't support writing the DAWR. To double check the DAWR is working, run this kernel selftest: tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c Any errors/failures/skips mean something is wrong. 
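An illustrative sketch (hypothetical caller; only dawr_enabled() and set_dawr() come from this patch, the surrounding function and error code are assumptions) of how breakpoint code is expected to consult the new flag before programming the DAWR:

#include <linux/errno.h>
#include <asm/hw_breakpoint.h>

static int example_install_breakpoint(struct arch_hw_breakpoint *brk)
{
	/* Refuse to touch the DAWR unless it was force-enabled via debugfs. */
	if (!dawr_enabled())
		return -EPERM;

	return set_dawr(brk);
}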
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_breakpoint.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index ece4dc89c90b..0fe8c1e46bbc 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void) extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); int hw_breakpoint_handler(struct die_args *args); +extern int set_dawr(struct arch_hw_breakpoint *brk); +extern bool dawr_force_enable; +static inline bool dawr_enabled(void) +{ + return dawr_force_enable; +} + #else /* CONFIG_HAVE_HW_BREAKPOINT */ static inline void hw_breakpoint_disable(void) { } static inline void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { } +static inline bool dawr_enabled(void) { return false; } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* __KERNEL__ */ #endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */ -- cgit v1.2.3 From 69795cabe4cfe5122438d50010ad5310c113a013 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 18 Apr 2019 16:51:18 +1000 Subject: powerpc: Add framework for Kernel Userspace Protection This patch adds a skeleton for Kernel Userspace Protection functionnalities like Kernel Userspace Access Protection and Kernel Userspace Execution Prevention The subsequent implementation of KUAP for radix makes use of a MMU feature in order to patch out assembly when KUAP is disabled or unsupported. This won't work unless there's an entry point for KUP support before the feature magic happens, so for PPC64 setup_kup() is called early in setup. On PPC32, feature_fixup() is done too early to allow the same. Suggested-by: Russell Currey Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kup.h | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 arch/powerpc/include/asm/kup.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h new file mode 100644 index 000000000000..7a88b8b9b54d --- /dev/null +++ b/arch/powerpc/include/asm/kup.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_H_ +#define _ASM_POWERPC_KUP_H_ + +#ifndef __ASSEMBLY__ + +void setup_kup(void); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_KUP_H_ */ -- cgit v1.2.3 From 0fb1c25ab523614b056ace11be67aac8f8ccabb1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 18 Apr 2019 16:51:19 +1000 Subject: powerpc: Add skeleton for Kernel Userspace Execution Prevention This patch adds a skeleton for Kernel Userspace Execution Prevention. Then subarches implementing it have to define CONFIG_PPC_HAVE_KUEP and provide setup_kuep() function. 
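A plausible sketch (assumed implementation, not shown in this header-only patch) of how the generic setup_kup() entry point added above could hand over to a subarch's setup_kuep(), with the disable flag supplied by the platform setup code:

#include <linux/types.h>
#include <asm/kup.h>

static bool disable_kuep;	/* assumption: set from an early_param() handler */

void setup_kup(void)
{
	setup_kuep(disable_kuep);
}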
Signed-off-by: Christophe Leroy [mpe: Don't split strings, use pr_crit_ratelimited()] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kup.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 7a88b8b9b54d..a2a959cb4e36 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -6,6 +6,12 @@ void setup_kup(void); +#ifdef CONFIG_PPC_KUEP +void setup_kuep(bool disabled); +#else +static inline void setup_kuep(bool disabled) { } +#endif /* CONFIG_PPC_KUEP */ + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_KUP_H_ */ -- cgit v1.2.3 From de78a9c42a790011f179bc94a7da3f5d8721f4cc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 18 Apr 2019 16:51:20 +1000 Subject: powerpc: Add a framework for Kernel Userspace Access Protection This patch implements a framework for Kernel Userspace Access Protection. Then subarches will have the possibility to provide their own implementation by providing setup_kuap() and allow/prevent_user_access(). Some platforms will need to know the area accessed and whether it is accessed from read, write or both. Therefore source, destination and size and handed over to the two functions. mpe: Rename to allow/prevent rather than unlock/lock, and add read/write wrappers. Drop the 32-bit code for now until we have an implementation for it. Add kuap to pt_regs for 64-bit as well as 32-bit. Don't split strings, use pr_crit_ratelimited(). Signed-off-by: Christophe Leroy Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/futex.h | 4 ++++ arch/powerpc/include/asm/kup.h | 32 ++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/ptrace.h | 11 +++++++++-- arch/powerpc/include/asm/uaccess.h | 38 ++++++++++++++++++++++++++++++-------- 4 files changed, 75 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 88b38b37c21b..3a6aa57b9d90 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; + allow_write_to_user(uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -62,6 +63,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, if (!ret) *oval = oldval; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } @@ -75,6 +77,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; + allow_write_to_user(uaddr, sizeof(*uaddr)); __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -95,6 +98,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index a2a959cb4e36..4d78b9d8c99c 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -4,6 +4,8 @@ #ifndef __ASSEMBLY__ +#include + void setup_kup(void); #ifdef CONFIG_PPC_KUEP @@ -12,6 +14,36 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ +#ifdef CONFIG_PPC_KUAP +void setup_kuap(bool disabled); +#else +static inline void setup_kuap(bool disabled) { } +static inline void 
allow_user_access(void __user *to, const void __user *from, + unsigned long size) { } +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) { } +#endif /* CONFIG_PPC_KUAP */ + +static inline void allow_read_from_user(const void __user *from, unsigned long size) +{ + allow_user_access(NULL, from, size); +} + +static inline void allow_write_to_user(void __user *to, unsigned long size) +{ + allow_user_access(to, NULL, size); +} + +static inline void prevent_read_from_user(const void __user *from, unsigned long size) +{ + prevent_user_access(NULL, from, size); +} + +static inline void prevent_write_to_user(void __user *to, unsigned long size) +{ + prevent_user_access(to, NULL, size); +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_KUP_H_ */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 64271e562fed..6f047730e642 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -52,10 +52,17 @@ struct pt_regs }; }; + union { + struct { #ifdef CONFIG_PPC64 - unsigned long ppr; - unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */ + unsigned long ppr; +#endif +#ifdef CONFIG_PPC_KUAP + unsigned long kuap; #endif + }; + unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */ + }; }; #endif diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 4d6d905e9138..76f34346b642 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * The fs value determines whether argument validity checking should be @@ -140,6 +141,7 @@ extern long __put_user_bad(void); #define __put_user_size(x, ptr, size, retval) \ do { \ retval = 0; \ + allow_write_to_user(ptr, size); \ switch (size) { \ case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ @@ -147,6 +149,7 @@ do { \ case 8: __put_user_asm2(x, ptr, retval); break; \ default: __put_user_bad(); \ } \ + prevent_write_to_user(ptr, size); \ } while (0) #define __put_user_nocheck(x, ptr, size) \ @@ -239,6 +242,7 @@ do { \ __chk_user_ptr(ptr); \ if (size > sizeof(x)) \ (x) = __get_user_bad(); \ + allow_read_from_user(ptr, size); \ switch (size) { \ case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ @@ -246,6 +250,7 @@ do { \ case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ + prevent_read_from_user(ptr, size); \ } while (0) /* @@ -305,15 +310,21 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { - return __copy_tofrom_user(to, from, n); + unsigned long ret; + + allow_user_access(to, from, n); + ret = __copy_tofrom_user(to, from, n); + prevent_user_access(to, from, n); + return ret; } #endif /* __powerpc64__ */ static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; + ret = 1; switch (n) { case 1: @@ -338,14 +349,18 @@ static inline unsigned long raw_copy_from_user(void *to, } barrier_nospec(); - return __copy_tofrom_user((__force void __user *)to, from, n); + allow_read_from_user(from, n); + ret = __copy_tofrom_user((__force void __user *)to, from, n); + prevent_read_from_user(from, n); + return 
ret; } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { + unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; + ret = 1; switch (n) { case 1: @@ -365,17 +380,24 @@ static inline unsigned long raw_copy_to_user(void __user *to, return 0; } - return __copy_tofrom_user(to, (__force const void __user *)from, n); + allow_write_to_user(to, n); + ret = __copy_tofrom_user(to, (__force const void __user *)from, n); + prevent_write_to_user(to, n); + return ret; } extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { + unsigned long ret = size; might_fault(); - if (likely(access_ok(addr, size))) - return __clear_user(addr, size); - return size; + if (likely(access_ok(addr, size))) { + allow_write_to_user(addr, size); + ret = __clear_user(addr, size); + prevent_write_to_user(addr, size); + } + return ret; } extern long strncpy_from_user(char *dst, const char __user *src, long count); -- cgit v1.2.3 From 890274c2dc4c0a57ae5a12d6a76fa6d05b599d98 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 18 Apr 2019 16:51:24 +1000 Subject: powerpc/64s: Implement KUAP for Radix MMU Kernel Userspace Access Prevention utilises a feature of the Radix MMU which disallows read and write access to userspace addresses. By utilising this, the kernel is prevented from accessing user data from outside of trusted paths that perform proper safety checks, such as copy_{to/from}_user() and friends. Userspace access is disabled from early boot and is only enabled when performing an operation like copy_{to/from}_user(). The register that controls this (AMR) does not prevent userspace from accessing itself, so there is no need to save and restore when entering and exiting userspace. When entering the kernel from the kernel we save AMR and if it is not blocking user access (because eg. we faulted doing a user access) we reblock user access for the duration of the exception (ie. the page fault) and then restore the AMR when returning back to the kernel. This feature can be tested by using the lkdtm driver (CONFIG_LKDTM=y) and performing the following: # (echo ACCESS_USERSPACE) > [debugfs]/provoke-crash/DIRECT If enabled, this should send SIGSEGV to the thread. We also add paranoid checking of AMR in switch and syscall return under CONFIG_PPC_KUAP_DEBUG. 
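An illustrative sketch (hypothetical helper; it simply mirrors the uaccess.h changes earlier in this series) of the "trusted path" pattern described above -- user memory is only readable inside a narrow allow/prevent window around the actual copy:

#include <linux/uaccess.h>

static unsigned long example_read_from_user(void *dst, const void __user *src,
					     unsigned long n)
{
	unsigned long left;

	allow_read_from_user(src, n);		/* on radix: AMR opened for reads only */
	left = __copy_tofrom_user((__force void __user *)dst, src, n);
	prevent_read_from_user(src, n);		/* on radix: AMR back to fully blocked */

	return left;	/* bytes not copied, as with the regular copy helpers */
}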
Co-authored-by: Michael Ellerman Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/kup-radix.h | 102 +++++++++++++++++++++++++ arch/powerpc/include/asm/exception-64s.h | 2 + arch/powerpc/include/asm/feature-fixups.h | 3 + arch/powerpc/include/asm/kup.h | 4 + arch/powerpc/include/asm/mmu.h | 10 ++- 5 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/book3s/64/kup-radix.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h new file mode 100644 index 000000000000..6d6628424134 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H +#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H + +#include + +#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) +#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) +#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) +#define AMR_KUAP_SHIFT 62 + +#ifdef __ASSEMBLY__ + +.macro kuap_restore_amr gpr +#ifdef CONFIG_PPC_KUAP + BEGIN_MMU_FTR_SECTION_NESTED(67) + ld \gpr, STACK_REGS_KUAP(r1) + mtspr SPRN_AMR, \gpr + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +.macro kuap_check_amr gpr1, gpr2 +#ifdef CONFIG_PPC_KUAP_DEBUG + BEGIN_MMU_FTR_SECTION_NESTED(67) + mfspr \gpr1, SPRN_AMR + li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) + sldi \gpr2, \gpr2, AMR_KUAP_SHIFT +999: tdne \gpr1, \gpr2 + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr +#ifdef CONFIG_PPC_KUAP + BEGIN_MMU_FTR_SECTION_NESTED(67) + .ifnb \msr_pr_cr + bne \msr_pr_cr, 99f + .endif + mfspr \gpr1, SPRN_AMR + std \gpr1, STACK_REGS_KUAP(r1) + li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) + sldi \gpr2, \gpr2, AMR_KUAP_SHIFT + cmpd \use_cr, \gpr1, \gpr2 + beq \use_cr, 99f + // We don't isync here because we very recently entered via rfid + mtspr SPRN_AMR, \gpr2 + isync +99: + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +#else /* !__ASSEMBLY__ */ + +#ifdef CONFIG_PPC_KUAP + +#include + +/* + * We support individually allowing read or write, but we don't support nesting + * because that would require an expensive read/modify write of the AMR. + */ + +static inline void set_kuap(unsigned long value) +{ + if (!mmu_has_feature(MMU_FTR_RADIX_KUAP)) + return; + + /* + * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both + * before and after the move to AMR. See table 6 on page 1134. 
+ */ + isync(); + mtspr(SPRN_AMR, value); + isync(); +} + +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + // This is written so we can resolve to a single case at build time + if (__builtin_constant_p(to) && to == NULL) + set_kuap(AMR_KUAP_BLOCK_WRITE); + else if (__builtin_constant_p(from) && from == NULL) + set_kuap(AMR_KUAP_BLOCK_READ); + else + set_kuap(0); +} + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + set_kuap(AMR_KUAP_BLOCKED); +} + +#endif /* CONFIG_PPC_KUAP */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 937bb630093f..bef4e05a6823 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -497,6 +497,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) RESTORE_CTR(r1, area); \ b bad_stack; \ 3: EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1, cr0; \ beq 4f; /* if from kernel mode */ \ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ SAVE_PPR(area, r9); \ @@ -691,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) */ #define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \ EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1; \ EXCEPTION_PROLOG_COMMON_2(area); \ EXCEPTION_PROLOG_COMMON_3(trap); \ /* Volatile regs are potentially clobbered here */ \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 40a6c9261a6b..f6fc31f8baff 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -100,6 +100,9 @@ label##5: \ #define END_MMU_FTR_SECTION(msk, val) \ END_MMU_FTR_SECTION_NESTED(msk, val, 97) +#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \ + END_MMU_FTR_SECTION_NESTED((msk), (msk), label) + #define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk)) #define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0) diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 4d78b9d8c99c..d7312defbe1c 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -2,6 +2,10 @@ #ifndef _ASM_POWERPC_KUP_H_ #define _ASM_POWERPC_KUP_H_ +#ifdef CONFIG_PPC64 +#include +#endif + #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 8ddd4a91bdc1..38d21adfde40 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -107,6 +107,11 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) +/* + * Supports KUAP (key 0 controlling userspace addresses) on radix + */ +#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000) + /* MMU feature bit sets for various CPUs */ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 @@ -164,7 +169,10 @@ enum { #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | -#endif +#ifdef CONFIG_PPC_KUAP + MMU_FTR_RADIX_KUAP | +#endif /* CONFIG_PPC_KUAP */ +#endif /* CONFIG_PPC_RADIX_MMU */ 0, }; -- cgit v1.2.3 From 5e5be3aed23032d40d5ab7407f344f1a74f2765b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 18 Apr 2019 16:51:25 +1000 Subject: powerpc/mm: Detect bad KUAP faults When KUAP is enabled we have logic to detect page faults that occur outside of a valid user access region and are blocked by the AMR. 
What we don't have at the moment is logic to detect a fault *within* a valid user access region, that has been incorrectly blocked by AMR. This is not meant to ever happen, but it can if we incorrectly save/restore the AMR, or if the AMR was overwritten for some other reason. Currently if that happens we assume it's just a regular fault that will be corrected by handling the fault normally, so we just return. But there is nothing the fault handling code can do to fix it, so the fault just happens again and we spin forever, leading to soft lockups. So add some logic to detect that case and WARN() if we ever see it. Arguably it should be a BUG(), but it's more polite to fail the access and let the kernel continue, rather than taking down the box. There should be no data integrity issue with failing the fault rather than BUG'ing, as we're just going to disallow an access that should have been allowed. To make the code a little easier to follow, unroll the condition at the end of bad_kernel_fault() and comment each case, before adding the call to bad_kuap_fault(). Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/kup-radix.h | 6 ++++++ arch/powerpc/include/asm/kup.h | 1 + 2 files changed, 7 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 6d6628424134..7679bd0c5af0 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -95,6 +95,12 @@ static inline void prevent_user_access(void __user *to, const void __user *from, set_kuap(AMR_KUAP_BLOCKED); } +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); +} #endif /* CONFIG_PPC_KUAP */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index d7312defbe1c..28ad4654eed2 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -26,6 +26,7 @@ static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size) { } static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size) { } +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) -- cgit v1.2.3 From e2fb9f5444312fd01627c84a3e018c1fe8ac6ebb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Mar 2019 08:30:31 +0000 Subject: powerpc/32: Prepare for Kernel Userspace Access Protection This patch adds ASM macros for saving, restoring and checking the KUAP state, and modifies setup_32 to call them on exceptions from kernel. The macros are defined as empty by default for when CONFIG_PPC_KUAP is not selected and/or for platforms which don't handle (yet) KUAP. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kup.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 28ad4654eed2..7d8ad3d6729d 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -6,7 +6,20 @@ #include #endif -#ifndef __ASSEMBLY__ +#ifdef __ASSEMBLY__ +#ifndef CONFIG_PPC_KUAP +.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 +.endm + +.macro kuap_restore sp, current, gpr1, gpr2, gpr3 +.endm + +.macro kuap_check current, gpr +.endm + +#endif + +#else /* !__ASSEMBLY__ */ #include -- cgit v1.2.3 From c341a108a58100b4d0774ddb1dacbd67dfa749b3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Mar 2019 08:30:32 +0000 Subject: powerpc/8xx: Only define APG0 and APG1 Since the 8xx implements hardware page table walk assistance, the PGD entries always point to a 4k aligned page, so the 2 upper bits of the APG are not clobbered anymore and remain 0. Therefore only APG0 and APG1 are used and need a definition. We set the other APG to the lowest permission level. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 0a1a3fc54e54..fc5a653d5dd2 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -35,11 +35,11 @@ * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => No user => 01 (all accesses performed according to page definition) + * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * We define all 16 groups so that all other bits of APG can take any value + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) */ -#define MI_APG_INIT 0x44444444 +#define MI_APG_INIT 0x4fffffff /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -108,11 +108,11 @@ * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => No user => 01 (all accesses performed according to page definition) + * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * We define all 16 groups so that all other bits of APG can take any value + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) */ -#define MD_APG_INIT 0x44444444 +#define MD_APG_INIT 0x4fffffff /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in -- cgit v1.2.3 From 06fbe81b5909847aa13f9c86c2b6f9bbc5c2795b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Mar 2019 08:30:33 +0000 Subject: powerpc/8xx: Add Kernel Userspace Execution Prevention This patch adds Kernel Userspace Execution Prevention on the 8xx. 
When a page is Executable, it is set Executable for Key 0 and NX for Key 1. Up to now, the User group is defined with Key 0 for both User and Supervisor. By changing the group to Key 0 for User and Key 1 for Supervisor, this patch prevents the Kernel from being able to execute user code. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index fc5a653d5dd2..3cb743284e09 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -41,6 +41,13 @@ */ #define MI_APG_INIT 0x4fffffff +/* + * 0 => Kernel => 01 (all accesses performed according to page definition) + * 1 => User => 10 (all accesses performed according to swaped page definition) + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + */ +#define MI_APG_KUEP 0x6fffffff + /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in * this register are used to create the TLB entry. -- cgit v1.2.3 From 2679f9bd0abafb3044bcbaac0600b32159ac8bf2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Mar 2019 08:30:34 +0000 Subject: powerpc/8xx: Add Kernel Userspace Access Protection This patch adds Kernel Userspace Access Protection on the 8xx. When a page is RO or RW, it is set RO or RW for Key 0 and NA for Key 1. Up to now, the User group is defined with Key 0 for both User and Supervisor. By changing the group to Key 0 for User and Key 1 for Supervisor, this patch prevents the Kernel from being able to access user data. At exception entry, the kernel saves SPRN_MD_AP in the regs struct, and reapply the protection. At exception exit it restores SPRN_MD_AP with the value saved on exception entry. 
Signed-off-by: Christophe Leroy [mpe: Drop allow_read/write_to/from_user() as they're now in kup.h] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kup.h | 3 ++ arch/powerpc/include/asm/nohash/32/kup-8xx.h | 58 ++++++++++++++++++++++++++++ arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 7 ++++ 3 files changed, 68 insertions(+) create mode 100644 arch/powerpc/include/asm/nohash/32/kup-8xx.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 7d8ad3d6729d..043c800ec5fb 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -5,6 +5,9 @@ #ifdef CONFIG_PPC64 #include #endif +#ifdef CONFIG_PPC_8xx +#include +#endif #ifdef __ASSEMBLY__ #ifndef CONFIG_PPC_KUAP diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h new file mode 100644 index 000000000000..1c3133b5f86a --- /dev/null +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_8XX_H_ +#define _ASM_POWERPC_KUP_8XX_H_ + +#include + +#ifdef CONFIG_PPC_KUAP + +#ifdef __ASSEMBLY__ + +.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 + lis \gpr2, MD_APG_KUAP@h /* only APG0 and APG1 are used */ + mfspr \gpr1, SPRN_MD_AP + mtspr SPRN_MD_AP, \gpr2 + stw \gpr1, STACK_REGS_KUAP(\sp) +.endm + +.macro kuap_restore sp, current, gpr1, gpr2, gpr3 + lwz \gpr1, STACK_REGS_KUAP(\sp) + mtspr SPRN_MD_AP, \gpr1 +.endm + +.macro kuap_check current, gpr +#ifdef CONFIG_PPC_KUAP_DEBUG + mfspr \gpr, SPRN_MD_AP + rlwinm \gpr, \gpr, 16, 0xffff +999: twnei \gpr, MD_APG_KUAP@h + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) +#endif +.endm + +#else /* !__ASSEMBLY__ */ + +#include + +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + mtspr(SPRN_MD_AP, MD_APG_INIT); +} + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), + "Bug: fault blocked by AP register !"); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* CONFIG_PPC_KUAP */ + +#endif /* _ASM_POWERPC_KUP_8XX_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 3cb743284e09..f620adef54fc 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -121,6 +121,13 @@ */ #define MD_APG_INIT 0x4fffffff +/* + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 10 (all accesses performed according to swaped page definition) + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + */ +#define MD_APG_KUAP 0x6fffffff + /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in * this register are used to create the TLB entry. -- cgit v1.2.3 From 31ed2b13c48d779efc838ad54e30121e088a62af Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Mar 2019 08:30:35 +0000 Subject: powerpc/32s: Implement Kernel Userspace Execution Prevention. To implement Kernel Userspace Execution Prevention, this patch sets NX bit on all user segments on kernel entry and clears NX bit on all user segments on kernel exit. 
Note that powerpc 601 doesn't have the NX bit, so KUEP will not work on it. A warning is displayed at startup. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/kup.h | 42 +++++++++++++++++++++++++++ arch/powerpc/include/asm/book3s/32/mmu-hash.h | 3 ++ arch/powerpc/include/asm/kup.h | 3 ++ 3 files changed, 48 insertions(+) create mode 100644 arch/powerpc/include/asm/book3s/32/kup.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h new file mode 100644 index 000000000000..5f97c742ca71 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_32_KUP_H +#define _ASM_POWERPC_BOOK3S_32_KUP_H + +#include + +#ifdef __ASSEMBLY__ + +.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */ +101: mtsrin \gpr1, \gpr2 + addi \gpr1, \gpr1, 0x111 /* next VSID */ + rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ + addis \gpr2, \gpr2, 0x1000 /* address of next segment */ + bdnz 101b + isync +.endm + +.macro kuep_lock gpr1, gpr2 +#ifdef CONFIG_PPC_KUEP + li \gpr1, NUM_USER_SEGMENTS + li \gpr2, 0 + mtctr \gpr1 + mfsrin \gpr1, \gpr2 + oris \gpr1, \gpr1, SR_NX@h /* set Nx */ + kuep_update_sr \gpr1, \gpr2 +#endif +.endm + +.macro kuep_unlock gpr1, gpr2 +#ifdef CONFIG_PPC_KUEP + li \gpr1, NUM_USER_SEGMENTS + li \gpr2, 0 + mtctr \gpr1 + mfsrin \gpr1, \gpr2 + rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */ + kuep_update_sr \gpr1, \gpr2 +#endif +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */ diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 5cb588395fdc..8c5727a322b1 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -63,6 +63,9 @@ typedef pte_t *pgtable_t; #define PP_RWRW 2 /* Supervisor read/write, User read/write */ #define PP_RXRX 3 /* Supervisor read, User read */ +/* Values for Segment Registers */ +#define SR_NX 0x10000000 /* No Execute */ + #ifndef __ASSEMBLY__ /* diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 043c800ec5fb..5b5e39643a27 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -8,6 +8,9 @@ #ifdef CONFIG_PPC_8xx #include #endif +#ifdef CONFIG_PPC_BOOK3S_32 +#include +#endif #ifdef __ASSEMBLY__ #ifndef CONFIG_PPC_KUAP -- cgit v1.2.3 From f342adca3afc84c4ef648352440ed6331518d72d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Mar 2019 08:30:36 +0000 Subject: powerpc/32s: Prepare Kernel Userspace Access Protection This patch prepares Kernel Userspace Access Protection for book3s/32. Due to limitations of the processor page protection capabilities, the protection is only against writing. read protection cannot be achieved using page protection. book3s/32 provides the following values for PP bits: PP00 provides RW for Key 0 and NA for Key 1 PP01 provides RW for Key 0 and RO for Key 1 PP10 provides RW for all PP11 provides RO for all Today PP10 is used for RW pages and PP11 for RO pages, and user segment register's Kp and Ks are set to 1. This patch modifies page protection to use PP01 for RW pages and sets user segment registers to Kp 0 and Ks 0. This will allow to setup Userspace write access protection by settng Ks to 1 in the following patch. Kernel space segment registers remain unchanged. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 8c5727a322b1..f9eae105a9f4 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -65,6 +65,8 @@ typedef pte_t *pgtable_t; /* Values for Segment Registers */ #define SR_NX 0x10000000 /* No Execute */ +#define SR_KP 0x20000000 /* User key */ +#define SR_KS 0x40000000 /* Supervisor key */ #ifndef __ASSEMBLY__ -- cgit v1.2.3 From a68c31fc01ef7863acc0fc74694bf279456a58c4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Mar 2019 08:30:38 +0000 Subject: powerpc/32s: Implement Kernel Userspace Access Protection This patch implements Kernel Userspace Access Protection for book3s/32. Due to limitations of the processor page protection capabilities, the protection is only against writing. read protection cannot be achieved using page protection. The previous patch modifies the page protection so that RW user pages are RW for Key 0 and RO for Key 1, and it sets Key 0 for both user and kernel. This patch changes userspace segment registers are set to Ku 0 and Ks 1. When kernel needs to write to RW pages, the associated segment register is then changed to Ks 0 in order to allow write access to the kernel. In order to avoid having the read all segment registers when locking/unlocking the access, some data is kept in the thread_struct and saved on stack on exceptions. The field identifies both the first unlocked segment and the first segment following the last unlocked one. When no segment is unlocked, it contains value 0. As the hash_page() function is not able to easily determine if a protfault is due to a bad kernel access to userspace, protfaults need to be handled by handle_page_fault when KUAP is set. Signed-off-by: Christophe Leroy [mpe: Drop allow_read/write_to/from_user() as they're now in kup.h, and adapt allow_user_access() to do nothing when to == NULL] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/kup.h | 103 +++++++++++++++++++++++++++++++ arch/powerpc/include/asm/processor.h | 3 + 2 files changed, 106 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 5f97c742ca71..677e9babef80 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -37,6 +37,109 @@ #endif .endm +#ifdef CONFIG_PPC_KUAP + +.macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */ +101: mtsrin \gpr1, \gpr2 + addi \gpr1, \gpr1, 0x111 /* next VSID */ + rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ + addis \gpr2, \gpr2, 0x1000 /* address of next segment */ + cmplw \gpr2, \gpr3 + blt- 101b + isync +.endm + +.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 + lwz \gpr2, KUAP(\thread) + rlwinm. \gpr3, \gpr2, 28, 0xf0000000 + stw \gpr2, STACK_REGS_KUAP(\sp) + beq+ 102f + li \gpr1, 0 + stw \gpr1, KUAP(\thread) + mfsrin \gpr1, \gpr2 + oris \gpr1, \gpr1, SR_KS@h /* set Ks */ + kuap_update_sr \gpr1, \gpr2, \gpr3 +102: +.endm + +.macro kuap_restore sp, current, gpr1, gpr2, gpr3 + lwz \gpr2, STACK_REGS_KUAP(\sp) + rlwinm. 
\gpr3, \gpr2, 28, 0xf0000000 + stw \gpr2, THREAD + KUAP(\current) + beq+ 102f + mfsrin \gpr1, \gpr2 + rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */ + kuap_update_sr \gpr1, \gpr2, \gpr3 +102: +.endm + +.macro kuap_check current, gpr +#ifdef CONFIG_PPC_KUAP_DEBUG + lwz \gpr2, KUAP(thread) +999: twnei \gpr, 0 + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) +#endif +.endm + +#endif /* CONFIG_PPC_KUAP */ + +#else /* !__ASSEMBLY__ */ + +#ifdef CONFIG_PPC_KUAP + +#include + +static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) +{ + barrier(); /* make sure thread.kuap is updated before playing with SRs */ + while (addr < end) { + mtsrin(sr, addr); + sr += 0x111; /* next VSID */ + sr &= 0xf0ffffff; /* clear VSID overflow */ + addr += 0x10000000; /* address of next segment */ + } + isync(); /* Context sync required after mtsrin() */ +} + +static inline void allow_user_access(void __user *to, const void __user *from, u32 size) +{ + u32 addr, end; + + if (__builtin_constant_p(to) && to == NULL) + return; + + addr = (__force u32)to; + + if (!addr || addr >= TASK_SIZE || !size) + return; + + end = min(addr + size, TASK_SIZE); + current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf); + kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ +} + +static inline void prevent_user_access(void __user *to, const void __user *from, u32 size) +{ + u32 addr = (__force u32)to; + u32 end = min(addr + size, TASK_SIZE); + + if (!addr || addr >= TASK_SIZE || !size) + return; + + current->thread.kuap = 0; + kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + if (!is_write) + return false; + + return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !"); +} + +#endif /* CONFIG_PPC_KUAP */ + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 3351bcf42f2d..540949b397d4 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -163,6 +163,9 @@ struct thread_struct { #ifdef CONFIG_PPC_RTAS unsigned long rtas_sp; /* stack pointer for when in RTAS */ #endif +#endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + unsigned long kuap; /* opened segments for user access */ #endif /* Debug Registers */ struct debug_reg debug; -- cgit v1.2.3 From 6161a37307f3320808b5a7549593b991500f2656 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 3 Apr 2019 11:35:14 +0530 Subject: powerpc/mm: Fix build error with FLATMEM book3s64 config The current value of MAX_PHYSMEM_BITS cannot work with 32 bit configs. We used to have MAX_PHYSMEM_BITS not defined without SPARSEMEM and 32 bit configs never expected a value to be set for MAX_PHYSMEM_BITS. Dependent code such as zsmalloc derived the right values based on other fields. Instead of finding a value that works with different configs, use new values only for book3s_64. For 64 bit booke, use the definition of MAX_PHYSMEM_BITS as per commit a7df61a0e2b6 ("[PATCH] ppc64: Increase sparsemem defaults") That change was done in 2005 and hopefully will work with book3e 64. 
Fixes: 8bc086899816 ("powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 15 +++++++++++++++ arch/powerpc/include/asm/mmu.h | 15 --------------- arch/powerpc/include/asm/nohash/64/mmu.h | 2 ++ 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 1ceee000c18d..a809bdd77322 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -35,6 +35,21 @@ typedef pte_t *pgtable_t; #endif /* __ASSEMBLY__ */ +/* + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and + * page_to_nid does a page->section->node lookup + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce + * memory requirements with large number of sections. + * 51 bits is the max physical real address on POWER9 + */ +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ + defined(CONFIG_PPC_64K_PAGES) +#define MAX_PHYSMEM_BITS 51 +#else +#define MAX_PHYSMEM_BITS 46 +#endif + /* 64-bit classic hash table MMU */ #include diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 38d21adfde40..d86c5641bd97 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -349,21 +349,6 @@ static inline bool strict_kernel_rwx_enabled(void) */ #define MMU_PAGE_COUNT 16 -/* - * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS - * if we increase SECTIONS_WIDTH we will not store node details in page->flags and - * page_to_nid does a page->section->node lookup - * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce - * memory requirements with large number of sections. - * 51 bits is the max physical real address on POWER9 - */ -#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ - defined (CONFIG_PPC_64K_PAGES) -#define MAX_PHYSMEM_BITS 51 -#elif defined(CONFIG_PPC64) -#define MAX_PHYSMEM_BITS 46 -#endif - #ifdef CONFIG_PPC_BOOK3S_64 #include #else /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h index e6585480dfc4..81cf30c370e5 100644 --- a/arch/powerpc/include/asm/nohash/64/mmu.h +++ b/arch/powerpc/include/asm/nohash/64/mmu.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_NOHASH_64_MMU_H_ #define _ASM_POWERPC_NOHASH_64_MMU_H_ +#define MAX_PHYSMEM_BITS 44 + /* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ #include -- cgit v1.2.3 From 4f40b15f339d896f5726714842947c9339742494 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Apr 2019 18:33:47 +0530 Subject: powerpc/mm: Remove PPC_MM_SLICES #ifdef for book3s64 Book3s64 always have PPC_MM_SLICES enabled. 
So remove the unnecessary #ifdef Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 4 ---- arch/powerpc/include/asm/book3s/64/slice.h | 13 ------------- 2 files changed, 17 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index a809bdd77322..afe10dd11c68 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -138,7 +138,6 @@ typedef struct { /* NPU NMMU context */ struct npu_context *npu_context; -#ifdef CONFIG_PPC_MM_SLICES /* SLB page size encodings*/ unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; @@ -151,9 +150,6 @@ typedef struct { struct slice_mask mask_16m; struct slice_mask mask_16g; # endif -#else - u16 sllp; /* SLB page size encoding */ -#endif unsigned long vdso_base; #ifdef CONFIG_PPC_SUBPAGE_PROT struct subpage_prot_table spt; diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h index db0dedab65ee..062e11136e9c 100644 --- a/arch/powerpc/include/asm/book3s/64/slice.h +++ b/arch/powerpc/include/asm/book3s/64/slice.h @@ -2,8 +2,6 @@ #ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H #define _ASM_POWERPC_BOOK3S_64_SLICE_H -#ifdef CONFIG_PPC_MM_SLICES - #define SLICE_LOW_SHIFT 28 #define SLICE_LOW_TOP (0x100000000ul) #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) @@ -13,15 +11,4 @@ #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) -#else /* CONFIG_PPC_MM_SLICES */ - -#define get_slice_psize(mm, addr) ((mm)->context.user_psize) -#define slice_set_user_psize(mm, psize) \ -do { \ - (mm)->context.user_psize = (psize); \ - (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ -} while (0) - -#endif /* CONFIG_PPC_MM_SLICES */ - #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */ -- cgit v1.2.3 From 60458fba469a695a026334b364cf8adbcd5807e3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Apr 2019 18:33:48 +0530 Subject: powerpc/mm: Add helpers for accessing hash translation related variables We want to switch to allocating them at runtime only when hash translation is enabled. Add helpers so that both book3s and nohash can be adapted to the upcoming change easily. 
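As a rough usage sketch (editorial illustration, not taken from the patch itself), call sites then go through the accessors instead of dereferencing the context fields directly:

	/* Hypothetical caller, for illustration only. */
	static void show_ctx(struct mm_struct *mm)
	{
		u16 psize = mm_ctx_user_psize(&mm->context);
		unsigned long limit = mm_ctx_slb_addr_limit(&mm->context);
		unsigned char *lpsizes = mm_ctx_low_slices(&mm->context);

		/* ...previously mm->context.user_psize, mm->context.slb_addr_limit, etc. */
		pr_debug("psize=%u limit=%lx low[0]=%x\n", psize, limit, lpsizes[0]);
	}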
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 4 +- arch/powerpc/include/asm/book3s/64/mmu.h | 63 ++++++++++++++++++++++++++- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 50 +++++++++++++++++++++ 3 files changed, 114 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index a28a28079edb..eb36fbfe4ef5 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -657,8 +657,8 @@ extern void slb_set_size(u16 size); /* 4 bits per slice and we have one slice per 1TB */ #define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41) -#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.slb_addr_limit >> 41) - +#define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE) +#define TASK_SLICE_ARRAY_SZ(x) ((x)->slb_addr_limit >> 41) #ifndef __ASSEMBLY__ #ifdef CONFIG_PPC_SUBPAGE_PROT diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index afe10dd11c68..c9f317090620 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -139,7 +139,7 @@ typedef struct { struct npu_context *npu_context; /* SLB page size encodings*/ - unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; + unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ]; unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; unsigned long slb_addr_limit; # ifdef CONFIG_PPC_64K_PAGES @@ -174,6 +174,67 @@ typedef struct { #endif } mm_context_t; +static inline u16 mm_ctx_user_psize(mm_context_t *ctx) +{ + return ctx->user_psize; +} + +static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) +{ + ctx->user_psize = user_psize; +} + +static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx) +{ + return ctx->low_slices_psize; +} + +static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) +{ + return ctx->high_slices_psize; +} + +static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) +{ + return ctx->slb_addr_limit; +} + +static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) +{ + ctx->slb_addr_limit = limit; +} + +#ifdef CONFIG_PPC_64K_PAGES +static inline struct slice_mask *mm_ctx_slice_mask_64k(mm_context_t *ctx) +{ + return &ctx->mask_64k; +} +#endif + +static inline struct slice_mask *mm_ctx_slice_mask_4k(mm_context_t *ctx) +{ + return &ctx->mask_4k; +} + +#ifdef CONFIG_HUGETLB_PAGE +static inline struct slice_mask *mm_ctx_slice_mask_16m(mm_context_t *ctx) +{ + return &ctx->mask_16m; +} + +static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx) +{ + return &ctx->mask_16g; +} +#endif + +#ifdef CONFIG_PPC_SUBPAGE_PROT +static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx) +{ + return &ctx->spt; +} +#endif + /* * The current system page and segment sizes */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index f620adef54fc..c503e2f05e61 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -181,6 +181,7 @@ #ifdef CONFIG_PPC_MM_SLICES #include #define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) +#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE #endif #ifndef __ASSEMBLY__ @@ -207,6 +208,55 @@ typedef struct { void *pte_frag; } mm_context_t; +#ifdef CONFIG_PPC_MM_SLICES +static inline u16 
mm_ctx_user_psize(mm_context_t *ctx) +{ + return ctx->user_psize; +} + +static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) +{ + ctx->user_psize = user_psize; +} + +static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx) +{ + return ctx->low_slices_psize; +} + +static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) +{ + return ctx->high_slices_psize; +} + +static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) +{ + return ctx->slb_addr_limit; +} + +static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) +{ + ctx->slb_addr_limit = limit; +} + +static inline struct slice_mask *mm_ctx_slice_mask_base(mm_context_t *ctx) +{ + return &ctx->mask_base_psize; +} + +#ifdef CONFIG_HUGETLB_PAGE +static inline struct slice_mask *mm_ctx_slice_mask_512k(mm_context_t *ctx) +{ + return &ctx->mask_512k; +} + +static inline struct slice_mask *mm_ctx_slice_mask_8m(mm_context_t *ctx) +{ + return &ctx->mask_8m; +} +#endif +#endif /* CONFIG_PPC_MM_SLICE */ + #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) -- cgit v1.2.3 From 701101865f5d3e268281ce7a254eb4a97d16cbdc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Apr 2019 18:33:50 +0530 Subject: powerpc/mm: Reduce memory usage for mm_context_t for radix Currently, our mm_context_t on book3s64 include all hash specific context details like slice mask and subpage protection details. We can skip allocating these with radix translation. This will help us to save 8K per mm_context with radix translation. With the patch applied we have sizeof(mm_context_t) = 136 sizeof(struct hash_mm_context) = 8288 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 33 +++++++++++++++++- arch/powerpc/include/asm/book3s/64/mmu.h | 49 +++++++-------------------- 2 files changed, 44 insertions(+), 38 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index eb36fbfe4ef5..4481bedbb5be 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -658,7 +658,7 @@ extern void slb_set_size(u16 size); /* 4 bits per slice and we have one slice per 1TB */ #define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41) #define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE) -#define TASK_SLICE_ARRAY_SZ(x) ((x)->slb_addr_limit >> 41) +#define TASK_SLICE_ARRAY_SZ(x) ((x)->hash_context->slb_addr_limit >> 41) #ifndef __ASSEMBLY__ #ifdef CONFIG_PPC_SUBPAGE_PROT @@ -693,6 +693,37 @@ static inline void subpage_prot_free(struct mm_struct *mm) {} static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } #endif /* CONFIG_PPC_SUBPAGE_PROT */ +/* + * One bit per slice. We have lower slices which cover 256MB segments + * upto 4G range. That gets us 16 low slices. For the rest we track slices + * in 1TB size. 
+ */ +struct slice_mask { + u64 low_slices; + DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); +}; + +struct hash_mm_context { + u16 user_psize; /* page size index */ + + /* SLB page size encodings*/ + unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ]; + unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; + unsigned long slb_addr_limit; +#ifdef CONFIG_PPC_64K_PAGES + struct slice_mask mask_64k; +#endif + struct slice_mask mask_4k; +#ifdef CONFIG_HUGETLB_PAGE + struct slice_mask mask_16m; + struct slice_mask mask_16g; +#endif + +#ifdef CONFIG_PPC_SUBPAGE_PROT + struct subpage_prot_table spt; +#endif /* CONFIG_PPC_SUBPAGE_PROT */ +}; + #if 0 /* * The code below is equivalent to this function for arguments diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index c9f317090620..e510e46b07ce 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -104,16 +104,6 @@ struct spinlock; /* Maximum possible number of NPUs in a system. */ #define NV_MAX_NPUS 8 -/* - * One bit per slice. We have lower slices which cover 256MB segments - * upto 4G range. That gets us 16 low slices. For the rest we track slices - * in 1TB size. - */ -struct slice_mask { - u64 low_slices; - DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); -}; - typedef struct { union { /* @@ -127,7 +117,6 @@ typedef struct { mm_context_id_t id; mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE]; }; - u16 user_psize; /* page size index */ /* Number of bits in the mm_cpumask */ atomic_t active_cpus; @@ -137,23 +126,9 @@ typedef struct { /* NPU NMMU context */ struct npu_context *npu_context; + struct hash_mm_context *hash_context; - /* SLB page size encodings*/ - unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ]; - unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; - unsigned long slb_addr_limit; -# ifdef CONFIG_PPC_64K_PAGES - struct slice_mask mask_64k; -# endif - struct slice_mask mask_4k; -# ifdef CONFIG_HUGETLB_PAGE - struct slice_mask mask_16m; - struct slice_mask mask_16g; -# endif unsigned long vdso_base; -#ifdef CONFIG_PPC_SUBPAGE_PROT - struct subpage_prot_table spt; -#endif /* CONFIG_PPC_SUBPAGE_PROT */ /* * pagetable fragment support */ @@ -176,62 +151,62 @@ typedef struct { static inline u16 mm_ctx_user_psize(mm_context_t *ctx) { - return ctx->user_psize; + return ctx->hash_context->user_psize; } static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) { - ctx->user_psize = user_psize; + ctx->hash_context->user_psize = user_psize; } static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx) { - return ctx->low_slices_psize; + return ctx->hash_context->low_slices_psize; } static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) { - return ctx->high_slices_psize; + return ctx->hash_context->high_slices_psize; } static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) { - return ctx->slb_addr_limit; + return ctx->hash_context->slb_addr_limit; } static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) { - ctx->slb_addr_limit = limit; + ctx->hash_context->slb_addr_limit = limit; } #ifdef CONFIG_PPC_64K_PAGES static inline struct slice_mask *mm_ctx_slice_mask_64k(mm_context_t *ctx) { - return &ctx->mask_64k; + return &ctx->hash_context->mask_64k; } #endif static inline struct slice_mask *mm_ctx_slice_mask_4k(mm_context_t *ctx) { - return &ctx->mask_4k; + return &ctx->hash_context->mask_4k; } #ifdef CONFIG_HUGETLB_PAGE static inline struct slice_mask 
*mm_ctx_slice_mask_16m(mm_context_t *ctx) { - return &ctx->mask_16m; + return &ctx->hash_context->mask_16m; } static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx) { - return &ctx->mask_16g; + return &ctx->hash_context->mask_16g; } #endif #ifdef CONFIG_PPC_SUBPAGE_PROT static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx) { - return &ctx->spt; + return &ctx->hash_context->spt; } #endif -- cgit v1.2.3 From ef629cc5bf0543eb57d6e344ba776880ac35fd00 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Apr 2019 18:33:51 +0530 Subject: powerc/mm/hash: Reduce hash_mm_context size Allocate subpage protect related variables only if we use the feature. This helps in reducing the hash related mm context struct by around 4K Before the patch sizeof(struct hash_mm_context) = 8288 After the patch sizeof(struct hash_mm_context) = 4160 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 4 +--- arch/powerpc/include/asm/book3s/64/mmu.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 4481bedbb5be..eeb40091b46b 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -687,10 +687,8 @@ struct subpage_prot_table { #define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS) extern void subpage_prot_free(struct mm_struct *mm); -extern void subpage_prot_init_new_context(struct mm_struct *mm); #else static inline void subpage_prot_free(struct mm_struct *mm) {} -static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } #endif /* CONFIG_PPC_SUBPAGE_PROT */ /* @@ -720,7 +718,7 @@ struct hash_mm_context { #endif #ifdef CONFIG_PPC_SUBPAGE_PROT - struct subpage_prot_table spt; + struct subpage_prot_table *spt; #endif /* CONFIG_PPC_SUBPAGE_PROT */ }; diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index e510e46b07ce..230a9dec7677 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -206,7 +206,7 @@ static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx) #ifdef CONFIG_PPC_SUBPAGE_PROT static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx) { - return &ctx->hash_context->spt; + return ctx->hash_context->spt; } #endif -- cgit v1.2.3 From a35a3c6f60657812366fca86a9ce71df1b8f7aff Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Apr 2019 18:29:13 +0530 Subject: powerpc/mm/hash64: Add a variable to track the end of IO mapping This makes it easy to update the region mapping in the later patch Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 3 ++- arch/powerpc/include/asm/book3s/64/pgtable.h | 8 +++++--- arch/powerpc/include/asm/book3s/64/radix.h | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 54b7af6cd27f..8cbc4106d449 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -69,7 +69,8 @@ #define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE) #define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) -#define H_KERN_IO_START H_VMALLOC_END +#define H_KERN_IO_START 
H_VMALLOC_END +#define H_KERN_IO_END (H_KERN_VIRT_START + H_KERN_VIRT_SIZE) /* * Region IDs diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index e3d18b3f6e5d..f8ab18f77d1b 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -277,9 +277,12 @@ extern unsigned long __vmalloc_end; extern unsigned long __kernel_virt_start; extern unsigned long __kernel_virt_size; extern unsigned long __kernel_io_start; +extern unsigned long __kernel_io_end; #define KERN_VIRT_START __kernel_virt_start #define KERN_VIRT_SIZE __kernel_virt_size #define KERN_IO_START __kernel_io_start +#define KERN_IO_END __kernel_io_end + extern struct page *vmemmap; extern unsigned long ioremap_bot; extern unsigned long pci_io_base; @@ -296,8 +299,7 @@ extern unsigned long pci_io_base; #include /* - * The second half of the kernel virtual space is used for IO mappings, - * it's itself carved into the PIO region (ISA and PHB IO space) and + * IO space itself carved into the PIO region (ISA and PHB IO space) and * the ioremap space * * ISA_IO_BASE = KERN_IO_START, 64K reserved area @@ -310,7 +312,7 @@ extern unsigned long pci_io_base; #define PHB_IO_BASE (ISA_IO_END) #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) -#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) +#define IOREMAP_END (KERN_IO_END) /* Advertise special mapping type for AGP */ #define HAVE_PAGE_AGP diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 5ab134eeed20..6d760a083d62 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -111,6 +111,7 @@ #define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) #define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1)) +#define RADIX_KERN_IO_END (RADIX_KERN_VIRT_START + RADIX_KERN_VIRT_SIZE) #ifndef __ASSEMBLY__ #define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) -- cgit v1.2.3 From 0034d395f89d9c092bb15adbabdca5283e258b41 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Apr 2019 18:29:14 +0530 Subject: powerpc/mm/hash64: Map all the kernel regions in the same 0xc range This patch maps vmalloc, IO and vmemap regions in the 0xc address range instead of the current 0xd and 0xf range. This brings the mapping closer to radix translation mode. With hash 64K page size each of this region is 512TB whereas with 4K config we are limited by the max page table range of 64TB and hence there regions are of 16TB size. 
The kernel mapping is now: On 4K hash kernel_region_map_size = 16TB kernel vmalloc start = 0xc000100000000000 kernel IO start = 0xc000200000000000 kernel vmemmap start = 0xc000300000000000 64K hash, 64K radix and 4k radix: kernel_region_map_size = 512TB kernel vmalloc start = 0xc008000000000000 kernel IO start = 0xc00a000000000000 kernel vmemmap start = 0xc00c000000000000 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 13 ++++ arch/powerpc/include/asm/book3s/64/hash-64k.h | 11 ++++ arch/powerpc/include/asm/book3s/64/hash.h | 95 +++++++++++++++++---------- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 31 +++++---- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 - arch/powerpc/include/asm/book3s/64/radix.h | 41 ++++++------ arch/powerpc/include/asm/page.h | 3 +- 7 files changed, 121 insertions(+), 74 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 54fab723a8c7..4c9dfd625461 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -13,6 +13,19 @@ */ #define MAX_EA_BITS_PER_CONTEXT 46 +/* + * Our page table limit us to 64TB. Hence for the kernel mapping, + * each MAP area is limited to 16 TB. + * The four map areas are: linear mapping, vmap, IO and vmemmap + */ +#define H_KERN_MAP_SIZE (ASM_CONST(1) << (MAX_EA_BITS_PER_CONTEXT - 2)) + +/* + * Define the address range of the kernel non-linear virtual area + * 16TB + */ +#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000) + #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 81f4eb6e7da4..0d0191cda050 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -14,6 +14,17 @@ */ #define MAX_EA_BITS_PER_CONTEXT 49 +/* + * We use one context for each MAP area. + */ +#define H_KERN_MAP_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT) + +/* + * Define the address range of the kernel non-linear virtual area + * 2PB + */ +#define H_KERN_VIRT_START ASM_CONST(0xc008000000000000) + /* * 64k aligned address free up few of the lower bits of RPN for us * We steal that here. For more deatils look at pte_pfn/pfn_pte() diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 8cbc4106d449..76741a221910 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -29,6 +29,10 @@ #define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) +/* + * Top 2 bits are ignored in page table walk. + */ +#define EA_MASK (~(0xcUL << 60)) /* * We store the slot details in the second half of page table. @@ -42,53 +46,56 @@ #endif /* - * Define the address range of the kernel non-linear virtual area. In contrast - * to the linear mapping, this is managed using the kernel page tables and then - * inserted into the hash page table to actually take effect, similarly to user - * mappings. 
+ * +------------------------------+ + * | | + * | | + * | | + * +------------------------------+ Kernel virtual map end (0xc00e000000000000) + * | | + * | | + * | 512TB/16TB of vmemmap | + * | | + * | | + * +------------------------------+ Kernel vmemmap start + * | | + * | 512TB/16TB of IO map | + * | | + * +------------------------------+ Kernel IO map start + * | | + * | 512TB/16TB of vmap | + * | | + * +------------------------------+ Kernel virt start (0xc008000000000000) + * | | + * | | + * | | + * +------------------------------+ Kernel linear (0xc.....) */ -#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) -/* - * Allow virtual mapping of one context size. - * 512TB for 64K page size - * 64TB for 4K page size - */ -#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT) +#define H_VMALLOC_START H_KERN_VIRT_START +#define H_VMALLOC_SIZE H_KERN_MAP_SIZE +#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) -/* - * 8TB IO mapping size - */ -#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */ - -/* - * The vmalloc space starts at the beginning of the kernel non-linear virtual - * region, and occupies 504T (64K) or 56T (4K) - */ -#define H_VMALLOC_START H_KERN_VIRT_START -#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE) -#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) +#define H_KERN_IO_START H_VMALLOC_END +#define H_KERN_IO_SIZE H_KERN_MAP_SIZE +#define H_KERN_IO_END (H_KERN_IO_START + H_KERN_IO_SIZE) -#define H_KERN_IO_START H_VMALLOC_END -#define H_KERN_IO_END (H_KERN_VIRT_START + H_KERN_VIRT_SIZE) +#define H_VMEMMAP_START H_KERN_IO_END +#define H_VMEMMAP_SIZE H_KERN_MAP_SIZE +#define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE) /* * Region IDs */ -#define REGION_SHIFT 60UL -#define REGION_MASK (0xfUL << REGION_SHIFT) -#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) - -#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START)) -#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) -#define VMEMMAP_REGION_ID (0xfUL) /* Server only */ -#define USER_REGION_ID (0UL) +#define USER_REGION_ID 1 +#define KERNEL_REGION_ID 2 +#define VMALLOC_REGION_ID 3 +#define IO_REGION_ID 4 +#define VMEMMAP_REGION_ID 5 /* * Defines the address of the vmemap area, in its own region on * hash table CPUs. 
*/ -#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) #ifdef CONFIG_PPC_MM_SLICES #define HAVE_ARCH_UNMAPPED_AREA @@ -104,6 +111,26 @@ #define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1) #ifndef __ASSEMBLY__ +static inline int get_region_id(unsigned long ea) +{ + int id = (ea >> 60UL); + + if (id == 0) + return USER_REGION_ID; + + VM_BUG_ON(id != 0xc); + VM_BUG_ON(ea >= H_VMEMMAP_END); + + if (ea >= H_VMEMMAP_START) + return VMEMMAP_REGION_ID; + else if (ea >= H_KERN_IO_START) + return IO_REGION_ID; + else if (ea >= H_VMALLOC_START) + return VMALLOC_REGION_ID; + + return KERNEL_REGION_ID; +} + #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) #define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) static inline int hash__pgd_bad(pgd_t pgd) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index eeb40091b46b..8a30bf189f10 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -588,7 +588,8 @@ extern void slb_set_size(u16 size); #endif #define MAX_VMALLOC_CTX_CNT 1 -#define MAX_MEMMAP_CTX_CNT 1 +#define MAX_IO_CTX_CNT 1 +#define MAX_VMEMMAP_CTX_CNT 1 /* * 256MB segment @@ -601,13 +602,10 @@ extern void slb_set_size(u16 size); * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 * because of the modulo operation in vsid scramble. * - * We add one extra context to MIN_USER_CONTEXT so that we can map kernel - * context easily. The +1 is to map the unused 0xe region mapping. */ #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) #define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \ - MAX_MEMMAP_CTX_CNT + 2) - + MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT) /* * For platforms that support on 65bit VA we limit the context bits */ @@ -776,7 +774,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, /* * Bad address. We return VSID 0 for that */ - if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) + if ((ea & EA_MASK) >= H_PGTABLE_RANGE) return 0; if (!mmu_has_feature(MMU_FTR_68_BIT_VA)) @@ -803,28 +801,29 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff] * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff] * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff] - - * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ] - * 0x00006 - Not used - Can map 0xe000000000000000 range. - * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ] * - * So we can compute the context from the region (top nibble) by - * subtracting 11, or 0xc - 1. + * vmap, IO, vmemap + * + * 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff] + * 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff] + * 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff] + * */ static inline unsigned long get_kernel_context(unsigned long ea) { - unsigned long region_id = REGION_ID(ea); + unsigned long region_id = get_region_id(ea); unsigned long ctx; /* - * For linear mapping we do support multiple context + * Depending on Kernel config, kernel region can have one context + * or more. */ if (region_id == KERNEL_REGION_ID) { /* * We already verified ea to be not beyond the addr limit. 
*/ - ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT); + ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT); } else - ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT; + ctx = region_id + MAX_KERNEL_CTX_CNT - 2; return ctx; } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index f8ab18f77d1b..7dede2e34b70 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -279,7 +279,6 @@ extern unsigned long __kernel_virt_size; extern unsigned long __kernel_io_start; extern unsigned long __kernel_io_end; #define KERN_VIRT_START __kernel_virt_start -#define KERN_VIRT_SIZE __kernel_virt_size #define KERN_IO_START __kernel_io_start #define KERN_IO_END __kernel_io_end diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 6d760a083d62..574eca33f893 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -72,19 +72,17 @@ * | | * | | * | | - * +------------------------------+ Kernel IO map end (0xc010000000000000) + * +------------------------------+ Kernel vmemmap end (0xc010000000000000) * | | + * | 512TB | * | | - * | 1/2 of virtual map | + * +------------------------------+ Kernel IO map end/vmemap start * | | + * | 512TB | * | | - * +------------------------------+ Kernel IO map start + * +------------------------------+ Kernel vmap end/ IO map start * | | - * | 1/4 of virtual map | - * | | - * +------------------------------+ Kernel vmemap start - * | | - * | 1/4 of virtual map | + * | 512TB | * | | * +------------------------------+ Kernel virt start (0xc008000000000000) * | | @@ -93,25 +91,24 @@ * +------------------------------+ Kernel linear (0xc.....) */ -#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) -#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000) - +#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) /* - * The vmalloc space starts at the beginning of that region, and - * occupies a quarter of it on radix config. - * (we keep a quarter for the virtual memmap) + * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick + * the same value as hash. */ +#define RADIX_KERN_MAP_SIZE (1UL << 49) + #define RADIX_VMALLOC_START RADIX_KERN_VIRT_START -#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2) +#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE #define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE) -/* - * Defines the address of the vmemap area, in its own region on - * hash table CPUs. 
- */ -#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) -#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1)) -#define RADIX_KERN_IO_END (RADIX_KERN_VIRT_START + RADIX_KERN_VIRT_SIZE) +#define RADIX_KERN_IO_START RADIX_VMALLOC_END +#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE +#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE) + +#define RADIX_VMEMMAP_START RADIX_KERN_IO_END +#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE +#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE) #ifndef __ASSEMBLY__ #define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index ed870468ef6f..918228f2205b 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -139,7 +139,8 @@ static inline bool pfn_valid(unsigned long pfn) * return true for some vmalloc addresses, which is incorrect. So explicitly * check that the address is in the kernel region. */ -#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \ +/* may be can drop get_region_id */ +#define virt_addr_valid(kaddr) (get_region_id((unsigned long)kaddr) == KERNEL_REGION_ID && \ pfn_valid(virt_to_pfn(kaddr))) #else #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -- cgit v1.2.3 From 53ed7a5947de2e19c270a0bc0c29257c6d004b0f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Apr 2019 18:29:16 +0530 Subject: powerpc/mm: Drop the unnecessary region check All the regions are now mapped with top nibble 0xc. Hence the region id check is not needed for virt_addr_valid() Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/page.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 918228f2205b..748f5db2e2b7 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,19 +132,7 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#ifdef CONFIG_PPC_BOOK3S_64 -/* - * On hash the vmalloc and other regions alias to the kernel region when passed - * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can - * return true for some vmalloc addresses, which is incorrect. So explicitly - * check that the address is in the kernel region. - */ -/* may be can drop get_region_id */ -#define virt_addr_valid(kaddr) (get_region_id((unsigned long)kaddr) == KERNEL_REGION_ID && \ - pfn_valid(virt_to_pfn(kaddr))) -#else #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -#endif /* * On Book-E parts we need __va to parse the device tree and we can't -- cgit v1.2.3 From 1c946c1b7f2ba40bc9b521219ad34e5da3fc3088 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Apr 2019 18:29:17 +0530 Subject: powerpc/mm/hash: Simplify the region id calculation. This reduces multiple comparisons in get_region_id to a bit shift operation. 
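For a concrete feel of the new calculation (editorial sketch assuming the 64K hash layout, where H_KERN_VIRT_START is 0xc008000000000000 and REGION_SHIFT is 49):

	/* Same arithmetic as NON_LINEAR_REGION_ID(), spelled out for illustration. */
	static inline int example_region_id(unsigned long ea)
	{
		return ((ea - 0xc008000000000000UL) >> 49) + 2;
	}
	/*
	 * 0xc008000000000000 (vmalloc start) -> 0 + 2 = 2 = VMALLOC_REGION_ID
	 * 0xc00a000000000000 (IO map start)  -> 1 + 2 = 3 = IO_REGION_ID
	 * 0xc00c000000000000 (vmemmap start) -> 2 + 2 = 4 = VMEMMAP_REGION_ID
	 */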
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 4 +++- arch/powerpc/include/asm/book3s/64/hash-64k.h | 1 + arch/powerpc/include/asm/book3s/64/hash.h | 31 +++++++++++++-------------- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 +- 4 files changed, 20 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 4c9dfd625461..8fd8599c9395 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -13,12 +13,14 @@ */ #define MAX_EA_BITS_PER_CONTEXT 46 +#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2) + /* * Our page table limit us to 64TB. Hence for the kernel mapping, * each MAP area is limited to 16 TB. * The four map areas are: linear mapping, vmap, IO and vmemmap */ -#define H_KERN_MAP_SIZE (ASM_CONST(1) << (MAX_EA_BITS_PER_CONTEXT - 2)) +#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) /* * Define the address range of the kernel non-linear virtual area diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 0d0191cda050..d1d9177d9ebd 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -13,6 +13,7 @@ * is handled in the hotpath. */ #define MAX_EA_BITS_PER_CONTEXT 49 +#define REGION_SHIFT MAX_EA_BITS_PER_CONTEXT /* * We use one context for each MAP area. diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 76741a221910..7faa3d7214c0 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -83,26 +83,26 @@ #define H_VMEMMAP_SIZE H_KERN_MAP_SIZE #define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE) +#define NON_LINEAR_REGION_ID(ea) ((((unsigned long)ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2) + /* * Region IDs */ -#define USER_REGION_ID 1 -#define KERNEL_REGION_ID 2 -#define VMALLOC_REGION_ID 3 -#define IO_REGION_ID 4 -#define VMEMMAP_REGION_ID 5 +#define USER_REGION_ID 0 +#define KERNEL_REGION_ID 1 +#define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START) +#define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START) +#define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START) /* * Defines the address of the vmemap area, in its own region on * hash table CPUs. 
*/ #ifdef CONFIG_PPC_MM_SLICES #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #endif /* CONFIG_PPC_MM_SLICES */ - /* PTEIDX nibble */ #define _PTEIDX_SECONDARY 0x8 #define _PTEIDX_GROUP_IX 0x7 @@ -113,22 +113,21 @@ #ifndef __ASSEMBLY__ static inline int get_region_id(unsigned long ea) { + int region_id; int id = (ea >> 60UL); if (id == 0) return USER_REGION_ID; - VM_BUG_ON(id != 0xc); - VM_BUG_ON(ea >= H_VMEMMAP_END); + if (ea < H_KERN_VIRT_START) + return KERNEL_REGION_ID; - if (ea >= H_VMEMMAP_START) - return VMEMMAP_REGION_ID; - else if (ea >= H_KERN_IO_START) - return IO_REGION_ID; - else if (ea >= H_VMALLOC_START) - return VMALLOC_REGION_ID; + VM_BUG_ON(id != 0xc); + BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2); - return KERNEL_REGION_ID; + region_id = NON_LINEAR_REGION_ID(ea); + VM_BUG_ON(region_id > VMEMMAP_REGION_ID); + return region_id; } #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 8a30bf189f10..9a9adbeef070 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -823,7 +823,7 @@ static inline unsigned long get_kernel_context(unsigned long ea) */ ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT); } else - ctx = region_id + MAX_KERNEL_CTX_CNT - 2; + ctx = region_id + MAX_KERNEL_CTX_CNT - 1; return ctx; } -- cgit v1.2.3 From 5f53d28608f600d9ee07378453bd2d49e132fff4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Apr 2019 18:29:19 +0530 Subject: powerpc/mm/hash: Rename KERNEL_REGION_ID to LINEAR_MAP_REGION_ID The region actually points to the linear map. Rename the #define to clarify that. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 4 ++-- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 7faa3d7214c0..1d1183048cfd 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -89,7 +89,7 @@ * Region IDs */ #define USER_REGION_ID 0 -#define KERNEL_REGION_ID 1 +#define LINEAR_MAP_REGION_ID 1 #define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START) #define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START) #define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START) @@ -120,7 +120,7 @@ static inline int get_region_id(unsigned long ea) return USER_REGION_ID; if (ea < H_KERN_VIRT_START) - return KERNEL_REGION_ID; + return LINEAR_MAP_REGION_ID; VM_BUG_ON(id != 0xc); BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2); diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 9a9adbeef070..1e4705516a54 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -817,7 +817,7 @@ static inline unsigned long get_kernel_context(unsigned long ea) * Depending on Kernel config, kernel region can have one context * or more. */ - if (region_id == KERNEL_REGION_ID) { + if (region_id == LINEAR_MAP_REGION_ID) { /* * We already verified ea to be not beyond the addr limit. 
*/ -- cgit v1.2.3 From b2d3b5ee66f2a04a918cc043cec0c9ed3de58f40 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Tue, 2 Oct 2018 10:35:59 -0500 Subject: powerpc/pseries: Track LMB nid instead of using device tree When removing memory we need to remove the memory from the node it was added to instead of looking up the node it should be in in the device tree. During testing we have seen scenarios where the affinity for a LMB changes due to a partition migration or PRRN event. In these cases the node the LMB exists in may not match the node the device tree indicates it belongs in. This can lead to a system crash when trying to DLPAR remove the LMB after a migration or PRRN event. The current code looks up the node in the device tree to remove the LMB from, the crash occurs when we try to offline this node and it does not have any data, i.e. node_data[nid] == NULL. 36:mon> e cpu 0x36: Vector: 300 (Data Access) at [c0000001828b7810] pc: c00000000036d08c: try_offline_node+0x2c/0x1b0 lr: c0000000003a14ec: remove_memory+0xbc/0x110 sp: c0000001828b7a90 msr: 800000000280b033 dar: 9a28 dsisr: 40000000 current = 0xc0000006329c4c80 paca = 0xc000000007a55200 softe: 0 irq_happened: 0x01 pid = 76926, comm = kworker/u320:3 36:mon> t [link register ] c0000000003a14ec remove_memory+0xbc/0x110 [c0000001828b7a90] c00000000006a1cc arch_remove_memory+0x9c/0xd0 (unreliable) [c0000001828b7ad0] c0000000003a14e0 remove_memory+0xb0/0x110 [c0000001828b7b20] c0000000000c7db4 dlpar_remove_lmb+0x94/0x160 [c0000001828b7b60] c0000000000c8ef8 dlpar_memory+0x7e8/0xd10 [c0000001828b7bf0] c0000000000bf828 handle_dlpar_errorlog+0xf8/0x160 [c0000001828b7c60] c0000000000bf8cc pseries_hp_work_fn+0x3c/0xa0 [c0000001828b7c90] c000000000128cd8 process_one_work+0x298/0x5a0 [c0000001828b7d20] c000000000129068 worker_thread+0x88/0x620 [c0000001828b7dc0] c00000000013223c kthread+0x1ac/0x1c0 [c0000001828b7e30] c00000000000b45c ret_from_kernel_thread+0x5c/0x80 To resolve this we need to track the node a LMB belongs to when it is added to the system so we can remove it from that node instead of the node that the device tree indicates it should belong to. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/drmem.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 7c1d8e74b25d..7f3279b014db 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -17,6 +17,9 @@ struct drmem_lmb { u32 drc_index; u32 aa_index; u32 flags; +#ifdef CONFIG_MEMORY_HOTPLUG + int nid; +#endif }; struct drmem_lmb_info { @@ -104,4 +107,22 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0xffffffff; } +#ifdef CONFIG_MEMORY_HOTPLUG +static inline void lmb_set_nid(struct drmem_lmb *lmb) +{ + lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); +} +static inline void lmb_clear_nid(struct drmem_lmb *lmb) +{ + lmb->nid = -1; +} +#else +static inline void lmb_set_nid(struct drmem_lmb *lmb) +{ +} +static inline void lmb_clear_nid(struct drmem_lmb *lmb) +{ +} +#endif + #endif /* _ASM_POWERPC_LMB_H */ -- cgit v1.2.3 From 10d91611f426d4bafd2a83d966c36da811b2f7ad Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 13 Apr 2019 00:30:52 +1000 Subject: powerpc/64s: Reimplement book3s idle code in C Reimplement Book3S idle code in C, moving POWER7/8/9 implementation speific HV idle code to the powernv platform code. 
Book3S assembly stubs are kept in common code and used only to save the stack frame and non-volatile GPRs before executing architected idle instructions, and restoring the stack and reloading GPRs then returning to C after waking from idle. The complex logic dealing with threads and subcores, locking, SPRs, HMIs, timebase resync, etc., is all done in C which makes it more maintainable. This is not a strict translation to C code, there are some significant differences: - Idle wakeup no longer uses the ->cpu_restore call to reinit SPRs, but saves and restores them itself. - The optimisation where EC=ESL=0 idle modes did not have to save GPRs or change MSR is restored, because it's now simple to do. ESL=1 sleeps that do not lose GPRs can use this optimization too. - KVM secondary entry and cede is now more of a call/return style rather than branchy. nap_state_lost is not required because KVM always returns via NVGPR restoring path. - KVM secondary wakeup from offline sequence is moved entirely into the offline wakeup, which avoids a hwsync in the normal idle wakeup path. Performance measured with context switch ping-pong on different threads or cores, is possibly improved a small amount, 1-3% depending on stop state and core vs thread test for shallow states. Deep states it's in the noise compared with other latencies. KVM improvements: - Idle sleepers now always return to caller rather than branch out to KVM first. - This allows optimisations like very fast return to caller when no state has been lost. - KVM no longer requires nap_state_lost because it controls NVGPR save/restore itself on the way in and out. - The heavy idle wakeup KVM request check can be moved out of the normal host idle code and into the not-performance-critical offline code. - KVM nap code now returns from where it is called, which makes the flow a bit easier to follow. Reviewed-by: Gautham R. Shenoy Signed-off-by: Nicholas Piggin [mpe: Squash the KVM changes in] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 19 +++-------------- arch/powerpc/include/asm/paca.h | 40 ++++++++++++++++++++---------------- arch/powerpc/include/asm/processor.h | 9 +++++--- arch/powerpc/include/asm/reg.h | 8 ++++---- 4 files changed, 35 insertions(+), 41 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 43e5f31fe64d..9844b3ded187 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -27,10 +27,11 @@ * the THREAD_WINKLE_BITS are set, which indicate which threads have not * yet woken from the winkle state. 
*/ -#define PNV_CORE_IDLE_LOCK_BIT 0x10000000 +#define NR_PNV_CORE_IDLE_LOCK_BIT 28 +#define PNV_CORE_IDLE_LOCK_BIT (1ULL << NR_PNV_CORE_IDLE_LOCK_BIT) +#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 16 #define PNV_CORE_IDLE_WINKLE_COUNT 0x00010000 -#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT 0x00080000 #define PNV_CORE_IDLE_WINKLE_COUNT_BITS 0x000F0000 #define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT 8 #define PNV_CORE_IDLE_THREAD_WINKLE_BITS 0x0000FF00 @@ -68,16 +69,6 @@ #define ERR_DEEP_STATE_ESL_MISMATCH -2 #ifndef __ASSEMBLY__ -/* Additional SPRs that need to be saved/restored during stop */ -struct stop_sprs { - u64 pid; - u64 ldbar; - u64 fscr; - u64 hfscr; - u64 mmcr1; - u64 mmcr2; - u64 mmcra; -}; #define PNV_IDLE_NAME_LEN 16 struct pnv_idle_states_t { @@ -92,10 +83,6 @@ struct pnv_idle_states_t { extern struct pnv_idle_states_t *pnv_idle_states; extern int nr_pnv_idle_states; -extern u32 pnv_fastsleep_workaround_at_entry[]; -extern u32 pnv_fastsleep_workaround_at_exit[]; - -extern u64 pnv_first_deep_stop_state; unsigned long pnv_cpu_offline(unsigned int cpu); int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index e843bc5d1a0f..245d11a71784 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -173,7 +173,6 @@ struct paca_struct { u8 irq_happened; /* irq happened while soft-disabled */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ - u8 nap_state_lost; /* NV GPR values lost in power7_idle */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE u8 pmcregs_in_use; /* pseries puts this in lppaca */ #endif @@ -183,23 +182,28 @@ struct paca_struct { #endif #ifdef CONFIG_PPC_POWERNV - /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */ - u32 *core_idle_state_ptr; - u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */ - /* Mask to indicate thread id in core */ - u8 thread_mask; - /* Mask to denote subcore sibling threads */ - u8 subcore_sibling_mask; - /* Flag to request this thread not to stop */ - atomic_t dont_stop; - /* The PSSCR value that the kernel requested before going to stop */ - u64 requested_psscr; - - /* - * Save area for additional SPRs that need to be - * saved/restored during cpuidle stop. 
- */ - struct stop_sprs stop_sprs; + /* PowerNV idle fields */ + /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */ + unsigned long idle_state; + union { + /* P7/P8 specific fields */ + struct { + /* PNV_THREAD_RUNNING/NAP/SLEEP */ + u8 thread_idle_state; + /* Mask to denote subcore sibling threads */ + u8 subcore_sibling_mask; + }; + + /* P9 specific fields */ + struct { +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* The PSSCR value that the kernel requested before going to stop */ + u64 requested_psscr; + /* Flag to request this thread not to stop */ + atomic_t dont_stop; +#endif + }; + }; #endif #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 3351bcf42f2d..3120cca72e1f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -411,14 +411,17 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32) } #endif +/* asm stubs */ +extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); +extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); +extern unsigned long isa206_idle_insn_mayloss(unsigned long type); + extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ -extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/ + extern void power7_idle_type(unsigned long type); -extern unsigned long power9_idle_stop(unsigned long psscr_val); -extern unsigned long power9_offline_stop(unsigned long psscr_val); extern void power9_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c5b2aff0ce8e..10caa145f98b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -168,6 +168,7 @@ #define PSSCR_ESL 0x00200000 /* Enable State Loss */ #define PSSCR_SD 0x00400000 /* Status Disable */ #define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ +#define PSSCR_PLS_SHIFT 60 #define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */ #define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */ #define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */ @@ -758,10 +759,9 @@ #define SRR1_WAKERESET 0x00100000 /* System reset */ #define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */ #define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */ -#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained, - * may not be recoverable */ -#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ -#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ +#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */ +#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */ +#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */ #define SRR1_PROGTM 0x00200000 /* TM Bad Thing */ #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ #define SRR1_PROGILL 0x00080000 /* Illegal instruction */ -- cgit v1.2.3 From 5b2a15296210d3b70e06d0f09a8e701ff74ccbe8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 4 Oct 2018 16:23:37 +1000 Subject: powerpc: Add doorbell tracepoints When analysing sources of OS jitter, I noticed that doorbells cannot be traced. 
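A minimal sketch of how these events are typically hooked up (editorial illustration; the C handler change is outside this header-only diff, so treat the exact call sites as an assumption):

	/* Hypothetical instrumentation of the doorbell handler, for illustration only. */
	void doorbell_exception(struct pt_regs *regs)
	{
		trace_doorbell_entry(regs);	/* tracepoint generated from DEFINE_EVENT(doorbell_entry) */

		/* ... acknowledge and handle the doorbell IPI ... */

		trace_doorbell_exit(regs);	/* paired exit event */
	}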
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/trace.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 58ef8c43a89d..08cd60cd70b7 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -54,6 +54,22 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit, TP_ARGS(regs) ); +#ifdef CONFIG_PPC_DOORBELL +DEFINE_EVENT(ppc64_interrupt_class, doorbell_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs) +); + +DEFINE_EVENT(ppc64_interrupt_class, doorbell_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs) +); +#endif + #ifdef CONFIG_PPC_PSERIES extern int hcall_tracepoint_regfunc(void); extern void hcall_tracepoint_unregfunc(void); -- cgit v1.2.3 From d6e8a150850601277039a548ffcdddd1bfe3e365 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 29 Apr 2019 23:45:48 +0530 Subject: powerpc/powernv/mce: Reduce MCE console logs to lesser lines. Also add cpu number while displaying MCE log. This will help cleaner logs when MCE hits on multiple cpus simultaneously. Before the changes the MCE output was: Severe Machine check interrupt [Recovered] NIP [d00000000ba80280]: insert_slb_entry.constprop.0+0x278/0x2c0 [mcetest_slb] Initiator: CPU Error type: SLB [Multihit] Effective address: d00000000ba80280 After this patch series changes the MCE output will be: MCE: CPU80: machine check (Warning) Host SLB Multihit [Recovered] MCE: CPU80: NIP: [d00000000b550280] insert_slb_entry.constprop.0+0x278/0x2c0 [mcetest_slb] MCE: CPU80: Probable software error (some chance of hardware cause) UE in host application: MCE: CPU48: machine check (Severe) Host UE Load/Store DAR: 00007fffc6079a80 paddr: 0000000f8e260000 [Not recovered] MCE: CPU48: PID: 4584 Comm: find NIP: [0000000010023368] MCE: CPU48: Hardware error and for MCE in Guest: MCE: CPU80: machine check (Warning) Guest SLB Multihit DAR: 000001001b6e0320 [Recovered] MCE: CPU80: PID: 24765 Comm: qemu-system-ppc Guest NIP: [00007fffa309dc60] MCE: CPU80: Probable software error (some chance of hardware cause) Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index ad47fa865324..c888ef9a3eaf 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -116,7 +116,7 @@ struct machine_check_event { enum MCE_Initiator initiator:8; /* 0x03 */ enum MCE_ErrorType error_type:8; /* 0x04 */ enum MCE_Disposition disposition:8; /* 0x05 */ - uint8_t reserved_1[2]; /* 0x06 */ + uint16_t cpu; /* 0x06 */ uint64_t gpr3; /* 0x08 */ uint64_t srr0; /* 0x10 */ uint64_t srr1; /* 0x18 */ -- cgit v1.2.3 From cda6618d060b5e8afc93e691d4bcd987f3dd4393 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 29 Apr 2019 23:45:55 +0530 Subject: powerpc/powernv/mce: Print correct severity for MCE error. Currently all machine check errors are printed as severe errors which isn't correct. Print soft errors as warning instead of severe errors. 
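A hedged sketch of what that distinction can look like to a consumer of this header (the real reporting code lives outside this hunk, so the mapping below is an assumption, not the patch's implementation):

	/* Hypothetical severity-to-string mapping, for illustration only. */
	static const char *mce_severity_str(enum MCE_Severity sev)
	{
		switch (sev) {
		case MCE_SEV_NO_ERROR:	return "No error";
		case MCE_SEV_WARNING:	return "Warning";	/* soft errors */
		case MCE_SEV_SEVERE:	return "Severe";
		case MCE_SEV_FATAL:	return "Fatal";
		}
		return "Unknown";
	}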
Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mce.h | 86 +++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 42 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index c888ef9a3eaf..d6dc75f9e9bb 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -31,7 +31,7 @@ enum MCE_Version { enum MCE_Severity { MCE_SEV_NO_ERROR = 0, MCE_SEV_WARNING = 1, - MCE_SEV_ERROR_SYNC = 2, + MCE_SEV_SEVERE = 2, MCE_SEV_FATAL = 3, }; @@ -110,73 +110,74 @@ enum MCE_LinkErrorType { }; struct machine_check_event { - enum MCE_Version version:8; /* 0x00 */ - uint8_t in_use; /* 0x01 */ - enum MCE_Severity severity:8; /* 0x02 */ - enum MCE_Initiator initiator:8; /* 0x03 */ - enum MCE_ErrorType error_type:8; /* 0x04 */ - enum MCE_Disposition disposition:8; /* 0x05 */ - uint16_t cpu; /* 0x06 */ - uint64_t gpr3; /* 0x08 */ - uint64_t srr0; /* 0x10 */ - uint64_t srr1; /* 0x18 */ - union { /* 0x20 */ + enum MCE_Version version:8; + u8 in_use; + enum MCE_Severity severity:8; + enum MCE_Initiator initiator:8; + enum MCE_ErrorType error_type:8; + enum MCE_Disposition disposition:8; + bool sync_error; + u16 cpu; + u64 gpr3; + u64 srr0; + u64 srr1; + union { struct { enum MCE_UeErrorType ue_error_type:8; - uint8_t effective_address_provided; - uint8_t physical_address_provided; - uint8_t reserved_1[5]; - uint64_t effective_address; - uint64_t physical_address; - uint8_t reserved_2[8]; + u8 effective_address_provided; + u8 physical_address_provided; + u8 reserved_1[5]; + u64 effective_address; + u64 physical_address; + u8 reserved_2[8]; } ue_error; struct { enum MCE_SlbErrorType slb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } slb_error; struct { enum MCE_EratErrorType erat_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } erat_error; struct { enum MCE_TlbErrorType tlb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } tlb_error; struct { enum MCE_UserErrorType user_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } user_error; struct { enum MCE_RaErrorType ra_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } ra_error; struct { enum MCE_LinkErrorType link_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } link_error; } u; }; @@ -194,6 +195,7 @@ struct mce_error_info { } u; enum MCE_Severity severity:8; enum MCE_Initiator initiator:8; + bool sync_error; }; #define 
MAX_MC_EVT 100 -- cgit v1.2.3 From 50dbabe06a6e1c35980154ea1fac2ed6ad28652b Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 29 Apr 2019 23:46:02 +0530 Subject: powerpc/powernv/mce: Print additional information about MCE error. Print more information about MCE error whether it is an hardware or software error. Some of the MCE errors can be easily categorized as hardware or software errors e.g. UEs are due to hardware error, where as error triggered due to invalid usage of tlbie is a pure software bug. But not all the MCE errors can be easily categorize into either software or hardware. There are errors like multihit errors which are usually result of a software bug, but in some rare cases a hardware failure can cause a multihit error. In past, we have seen case where after replacing faulty chip, multihit errors stopped occurring. Same with parity errors, which are usually due to faulty hardware but there are chances where multihit can also cause an parity error. Such errors are difficult to determine what really caused it. Hence this patch classifies MCE errors into following four categorize: 1. Hardware error: UE and Link timeout failure errors. 2. Probable hardware error (some chance of software cause) SLB/ERAT/TLB Parity errors. 3. Software error Invalid tlbie form. 4. Probable software error (some chance of hardware cause) SLB/ERAT/TLB Multihit errors. Sample output: MCE: CPU80: machine check (Warning) Guest SLB Multihit DAR: 000001001b6e0320 [Recovered] MCE: CPU80: PID: 24765 Comm: qemu-system-ppc Guest NIP: [00007fffa309dc60] MCE: CPU80: Probable Software error (some chance of hardware cause) Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mce.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index d6dc75f9e9bb..23247a132ce8 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -56,6 +56,14 @@ enum MCE_ErrorType { MCE_ERROR_TYPE_LINK = 7, }; +enum MCE_ErrorClass { + MCE_ECLASS_UNKNOWN = 0, + MCE_ECLASS_HARDWARE, + MCE_ECLASS_HARD_INDETERMINATE, + MCE_ECLASS_SOFTWARE, + MCE_ECLASS_SOFT_INDETERMINATE, +}; + enum MCE_UeErrorType { MCE_UE_ERROR_INDETERMINATE = 0, MCE_UE_ERROR_IFETCH = 1, @@ -115,6 +123,7 @@ struct machine_check_event { enum MCE_Severity severity:8; enum MCE_Initiator initiator:8; enum MCE_ErrorType error_type:8; + enum MCE_ErrorClass error_class:8; enum MCE_Disposition disposition:8; bool sync_error; u16 cpu; @@ -195,6 +204,7 @@ struct mce_error_info { } u; enum MCE_Severity severity:8; enum MCE_Initiator initiator:8; + enum MCE_ErrorClass error_class:8; bool sync_error; }; -- cgit v1.2.3 From 71faf8145cdc20f22aa398eb7b206b33826cf2bd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 28 Mar 2019 13:19:47 +0000 Subject: powerpc/nohash64: clean pgtable.h TRANSPARENT_HUGEPAGE is only supported by book3s VMEMMAP_REGION_ID is never used Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/64/pgtable.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 0384a3302fb6..c8e6a9a5bc33 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -23,11 +23,7 @@ PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) #define PGTABLE_RANGE 
(ASM_CONST(1) << PGTABLE_EADDR_SIZE) -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1) -#else #define PMD_CACHE_INDEX PMD_INDEX_SIZE -#endif #define PUD_CACHE_INDEX PUD_INDEX_SIZE /* @@ -73,7 +69,6 @@ #define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START)) #define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) -#define VMEMMAP_REGION_ID (0xfUL) /* Server only */ #define USER_REGION_ID (0UL) /* -- cgit v1.2.3 From 02f89aed6b829d73980bb633d9f4e3de9eb45543 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 25 Apr 2019 14:29:28 +0000 Subject: powerpc/mm: no slice for nohash/64 Only nohash/32 and book3s/64 support mm slices. Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/64/slice.h | 12 ------------ arch/powerpc/include/asm/slice.h | 4 +--- 2 files changed, 1 insertion(+), 15 deletions(-) delete mode 100644 arch/powerpc/include/asm/nohash/64/slice.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h deleted file mode 100644 index ad0d6e3cc1c5..000000000000 --- a/arch/powerpc/include/asm/nohash/64/slice.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H -#define _ASM_POWERPC_NOHASH_64_SLICE_H - -#ifdef CONFIG_PPC_64K_PAGES -#define get_slice_psize(mm, addr) MMU_PAGE_64K -#else /* CONFIG_PPC_64K_PAGES */ -#define get_slice_psize(mm, addr) MMU_PAGE_4K -#endif /* !CONFIG_PPC_64K_PAGES */ -#define slice_set_user_psize(mm, psize) do { BUG(); } while (0) - -#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index 44816cbc4198..be8af667098f 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -4,9 +4,7 @@ #ifdef CONFIG_PPC_BOOK3S_64 #include -#elif defined(CONFIG_PPC64) -#include -#elif defined(CONFIG_PPC_MMU_NOHASH) +#elif defined(CONFIG_PPC_MMU_NOHASH_32) #include #endif -- cgit v1.2.3 From fca5c1e9eb5e263c1b4def0b5ae4ce5b2e1a9877 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 25 Apr 2019 14:29:30 +0000 Subject: powerpc/mm: move slice_mask_for_size() into mmu.h Move slice_mask_for_size() into subarch mmu.h Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V [mpe: Retain the BUG_ON()s, rather than converting to VM_BUG_ON()] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 17 +++++++++++ arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 42 +++++++++++++++++++--------- 2 files changed, 46 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 230a9dec7677..a6d5b5ed1170 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -203,6 +203,23 @@ static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx) } #endif +static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) +{ +#ifdef CONFIG_PPC_64K_PAGES + if (psize == MMU_PAGE_64K) + return mm_ctx_slice_mask_64k(&ctx); +#endif +#ifdef CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_16M) + return mm_ctx_slice_mask_16m(&ctx); + if (psize == MMU_PAGE_16G) + return mm_ctx_slice_mask_16g(&ctx); +#endif + BUG_ON(psize != MMU_PAGE_4K); + + return mm_ctx_slice_mask_4k(&ctx); +} + #ifdef CONFIG_PPC_SUBPAGE_PROT static inline struct subpage_prot_table 
*mm_ctx_subpage_prot(mm_context_t *ctx) { diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index c503e2f05e61..114f50d995dc 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -184,7 +184,23 @@ #define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE #endif +#if defined(CONFIG_PPC_4K_PAGES) +#define mmu_virtual_psize MMU_PAGE_4K +#elif defined(CONFIG_PPC_16K_PAGES) +#define mmu_virtual_psize MMU_PAGE_16K +#define PTE_FRAG_NR 4 +#define PTE_FRAG_SIZE_SHIFT 12 +#define PTE_FRAG_SIZE (1UL << 12) +#else +#error "Unsupported PAGE_SIZE" +#endif + +#define mmu_linear_psize MMU_PAGE_8M + #ifndef __ASSEMBLY__ + +#include + struct slice_mask { u64 low_slices; DECLARE_BITMAP(high_slices, 0); @@ -255,6 +271,19 @@ static inline struct slice_mask *mm_ctx_slice_mask_8m(mm_context_t *ctx) return &ctx->mask_8m; } #endif + +static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) +{ +#ifdef CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_512K) + return &ctx->mask_512k; + if (psize == MMU_PAGE_8M) + return &ctx->mask_8m; +#endif + BUG_ON(psize != mmu_virtual_psize); + + return &ctx->mask_base_psize; +} #endif /* CONFIG_PPC_MM_SLICE */ #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) @@ -306,17 +335,4 @@ extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf; #endif /* !__ASSEMBLY__ */ -#if defined(CONFIG_PPC_4K_PAGES) -#define mmu_virtual_psize MMU_PAGE_4K -#elif defined(CONFIG_PPC_16K_PAGES) -#define mmu_virtual_psize MMU_PAGE_16K -#define PTE_FRAG_NR 4 -#define PTE_FRAG_SIZE_SHIFT 12 -#define PTE_FRAG_SIZE (1UL << 12) -#else -#error "Unsupported PAGE_SIZE" -#endif - -#define mmu_linear_psize MMU_PAGE_8M - #endif /* _ASM_POWERPC_MMU_8XX_H_ */ -- cgit v1.2.3 From 877461210ea1c92f159bf261924e58d7d27edadc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 25 Apr 2019 14:29:31 +0000 Subject: powerpc/mm: get rid of mm_ctx_slice_mask_xxx() Now that slice_mask_for_size() is in mmu.h, the mm_ctx_slice_mask_xxx() are not needed anymore, so drop them. Note that the 8xx ones where not used anyway. 
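As an illustrative example of the resulting interface (the helper below
is made up for this note, it is not part of the patch), callers in the
slice code only need the generic accessor:

	/* Sketch only; assumes the usual mm headers are already included. */
	static bool example_low_4k_mask_empty(struct mm_struct *mm)
	{
		const struct slice_mask *mask =
			slice_mask_for_size(&mm->context, MMU_PAGE_4K);

		return mask->low_slices == 0;
	}

so no per-size mm_ctx_slice_mask_xxx() accessor is needed any more.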
Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 32 ++++------------------------ arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 17 --------------- 2 files changed, 4 insertions(+), 45 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index a6d5b5ed1170..51b2d60efc1b 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -179,45 +179,21 @@ static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long li ctx->hash_context->slb_addr_limit = limit; } -#ifdef CONFIG_PPC_64K_PAGES -static inline struct slice_mask *mm_ctx_slice_mask_64k(mm_context_t *ctx) -{ - return &ctx->hash_context->mask_64k; -} -#endif - -static inline struct slice_mask *mm_ctx_slice_mask_4k(mm_context_t *ctx) -{ - return &ctx->hash_context->mask_4k; -} - -#ifdef CONFIG_HUGETLB_PAGE -static inline struct slice_mask *mm_ctx_slice_mask_16m(mm_context_t *ctx) -{ - return &ctx->hash_context->mask_16m; -} - -static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx) -{ - return &ctx->hash_context->mask_16g; -} -#endif - static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) { #ifdef CONFIG_PPC_64K_PAGES if (psize == MMU_PAGE_64K) - return mm_ctx_slice_mask_64k(&ctx); + return &ctx->hash_context->mask_64k; #endif #ifdef CONFIG_HUGETLB_PAGE if (psize == MMU_PAGE_16M) - return mm_ctx_slice_mask_16m(&ctx); + return &ctx->hash_context->mask_16m; if (psize == MMU_PAGE_16G) - return mm_ctx_slice_mask_16g(&ctx); + return &ctx->hash_context->mask_16g; #endif BUG_ON(psize != MMU_PAGE_4K); - return mm_ctx_slice_mask_4k(&ctx); + return &ctx->hash_context->mask_4k; } #ifdef CONFIG_PPC_SUBPAGE_PROT diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 114f50d995dc..77ccf7cb6fcc 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -255,23 +255,6 @@ static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long li ctx->slb_addr_limit = limit; } -static inline struct slice_mask *mm_ctx_slice_mask_base(mm_context_t *ctx) -{ - return &ctx->mask_base_psize; -} - -#ifdef CONFIG_HUGETLB_PAGE -static inline struct slice_mask *mm_ctx_slice_mask_512k(mm_context_t *ctx) -{ - return &ctx->mask_512k; -} - -static inline struct slice_mask *mm_ctx_slice_mask_8m(mm_context_t *ctx) -{ - return &ctx->mask_8m; -} -#endif - static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) { #ifdef CONFIG_HUGETLB_PAGE -- cgit v1.2.3 From 33f128c64919736164e70eb024da3ae5e5768cd6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 25 Apr 2019 14:29:34 +0000 Subject: powerpc/8xx: get rid of #ifdef CONFIG_HUGETLB_PAGE for slices The 8xx only selects CONFIG_PPC_MM_SLICES when CONFIG_HUGETLB_PAGE is set. 
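For illustration only (this hunk is not part of the patch), the
invariant the cleanup relies on could be written as a build-time check:

	#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_MM_SLICES) && \
	    !defined(CONFIG_HUGETLB_PAGE)
	#error "PPC_MM_SLICES on 8xx implies HUGETLB_PAGE"
	#endif

so dropping the #ifdef around the 512k/8M slice masks cannot change
what actually gets built on 8xx.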
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 77ccf7cb6fcc..76af5b0cb16e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -216,10 +216,8 @@ typedef struct { unsigned char high_slices_psize[0]; unsigned long slb_addr_limit; struct slice_mask mask_base_psize; /* 4k or 16k */ -# ifdef CONFIG_HUGETLB_PAGE struct slice_mask mask_512k; struct slice_mask mask_8m; -# endif #endif void *pte_frag; } mm_context_t; @@ -257,12 +255,11 @@ static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long li static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) { -#ifdef CONFIG_HUGETLB_PAGE if (psize == MMU_PAGE_512K) return &ctx->mask_512k; if (psize == MMU_PAGE_8M) return &ctx->mask_8m; -#endif + BUG_ON(psize != mmu_virtual_psize); return &ctx->mask_base_psize; -- cgit v1.2.3 From 43ed7909d70a61c621cadb5d808dc392ad537e5a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 25 Apr 2019 14:29:35 +0000 Subject: powerpc/mm: define get_slice_psize() all the time get_slice_psize() can be defined regardless of CONFIG_PPC_MM_SLICES to avoid ifdefs Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/slice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index be8af667098f..c6f466f4c241 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -36,6 +36,11 @@ void slice_setup_new_exec(void); static inline void slice_init_new_context_exec(struct mm_struct *mm) {} +static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + #endif /* CONFIG_PPC_MM_SLICES */ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 5953fb4f4671d7d755a81017a76766c00922d059 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 25 Apr 2019 14:29:36 +0000 Subject: powerpc/mm: define subarch SLB_ADDR_LIMIT_DEFAULT This patch defines a subarch specific SLB_ADDR_LIMIT_DEFAULT to remove the #ifdefs around the setup of mm->context.slb_addr_limit It also generalises the use of mm_ctx_set_slb_addr_limit() helper. 
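As a sketch of the intended use (the function below is hypothetical and
only for illustration), the context initialisation can then be written
without any #ifdef:

	static void example_init_addr_limit(struct mm_struct *mm)
	{
		mm_ctx_set_slb_addr_limit(&mm->context, SLB_ADDR_LIMIT_DEFAULT);
	}

with SLB_ADDR_LIMIT_DEFAULT resolving to DEFAULT_MAP_WINDOW_USER64 on
book3s/64 and DEFAULT_MAP_WINDOW on nohash/32.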
Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/slice.h | 2 ++ arch/powerpc/include/asm/nohash/32/slice.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h index 062e11136e9c..f0d3194ba41b 100644 --- a/arch/powerpc/include/asm/book3s/64/slice.h +++ b/arch/powerpc/include/asm/book3s/64/slice.h @@ -11,4 +11,6 @@ #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) +#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64 + #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h index 777d62e40ac0..39eb0154ae2d 100644 --- a/arch/powerpc/include/asm/nohash/32/slice.h +++ b/arch/powerpc/include/asm/nohash/32/slice.h @@ -13,6 +13,8 @@ #define SLICE_NUM_HIGH 0ul #define GET_HIGH_SLICE_INDEX(addr) (addr & 0) +#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW + #endif /* CONFIG_PPC_MM_SLICES */ #endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */ -- cgit v1.2.3 From 5874cabe29079b72b192a28d266adf1a460fc5d6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 05:59:39 +0000 Subject: powerpc/64: only book3s/64 supports CONFIG_PPC_64K_PAGES CONFIG_PPC_64K_PAGES cannot be selected by nohash/64. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/64/pgalloc.h | 3 --- arch/powerpc/include/asm/nohash/64/pgtable.h | 4 ---- arch/powerpc/include/asm/nohash/pte-book3e.h | 5 ----- arch/powerpc/include/asm/pgtable-be-types.h | 9 ++------- arch/powerpc/include/asm/pgtable-types.h | 9 ++------- arch/powerpc/include/asm/task_size_64.h | 2 +- 6 files changed, 5 insertions(+), 27 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 66d086f85bd5..ded453f9b5a8 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -171,12 +171,9 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) -#ifndef CONFIG_PPC_64K_PAGES #define __pud_free_tlb(tlb, pud, addr) \ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) -#endif /* CONFIG_PPC_64K_PAGES */ - #define check_pgt_cache() do { } while (0) #endif /* _ASM_POWERPC_PGALLOC_64_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index c8e6a9a5bc33..b9f66cf15c31 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -10,10 +10,6 @@ #include #include -#ifdef CONFIG_PPC_64K_PAGES -#error "Page size not supported" -#endif - #define FIRST_USER_ADDRESS 0UL /* diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h index dd40d200f274..813918f40765 100644 --- a/arch/powerpc/include/asm/nohash/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h @@ -60,13 +60,8 @@ #define _PAGE_SPECIAL _PAGE_SW0 /* Base page size */ -#ifdef CONFIG_PPC_64K_PAGES -#define _PAGE_PSIZE _PAGE_PSIZE_64K -#define PTE_RPN_SHIFT (28) -#else #define _PAGE_PSIZE _PAGE_PSIZE_4K #define PTE_RPN_SHIFT (24) -#endif #define PTE_WIMGE_SHIFT (19) #define 
PTE_BAP_SHIFT (2) diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index a89c67b62680..b169bbf95fcb 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -33,11 +33,7 @@ static inline __be64 pmd_raw(pmd_t x) return x.pmd; } -/* - * 64 bit hash always use 4 level table. Everybody else use 4 level - * only for 4K page size. - */ -#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) +/* 64 bit always use 4 level table. */ typedef struct { __be64 pud; } pud_t; #define __pud(x) ((pud_t) { cpu_to_be64(x) }) #define __pud_raw(x) ((pud_t) { (x) }) @@ -51,7 +47,6 @@ static inline __be64 pud_raw(pud_t x) return x.pud; } -#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ @@ -77,7 +72,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; * With hash config 64k pages additionally define a bigger "real PTE" type that * gathers the "second half" part of the PTE for pseudo 64k pages */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64) +#ifdef CONFIG_PPC_64K_PAGES typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 3b0edf041b2e..d11b4c61d686 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -23,18 +23,13 @@ static inline unsigned long pmd_val(pmd_t x) return x.pmd; } -/* - * 64 bit hash always use 4 level table. Everybody else use 4 level - * only for 4K page size. - */ -#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) +/* 64 bit always use 4 level table. */ typedef struct { unsigned long pud; } pud_t; #define __pud(x) ((pud_t) { (x) }) static inline unsigned long pud_val(pud_t x) { return x.pud; } -#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ @@ -54,7 +49,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; * With hash config 64k pages additionally define a bigger "real PTE" type that * gathers the "second half" part of the PTE for pseudo 64k pages */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64) +#ifdef CONFIG_PPC_64K_PAGES typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h index eab4779f6b84..c993482237ed 100644 --- a/arch/powerpc/include/asm/task_size_64.h +++ b/arch/powerpc/include/asm/task_size_64.h @@ -20,7 +20,7 @@ /* * For now 512TB is only supported with book3s and 64K linux page size. */ -#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES) +#ifdef CONFIG_PPC_64K_PAGES /* * Max value currently used: */ -- cgit v1.2.3 From 0001e5aa5c028c11570f2e641f0198287f4808ba Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 05:59:43 +0000 Subject: powerpc/mm: make gup_hugepte() static gup_huge_pd() is the only user of gup_hugepte() and it is located in the same file. This patch moves gup_huge_pd() after gup_hugepte() and makes gup_hugepte() static. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 505550fb2935..c51846da41a7 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -89,9 +89,6 @@ extern void paging_init(void); */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); -extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, - struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #endif -- cgit v1.2.3 From 8197af22be01e7c9ab476138652e0dc8cd22a207 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 05:59:44 +0000 Subject: powerpc/mm: split asm/hugetlb.h into dedicated subarch files Three subarches support hugepages: - fsl book3e - book3s/64 - 8xx This patch splits asm/hugetlb.h to reduce the #ifdef mess. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 40 +++++++++++ arch/powerpc/include/asm/hugetlb.h | 87 ++---------------------- arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 31 +++++++++ arch/powerpc/include/asm/nohash/hugetlb-book3e.h | 31 +++++++++ 4 files changed, 106 insertions(+), 83 deletions(-) create mode 100644 arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h create mode 100644 arch/powerpc/include/asm/nohash/hugetlb-book3e.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index ec2a55a553c7..cbc8153d6e0e 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -62,4 +62,44 @@ extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t new_pte); +/* + * This should work for other subarchs too. But right now we use the + * new format only for 64bit book3s + */ +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + BUG_ON(!hugepd_ok(hpd)); + /* + * We have only four bits to encode, MMU page size + */ + BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); + return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK); +} + +static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) +{ + return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2; +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return mmu_psize_to_shift(hugepd_mmu_psize(hpd)); +} +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + if (radix_enabled()) + return radix__flush_hugetlb_page(vma, vmaddr); +} + +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, + unsigned int pdshift) +{ + unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); + + return hugepd_page(hpd) + idx; +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); + #endif diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 8d40565ad0c3..fd5c0873a57d 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -6,83 +6,13 @@ #include #ifdef CONFIG_PPC_BOOK3S_64 - #include -/* - * This should work for other subarchs too. 
But right now we use the - * new format only for 64bit book3s - */ -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); - /* - * We have only four bits to encode, MMU page size - */ - BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); - return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK); -} - -static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) -{ - return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2; -} - -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ - return mmu_psize_to_shift(hugepd_mmu_psize(hpd)); -} -static inline void flush_hugetlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - if (radix_enabled()) - return radix__flush_hugetlb_page(vma, vmaddr); -} - -#else - -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); -#ifdef CONFIG_PPC_8xx - return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK); -#else - return (pte_t *)((hpd_val(hpd) & - ~HUGEPD_SHIFT_MASK) | PD_HUGE); -#endif -} - -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ -#ifdef CONFIG_PPC_8xx - return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; -#else - return hpd_val(hpd) & HUGEPD_SHIFT_MASK; -#endif -} - +#elif defined(CONFIG_PPC_FSL_BOOK3E) +#include +#elif defined(CONFIG_PPC_8xx) +#include #endif /* CONFIG_PPC_BOOK3S_64 */ - -static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, - unsigned pdshift) -{ - /* - * On FSL BookE, we have multiple higher-level table entries that - * point to the same hugepte. Just use the first one since they're all - * identical. So for that case, idx=0. - */ - unsigned long idx = 0; - - pte_t *dir = hugepd_page(hpd); -#ifdef CONFIG_PPC_8xx - idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT; -#elif !defined(CONFIG_PPC_FSL_BOOK3E) - idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); -#endif - - return dir + idx; -} - void flush_dcache_icache_hugepage(struct page *page); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, @@ -99,15 +29,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte); -#ifdef CONFIG_PPC_8xx -static inline void flush_hugetlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - flush_tlb_page(vma, vmaddr); -} -#else -void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -#endif #define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h new file mode 100644 index 000000000000..997f5b3d6b99 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H +#define _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H + +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + BUG_ON(!hugepd_ok(hpd)); + + return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK); +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; +} + +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, + unsigned int pdshift) +{ + unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT; + + return hugepd_page(hpd) + idx; +} + +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + flush_tlb_page(vma, vmaddr); +} + +#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git 
a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h new file mode 100644 index 000000000000..e94f1cd048ee --- /dev/null +++ b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H +#define _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H + +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + if (WARN_ON(!hugepd_ok(hpd))) + return NULL; + + return (pte_t *)((hpd_val(hpd) & ~HUGEPD_SHIFT_MASK) | PD_HUGE); +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return hpd_val(hpd) & HUGEPD_SHIFT_MASK; +} + +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, + unsigned int pdshift) +{ + /* + * On FSL BookE, we have multiple higher-level table entries that + * point to the same hugepte. Just use the first one since they're all + * identical. So for that case, idx=0. + */ + return hugepd_page(hpd); +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); + +#endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */ -- cgit v1.2.3 From 5fb84fec46015758271fcd2a746633fd4d48e619 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 05:59:45 +0000 Subject: powerpc/mm: add a helper to populate hugepd This patchs adds a subarch helper to populate hugepd. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 5 +++++ arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 8 ++++++++ arch/powerpc/include/asm/nohash/hugetlb-book3e.h | 6 ++++++ 3 files changed, 19 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index cbc8153d6e0e..def77a45e905 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -100,6 +100,11 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, return hugepd_page(hpd) + idx; } +static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2)); +} + void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); #endif diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index 997f5b3d6b99..75676885bec2 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H #define _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H +#define PAGE_SHIFT_8M 23 + static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!hugepd_ok(hpd)); @@ -28,4 +30,10 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, flush_tlb_page(vma, vmaddr); } +static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | + (pshift == PAGE_SHIFT_8M ? 
_PMD_PAGE_8M : _PMD_PAGE_512K)); +} + #endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h index e94f1cd048ee..51439bcfe313 100644 --- a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h +++ b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h @@ -28,4 +28,10 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + /* We use the old format for PPC_FSL_BOOK3E */ + *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); +} + #endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */ -- cgit v1.2.3 From 723f268f19daddba56a987b934f3e34a04b6499d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 05:59:46 +0000 Subject: powerpc/mm: cleanup ifdef mess in add_huge_page_size() Introduce a subarch specific helper check_and_get_huge_psize() to check the huge page sizes and cleanup the ifdef mess in add_huge_page_size() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 27 ++++++++++++++++++++++++ arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 5 +++++ arch/powerpc/include/asm/nohash/hugetlb-book3e.h | 8 +++++++ 3 files changed, 40 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index def77a45e905..56140d19c85f 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -107,4 +107,31 @@ static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshi void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +static inline int check_and_get_huge_psize(int shift) +{ + int mmu_psize; + + if (shift > SLICE_HIGH_SHIFT) + return -EINVAL; + + mmu_psize = shift_to_mmu_psize(shift); + + /* + * We need to make sure that for different page sizes reported by + * firmware we only add hugetlb support for page sizes that can be + * supported by linux page table layout. + * For now we have + * Radix: 2M and 1G + * Hash: 16M and 16G + */ + if (radix_enabled()) { + if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G) + return -EINVAL; + } else { + if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) + return -EINVAL; + } + return mmu_psize; +} + #endif diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index 75676885bec2..a46616937d20 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -36,4 +36,9 @@ static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshi (pshift == PAGE_SHIFT_8M ? 
_PMD_PAGE_8M : _PMD_PAGE_512K)); } +static inline int check_and_get_huge_psize(int shift) +{ + return shift_to_mmu_psize(shift); +} + #endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h index 51439bcfe313..ecd8694cb229 100644 --- a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h +++ b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h @@ -34,4 +34,12 @@ static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshi *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); } +static inline int check_and_get_huge_psize(int shift) +{ + if (shift & 1) /* Not a power of 4 */ + return -EINVAL; + + return shift_to_mmu_psize(shift); +} + #endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */ -- cgit v1.2.3 From 45d0ba527b575d47b2be75dd517b57cceda04bfe Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 05:59:47 +0000 Subject: powerpc/mm: move hugetlb_disabled into asm/hugetlb.h No need to have this in asm/page.h, move it into asm/hugetlb.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hugetlb.h | 2 ++ arch/powerpc/include/asm/page.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index fd5c0873a57d..84598c6b0959 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -13,6 +13,8 @@ #include #endif /* CONFIG_PPC_BOOK3S_64 */ +extern bool hugetlb_disabled; + void flush_dcache_icache_hugepage(struct page *page); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 748f5db2e2b7..6b508420d92b 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -29,7 +29,6 @@ #ifndef __ASSEMBLY__ #ifdef CONFIG_HUGETLB_PAGE -extern bool hugetlb_disabled; extern unsigned int HPAGE_SHIFT; #else #define HPAGE_SHIFT PAGE_SHIFT -- cgit v1.2.3 From c5710cd20735037ba9be0e95530f0d3795ce07e6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 05:59:48 +0000 Subject: powerpc/mm: cleanup HPAGE_SHIFT setup Only book3s/64 may select default among several HPAGE_SHIFT at runtime. 8xx always defines 512K pages as default FSL_BOOK3E always defines 4M pages as default This patch limits HUGETLB_PAGE_SIZE_VARIABLE to book3s/64 moves the definitions in subarches files. 
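Just to illustrate the fixed sizes mentioned above, the shifts expand
as follows (stand-alone user space check, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		/* 8xx: HPAGE_SHIFT 19 gives 512 KB huge pages */
		printf("8xx:        %lu KB\n", (1UL << 19) >> 10);
		/* FSL_BOOK3E: HPAGE_SHIFT 22 gives 4 MB huge pages */
		printf("FSL_BOOK3E: %lu MB\n", (1UL << 22) >> 20);
		return 0;
	}

Only book3s/64 keeps a runtime hpage_shift; the other two become
compile-time constants.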
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hugetlb.h | 2 ++ arch/powerpc/include/asm/page.h | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 84598c6b0959..20a101046cff 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -15,6 +15,8 @@ extern bool hugetlb_disabled; +void hugetlbpage_init_default(void); + void flush_dcache_icache_hugepage(struct page *page); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 6b508420d92b..dbc8c0679480 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -28,10 +28,15 @@ #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) #ifndef __ASSEMBLY__ -#ifdef CONFIG_HUGETLB_PAGE -extern unsigned int HPAGE_SHIFT; -#else +#ifndef CONFIG_HUGETLB_PAGE #define HPAGE_SHIFT PAGE_SHIFT +#elif defined(CONFIG_PPC_BOOK3S_64) +extern unsigned int hpage_shift; +#define HPAGE_SHIFT hpage_shift +#elif defined(CONFIG_PPC_8xx) +#define HPAGE_SHIFT 19 /* 512k pages */ +#elif defined(CONFIG_PPC_FSL_BOOK3E) +#define HPAGE_SHIFT 22 /* 4M pages */ #endif #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) -- cgit v1.2.3 From 447def3b06adab60b999417b316bd2352d7e643e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:57:59 +0000 Subject: powerpc/mm: drop __bad_pte() This has never been called (since Kernel has been in git at least), drop it. Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 2 -- arch/powerpc/include/asm/nohash/32/pgalloc.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 3633502e102c..645af86cd072 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -22,8 +22,6 @@ */ #define MAX_PGTABLE_INDEX_SIZE 0xf -extern void __bad_pte(pmd_t *pmd); - extern struct kmem_cache *pgtable_cache[]; #define PGT_CACHE(shift) pgtable_cache[shift] diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index bd186e85b4f7..ea265a578eb0 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -22,8 +22,6 @@ */ #define MAX_PGTABLE_INDEX_SIZE 0xf -extern void __bad_pte(pmd_t *pmd); - extern struct kmem_cache *pgtable_cache[]; #define PGT_CACHE(shift) pgtable_cache[shift] -- cgit v1.2.3 From 737b434d3d55c0b3c23df4eab1ea5b33f8850f30 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:01 +0000 Subject: powerpc/mm: convert Book3E 64 to pte_fragment Book3E 64 is the only subarch not using pte_fragment. In order to allow refactorisation, this patch converts it to pte_fragment. 
Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 6 ----- arch/powerpc/include/asm/nohash/64/mmu.h | 4 +++- arch/powerpc/include/asm/nohash/64/pgalloc.h | 33 ++++++++++------------------ 3 files changed, 15 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6ee8195a2ffb..66a3805dc935 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -228,13 +228,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } -#ifdef CONFIG_PPC_BOOK3E_64 -static inline void arch_exit_mmap(struct mm_struct *mm) -{ -} -#else extern void arch_exit_mmap(struct mm_struct *mm); -#endif static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h index 81cf30c370e5..26e05ce8f5aa 100644 --- a/arch/powerpc/include/asm/nohash/64/mmu.h +++ b/arch/powerpc/include/asm/nohash/64/mmu.h @@ -4,11 +4,13 @@ #define MAX_PHYSMEM_BITS 44 +#include + /* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ #include #ifndef __ASSEMBLY__ -typedef struct page *pgtable_t; +typedef pte_t *pgtable_t; #endif #endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index ded453f9b5a8..7fb87235f845 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -76,10 +76,10 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, (unsigned long)page_address(pte_page)); + pmd_set(pmd, (unsigned long)pte_page); } -#define pmd_pgtable(pmd) pmd_page(pmd) +#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -92,44 +92,35 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd); } +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + return (pte_t *)pte_fragment_alloc(mm, 1); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { - struct page *page; - pte_t *pte; - - pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT); - if (!pte) - return NULL; - page = virt_to_page(pte); - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; + return (pgtable_t)pte_fragment_alloc(mm, 0); } +void pte_frag_destroy(void *pte_frag); +void pte_fragment_free(unsigned long *table, int kernel); + static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - free_page((unsigned long)pte); + pte_fragment_free((unsigned long *)pte, 1); } static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) { - pgtable_page_dtor(ptepage); - __free_page(ptepage); + pte_fragment_free((unsigned long *)ptepage, 0); } static inline void pgtable_free(void *table, int shift) { if (!shift) { - pgtable_page_dtor(virt_to_page(table)); - free_page((unsigned long)table); + pte_fragment_free((unsigned long *)table, 0); } else { BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); 
kmem_cache_free(PGT_CACHE(shift), table); @@ -166,7 +157,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { tlb_flush_pgtable(tlb, address); - pgtable_free_tlb(tlb, page_address(table), 0); + pgtable_free_tlb(tlb, table, 0); } #define __pmd_free_tlb(tlb, pmd, addr) \ -- cgit v1.2.3 From 696dffa24bd0e17c8bccb18467555c17cc15e62c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:02 +0000 Subject: powerpc/mm: move pgtable_t in asm/mmu.h pgtable_t is now identical for all subarches, move it to the top level asm/mmu.h Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 4 ---- arch/powerpc/include/asm/book3s/64/mmu.h | 8 -------- arch/powerpc/include/asm/mmu.h | 3 +++ arch/powerpc/include/asm/nohash/32/mmu.h | 6 ------ arch/powerpc/include/asm/nohash/64/mmu.h | 6 ------ 5 files changed, 3 insertions(+), 24 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index f9eae105a9f4..2e277ca0170f 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -10,8 +10,6 @@ * BATs */ -#include - /* Block size masks */ #define BL_128K 0x000 #define BL_256K 0x001 @@ -49,8 +47,6 @@ struct ppc_bat { u32 batu; u32 batl; }; - -typedef pte_t *pgtable_t; #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 51b2d60efc1b..74d24201fc4f 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -25,14 +25,6 @@ struct mmu_psize_def { }; }; extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; - -/* - * For BOOK3s 64 with 4k and 64K linux page size - * we want to use pointers, because the page table - * actually store pfn - */ -typedef pte_t *pgtable_t; - #endif /* __ASSEMBLY__ */ /* diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index d86c5641bd97..ba94ce8c22d7 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -129,6 +129,9 @@ #ifndef __ASSEMBLY__ #include #include +#include + +typedef pte_t *pgtable_t; #ifdef CONFIG_PPC_FSL_BOOK3E #include diff --git a/arch/powerpc/include/asm/nohash/32/mmu.h b/arch/powerpc/include/asm/nohash/32/mmu.h index 7d94a36d57d2..af0e8b54876a 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu.h +++ b/arch/powerpc/include/asm/nohash/32/mmu.h @@ -2,8 +2,6 @@ #ifndef _ASM_POWERPC_NOHASH_32_MMU_H_ #define _ASM_POWERPC_NOHASH_32_MMU_H_ -#include - #if defined(CONFIG_40x) /* 40x-style software loaded TLB */ #include @@ -18,8 +16,4 @@ #include #endif -#ifndef __ASSEMBLY__ -typedef pte_t *pgtable_t; -#endif - #endif /* _ASM_POWERPC_NOHASH_32_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h index 26e05ce8f5aa..e490ecdac012 100644 --- a/arch/powerpc/include/asm/nohash/64/mmu.h +++ b/arch/powerpc/include/asm/nohash/64/mmu.h @@ -4,13 +4,7 @@ #define MAX_PHYSMEM_BITS 44 -#include - /* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ #include -#ifndef __ASSEMBLY__ -typedef pte_t *pgtable_t; -#endif - #endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */ -- cgit v1.2.3 From 7a792d5da27f8407c5fe1b3c976106229e0d8bbd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:03 +0000 Subject: powerpc/mm: get 
rid of nohash/32/mmu.h and nohash/64/mmu.h Those files have no real added values, especially the 64 bit which only includes the common book3e mmu.h which is also included from 32 bits side. So lets do the final inclusion directly from nohash/mmu.h Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/32/mmu.h | 19 ------------------- arch/powerpc/include/asm/nohash/64/mmu.h | 10 ---------- arch/powerpc/include/asm/nohash/mmu-book3e.h | 2 ++ arch/powerpc/include/asm/nohash/mmu.h | 16 ++++++++++++---- 4 files changed, 14 insertions(+), 33 deletions(-) delete mode 100644 arch/powerpc/include/asm/nohash/32/mmu.h delete mode 100644 arch/powerpc/include/asm/nohash/64/mmu.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/32/mmu.h b/arch/powerpc/include/asm/nohash/32/mmu.h deleted file mode 100644 index af0e8b54876a..000000000000 --- a/arch/powerpc/include/asm/nohash/32/mmu.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_32_MMU_H_ -#define _ASM_POWERPC_NOHASH_32_MMU_H_ - -#if defined(CONFIG_40x) -/* 40x-style software loaded TLB */ -#include -#elif defined(CONFIG_44x) -/* 44x-style software loaded TLB */ -#include -#elif defined(CONFIG_PPC_BOOK3E_MMU) -/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ -#include -#elif defined (CONFIG_PPC_8xx) -/* Motorola/Freescale 8xx software loaded TLB */ -#include -#endif - -#endif /* _ASM_POWERPC_NOHASH_32_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h deleted file mode 100644 index e490ecdac012..000000000000 --- a/arch/powerpc/include/asm/nohash/64/mmu.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_64_MMU_H_ -#define _ASM_POWERPC_NOHASH_64_MMU_H_ - -#define MAX_PHYSMEM_BITS 44 - -/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ -#include - -#endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h index e20072972e35..4c9777d256fb 100644 --- a/arch/powerpc/include/asm/nohash/mmu-book3e.h +++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h @@ -306,6 +306,8 @@ extern int book3e_htw_mode; #define mmu_cleanup_all NULL +#define MAX_PHYSMEM_BITS 44 + #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h index a037cb1efb57..edc793e5f08f 100644 --- a/arch/powerpc/include/asm/nohash/mmu.h +++ b/arch/powerpc/include/asm/nohash/mmu.h @@ -2,10 +2,18 @@ #ifndef _ASM_POWERPC_NOHASH_MMU_H_ #define _ASM_POWERPC_NOHASH_MMU_H_ -#ifdef CONFIG_PPC64 -#include -#else -#include +#if defined(CONFIG_40x) +/* 40x-style software loaded TLB */ +#include +#elif defined(CONFIG_44x) +/* 44x-style software loaded TLB */ +#include +#elif defined(CONFIG_PPC_BOOK3E_MMU) +/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ +#include +#elif defined (CONFIG_PPC_8xx) +/* Motorola/Freescale 8xx software loaded TLB */ +#include #endif #endif /* _ASM_POWERPC_NOHASH_MMU_H_ */ -- cgit v1.2.3 From b0124ff57e9405725b4dfeffbdfa929bb973ad2c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:07 +0000 Subject: powerpc/mm: inline pte_alloc_one_kernel() and pte_alloc_one() on PPC32 pte_alloc_one_kernel() and pte_alloc_one() are simple calls to pte_fragment_alloc(), so they are good candidates 
for inlining as already done on PPC64. Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 15 ++++++++++++--- arch/powerpc/include/asm/nohash/32/pgalloc.h | 15 ++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 645af86cd072..0ed856068bb8 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -59,10 +59,19 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, #define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -extern pgtable_t pte_alloc_one(struct mm_struct *mm); -void pte_frag_destroy(void *pte_frag); pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +{ + return (pte_t *)pte_fragment_alloc(mm, 1); +} + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm) +{ + return (pgtable_t)pte_fragment_alloc(mm, 0); +} + +void pte_frag_destroy(void *pte_frag); void pte_fragment_free(unsigned long *table, int kernel); static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index ea265a578eb0..1d41508f0676 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -77,10 +77,19 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, #define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) #endif -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -extern pgtable_t pte_alloc_one(struct mm_struct *mm); -void pte_frag_destroy(void *pte_frag); pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +{ + return (pte_t *)pte_fragment_alloc(mm, 1); +} + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm) +{ + return (pgtable_t)pte_fragment_alloc(mm, 0); +} + +void pte_frag_destroy(void *pte_frag); void pte_fragment_free(unsigned long *table, int kernel); static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -- cgit v1.2.3 From dc096864ba784c2d3d10480d71f14a53f40f997c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:08 +0000 Subject: powerpc/mm: refactor pte_alloc_one() and pte_free() families definition. Functions pte_alloc_one(), pte_alloc_one_kernel(), pte_free(), pte_free_kernel() are identical for the four subarches. This patch moves their definition in a common place. 
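For illustration only, once the definitions are common every subarch
gets the same allocation/free pairing; a hypothetical caller would do:

	pgtable_t pt = pte_alloc_one(mm);

	if (pt)
		pte_free(mm, pt);

with pte_fragment_alloc()/pte_fragment_free() doing the real work
behind both helpers.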
Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 25 ------------------------- arch/powerpc/include/asm/book3s/64/pgalloc.h | 22 ---------------------- arch/powerpc/include/asm/nohash/32/pgalloc.h | 25 ------------------------- arch/powerpc/include/asm/nohash/64/pgalloc.h | 25 ------------------------- arch/powerpc/include/asm/pgalloc.h | 25 +++++++++++++++++++++++++ 5 files changed, 25 insertions(+), 97 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 0ed856068bb8..46422309d6e0 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -59,31 +59,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, #define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) -pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)pte_fragment_alloc(mm, 1); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - return (pgtable_t)pte_fragment_alloc(mm, 0); -} - -void pte_frag_destroy(void *pte_frag); -void pte_fragment_free(unsigned long *table, int kernel); - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - static inline void pgtable_free(void *table, unsigned index_size) { if (!index_size) { diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 138bc2ecc0c4..cfd48d8cc055 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -39,9 +39,7 @@ extern struct vmemmap_backing *vmemmap_list; extern struct kmem_cache *pgtable_cache[]; #define PGT_CACHE(shift) pgtable_cache[shift] -extern pte_t *pte_fragment_alloc(struct mm_struct *, int); extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); -extern void pte_fragment_free(unsigned long *, int); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); #ifdef CONFIG_SMP @@ -190,26 +188,6 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd) return (pgtable_t)pmd_page_vaddr(pmd); } -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)pte_fragment_alloc(mm, 1); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - return (pgtable_t)pte_fragment_alloc(mm, 0); -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 1d41508f0676..e96ef2fde2ca 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -77,31 +77,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, #define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) #endif -pte_t *pte_fragment_alloc(struct mm_struct *mm, 
int kernel); - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)pte_fragment_alloc(mm, 1); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - return (pgtable_t)pte_fragment_alloc(mm, 0); -} - -void pte_frag_destroy(void *pte_frag); -void pte_fragment_free(unsigned long *table, int kernel); - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - static inline void pgtable_free(void *table, unsigned index_size) { if (!index_size) { diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 7fb87235f845..98de4f3b0306 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -92,31 +92,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd); } -pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)pte_fragment_alloc(mm, 1); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - return (pgtable_t)pte_fragment_alloc(mm, 0); -} - -void pte_frag_destroy(void *pte_frag); -void pte_fragment_free(unsigned long *table, int kernel); - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - static inline void pgtable_free(void *table, int shift) { if (!shift) { diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index e11f03007b57..c2c6fd438840 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -20,6 +20,31 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) #define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +{ + return (pte_t *)pte_fragment_alloc(mm, 1); +} + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm) +{ + return (pgtable_t)pte_fragment_alloc(mm, 0); +} + +void pte_frag_destroy(void *pte_frag); +void pte_fragment_free(unsigned long *table, int kernel); + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + pte_fragment_free((unsigned long *)pte, 1); +} + +static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) +{ + pte_fragment_free((unsigned long *)ptepage, 0); +} + #ifdef CONFIG_PPC_BOOK3S #include #else -- cgit v1.2.3 From e80789a3c13f9fbc8f361a988868f9b68a8cf134 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:09 +0000 Subject: powerpc/mm: refactor definition of pgtable_cache[] pgtable_cache[] is the same for the 4 subarches, lets make it common. 
Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 21 --------------------- arch/powerpc/include/asm/book3s/64/pgalloc.h | 22 ---------------------- arch/powerpc/include/asm/nohash/32/pgalloc.h | 21 --------------------- arch/powerpc/include/asm/nohash/64/pgalloc.h | 22 ---------------------- arch/powerpc/include/asm/pgalloc.h | 21 +++++++++++++++++++++ 5 files changed, 21 insertions(+), 86 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 46422309d6e0..1b9b5c228230 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -5,26 +5,6 @@ #include #include -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. - */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - static inline pgd_t *pgd_alloc(struct mm_struct *mm) { return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), @@ -69,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size) } } -#define check_pgt_cache() do { } while (0) #define get_hugepd_cache_index(x) (x) #ifdef CONFIG_SMP diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index cfd48d8cc055..df2dce6afe14 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -19,26 +19,6 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. 
- */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); @@ -199,8 +179,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, pgtable_free_tlb(tlb, table, PTE_INDEX); } -#define check_pgt_cache() do { } while (0) - extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; static inline void update_page_count(int psize, long count) { diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index e96ef2fde2ca..4615801aa953 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -5,26 +5,6 @@ #include #include -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. - */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - static inline pgd_t *pgd_alloc(struct mm_struct *mm) { return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), @@ -87,7 +67,6 @@ static inline void pgtable_free(void *table, unsigned index_size) } } -#define check_pgt_cache() do { } while (0) #define get_hugepd_cache_index(x) (x) #ifdef CONFIG_SMP diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 98de4f3b0306..ffc86d42816d 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -18,26 +18,6 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. 
- */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - static inline pgd_t *pgd_alloc(struct mm_struct *mm) { return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), @@ -140,6 +120,4 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, #define __pud_free_tlb(tlb, pud, addr) \ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) -#define check_pgt_cache() do { } while (0) - #endif /* _ASM_POWERPC_PGALLOC_64_H */ diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index c2c6fd438840..5761bee0f004 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -45,6 +45,27 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) pte_fragment_free((unsigned long *)ptepage, 0); } +/* + * Functions that deal with pagetables that could be at any level of + * the table need to be passed an "index_size" so they know how to + * handle allocation. For PTE pages, the allocation size will be + * (2^index_size * sizeof(pointer)) and allocations are drawn from + * the kmem_cache in PGT_CACHE(index_size). + * + * The maximum index size needs to be big enough to allow any + * pagetable sizes we need, but small enough to fit in the low bits of + * any page table pointer. In other words all pagetables, even tiny + * ones, must be aligned to allow at least enough low 0 bits to + * contain this value. This value is also used as a mask, so it must + * be one less than a power of two. + */ +#define MAX_PGTABLE_INDEX_SIZE 0xf + +extern struct kmem_cache *pgtable_cache[]; +#define PGT_CACHE(shift) pgtable_cache[shift] + +static inline void check_pgt_cache(void) { } + #ifdef CONFIG_PPC_BOOK3S #include #else -- cgit v1.2.3 From bf8156c5aef12621e20afa470ae41f92cdca377b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:10 +0000 Subject: powerpc/mm: Only keep one version of pmd_populate() functions on nohash/32 Use IS_ENABLED(CONFIG_BOOKE) to make single versions of pmd_populate() and pmd_populate_kernel() Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/32/pgalloc.h | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 4615801aa953..7ee8e27070f4 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -25,37 +25,25 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) #define __pmd_free_tlb(tlb,x,a) do { } while (0) /* #define pgd_populate(mm, pmd, pte) BUG() */ -#ifndef CONFIG_BOOKE - static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *pte) { - *pmdp = __pmd(__pa(pte) | _PMD_PRESENT); + if (IS_ENABLED(CONFIG_BOOKE)) + *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT); + else + *pmdp = __pmd(__pa(pte) | _PMD_PRESENT); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pte_page) { - *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT); + if (IS_ENABLED(CONFIG_BOOKE)) + *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT); + else + *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT); } #define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) -#else - -static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, - pte_t *pte) -{ - 
*pmdp = __pmd((unsigned long)pte | _PMD_PRESENT); -} - -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pte_page) -{ - *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT); -} - -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) -#endif static inline void pgtable_free(void *table, unsigned index_size) { -- cgit v1.2.3 From 7cec90e9499c25c31b539f8a35d949c8e9043c14 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:11 +0000 Subject: powerpc/mm: refactor pgtable freeing functions on nohash pgtable_free() and others are identical on nohash/32 and 64, so move them into asm/nohash/pgalloc.h Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/32/pgalloc.h | 43 --------------------------- arch/powerpc/include/asm/nohash/64/pgalloc.h | 43 --------------------------- arch/powerpc/include/asm/nohash/pgalloc.h | 44 ++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 86 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 7ee8e27070f4..6c0f5151dc1d 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -45,47 +45,4 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, #define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) -static inline void pgtable_free(void *table, unsigned index_size) -{ - if (!index_size) { - pte_fragment_free((unsigned long *)table, 0); - } else { - BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); - kmem_cache_free(PGT_CACHE(index_size), table); - } -} - -#define get_hugepd_cache_index(x) (x) - -#ifdef CONFIG_SMP -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - unsigned long pgf = (unsigned long)table; - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - pgf |= shift; - tlb_remove_table(tlb, (void *)pgf); -} - -static inline void __tlb_remove_table(void *_table) -{ - void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); - unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; - - pgtable_free(table, shift); -} -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - -static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, - unsigned long address) -{ - tlb_flush_pgtable(tlb, address); - pgtable_free_tlb(tlb, table, 0); -} #endif /* _ASM_POWERPC_PGALLOC_32_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index ffc86d42816d..c636feced1ff 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -72,49 +72,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd); } -static inline void pgtable_free(void *table, int shift) -{ - if (!shift) { - pte_fragment_free((unsigned long *)table, 0); - } else { - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - kmem_cache_free(PGT_CACHE(shift), table); - } -} - -#define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - unsigned long pgf = (unsigned long)table; - - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - pgf |= shift; - tlb_remove_table(tlb, (void *)pgf); -} - -static inline void 
__tlb_remove_table(void *_table) -{ - void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); - unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; - - pgtable_free(table, shift); -} - -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - -static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, - unsigned long address) -{ - tlb_flush_pgtable(tlb, address); - pgtable_free_tlb(tlb, table, 0); -} - #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) #define __pud_free_tlb(tlb, pud, addr) \ diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 0634f2949438..4fccac6af3ad 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -21,4 +21,48 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb, #else #include #endif + +static inline void pgtable_free(void *table, int shift) +{ + if (!shift) { + pte_fragment_free((unsigned long *)table, 0); + } else { + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(shift), table); + } +} + +#define get_hugepd_cache_index(x) (x) + +#ifdef CONFIG_SMP +static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) +{ + unsigned long pgf = (unsigned long)table; + + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + pgf |= shift; + tlb_remove_table(tlb, (void *)pgf); +} + +static inline void __tlb_remove_table(void *_table) +{ + void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); + unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; + + pgtable_free(table, shift); +} + +#else +static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) +{ + pgtable_free(table, shift); +} +#endif + +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, + unsigned long address) +{ + tlb_flush_pgtable(tlb, address); + pgtable_free_tlb(tlb, table, 0); +} #endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */ -- cgit v1.2.3 From 8a2cc87a24e8c0a823c2e4ec8702c90d743a69d4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:12 +0000 Subject: powerpc/mm: refactor pmd_pgtable() pmd_pgtable() is identical on the 4 subarches, refactor it. 
Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 2 -- arch/powerpc/include/asm/book3s/64/pgalloc.h | 5 ----- arch/powerpc/include/asm/nohash/32/pgalloc.h | 2 -- arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 -- arch/powerpc/include/asm/pgalloc.h | 5 +++++ 5 files changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 1b9b5c228230..998317702630 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -37,8 +37,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, *pmdp = __pmd(__pa(pte_page) | _PMD_PRESENT); } -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) - static inline void pgtable_free(void *table, unsigned index_size) { if (!index_size) { diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index df2dce6afe14..053a7940504e 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -163,11 +163,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, *pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS); } -static inline pgtable_t pmd_pgtable(pmd_t pmd) -{ - return (pgtable_t)pmd_page_vaddr(pmd); -} - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 6c0f5151dc1d..137761b01588 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -43,6 +43,4 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT); } -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) - #endif /* _ASM_POWERPC_PGALLOC_32_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index c636feced1ff..5a0ea63c77c7 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -59,8 +59,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pmd_set(pmd, (unsigned long)pte_page); } -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) - static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index 5761bee0f004..2b2c60a1a66d 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -72,4 +72,9 @@ static inline void check_pgt_cache(void) { } #include #endif +static inline pgtable_t pmd_pgtable(pmd_t pmd) +{ + return (pgtable_t)pmd_page_vaddr(pmd); +} + #endif /* _ASM_POWERPC_PGALLOC_H */ -- cgit v1.2.3 From 069239169ab060da4236a59d35aec91084cc694d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 15:58:13 +0000 Subject: powerpc/mm: refactor pgd_alloc() and pgd_free() on nohash pgd_alloc() and pgd_free() are identical on nohash 32 and 64. 
Reviewed-by: Aneesh Kumar K.V Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/32/pgalloc.h | 11 ----------- arch/powerpc/include/asm/nohash/64/pgalloc.h | 11 ----------- arch/powerpc/include/asm/nohash/pgalloc.h | 12 ++++++++++++ 3 files changed, 12 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 137761b01588..11eac371e7e0 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -5,17 +5,6 @@ #include #include -static inline pgd_t *pgd_alloc(struct mm_struct *mm) -{ - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), - pgtable_gfp_flags(mm, GFP_KERNEL)); -} - -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); -} - /* * We don't have any real pmd's, and this code never triggers because * the pgd will always be present.. diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 5a0ea63c77c7..62321cd12da9 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -18,17 +18,6 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -static inline pgd_t *pgd_alloc(struct mm_struct *mm) -{ - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), - pgtable_gfp_flags(mm, GFP_KERNEL)); -} - -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); -} - #define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 4fccac6af3ad..332b13b4ecdb 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_NOHASH_PGALLOC_H #include +#include extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #ifdef CONFIG_PPC64 @@ -16,6 +17,17 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb, } #endif /* !CONFIG_PPC_BOOK3E */ +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), + pgtable_gfp_flags(mm, GFP_KERNEL)); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); +} + #ifdef CONFIG_PPC64 #include #else -- cgit v1.2.3 From 26deb04342e343ac58ab05bc7d2345ff0be9b667 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 16:23:26 +0000 Subject: powerpc: prepare string/mem functions for KASAN CONFIG_KASAN implements wrappers for memcpy() memmove() and memset() Those wrappers are doing the verification then call respectively __memcpy() __memmove() and __memset(). The arches are therefore expected to rename their optimised functions that way. For files on which KASAN is inhibited, #defines are used to allow them to directly call optimised versions of the functions without going through the KASAN wrappers. See commit 393f203f5fd5 ("x86_64: kasan: add interceptors for memset/memmove/memcpy functions") for details. Other string / mem functions do not (yet) have kasan wrappers, we therefore have to fallback to the generic versions when KASAN is active, otherwise KASAN checks will be skipped. 
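For reference, the generic KASAN interceptors that the renamed arch functions plug into follow roughly this pattern (a simplified sketch based on mm/kasan/common.c of this period; exact signatures may differ):

void *memcpy(void *dest, const void *src, size_t len)
{
	/* Validate both buffers against the shadow memory first... */
	check_memory_region((unsigned long)src, len, false, _RET_IP_);
	check_memory_region((unsigned long)dest, len, true, _RET_IP_);

	/* ...then hand off to the arch's renamed, optimised implementation. */
	return __memcpy(dest, src, len);
}

This is why the optimised powerpc routines only need the __ prefix plus a matching EXPORT_SYMBOL, which is what the _GLOBAL_KASAN()/EXPORT_SYMBOL_KASAN() helpers below provide.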
Signed-off-by: Christophe Leroy [mpe: Fixups to keep selftests working] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kasan.h | 15 +++++++++++++++ arch/powerpc/include/asm/string.h | 32 +++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/include/asm/kasan.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h new file mode 100644 index 000000000000..2c179a39d4ba --- /dev/null +++ b/arch/powerpc/include/asm/kasan.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifdef CONFIG_KASAN +#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn) +#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn) +#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn) +#else +#define _GLOBAL_KASAN(fn) _GLOBAL(fn) +#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(fn) +#define EXPORT_SYMBOL_KASAN(fn) +#endif + +#endif diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 1647de15a31e..9bf6dffb4090 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -4,14 +4,17 @@ #ifdef __KERNEL__ +#ifndef CONFIG_KASAN #define __HAVE_ARCH_STRNCPY #define __HAVE_ARCH_STRNCMP +#define __HAVE_ARCH_MEMCHR +#define __HAVE_ARCH_MEMCMP +#define __HAVE_ARCH_MEMSET16 +#endif + #define __HAVE_ARCH_MEMSET #define __HAVE_ARCH_MEMCPY #define __HAVE_ARCH_MEMMOVE -#define __HAVE_ARCH_MEMCMP -#define __HAVE_ARCH_MEMCHR -#define __HAVE_ARCH_MEMSET16 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE extern char * strcpy(char *,const char *); @@ -27,7 +30,27 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); extern void * memcpy_flushcache(void *,const void *,__kernel_size_t); +void *__memset(void *s, int c, __kernel_size_t count); +void *__memcpy(void *to, const void *from, __kernel_size_t n); +void *__memmove(void *to, const void *from, __kernel_size_t n); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif + #ifdef CONFIG_PPC64 +#ifndef CONFIG_KASAN #define __HAVE_ARCH_MEMSET32 #define __HAVE_ARCH_MEMSET64 @@ -49,8 +72,11 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) { return __memset64(p, v, n * 8); } +#endif #else +#ifndef CONFIG_KASAN #define __HAVE_ARCH_STRLEN +#endif extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); #endif -- cgit v1.2.3 From a67beca077ef79e971443aa6af6b14d4b3fb3bd6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 16:23:31 +0000 Subject: powerpc/32: make KVIRT_TOP dependent on FIXMAP_START When we add KASAN shadow area, KVIRT_TOP can't be anymore fixed at 0xfe000000. This patch uses FIXADDR_START to define KVIRT_TOP. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgtable.h | 13 ++++++++++--- arch/powerpc/include/asm/nohash/32/pgtable.h | 13 ++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index aa8406b8f7ba..838de59f6754 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -134,15 +134,24 @@ static inline bool pte_user(pte_t pte) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) + +#ifndef __ASSEMBLY__ + +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); + +#endif /* !__ASSEMBLY__ */ + /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary * value (for now) on others, from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ +#include + #ifdef CONFIG_HIGHMEM #define KVIRT_TOP PKMAP_BASE #else -#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#define KVIRT_TOP FIXADDR_START #endif /* @@ -373,8 +382,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); - /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} static inline int pte_read(pte_t pte) { return 1; } diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index bed433358260..0284f8f5305f 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -64,15 +64,24 @@ extern int icache_44x_need_flush; #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +#ifndef __ASSEMBLY__ + +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); + +#endif /* !__ASSEMBLY__ */ + + /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary * value (for now) on others, from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ +#include + #ifdef CONFIG_HIGHMEM #define KVIRT_TOP PKMAP_BASE #else -#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#define KVIRT_TOP FIXADDR_START #endif /* @@ -379,8 +388,6 @@ static inline int pte_young(pte_t pte) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */ -- cgit v1.2.3 From b4abe38fd698ace6942edeeb79a5b8a60a7af4fa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 16:23:32 +0000 Subject: powerpc/32: prepare shadow area for KASAN This patch prepares a shadow area for KASAN. The shadow area will be at the top of the kernel virtual memory space above the fixmap area and will occupy one eighth of the total kernel virtual memory space. 
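The one-eighth figure follows from the generic KASAN shadow translation, which maps every 8 bytes of address space onto one shadow byte. A rough sketch of that generic helper, shown here only for context:

static inline void *kasan_mem_to_shadow(const void *addr)
{
	return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
		+ KASAN_SHADOW_OFFSET;
}

With KASAN_SHADOW_SCALE_SHIFT = 3, shadowing the kernel virtual space therefore costs one eighth of it, placed just above the fixmap area as described above.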
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/fixmap.h | 5 +++++ arch/powerpc/include/asm/kasan.h | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index b9fbed84ddca..0cfc365d814b 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -22,7 +22,12 @@ #include #endif +#ifdef CONFIG_KASAN +#include +#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) +#else #define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) +#endif /* * Here we define all the compile-time 'special' virtual diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 2c179a39d4ba..05274dea3109 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -12,4 +12,20 @@ #define EXPORT_SYMBOL_KASAN(fn) #endif +#ifndef __ASSEMBLY__ + +#include + +#define KASAN_SHADOW_SCALE_SHIFT 3 + +#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ + (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT)) + +#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) + +#define KASAN_SHADOW_END 0UL + +#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START) + +#endif /* __ASSEMBLY */ #endif -- cgit v1.2.3 From 2edb16efc899f9c232e2d880930b855e4cf55df4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 26 Apr 2019 16:23:34 +0000 Subject: powerpc/32: Add KASAN support This patch adds KASAN support for PPC32. The following patch will add early activation of the hash table for book3s. Until then, a warning will be raised if trying to use KASAN on a hash 6xx. To support KASAN, this patch initialises the MMU mappings for accessing the KASAN shadow area defined in a previous patch. An early mapping is set as soon as the kernel code has been relocated to its final location. Then the definitive mapping is set once paging is initialised. For modules, the shadow area is allocated at module_alloc().
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kasan.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 05274dea3109..296e51c2f066 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -27,5 +27,14 @@ #define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START) +#ifdef CONFIG_KASAN +void kasan_early_init(void); +void kasan_mmu_init(void); +void kasan_init(void); +#else +static inline void kasan_init(void) { } +static inline void kasan_mmu_init(void) { } +#endif + #endif /* __ASSEMBLY */ #endif -- cgit v1.2.3 From 9c1d38b34e944cace44e0d2bea0beb5601a4d36d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 22 Mar 2019 08:08:39 +0000 Subject: powerpc/fadump: define an empty fadump_cleanup() To avoid #ifdefs, define a static inline fadump_cleanup() function when CONFIG_FADUMP is not selected. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/fadump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 188776befaf9..e2099c0a15c3 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -219,5 +219,6 @@ extern void fadump_cleanup(void); static inline int is_fadump_active(void) { return 0; } static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } +static inline void fadump_cleanup(void) { } #endif #endif -- cgit v1.2.3 From 93f2cd813797baf5590459fb0439c62e873b7748 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 22 Mar 2019 08:08:40 +0000 Subject: powerpc/mm: define an empty mm_iommu_init() To avoid ifdefs, define an empty static inline mm_iommu_init() function when CONFIG_SPAPR_TCE_IOMMU is not selected. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 66a3805dc935..611204e588b9 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -52,6 +52,7 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, { return false; } +static inline void mm_iommu_init(struct mm_struct *mm) { } #endif extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); extern void set_context(unsigned long id, pgd_t *pgd); -- cgit v1.2.3 From 453d87f6a8aed827f5ebb1708a4cea458fd68d23 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Thu, 2 May 2019 17:39:47 +1000 Subject: powerpc/mm: Warn if W+X pages found on boot Implement code to walk all pages and warn if any are found to be both writable and executable. Depends on STRICT_KERNEL_RWX being enabled, and is behind the DEBUG_WX config option. This only runs on boot and has no runtime performance implications. Very heavily influenced (and in some cases copied verbatim) from the ARM64 code written by Laura Abbott (thanks!), since our ptdump infrastructure is similar.
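Conceptually, the DEBUG_WX pass walks the kernel page tables and complains about any leaf mapping that is simultaneously writable and executable. A minimal sketch of the test (helper names are hypothetical; the real check lives in the ptdump walker and operates on accumulated flags per mapping range):

static bool pte_is_wx(pte_t pte)
{
	return pte_write(pte) && pte_exec(pte);
}

static void note_leaf_entry(unsigned long addr, pte_t pte)
{
	if (pte_is_wx(pte))
		pr_warn("Found W+X mapping at 0x%lx\n", addr);
}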
Signed-off-by: Russell Currey [mpe: Fixup build error when disabled] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index c51846da41a7..3f53be60fb01 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -105,6 +105,12 @@ void mark_initmem_nx(void); static inline void mark_initmem_nx(void) { } #endif +#ifdef CONFIG_PPC_DEBUG_WX +void ptdump_check_wx(void); +#else +static inline void ptdump_check_wx(void) { } +#endif + /* * When used, PTE_FRAG_NR is defined in subarch pgtable.h * so we are sure it is included when arriving here. -- cgit v1.2.3 From de269129a48a2d590ba1d20c719e19d86e3ddb3f Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 5 Mar 2019 01:12:19 +0530 Subject: powerpc/hmi: Fix kernel hang when TB is in error state. On TOD/TB errors, the timebase (TB) register stops/freezes until HMI error recovery gets TOD/TB back into a running state. On successful recovery, the TB starts running again and udelay(), which relies on the TB value, continues to function properly. But when HMI error recovery fails to fix the TOD/TB errors, the TB register stays frozen. With the TB not running, the __delay() function keeps looping and never returns. If __delay() is called while in the panic path, the system hangs and never reboots after panic. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 10 ++++++++++ arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/include/asm/time.h | 2 ++ 3 files changed, 14 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index e1d118ac61dc..234fde15b37c 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -209,6 +209,7 @@ #define OPAL_SENSOR_GROUP_ENABLE 163 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 +#define OPAL_HANDLE_HMI2 166 #define OPAL_NX_COPROC_INIT 167 #define OPAL_XIVE_GET_VP_STATE 170 #define OPAL_LAST 170 @@ -635,6 +636,15 @@ struct OpalHMIEvent { } u; }; +/* OPAL_HANDLE_HMI2 out_flags */ +enum { + OPAL_HMI_FLAGS_TB_RESYNC = (1ull << 0), /* Timebase has been resynced */ + OPAL_HMI_FLAGS_DEC_LOST = (1ull << 1), /* DEC lost, needs to be reprogrammed */ + OPAL_HMI_FLAGS_HDEC_LOST = (1ull << 2), /* HDEC lost, needs to be reprogrammed */ + OPAL_HMI_FLAGS_TOD_TB_FAIL = (1ull << 3), /* TOD/TB recovery failed.
*/ + OPAL_HMI_FLAGS_NEW_EVENT = (1ull << 63), /* An event has been created */ +}; + enum { OPAL_P7IOC_DIAG_TYPE_NONE = 0, OPAL_P7IOC_DIAG_TYPE_RGC = 1, diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 4e978d4dea5c..4cc37e708bc7 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -203,6 +203,7 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data); int64_t opal_handle_hmi(void); +int64_t opal_handle_hmi2(__be64 *out_flags); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); @@ -359,6 +360,7 @@ int opal_power_control_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); extern int opal_hmi_exception_early(struct pt_regs *regs); +extern int opal_hmi_exception_early2(struct pt_regs *regs); extern int opal_handle_hmi_exception(struct pt_regs *regs); extern void opal_shutdown(void); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 54bf7e68a7e1..57e968413d1e 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -36,6 +36,8 @@ extern unsigned long ppc_proc_freq; extern unsigned long ppc_tb_freq; #define DEFAULT_TB_FREQ 125000000UL +extern bool tb_invalid; + struct div_result { u64 result_high; u64 result_low; -- cgit v1.2.3 From d1720adff3783a2ba7c128e304a385d18962835b Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Tue, 16 Apr 2019 15:18:27 +0530 Subject: powerpc/include: Add data structures and macros for IMC trace mode Add the macros needed for IMC (In-Memory Collection Counters) trace-mode and data structure to hold the trace-imc record data. Also, add the new type "OPAL_IMC_COUNTERS_TRACE" in 'opal-api.h', since there is a new switch case added in the opal-calls for IMC. Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/imc-pmu.h | 39 +++++++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/opal-api.h | 1 + 2 files changed, 40 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 69f516ecb2fd..7c2ef0e42661 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -33,6 +33,7 @@ */ #define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL #define THREAD_IMC_ENABLE 0x8000000000000000ULL +#define TRACE_IMC_ENABLE 0x4000000000000000ULL /* * For debugfs interface for imc-mode and imc-command @@ -59,6 +60,34 @@ struct imc_events { char *scale; }; +/* + * Trace IMC hardware updates a 64bytes record on + * Core Performance Monitoring Counter (CPMC) + * overflow. Here is the layout for the trace imc record + * + * DW 0 : Timebase + * DW 1 : Program Counter + * DW 2 : PIDR information + * DW 3 : CPMC1 + * DW 4 : CPMC2 + * DW 5 : CPMC3 + * Dw 6 : CPMC4 + * DW 7 : Timebase + * ..... + * + * The following is the data structure to hold trace imc data. 
+ */ +struct trace_imc_data { + u64 tb1; + u64 ip; + u64 val; + u64 cpmc1; + u64 cpmc2; + u64 cpmc3; + u64 cpmc4; + u64 tb2; +}; + /* Event attribute array index */ #define IMC_FORMAT_ATTR 0 #define IMC_EVENT_ATTR 1 @@ -68,6 +97,13 @@ struct imc_events { /* PMU Format attribute macros */ #define IMC_EVENT_OFFSET_MASK 0xffffffffULL +/* + * Macro to mask bits 0:21 of first double word(which is the timebase) to + * compare with 8th double word (timebase) of trace imc record data. + */ +#define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL + + /* * Device tree parser code detects IMC pmu support and * registers new IMC pmus. This structure will hold the @@ -113,6 +149,7 @@ struct imc_pmu_ref { enum { IMC_TYPE_THREAD = 0x1, + IMC_TYPE_TRACE = 0x2, IMC_TYPE_CORE = 0x4, IMC_TYPE_CHIP = 0x10, }; @@ -123,6 +160,8 @@ enum { #define IMC_DOMAIN_NEST 1 #define IMC_DOMAIN_CORE 2 #define IMC_DOMAIN_THREAD 3 +/* For trace-imc the domain is still thread but it operates in trace-mode */ +#define IMC_DOMAIN_TRACE 4 extern int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id); diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 234fde15b37c..e1577cfa7186 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1129,6 +1129,7 @@ enum { enum { OPAL_IMC_COUNTERS_NEST = 1, OPAL_IMC_COUNTERS_CORE = 2, + OPAL_IMC_COUNTERS_TRACE = 3, }; -- cgit v1.2.3 From 5266e58d6cd90ac85c187d673093ad9cb649e16d Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Mon, 15 Apr 2019 14:52:11 +0300 Subject: powerpc/booke64: set RI in default MSR Set RI in the default kernel's MSR so that the architected way of detecting unrecoverable machine check interrupts has a chance to work. This is inline with the MSR setup of the rest of booke powerpc architectures configured here. Signed-off-by: Laurentiu Tudor Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg_booke.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index eb2a33d5df26..e382bd6ede84 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -41,7 +41,7 @@ #if defined(CONFIG_PPC_BOOK3E_64) #define MSR_64BIT MSR_CM -#define MSR_ (MSR_ME | MSR_CE) +#define MSR_ (MSR_ME | MSR_RI | MSR_CE) #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) -- cgit v1.2.3 From f39356261c265a0689d7ee568132d516e8b6cecc Mon Sep 17 00:00:00 2001 From: Rick Lindsley Date: Sun, 5 May 2019 17:20:43 -0700 Subject: powerpc/book3s/64: check for NULL pointer in pgd_alloc() When the memset code was added to pgd_alloc(), it failed to consider that kmem_cache_alloc() can return NULL. It's uncommon, but not impossible under heavy memory contention. 
Example oops: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc0000000000a4000 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA pSeries CPU: 70 PID: 48471 Comm: entrypoint.sh Kdump: loaded Not tainted 4.14.0-115.6.1.el7a.ppc64le #1 task: c000000334a00000 task.stack: c000000331c00000 NIP: c0000000000a4000 LR: c00000000012f43c CTR: 0000000000000020 REGS: c000000331c039c0 TRAP: 0300 Not tainted (4.14.0-115.6.1.el7a.ppc64le) MSR: 800000010280b033 CR: 44022840 XER: 20040000 CFAR: c000000000008874 DAR: 0000000000000000 DSISR: 42000000 SOFTE: 1 ... NIP [c0000000000a4000] memset+0x68/0x104 LR [c00000000012f43c] mm_init+0x27c/0x2f0 Call Trace: mm_init+0x260/0x2f0 (unreliable) copy_mm+0x11c/0x638 copy_process.isra.28.part.29+0x6fc/0x1080 _do_fork+0xdc/0x4c0 ppc_clone+0x8/0xc Instruction dump: 409e000c b0860000 38c60002 409d000c 90860000 38c60004 78a0d183 78a506a0 7c0903a6 41820034 60000000 60420000 f8860008 f8860010 f8860018 Fixes: fc5c2f4a55a2 ("powerpc/mm/hash64: Zero PGD pages on allocation") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Rick Lindsley Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 053a7940504e..d45e4449619f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -59,6 +59,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), pgtable_gfp_flags(mm, GFP_KERNEL)); + if (unlikely(!pgd)) + return pgd; + /* * Don't scan the PGD for pointers, it contains references to PUDs but * those references are not full pointers and so can't be recognised by -- cgit v1.2.3 From 8150a153c013aa2dd1ffae43370b89ac1347a7fb Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 8 May 2019 13:06:42 +1000 Subject: powerpc/64s: Use early_mmu_has_feature() in set_kuap() When implementing the KUAP support on Radix we fixed one case where mmu_has_feature() was being called too early in boot via __put_user_size(). However since then some new code in linux-next has created a new path via which we can end up calling mmu_has_feature() too early. On P9 this leads to crashes early in boot if we have both PPC_KUAP and CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG enabled. Our early boot code calls printk() which calls probe_kernel_read(), that does a __copy_from_user_inatomic() which calls into set_kuap() and that uses mmu_has_feature(). At that point in boot we haven't patched MMU features yet so the debug code in mmu_has_feature() complains, and calls printk(). At that point we recurse, eg: ... dump_stack+0xdc probe_kernel_read+0x1a4 check_pointer+0x58 ... printk+0x40 dump_stack_print_info+0xbc dump_stack+0x8 probe_kernel_read+0x1a4 probe_kernel_read+0x19c check_pointer+0x58 ... printk+0x40 cpufeatures_process_feature+0xc8 scan_cpufeatures_subnodes+0x380 of_scan_flat_dt_subnodes+0xb4 dt_cpu_ftrs_scan_callback+0x158 of_scan_flat_dt+0xf0 dt_cpu_ftrs_scan+0x3c early_init_devtree+0x360 early_setup+0x9c And so on for infinity, symptom is a dead system. Even more fun is what happens when using the hash MMU (ie. p8 or p9 with Radix disabled), and when we don't have CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG enabled. With the debug disabled we don't check if static keys have been initialised, we just rely on the jump label. 
But the jump label defaults to true so we just whack the AMR even though Radix is not enabled. Clearing the AMR is fine, but after we've done the user copy we write (0b11 << 62) into AMR. When using hash that makes all pages with key zero no longer readable or writable. All kernel pages implicitly have key zero, and so all of a sudden the kernel can't read or write any of its memory. Again dead system. In the medium term we have several options for fixing this. probe_kernel_read() doesn't need to touch AMR at all, it's not doing a user access after all, but it uses __copy_from_user_inatomic() just because it's easy, we could fix that. It would also be safe to default to not writing to the AMR during early boot, until we've detected features. But it's not clear that flipping all the MMU features to static_key_false won't introduce other bugs. But for now just switch to early_mmu_has_feature() in set_kuap(), that avoids all the problems with jump labels. It adds the overhead of a global lookup and test, but that's probably trivial compared to the writes to the AMR anyway. Fixes: 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU") Signed-off-by: Michael Ellerman Reviewed-by: Russell Currey --- arch/powerpc/include/asm/book3s/64/kup-radix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 7679bd0c5af0..f254de956d6a 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -65,7 +65,7 @@ static inline void set_kuap(unsigned long value) { - if (!mmu_has_feature(MMU_FTR_RADIX_KUAP)) + if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) return; /* -- cgit v1.2.3
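As background to the AMR values discussed above, the Radix KUAP entry/exit paths essentially toggle between two register states. This is a simplified sketch with parameter-free signatures, assuming the AMR_KUAP_BLOCKED constant from kup-radix.h; the real helpers also take the access range and direction into account:

static inline void allow_user_access(void)
{
	set_kuap(0);			/* key 0 unrestricted: user accesses allowed */
}

static inline void prevent_user_access(void)
{
	set_kuap(AMR_KUAP_BLOCKED);	/* 0b11 << 62: block read and write for key 0 */
}

On hash, where all kernel pages also carry key 0, writing the blocked value while Radix is not actually active is what locks the kernel out of its own memory, as described above.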