Diffstat (limited to 'arch/powerpc')
43 files changed, 333 insertions, 132 deletions
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index ef6549e57157..26d5d2a5b8e9 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -101,7 +101,8 @@ $(addprefix $(obj)/,$(zlib-y)): \
 libfdt       := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c
 libfdtheader := fdt.h libfdt.h libfdt_internal.h
 
-$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \
+$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \
+	treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \
 	$(addprefix $(obj)/,$(libfdtheader))
 
 src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 30a155c0a6b0..c615abdce119 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -16,6 +16,7 @@
 #define PGD_INDEX_SIZE	(32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX	PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 949d691094a4..67c5475311ee 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
  * keeping the prototype consistent across the two formats.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-			unsigned int subpg_index, unsigned long hidx)
+					 unsigned int subpg_index, unsigned long hidx,
+					 int offset)
 {
 	return (hidx << H_PAGE_F_GIX_SHIFT) &
 		(H_PAGE_F_SECOND | H_PAGE_F_GIX);
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 338b7da468ce..3bcf269f8f55 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -45,7 +45,7 @@
  * generic accessors and iterators here
  */
 #define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
 {
 	real_pte_t rpte;
 	unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
 	 */
 	smp_rmb();
 
-	hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+	hidxp = (unsigned long *)(ptep + offset);
 	rpte.hidx = *hidxp;
 	return rpte;
 }
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
  * expected to modify the PTE bits accordingly and commit the PTE to memory.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-		unsigned int subpg_index, unsigned long hidx)
+					 unsigned int subpg_index,
+					 unsigned long hidx, int offset)
 {
-	unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+	unsigned long *hidxp = (unsigned long *)(ptep + offset);
 
 	rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
 	*hidxp = rpte.hidx  | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
 }
 
 #define H_PTE_TABLE_SIZE	PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
 #define H_PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
 				 (sizeof(unsigned long) << PMD_INDEX_SIZE))
 #else
 #define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
 #endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE	((sizeof(pud_t) << PUD_INDEX_SIZE) +	\
+				 (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
 #define H_PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
 #define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 0920eff731b3..935adcd92a81 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -23,7 +23,8 @@
 				 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
 #define H_PGTABLE_RANGE		(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+	defined(CONFIG_PPC_64K_PAGES)
 /*
  * only with hash 64k we need to use the second half of pmd page table
  * to store pointer to deposited pgtable_t
@@ -33,6 +34,16 @@
 #define H_PMD_CACHE_INDEX	H_PMD_INDEX_SIZE
 #endif
 /*
+ * We store the slot details in the second half of page table.
+ * Increase the pud level table so that hugetlb ptes can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX	(H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX	(H_PUD_INDEX_SIZE)
+#endif
+/*
  * Define the address range of the kernel non-linear virtual area
  */
 #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 1fcfa425cefa..4746bc68d446 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
+	pgd_t *pgd;
+
 	if (radix_enabled())
 		return radix__pgd_alloc(mm);
-	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
-		pgtable_gfp_flags(mm, GFP_KERNEL));
+
+	pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+			       pgtable_gfp_flags(mm, GFP_KERNEL));
+	memset(pgd, 0, PGD_TABLE_SIZE);
+
+	return pgd;
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+	return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
 		pgtable_gfp_flags(mm, GFP_KERNEL));
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+	kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 	 * ahead and flush the page walk cache
 	 */
 	flush_tlb_pgtable(tlb, address);
-        pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+	pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 51017726d495..a6b9f1d74600 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
 extern unsigned long __pud_index_size;
 extern unsigned long __pgd_index_size;
 extern unsigned long __pmd_cache_index;
+extern unsigned long __pud_cache_index;
 #define PTE_INDEX_SIZE  __pte_index_size
 #define PMD_INDEX_SIZE  __pmd_index_size
 #define PUD_INDEX_SIZE  __pud_index_size
 #define PGD_INDEX_SIZE  __pgd_index_size
 #define PMD_CACHE_INDEX __pmd_cache_index
+#define PUD_CACHE_INDEX __pud_cache_index
 /*
  * Because of use of pte fragments and THP, size of page table
  * are not always derived out of index size above.
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
  */
 #ifndef __real_pte
 
-#define __real_pte(e,p)		((real_pte_t){(e)})
+#define __real_pte(e, p, o)		((real_pte_t){(e)})
 #define __rpte_to_pte(r)	((r).pte)
 #define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 176dfb73d42c..471b2274fbeb 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 					  EXC_HV, SOFTEN_TEST_HV, bitmask)
 
 #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask)		\
-	MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
+	MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /*
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 511acfd7ab0d..535add3f7791 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,7 +52,7 @@
 #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
 #define FW_FEATURE_PRRN		ASM_CONST(0x0000000200000000)
 #define FW_FEATURE_DRMEM_V2	ASM_CONST(0x0000000400000000)
-#define FW_FEATURE_DRC_INFO	ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO	ASM_CONST(0x0000000800000000)
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 88e5e8f17e98..855e17d158b1 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -30,6 +30,16 @@
 #define PACA_IRQ_PMI		0x40
 
 /*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE|PACA_IRQ_PMI)
+#else
+#define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE)
+#endif
+
+/*
  * flags for paca->irq_soft_mask
  */
 #define IRQS_ENABLED		0
@@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
 static inline void may_hard_irq_enable(void)
 {
 	get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
-	if (!(get_paca()->irq_happened & PACA_IRQ_EE))
+	if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
 		__hard_irq_enable();
 }
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 9dcbfa6bbb91..d8b1e8e7e035 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
 	return false;
 }
 
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
 #endif /* CONFIG_KEXEC_CORE */
 #endif /* ! __ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 504a3c36ce5c..03bbd1149530 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
 #define PGD_INDEX_SIZE	(32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX	PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index abddf5830ad5..5c5f75d005ad 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -27,6 +27,7 @@
 #else
 #define PMD_CACHE_INDEX	PMD_INDEX_SIZE
 #endif
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
 
 /*
  * Define the address range of the kernel non-linear virtual area
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 88187c285c70..9f421641a35c 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 extern int numa_update_cpu_topology(bool cpus_locked);
 
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+	numa_cpu_lookup_table[cpu] = node;
+}
+
 static inline int early_cpu_to_node(int cpu)
 {
 	int nid;
@@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
 {
 	return 0;
 }
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
 extern int start_topology_update(void);
 extern int stop_topology_update(void);
 extern int prrn_is_enabled(void);
+extern int find_and_online_cpu_nid(int cpu);
 #else
 static inline int start_topology_update(void)
 {
@@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)
 {
 	return 0;
 }
+static inline int find_and_online_cpu_nid(int cpu)
+{
+	return 0;
+}
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index beea2182d754..0c0b66fc5bfb 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -384,7 +384,8 @@ static void *eeh_report_resume(void *data, void *userdata)
 	eeh_pcid_put(dev);
 	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
 #ifdef CONFIG_PCI_IOV
-	eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
+		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
 #endif
 	return NULL;
 }
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index ee832d344a5a..9b6e653e501a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -943,6 +943,8 @@ kernel_dbg_exc:
 /*
  * An interrupt came in while soft-disabled; We mark paca->irq_happened
  * accordingly and if the interrupt is level sensitive, we hard disable
+ * hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so
+ * keep these in synch.
  */
 
 .macro masked_interrupt_book3e paca_irq full_mask
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 243d072a225a..3ac87e53b3da 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
 *   triggered and won't automatically refire.
 * - If it was a HMI we return immediately since we handled it in realmode
 *   and it won't refire.
- * - else we hard disable and return.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
 * This is called with r10 containing the value to OR to the paca field.
 */
 #define MASKED_INTERRUPT(_H)				\
@@ -1441,8 +1441,8 @@ masked_##_H##interrupt:					\
 	ori	r10,r10,0xffff;				\
 	mtspr	SPRN_DEC,r10;				\
 	b	MASKED_DEC_HANDLER_LABEL;		\
-1:	andi.	r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI);	\
-	bne	2f;					\
+1:	andi.	r10,r10,PACA_IRQ_MUST_HARD_MASK;	\
+	beq	2f;					\
 	mfspr	r10,SPRN_##_H##SRR1;			\
 	xori	r10,r10,MSR_EE; /* clear MSR_EE */	\
 	mtspr	SPRN_##_H##SRR1,r10;			\
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index adf044daafd7..acf4b2e0530c 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -874,7 +874,6 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
 		.mmu = 0,
 		.hash_ext = 0,
 		.radix_ext = 0,
-		.byte22 = OV5_FEAT(OV5_DRC_INFO),
 	},
 
 	/* option vector 6: IBM PAPR hints */
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 5a8bfee6e187..04d0bbd7a1dd 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
 	if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
 		device_create_file(s, &dev_attr_pir);
 
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
+	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+		!firmware_has_feature(FW_FEATURE_LPAR))
 		device_create_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
@@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
 	if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
 		device_remove_file(s, &dev_attr_pir);
 
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
+	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+		!firmware_has_feature(FW_FEATURE_LPAR))
 		device_remove_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 0c854816e653..5cb4e4687107 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep)
 	kmem_cache_free(kvm_pte_cache, ptep);
 }
 
+/* Like pmd_huge() and pmd_large(), but works regardless of config options */
+static inline int pmd_is_leaf(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			     unsigned int level, unsigned long mmu_seq)
 {
@@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 	else
 		new_pmd = pmd_alloc_one(kvm->mm, gpa);
 
-	if (level == 0 && !(pmd && pmd_present(*pmd)))
+	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
 		new_ptep = kvmppc_pte_alloc();
 
 	/* Check if we might have been invalidated; let the guest retry if so */
@@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		new_pmd = NULL;
 	}
 	pmd = pmd_offset(pud, gpa);
-	if (pmd_large(*pmd)) {
-		/* Someone else has instantiated a large page here; retry */
-		ret = -EAGAIN;
-		goto out_unlock;
-	}
-	if (level == 1 && !pmd_none(*pmd)) {
+	if (pmd_is_leaf(*pmd)) {
+		unsigned long lgpa = gpa & PMD_MASK;
+
+		/*
+		 * If we raced with another CPU which has just put
+		 * a 2MB pte in after we saw a pte page, try again.
+		 */
+		if (level == 0 && !new_ptep) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+		/* Valid 2MB page here already, remove it */
+		old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+					      ~0UL, 0, lgpa, PMD_SHIFT);
+		kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
+		if (old & _PAGE_DIRTY) {
+			unsigned long gfn = lgpa >> PAGE_SHIFT;
+			struct kvm_memory_slot *memslot;
+			memslot = gfn_to_memslot(kvm, gfn);
+			if (memslot && memslot->dirty_bitmap)
+				kvmppc_update_dirty_map(memslot,
+							gfn, PMD_SIZE);
+		}
+	} else if (level == 1 && !pmd_none(*pmd)) {
 		/*
 		 * There's a page table page here, but we wanted
 		 * to install a large page.  Tell the caller and let
@@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	} else {
 		page = pages[0];
 		pfn = page_to_pfn(page);
-		if (PageHuge(page)) {
-			page = compound_head(page);
-			pte_size <<= compound_order(page);
+		if (PageCompound(page)) {
+			pte_size <<= compound_order(compound_head(page));
 			/* See if we can insert a 2MB large-page PTE here */
 			if (pte_size >= PMD_SIZE &&
-			    (gpa & PMD_MASK & PAGE_MASK) ==
-			    (hva & PMD_MASK & PAGE_MASK)) {
+			    (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+			    (hva & (PMD_SIZE - PAGE_SIZE))) {
 				level = 1;
 				pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
 			}
 		}
 		/* See if we can provide write access */
 		if (writing) {
-			/*
-			 * We assume gup_fast has set dirty on the host PTE.
-			 */
 			pgflags |= _PAGE_WRITE;
 		} else {
 			local_irq_save(flags);
 			ptep = find_current_mm_pte(current->mm->pgd,
 						   hva, NULL, NULL);
-			if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
+			if (ptep && pte_write(*ptep))
 				pgflags |= _PAGE_WRITE;
 			local_irq_restore(flags);
 		}
@@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		pte = pfn_pte(pfn, __pgprot(pgflags));
 		ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
 	}
-	if (ret == 0 || ret == -EAGAIN)
-		ret = RESUME_GUEST;
 
 	if (page) {
-		/*
-		 * We drop pages[0] here, not page because page might
-		 * have been set to the head page of a compound, but
-		 * we have to drop the reference on the correct tail
-		 * page to match the get inside gup()
-		 */
-		put_page(pages[0]);
+		if (!ret && (pgflags & _PAGE_WRITE))
+			set_page_dirty_lock(page);
+		put_page(page);
 	}
+
+	if (ret == 0 || ret == -EAGAIN)
+		ret = RESUME_GUEST;
 	return ret;
 }
 
@@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm)
 				continue;
 			pmd = pmd_offset(pud, 0);
 			for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
-				if (pmd_huge(*pmd)) {
+				if (pmd_is_leaf(*pmd)) {
 					pmd_clear(pmd);
 					continue;
 				}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 89707354c2ef..9cb9448163c4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 */
 	trace_hardirqs_on();
 
-	guest_enter();
+	guest_enter_irqoff();
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
@@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
-	guest_exit();
-
 	trace_hardirqs_off();
 	set_irq_happened(trap);
 
@@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	kvmppc_set_host_core(pcpu);
 
 	local_irq_enable();
+	guest_exit();
 
 	/* Let secondaries go back to the offline loop */
 	for (i = 0; i < controlled_threads; ++i) {
@@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		goto up_out;
 
 	psize = vma_kernel_pagesize(vma);
-	porder = __ilog2(psize);
 
 	up_read(&current->mm->mmap_sem);
 
 	/* We can handle 4k, 64k or 16M pages in the VRMA */
-	err = -EINVAL;
-	if (!(psize == 0x1000 || psize == 0x10000 ||
-	      psize == 0x1000000))
-		goto out_srcu;
+	if (psize >= 0x1000000)
+		psize = 0x1000000;
+	else if (psize >= 0x10000)
+		psize = 0x10000;
+	else
+		psize = 0x1000;
+	porder = __ilog2(psize);
 
 	senc = slb_pgsize_encoding(psize);
 	kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index f0f5cd4d2fe7..f9818d7d3381 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -188,7 +188,7 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
 	if (!qpage) {
 		pr_err("Failed to allocate queue %d for VCPU %d\n",
 		       prio, xc->server_num);
-		return -ENOMEM;;
+		return -ENOMEM;
 	}
 	memset(qpage, 0, 1 << xive->q_order);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 403e642c78f5..52c205373986 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
 int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		unsigned int rt, int is_default_endian)
 {
-	enum emulation_result emulated;
+	enum emulation_result emulated = EMULATE_DONE;
 
 	while (vcpu->arch.mmio_vmx_copy_nums) {
 		emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
@@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	kvm_sigset_deactivate(vcpu);
 
+#ifdef CONFIG_ALTIVEC
 out:
+#endif
 	vcpu_put(vcpu);
 	return r;
 }
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 1604110c4238..3f1803672c9b 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -98,7 +98,7 @@ static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
 	dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
 	dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
 	dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
-	dr_cell->flags = cpu_to_be32(lmb->flags);
+	dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
 }
 
 static int drmem_update_dt_v2(struct device_node *memory,
@@ -121,7 +121,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
 		}
 
 		if (prev_lmb->aa_index != lmb->aa_index ||
-		    prev_lmb->flags != lmb->flags)
+		    drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb))
 			lmb_sets++;
 
 		prev_lmb = lmb;
@@ -150,7 +150,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
 		}
 
 		if (prev_lmb->aa_index != lmb->aa_index ||
-		    prev_lmb->flags != lmb->flags) {
+		    drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) {
 			/* end of one set, start of another */
 			dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
 			dr_cell++;
@@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
 	u32 i, n_lmbs;
 
 	n_lmbs = of_read_number(prop++, 1);
+	if (n_lmbs == 0)
+		return;
 
 	for (i = 0; i < n_lmbs; i++) {
 		read_drconf_v1_cell(&lmb, &prop);
@@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
 	u32 i, j, lmb_sets;
 
 	lmb_sets = of_read_number(prop++, 1);
+	if (lmb_sets == 0)
+		return;
 
 	for (i = 0; i < lmb_sets; i++) {
 		read_drconf_v2_cell(&dr_cell, &prop);
@@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
 	struct drmem_lmb *lmb;
 
 	drmem_info->n_lmbs = of_read_number(prop++, 1);
+	if (drmem_info->n_lmbs == 0)
+		return;
 
 	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
 				   GFP_KERNEL);
@@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
 	int lmb_index;
 
 	lmb_sets = of_read_number(prop++, 1);
+	if (lmb_sets == 0)
+		return;
 
 	/* first pass, calculate the number of LMBs */
 	p = prop;
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 5a69b51d08a3..d573d7d07f25 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	 * need to add in 0x1 if it's a read-only user page
 	 */
 	rflags = htab_convert_pte_flags(new_pte);
-	rpte = __real_pte(__pte(old_pte), ptep);
+	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
 	if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
 	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -117,7 +117,7 @@ repeat:
 			return -1;
 		}
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-		new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
 	}
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 2253bbc6a599..e601d95c3b20 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
 	vpn  = hpt_vpn(ea, vsid, ssize);
-	rpte = __real_pte(__pte(old_pte), ptep);
+	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
 	/*
 	 *None of the sub 4k page is hashed
 	 */
@@ -214,7 +214,7 @@ repeat:
 		return -1;
 	}
 
-	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
 	new_pte |= H_PAGE_HASHPTE;
 
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
@@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
 	rflags = htab_convert_pte_flags(new_pte);
-	rpte = __real_pte(__pte(old_pte), ptep);
+	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
 	if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
 	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -327,7 +327,7 @@ repeat:
 		}
 
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-		new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
 	}
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7d07c7e17db6..cf290d415dcd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
 	__pmd_index_size = H_PMD_INDEX_SIZE;
 	__pud_index_size = H_PUD_INDEX_SIZE;
 	__pgd_index_size = H_PGD_INDEX_SIZE;
+	__pud_cache_index = H_PUD_CACHE_INDEX;
 	__pmd_cache_index = H_PMD_CACHE_INDEX;
 	__pte_table_size = H_PTE_TABLE_SIZE;
 	__pmd_table_size = H_PMD_TABLE_SIZE;
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 12511f5a015f..b320f5097a06 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	unsigned long vpn;
 	unsigned long old_pte, new_pte;
 	unsigned long rflags, pa, sz;
-	long slot;
+	long slot, offset;
 
 	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
@@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
 	rflags = htab_convert_pte_flags(new_pte);
-	rpte = __real_pte(__pte(old_pte), ptep);
+	if (unlikely(mmu_psize == MMU_PAGE_16G))
+		offset = PTRS_PER_PUD;
+	else
+		offset = PTRS_PER_PMD;
+	rpte = __real_pte(__pte(old_pte), ptep, offset);
 
 	sz = ((1UL) << shift);
 	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 			return -1;
 		}
 
-		new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
 	}
 
 	/*
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index eb8c6c8c4851..2b656e67f2ea 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -100,6 +100,6 @@ void pgtable_cache_init(void)
 	 * same size as either the pgd or pmd index except with THP enabled
 	 * on book3s 64
 	 */
-	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
-		pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+	if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
+		pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
 }
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 314d19ab9385..edd8d0bc9364 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
 		numa_cpu_lookup_table[cpu] = -1;
 }
 
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
-	numa_cpu_lookup_table[cpu] = node;
-}
-
 static void map_cpu_to_node(int cpu, int node)
 {
 	update_numa_cpu_lookup_table(cpu, node);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 573a9a2ee455..2e10a964e290 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -17,9 +17,11 @@
 #include <linux/of_fdt.h>
 #include <linux/mm.h>
 #include <linux/string_helpers.h>
+#include <linux/stop_machine.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
 #include <asm/dma.h>
 #include <asm/machdep.h>
 #include <asm/mmu.h>
@@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
 		     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
 	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 	trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
+
+	/*
+	 * The init_mm context is given the first available (non-zero) PID,
+	 * which is the "guard PID" and contains no page table. PIDR should
+	 * never be set to zero because that duplicates the kernel address
+	 * space at the 0x0... offset (quadrant 0)!
+	 *
+	 * An arbitrary PID that may later be allocated by the PID allocator
+	 * for userspace processes must not be used either, because that
+	 * would cause stale user mappings for that PID on CPUs outside of
+	 * the TLB invalidation scheme (because it won't be in mm_cpumask).
+	 *
+	 * So permanently carve out one PID for the purpose of a guard PID.
+	 */
+	init_mm.context.id = mmu_base_pid;
+	mmu_base_pid++;
 }
 
 static void __init radix_init_partition_table(void)
@@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
 	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
 	__pud_index_size = RADIX_PUD_INDEX_SIZE;
 	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
+	__pud_cache_index = RADIX_PUD_INDEX_SIZE;
 	__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
 	__pte_table_size = RADIX_PTE_TABLE_SIZE;
 	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
@@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
 
 	radix_init_iamr();
 	radix_init_pgtable();
-
+	/* Switch to the guard PID before turning on MMU */
+	radix__switch_mmu_context(NULL, &init_mm);
 	if (cpu_has_feature(CPU_FTR_HVMODE))
 		tlbiel_all();
 }
@@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
 	}
 
 	radix_init_iamr();
+	radix__switch_mmu_context(NULL, &init_mm);
 	if (cpu_has_feature(CPU_FTR_HVMODE))
 		tlbiel_all();
 }
@@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
 	pud_clear(pud);
 }
 
+struct change_mapping_params {
+	pte_t *pte;
+	unsigned long start;
+	unsigned long end;
+	unsigned long aligned_start;
+	unsigned long aligned_end;
+};
+
+static int stop_machine_change_mapping(void *data)
+{
+	struct change_mapping_params *params =
+			(struct change_mapping_params *)data;
+
+	if (!data)
+		return -1;
+
+	spin_unlock(&init_mm.page_table_lock);
+	pte_clear(&init_mm, params->aligned_start, params->pte);
+	create_physical_mapping(params->aligned_start, params->start);
+	create_physical_mapping(params->end, params->aligned_end);
+	spin_lock(&init_mm.page_table_lock);
+	return 0;
+}
+
 static void remove_pte_table(pte_t *pte_start, unsigned long addr,
 			     unsigned long end)
 {
@@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
 	}
 }
 
+/*
+ * clear the pte and potentially split the mapping helper
+ */
+static void split_kernel_mapping(unsigned long addr, unsigned long end,
+				unsigned long size, pte_t *pte)
+{
+	unsigned long mask = ~(size - 1);
+	unsigned long aligned_start = addr & mask;
+	unsigned long aligned_end = addr + size;
+	struct change_mapping_params params;
+	bool split_region = false;
+
+	if ((end - addr) < size) {
+		/*
+		 * We're going to clear the PTE, but not flushed
+		 * the mapping, time to remap and flush. The
+		 * effects if visible outside the processor or
+		 * if we are running in code close to the
+		 * mapping we cleared, we are in trouble.
+		 */
+		if (overlaps_kernel_text(aligned_start, addr) ||
+			overlaps_kernel_text(end, aligned_end)) {
+			/*
+			 * Hack, just return, don't pte_clear
+			 */
+			WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
+				  "text, not splitting\n", addr, end);
+			return;
+		}
+		split_region = true;
+	}
+
+	if (split_region) {
+		params.pte = pte;
+		params.start = addr;
+		params.end = end;
+		params.aligned_start = addr & ~(size - 1);
+		params.aligned_end = min_t(unsigned long, aligned_end,
+				(unsigned long)__va(memblock_end_of_DRAM()));
+		stop_machine(stop_machine_change_mapping, &params, NULL);
+		return;
+	}
+
+	pte_clear(&init_mm, addr, pte);
+}
+
 static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
 			     unsigned long end)
 {
@@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
 			continue;
 
 		if (pmd_huge(*pmd)) {
-			if (!IS_ALIGNED(addr, PMD_SIZE) ||
-			    !IS_ALIGNED(next, PMD_SIZE)) {
-				WARN_ONCE(1, "%s: unaligned range\n", __func__);
-				continue;
-			}
-
-			pte_clear(&init_mm, addr, (pte_t *)pmd);
+			split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
 			continue;
 		}
 
@@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
 			continue;
 
 		if (pud_huge(*pud)) {
-			if (!IS_ALIGNED(addr, PUD_SIZE) ||
-			    !IS_ALIGNED(next, PUD_SIZE)) {
-				WARN_ONCE(1, "%s: unaligned range\n", __func__);
-				continue;
-			}
-
-			pte_clear(&init_mm, addr, (pte_t *)pud);
+			split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
 			continue;
 		}
 
@@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
 			continue;
 
 		if (pgd_huge(*pgd)) {
-			if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
-			    !IS_ALIGNED(next, PGDIR_SIZE)) {
-				WARN_ONCE(1, "%s: unaligned range\n", __func__);
-				continue;
-			}
-
-			pte_clear(&init_mm, addr, (pte_t *)pgd);
+			split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
 			continue;
 		}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c9a623c2d8a2..28c980eb4422 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
 EXPORT_SYMBOL(__pgd_index_size);
 unsigned long __pmd_cache_index;
 EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
 unsigned long __pte_table_size;
 EXPORT_SYMBOL(__pte_table_size);
 unsigned long __pmd_table_size;
@@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
 	if (old & PATB_HR) {
 		asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
 			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+		asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
 		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
 	} else {
 		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 881ebd53ffc2..9b23f12e863c 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	unsigned int psize;
 	int ssize;
 	real_pte_t rpte;
-	int i;
+	int i, offset;
 
 	i = batch->index;
 
@@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		psize = get_slice_psize(mm, addr);
 		/* Mask the address for the correct page size */
 		addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+		if (unlikely(psize == MMU_PAGE_16G))
+			offset = PTRS_PER_PUD;
+		else
+			offset = PTRS_PER_PMD;
 #else
 		BUG();
 		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		 * support 64k pages, this might be different from the
 		 * hardware page size encoded in the slice table. */
 		addr &= PAGE_MASK;
+		offset = PTRS_PER_PTE;
 	}
 
 
@@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	}
 	WARN_ON(vsid == 0);
 	vpn = hpt_vpn(addr, vsid, ssize);
-	rpte = __real_pte(__pte(pte), ptep);
+	rpte = __real_pte(__pte(pte), ptep, offset);
 
 	/*
 	 * Check if we have an active batch on this CPU. If not, just
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 872d1f6dd11e..a9636d8cba15 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -327,6 +327,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
 			break;
+		case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
+			PPC_LWZ_OFFS(r_A, r_skb, K);
+			break;
 		case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
 			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
 			break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0a34b0cec7b7..0ef3d9580e98 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -240,6 +240,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 	 *   goto out;
 	 */
 	PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
+	PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
 	PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
 	PPC_BCC(COND_GE, out);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index dd4c9b8b8a81..f6f55ab4980e 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)
 	const struct cpumask *l_cpumask;
 
 	get_online_cpus();
-	for_each_online_node(nid) {
+	for_each_node_with_cpus(nid) {
 		l_cpumask = cpumask_of_node(nid);
-		cpu = cpumask_first(l_cpumask);
+		cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
+		if (cpu >= nr_cpu_ids)
+			continue;
 		opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
 				       get_hard_smp_processor_id(cpu));
 	}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 496e47696ed0..a6c92c78c9b2 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1854,7 +1854,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 	s64 rc;
 
 	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
-		return -ENODEV;;
+		return -ENODEV;
 
 	pe = &phb->ioda.pe_array[pdn->pe_number];
 	if (pe->tce_bypass_enabled) {
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 4fb21e17504a..092715b9674b 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -80,6 +80,10 @@ static void pnv_setup_rfi_flush(void)
 		if (np && of_property_read_bool(np, "disabled"))
 			enable--;
 
+		np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
+		if (np && of_property_read_bool(np, "disabled"))
+			enable = 0;
+
 		of_node_put(np);
 		of_node_put(fw_features);
 	}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 2b3eb01ab110..b7c53a51c31b 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 			rc = PTR_ERR(txwin->paste_kaddr);
 			goto free_window;
 		}
+	} else {
+		/*
+		 * A user mapping must ensure that context switch issues
+		 * CP_ABORT for this thread.
+		 */
+		rc = set_thread_uses_vas();
+		if (rc)
+			goto free_window;
 	}
 
-	/*
-	 * Now that we have a send window, ensure context switch issues
-	 * CP_ABORT for this thread.
-	 */
-	rc = -EINVAL;
-	if (set_thread_uses_vas() < 0)
-		goto free_window;
-
 	set_vinst_win(vinst, txwin);
 
 	return txwin;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index dceb51454d8d..652d3e96b812 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -36,6 +36,7 @@
 #include <asm/xics.h>
 #include <asm/xive.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
 
 #include "pseries.h"
 #include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
 			BUG_ON(cpu_online(cpu));
 			set_cpu_present(cpu, false);
 			set_hard_smp_processor_id(cpu, -1);
+			update_numa_cpu_lookup_table(cpu, -1);
 			break;
 		}
 		if (cpu >= nr_cpu_ids)
@@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
 	cpu_maps_update_done();
 }
 
-extern int find_and_online_cpu_nid(int cpu);
-
 static int dlpar_online_cpu(struct device_node *dn)
 {
 	int rc = 0;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 81d8614e7379..5e1ef9150182 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -49,6 +49,28 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
 
 
 /*
+ * Enable the hotplug interrupt late because processing them may touch other
+ * devices or systems (e.g. hugepages) that have not been initialized at the
+ * subsys stage.
+ */
+int __init init_ras_hotplug_IRQ(void)
+{
+	struct device_node *np;
+
+	/* Hotplug Events */
+	np = of_find_node_by_path("/event-sources/hot-plug-events");
+	if (np != NULL) {
+		if (dlpar_workqueue_init() == 0)
+			request_event_sources_irqs(np, ras_hotplug_interrupt,
+						   "RAS_HOTPLUG");
+		of_node_put(np);
+	}
+
+	return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
+
+/*
  * Initialize handlers for the set of interrupts caused by hardware errors
  * and power system events.
  */
@@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
 		of_node_put(np);
 	}
 
-	/* Hotplug Events */
-	np = of_find_node_by_path("/event-sources/hot-plug-events");
-	if (np != NULL) {
-		if (dlpar_workqueue_init() == 0)
-			request_event_sources_irqs(np, ras_hotplug_interrupt,
-					   "RAS_HOTPLUG");
-		of_node_put(np);
-	}
-
 	/* EPOW Events */
 	np = of_find_node_by_path("/event-sources/epow-events");
 	if (np != NULL) {
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 372d7ada1a0c..1a527625acf7 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -482,7 +482,8 @@ static void pseries_setup_rfi_flush(void)
 		if (types == L1D_FLUSH_NONE)
 			types = L1D_FLUSH_FALLBACK;
 
-		if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+		if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
+		    (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
 			enable = false;
 	} else {
 		/* Default to fallback if case hcall is not available */
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index d9c4c9366049..091f1d0d0af1 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
 
 	rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
 	if (rc) {
-		pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+		pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+		       target, prio);
 		rc = -EIO;
 		goto fail;
 	}
@@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
 	/* Configure and enable the queue in HW */
 	rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
 	if (rc) {
-		pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+		pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+		       target, prio);
 		rc = -EIO;
 	} else {
 		q->qpage = qpage;
@@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
 	if (IS_ERR(qpage))
 		return PTR_ERR(qpage);
 
-	return xive_spapr_configure_queue(cpu, q, prio, qpage,
-					  xive_queue_shift);
+	return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+					  q, prio, qpage, xive_queue_shift);
 }
 
 static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
 	struct xive_q *q = &xc->queue[prio];
 	unsigned int alloc_order;
 	long rc;
+	int hw_cpu = get_hard_smp_processor_id(cpu);
 
-	rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+	rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
 	if (rc)
-		pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+		pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+		       hw_cpu, prio);
 
 	alloc_order = xive_alloc_order(xive_queue_shift);
 	free_pages((unsigned long)q->qpage, alloc_order);

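The hw_irq.h and exceptions-64s.S hunks replace the open-coded PACA_IRQ_EE and (PACA_IRQ_DBELL|PACA_IRQ_HMI) tests with a single PACA_IRQ_MUST_HARD_MASK mask. The minimal userspace model below shows the may_hard_irq_enable() decision after that change; the flag values follow the kernel's layout at the time (an assumption here), and the paca is reduced to one byte purely for illustration.

/* must_hard_mask_model.c - userspace illustration only, not kernel code */
#include <stdio.h>

#define PACA_IRQ_HARD_DIS	0x01
#define PACA_IRQ_DBELL		0x02
#define PACA_IRQ_EE		0x04
#define PACA_IRQ_DEC		0x08
#define PACA_IRQ_EE_EDGE	0x10
#define PACA_IRQ_HMI		0x20
#define PACA_IRQ_PMI		0x40

/* Book3S flavour from the hunk above: a pending EE or PMI must stay
 * hard-disabled until the soft-masked interrupt is replayed. */
#define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE | PACA_IRQ_PMI)

static int may_hard_irq_enable(unsigned char *irq_happened)
{
	*irq_happened &= ~PACA_IRQ_HARD_DIS;
	/* only re-enable hard interrupts if nothing pending must stay masked */
	return !(*irq_happened & PACA_IRQ_MUST_HARD_MASK);
}

int main(void)
{
	unsigned char dec_only = PACA_IRQ_HARD_DIS | PACA_IRQ_DEC;
	unsigned char pmi_pending = PACA_IRQ_HARD_DIS | PACA_IRQ_PMI;

	printf("DEC pending: hard-enable? %s\n",
	       may_hard_irq_enable(&dec_only) ? "yes" : "no");
	printf("PMI pending: hard-enable? %s\n",
	       may_hard_irq_enable(&pmi_pending) ? "yes" : "no");
	return 0;
}

Before the fix the check only looked at PACA_IRQ_EE, so the second case would wrongly have re-enabled hard interrupts while a performance-monitor exception was still pending.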