From 64f31d5802af11fd87872b4bae07b35cf0acb358 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 25 May 2016 09:45:26 +0200 Subject: s390/mm: simplify the TLB flushing code ptep_flush_lazy and pmdp_flush_lazy use mm->context.attach_count to decide between a lazy TLB flush vs an immediate TLB flush. The field contains two 16-bit counters, the number of CPUs that have the mm attached and can create TLB entries for it and the number of CPUs in the middle of a page table update. The __tlb_flush_asce, ptep_flush_direct and pmdp_flush_direct functions use the attach counter and a mask check with mm_cpumask(mm) to decide between a local flush local of the current CPU and a global flush. For all these functions the decision between lazy vs immediate and local vs global TLB flush can be based on CPU masks. There are two masks: the mm->context.cpu_attach_mask with the CPUs that are actively using the mm, and the mm_cpumask(mm) with the CPUs that have used the mm since the last full flush. The decision between lazy vs immediate flush is based on the mm->context.cpu_attach_mask, to decide between local vs global flush the mm_cpumask(mm) is used. With this patch all checks will use the CPU masks, the old counter mm->context.attach_count with its two 16-bit values is turned into a single counter mm->context.flush_count that keeps track of the number of CPUs with incomplete page table updates. The sole user of this counter is finish_arch_post_lock_switch() which waits for the end of all page table updates. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 2 +- arch/s390/include/asm/mmu_context.h | 15 +++++---------- arch/s390/include/asm/tlbflush.h | 13 +++++-------- arch/s390/kernel/smp.c | 8 ++------ arch/s390/mm/init.c | 4 +--- arch/s390/mm/pgtable.c | 34 ++++++++++++++-------------------- 6 files changed, 28 insertions(+), 48 deletions(-) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 081b2ad99d73..18226437a832 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -6,7 +6,7 @@ typedef struct { cpumask_t cpu_attach_mask; - atomic_t attach_count; + atomic_t flush_count; unsigned int flush_mm; spinlock_t list_lock; struct list_head pgtable_list; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index c837b79b455d..f77c638bf397 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -19,7 +19,7 @@ static inline int init_new_context(struct task_struct *tsk, INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); - atomic_set(&mm->context.attach_count, 0); + atomic_set(&mm->context.flush_count, 0); mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; @@ -90,15 +90,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, S390_lowcore.user_asce = next->context.asce; if (prev == next) return; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(next)); /* Clear old ASCE by loading the kernel ASCE. */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); - atomic_inc(&next->context.attach_count); - atomic_dec(&prev->context.attach_count); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch @@ -110,10 +107,9 @@ static inline void finish_arch_post_lock_switch(void) load_kernel_asce(); if (mm) { preempt_disable(); - while (atomic_read(&mm->context.attach_count) >> 16) + while (atomic_read(&mm->context.flush_count)) cpu_relax(); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); if (mm->context.flush_mm) __tlb_flush_mm(mm); preempt_enable(); @@ -128,7 +124,6 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); set_user_asce(next); } diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index ac02a6c37a3e..e72cea7a4bfe 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -57,7 +57,7 @@ static inline void __tlb_flush_global(void) static inline void __tlb_flush_full(struct mm_struct *mm) { preempt_disable(); - atomic_add(0x10000, &mm->context.attach_count); + atomic_inc(&mm->context.flush_count); if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { /* Local TLB flush */ __tlb_flush_local(); @@ -69,7 +69,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } @@ -78,12 +78,9 @@ static inline void __tlb_flush_full(struct mm_struct *mm) */ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { - int active, count; - preempt_disable(); - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { __tlb_flush_idte_local(asce); } else { @@ -96,7 +93,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 7b89a7572100..830537432493 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -242,10 +242,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { struct lowcore *lc = pcpu->lowcore; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); - atomic_inc(&init_mm.context.attach_count); lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->percpu_offset = __per_cpu_offset[cpu]; @@ -876,10 +874,8 @@ void __cpu_die(unsigned int cpu) while (!pcpu_stopped(pcpu)) cpu_relax(); pcpu_free_lowcore(pcpu); - atomic_dec(&init_mm.context.attach_count); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } void __noreturn cpu_die(void) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 44db60d9e519..de2cdf4fbb9a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -118,10 +118,8 @@ void mark_rodata_ro(void) void __init mem_init(void) { - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - atomic_set(&init_mm.context.attach_count, 1); set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 67111ccbb5e0..74f8f2a8a4e8 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -27,40 +27,37 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __ptep_ipte_local(addr, ptep); else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pte_t ptep_flush_lazy(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } @@ -289,7 +286,6 @@ EXPORT_SYMBOL(ptep_modify_prot_commit); static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; @@ -299,36 +295,34 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, __pmdp_csp(pmdp); return old; } - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __pmdp_idte_local(addr, pmdp); else __pmdp_idte(addr, pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; } else if (MACHINE_HAS_IDTE) __pmdp_idte(addr, pmdp); else __pmdp_csp(pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } -- cgit v1.2.3