7 files changed, 107 insertions, 44 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6cbbabf52707..7e2c2a635737 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -397,9 +397,10 @@ static inline void __flush_tlb_single(unsigned long addr)
 
 static inline void flush_tlb_others(const struct cpumask *cpumask,
 				    struct mm_struct *mm,
-				    unsigned long va)
+				    unsigned long start,
+				    unsigned long end)
 {
-	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
+	PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
 }
 
 static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8e8b9a4987ee..600a5fcac9cd 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -250,7 +250,8 @@ struct pv_mmu_ops {
 	void (*flush_tlb_single)(unsigned long addr);
 	void (*flush_tlb_others)(const struct cpumask *cpus,
 				 struct mm_struct *mm,
-				 unsigned long va);
+				 unsigned long start,
+				 unsigned long end);
 
 	/* Hooks for allocating and freeing a pagetable top-level */
 	int  (*pgd_alloc)(struct mm_struct *mm);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 36a1a2ab87d2..33608d96d68b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -73,14 +73,10 @@ static inline void __flush_tlb_one(unsigned long addr)
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus
+ *  - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus
  *
  * ..but the i386 has somewhat limited tlb flushing capabilities,
  * and page-granular flushes are available only on i486 and up.
- *
- * x86-64 can only flush individual pages or full VMs. For a range flush
- * we always do the full VM. Might be worth trying if for a small
- * range a few INVLPGs in a row are a win.
  */
 
 #ifndef CONFIG_SMP
@@ -111,7 +107,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 
 static inline void native_flush_tlb_others(const struct cpumask *cpumask,
 					   struct mm_struct *mm,
-					   unsigned long va)
+					   unsigned long start,
+					   unsigned long end)
 {
 }
 
@@ -129,17 +126,14 @@ extern void flush_tlb_all(void);
 extern void flush_tlb_current_task(void);
 extern void flush_tlb_mm(struct mm_struct *);
 extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+extern void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end);
 
 #define flush_tlb()	flush_tlb_current_task()
 
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	flush_tlb_mm(vma->vm_mm);
-}
-
 void native_flush_tlb_others(const struct cpumask *cpumask,
-			     struct mm_struct *mm, unsigned long va);
+				struct mm_struct *mm,
+				unsigned long start, unsigned long end);
 
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
@@ -159,7 +153,8 @@ static inline void reset_lazy_tlbstate(void)
 #endif	/* SMP */
 
 #ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(mask, mm, va)
+#define flush_tlb_others(mask, mm, start, end)	\
+	native_flush_tlb_others(mask, mm, start, end)
 #endif
 
 static inline void flush_tlb_kernel_range(unsigned long start,
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 3bb9491b7659..b47c2a82ff15 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -15,7 +15,8 @@ extern void uv_nmi_init(void);
 extern void uv_system_init(void);
 extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 						 struct mm_struct *mm,
-						 unsigned long va,
+						 unsigned long start,
+						 unsigned end,
 						 unsigned int cpu);
 
 #else	/* X86_UV */
@@ -26,7 +27,7 @@ static inline void uv_cpu_init(void)	{ }
 static inline void uv_system_init(void)	{ }
 static inline const struct cpumask *
 uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
-		    unsigned long va, unsigned int cpu)
+		    unsigned long start, unsigned long end, unsigned int cpu)
 { return cpumask; }
 
 #endif	/* X86_UV */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5e57e113b72c..3b91c981a27f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -41,7 +41,8 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
 union smp_flush_state {
 	struct {
 		struct mm_struct *flush_mm;
-		unsigned long flush_va;
+		unsigned long flush_start;
+		unsigned long flush_end;
 		raw_spinlock_t tlbstate_lock;
 		DECLARE_BITMAP(flush_cpumask, NR_CPUS);
 	};
@@ -156,10 +157,19 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 
 	if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) {
 		if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-			if (f->flush_va == TLB_FLUSH_ALL)
+			if (f->flush_end == TLB_FLUSH_ALL
+					|| !cpu_has_invlpg)
 				local_flush_tlb();
-			else
-				__flush_tlb_one(f->flush_va);
+			else if (!f->flush_end)
+				__flush_tlb_single(f->flush_start);
+			else {
+				unsigned long addr;
+				addr = f->flush_start;
+				while (addr < f->flush_end) {
+					__flush_tlb_single(addr);
+					addr += PAGE_SIZE;
+				}
+			}
 		} else
 			leave_mm(cpu);
 	}
@@ -172,7 +182,8 @@ out:
 }
 
 static void flush_tlb_others_ipi(const struct cpumask *cpumask,
-				 struct mm_struct *mm, unsigned long va)
+				 struct mm_struct *mm, unsigned long start,
+				 unsigned long end)
 {
 	unsigned int sender;
 	union smp_flush_state *f;
@@ -185,7 +196,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 		raw_spin_lock(&f->tlbstate_lock);
 
 	f->flush_mm = mm;
-	f->flush_va = va;
+	f->flush_start = start;
+	f->flush_end = end;
 	if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
 		/*
 		 * We have to send the IPI only to
@@ -199,24 +211,26 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 	}
 
 	f->flush_mm = NULL;
-	f->flush_va = 0;
+	f->flush_start = 0;
+	f->flush_end = 0;
 	if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
 		raw_spin_unlock(&f->tlbstate_lock);
 }
 
 void native_flush_tlb_others(const struct cpumask *cpumask,
-			     struct mm_struct *mm, unsigned long va)
+				 struct mm_struct *mm, unsigned long start,
+				 unsigned long end)
 {
 	if (is_uv_system()) {
 		unsigned int cpu;
 
 		cpu = smp_processor_id();
-		cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
+		cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
 		if (cpumask)
-			flush_tlb_others_ipi(cpumask, mm, va);
+			flush_tlb_others_ipi(cpumask, mm, start, end);
 		return;
 	}
-	flush_tlb_others_ipi(cpumask, mm, va);
+	flush_tlb_others_ipi(cpumask, mm, start, end);
 }
 
 static void __cpuinit calculate_tlb_offset(void)
@@ -282,7 +296,7 @@ void flush_tlb_current_task(void)
 
 	local_flush_tlb();
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
+		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
@@ -297,12 +311,63 @@ void flush_tlb_mm(struct mm_struct *mm)
 			leave_mm(smp_processor_id());
 	}
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
+		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+
+	preempt_enable();
+}
+
+#define FLUSHALL_BAR	16
+
+void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm;
+
+	if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB) {
+		flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	preempt_disable();
+	mm = vma->vm_mm;
+	if (current->active_mm == mm) {
+		if (current->mm) {
+			unsigned long addr, vmflag = vma->vm_flags;
+			unsigned act_entries, tlb_entries = 0;
+
+			if (vmflag & VM_EXEC)
+				tlb_entries = tlb_lli_4k[ENTRIES];
+			else
+				tlb_entries = tlb_lld_4k[ENTRIES];
+
+			act_entries = tlb_entries > mm->total_vm ?
+					mm->total_vm : tlb_entries;
 
+			if ((end - start)/PAGE_SIZE > act_entries/FLUSHALL_BAR)
+				local_flush_tlb();
+			else {
+				for (addr = start; addr < end;
+						addr += PAGE_SIZE)
+					__flush_tlb_single(addr);
+
+				if (cpumask_any_but(mm_cpumask(mm),
+					smp_processor_id()) < nr_cpu_ids)
+					flush_tlb_others(mm_cpumask(mm), mm,
+								start, end);
+				preempt_enable();
+				return;
+			}
+		} else {
+			leave_mm(smp_processor_id());
+		}
+	}
+	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 {
 	struct mm_struct *mm = vma->vm_mm;
 
@@ -310,13 +375,13 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 
 	if (current->active_mm == mm) {
 		if (current->mm)
-			__flush_tlb_one(va);
+			__flush_tlb_one(start);
 		else
 			leave_mm(smp_processor_id());
 	}
 
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), mm, va);
+		flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
 
 	preempt_enable();
 }
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 59880afa851f..f1bef8e1d633 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1068,8 +1068,8 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
  * done.  The returned pointer is valid till preemption is re-enabled.
  */
 const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
-				struct mm_struct *mm, unsigned long va,
-				unsigned int cpu)
+				struct mm_struct *mm, unsigned long start,
+				unsigned end, unsigned int cpu)
 {
 	int locals = 0;
 	int remotes = 0;
@@ -1112,7 +1112,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 
 	record_send_statistics(stat, locals, hubs, remotes, bau_desc);
 
-	bau_desc->payload.address = va;
+	bau_desc->payload.address = start;
 	bau_desc->payload.sending_cpu = cpu;
 	/*
 	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3a73785631ce..39ed56789f68 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1244,7 +1244,8 @@ static void xen_flush_tlb_single(unsigned long addr)
 }
 
 static void xen_flush_tlb_others(const struct cpumask *cpus,
-				 struct mm_struct *mm, unsigned long va)
+				 struct mm_struct *mm, unsigned long start,
+				 unsigned long end)
 {
 	struct {
 		struct mmuext_op op;
@@ -1256,7 +1257,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	} *args;
 	struct multicall_space mcs;
 
-	trace_xen_mmu_flush_tlb_others(cpus, mm, va);
+	trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
 
 	if (cpumask_empty(cpus))
 		return;		/* nothing to do */
@@ -1269,11 +1270,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
 	cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
 
-	if (va == TLB_FLUSH_ALL) {
-		args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-	} else {
+	args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
+	if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
 		args->op.cmd = MMUEXT_INVLPG_MULTI;
-		args->op.arg1.linear_addr = va;
+		args->op.arg1.linear_addr = start;
 	}
 
 	MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);