-rw-r--r-- | include/linux/mm_types.h |  4
-rw-r--r-- | mm/internal.h            |  5
-rw-r--r-- | mm/madvise.c             |  2
-rw-r--r-- | mm/memory.c              |  1
-rw-r--r-- | mm/mprotect.c            |  1
-rw-r--r-- | mm/mremap.c              |  1
-rw-r--r-- | mm/rmap.c                | 36
7 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 08d947fc4c59..e8471c2ca83a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -508,6 +508,10 @@ struct mm_struct {
 	 */
 	bool tlb_flush_pending;
 #endif
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	/* See flush_tlb_batched_pending() */
+	bool tlb_flush_batched;
+#endif
 	struct uprobes_state uprobes_state;
 #ifdef CONFIG_X86_INTEL_MPX
 	/* address of the bounds directory */
diff --git a/mm/internal.h b/mm/internal.h
index 537ac9951f5f..34a5459e5989 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -472,6 +472,7 @@ struct tlbflush_unmap_batch;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
+void flush_tlb_batched_pending(struct mm_struct *mm);
 #else
 static inline void try_to_unmap_flush(void)
 {
@@ -479,7 +480,9 @@ static inline void try_to_unmap_flush(void)
 static inline void try_to_unmap_flush_dirty(void)
 {
 }
-
+static inline void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+}
 #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 
 extern const struct trace_print_flags pageflag_names[];
diff --git a/mm/madvise.c b/mm/madvise.c
index 93fb63e88b5e..253b1533fba5 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -21,6 +21,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/mmu_notifier.h>
+#include "internal.h"
 
 #include <asm/tlb.h>
 
@@ -282,6 +283,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 		return 0;
 
 	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
diff --git a/mm/memory.c b/mm/memory.c
index e6a5a1f20492..9bf3da0d0e14 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1124,6 +1124,7 @@ again:
 	init_rss_vec(rss);
 	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	pte = start_pte;
+	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = *pte;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 11936526b08b..ae740c9b1f9b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -74,6 +74,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	if (!pte)
 		return 0;
 
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		oldpte = *pte;
diff --git a/mm/mremap.c b/mm/mremap.c
index 30d7d2482eea..15976716dd40 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -142,6 +142,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	new_ptl = pte_lockptr(mm, new_pmd);
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
diff --git a/mm/rmap.c b/mm/rmap.c
index cd37c1c7e21b..94488b0362f8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -617,6 +617,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
 	tlb_ubc->flush_required = true;
 
 	/*
+	 * Ensure compiler does not re-order the setting of tlb_flush_batched
+	 * before the PTE is cleared.
+	 */
+	barrier();
+	mm->tlb_flush_batched = true;
+
+	/*
 	 * If the PTE was dirty then it's best to assume it's writable. The
 	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
 	 * before the page is queued for IO.
@@ -643,6 +650,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 
 	return should_defer;
 }
+
+/*
+ * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
+ * releasing the PTL if TLB flushes are batched. It's possible for a parallel
+ * operation such as mprotect or munmap to race between reclaim unmapping
+ * the page and flushing the page. If this race occurs, it potentially allows
+ * access to data via a stale TLB entry. Tracking all mm's that have TLB
+ * batching in flight would be expensive during reclaim so instead track
+ * whether TLB batching occurred in the past and if so then do a flush here
+ * if required. This will cost one additional flush per reclaim cycle paid
+ * by the first operation at risk such as mprotect and munmap.
+ *
+ * This must be called under the PTL so that an access to tlb_flush_batched
+ * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
+ * via the PTL.
+ */
+void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+	if (mm->tlb_flush_batched) {
+		flush_tlb_mm(mm);
+
+		/*
+		 * Do not allow the compiler to re-order the clearing of
+		 * tlb_flush_batched before the tlb is flushed.
+		 */
+		barrier();
+		mm->tlb_flush_batched = false;
+	}
+}
 #else
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
 		struct page *page, bool writable)
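
The comment added to mm/rmap.c above describes the race in prose. The sketch below is a minimal userspace model of that control flow, not kernel code: a pthread mutex stands in for the page table lock (PTL), plain booleans stand in for the PTE, the CPU's cached TLB entry and mm->tlb_flush_batched, and every *_model and *_thread name is invented for illustration. It only shows why an operation such as mprotect must complete any pending batched flush before trusting the page table state it reads under the PTL.

/*
 * Userspace model (NOT kernel code) of the race closed by this patch.
 * All names here are illustrative; the mutex "ptl" plays the role of the
 * page table lock and the booleans play the role of the PTE, a stale TLB
 * entry and mm->tlb_flush_batched.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER; /* "page table lock" */
static bool pte_present = true;        /* the page table entry              */
static bool stale_tlb_entry = false;   /* translation still cached by a CPU */
static bool tlb_flush_batched = false; /* mm->tlb_flush_batched analogue    */

/* Model of flush_tlb_mm(): drop any cached translation. */
static void flush_tlb_mm_model(void)
{
	stale_tlb_entry = false;
}

/* Model of flush_tlb_batched_pending(); called with "ptl" held. */
static void flush_tlb_batched_pending_model(void)
{
	if (tlb_flush_batched) {
		flush_tlb_mm_model();
		tlb_flush_batched = false;
	}
}

/* Reclaim: clear the PTE under the PTL but defer (batch) the TLB flush. */
static void *reclaim_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&ptl);
	pte_present = false;
	stale_tlb_entry = true;   /* a CPU may still hold the old entry      */
	tlb_flush_batched = true; /* record that a flush is still pending    */
	pthread_mutex_unlock(&ptl);
	return NULL;
}

/* mprotect/munmap/madvise-style operation racing with reclaim. */
static void *mprotect_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&ptl);
	/*
	 * Without this call the thread would see the cleared PTE and assume
	 * no CPU can still access the page through a stale, possibly
	 * writable, TLB entry. With it, the pending batched flush is
	 * completed before the page table state is trusted.
	 */
	flush_tlb_batched_pending_model();
	printf("PTE present: %s, stale TLB entry after check: %s\n",
	       pte_present ? "yes" : "no",
	       stale_tlb_entry ? "yes" : "no");
	pthread_mutex_unlock(&ptl);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, reclaim_thread, NULL);
	pthread_join(a, NULL); /* force the racy interleaving for the demo */
	pthread_create(&b, NULL, mprotect_thread, NULL);
	pthread_join(b, NULL);
	return 0;
}

Build with something like "cc -pthread race_model.c". In the real patch no extra atomics are needed: the PTL already orders reclaim against the racing operation, and barrier() only prevents the compiler from reordering the tlb_flush_batched update against the PTE clear and the flush.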