diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-08-14 13:29:31 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-08-14 13:29:31 -0700 |
commit | d985524680b77a1cf90738db9a2d72065a746afa (patch) | |
tree | 101857c3e59f9238b0c7d4bbbf58e46476957387 /mm/rmap.c | |
parent | 2007eafd9df71375d443192bbbe7aa2175992477 (diff) | |
parent | ef954844c7ace62f773f4f23e28d2d915adc419f (diff) |
Merge 4.13-rc5 into char-misc-next
We want the firmware, and other changes, in here as well.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 88 |
1 files changed, 66 insertions, 22 deletions
diff --git a/mm/rmap.c b/mm/rmap.c index ced14f1af6dc..c1286d47aa1f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -605,6 +605,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) tlb_ubc->flush_required = true; /* + * Ensure compiler does not re-order the setting of tlb_flush_batched + * before the PTE is cleared. + */ + barrier(); + mm->tlb_flush_batched = true; + + /* * If the PTE was dirty then it's best to assume it's writable. The * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() * before the page is queued for IO. @@ -631,6 +638,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) return should_defer; } + +/* + * Reclaim unmaps pages under the PTL but do not flush the TLB prior to + * releasing the PTL if TLB flushes are batched. It's possible for a parallel + * operation such as mprotect or munmap to race between reclaim unmapping + * the page and flushing the page. If this race occurs, it potentially allows + * access to data via a stale TLB entry. Tracking all mm's that have TLB + * batching in flight would be expensive during reclaim so instead track + * whether TLB batching occurred in the past and if so then do a flush here + * if required. This will cost one additional flush per reclaim cycle paid + * by the first operation at risk such as mprotect and mumap. + * + * This must be called under the PTL so that an access to tlb_flush_batched + * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise + * via the PTL. + */ +void flush_tlb_batched_pending(struct mm_struct *mm) +{ + if (mm->tlb_flush_batched) { + flush_tlb_mm(mm); + + /* + * Do not allow the compiler to re-order the clearing of + * tlb_flush_batched before the tlb is flushed. + */ + barrier(); + mm->tlb_flush_batched = false; + } +} #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable) { @@ -852,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, .flags = PVMW_SYNC, }; int *cleaned = arg; + bool invalidation_needed = false; while (page_vma_mapped_walk(&pvmw)) { int ret = 0; - address = pvmw.address; if (pvmw.pte) { pte_t entry; pte_t *pte = pvmw.pte; @@ -863,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, if (!pte_dirty(*pte) && !pte_write(*pte)) continue; - flush_cache_page(vma, address, pte_pfn(*pte)); - entry = ptep_clear_flush(vma, address, pte); + flush_cache_page(vma, pvmw.address, pte_pfn(*pte)); + entry = ptep_clear_flush(vma, pvmw.address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); - set_pte_at(vma->vm_mm, address, pte, entry); + set_pte_at(vma->vm_mm, pvmw.address, pte, entry); ret = 1; } else { #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE @@ -877,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, if (!pmd_dirty(*pmd) && !pmd_write(*pmd)) continue; - flush_cache_page(vma, address, page_to_pfn(page)); - entry = pmdp_huge_clear_flush(vma, address, pmd); + flush_cache_page(vma, pvmw.address, page_to_pfn(page)); + entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd); entry = pmd_wrprotect(entry); entry = pmd_mkclean(entry); - set_pmd_at(vma->vm_mm, address, pmd, entry); + set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry); ret = 1; #else /* unexpected pmd-mapped page? */ @@ -890,11 +926,16 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, } if (ret) { - mmu_notifier_invalidate_page(vma->vm_mm, address); (*cleaned)++; + invalidation_needed = true; } } + if (invalidation_needed) { + mmu_notifier_invalidate_range(vma->vm_mm, address, + address + (1UL << compound_order(page))); + } + return true; } @@ -1287,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, }; pte_t pteval; struct page *subpage; - bool ret = true; + bool ret = true, invalidation_needed = false; enum ttu_flags flags = (enum ttu_flags)arg; /* munlock has nothing to gain from examining un-locked vmas */ @@ -1327,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, VM_BUG_ON_PAGE(!pvmw.pte, page); subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); - address = pvmw.address; - if (!(flags & TTU_IGNORE_ACCESS)) { - if (ptep_clear_flush_young_notify(vma, address, + if (ptep_clear_flush_young_notify(vma, pvmw.address, pvmw.pte)) { ret = false; page_vma_mapped_walk_done(&pvmw); @@ -1340,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } /* Nuke the page table entry. */ - flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); + flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte)); if (should_defer_flush(mm, flags)) { /* * We clear the PTE but do not flush so potentially @@ -1350,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * transition on a cached TLB entry is written through * and traps if the PTE is unmapped. */ - pteval = ptep_get_and_clear(mm, address, pvmw.pte); + pteval = ptep_get_and_clear(mm, pvmw.address, + pvmw.pte); set_tlb_ubc_flush_pending(mm, pte_dirty(pteval)); } else { - pteval = ptep_clear_flush(vma, address, pvmw.pte); + pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte); } /* Move the dirty bit to the page. Now the pte is gone. */ @@ -1369,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (PageHuge(page)) { int nr = 1 << compound_order(page); hugetlb_count_sub(nr, mm); - set_huge_swap_pte_at(mm, address, + set_huge_swap_pte_at(mm, pvmw.address, pvmw.pte, pteval, vma_mmu_pagesize(vma)); } else { dec_mm_counter(mm, mm_counter(page)); - set_pte_at(mm, address, pvmw.pte, pteval); + set_pte_at(mm, pvmw.address, pvmw.pte, pteval); } } else if (pte_unused(pteval)) { @@ -1398,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); - set_pte_at(mm, address, pvmw.pte, swp_pte); + set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); } else if (PageAnon(page)) { swp_entry_t entry = { .val = page_private(subpage) }; pte_t swp_pte; @@ -1424,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * If the page was redirtied, it cannot be * discarded. Remap the page to page table. */ - set_pte_at(mm, address, pvmw.pte, pteval); + set_pte_at(mm, pvmw.address, pvmw.pte, pteval); SetPageSwapBacked(page); ret = false; page_vma_mapped_walk_done(&pvmw); @@ -1432,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } if (swap_duplicate(entry) < 0) { - set_pte_at(mm, address, pvmw.pte, pteval); + set_pte_at(mm, pvmw.address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; @@ -1448,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); - set_pte_at(mm, address, pvmw.pte, swp_pte); + set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); } else dec_mm_counter(mm, mm_counter_file(page)); discard: page_remove_rmap(subpage, PageHuge(page)); put_page(page); - mmu_notifier_invalidate_page(mm, address); + invalidation_needed = true; } + + if (invalidation_needed) + mmu_notifier_invalidate_range(mm, address, + address + (1UL << compound_order(page))); return ret; } |