diff options
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 89 |
1 files changed, 44 insertions, 45 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e4c6c3edaf6a..7ea8da990b9d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1445,7 +1445,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) + pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; @@ -1476,7 +1476,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); - if (pmd_present(pmd) && pmd_dirty(pmd)) + if (pmd_present(pmd)) force_flush = true; VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1487,12 +1487,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, pgtable_trans_huge_deposit(mm, new_pmd, pgtable); } set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); - if (new_ptl != old_ptl) - spin_unlock(new_ptl); if (force_flush) flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); - else - *need_flush = true; + if (new_ptl != old_ptl) + spin_unlock(new_ptl); spin_unlock(old_ptl); return true; } @@ -1841,7 +1839,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, } } -static void freeze_page(struct page *page) +static void unmap_page(struct page *page) { enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED; @@ -1864,7 +1862,7 @@ static void freeze_page(struct page *page) VM_BUG_ON_PAGE(ret, page + i - 1); } -static void unfreeze_page(struct page *page) +static void remap_page(struct page *page) { int i; @@ -1878,26 +1876,13 @@ static void __split_huge_page_tail(struct page *head, int tail, struct page *page_tail = head + tail; VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail); - VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail); /* - * tail_page->_refcount is zero and not changing from under us. But - * get_page_unless_zero() may be running from under us on the - * tail_page. If we used atomic_set() below instead of atomic_inc() or - * atomic_add(), we would then run atomic_set() concurrently with - * get_page_unless_zero(), and atomic_set() is implemented in C not - * using locked ops. spin_unlock on x86 sometime uses locked ops - * because of PPro errata 66, 92, so unless somebody can guarantee - * atomic_set() here would be safe on all archs (and not only on x86), - * it's safer to use atomic_inc()/atomic_add(). + * Clone page flags before unfreezing refcount. + * + * After successful get_page_unless_zero() might follow flags change, + * for exmaple lock_page() which set PG_waiters. */ - if (PageAnon(head)) { - page_ref_inc(page_tail); - } else { - /* Additional pin to radix tree */ - page_ref_add(page_tail, 2); - } - page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; page_tail->flags |= (head->flags & ((1L << PG_referenced) | @@ -1909,36 +1894,42 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_unevictable) | (1L << PG_dirty))); - /* - * After clearing PageTail the gup refcount can be released. - * Page flags also must be visible before we make the page non-compound. - */ + /* ->mapping in first tail page is compound_mapcount */ + VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, + page_tail); + page_tail->mapping = head->mapping; + page_tail->index = head->index + tail; + + /* Page flags must be visible before we make the page non-compound. */ smp_wmb(); + /* + * Clear PageTail before unfreezing page refcount. + * + * After successful get_page_unless_zero() might follow put_page() + * which needs correct compound_head(). + */ clear_compound_head(page_tail); + /* Finally unfreeze refcount. Additional reference from page cache. */ + page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) || + PageSwapCache(head))); + if (page_is_young(head)) set_page_young(page_tail); if (page_is_idle(head)) set_page_idle(page_tail); - /* ->mapping in first tail page is compound_mapcount */ - VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, - page_tail); - page_tail->mapping = head->mapping; - - page_tail->index = head->index + tail; page_cpupid_xchg_last(page_tail, page_cpupid_last(head)); lru_add_page_tail(head, page_tail, lruvec, list); } static void __split_huge_page(struct page *page, struct list_head *list, - unsigned long flags) + pgoff_t end, unsigned long flags) { struct page *head = compound_head(page); struct zone *zone = page_zone(head); struct lruvec *lruvec; - pgoff_t end = -1; int i; lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat); @@ -1946,9 +1937,6 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* complete memcg works before add pages to LRU */ mem_cgroup_split_huge_fixup(head); - if (!PageAnon(page)) - end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE); - for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond i_size: drop them from page cache */ @@ -1973,7 +1961,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); - unfreeze_page(head); + remap_page(head); for (i = 0; i < HPAGE_PMD_NR; i++) { struct page *subpage = head + i; @@ -2101,6 +2089,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) int count, mapcount, extra_pins, ret; bool mlocked; unsigned long flags; + pgoff_t end; VM_BUG_ON_PAGE(is_huge_zero_page(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); @@ -2122,6 +2111,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) goto out; } extra_pins = 0; + end = -1; mapping = NULL; anon_vma_lock_write(anon_vma); } else { @@ -2137,10 +2127,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) extra_pins = HPAGE_PMD_NR; anon_vma = NULL; i_mmap_lock_read(mapping); + + /* + *__split_huge_page() may need to trim off pages beyond EOF: + * but on 32-bit, i_size_read() takes an irq-unsafe seqlock, + * which cannot be nested inside the page tree lock. So note + * end now: i_size itself may be changed at any moment, but + * head page lock is good enough to serialize the trimming. + */ + end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); } /* - * Racy check if we can split the page, before freeze_page() will + * Racy check if we can split the page, before unmap_page() will * split PMDs */ if (total_mapcount(head) != page_count(head) - extra_pins - 1) { @@ -2149,7 +2148,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } mlocked = PageMlocked(page); - freeze_page(head); + unmap_page(head); VM_BUG_ON_PAGE(compound_mapcount(head), head); /* Make sure the page is not on per-CPU pagevec as it takes pin */ @@ -2186,7 +2185,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (mapping) __dec_node_page_state(page, NR_SHMEM_THPS); spin_unlock(&pgdata->split_queue_lock); - __split_huge_page(page, list, flags); + __split_huge_page(page, list, end, flags); ret = 0; } else { if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { @@ -2201,7 +2200,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) fail: if (mapping) spin_unlock(&mapping->tree_lock); spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); - unfreeze_page(head); + remap_page(head); ret = -EBUSY; } |