From 5b56d49fc31dbb0487e14ead790fc81ca9fb2c99 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 14 Dec 2016 15:06:52 -0800 Subject: mm: add locked parameter to get_user_pages_remote() Patch series "mm: unexport __get_user_pages_unlocked()". This patch series continues the cleanup of get_user_pages*() functions taking advantage of the fact we can now pass gup_flags as we please. It firstly adds an additional 'locked' parameter to get_user_pages_remote() to allow for its callers to utilise VM_FAULT_RETRY functionality. This is necessary as the invocation of __get_user_pages_unlocked() in process_vm_rw_single_vec() makes use of this and no other existing higher level function would allow it to do so. Secondly existing callers of __get_user_pages_unlocked() are replaced with the appropriate higher-level replacement - get_user_pages_unlocked() if the current task and memory descriptor are referenced, or get_user_pages_remote() if other task/memory descriptors are referenced (having acquired mmap_sem). This patch (of 2): Add an int *locked parameter to get_user_pages_remote() to allow VM_FAULT_RETRY faulting behaviour similar to get_user_pages_[un]locked(). Taking into account the previous adjustments to get_user_pages*() functions allowing for the passing of gup_flags, we are now in a position where __get_user_pages_unlocked() need only be exported for its ability to allow VM_FAULT_RETRY behaviour; this adjustment allows us to subsequently unexport __get_user_pages_unlocked() as well as allowing for future flexibility in the use of get_user_pages_remote(). 
[sfr@canb.auug.org.au: merge fix for get_user_pages_remote API change] Link: http://lkml.kernel.org/r/20161122210511.024ec341@canb.auug.org.au Link: http://lkml.kernel.org/r/20161027095141.2569-2-lstoakes@gmail.com Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Cc: Jan Kara Cc: Hugh Dickins Cc: Dave Hansen Cc: Rik van Riel Cc: Mel Gorman Cc: Paolo Bonzini Cc: Radim Krcmar Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index c264f7cd3e47..3a6a1239c42b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3919,7 +3919,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, struct page *page = NULL; ret = get_user_pages_remote(tsk, mm, addr, 1, - gup_flags, &page, &vma); + gup_flags, &page, &vma, NULL); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; -- cgit v1.2.3 From 82b0f8c39a3869b6fd2a10e180a862248736ec6f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:06:58 -0800 Subject: mm: join struct fault_env and vm_fault Currently we have two different structures for passing fault information around - struct vm_fault and struct fault_env. DAX will need more information in struct vm_fault to handle its faults so the content of that structure would become even closer to fault_env. Furthermore it would need to generate struct fault_env to be able to call some of the generic functions. So at this point I don't think there's much use in keeping these two structures separate. Just embed into struct vm_fault all that is needed to use it for both purposes. Link: http://lkml.kernel.org/r/1479460644-25076-2-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Acked-by: Kirill A. 
Shutemov Cc: Ross Zwisler Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 568 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 286 insertions(+), 282 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 3a6a1239c42b..512e1c359193 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2070,11 +2070,11 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, * case, all we need to do here is to mark the page as writable and update * any related book-keeping. */ -static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte, +static inline int wp_page_reuse(struct vm_fault *vmf, pte_t orig_pte, struct page *page, int page_mkwrite, int dirty_shared) - __releases(fe->ptl) + __releases(vmf->ptl) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; pte_t entry; /* * Clear the pages cpupid information as the existing @@ -2084,12 +2084,12 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte, if (page) page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); - flush_cache_page(vma, fe->address, pte_pfn(orig_pte)); + flush_cache_page(vma, vmf->address, pte_pfn(orig_pte)); entry = pte_mkyoung(orig_pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); - if (ptep_set_access_flags(vma, fe->address, fe->pte, entry, 1)) - update_mmu_cache(vma, fe->address, fe->pte); - pte_unmap_unlock(fe->pte, fe->ptl); + if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) + update_mmu_cache(vma, vmf->address, vmf->pte); + pte_unmap_unlock(vmf->pte, vmf->ptl); if (dirty_shared) { struct address_space *mapping; @@ -2135,15 +2135,15 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte, * held to the old page, as well as updating the rmap. * - In any case, unlock the PTL and drop the reference we took to the old page. 
*/ -static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, +static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte, struct page *old_page) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; struct page *new_page = NULL; pte_t entry; int page_copied = 0; - const unsigned long mmun_start = fe->address & PAGE_MASK; + const unsigned long mmun_start = vmf->address & PAGE_MASK; const unsigned long mmun_end = mmun_start + PAGE_SIZE; struct mem_cgroup *memcg; @@ -2151,15 +2151,16 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, goto oom; if (is_zero_pfn(pte_pfn(orig_pte))) { - new_page = alloc_zeroed_user_highpage_movable(vma, fe->address); + new_page = alloc_zeroed_user_highpage_movable(vma, + vmf->address); if (!new_page) goto oom; } else { new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, - fe->address); + vmf->address); if (!new_page) goto oom; - cow_user_page(new_page, old_page, fe->address, vma); + cow_user_page(new_page, old_page, vmf->address, vma); } if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) @@ -2172,8 +2173,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, /* * Re-check the pte - we dropped the lock */ - fe->pte = pte_offset_map_lock(mm, fe->pmd, fe->address, &fe->ptl); - if (likely(pte_same(*fe->pte, orig_pte))) { + vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); + if (likely(pte_same(*vmf->pte, orig_pte))) { if (old_page) { if (!PageAnon(old_page)) { dec_mm_counter_fast(mm, @@ -2183,7 +2184,7 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, } else { inc_mm_counter_fast(mm, MM_ANONPAGES); } - flush_cache_page(vma, fe->address, pte_pfn(orig_pte)); + flush_cache_page(vma, vmf->address, pte_pfn(orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* @@ -2192,8 +2193,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, * 
seen in the presence of one thread doing SMC and another * thread doing COW. */ - ptep_clear_flush_notify(vma, fe->address, fe->pte); - page_add_new_anon_rmap(new_page, vma, fe->address, false); + ptep_clear_flush_notify(vma, vmf->address, vmf->pte); + page_add_new_anon_rmap(new_page, vma, vmf->address, false); mem_cgroup_commit_charge(new_page, memcg, false, false); lru_cache_add_active_or_unevictable(new_page, vma); /* @@ -2201,8 +2202,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, * mmu page tables (such as kvm shadow page tables), we want the * new page to be mapped directly into the secondary page table. */ - set_pte_at_notify(mm, fe->address, fe->pte, entry); - update_mmu_cache(vma, fe->address, fe->pte); + set_pte_at_notify(mm, vmf->address, vmf->pte, entry); + update_mmu_cache(vma, vmf->address, vmf->pte); if (old_page) { /* * Only after switching the pte to the new page may @@ -2239,7 +2240,7 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte, if (new_page) put_page(new_page); - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); if (old_page) { /* @@ -2267,43 +2268,43 @@ oom: * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * mapping */ -static int wp_pfn_shared(struct fault_env *fe, pte_t orig_pte) +static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { - struct vm_fault vmf = { + struct vm_fault vmf2 = { .page = NULL, - .pgoff = linear_page_index(vma, fe->address), + .pgoff = linear_page_index(vma, vmf->address), .virtual_address = - (void __user *)(fe->address & PAGE_MASK), + (void __user *)(vmf->address & PAGE_MASK), .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE, }; int ret; - pte_unmap_unlock(fe->pte, fe->ptl); - ret = vma->vm_ops->pfn_mkwrite(vma, &vmf); + 
pte_unmap_unlock(vmf->pte, vmf->ptl); + ret = vma->vm_ops->pfn_mkwrite(vma, &vmf2); if (ret & VM_FAULT_ERROR) return ret; - fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, - &fe->ptl); + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); /* * We might have raced with another page fault while we * released the pte_offset_map_lock. */ - if (!pte_same(*fe->pte, orig_pte)) { - pte_unmap_unlock(fe->pte, fe->ptl); + if (!pte_same(*vmf->pte, orig_pte)) { + pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } } - return wp_page_reuse(fe, orig_pte, NULL, 0, 0); + return wp_page_reuse(vmf, orig_pte, NULL, 0, 0); } -static int wp_page_shared(struct fault_env *fe, pte_t orig_pte, +static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte, struct page *old_page) - __releases(fe->ptl) + __releases(vmf->ptl) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; int page_mkwrite = 0; get_page(old_page); @@ -2311,8 +2312,8 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte, if (vma->vm_ops && vma->vm_ops->page_mkwrite) { int tmp; - pte_unmap_unlock(fe->pte, fe->ptl); - tmp = do_page_mkwrite(vma, old_page, fe->address); + pte_unmap_unlock(vmf->pte, vmf->ptl); + tmp = do_page_mkwrite(vma, old_page, vmf->address); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(old_page); @@ -2324,18 +2325,18 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte, * they did, we just return, as we can count on the * MMU to tell us if they didn't also make it writable. 
*/ - fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, - &fe->ptl); - if (!pte_same(*fe->pte, orig_pte)) { + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (!pte_same(*vmf->pte, orig_pte)) { unlock_page(old_page); - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); put_page(old_page); return 0; } page_mkwrite = 1; } - return wp_page_reuse(fe, orig_pte, old_page, page_mkwrite, 1); + return wp_page_reuse(vmf, orig_pte, old_page, page_mkwrite, 1); } /* @@ -2356,13 +2357,13 @@ static int wp_page_shared(struct fault_env *fe, pte_t orig_pte, * but allow concurrent faults), with pte both mapped and locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -static int do_wp_page(struct fault_env *fe, pte_t orig_pte) - __releases(fe->ptl) +static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte) + __releases(vmf->ptl) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; struct page *old_page; - old_page = vm_normal_page(vma, fe->address, orig_pte); + old_page = vm_normal_page(vma, vmf->address, orig_pte); if (!old_page) { /* * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a @@ -2373,10 +2374,10 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte) */ if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)) - return wp_pfn_shared(fe, orig_pte); + return wp_pfn_shared(vmf, orig_pte); - pte_unmap_unlock(fe->pte, fe->ptl); - return wp_page_copy(fe, orig_pte, old_page); + pte_unmap_unlock(vmf->pte, vmf->ptl); + return wp_page_copy(vmf, orig_pte, old_page); } /* @@ -2387,13 +2388,13 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte) int total_mapcount; if (!trylock_page(old_page)) { get_page(old_page); - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); lock_page(old_page); - fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, - fe->address, &fe->ptl); - if (!pte_same(*fe->pte, 
orig_pte)) { + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (!pte_same(*vmf->pte, orig_pte)) { unlock_page(old_page); - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); put_page(old_page); return 0; } @@ -2411,12 +2412,12 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte) page_move_anon_rmap(old_page, vma); } unlock_page(old_page); - return wp_page_reuse(fe, orig_pte, old_page, 0, 0); + return wp_page_reuse(vmf, orig_pte, old_page, 0, 0); } unlock_page(old_page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { - return wp_page_shared(fe, orig_pte, old_page); + return wp_page_shared(vmf, orig_pte, old_page); } /* @@ -2424,8 +2425,8 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte) */ get_page(old_page); - pte_unmap_unlock(fe->pte, fe->ptl); - return wp_page_copy(fe, orig_pte, old_page); + pte_unmap_unlock(vmf->pte, vmf->ptl); + return wp_page_copy(vmf, orig_pte, old_page); } static void unmap_mapping_range_vma(struct vm_area_struct *vma, @@ -2513,9 +2514,9 @@ EXPORT_SYMBOL(unmap_mapping_range); * We return with the mmap_sem locked or unlocked in the same cases * as does filemap_fault(). 
*/ -int do_swap_page(struct fault_env *fe, pte_t orig_pte) +int do_swap_page(struct vm_fault *vmf, pte_t orig_pte) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; struct page *page, *swapcache; struct mem_cgroup *memcg; swp_entry_t entry; @@ -2524,17 +2525,18 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) int exclusive = 0; int ret = 0; - if (!pte_unmap_same(vma->vm_mm, fe->pmd, fe->pte, orig_pte)) + if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, orig_pte)) goto out; entry = pte_to_swp_entry(orig_pte); if (unlikely(non_swap_entry(entry))) { if (is_migration_entry(entry)) { - migration_entry_wait(vma->vm_mm, fe->pmd, fe->address); + migration_entry_wait(vma->vm_mm, vmf->pmd, + vmf->address); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; } else { - print_bad_pte(vma, fe->address, orig_pte, NULL); + print_bad_pte(vma, vmf->address, orig_pte, NULL); ret = VM_FAULT_SIGBUS; } goto out; @@ -2542,16 +2544,16 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry); if (!page) { - page = swapin_readahead(entry, - GFP_HIGHUSER_MOVABLE, vma, fe->address); + page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vma, + vmf->address); if (!page) { /* * Back out if somebody else faulted in this pte * while we released the pte lock. 
*/ - fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, - fe->address, &fe->ptl); - if (likely(pte_same(*fe->pte, orig_pte))) + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (likely(pte_same(*vmf->pte, orig_pte))) ret = VM_FAULT_OOM; delayacct_clear_flag(DELAYACCT_PF_SWAPIN); goto unlock; @@ -2573,7 +2575,7 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) } swapcache = page; - locked = lock_page_or_retry(page, vma->vm_mm, fe->flags); + locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); if (!locked) { @@ -2590,7 +2592,7 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val)) goto out_page; - page = ksm_might_need_to_copy(page, vma, fe->address); + page = ksm_might_need_to_copy(page, vma, vmf->address); if (unlikely(!page)) { ret = VM_FAULT_OOM; page = swapcache; @@ -2606,9 +2608,9 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) /* * Back out if somebody else already faulted in this pte. 
*/ - fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, - &fe->ptl); - if (unlikely(!pte_same(*fe->pte, orig_pte))) + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, + &vmf->ptl); + if (unlikely(!pte_same(*vmf->pte, orig_pte))) goto out_nomap; if (unlikely(!PageUptodate(page))) { @@ -2629,22 +2631,22 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); - if ((fe->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { + if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); - fe->flags &= ~FAULT_FLAG_WRITE; + vmf->flags &= ~FAULT_FLAG_WRITE; ret |= VM_FAULT_WRITE; exclusive = RMAP_EXCLUSIVE; } flush_icache_page(vma, page); if (pte_swp_soft_dirty(orig_pte)) pte = pte_mksoft_dirty(pte); - set_pte_at(vma->vm_mm, fe->address, fe->pte, pte); + set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); if (page == swapcache) { - do_page_add_anon_rmap(page, vma, fe->address, exclusive); + do_page_add_anon_rmap(page, vma, vmf->address, exclusive); mem_cgroup_commit_charge(page, memcg, true, false); activate_page(page); } else { /* ksm created a completely new copy */ - page_add_new_anon_rmap(page, vma, fe->address, false); + page_add_new_anon_rmap(page, vma, vmf->address, false); mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); } @@ -2667,22 +2669,22 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte) put_page(swapcache); } - if (fe->flags & FAULT_FLAG_WRITE) { - ret |= do_wp_page(fe, pte); + if (vmf->flags & FAULT_FLAG_WRITE) { + ret |= do_wp_page(vmf, pte); if (ret & VM_FAULT_ERROR) ret &= VM_FAULT_ERROR; goto out; } /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, fe->address, fe->pte); + update_mmu_cache(vma, vmf->address, vmf->pte); unlock: - 
pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); out: return ret; out_nomap: mem_cgroup_cancel_charge(page, memcg, false); - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); out_page: unlock_page(page); out_release: @@ -2733,9 +2735,9 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -static int do_anonymous_page(struct fault_env *fe) +static int do_anonymous_page(struct vm_fault *vmf) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; struct mem_cgroup *memcg; struct page *page; pte_t entry; @@ -2745,7 +2747,7 @@ static int do_anonymous_page(struct fault_env *fe) return VM_FAULT_SIGBUS; /* Check if we need to add a guard page to the stack */ - if (check_stack_guard_page(vma, fe->address) < 0) + if (check_stack_guard_page(vma, vmf->address) < 0) return VM_FAULT_SIGSEGV; /* @@ -2758,26 +2760,26 @@ static int do_anonymous_page(struct fault_env *fe) * * Here we only have down_read(mmap_sem). 
*/ - if (pte_alloc(vma->vm_mm, fe->pmd, fe->address)) + if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address)) return VM_FAULT_OOM; /* See the comment in pte_alloc_one_map() */ - if (unlikely(pmd_trans_unstable(fe->pmd))) + if (unlikely(pmd_trans_unstable(vmf->pmd))) return 0; /* Use the zero-page for reads */ - if (!(fe->flags & FAULT_FLAG_WRITE) && + if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { - entry = pte_mkspecial(pfn_pte(my_zero_pfn(fe->address), + entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), vma->vm_page_prot)); - fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, - &fe->ptl); - if (!pte_none(*fe->pte)) + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (!pte_none(*vmf->pte)) goto unlock; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { - pte_unmap_unlock(fe->pte, fe->ptl); - return handle_userfault(fe, VM_UFFD_MISSING); + pte_unmap_unlock(vmf->pte, vmf->ptl); + return handle_userfault(vmf, VM_UFFD_MISSING); } goto setpte; } @@ -2785,7 +2787,7 @@ static int do_anonymous_page(struct fault_env *fe) /* Allocate our own private page. 
*/ if (unlikely(anon_vma_prepare(vma))) goto oom; - page = alloc_zeroed_user_highpage_movable(vma, fe->address); + page = alloc_zeroed_user_highpage_movable(vma, vmf->address); if (!page) goto oom; @@ -2803,30 +2805,30 @@ static int do_anonymous_page(struct fault_env *fe) if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); - fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, - &fe->ptl); - if (!pte_none(*fe->pte)) + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, + &vmf->ptl); + if (!pte_none(*vmf->pte)) goto release; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); mem_cgroup_cancel_charge(page, memcg, false); put_page(page); - return handle_userfault(fe, VM_UFFD_MISSING); + return handle_userfault(vmf, VM_UFFD_MISSING); } inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); - page_add_new_anon_rmap(page, vma, fe->address, false); + page_add_new_anon_rmap(page, vma, vmf->address, false); mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); setpte: - set_pte_at(vma->vm_mm, fe->address, fe->pte, entry); + set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, fe->address, fe->pte); + update_mmu_cache(vma, vmf->address, vmf->pte); unlock: - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; release: mem_cgroup_cancel_charge(page, memcg, false); @@ -2843,62 +2845,62 @@ oom: * released depending on flags and vma->vm_ops->fault() return value. * See filemap_fault() and __lock_page_retry(). 
*/ -static int __do_fault(struct fault_env *fe, pgoff_t pgoff, +static int __do_fault(struct vm_fault *vmf, pgoff_t pgoff, struct page *cow_page, struct page **page, void **entry) { - struct vm_area_struct *vma = fe->vma; - struct vm_fault vmf; + struct vm_area_struct *vma = vmf->vma; + struct vm_fault vmf2; int ret; - vmf.virtual_address = (void __user *)(fe->address & PAGE_MASK); - vmf.pgoff = pgoff; - vmf.flags = fe->flags; - vmf.page = NULL; - vmf.gfp_mask = __get_fault_gfp_mask(vma); - vmf.cow_page = cow_page; + vmf2.virtual_address = (void __user *)(vmf->address & PAGE_MASK); + vmf2.pgoff = pgoff; + vmf2.flags = vmf->flags; + vmf2.page = NULL; + vmf2.gfp_mask = __get_fault_gfp_mask(vma); + vmf2.cow_page = cow_page; - ret = vma->vm_ops->fault(vma, &vmf); + ret = vma->vm_ops->fault(vma, &vmf2); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; if (ret & VM_FAULT_DAX_LOCKED) { - *entry = vmf.entry; + *entry = vmf2.entry; return ret; } - if (unlikely(PageHWPoison(vmf.page))) { + if (unlikely(PageHWPoison(vmf2.page))) { if (ret & VM_FAULT_LOCKED) - unlock_page(vmf.page); - put_page(vmf.page); + unlock_page(vmf2.page); + put_page(vmf2.page); return VM_FAULT_HWPOISON; } if (unlikely(!(ret & VM_FAULT_LOCKED))) - lock_page(vmf.page); + lock_page(vmf2.page); else - VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page); + VM_BUG_ON_PAGE(!PageLocked(vmf2.page), vmf2.page); - *page = vmf.page; + *page = vmf2.page; return ret; } -static int pte_alloc_one_map(struct fault_env *fe) +static int pte_alloc_one_map(struct vm_fault *vmf) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; - if (!pmd_none(*fe->pmd)) + if (!pmd_none(*vmf->pmd)) goto map_pte; - if (fe->prealloc_pte) { - fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); - if (unlikely(!pmd_none(*fe->pmd))) { - spin_unlock(fe->ptl); + if (vmf->prealloc_pte) { + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_none(*vmf->pmd))) { + spin_unlock(vmf->ptl); 
goto map_pte; } atomic_long_inc(&vma->vm_mm->nr_ptes); - pmd_populate(vma->vm_mm, fe->pmd, fe->prealloc_pte); - spin_unlock(fe->ptl); - fe->prealloc_pte = 0; - } else if (unlikely(pte_alloc(vma->vm_mm, fe->pmd, fe->address))) { + pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); + spin_unlock(vmf->ptl); + vmf->prealloc_pte = 0; + } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) { return VM_FAULT_OOM; } map_pte: @@ -2913,11 +2915,11 @@ map_pte: * through an atomic read in C, which is what pmd_trans_unstable() * provides. */ - if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd)) + if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) return VM_FAULT_NOPAGE; - fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address, - &fe->ptl); + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, + &vmf->ptl); return 0; } @@ -2935,24 +2937,24 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, return true; } -static void deposit_prealloc_pte(struct fault_env *fe) +static void deposit_prealloc_pte(struct vm_fault *vmf) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; - pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte); + pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); /* * We are going to consume the prealloc table, * count that as nr_ptes. 
*/ atomic_long_inc(&vma->vm_mm->nr_ptes); - fe->prealloc_pte = 0; + vmf->prealloc_pte = 0; } -static int do_set_pmd(struct fault_env *fe, struct page *page) +static int do_set_pmd(struct vm_fault *vmf, struct page *page) { - struct vm_area_struct *vma = fe->vma; - bool write = fe->flags & FAULT_FLAG_WRITE; - unsigned long haddr = fe->address & HPAGE_PMD_MASK; + struct vm_area_struct *vma = vmf->vma; + bool write = vmf->flags & FAULT_FLAG_WRITE; + unsigned long haddr = vmf->address & HPAGE_PMD_MASK; pmd_t entry; int i, ret; @@ -2966,15 +2968,15 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) * Archs like ppc64 need additonal space to store information * related to pte entry. Use the preallocated table for that. */ - if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) { - fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address); - if (!fe->prealloc_pte) + if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { + vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address); + if (!vmf->prealloc_pte) return VM_FAULT_OOM; smp_wmb(); /* See comment in __pte_alloc() */ } - fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); - if (unlikely(!pmd_none(*fe->pmd))) + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_none(*vmf->pmd))) goto out; for (i = 0; i < HPAGE_PMD_NR; i++) @@ -2990,11 +2992,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) * deposit and withdraw with pmd lock held */ if (arch_needs_pgtable_deposit()) - deposit_prealloc_pte(fe); + deposit_prealloc_pte(vmf); - set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry); + set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); - update_mmu_cache_pmd(vma, haddr, fe->pmd); + update_mmu_cache_pmd(vma, haddr, vmf->pmd); /* fault is handled */ ret = 0; @@ -3005,13 +3007,13 @@ out: * withdraw with pmd lock held. 
*/ if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK) - fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, - fe->pmd); - spin_unlock(fe->ptl); + vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, + vmf->pmd); + spin_unlock(vmf->ptl); return ret; } #else -static int do_set_pmd(struct fault_env *fe, struct page *page) +static int do_set_pmd(struct vm_fault *vmf, struct page *page) { BUILD_BUG(); return 0; @@ -3022,41 +3024,42 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) * alloc_set_pte - setup new PTE entry for given page and add reverse page * mapping. If needed, the fucntion allocates page table or use pre-allocated. * - * @fe: fault environment + * @vmf: fault environment * @memcg: memcg to charge page (only for private mappings) * @page: page to map * - * Caller must take care of unlocking fe->ptl, if fe->pte is non-NULL on return. + * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on + * return. * * Target users are page handler itself and implementations of * vm_ops->map_pages. */ -int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, +int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, struct page *page) { - struct vm_area_struct *vma = fe->vma; - bool write = fe->flags & FAULT_FLAG_WRITE; + struct vm_area_struct *vma = vmf->vma; + bool write = vmf->flags & FAULT_FLAG_WRITE; pte_t entry; int ret; - if (pmd_none(*fe->pmd) && PageTransCompound(page) && + if (pmd_none(*vmf->pmd) && PageTransCompound(page) && IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { /* THP on COW? 
*/ VM_BUG_ON_PAGE(memcg, page); - ret = do_set_pmd(fe, page); + ret = do_set_pmd(vmf, page); if (ret != VM_FAULT_FALLBACK) goto fault_handled; } - if (!fe->pte) { - ret = pte_alloc_one_map(fe); + if (!vmf->pte) { + ret = pte_alloc_one_map(vmf); if (ret) goto fault_handled; } /* Re-check under ptl */ - if (unlikely(!pte_none(*fe->pte))) { + if (unlikely(!pte_none(*vmf->pte))) { ret = VM_FAULT_NOPAGE; goto fault_handled; } @@ -3068,24 +3071,24 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, /* copy-on-write page */ if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); - page_add_new_anon_rmap(page, vma, fe->address, false); + page_add_new_anon_rmap(page, vma, vmf->address, false); mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); } else { inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page, false); } - set_pte_at(vma->vm_mm, fe->address, fe->pte, entry); + set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); /* no need to invalidate: a not-present page won't be cached */ - update_mmu_cache(vma, fe->address, fe->pte); + update_mmu_cache(vma, vmf->address, vmf->pte); ret = 0; fault_handled: /* preallocated pagetable is unused: free it */ - if (fe->prealloc_pte) { - pte_free(fe->vma->vm_mm, fe->prealloc_pte); - fe->prealloc_pte = 0; + if (vmf->prealloc_pte) { + pte_free(vmf->vma->vm_mm, vmf->prealloc_pte); + vmf->prealloc_pte = 0; } return ret; } @@ -3154,17 +3157,17 @@ late_initcall(fault_around_debugfs); * fault_around_pages() value (and therefore to page order). This way it's * easier to guarantee that we don't cross page table boundaries. 
*/ -static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff) +static int do_fault_around(struct vm_fault *vmf, pgoff_t start_pgoff) { - unsigned long address = fe->address, nr_pages, mask; + unsigned long address = vmf->address, nr_pages, mask; pgoff_t end_pgoff; int off, ret = 0; nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; - fe->address = max(address & mask, fe->vma->vm_start); - off = ((address - fe->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + vmf->address = max(address & mask, vmf->vma->vm_start); + off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); start_pgoff -= off; /* @@ -3172,44 +3175,45 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff) * or fault_around_pages() from start_pgoff, depending what is nearest. */ end_pgoff = start_pgoff - - ((fe->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + + ((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; - end_pgoff = min3(end_pgoff, vma_pages(fe->vma) + fe->vma->vm_pgoff - 1, + end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); - if (pmd_none(*fe->pmd)) { - fe->prealloc_pte = pte_alloc_one(fe->vma->vm_mm, fe->address); - if (!fe->prealloc_pte) + if (pmd_none(*vmf->pmd)) { + vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm, + vmf->address); + if (!vmf->prealloc_pte) goto out; smp_wmb(); /* See comment in __pte_alloc() */ } - fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff); + vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); /* Huge page is mapped? Page fault is solved */ - if (pmd_trans_huge(*fe->pmd)) { + if (pmd_trans_huge(*vmf->pmd)) { ret = VM_FAULT_NOPAGE; goto out; } /* ->map_pages() haven't done anything useful. Cold page cache? 
*/ - if (!fe->pte) + if (!vmf->pte) goto out; /* check if the page fault is solved */ - fe->pte -= (fe->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT); - if (!pte_none(*fe->pte)) + vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT); + if (!pte_none(*vmf->pte)) ret = VM_FAULT_NOPAGE; - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); out: - fe->address = address; - fe->pte = NULL; + vmf->address = address; + vmf->pte = NULL; return ret; } -static int do_read_fault(struct fault_env *fe, pgoff_t pgoff) +static int do_read_fault(struct vm_fault *vmf, pgoff_t pgoff) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; struct page *fault_page; int ret = 0; @@ -3219,27 +3223,27 @@ static int do_read_fault(struct fault_env *fe, pgoff_t pgoff) * something). */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { - ret = do_fault_around(fe, pgoff); + ret = do_fault_around(vmf, pgoff); if (ret) return ret; } - ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL); + ret = __do_fault(vmf, pgoff, NULL, &fault_page, NULL); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; - ret |= alloc_set_pte(fe, NULL, fault_page); - if (fe->pte) - pte_unmap_unlock(fe->pte, fe->ptl); + ret |= alloc_set_pte(vmf, NULL, fault_page); + if (vmf->pte) + pte_unmap_unlock(vmf->pte, vmf->ptl); unlock_page(fault_page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) put_page(fault_page); return ret; } -static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff) +static int do_cow_fault(struct vm_fault *vmf, pgoff_t pgoff) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; struct page *fault_page, *new_page; void *fault_entry; struct mem_cgroup *memcg; @@ -3248,7 +3252,7 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff) if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; - new_page = 
alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, fe->address); + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (!new_page) return VM_FAULT_OOM; @@ -3258,17 +3262,17 @@ static int do_cow_fault(struct fault_env *fe, pgoff_t pgoff) return VM_FAULT_OOM; } - ret = __do_fault(fe, pgoff, new_page, &fault_page, &fault_entry); + ret = __do_fault(vmf, pgoff, new_page, &fault_page, &fault_entry); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; if (!(ret & VM_FAULT_DAX_LOCKED)) - copy_user_highpage(new_page, fault_page, fe->address, vma); + copy_user_highpage(new_page, fault_page, vmf->address, vma); __SetPageUptodate(new_page); - ret |= alloc_set_pte(fe, memcg, new_page); - if (fe->pte) - pte_unmap_unlock(fe->pte, fe->ptl); + ret |= alloc_set_pte(vmf, memcg, new_page); + if (vmf->pte) + pte_unmap_unlock(vmf->pte, vmf->ptl); if (!(ret & VM_FAULT_DAX_LOCKED)) { unlock_page(fault_page); put_page(fault_page); @@ -3284,15 +3288,15 @@ uncharge_out: return ret; } -static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff) +static int do_shared_fault(struct vm_fault *vmf, pgoff_t pgoff) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; struct page *fault_page; struct address_space *mapping; int dirtied = 0; int ret, tmp; - ret = __do_fault(fe, pgoff, NULL, &fault_page, NULL); + ret = __do_fault(vmf, pgoff, NULL, &fault_page, NULL); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; @@ -3302,7 +3306,7 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff) */ if (vma->vm_ops->page_mkwrite) { unlock_page(fault_page); - tmp = do_page_mkwrite(vma, fault_page, fe->address); + tmp = do_page_mkwrite(vma, fault_page, vmf->address); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(fault_page); @@ -3310,9 +3314,9 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff) } } - ret |= alloc_set_pte(fe, NULL, 
fault_page); - if (fe->pte) - pte_unmap_unlock(fe->pte, fe->ptl); + ret |= alloc_set_pte(vmf, NULL, fault_page); + if (vmf->pte) + pte_unmap_unlock(vmf->pte, vmf->ptl); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) { unlock_page(fault_page); @@ -3350,19 +3354,19 @@ static int do_shared_fault(struct fault_env *fe, pgoff_t pgoff) * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ -static int do_fault(struct fault_env *fe) +static int do_fault(struct vm_fault *vmf) { - struct vm_area_struct *vma = fe->vma; - pgoff_t pgoff = linear_page_index(vma, fe->address); + struct vm_area_struct *vma = vmf->vma; + pgoff_t pgoff = linear_page_index(vma, vmf->address); /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ if (!vma->vm_ops->fault) return VM_FAULT_SIGBUS; - if (!(fe->flags & FAULT_FLAG_WRITE)) - return do_read_fault(fe, pgoff); + if (!(vmf->flags & FAULT_FLAG_WRITE)) + return do_read_fault(vmf, pgoff); if (!(vma->vm_flags & VM_SHARED)) - return do_cow_fault(fe, pgoff); - return do_shared_fault(fe, pgoff); + return do_cow_fault(vmf, pgoff); + return do_shared_fault(vmf, pgoff); } static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, @@ -3380,9 +3384,9 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, return mpol_misplaced(page, vma, addr); } -static int do_numa_page(struct fault_env *fe, pte_t pte) +static int do_numa_page(struct vm_fault *vmf, pte_t pte) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; struct page *page = NULL; int page_nid = -1; int last_cpupid; @@ -3400,10 +3404,10 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) * page table entry is not accessible, so there would be no * concurrent hardware modifications to the PTE. 
*/ - fe->ptl = pte_lockptr(vma->vm_mm, fe->pmd); - spin_lock(fe->ptl); - if (unlikely(!pte_same(*fe->pte, pte))) { - pte_unmap_unlock(fe->pte, fe->ptl); + vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd); + spin_lock(vmf->ptl); + if (unlikely(!pte_same(*vmf->pte, pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); goto out; } @@ -3412,18 +3416,18 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) pte = pte_mkyoung(pte); if (was_writable) pte = pte_mkwrite(pte); - set_pte_at(vma->vm_mm, fe->address, fe->pte, pte); - update_mmu_cache(vma, fe->address, fe->pte); + set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); + update_mmu_cache(vma, vmf->address, vmf->pte); - page = vm_normal_page(vma, fe->address, pte); + page = vm_normal_page(vma, vmf->address, pte); if (!page) { - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } /* TODO: handle PTE-mapped THP */ if (PageCompound(page)) { - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } @@ -3447,9 +3451,9 @@ static int do_numa_page(struct fault_env *fe, pte_t pte) last_cpupid = page_cpupid_last(page); page_nid = page_to_nid(page); - target_nid = numa_migrate_prep(page, vma, fe->address, page_nid, + target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, &flags); - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); if (target_nid == -1) { put_page(page); goto out; @@ -3469,28 +3473,28 @@ out: return 0; } -static int create_huge_pmd(struct fault_env *fe) +static int create_huge_pmd(struct vm_fault *vmf) { - struct vm_area_struct *vma = fe->vma; + struct vm_area_struct *vma = vmf->vma; if (vma_is_anonymous(vma)) - return do_huge_pmd_anonymous_page(fe); + return do_huge_pmd_anonymous_page(vmf); if (vma->vm_ops->pmd_fault) - return vma->vm_ops->pmd_fault(vma, fe->address, fe->pmd, - fe->flags); + return vma->vm_ops->pmd_fault(vma, vmf->address, vmf->pmd, + vmf->flags); return VM_FAULT_FALLBACK; } -static int 
wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd) +static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) { - if (vma_is_anonymous(fe->vma)) - return do_huge_pmd_wp_page(fe, orig_pmd); - if (fe->vma->vm_ops->pmd_fault) - return fe->vma->vm_ops->pmd_fault(fe->vma, fe->address, fe->pmd, - fe->flags); + if (vma_is_anonymous(vmf->vma)) + return do_huge_pmd_wp_page(vmf, orig_pmd); + if (vmf->vma->vm_ops->pmd_fault) + return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf->address, + vmf->pmd, vmf->flags); /* COW handled on pte level: split pmd */ - VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma); - __split_huge_pmd(fe->vma, fe->pmd, fe->address, false, NULL); + VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma); + __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); return VM_FAULT_FALLBACK; } @@ -3515,21 +3519,21 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) * The mmap_sem may have been released depending on flags and our return value. * See filemap_fault() and __lock_page_or_retry(). */ -static int handle_pte_fault(struct fault_env *fe) +static int handle_pte_fault(struct vm_fault *vmf) { pte_t entry; - if (unlikely(pmd_none(*fe->pmd))) { + if (unlikely(pmd_none(*vmf->pmd))) { /* * Leave __pte_alloc() until later: because vm_ops->fault may * want to allocate huge page, and if we expose page table * for an instant, it will be difficult to retract from * concurrent faults and from rmap lookups. */ - fe->pte = NULL; + vmf->pte = NULL; } else { /* See comment in pte_alloc_one_map() */ - if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd)) + if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) return 0; /* * A regular pmd is established and it can't morph into a huge @@ -3537,9 +3541,9 @@ static int handle_pte_fault(struct fault_env *fe) * mmap_sem read mode and khugepaged takes it in write mode. * So now it's safe to run pte_offset_map(). 
*/ - fe->pte = pte_offset_map(fe->pmd, fe->address); + vmf->pte = pte_offset_map(vmf->pmd, vmf->address); - entry = *fe->pte; + entry = *vmf->pte; /* * some architectures can have larger ptes than wordsize, @@ -3551,37 +3555,37 @@ static int handle_pte_fault(struct fault_env *fe) */ barrier(); if (pte_none(entry)) { - pte_unmap(fe->pte); - fe->pte = NULL; + pte_unmap(vmf->pte); + vmf->pte = NULL; } } - if (!fe->pte) { - if (vma_is_anonymous(fe->vma)) - return do_anonymous_page(fe); + if (!vmf->pte) { + if (vma_is_anonymous(vmf->vma)) + return do_anonymous_page(vmf); else - return do_fault(fe); + return do_fault(vmf); } if (!pte_present(entry)) - return do_swap_page(fe, entry); + return do_swap_page(vmf, entry); - if (pte_protnone(entry) && vma_is_accessible(fe->vma)) - return do_numa_page(fe, entry); + if (pte_protnone(entry) && vma_is_accessible(vmf->vma)) + return do_numa_page(vmf, entry); - fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd); - spin_lock(fe->ptl); - if (unlikely(!pte_same(*fe->pte, entry))) + vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); + spin_lock(vmf->ptl); + if (unlikely(!pte_same(*vmf->pte, entry))) goto unlock; - if (fe->flags & FAULT_FLAG_WRITE) { + if (vmf->flags & FAULT_FLAG_WRITE) { if (!pte_write(entry)) - return do_wp_page(fe, entry); + return do_wp_page(vmf, entry); entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); - if (ptep_set_access_flags(fe->vma, fe->address, fe->pte, entry, - fe->flags & FAULT_FLAG_WRITE)) { - update_mmu_cache(fe->vma, fe->address, fe->pte); + if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, + vmf->flags & FAULT_FLAG_WRITE)) { + update_mmu_cache(vmf->vma, vmf->address, vmf->pte); } else { /* * This is needed only for protection faults but the arch code @@ -3589,11 +3593,11 @@ static int handle_pte_fault(struct fault_env *fe) * This still avoids useless tlb flushes for .text page faults * with threads. 
*/ - if (fe->flags & FAULT_FLAG_WRITE) - flush_tlb_fix_spurious_fault(fe->vma, fe->address); + if (vmf->flags & FAULT_FLAG_WRITE) + flush_tlb_fix_spurious_fault(vmf->vma, vmf->address); } unlock: - pte_unmap_unlock(fe->pte, fe->ptl); + pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } @@ -3606,7 +3610,7 @@ unlock: static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { - struct fault_env fe = { + struct vm_fault vmf = { .vma = vma, .address = address, .flags = flags, @@ -3619,35 +3623,35 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, pud = pud_alloc(mm, pgd, address); if (!pud) return VM_FAULT_OOM; - fe.pmd = pmd_alloc(mm, pud, address); - if (!fe.pmd) + vmf.pmd = pmd_alloc(mm, pud, address); + if (!vmf.pmd) return VM_FAULT_OOM; - if (pmd_none(*fe.pmd) && transparent_hugepage_enabled(vma)) { - int ret = create_huge_pmd(&fe); + if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) { + int ret = create_huge_pmd(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { - pmd_t orig_pmd = *fe.pmd; + pmd_t orig_pmd = *vmf.pmd; int ret; barrier(); if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) - return do_huge_pmd_numa_page(&fe, orig_pmd); + return do_huge_pmd_numa_page(&vmf, orig_pmd); - if ((fe.flags & FAULT_FLAG_WRITE) && + if ((vmf.flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) { - ret = wp_huge_pmd(&fe, orig_pmd); + ret = wp_huge_pmd(&vmf, orig_pmd); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { - huge_pmd_set_accessed(&fe, orig_pmd); + huge_pmd_set_accessed(&vmf, orig_pmd); return 0; } } } - return handle_pte_fault(&fe); + return handle_pte_fault(&vmf); } /* -- cgit v1.2.3 From 1a29d85eb0f19b7d8271923d8917d7b4f5540b3e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:01 -0800 Subject: mm: use vmf->address instead of vmf->virtual_address Every single user of vmf->virtual_address
typed that entry to unsigned long before doing anything with it so the type of virtual_address does not really provide us any additional safety. Just use masked vmf->address which already has the appropriate type. Link: http://lkml.kernel.org/r/1479460644-25076-3-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Acked-by: Kirill A. Shutemov Cc: Dan Williams Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 512e1c359193..379836261d4a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2040,7 +2040,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, struct vm_fault vmf; int ret; - vmf.virtual_address = (void __user *)(address & PAGE_MASK); + vmf.address = address & PAGE_MASK; vmf.pgoff = page->index; vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; vmf.gfp_mask = __get_fault_gfp_mask(vma); @@ -2276,8 +2276,7 @@ static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte) struct vm_fault vmf2 = { .page = NULL, .pgoff = linear_page_index(vma, vmf->address), - .virtual_address = - (void __user *)(vmf->address & PAGE_MASK), + .address = vmf->address, .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE, }; int ret; @@ -2852,7 +2851,7 @@ static int __do_fault(struct vm_fault *vmf, pgoff_t pgoff, struct vm_fault vmf2; int ret; - vmf2.virtual_address = (void __user *)(vmf->address & PAGE_MASK); + vmf2.address = vmf->address; vmf2.pgoff = pgoff; vmf2.flags = vmf->flags; vmf2.page = NULL; @@ -3612,7 +3611,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, { struct vm_fault vmf = { .vma = vma, - .address = address, + .address = address & PAGE_MASK, .flags = flags, }; struct mm_struct *mm = vma->vm_mm; -- cgit v1.2.3 From 0721ec8bc156fafc9057ec1df95cdb3bbc3cbae8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:04 -0800 Subject: mm: 
use pgoff in struct vm_fault instead of passing it separately struct vm_fault already has a pgoff entry. Use it instead of passing pgoff as a separate argument and then assigning it later. Link: http://lkml.kernel.org/r/1479460644-25076-4-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 379836261d4a..c514b4a07a7a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2275,7 +2275,7 @@ static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte) if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { struct vm_fault vmf2 = { .page = NULL, - .pgoff = linear_page_index(vma, vmf->address), + .pgoff = vmf->pgoff, .address = vmf->address, .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE, }; @@ -2844,15 +2844,15 @@ oom: * released depending on flags and vma->vm_ops->fault() return value. * See filemap_fault() and __lock_page_retry(). */ -static int __do_fault(struct vm_fault *vmf, pgoff_t pgoff, - struct page *cow_page, struct page **page, void **entry) +static int __do_fault(struct vm_fault *vmf, struct page *cow_page, + struct page **page, void **entry) { struct vm_area_struct *vma = vmf->vma; struct vm_fault vmf2; int ret; vmf2.address = vmf->address; - vmf2.pgoff = pgoff; + vmf2.pgoff = vmf->pgoff; vmf2.flags = vmf->flags; vmf2.page = NULL; vmf2.gfp_mask = __get_fault_gfp_mask(vma); @@ -3156,9 +3156,10 @@ late_initcall(fault_around_debugfs); * fault_around_pages() value (and therefore to page order). This way it's * easier to guarantee that we don't cross page table boundaries.
*/ -static int do_fault_around(struct vm_fault *vmf, pgoff_t start_pgoff) +static int do_fault_around(struct vm_fault *vmf) { unsigned long address = vmf->address, nr_pages, mask; + pgoff_t start_pgoff = vmf->pgoff; pgoff_t end_pgoff; int off, ret = 0; @@ -3210,7 +3211,7 @@ out: return ret; } -static int do_read_fault(struct vm_fault *vmf, pgoff_t pgoff) +static int do_read_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *fault_page; @@ -3222,12 +3223,12 @@ static int do_read_fault(struct vm_fault *vmf, pgoff_t pgoff) * something). */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { - ret = do_fault_around(vmf, pgoff); + ret = do_fault_around(vmf); if (ret) return ret; } - ret = __do_fault(vmf, pgoff, NULL, &fault_page, NULL); + ret = __do_fault(vmf, NULL, &fault_page, NULL); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; @@ -3240,7 +3241,7 @@ static int do_read_fault(struct vm_fault *vmf, pgoff_t pgoff) return ret; } -static int do_cow_fault(struct vm_fault *vmf, pgoff_t pgoff) +static int do_cow_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *fault_page, *new_page; @@ -3261,7 +3262,7 @@ static int do_cow_fault(struct vm_fault *vmf, pgoff_t pgoff) return VM_FAULT_OOM; } - ret = __do_fault(vmf, pgoff, new_page, &fault_page, &fault_entry); + ret = __do_fault(vmf, new_page, &fault_page, &fault_entry); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; @@ -3276,7 +3277,7 @@ static int do_cow_fault(struct vm_fault *vmf, pgoff_t pgoff) unlock_page(fault_page); put_page(fault_page); } else { - dax_unlock_mapping_entry(vma->vm_file->f_mapping, pgoff); + dax_unlock_mapping_entry(vma->vm_file->f_mapping, vmf->pgoff); } if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; @@ -3287,7 +3288,7 @@ uncharge_out: return ret; } -static int do_shared_fault(struct vm_fault *vmf, 
pgoff_t pgoff) +static int do_shared_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *fault_page; @@ -3295,7 +3296,7 @@ static int do_shared_fault(struct vm_fault *vmf, pgoff_t pgoff) int dirtied = 0; int ret, tmp; - ret = __do_fault(vmf, pgoff, NULL, &fault_page, NULL); + ret = __do_fault(vmf, NULL, &fault_page, NULL); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; @@ -3356,16 +3357,15 @@ static int do_shared_fault(struct vm_fault *vmf, pgoff_t pgoff) static int do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - pgoff_t pgoff = linear_page_index(vma, vmf->address); /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ if (!vma->vm_ops->fault) return VM_FAULT_SIGBUS; if (!(vmf->flags & FAULT_FLAG_WRITE)) - return do_read_fault(vmf, pgoff); + return do_read_fault(vmf); if (!(vma->vm_flags & VM_SHARED)) - return do_cow_fault(vmf, pgoff); - return do_shared_fault(vmf, pgoff); + return do_cow_fault(vmf); + return do_shared_fault(vmf); } static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, @@ -3613,6 +3613,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, .vma = vma, .address = address & PAGE_MASK, .flags = flags, + .pgoff = linear_page_index(vma, address), }; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; -- cgit v1.2.3 From 667240e0f2e13e792a5af99b3c34dfab12ef125b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:07 -0800 Subject: mm: use passed vm_fault structure in __do_fault() Instead of creating another vm_fault structure, use the one passed to __do_fault() for passing arguments into fault handler. Link: http://lkml.kernel.org/r/1479460644-25076-5-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. 
Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index c514b4a07a7a..cbc6d47fda73 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2848,37 +2848,31 @@ static int __do_fault(struct vm_fault *vmf, struct page *cow_page, struct page **page, void **entry) { struct vm_area_struct *vma = vmf->vma; - struct vm_fault vmf2; int ret; - vmf2.address = vmf->address; - vmf2.pgoff = vmf->pgoff; - vmf2.flags = vmf->flags; - vmf2.page = NULL; - vmf2.gfp_mask = __get_fault_gfp_mask(vma); - vmf2.cow_page = cow_page; + vmf->cow_page = cow_page; - ret = vma->vm_ops->fault(vma, &vmf2); + ret = vma->vm_ops->fault(vma, vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; if (ret & VM_FAULT_DAX_LOCKED) { - *entry = vmf2.entry; + *entry = vmf->entry; return ret; } - if (unlikely(PageHWPoison(vmf2.page))) { + if (unlikely(PageHWPoison(vmf->page))) { if (ret & VM_FAULT_LOCKED) - unlock_page(vmf2.page); - put_page(vmf2.page); + unlock_page(vmf->page); + put_page(vmf->page); return VM_FAULT_HWPOISON; } if (unlikely(!(ret & VM_FAULT_LOCKED))) - lock_page(vmf2.page); + lock_page(vmf->page); else - VM_BUG_ON_PAGE(!PageLocked(vmf2.page), vmf2.page); + VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); - *page = vmf2.page; + *page = vmf->page; return ret; } @@ -3614,6 +3608,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, .address = address & PAGE_MASK, .flags = flags, .pgoff = linear_page_index(vma, address), + .gfp_mask = __get_fault_gfp_mask(vma), }; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; -- cgit v1.2.3 From 936ca80d3773bd9b6dda8a0dfd54425f9ec1be9d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:10 -0800 Subject: mm: trim __do_fault() arguments Use vm_fault structure to pass cow_page, page, and entry 
in and out of the function. That reduces the number of __do_fault() arguments from 4 to 1. Link: http://lkml.kernel.org/r/1479460644-25076-6-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 67 ++++++++++++++++++++++++++----------------------------------- 1 file changed, 29 insertions(+), 38 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index cbc6d47fda73..78b81e8984df 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2844,26 +2844,22 @@ oom: * released depending on flags and vma->vm_ops->fault() return value. * See filemap_fault() and __lock_page_retry(). */ -static int __do_fault(struct vm_fault *vmf, struct page *cow_page, - struct page **page, void **entry) +static int __do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; int ret; - vmf->cow_page = cow_page; - ret = vma->vm_ops->fault(vma, vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; - if (ret & VM_FAULT_DAX_LOCKED) { - *entry = vmf->entry; + if (ret & VM_FAULT_DAX_LOCKED) return ret; - } if (unlikely(PageHWPoison(vmf->page))) { if (ret & VM_FAULT_LOCKED) unlock_page(vmf->page); put_page(vmf->page); + vmf->page = NULL; return VM_FAULT_HWPOISON; } @@ -2872,7 +2868,6 @@ static int __do_fault(struct vm_fault *vmf, struct page *cow_page, else VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); - *page = vmf->page; return ret; } @@ -3208,7 +3203,6 @@ out: static int do_read_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct page *fault_page; int ret = 0; /* @@ -3222,54 +3216,52 @@ static int do_read_fault(struct vm_fault *vmf) return ret; } - ret = __do_fault(vmf, NULL, &fault_page, NULL); + ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; - ret |= alloc_set_pte(vmf, NULL, fault_page); + ret |=
alloc_set_pte(vmf, NULL, vmf->page); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); - unlock_page(fault_page); + unlock_page(vmf->page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) - put_page(fault_page); + put_page(vmf->page); return ret; } static int do_cow_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct page *fault_page, *new_page; - void *fault_entry; struct mem_cgroup *memcg; int ret; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); - if (!new_page) + vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); + if (!vmf->cow_page) return VM_FAULT_OOM; - if (mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, + if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL, &memcg, false)) { - put_page(new_page); + put_page(vmf->cow_page); return VM_FAULT_OOM; } - ret = __do_fault(vmf, new_page, &fault_page, &fault_entry); + ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; if (!(ret & VM_FAULT_DAX_LOCKED)) - copy_user_highpage(new_page, fault_page, vmf->address, vma); - __SetPageUptodate(new_page); + copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); + __SetPageUptodate(vmf->cow_page); - ret |= alloc_set_pte(vmf, memcg, new_page); + ret |= alloc_set_pte(vmf, memcg, vmf->cow_page); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); if (!(ret & VM_FAULT_DAX_LOCKED)) { - unlock_page(fault_page); - put_page(fault_page); + unlock_page(vmf->page); + put_page(vmf->page); } else { dax_unlock_mapping_entry(vma->vm_file->f_mapping, vmf->pgoff); } @@ -3277,20 +3269,19 @@ static int do_cow_fault(struct vm_fault *vmf) goto uncharge_out; return ret; uncharge_out: - mem_cgroup_cancel_charge(new_page, memcg, false); - put_page(new_page); + mem_cgroup_cancel_charge(vmf->cow_page, memcg, false); + put_page(vmf->cow_page); return ret; } 
static int do_shared_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct page *fault_page; struct address_space *mapping; int dirtied = 0; int ret, tmp; - ret = __do_fault(vmf, NULL, &fault_page, NULL); + ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; @@ -3299,26 +3290,26 @@ static int do_shared_fault(struct vm_fault *vmf) * about to become writable */ if (vma->vm_ops->page_mkwrite) { - unlock_page(fault_page); - tmp = do_page_mkwrite(vma, fault_page, vmf->address); + unlock_page(vmf->page); + tmp = do_page_mkwrite(vma, vmf->page, vmf->address); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { - put_page(fault_page); + put_page(vmf->page); return tmp; } } - ret |= alloc_set_pte(vmf, NULL, fault_page); + ret |= alloc_set_pte(vmf, NULL, vmf->page); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) { - unlock_page(fault_page); - put_page(fault_page); + unlock_page(vmf->page); + put_page(vmf->page); return ret; } - if (set_page_dirty(fault_page)) + if (set_page_dirty(vmf->page)) dirtied = 1; /* * Take a local copy of the address_space - page.mapping may be zeroed @@ -3326,8 +3317,8 @@ static int do_shared_fault(struct vm_fault *vmf) * pinned by vma->vm_file's reference. We rely on unlock_page()'s * release semantics to prevent the compiler from undoing this copying. 
*/ - mapping = page_rmapping(fault_page); - unlock_page(fault_page); + mapping = page_rmapping(vmf->page); + unlock_page(vmf->page); if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) { /* * Some device drivers do not set page.mapping but still -- cgit v1.2.3 From fe82221f57ea6840a4238a8e077e3f93f257a03f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:13 -0800 Subject: mm: use passed vm_fault structure in wp_pfn_shared() Instead of creating another vm_fault structure, use the one passed to wp_pfn_shared() for passing arguments into the pfn_mkwrite handler. Link: http://lkml.kernel.org/r/1479460644-25076-7-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 78b81e8984df..7ba9cc58dddd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2273,16 +2273,11 @@ static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte) struct vm_area_struct *vma = vmf->vma; if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { - struct vm_fault vmf2 = { - .page = NULL, - .pgoff = vmf->pgoff, - .address = vmf->address, - .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE, - }; int ret; pte_unmap_unlock(vmf->pte, vmf->ptl); - ret = vma->vm_ops->pfn_mkwrite(vma, &vmf2); + vmf->flags |= FAULT_FLAG_MKWRITE; + ret = vma->vm_ops->pfn_mkwrite(vma, vmf); if (ret & VM_FAULT_ERROR) return ret; vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, -- cgit v1.2.3 From 2994302bc8a17180788fac66a47102d338d5d0ec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:16 -0800 Subject: mm: add orig_pte field into vm_fault Add orig_pte field to vm_fault structure to allow ->page_mkwrite handlers to fully handle the fault. This also allows us to save some passing of extra arguments around.
Link: http://lkml.kernel.org/r/1479460644-25076-8-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 82 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 7ba9cc58dddd..cf74f7ca911b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2070,8 +2070,8 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, * case, all we need to do here is to mark the page as writable and update * any related book-keeping. */ -static inline int wp_page_reuse(struct vm_fault *vmf, pte_t orig_pte, - struct page *page, int page_mkwrite, int dirty_shared) +static inline int wp_page_reuse(struct vm_fault *vmf, struct page *page, + int page_mkwrite, int dirty_shared) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; @@ -2084,8 +2084,8 @@ static inline int wp_page_reuse(struct vm_fault *vmf, pte_t orig_pte, if (page) page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); - flush_cache_page(vma, vmf->address, pte_pfn(orig_pte)); - entry = pte_mkyoung(orig_pte); + flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); + entry = pte_mkyoung(vmf->orig_pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) update_mmu_cache(vma, vmf->address, vmf->pte); @@ -2135,8 +2135,7 @@ static inline int wp_page_reuse(struct vm_fault *vmf, pte_t orig_pte, * held to the old page, as well as updating the rmap. * - In any case, unlock the PTL and drop the reference we took to the old page. 
*/ -static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte, - struct page *old_page) +static int wp_page_copy(struct vm_fault *vmf, struct page *old_page) { struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; @@ -2150,7 +2149,7 @@ static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte, if (unlikely(anon_vma_prepare(vma))) goto oom; - if (is_zero_pfn(pte_pfn(orig_pte))) { + if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { new_page = alloc_zeroed_user_highpage_movable(vma, vmf->address); if (!new_page) @@ -2174,7 +2173,7 @@ static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte, * Re-check the pte - we dropped the lock */ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); - if (likely(pte_same(*vmf->pte, orig_pte))) { + if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { if (old_page) { if (!PageAnon(old_page)) { dec_mm_counter_fast(mm, @@ -2184,7 +2183,7 @@ static int wp_page_copy(struct vm_fault *vmf, pte_t orig_pte, } else { inc_mm_counter_fast(mm, MM_ANONPAGES); } - flush_cache_page(vma, vmf->address, pte_pfn(orig_pte)); + flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* @@ -2268,7 +2267,7 @@ oom: * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * mapping */ -static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte) +static int wp_pfn_shared(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -2286,16 +2285,15 @@ static int wp_pfn_shared(struct vm_fault *vmf, pte_t orig_pte) * We might have raced with another page fault while we * released the pte_offset_map_lock. 
*/ - if (!pte_same(*vmf->pte, orig_pte)) { + if (!pte_same(*vmf->pte, vmf->orig_pte)) { pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } } - return wp_page_reuse(vmf, orig_pte, NULL, 0, 0); + return wp_page_reuse(vmf, NULL, 0, 0); } -static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte, - struct page *old_page) +static int wp_page_shared(struct vm_fault *vmf, struct page *old_page) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; @@ -2321,7 +2319,7 @@ static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte, */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); - if (!pte_same(*vmf->pte, orig_pte)) { + if (!pte_same(*vmf->pte, vmf->orig_pte)) { unlock_page(old_page); pte_unmap_unlock(vmf->pte, vmf->ptl); put_page(old_page); @@ -2330,7 +2328,7 @@ static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte, page_mkwrite = 1; } - return wp_page_reuse(vmf, orig_pte, old_page, page_mkwrite, 1); + return wp_page_reuse(vmf, old_page, page_mkwrite, 1); } /* @@ -2351,13 +2349,13 @@ static int wp_page_shared(struct vm_fault *vmf, pte_t orig_pte, * but allow concurrent faults), with pte both mapped and locked. * We return with mmap_sem still held, but pte unmapped and unlocked. 
*/ -static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte) +static int do_wp_page(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; struct page *old_page; - old_page = vm_normal_page(vma, vmf->address, orig_pte); + old_page = vm_normal_page(vma, vmf->address, vmf->orig_pte); if (!old_page) { /* * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a @@ -2368,10 +2366,10 @@ static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte) */ if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)) - return wp_pfn_shared(vmf, orig_pte); + return wp_pfn_shared(vmf); pte_unmap_unlock(vmf->pte, vmf->ptl); - return wp_page_copy(vmf, orig_pte, old_page); + return wp_page_copy(vmf, old_page); } /* @@ -2386,7 +2384,7 @@ static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte) lock_page(old_page); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); - if (!pte_same(*vmf->pte, orig_pte)) { + if (!pte_same(*vmf->pte, vmf->orig_pte)) { unlock_page(old_page); pte_unmap_unlock(vmf->pte, vmf->ptl); put_page(old_page); @@ -2406,12 +2404,12 @@ static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte) page_move_anon_rmap(old_page, vma); } unlock_page(old_page); - return wp_page_reuse(vmf, orig_pte, old_page, 0, 0); + return wp_page_reuse(vmf, old_page, 0, 0); } unlock_page(old_page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { - return wp_page_shared(vmf, orig_pte, old_page); + return wp_page_shared(vmf, old_page); } /* @@ -2420,7 +2418,7 @@ static int do_wp_page(struct vm_fault *vmf, pte_t orig_pte) get_page(old_page); pte_unmap_unlock(vmf->pte, vmf->ptl); - return wp_page_copy(vmf, orig_pte, old_page); + return wp_page_copy(vmf, old_page); } static void unmap_mapping_range_vma(struct vm_area_struct *vma, @@ -2508,7 +2506,7 @@ EXPORT_SYMBOL(unmap_mapping_range); * We return with the mmap_sem locked or unlocked in the same cases * as does filemap_fault(). 
*/ -int do_swap_page(struct vm_fault *vmf, pte_t orig_pte) +int do_swap_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page, *swapcache; @@ -2519,10 +2517,10 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte) int exclusive = 0; int ret = 0; - if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, orig_pte)) + if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) goto out; - entry = pte_to_swp_entry(orig_pte); + entry = pte_to_swp_entry(vmf->orig_pte); if (unlikely(non_swap_entry(entry))) { if (is_migration_entry(entry)) { migration_entry_wait(vma->vm_mm, vmf->pmd, @@ -2530,7 +2528,7 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte) } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; } else { - print_bad_pte(vma, vmf->address, orig_pte, NULL); + print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); ret = VM_FAULT_SIGBUS; } goto out; @@ -2547,7 +2545,7 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte) */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); - if (likely(pte_same(*vmf->pte, orig_pte))) + if (likely(pte_same(*vmf->pte, vmf->orig_pte))) ret = VM_FAULT_OOM; delayacct_clear_flag(DELAYACCT_PF_SWAPIN); goto unlock; @@ -2604,7 +2602,7 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte) */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); - if (unlikely(!pte_same(*vmf->pte, orig_pte))) + if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) goto out_nomap; if (unlikely(!PageUptodate(page))) { @@ -2632,9 +2630,10 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte) exclusive = RMAP_EXCLUSIVE; } flush_icache_page(vma, page); - if (pte_swp_soft_dirty(orig_pte)) + if (pte_swp_soft_dirty(vmf->orig_pte)) pte = pte_mksoft_dirty(pte); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); + vmf->orig_pte = pte; if (page == swapcache) { do_page_add_anon_rmap(page, vma, vmf->address, exclusive); mem_cgroup_commit_charge(page, 
memcg, true, false); @@ -2664,7 +2663,7 @@ int do_swap_page(struct vm_fault *vmf, pte_t orig_pte) } if (vmf->flags & FAULT_FLAG_WRITE) { - ret |= do_wp_page(vmf, pte); + ret |= do_wp_page(vmf); if (ret & VM_FAULT_ERROR) ret &= VM_FAULT_ERROR; goto out; @@ -3363,7 +3362,7 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, return mpol_misplaced(page, vma, addr); } -static int do_numa_page(struct vm_fault *vmf, pte_t pte) +static int do_numa_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL; @@ -3371,6 +3370,7 @@ static int do_numa_page(struct vm_fault *vmf, pte_t pte) int last_cpupid; int target_nid; bool migrated = false; + pte_t pte = vmf->orig_pte; bool was_writable = pte_write(pte); int flags = 0; @@ -3521,8 +3521,7 @@ static int handle_pte_fault(struct vm_fault *vmf) * So now it's safe to run pte_offset_map(). */ vmf->pte = pte_offset_map(vmf->pmd, vmf->address); - - entry = *vmf->pte; + vmf->orig_pte = *vmf->pte; /* * some architectures can have larger ptes than wordsize, @@ -3533,7 +3532,7 @@ static int handle_pte_fault(struct vm_fault *vmf) * ptl lock held. So here a barrier will do. 
*/ barrier(); - if (pte_none(entry)) { + if (pte_none(vmf->orig_pte)) { pte_unmap(vmf->pte); vmf->pte = NULL; } @@ -3546,19 +3545,20 @@ static int handle_pte_fault(struct vm_fault *vmf) return do_fault(vmf); } - if (!pte_present(entry)) - return do_swap_page(vmf, entry); + if (!pte_present(vmf->orig_pte)) + return do_swap_page(vmf); - if (pte_protnone(entry) && vma_is_accessible(vmf->vma)) - return do_numa_page(vmf, entry); + if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) + return do_numa_page(vmf); vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); spin_lock(vmf->ptl); + entry = vmf->orig_pte; if (unlikely(!pte_same(*vmf->pte, entry))) goto unlock; if (vmf->flags & FAULT_FLAG_WRITE) { if (!pte_write(entry)) - return do_wp_page(vmf, entry); + return do_wp_page(vmf); entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); -- cgit v1.2.3 From 3917048d4572b9cabf6f8f5ad395eb693717367c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:18 -0800 Subject: mm: allow full handling of COW faults in ->fault handlers Patch series "dax: Clear dirty bits after flushing caches", v5. Patchset to clear dirty bits from radix tree of DAX inodes when caches for corresponding pfns have been flushed. In principle, these patches enable handlers to easily update PTEs and do other work necessary to finish the fault without duplicating the functionality present in the generic code. I'd like to thank Kirill and Ross for reviews of the series! This patch (of 20): To allow full handling of COW faults add memcg field to struct vm_fault and a return value of ->fault() handler meaning that COW fault is fully handled and memcg charge must not be canceled. This will allow us to remove knowledge about special DAX locking from the generic fault code. Link: http://lkml.kernel.org/r/1479460644-25076-9-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. 
Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index cf74f7ca911b..02504cd4ca0e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2844,9 +2844,8 @@ static int __do_fault(struct vm_fault *vmf) int ret; ret = vma->vm_ops->fault(vma, vmf); - if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) - return ret; - if (ret & VM_FAULT_DAX_LOCKED) + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | + VM_FAULT_DAX_LOCKED | VM_FAULT_DONE_COW))) return ret; if (unlikely(PageHWPoison(vmf->page))) { @@ -3226,7 +3225,6 @@ static int do_read_fault(struct vm_fault *vmf) static int do_cow_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct mem_cgroup *memcg; int ret; if (unlikely(anon_vma_prepare(vma))) @@ -3237,7 +3235,7 @@ static int do_cow_fault(struct vm_fault *vmf) return VM_FAULT_OOM; if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL, - &memcg, false)) { + &vmf->memcg, false)) { put_page(vmf->cow_page); return VM_FAULT_OOM; } @@ -3245,12 +3243,14 @@ static int do_cow_fault(struct vm_fault *vmf) ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; + if (ret & VM_FAULT_DONE_COW) + return ret; if (!(ret & VM_FAULT_DAX_LOCKED)) copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); __SetPageUptodate(vmf->cow_page); - ret |= alloc_set_pte(vmf, memcg, vmf->cow_page); + ret |= alloc_set_pte(vmf, vmf->memcg, vmf->cow_page); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); if (!(ret & VM_FAULT_DAX_LOCKED)) { @@ -3263,7 +3263,7 @@ static int do_cow_fault(struct vm_fault *vmf) goto uncharge_out; return ret; uncharge_out: - mem_cgroup_cancel_charge(vmf->cow_page, memcg, false); + mem_cgroup_cancel_charge(vmf->cow_page, vmf->memcg, false); 
put_page(vmf->cow_page); return ret; } -- cgit v1.2.3 From 9118c0cbd44262d0015568266f314e645ed6b9ce Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:21 -0800 Subject: mm: factor out functionality to finish page faults Introduce finish_fault() as a helper function for finishing page faults. It is a rather thin wrapper around alloc_set_pte() but since we'd want to call this from DAX code or filesystems, it is still useful to avoid some boilerplate code. Link: http://lkml.kernel.org/r/1479460644-25076-10-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 02504cd4ca0e..22f7f6e38515 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3074,6 +3074,38 @@ fault_handled: return ret; } + +/** + * finish_fault - finish page fault once we have prepared the page to fault + * + * @vmf: structure describing the fault + * + * This function handles all that is needed to finish a page fault once the + * page to fault in is prepared. It handles locking of PTEs, inserts PTE for + * given page, adds reverse page mapping, handles memcg charges and LRU + * addition. The function returns 0 on success, VM_FAULT_ code in case of + * error. + * + * The function expects the page to be locked and on success it consumes a + * reference of a page being mapped (for the PTE which maps it). + */ +int finish_fault(struct vm_fault *vmf) +{ + struct page *page; + int ret; + + /* Did we COW the page?
*/ + if ((vmf->flags & FAULT_FLAG_WRITE) && + !(vmf->vma->vm_flags & VM_SHARED)) + page = vmf->cow_page; + else + page = vmf->page; + ret = alloc_set_pte(vmf, vmf->memcg, page); + if (vmf->pte) + pte_unmap_unlock(vmf->pte, vmf->ptl); + return ret; +} + static unsigned long fault_around_bytes __read_mostly = rounddown_pow_of_two(65536); @@ -3213,9 +3245,7 @@ static int do_read_fault(struct vm_fault *vmf) if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; - ret |= alloc_set_pte(vmf, NULL, vmf->page); - if (vmf->pte) - pte_unmap_unlock(vmf->pte, vmf->ptl); + ret |= finish_fault(vmf); unlock_page(vmf->page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) put_page(vmf->page); @@ -3250,9 +3280,7 @@ static int do_cow_fault(struct vm_fault *vmf) copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); __SetPageUptodate(vmf->cow_page); - ret |= alloc_set_pte(vmf, vmf->memcg, vmf->cow_page); - if (vmf->pte) - pte_unmap_unlock(vmf->pte, vmf->ptl); + ret |= finish_fault(vmf); if (!(ret & VM_FAULT_DAX_LOCKED)) { unlock_page(vmf->page); put_page(vmf->page); @@ -3293,9 +3321,7 @@ static int do_shared_fault(struct vm_fault *vmf) } } - ret |= alloc_set_pte(vmf, NULL, vmf->page); - if (vmf->pte) - pte_unmap_unlock(vmf->pte, vmf->ptl); + ret |= finish_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) { unlock_page(vmf->page); -- cgit v1.2.3 From b1aa812b21084285e9f6098639be9cd5bf9e05d7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:24 -0800 Subject: mm: move handling of COW faults into DAX code Move final handling of COW faults from generic code into DAX fault handler. That way generic code doesn't have to be aware of peculiarities of DAX locking so remove that knowledge and make locking functions private to fs/dax.c. Link: http://lkml.kernel.org/r/1479460644-25076-11-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Acked-by: Kirill A. 
Shutemov Reviewed-by: Ross Zwisler Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 22f7f6e38515..ca3b95fa5fd1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2845,7 +2845,7 @@ static int __do_fault(struct vm_fault *vmf) ret = vma->vm_ops->fault(vma, vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | - VM_FAULT_DAX_LOCKED | VM_FAULT_DONE_COW))) + VM_FAULT_DONE_COW))) return ret; if (unlikely(PageHWPoison(vmf->page))) { @@ -3276,17 +3276,12 @@ static int do_cow_fault(struct vm_fault *vmf) if (ret & VM_FAULT_DONE_COW) return ret; - if (!(ret & VM_FAULT_DAX_LOCKED)) - copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); + copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); __SetPageUptodate(vmf->cow_page); ret |= finish_fault(vmf); - if (!(ret & VM_FAULT_DAX_LOCKED)) { - unlock_page(vmf->page); - put_page(vmf->page); - } else { - dax_unlock_mapping_entry(vma->vm_file->f_mapping, vmf->pgoff); - } + unlock_page(vmf->page); + put_page(vmf->page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; return ret; -- cgit v1.2.3 From 97ba0c2b4b0994044e404b7a96fc92a2e0424534 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:27 -0800 Subject: mm: factor out common parts of write fault handling Currently we duplicate handling of shared write faults in wp_page_reuse() and do_shared_fault(). Factor them out into a common function. Link: http://lkml.kernel.org/r/1479460644-25076-12-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. 
Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 78 +++++++++++++++++++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 41 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index ca3b95fa5fd1..6fd827804bf5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2062,6 +2062,41 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, return ret; } +/* + * Handle dirtying of a page in shared file mapping on a write fault. + * + * The function expects the page to be locked and unlocks it. + */ +static void fault_dirty_shared_page(struct vm_area_struct *vma, + struct page *page) +{ + struct address_space *mapping; + bool dirtied; + bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; + + dirtied = set_page_dirty(page); + VM_BUG_ON_PAGE(PageAnon(page), page); + /* + * Take a local copy of the address_space - page.mapping may be zeroed + * by truncate after unlock_page(). The address_space itself remains + * pinned by vma->vm_file's reference. We rely on unlock_page()'s + * release semantics to prevent the compiler from undoing this copying. 
+ */ + mapping = page_rmapping(page); + unlock_page(page); + + if ((dirtied || page_mkwrite) && mapping) { + /* + * Some device drivers do not set page.mapping + * but still dirty their pages + */ + balance_dirty_pages_ratelimited(mapping); + } + + if (!page_mkwrite) + file_update_time(vma->vm_file); +} + /* * Handle write page faults for pages that can be reused in the current vma * @@ -2092,28 +2127,11 @@ static inline int wp_page_reuse(struct vm_fault *vmf, struct page *page, pte_unmap_unlock(vmf->pte, vmf->ptl); if (dirty_shared) { - struct address_space *mapping; - int dirtied; - if (!page_mkwrite) lock_page(page); - dirtied = set_page_dirty(page); - VM_BUG_ON_PAGE(PageAnon(page), page); - mapping = page->mapping; - unlock_page(page); + fault_dirty_shared_page(vma, page); put_page(page); - - if ((dirtied || page_mkwrite) && mapping) { - /* - * Some device drivers do not set page.mapping - * but still dirty their pages - */ - balance_dirty_pages_ratelimited(mapping); - } - - if (!page_mkwrite) - file_update_time(vma->vm_file); } return VM_FAULT_WRITE; @@ -3294,8 +3312,6 @@ uncharge_out: static int do_shared_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct address_space *mapping; - int dirtied = 0; int ret, tmp; ret = __do_fault(vmf); @@ -3324,27 +3340,7 @@ static int do_shared_fault(struct vm_fault *vmf) return ret; } - if (set_page_dirty(vmf->page)) - dirtied = 1; - /* - * Take a local copy of the address_space - page.mapping may be zeroed - * by truncate after unlock_page(). The address_space itself remains - * pinned by vma->vm_file's reference. We rely on unlock_page()'s - * release semantics to prevent the compiler from undoing this copying. 
- */ - mapping = page_rmapping(vmf->page); - unlock_page(vmf->page); - if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) { - /* - * Some device drivers do not set page.mapping but still - * dirty their pages - */ - balance_dirty_pages_ratelimited(mapping); - } - - if (!vma->vm_ops->page_mkwrite) - file_update_time(vma->vm_file); - + fault_dirty_shared_page(vma, vmf->page); return ret; } -- cgit v1.2.3 From 38b8cb7fbb892503fe9fcf748ebbed8c9fde7bf8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:30 -0800 Subject: mm: pass vm_fault structure into do_page_mkwrite() We will need more information in the ->page_mkwrite() helper for DAX to be able to fully finish faults there. Pass vm_fault structure to do_page_mkwrite() and use it there so that information propagates properly from upper layers. Link: http://lkml.kernel.org/r/1479460644-25076-13-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 6fd827804bf5..e8a527885e8b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2034,20 +2034,17 @@ static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) * * We do this without the lock held, so that it can sleep if it needs to. 
*/ -static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, - unsigned long address) +static int do_page_mkwrite(struct vm_fault *vmf) { - struct vm_fault vmf; int ret; + struct page *page = vmf->page; + unsigned int old_flags = vmf->flags; - vmf.address = address & PAGE_MASK; - vmf.pgoff = page->index; - vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; - vmf.gfp_mask = __get_fault_gfp_mask(vma); - vmf.page = page; - vmf.cow_page = NULL; + vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; - ret = vma->vm_ops->page_mkwrite(vma, &vmf); + ret = vmf->vma->vm_ops->page_mkwrite(vmf->vma, vmf); + /* Restore original flags so that caller is not surprised */ + vmf->flags = old_flags; if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { @@ -2323,7 +2320,8 @@ static int wp_page_shared(struct vm_fault *vmf, struct page *old_page) int tmp; pte_unmap_unlock(vmf->pte, vmf->ptl); - tmp = do_page_mkwrite(vma, old_page, vmf->address); + vmf->page = old_page; + tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(old_page); @@ -3324,7 +3322,7 @@ static int do_shared_fault(struct vm_fault *vmf) */ if (vma->vm_ops->page_mkwrite) { unlock_page(vmf->page); - tmp = do_page_mkwrite(vma, vmf->page, vmf->address); + tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(vmf->page); -- cgit v1.2.3 From a41b70d6dfc28b9e1a17c2a9f3181c2b614bfd54 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:33 -0800 Subject: mm: use vmf->page during WP faults So far we set vmf->page during WP faults only when we needed to pass it to the ->page_mkwrite handler. Set it in all the cases now and use that instead of passing page pointer explicitly around. Link: http://lkml.kernel.org/r/1479460644-25076-14-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. 
Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 58 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index e8a527885e8b..ad452898e6c0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2102,11 +2102,12 @@ static void fault_dirty_shared_page(struct vm_area_struct *vma, * case, all we need to do here is to mark the page as writable and update * any related book-keeping. */ -static inline int wp_page_reuse(struct vm_fault *vmf, struct page *page, +static inline int wp_page_reuse(struct vm_fault *vmf, int page_mkwrite, int dirty_shared) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; + struct page *page = vmf->page; pte_t entry; /* * Clear the pages cpupid information as the existing @@ -2150,10 +2151,11 @@ static inline int wp_page_reuse(struct vm_fault *vmf, struct page *page, * held to the old page, as well as updating the rmap. * - In any case, unlock the PTL and drop the reference we took to the old page. 
*/ -static int wp_page_copy(struct vm_fault *vmf, struct page *old_page) +static int wp_page_copy(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; + struct page *old_page = vmf->page; struct page *new_page = NULL; pte_t entry; int page_copied = 0; @@ -2305,26 +2307,25 @@ static int wp_pfn_shared(struct vm_fault *vmf) return 0; } } - return wp_page_reuse(vmf, NULL, 0, 0); + return wp_page_reuse(vmf, 0, 0); } -static int wp_page_shared(struct vm_fault *vmf, struct page *old_page) +static int wp_page_shared(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; int page_mkwrite = 0; - get_page(old_page); + get_page(vmf->page); if (vma->vm_ops && vma->vm_ops->page_mkwrite) { int tmp; pte_unmap_unlock(vmf->pte, vmf->ptl); - vmf->page = old_page; tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { - put_page(old_page); + put_page(vmf->page); return tmp; } /* @@ -2336,15 +2337,15 @@ static int wp_page_shared(struct vm_fault *vmf, struct page *old_page) vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!pte_same(*vmf->pte, vmf->orig_pte)) { - unlock_page(old_page); + unlock_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - put_page(old_page); + put_page(vmf->page); return 0; } page_mkwrite = 1; } - return wp_page_reuse(vmf, old_page, page_mkwrite, 1); + return wp_page_reuse(vmf, page_mkwrite, 1); } /* @@ -2369,10 +2370,9 @@ static int do_wp_page(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; - struct page *old_page; - old_page = vm_normal_page(vma, vmf->address, vmf->orig_pte); - if (!old_page) { + vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); + if (!vmf->page) { /* * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a * VM_PFNMAP VMA. 
@@ -2385,30 +2385,30 @@ static int do_wp_page(struct vm_fault *vmf) return wp_pfn_shared(vmf); pte_unmap_unlock(vmf->pte, vmf->ptl); - return wp_page_copy(vmf, old_page); + return wp_page_copy(vmf); } /* * Take out anonymous pages first, anonymous shared vmas are * not dirty accountable. */ - if (PageAnon(old_page) && !PageKsm(old_page)) { + if (PageAnon(vmf->page) && !PageKsm(vmf->page)) { int total_mapcount; - if (!trylock_page(old_page)) { - get_page(old_page); + if (!trylock_page(vmf->page)) { + get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - lock_page(old_page); + lock_page(vmf->page); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!pte_same(*vmf->pte, vmf->orig_pte)) { - unlock_page(old_page); + unlock_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - put_page(old_page); + put_page(vmf->page); return 0; } - put_page(old_page); + put_page(vmf->page); } - if (reuse_swap_page(old_page, &total_mapcount)) { + if (reuse_swap_page(vmf->page, &total_mapcount)) { if (total_mapcount == 1) { /* * The page is all ours. Move it to @@ -2417,24 +2417,24 @@ static int do_wp_page(struct vm_fault *vmf) * Protected against the rmap code by * the page lock. */ - page_move_anon_rmap(old_page, vma); + page_move_anon_rmap(vmf->page, vma); } - unlock_page(old_page); - return wp_page_reuse(vmf, old_page, 0, 0); + unlock_page(vmf->page); + return wp_page_reuse(vmf, 0, 0); } - unlock_page(old_page); + unlock_page(vmf->page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { - return wp_page_shared(vmf, old_page); + return wp_page_shared(vmf); } /* * Ok, we need to copy. Oh, well.. 
*/ - get_page(old_page); + get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - return wp_page_copy(vmf, old_page); + return wp_page_copy(vmf); } static void unmap_mapping_range_vma(struct vm_area_struct *vma, -- cgit v1.2.3 From 997dd98dd68beb2aea74cac53e7fd440cc8dba68 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:36 -0800 Subject: mm: move part of wp_page_reuse() into the single call site wp_page_reuse() handles write shared faults which is needed only in wp_page_shared(). Move the handling only into that location to make wp_page_reuse() simpler and avoid a strange situation when we sometimes pass in locked page, sometimes unlocked etc. Link: http://lkml.kernel.org/r/1479460644-25076-15-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index ad452898e6c0..82e7689e3059 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2102,8 +2102,7 @@ static void fault_dirty_shared_page(struct vm_area_struct *vma, * case, all we need to do here is to mark the page as writable and update * any related book-keeping. 
*/ -static inline int wp_page_reuse(struct vm_fault *vmf, - int page_mkwrite, int dirty_shared) +static inline void wp_page_reuse(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; @@ -2123,16 +2122,6 @@ static inline int wp_page_reuse(struct vm_fault *vmf, if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) update_mmu_cache(vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); - - if (dirty_shared) { - if (!page_mkwrite) - lock_page(page); - - fault_dirty_shared_page(vma, page); - put_page(page); - } - - return VM_FAULT_WRITE; } /* @@ -2307,7 +2296,8 @@ static int wp_pfn_shared(struct vm_fault *vmf) return 0; } } - return wp_page_reuse(vmf, 0, 0); + wp_page_reuse(vmf); + return VM_FAULT_WRITE; } static int wp_page_shared(struct vm_fault *vmf) @@ -2345,7 +2335,13 @@ static int wp_page_shared(struct vm_fault *vmf) page_mkwrite = 1; } - return wp_page_reuse(vmf, page_mkwrite, 1); + wp_page_reuse(vmf); + if (!page_mkwrite) + lock_page(vmf->page); + fault_dirty_shared_page(vma, vmf->page); + put_page(vmf->page); + + return VM_FAULT_WRITE; } /* @@ -2420,7 +2416,8 @@ static int do_wp_page(struct vm_fault *vmf) page_move_anon_rmap(vmf->page, vma); } unlock_page(vmf->page); - return wp_page_reuse(vmf, 0, 0); + wp_page_reuse(vmf); + return VM_FAULT_WRITE; } unlock_page(vmf->page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == -- cgit v1.2.3 From 66a6197c118540d454913eef24d68d7491ab5d5f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:39 -0800 Subject: mm: provide helper for finishing mkwrite faults Provide a helper function for finishing write faults due to PTE being read-only. The helper will be used by DAX to avoid the need of complicating generic MM code with DAX locking specifics. Link: http://lkml.kernel.org/r/1479460644-25076-16-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Acked-by: Kirill A. 
Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 67 ++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 82e7689e3059..bbc25da48a18 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2269,6 +2269,38 @@ oom: return VM_FAULT_OOM; } +/** + * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE + * writeable once the page is prepared + * + * @vmf: structure describing the fault + * + * This function handles all that is needed to finish a write page fault in a + * shared mapping due to PTE being read-only once the mapped page is prepared. + * It handles locking of PTE and modifying it. The function returns + * VM_FAULT_WRITE on success, 0 when PTE got changed before we acquired PTE + * lock. + * + * The function expects the page to be locked or other protection against + * concurrent faults / writeback (such as DAX radix tree locks). + */ +int finish_mkwrite_fault(struct vm_fault *vmf) +{ + WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); + vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, + &vmf->ptl); + /* + * We might have raced with another page fault while we released the + * pte_offset_map_lock. + */ + if (!pte_same(*vmf->pte, vmf->orig_pte)) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } + wp_page_reuse(vmf); + return VM_FAULT_WRITE; +} + /* * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * mapping @@ -2285,16 +2317,7 @@ static int wp_pfn_shared(struct vm_fault *vmf) ret = vma->vm_ops->pfn_mkwrite(vma, vmf); if (ret & VM_FAULT_ERROR) return ret; - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - /* - * We might have raced with another page fault while we - * released the pte_offset_map_lock. 
- */ - if (!pte_same(*vmf->pte, vmf->orig_pte)) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; - } + return finish_mkwrite_fault(vmf); } wp_page_reuse(vmf); return VM_FAULT_WRITE; @@ -2304,7 +2327,6 @@ static int wp_page_shared(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; - int page_mkwrite = 0; get_page(vmf->page); @@ -2318,26 +2340,17 @@ static int wp_page_shared(struct vm_fault *vmf) put_page(vmf->page); return tmp; } - /* - * Since we dropped the lock we need to revalidate - * the PTE as someone else may have changed it. If - * they did, we just return, as we can count on the - * MMU to tell us if they didn't also make it writable. - */ - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (!pte_same(*vmf->pte, vmf->orig_pte)) { + tmp = finish_mkwrite_fault(vmf); + if (unlikely(!tmp || (tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { unlock_page(vmf->page); - pte_unmap_unlock(vmf->pte, vmf->ptl); put_page(vmf->page); - return 0; + return tmp; } - page_mkwrite = 1; - } - - wp_page_reuse(vmf); - if (!page_mkwrite) + } else { + wp_page_reuse(vmf); lock_page(vmf->page); + } fault_dirty_shared_page(vma, vmf->page); put_page(vmf->page); -- cgit v1.2.3 From a19e25536ed3a20845f642ce531e10c27fb2add5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:42 -0800 Subject: mm: change return values of finish_mkwrite_fault() Currently finish_mkwrite_fault() returns 0 when the PTE got changed before we acquired the PTE lock and VM_FAULT_WRITE when we succeeded in modifying the PTE. This is somewhat confusing since 0 generally means success; it is also inconsistent with finish_fault(), which returns 0 on success. Change finish_mkwrite_fault() to return 0 on success and VM_FAULT_NOPAGE when the PTE changed. Practically, there should be no behavioral difference since we bail out from the fault the same way regardless of whether we return 0, VM_FAULT_NOPAGE, or VM_FAULT_WRITE.
Also note that VM_FAULT_WRITE has no effect for shared mappings since the only two places that check it - KSM and GUP - care about private mappings only. Generally the meaning of VM_FAULT_WRITE for shared mappings is not well defined and we should probably clean that up. Link: http://lkml.kernel.org/r/1479460644-25076-17-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Acked-by: Kirill A. Shutemov Cc: Ross Zwisler Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index bbc25da48a18..8b7f0656a921 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2295,10 +2295,10 @@ int finish_mkwrite_fault(struct vm_fault *vmf) */ if (!pte_same(*vmf->pte, vmf->orig_pte)) { pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; + return VM_FAULT_NOPAGE; } wp_page_reuse(vmf); - return VM_FAULT_WRITE; + return 0; } /* @@ -2341,8 +2341,7 @@ static int wp_page_shared(struct vm_fault *vmf) return tmp; } tmp = finish_mkwrite_fault(vmf); - if (unlikely(!tmp || (tmp & - (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { + if (unlikely(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { unlock_page(vmf->page); put_page(vmf->page); return tmp; -- cgit v1.2.3 From cae1240257d9ba4b40eb240124c530de8ee349bc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:45 -0800 Subject: mm: export follow_pte() DAX will need to implement its own version of page_check_address(). To avoid duplicating page table walking code, export follow_pte() which does what we need. Link: http://lkml.kernel.org/r/1479460644-25076-18-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Cc: Kirill A. 
Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 8b7f0656a921..edd899d0decb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3817,8 +3817,8 @@ out: return -EINVAL; } -static inline int follow_pte(struct mm_struct *mm, unsigned long address, - pte_t **ptepp, spinlock_t **ptlp) +int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp, + spinlock_t **ptlp) { int res; -- cgit v1.2.3 From 2f89dc12a25ddf995b9acd7b6543fe892e3473d6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Dec 2016 15:07:50 -0800 Subject: dax: protect PTE modification on WP fault by radix tree entry lock Currently the PTE gets updated in wp_pfn_shared() after dax_pfn_mkwrite() has released the corresponding radix tree entry lock. When we want to write-protect the PTE on cache flush, we need the PTE modification to happen under the radix tree entry lock to ensure consistent updates of the PTE and the radix tree (standard faults use the page lock to ensure this consistency). So move the update of the PTE bit into dax_pfn_mkwrite(). Link: http://lkml.kernel.org/r/1479460644-25076-20-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Ross Zwisler Cc: Kirill A. Shutemov Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index edd899d0decb..57d0bd1bd2c4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2315,7 +2315,7 @@ static int wp_pfn_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vma, vmf); - if (ret & VM_FAULT_ERROR) + if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) return ret; return finish_mkwrite_fault(vmf); } -- cgit v1.2.3