summaryrefslogtreecommitdiff
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c142
1 files changed, 123 insertions, 19 deletions
diff --git a/mm/memory.c b/mm/memory.c
index c43447382fec..d2f0c08adf82 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1353,7 +1353,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
- /*
+ /*
* Require read or write permissions.
* If FOLL_FORCE is set, we only require the "MAY" flags.
*/
@@ -1497,7 +1497,63 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
return i;
}
-/**
+/*
+ * fixup_user_fault() - manually resolve a user page fault
+ * @tsk: the task_struct to use for page fault accounting, or
+ * NULL if faults are not to be recorded.
+ * @mm: mm_struct of target mm
+ * @address: user address
+ * @fault_flags:flags to pass down to handle_mm_fault()
+ *
+ * This is meant to be called in the specific scenario where for locking reasons
+ * we try to access user memory in atomic context (within a pagefault_disable()
+ * section), this returns -EFAULT, and we want to resolve the user fault before
+ * trying again.
+ *
+ * Typically this is meant to be used by the futex code.
+ *
+ * The main difference with get_user_pages() is that this function will
+ * unconditionally call handle_mm_fault() which will in turn perform all the
+ * necessary SW fixup of the dirty and young bits in the PTE, while
+ * handle_mm_fault() only guarantees to update these in the struct page.
+ *
+ * This is important for some architectures where those bits also gate the
+ * access permission to the page because they are maintained in software. On
+ * such architectures, gup() will not be enough to make a subsequent access
+ * succeed.
+ *
+ * This should be called with the mm_sem held for read.
+ */
+int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long address, unsigned int fault_flags)
+{
+ struct vm_area_struct *vma;
+ int ret;
+
+ vma = find_extend_vma(mm, address);
+ if (!vma || address < vma->vm_start)
+ return -EFAULT;
+
+ ret = handle_mm_fault(mm, vma, address, fault_flags);
+ if (ret & VM_FAULT_ERROR) {
+ if (ret & VM_FAULT_OOM)
+ return -ENOMEM;
+ if (ret & VM_FAULT_HWPOISON)
+ return -EFAULT;
+ if (ret & VM_FAULT_SIGBUS)
+ return -EFAULT;
+ BUG();
+ }
+ if (tsk) {
+ if (ret & VM_FAULT_MAJOR)
+ tsk->maj_flt++;
+ else
+ tsk->min_flt++;
+ }
+ return 0;
+}
+
+/*
* get_user_pages() - pin user pages in memory
* @tsk: task_struct of target task
* @mm: mm_struct of target mm
@@ -2573,6 +2629,7 @@ void unmap_mapping_range(struct address_space *mapping,
details.last_index = ULONG_MAX;
details.i_mmap_lock = &mapping->i_mmap_lock;
+ mutex_lock(&mapping->unmap_mutex);
spin_lock(&mapping->i_mmap_lock);
/* Protect against endless unmapping loops */
@@ -2589,6 +2646,7 @@ void unmap_mapping_range(struct address_space *mapping,
if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->unmap_mutex);
}
EXPORT_SYMBOL(unmap_mapping_range);
@@ -2626,7 +2684,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned int flags, pte_t orig_pte)
{
spinlock_t *ptl;
- struct page *page;
+ struct page *page, *swapcache = NULL;
swp_entry_t entry;
pte_t pte;
struct mem_cgroup *ptr = NULL;
@@ -2681,10 +2739,25 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
lock_page(page);
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
- page = ksm_might_need_to_copy(page, vma, address);
- if (!page) {
- ret = VM_FAULT_OOM;
- goto out;
+ /*
+ * Make sure try_to_free_swap or reuse_swap_page or swapoff did not
+ * release the swapcache from under us. The page pin, and pte_same
+ * test below, are not enough to exclude that. Even if it is still
+ * swapcache, we need to check that the page's swap has not changed.
+ */
+ if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
+ goto out_page;
+
+ if (ksm_might_need_to_copy(page, vma, address)) {
+ swapcache = page;
+ page = ksm_does_need_to_copy(page, vma, address);
+
+ if (unlikely(!page)) {
+ ret = VM_FAULT_OOM;
+ page = swapcache;
+ swapcache = NULL;
+ goto out_page;
+ }
}
if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
@@ -2735,6 +2808,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
try_to_free_swap(page);
unlock_page(page);
+ if (swapcache) {
+ /*
+ * Hold the lock to avoid the swap entry to be reused
+ * until we take the PT lock for the pte_same() check
+ * (to avoid false positives from pte_same). For
+ * further safety release the lock after the swap_free
+ * so that the swap count won't change under a
+ * parallel locked swapcache.
+ */
+ unlock_page(swapcache);
+ page_cache_release(swapcache);
+ }
if (flags & FAULT_FLAG_WRITE) {
ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
@@ -2756,25 +2841,43 @@ out_page:
unlock_page(page);
out_release:
page_cache_release(page);
+ if (swapcache) {
+ unlock_page(swapcache);
+ page_cache_release(swapcache);
+ }
return ret;
}
/*
- * This is like a special single-page "expand_downwards()",
- * except we must first make sure that 'address-PAGE_SIZE'
+ * This is like a special single-page "expand_{down|up}wards()",
+ * except we must first make sure that 'address{-|+}PAGE_SIZE'
* doesn't hit another vma.
- *
- * The "find_vma()" will do the right thing even if we wrap
*/
static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
{
address &= PAGE_MASK;
if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
- address -= PAGE_SIZE;
- if (find_vma(vma->vm_mm, address) != vma)
- return -ENOMEM;
+ struct vm_area_struct *prev = vma->vm_prev;
- expand_stack(vma, address);
+ /*
+ * Is there a mapping abutting this one below?
+ *
+ * That's only ok if it's the same stack mapping
+ * that has gotten split..
+ */
+ if (prev && prev->vm_end == address)
+ return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+
+ expand_stack(vma, address - PAGE_SIZE);
+ }
+ if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+ struct vm_area_struct *next = vma->vm_next;
+
+ /* As VM_GROWSDOWN but s/below/above/ */
+ if (next && next->vm_start == address + PAGE_SIZE)
+ return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+
+ expand_upwards(vma, address + PAGE_SIZE);
}
return 0;
}
@@ -2792,22 +2895,23 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
spinlock_t *ptl;
pte_t entry;
+ pte_unmap(page_table);
+
+ /* Check if we need to add a guard page to the stack */
if (check_stack_guard_page(vma, address) < 0)
return VM_FAULT_SIGBUS;
+ /* Use the zero-page for reads */
if (!(flags & FAULT_FLAG_WRITE)) {
entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
vma->vm_page_prot));
- ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
+ page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!pte_none(*page_table))
goto unlock;
goto setpte;
}
/* Allocate our own private page. */
- pte_unmap(page_table);
-
if (unlikely(anon_vma_prepare(vma)))
goto oom;
page = alloc_zeroed_user_highpage_movable(vma, address);