-rw-r--r--  mm/mempolicy.c  270
1 file changed, 138 insertions(+), 132 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4b077ec6c005..7051fe450e96 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -183,55 +183,9 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
return policy;
}
-/* Check if we are the only process mapping the page in question */
-static inline int single_mm_mapping(struct mm_struct *mm,
- struct address_space *mapping)
-{
- struct vm_area_struct *vma;
- struct prio_tree_iter iter;
- int rc = 1;
-
- spin_lock(&mapping->i_mmap_lock);
- vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
- if (mm != vma->vm_mm) {
- rc = 0;
- goto out;
- }
- list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
- if (mm != vma->vm_mm) {
- rc = 0;
- goto out;
- }
-out:
- spin_unlock(&mapping->i_mmap_lock);
- return rc;
-}
-
-/*
- * Add a page to be migrated to the pagelist
- */
-static void migrate_page_add(struct vm_area_struct *vma,
- struct page *page, struct list_head *pagelist, unsigned long flags)
-{
- /*
- * Avoid migrating a page that is shared by others and not writable.
- */
- if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
- mapping_writably_mapped(page->mapping) ||
- single_mm_mapping(vma->vm_mm, page->mapping)) {
- int rc = isolate_lru_page(page);
-
- if (rc == 1)
- list_add(&page->lru, pagelist);
- /*
- * If the isolate attempt was not successful then we just
- * encountered an unswappable page. Something must be wrong.
- */
- WARN_ON(rc == 0);
- }
-}
-
static void gather_stats(struct page *, void *);
+static void migrate_page_add(struct vm_area_struct *vma,
+ struct page *page, struct list_head *pagelist, unsigned long flags);
/* Scan through pages checking if pages follow certain conditions. */
static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -440,90 +394,6 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
return mpol_check_policy(mode, nodes);
}
-static int swap_pages(struct list_head *pagelist)
-{
- LIST_HEAD(moved);
- LIST_HEAD(failed);
- int n;
-
- n = migrate_pages(pagelist, NULL, &moved, &failed);
- putback_lru_pages(&failed);
- putback_lru_pages(&moved);
-
- return n;
-}
-
-long do_mbind(unsigned long start, unsigned long len,
- unsigned long mode, nodemask_t *nmask, unsigned long flags)
-{
- struct vm_area_struct *vma;
- struct mm_struct *mm = current->mm;
- struct mempolicy *new;
- unsigned long end;
- int err;
- LIST_HEAD(pagelist);
-
- if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
- MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
- || mode > MPOL_MAX)
- return -EINVAL;
- if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
- return -EPERM;
-
- if (start & ~PAGE_MASK)
- return -EINVAL;
-
- if (mode == MPOL_DEFAULT)
- flags &= ~MPOL_MF_STRICT;
-
- len = (len + PAGE_SIZE - 1) & PAGE_MASK;
- end = start + len;
-
- if (end < start)
- return -EINVAL;
- if (end == start)
- return 0;
-
- if (mpol_check_policy(mode, nmask))
- return -EINVAL;
-
- new = mpol_new(mode, nmask);
- if (IS_ERR(new))
- return PTR_ERR(new);
-
- /*
- * If we are using the default policy then operation
- * on discontinuous address spaces is okay after all
- */
- if (!new)
- flags |= MPOL_MF_DISCONTIG_OK;
-
- PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
- mode,nodes_addr(nodes)[0]);
-
- down_write(&mm->mmap_sem);
- vma = check_range(mm, start, end, nmask,
- flags | MPOL_MF_INVERT, &pagelist);
-
- err = PTR_ERR(vma);
- if (!IS_ERR(vma)) {
- int nr_failed = 0;
-
- err = mbind_range(vma, start, end, new);
- if (!list_empty(&pagelist))
- nr_failed = swap_pages(&pagelist);
-
- if (!err && nr_failed && (flags & MPOL_MF_STRICT))
- err = -EIO;
- }
- if (!list_empty(&pagelist))
- putback_lru_pages(&pagelist);
-
- up_write(&mm->mmap_sem);
- mpol_free(new);
- return err;
-}
-
/* Set the process memory policy */
long do_set_mempolicy(int mode, nodemask_t *nodes)
{
@@ -644,6 +514,71 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
}
/*
+ * page migration
+ */
+
+/* Check if we are the only process mapping the page in question */
+static inline int single_mm_mapping(struct mm_struct *mm,
+ struct address_space *mapping)
+{
+ struct vm_area_struct *vma;
+ struct prio_tree_iter iter;
+ int rc = 1;
+
+ spin_lock(&mapping->i_mmap_lock);
+ vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
+ if (mm != vma->vm_mm) {
+ rc = 0;
+ goto out;
+ }
+ list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+ if (mm != vma->vm_mm) {
+ rc = 0;
+ goto out;
+ }
+out:
+ spin_unlock(&mapping->i_mmap_lock);
+ return rc;
+}
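Both walks above are needed because, in this era of the VM, linear file mappings live in the address_space's i_mmap priority tree while VM_NONLINEAR mappings sit on the separate i_mmap_nonlinear list; a page is privately mapped only if no vma in either structure belongs to a foreign mm.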
+
+/*
+ * Add a page to be migrated to the pagelist
+ */
+static void migrate_page_add(struct vm_area_struct *vma,
+ struct page *page, struct list_head *pagelist, unsigned long flags)
+{
+ /*
+ * Avoid migrating a page that is shared by others and not writable.
+ */
+ if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
+ mapping_writably_mapped(page->mapping) ||
+ single_mm_mapping(vma->vm_mm, page->mapping)) {
+ int rc = isolate_lru_page(page);
+
+ if (rc == 1)
+ list_add(&page->lru, pagelist);
+ /*
+ * If the isolate attempt was not successful then we just
+ * encountered an unswappable page. Something must be wrong.
+ */
+ WARN_ON(rc == 0);
+ }
+}
+
+static int swap_pages(struct list_head *pagelist)
+{
+ LIST_HEAD(moved);
+ LIST_HEAD(failed);
+ int n;
+
+ n = migrate_pages(pagelist, NULL, &moved, &failed);
+ putback_lru_pages(&failed);
+ putback_lru_pages(&moved);
+
+ return n;
+}
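As a hedged usage sketch (not part of the patch): migrate_pages() here returns the number of pages it could not swap out, which is why do_mbind() below treats the return value of swap_pages() as a failure count. The helper name drain_pagelist and its error policy are illustrative assumptions mirroring do_mbind():

static int drain_pagelist(struct list_head *pagelist, unsigned long flags)
{
	int nr_failed = 0;

	/* Try to swap out every page that check_range() isolated. */
	if (!list_empty(pagelist))
		nr_failed = swap_pages(pagelist);

	/* Whatever is still on the list was never queued; put it back. */
	if (!list_empty(pagelist))
		putback_lru_pages(pagelist);

	/* Under MPOL_MF_STRICT, any unmigrated page is a hard error. */
	return (nr_failed && (flags & MPOL_MF_STRICT)) ? -EIO : 0;
}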
+
+/*
* For now migrate_pages simply swaps out the pages from nodes that are in
* the source set but not in the target set. In the future, we would
* want a function that moves pages between the two nodesets in such
@@ -673,6 +608,77 @@ int do_migrate_pages(struct mm_struct *mm,
return count;
}
+long do_mbind(unsigned long start, unsigned long len,
+ unsigned long mode, nodemask_t *nmask, unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ struct mempolicy *new;
+ unsigned long end;
+ int err;
+ LIST_HEAD(pagelist);
+
+ if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
+ MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ || mode > MPOL_MAX)
+ return -EINVAL;
+ if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+
+ if (mode == MPOL_DEFAULT)
+ flags &= ~MPOL_MF_STRICT;
+
+ len = (len + PAGE_SIZE - 1) & PAGE_MASK;
+ end = start + len;
+
+ if (end < start)
+ return -EINVAL;
+ if (end == start)
+ return 0;
+
+ if (mpol_check_policy(mode, nmask))
+ return -EINVAL;
+
+ new = mpol_new(mode, nmask);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+
+ /*
+ * If we are using the default policy then operation
+ * on discontinuous address spaces is okay after all
+ */
+ if (!new)
+ flags |= MPOL_MF_DISCONTIG_OK;
+
+ PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
+ mode,nodes_addr(nodes)[0]);
+
+ down_write(&mm->mmap_sem);
+ vma = check_range(mm, start, end, nmask,
+ flags | MPOL_MF_INVERT, &pagelist);
+
+ err = PTR_ERR(vma);
+ if (!IS_ERR(vma)) {
+ int nr_failed = 0;
+
+ err = mbind_range(vma, start, end, new);
+ if (!list_empty(&pagelist))
+ nr_failed = swap_pages(&pagelist);
+
+ if (!err && nr_failed && (flags & MPOL_MF_STRICT))
+ err = -EIO;
+ }
+ if (!list_empty(&pagelist))
+ putback_lru_pages(&pagelist);
+
+ up_write(&mm->mmap_sem);
+ mpol_free(new);
+ return err;
+}
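do_mbind() is the kernel side of the mbind(2) system call. As an illustrative userspace counterpart (assuming libnuma's <numaif.h> declaration of mbind(); build with -lnuma), this binds a fresh mapping to node 0 and asks for any already-placed pages to be migrated, which is exactly the path that fills the pagelist above:

#include <numaif.h>
#include <sys/mman.h>
#include <stdio.h>

int main(void)
{
	size_t len = 16 * 4096;
	unsigned long nodemask = 1UL << 0;	/* allow NUMA node 0 only */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * MPOL_MF_MOVE requests migration of pages already placed on
	 * other nodes (the migrate_page_add()/swap_pages() path above);
	 * MPOL_MF_STRICT turns unmovable pages into -EIO, as in
	 * do_mbind().
	 */
	if (mbind(p, len, MPOL_BIND, &nodemask, sizeof(nodemask) * 8,
		  MPOL_MF_MOVE | MPOL_MF_STRICT)) {
		perror("mbind");
		return 1;
	}
	return 0;
}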
+
/*
* User space interface with variable sized bitmaps for nodelists.
*/