summaryrefslogtreecommitdiff
path: root/include/linux/mm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--include/linux/mm.h150
1 files changed, 110 insertions, 40 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dd5ea3016fc4..47a93928b90f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -138,7 +138,6 @@ extern unsigned int kobjsize(const void *objp);
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
-#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
#define VM_ARCH_2 0x02000000
#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
@@ -206,27 +205,26 @@ extern unsigned int kobjsize(const void *objp);
extern pgprot_t protection_map[16];
#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
-#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
-#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */
-#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */
-#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
-#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
-#define FAULT_FLAG_TRIED 0x40 /* second try */
-#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
+#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */
+#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */
+#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */
+#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */
+#define FAULT_FLAG_TRIED 0x20 /* Second try */
+#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
* ->fault function. The vma's ->fault is responsible for returning a bitmask
* of VM_FAULT_xxx flags that give details about how the fault was handled.
*
- * pgoff should be used in favour of virtual_address, if possible. If pgoff
- * is used, one may implement ->remap_pages to get nonlinear mapping support.
+ * pgoff should be used in favour of virtual_address, if possible.
*/
struct vm_fault {
unsigned int flags; /* FAULT_FLAG_xxx flags */
pgoff_t pgoff; /* Logical page offset based on vma */
void __user *virtual_address; /* Faulting virtual address */
+ struct page *cow_page; /* Handler may choose to COW */
struct page *page; /* ->fault handlers should return a
* page here, unless VM_FAULT_NOPAGE
* is set (which is also implied by
@@ -287,9 +285,13 @@ struct vm_operations_struct {
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
unsigned long addr);
#endif
- /* called by sys_remap_file_pages() to populate non-linear mapping */
- int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff);
+ /*
+ * Called by vm_normal_page() for special PTEs to find the
+ * page for @addr. This is useful if the default behavior
+ * (using pte_page()) would not find the correct page.
+ */
+ struct page *(*find_special_page)(struct vm_area_struct *vma,
+ unsigned long addr);
};
struct mmu_gather;
@@ -446,6 +448,12 @@ static inline struct page *compound_head_by_tail(struct page *tail)
return tail;
}
+/*
+ * Since either compound page could be dismantled asynchronously in THP
+ * or we access asynchronously arbitrary positioned struct page, there
+ * would be tail flag race. To handle this race, we should call
+ * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
+ */
static inline struct page *compound_head(struct page *page)
{
if (unlikely(PageTail(page)))
@@ -454,6 +462,18 @@ static inline struct page *compound_head(struct page *page)
}
/*
+ * If we access compound page synchronously such as access to
+ * allocated page, there is no need to handle tail flag race, so we can
+ * check tail flag directly without any synchronization primitive.
+ */
+static inline struct page *compound_head_fast(struct page *page)
+{
+ if (unlikely(PageTail(page)))
+ return page->first_page;
+ return page;
+}
+
+/*
* The atomic page->_mapcount, starts from -1: so that transitions
* both from it and to it can be tracked, using atomic_inc_and_test
* and atomic_add_negative(-1).
@@ -465,7 +485,8 @@ static inline void page_mapcount_reset(struct page *page)
static inline int page_mapcount(struct page *page)
{
- return atomic_read(&(page)->_mapcount) + 1;
+ VM_BUG_ON_PAGE(PageSlab(page), page);
+ return atomic_read(&page->_mapcount) + 1;
}
static inline int page_count(struct page *page)
@@ -531,7 +552,14 @@ static inline void get_page(struct page *page)
static inline struct page *virt_to_head_page(const void *x)
{
struct page *page = virt_to_page(x);
- return compound_head(page);
+
+ /*
+ * We don't need to worry about synchronization of tail flag
+ * when we call virt_to_head_page() since it is only called for
+ * already allocated page and this page won't be freed until
+ * this virt_to_head_page() is finished. So use _fast variant.
+ */
+ return compound_head_fast(page);
}
/*
@@ -601,29 +629,28 @@ int split_free_page(struct page *page);
* prototype for that function and accessor functions.
* These are _only_ valid on the head of a PG_compound page.
*/
-typedef void compound_page_dtor(struct page *);
static inline void set_compound_page_dtor(struct page *page,
compound_page_dtor *dtor)
{
- page[1].lru.next = (void *)dtor;
+ page[1].compound_dtor = dtor;
}
static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
{
- return (compound_page_dtor *)page[1].lru.next;
+ return page[1].compound_dtor;
}
static inline int compound_order(struct page *page)
{
if (!PageHead(page))
return 0;
- return (unsigned long)page[1].lru.prev;
+ return page[1].compound_order;
}
static inline void set_compound_order(struct page *page, unsigned long order)
{
- page[1].lru.prev = (void *)order;
+ page[1].compound_order = order;
}
#ifdef CONFIG_MMU
@@ -1121,7 +1148,6 @@ extern void user_shm_unlock(size_t, struct user_struct *);
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
struct zap_details {
- struct vm_area_struct *nonlinear_vma; /* Check page->index if set */
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
@@ -1139,8 +1165,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
/**
* mm_walk - callbacks for walk_page_range
- * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
* @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
@@ -1148,16 +1172,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
* @pte_entry: if set, called for each non-empty PTE (4th-level) entry
* @pte_hole: if set, called for each hole at all levels
* @hugetlb_entry: if set, called for each hugetlb entry
- * *Caution*: The caller must hold mmap_sem() if @hugetlb_entry
- * is used.
+ * @test_walk: caller specific callback function to determine whether
+ * we walk over the current vma or not. A positive returned
+ * value means "do page table walk over the current vma,"
+ * and a negative one means "abort current page table walk
+ * right now." 0 means "skip the current vma."
+ * @mm: mm_struct representing the target process of page table walk
+ * @vma: vma currently walked (NULL if walking outside vmas)
+ * @private: private data for callbacks' usage
*
- * (see walk_page_range for more details)
+ * (see the comment on walk_page_range() for more details)
*/
struct mm_walk {
- int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
- int (*pud_entry)(pud_t *pud, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1167,12 +1193,16 @@ struct mm_walk {
int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long next,
struct mm_walk *walk);
+ int (*test_walk)(unsigned long addr, unsigned long next,
+ struct mm_walk *walk);
struct mm_struct *mm;
+ struct vm_area_struct *vma;
void *private;
};
int walk_page_range(unsigned long addr, unsigned long end,
struct mm_walk *walk);
+int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
@@ -1236,6 +1266,17 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
int write, int force, struct page **pages,
struct vm_area_struct **vmas);
+long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages,
+ int write, int force, struct page **pages,
+ int *locked);
+long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages,
+ int write, int force, struct page **pages,
+ unsigned int gup_flags);
+long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages,
+ int write, int force, struct page **pages);
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
struct kvec;
@@ -1368,6 +1409,11 @@ static inline void update_hiwater_vm(struct mm_struct *mm)
mm->hiwater_vm = mm->total_vm;
}
+static inline void reset_mm_hiwater_rss(struct mm_struct *mm)
+{
+ mm->hiwater_rss = get_mm_rss(mm);
+}
+
static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
struct mm_struct *mm)
{
@@ -1407,14 +1453,45 @@ static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
#endif
-#ifdef __PAGETABLE_PMD_FOLDED
+#if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
unsigned long address)
{
return 0;
}
+
+static inline void mm_nr_pmds_init(struct mm_struct *mm) {}
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+ return 0;
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm) {}
+static inline void mm_dec_nr_pmds(struct mm_struct *mm) {}
+
#else
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+
+static inline void mm_nr_pmds_init(struct mm_struct *mm)
+{
+ atomic_long_set(&mm->nr_pmds, 0);
+}
+
+static inline unsigned long mm_nr_pmds(struct mm_struct *mm)
+{
+ return atomic_long_read(&mm->nr_pmds);
+}
+
+static inline void mm_inc_nr_pmds(struct mm_struct *mm)
+{
+ atomic_long_inc(&mm->nr_pmds);
+}
+
+static inline void mm_dec_nr_pmds(struct mm_struct *mm)
+{
+ atomic_long_dec(&mm->nr_pmds);
+}
#endif
int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -1777,12 +1854,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
for (vma = vma_interval_tree_iter_first(root, start, last); \
vma; vma = vma_interval_tree_iter_next(vma, start, last))
-static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
- struct list_head *list)
-{
- list_add_tail(&vma->shared.nonlinear, list);
-}
-
void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
struct rb_root *root);
void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
@@ -2110,9 +2181,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
#endif
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
- unsigned long nr_scanned,
- unsigned long nr_eligible);
+void drop_slab(void);
+void drop_slab_node(int nid);
#ifndef CONFIG_MMU
#define randomize_va_space 0