diff options
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r-- | include/linux/mm.h | 373 |
1 files changed, 304 insertions, 69 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b6e55ee8855..c1b7414c7bef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -5,6 +5,7 @@ #ifdef __KERNEL__ +#include <linux/mmdebug.h> #include <linux/gfp.h> #include <linux/bug.h> #include <linux/list.h> @@ -50,9 +51,22 @@ extern int sysctl_legacy_va_layout; #include <asm/pgtable.h> #include <asm/processor.h> +#ifndef __pa_symbol +#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) +#endif + extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; +extern int sysctl_overcommit_memory; +extern int sysctl_overcommit_ratio; +extern unsigned long sysctl_overcommit_kbytes; + +extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *, + size_t *, loff_t *); +extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, + size_t *, loff_t *); + #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) /* to align the pointer to the (next) page boundary */ @@ -161,7 +175,7 @@ extern unsigned int kobjsize(const void *objp); * Special vmas that are non-mergable, non-mlock()able. * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ -#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP) +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) /* * mapping from the currently active vm_flags protection bits (the @@ -290,19 +304,33 @@ static inline int get_freepage_migratetype(struct page *page) */ static inline int put_page_testzero(struct page *page) { - VM_BUG_ON(atomic_read(&page->_count) == 0); + VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0, page); return atomic_dec_and_test(&page->_count); } /* * Try to grab a ref unless the page has a refcount of zero, return false if * that is the case. + * This can be called when MMU is off so it must not access + * any of the virtual mappings. */ static inline int get_page_unless_zero(struct page *page) { return atomic_inc_not_zero(&page->_count); } +/* + * Try to drop a ref unless the page has a refcount of one, return false if + * that is the case. + * This is to make sure that the refcount won't become zero after this drop. + * This can be called when MMU is off so it must not access + * any of the virtual mappings. + */ +static inline int put_page_unless_one(struct page *page) +{ + return atomic_add_unless(&page->_count, -1, 1); +} + extern int page_is_ram(unsigned long pfn); /* Support for virtually mapped pages */ @@ -337,7 +365,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) static inline void compound_lock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON(PageSlab(page)); + VM_BUG_ON_PAGE(PageSlab(page), page); bit_spin_lock(PG_compound_lock, &page->flags); #endif } @@ -345,7 +373,7 @@ static inline void compound_lock(struct page *page) static inline void compound_unlock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON(PageSlab(page)); + VM_BUG_ON_PAGE(PageSlab(page), page); bit_spin_unlock(PG_compound_lock, &page->flags); #endif } @@ -371,8 +399,18 @@ static inline void compound_unlock_irqrestore(struct page *page, static inline struct page *compound_head(struct page *page) { - if (unlikely(PageTail(page))) - return page->first_page; + if (unlikely(PageTail(page))) { + struct page *head = page->first_page; + + /* + * page->first_page may be a dangling pointer to an old + * compound page, so recheck that it is still a tail + * page before returning. + */ + smp_rmb(); + if (likely(PageTail(page))) + return head; + } return page; } @@ -396,15 +434,44 @@ static inline int page_count(struct page *page) return atomic_read(&compound_head(page)->_count); } +#ifdef CONFIG_HUGETLB_PAGE +extern int PageHeadHuge(struct page *page_head); +#else /* CONFIG_HUGETLB_PAGE */ +static inline int PageHeadHuge(struct page *page_head) +{ + return 0; +} +#endif /* CONFIG_HUGETLB_PAGE */ + +static inline bool __compound_tail_refcounted(struct page *page) +{ + return !PageSlab(page) && !PageHeadHuge(page); +} + +/* + * This takes a head page as parameter and tells if the + * tail page reference counting can be skipped. + * + * For this to be safe, PageSlab and PageHeadHuge must remain true on + * any given page where they return true here, until all tail pins + * have been released. + */ +static inline bool compound_tail_refcounted(struct page *page) +{ + VM_BUG_ON_PAGE(!PageHead(page), page); + return __compound_tail_refcounted(page); +} + static inline void get_huge_page_tail(struct page *page) { /* - * __split_huge_page_refcount() cannot run - * from under us. + * __split_huge_page_refcount() cannot run from under us. */ - VM_BUG_ON(page_mapcount(page) < 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); - atomic_inc(&page->_mapcount); + VM_BUG_ON_PAGE(!PageTail(page), page); + VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); + VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); + if (compound_tail_refcounted(page->first_page)) + atomic_inc(&page->_mapcount); } extern bool __get_page_tail(struct page *page); @@ -418,7 +485,7 @@ static inline void get_page(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_count. */ - VM_BUG_ON(atomic_read(&page->_count) <= 0); + VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); atomic_inc(&page->_count); } @@ -455,13 +522,13 @@ static inline int PageBuddy(struct page *page) static inline void __SetPageBuddy(struct page *page) { - VM_BUG_ON(atomic_read(&page->_mapcount) != -1); + VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE); } static inline void __ClearPageBuddy(struct page *page) { - VM_BUG_ON(!PageBuddy(page)); + VM_BUG_ON_PAGE(!PageBuddy(page), page); atomic_set(&page->_mapcount, -1); } @@ -581,11 +648,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) * sets it, so none of the operations on it need to be atomic. */ -/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NID] | ... | FLAGS | */ +/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) -#define LAST_NID_PGOFF (ZONES_PGOFF - LAST_NID_WIDTH) +#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) /* * Define the bit shifts to access each section. For non-existent @@ -595,7 +662,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) -#define LAST_NID_PGSHIFT (LAST_NID_PGOFF * (LAST_NID_WIDTH != 0)) +#define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ #ifdef NODE_NOT_IN_PAGE_FLAGS @@ -617,7 +684,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) -#define LAST_NID_MASK ((1UL << LAST_NID_WIDTH) - 1) +#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) @@ -661,51 +728,117 @@ static inline int page_to_nid(const struct page *page) #endif #ifdef CONFIG_NUMA_BALANCING -#ifdef LAST_NID_NOT_IN_PAGE_FLAGS -static inline int page_nid_xchg_last(struct page *page, int nid) +static inline int cpu_pid_to_cpupid(int cpu, int pid) +{ + return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK); +} + +static inline int cpupid_to_pid(int cpupid) { - return xchg(&page->_last_nid, nid); + return cpupid & LAST__PID_MASK; } -static inline int page_nid_last(struct page *page) +static inline int cpupid_to_cpu(int cpupid) { - return page->_last_nid; + return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK; } -static inline void page_nid_reset_last(struct page *page) + +static inline int cpupid_to_nid(int cpupid) { - page->_last_nid = -1; + return cpu_to_node(cpupid_to_cpu(cpupid)); } -#else -static inline int page_nid_last(struct page *page) + +static inline bool cpupid_pid_unset(int cpupid) +{ + return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK); +} + +static inline bool cpupid_cpu_unset(int cpupid) { - return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK; + return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK); } -extern int page_nid_xchg_last(struct page *page, int nid); +static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) +{ + return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid); +} -static inline void page_nid_reset_last(struct page *page) +#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) +#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS +static inline int page_cpupid_xchg_last(struct page *page, int cpupid) { - int nid = (1 << LAST_NID_SHIFT) - 1; + return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK); +} - page->flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT); - page->flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT; +static inline int page_cpupid_last(struct page *page) +{ + return page->_last_cpupid; +} +static inline void page_cpupid_reset_last(struct page *page) +{ + page->_last_cpupid = -1 & LAST_CPUPID_MASK; } -#endif /* LAST_NID_NOT_IN_PAGE_FLAGS */ #else -static inline int page_nid_xchg_last(struct page *page, int nid) +static inline int page_cpupid_last(struct page *page) { - return page_to_nid(page); + return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } -static inline int page_nid_last(struct page *page) +extern int page_cpupid_xchg_last(struct page *page, int cpupid); + +static inline void page_cpupid_reset_last(struct page *page) { - return page_to_nid(page); + int cpupid = (1 << LAST_CPUPID_SHIFT) - 1; + + page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT); + page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT; +} +#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ +#else /* !CONFIG_NUMA_BALANCING */ +static inline int page_cpupid_xchg_last(struct page *page, int cpupid) +{ + return page_to_nid(page); /* XXX */ } -static inline void page_nid_reset_last(struct page *page) +static inline int page_cpupid_last(struct page *page) { + return page_to_nid(page); /* XXX */ +} + +static inline int cpupid_to_nid(int cpupid) +{ + return -1; +} + +static inline int cpupid_to_pid(int cpupid) +{ + return -1; } -#endif + +static inline int cpupid_to_cpu(int cpupid) +{ + return -1; +} + +static inline int cpu_pid_to_cpupid(int nid, int pid) +{ + return -1; +} + +static inline bool cpupid_pid_unset(int cpupid) +{ + return 1; +} + +static inline void page_cpupid_reset_last(struct page *page) +{ +} + +static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) +{ + return false; +} +#endif /* CONFIG_NUMA_BALANCING */ static inline struct zone *page_zone(const struct page *page) { @@ -762,11 +895,14 @@ static __always_inline void *lowmem_page_address(const struct page *page) #endif #if defined(WANT_PAGE_VIRTUAL) -#define page_address(page) ((page)->virtual) -#define set_page_address(page, address) \ - do { \ - (page)->virtual = (address); \ - } while(0) +static inline void *page_address(const struct page *page) +{ + return page->virtual; +} +static inline void set_page_address(struct page *page, void *address) +{ + page->virtual = address; +} #define page_address_init() do { } while(0) #endif @@ -900,7 +1036,6 @@ extern void pagefault_out_of_memory(void); * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ -#define SHOW_MEM_FILTER_PAGE_COUNT (0x0002u) /* page type count */ extern void show_free_areas(unsigned int flags); extern bool skip_free_areas_node(unsigned int flags, int nid); @@ -1232,32 +1367,88 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ -#if USE_SPLIT_PTLOCKS -/* - * We tuck a spinlock to guard each pagetable page into its struct page, - * at page->private, with BUILD_BUG_ON to make sure that this will not - * overflow into the next struct page (as it might with DEBUG_SPINLOCK). - * When freeing, reset page->mapping so free_pages_check won't complain. - */ -#define __pte_lockptr(page) &((page)->ptl) -#define pte_lock_init(_page) do { \ - spin_lock_init(__pte_lockptr(_page)); \ -} while (0) -#define pte_lock_deinit(page) ((page)->mapping = NULL) -#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) -#else /* !USE_SPLIT_PTLOCKS */ +#if USE_SPLIT_PTE_PTLOCKS +#if ALLOC_SPLIT_PTLOCKS +void __init ptlock_cache_init(void); +extern bool ptlock_alloc(struct page *page); +extern void ptlock_free(struct page *page); + +static inline spinlock_t *ptlock_ptr(struct page *page) +{ + return page->ptl; +} +#else /* ALLOC_SPLIT_PTLOCKS */ +static inline void ptlock_cache_init(void) +{ +} + +static inline bool ptlock_alloc(struct page *page) +{ + return true; +} + +static inline void ptlock_free(struct page *page) +{ +} + +static inline spinlock_t *ptlock_ptr(struct page *page) +{ + return &page->ptl; +} +#endif /* ALLOC_SPLIT_PTLOCKS */ + +static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return ptlock_ptr(pmd_page(*pmd)); +} + +static inline bool ptlock_init(struct page *page) +{ + /* + * prep_new_page() initialize page->private (and therefore page->ptl) + * with 0. Make sure nobody took it in use in between. + * + * It can happen if arch try to use slab for page table allocation: + * slab code uses page->slab_cache and page->first_page (for tail + * pages), which share storage with page->ptl. + */ + VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); + if (!ptlock_alloc(page)) + return false; + spin_lock_init(ptlock_ptr(page)); + return true; +} + +/* Reset page->mapping so free_pages_check won't complain. */ +static inline void pte_lock_deinit(struct page *page) +{ + page->mapping = NULL; + ptlock_free(page); +} + +#else /* !USE_SPLIT_PTE_PTLOCKS */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. */ -#define pte_lock_init(page) do {} while (0) -#define pte_lock_deinit(page) do {} while (0) -#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) -#endif /* USE_SPLIT_PTLOCKS */ +static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return &mm->page_table_lock; +} +static inline void ptlock_cache_init(void) {} +static inline bool ptlock_init(struct page *page) { return true; } +static inline void pte_lock_deinit(struct page *page) {} +#endif /* USE_SPLIT_PTE_PTLOCKS */ -static inline void pgtable_page_ctor(struct page *page) +static inline void pgtable_init(void) +{ + ptlock_cache_init(); + pgtable_cache_init(); +} + +static inline bool pgtable_page_ctor(struct page *page) { - pte_lock_init(page); inc_zone_page_state(page, NR_PAGETABLE); + return ptlock_init(page); } static inline void pgtable_page_dtor(struct page *page) @@ -1294,6 +1485,52 @@ static inline void pgtable_page_dtor(struct page *page) ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ NULL: pte_offset_kernel(pmd, address)) +#if USE_SPLIT_PMD_PTLOCKS + +static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return ptlock_ptr(virt_to_page(pmd)); +} + +static inline bool pgtable_pmd_page_ctor(struct page *page) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + page->pmd_huge_pte = NULL; +#endif + return ptlock_init(page); +} + +static inline void pgtable_pmd_page_dtor(struct page *page) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + VM_BUG_ON_PAGE(page->pmd_huge_pte, page); +#endif + ptlock_free(page); +} + +#define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte) + +#else + +static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return &mm->page_table_lock; +} + +static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; } +static inline void pgtable_pmd_page_dtor(struct page *page) {} + +#define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) + +#endif + +static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) +{ + spinlock_t *ptl = pmd_lockptr(mm, pmd); + spin_lock(ptl); + return ptl; +} + extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); @@ -1668,7 +1905,7 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) } #endif -#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE +#ifdef CONFIG_NUMA_BALANCING unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); #endif @@ -1803,8 +2040,6 @@ extern void shake_page(struct page *p, int access); extern atomic_long_t num_poisoned_pages; extern int soft_offline_page(struct page *page, int flags); -extern void dump_page(struct page *page); - #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) extern void clear_huge_page(struct page *page, unsigned long addr, |