diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/bootmem.c | 24 | ||||
-rw-r--r-- | mm/compaction.c | 98 | ||||
-rw-r--r-- | mm/huge_memory.c | 19 | ||||
-rw-r--r-- | mm/hugetlb.c | 1 | ||||
-rw-r--r-- | mm/internal.h | 1 | ||||
-rw-r--r-- | mm/memblock.c | 3 | ||||
-rw-r--r-- | mm/memcontrol.c | 4 | ||||
-rw-r--r-- | mm/migrate.c | 18 | ||||
-rw-r--r-- | mm/mlock.c | 6 | ||||
-rw-r--r-- | mm/mmap.c | 5 | ||||
-rw-r--r-- | mm/mremap.c | 1 | ||||
-rw-r--r-- | mm/nommu.c | 1 | ||||
-rw-r--r-- | mm/page-writeback.c | 1 | ||||
-rw-r--r-- | mm/page_alloc.c | 57 |
14 files changed, 104 insertions, 135 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c index 1324cd74faec..b93376c39b61 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -185,10 +185,23 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) while (start < end) { unsigned long *map, idx, vec; + unsigned shift; map = bdata->node_bootmem_map; idx = start - bdata->node_min_pfn; + shift = idx & (BITS_PER_LONG - 1); + /* + * vec holds at most BITS_PER_LONG map bits, + * bit 0 corresponds to start. + */ vec = ~map[idx / BITS_PER_LONG]; + + if (shift) { + vec >>= shift; + if (end - start >= BITS_PER_LONG) + vec |= ~map[idx / BITS_PER_LONG + 1] << + (BITS_PER_LONG - shift); + } /* * If we have a properly aligned and fully unreserved * BITS_PER_LONG block of pages in front of us, free @@ -201,19 +214,18 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) count += BITS_PER_LONG; start += BITS_PER_LONG; } else { - unsigned long off = 0; + unsigned long cur = start; - vec >>= start & (BITS_PER_LONG - 1); - while (vec) { + start = ALIGN(start + 1, BITS_PER_LONG); + while (vec && cur != start) { if (vec & 1) { - page = pfn_to_page(start + off); + page = pfn_to_page(cur); __free_pages_bootmem(page, 0); count++; } vec >>= 1; - off++; + ++cur; } - start = ALIGN(start + 1, BITS_PER_LONG); } } diff --git a/mm/compaction.c b/mm/compaction.c index 6b807e466497..c62bd063d766 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -816,6 +816,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, static int compact_finished(struct zone *zone, struct compact_control *cc) { + unsigned int order; unsigned long watermark; if (fatal_signal_pending(current)) @@ -850,22 +851,16 @@ static int compact_finished(struct zone *zone, return COMPACT_CONTINUE; /* Direct compactor: Is a suitable page free? */ - if (cc->page) { - /* Was a suitable page captured? */ - if (*cc->page) + for (order = cc->order; order < MAX_ORDER; order++) { + struct free_area *area = &zone->free_area[order]; + + /* Job done if page is free of the right migratetype */ + if (!list_empty(&area->free_list[cc->migratetype])) + return COMPACT_PARTIAL; + + /* Job done if allocation would set block type */ + if (cc->order >= pageblock_order && area->nr_free) return COMPACT_PARTIAL; - } else { - unsigned int order; - for (order = cc->order; order < MAX_ORDER; order++) { - struct free_area *area = &zone->free_area[cc->order]; - /* Job done if page is free of the right migratetype */ - if (!list_empty(&area->free_list[cc->migratetype])) - return COMPACT_PARTIAL; - - /* Job done if allocation would set block type */ - if (cc->order >= pageblock_order && area->nr_free) - return COMPACT_PARTIAL; - } } return COMPACT_CONTINUE; @@ -921,60 +916,6 @@ unsigned long compaction_suitable(struct zone *zone, int order) return COMPACT_CONTINUE; } -static void compact_capture_page(struct compact_control *cc) -{ - unsigned long flags; - int mtype, mtype_low, mtype_high; - - if (!cc->page || *cc->page) - return; - - /* - * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP - * regardless of the migratetype of the freelist is is captured from. - * This is fine because the order for a high-order MIGRATE_MOVABLE - * allocation is typically at least a pageblock size and overall - * fragmentation is not impaired. Other allocation types must - * capture pages from their own migratelist because otherwise they - * could pollute other pageblocks like MIGRATE_MOVABLE with - * difficult to move pages and making fragmentation worse overall. - */ - if (cc->migratetype == MIGRATE_MOVABLE) { - mtype_low = 0; - mtype_high = MIGRATE_PCPTYPES; - } else { - mtype_low = cc->migratetype; - mtype_high = cc->migratetype + 1; - } - - /* Speculatively examine the free lists without zone lock */ - for (mtype = mtype_low; mtype < mtype_high; mtype++) { - int order; - for (order = cc->order; order < MAX_ORDER; order++) { - struct page *page; - struct free_area *area; - area = &(cc->zone->free_area[order]); - if (list_empty(&area->free_list[mtype])) - continue; - - /* Take the lock and attempt capture of the page */ - if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc)) - return; - if (!list_empty(&area->free_list[mtype])) { - page = list_entry(area->free_list[mtype].next, - struct page, lru); - if (capture_free_page(page, cc->order, mtype)) { - spin_unlock_irqrestore(&cc->zone->lock, - flags); - *cc->page = page; - return; - } - } - spin_unlock_irqrestore(&cc->zone->lock, flags); - } - } -} - static int compact_zone(struct zone *zone, struct compact_control *cc) { int ret; @@ -1054,9 +995,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) goto out; } } - - /* Capture a page now if it is a suitable size */ - compact_capture_page(cc); } out: @@ -1069,8 +1007,7 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync, bool *contended, - struct page **page) + bool sync, bool *contended) { unsigned long ret; struct compact_control cc = { @@ -1080,7 +1017,6 @@ static unsigned long compact_zone_order(struct zone *zone, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, .sync = sync, - .page = page, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -1110,7 +1046,7 @@ int sysctl_extfrag_threshold = 500; */ unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended, struct page **page) + bool sync, bool *contended) { enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; @@ -1136,7 +1072,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, int status; status = compact_zone_order(zone, order, gfp_mask, sync, - contended, page); + contended); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ @@ -1192,7 +1128,6 @@ int compact_pgdat(pg_data_t *pgdat, int order) struct compact_control cc = { .order = order, .sync = false, - .page = NULL, }; return __compact_pgdat(pgdat, &cc); @@ -1203,14 +1138,13 @@ static int compact_node(int nid) struct compact_control cc = { .order = -1, .sync = true, - .page = NULL, }; return __compact_pgdat(NODE_DATA(nid), &cc); } /* Compact all nodes in the system */ -static int compact_nodes(void) +static void compact_nodes(void) { int nid; @@ -1219,8 +1153,6 @@ static int compact_nodes(void) for_each_online_node(nid) compact_node(nid); - - return COMPACT_COMPLETE; } /* The written value is actually unused, all memory is compacted */ @@ -1231,7 +1163,7 @@ int sysctl_compaction_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { if (write) - return compact_nodes(); + compact_nodes(); return 0; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9e894edc7811..b5783d81eda9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1257,6 +1257,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, if (flags & FOLL_WRITE && !pmd_write(*pmd)) goto out; + /* Avoid dumping huge zero page */ + if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) + return ERR_PTR(-EFAULT); + page = pmd_page(*pmd); VM_BUG_ON(!PageHead(page)); if (flags & FOLL_TOUCH) { @@ -1819,9 +1823,19 @@ int split_huge_page(struct page *page) BUG_ON(is_huge_zero_pfn(page_to_pfn(page))); BUG_ON(!PageAnon(page)); - anon_vma = page_lock_anon_vma_read(page); + + /* + * The caller does not necessarily hold an mmap_sem that would prevent + * the anon_vma disappearing so we first we take a reference to it + * and then lock the anon_vma for write. This is similar to + * page_lock_anon_vma_read except the write lock is taken to serialise + * against parallel split or collapse operations. + */ + anon_vma = page_get_anon_vma(page); if (!anon_vma) goto out; + anon_vma_lock_write(anon_vma); + ret = 0; if (!PageCompound(page)) goto out_unlock; @@ -1832,7 +1846,8 @@ int split_huge_page(struct page *page) BUG_ON(PageCompound(page)); out_unlock: - page_unlock_anon_vma_read(anon_vma); + anon_vma_unlock(anon_vma); + put_anon_vma(anon_vma); out: return ret; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4f3ea0b1e57c..546db81820e4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3033,6 +3033,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, if (!huge_pte_none(huge_ptep_get(ptep))) { pte = huge_ptep_get_and_clear(mm, address, ptep); pte = pte_mkhuge(pte_modify(pte, newprot)); + pte = arch_make_huge_pte(pte, vma, NULL, 0); set_huge_pte_at(mm, address, ptep, pte); pages++; } diff --git a/mm/internal.h b/mm/internal.h index d597f94cc205..9ba21100ebf3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -135,7 +135,6 @@ struct compact_control { int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; bool contended; /* True if a lock was contended */ - struct page **page; /* Page captured of requested size */ }; unsigned long diff --git a/mm/memblock.c b/mm/memblock.c index 625905523c2a..88adc8afb610 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -314,7 +314,8 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type) } this->size += next->size; - memmove(next, next + 1, (type->cnt - (i + 1)) * sizeof(*next)); + /* move forward from next + 1, index of which is i + 2 */ + memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); type->cnt--; } } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 09255ec8159c..fbb60b103e64 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3030,7 +3030,9 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, if (memcg) { s->memcg_params->memcg = memcg; s->memcg_params->root_cache = root_cache; - } + } else + s->memcg_params->is_root_cache = true; + return 0; } diff --git a/mm/migrate.c b/mm/migrate.c index 3b676b0c5c3e..2fd8b4af4744 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -160,8 +160,10 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, if (is_write_migration_entry(entry)) pte = pte_mkwrite(pte); #ifdef CONFIG_HUGETLB_PAGE - if (PageHuge(new)) + if (PageHuge(new)) { pte = pte_mkhuge(pte); + pte = arch_make_huge_pte(pte, vma, new, 0); + } #endif flush_cache_page(vma, addr, pte_pfn(pte)); set_pte_at(mm, addr, ptep, pte); @@ -1679,9 +1681,21 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, page_xchg_last_nid(new_page, page_last_nid(page)); isolated = numamigrate_isolate_page(pgdat, page); - if (!isolated) { + + /* + * Failing to isolate or a GUP pin prevents migration. The expected + * page count is 2. 1 for anonymous pages without a mapping and 1 + * for the callers pin. If the page was isolated, the page will + * need to be put back on the LRU. + */ + if (!isolated || page_count(page) != 2) { count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); put_page(new_page); + if (isolated) { + putback_lru_page(page); + isolated = 0; + goto out; + } goto out_keep_locked; } diff --git a/mm/mlock.c b/mm/mlock.c index f0b9ce572fc7..c9bd528b01d2 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -517,11 +517,11 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) static int do_mlockall(int flags) { struct vm_area_struct * vma, * prev = NULL; - unsigned int def_flags = 0; if (flags & MCL_FUTURE) - def_flags = VM_LOCKED; - current->mm->def_flags = def_flags; + current->mm->def_flags |= VM_LOCKED; + else + current->mm->def_flags &= ~VM_LOCKED; if (flags == MCL_FUTURE) goto out; diff --git a/mm/mmap.c b/mm/mmap.c index f54b235f29a9..09da0b264982 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -32,6 +32,7 @@ #include <linux/khugepaged.h> #include <linux/uprobes.h> #include <linux/rbtree_augmented.h> +#include <linux/sched/sysctl.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -2886,7 +2887,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. */ - down_write(&anon_vma->root->rwsem); + down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem); /* * We can safely modify head.next after taking the * anon_vma->root->rwsem. If some other vma in this mm shares @@ -2943,7 +2944,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) * vma in this mm is backed by the same anon_vma or address_space. * * We can take all the locks in random order because the VM code - * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never + * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never * takes more than one of them in a row. Secondly we're protected * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. * diff --git a/mm/mremap.c b/mm/mremap.c index e1031e1f6a61..f9766f460299 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -19,6 +19,7 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/mmu_notifier.h> +#include <linux/sched/sysctl.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> diff --git a/mm/nommu.c b/mm/nommu.c index 79c3cac87afa..b20db4e22263 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -29,6 +29,7 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/audit.h> +#include <linux/sched/sysctl.h> #include <asm/uaccess.h> #include <asm/tlb.h> diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0713bfbf0954..66a0024becd9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -35,6 +35,7 @@ #include <linux/buffer_head.h> /* __set_page_dirty_buffers */ #include <linux/pagevec.h> #include <linux/timer.h> +#include <linux/sched/rt.h> #include <trace/events/writeback.h> /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bc6cc0e913bd..d1107adf174a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -58,6 +58,7 @@ #include <linux/prefetch.h> #include <linux/migrate.h> #include <linux/page-debug-flags.h> +#include <linux/sched/rt.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -773,6 +774,10 @@ void __init init_cma_reserved_pageblock(struct page *page) set_pageblock_migratetype(page, MIGRATE_CMA); __free_pages(page, pageblock_order); totalram_pages += pageblock_nr_pages; +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page)) + totalhigh_pages += pageblock_nr_pages; +#endif } #endif @@ -1384,14 +1389,8 @@ void split_page(struct page *page, unsigned int order) set_page_refcounted(page + i); } -/* - * Similar to the split_page family of functions except that the page - * required at the given order and being isolated now to prevent races - * with parallel allocators - */ -int capture_free_page(struct page *page, int alloc_order, int migratetype) +static int __isolate_free_page(struct page *page, unsigned int order) { - unsigned int order; unsigned long watermark; struct zone *zone; int mt; @@ -1399,7 +1398,6 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) BUG_ON(!PageBuddy(page)); zone = page_zone(page); - order = page_order(page); mt = get_pageblock_migratetype(page); if (mt != MIGRATE_ISOLATE) { @@ -1408,7 +1406,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) return 0; - __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt); + __mod_zone_freepage_state(zone, -(1UL << order), mt); } /* Remove page from free list */ @@ -1416,11 +1414,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) zone->free_area[order].nr_free--; rmv_page_order(page); - if (alloc_order != order) - expand(zone, page, alloc_order, order, - &zone->free_area[order], migratetype); - - /* Set the pageblock if the captured page is at least a pageblock */ + /* Set the pageblock if the isolated page is at least a pageblock */ if (order >= pageblock_order - 1) { struct page *endpage = page + (1 << order) - 1; for (; page < endpage; page += pageblock_nr_pages) { @@ -1431,7 +1425,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) } } - return 1UL << alloc_order; + return 1UL << order; } /* @@ -1449,10 +1443,9 @@ int split_free_page(struct page *page) unsigned int order; int nr_pages; - BUG_ON(!PageBuddy(page)); order = page_order(page); - nr_pages = capture_free_page(page, order, 0); + nr_pages = __isolate_free_page(page, order); if (!nr_pages) return 0; @@ -2136,8 +2129,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, bool *contended_compaction, bool *deferred_compaction, unsigned long *did_some_progress) { - struct page *page = NULL; - if (!order) return NULL; @@ -2149,16 +2140,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, current->flags |= PF_MEMALLOC; *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, nodemask, sync_migration, - contended_compaction, &page); + contended_compaction); current->flags &= ~PF_MEMALLOC; - /* If compaction captured a page, prep and use it */ - if (page) { - prep_new_page(page, order, gfp_mask); - goto got_page; - } - if (*did_some_progress != COMPACT_SKIPPED) { + struct page *page; + /* Page migration frees to the PCP lists but we want merging */ drain_pages(get_cpu()); put_cpu(); @@ -2168,7 +2155,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, alloc_flags & ~ALLOC_NO_WATERMARKS, preferred_zone, migratetype); if (page) { -got_page: preferred_zone->compact_blockskip_flush = false; preferred_zone->compact_considered = 0; preferred_zone->compact_defer_shift = 0; @@ -4435,10 +4421,11 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, * round what is now in bits to nearest long in bits, then return it in * bytes. */ -static unsigned long __init usemap_size(unsigned long zonesize) +static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize) { unsigned long usemapsize; + zonesize += zone_start_pfn & (pageblock_nr_pages-1); usemapsize = roundup(zonesize, pageblock_nr_pages); usemapsize = usemapsize >> pageblock_order; usemapsize *= NR_PAGEBLOCK_BITS; @@ -4448,17 +4435,19 @@ static unsigned long __init usemap_size(unsigned long zonesize) } static void __init setup_usemap(struct pglist_data *pgdat, - struct zone *zone, unsigned long zonesize) + struct zone *zone, + unsigned long zone_start_pfn, + unsigned long zonesize) { - unsigned long usemapsize = usemap_size(zonesize); + unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize); zone->pageblock_flags = NULL; if (usemapsize) zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, usemapsize); } #else -static inline void setup_usemap(struct pglist_data *pgdat, - struct zone *zone, unsigned long zonesize) {} +static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone, + unsigned long zone_start_pfn, unsigned long zonesize) {} #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE @@ -4609,7 +4598,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, continue; set_pageblock_order(); - setup_usemap(pgdat, zone, size); + setup_usemap(pgdat, zone, zone_start_pfn, size); ret = init_currently_empty_zone(zone, zone_start_pfn, size, MEMMAP_EARLY); BUG_ON(ret); @@ -5604,7 +5593,7 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn) pfn &= (PAGES_PER_SECTION-1); return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; #else - pfn = pfn - zone->zone_start_pfn; + pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages); return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; #endif /* CONFIG_SPARSEMEM */ } |