summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/compaction.c61
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/huge_memory.c38
-rw-r--r--mm/hugetlb.c6
-rw-r--r--mm/internal.h3
-rw-r--r--mm/kasan/kasan.c6
-rw-r--r--mm/kasan/quarantine.c29
-rw-r--r--mm/kmemleak.c2
-rw-r--r--mm/memcontrol.c89
-rw-r--r--mm/memory.c34
-rw-r--r--mm/mempool.c12
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mmap.c10
-rw-r--r--mm/oom_kill.c7
-rw-r--r--mm/page_alloc.c6
-rw-r--r--mm/page_owner.c6
-rw-r--r--mm/percpu.c73
-rw-r--r--mm/rmap.c12
-rw-r--r--mm/shmem.c8
-rw-r--r--mm/slab_common.c4
-rw-r--r--mm/swap.c11
-rw-r--r--mm/workingset.c2
22 files changed, 235 insertions, 188 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 1427366ad673..7bc04778f84d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -441,25 +441,23 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
/* Found a free page, break it into order-0 pages */
isolated = split_free_page(page);
+ if (!isolated)
+ break;
+
total_isolated += isolated;
+ cc->nr_freepages += isolated;
for (i = 0; i < isolated; i++) {
list_add(&page->lru, freelist);
page++;
}
-
- /* If a page was split, advance to the end of it */
- if (isolated) {
- cc->nr_freepages += isolated;
- if (!strict &&
- cc->nr_migratepages <= cc->nr_freepages) {
- blockpfn += isolated;
- break;
- }
-
- blockpfn += isolated - 1;
- cursor += isolated - 1;
- continue;
+ if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
+ blockpfn += isolated;
+ break;
}
+ /* Advance to the end of split page */
+ blockpfn += isolated - 1;
+ cursor += isolated - 1;
+ continue;
isolate_fail:
if (strict)
@@ -469,6 +467,9 @@ isolate_fail:
}
+ if (locked)
+ spin_unlock_irqrestore(&cc->zone->lock, flags);
+
/*
* There is a tiny chance that we have read bogus compound_order(),
* so be careful to not go outside of the pageblock.
@@ -490,9 +491,6 @@ isolate_fail:
if (strict && blockpfn < end_pfn)
total_isolated = 0;
- if (locked)
- spin_unlock_irqrestore(&cc->zone->lock, flags);
-
/* Update the pageblock-skip if the whole pageblock was scanned */
if (blockpfn == end_pfn)
update_pageblock_skip(cc, valid_page, total_isolated, false);
@@ -1011,7 +1009,6 @@ static void isolate_freepages(struct compact_control *cc)
block_end_pfn = block_start_pfn,
block_start_pfn -= pageblock_nr_pages,
isolate_start_pfn = block_start_pfn) {
-
/*
* This can iterate a massively long zone without finding any
* suitable migration targets, so periodically check if we need
@@ -1035,32 +1032,30 @@ static void isolate_freepages(struct compact_control *cc)
continue;
/* Found a block suitable for isolating free pages from. */
- isolate_freepages_block(cc, &isolate_start_pfn,
- block_end_pfn, freelist, false);
+ isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn,
+ freelist, false);
/*
- * If we isolated enough freepages, or aborted due to async
- * compaction being contended, terminate the loop.
- * Remember where the free scanner should restart next time,
- * which is where isolate_freepages_block() left off.
- * But if it scanned the whole pageblock, isolate_start_pfn
- * now points at block_end_pfn, which is the start of the next
- * pageblock.
- * In that case we will however want to restart at the start
- * of the previous pageblock.
+ * If we isolated enough freepages, or aborted due to lock
+ * contention, terminate.
*/
if ((cc->nr_freepages >= cc->nr_migratepages)
|| cc->contended) {
- if (isolate_start_pfn >= block_end_pfn)
+ if (isolate_start_pfn >= block_end_pfn) {
+ /*
+ * Restart at previous pageblock if more
+ * freepages can be isolated next time.
+ */
isolate_start_pfn =
block_start_pfn - pageblock_nr_pages;
+ }
break;
- } else {
+ } else if (isolate_start_pfn < block_end_pfn) {
/*
- * isolate_freepages_block() should not terminate
- * prematurely unless contended, or isolated enough
+ * If isolation failed early, do not continue
+ * needlessly.
*/
- VM_BUG_ON(isolate_start_pfn < block_end_pfn);
+ break;
}
}
diff --git a/mm/filemap.c b/mm/filemap.c
index 00ae878b2a38..20f3b1f33f0e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2186,7 +2186,7 @@ repeat:
if (file->f_ra.mmap_miss > 0)
file->f_ra.mmap_miss--;
addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
- do_set_pte(vma, addr, page, pte, false, false, true);
+ do_set_pte(vma, addr, page, pte, false, false);
unlock_page(page);
goto next;
unlock:
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9ed58530f695..343a2b7e57aa 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1624,14 +1624,9 @@ int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (next - addr != HPAGE_PMD_SIZE) {
get_page(page);
spin_unlock(ptl);
- if (split_huge_page(page)) {
- put_page(page);
- unlock_page(page);
- goto out_unlocked;
- }
+ split_huge_page(page);
put_page(page);
unlock_page(page);
- ret = 1;
goto out_unlocked;
}
@@ -2989,7 +2984,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
}
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long address, bool freeze)
+ unsigned long address, bool freeze, struct page *page)
{
spinlock_t *ptl;
struct mm_struct *mm = vma->vm_mm;
@@ -2997,8 +2992,17 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
ptl = pmd_lock(mm, pmd);
+
+ /*
+ * If caller asks to setup a migration entries, we need a page to check
+ * pmd against. Otherwise we can end up replacing wrong page.
+ */
+ VM_BUG_ON(freeze && !page);
+ if (page && page != pmd_page(*pmd))
+ goto out;
+
if (pmd_trans_huge(*pmd)) {
- struct page *page = pmd_page(*pmd);
+ page = pmd_page(*pmd);
if (PageMlocked(page))
clear_page_mlock(page);
} else if (!pmd_devmap(*pmd))
@@ -3025,24 +3029,8 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
return;
pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd) || (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)))
- return;
- /*
- * If caller asks to setup a migration entries, we need a page to check
- * pmd against. Otherwise we can end up replacing wrong page.
- */
- VM_BUG_ON(freeze && !page);
- if (page && page != pmd_page(*pmd))
- return;
-
- /*
- * Caller holds the mmap_sem write mode or the anon_vma lock,
- * so a huge pmd cannot materialize from under us (khugepaged
- * holds both the mmap_sem write mode and the anon_vma lock
- * write mode).
- */
- __split_huge_pmd(vma, pmd, address, freeze);
+ __split_huge_pmd(vma, pmd, address, freeze, page);
}
void vma_adjust_trans_huge(struct vm_area_struct *vma,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7f0fa87cc66d..cc2a99e9cbc8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1032,6 +1032,7 @@ static void destroy_compound_gigantic_page(struct page *page,
int nr_pages = 1 << order;
struct page *p = page + 1;
+ atomic_set(compound_mapcount_ptr(page), 0);
for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
clear_compound_head(p);
set_page_refcounted(p);
@@ -3384,7 +3385,7 @@ retry_avoidcopy:
/* If no-one else is actually using this page, avoid the copy
* and just make the page writable */
if (page_mapcount(old_page) == 1 && PageAnon(old_page)) {
- page_move_anon_rmap(old_page, vma, address);
+ page_move_anon_rmap(old_page, vma);
set_huge_ptep_writable(vma, address, ptep);
return 0;
}
@@ -4230,7 +4231,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
if (saddr) {
spte = huge_pte_offset(svma->vm_mm, saddr);
if (spte) {
- mm_inc_nr_pmds(mm);
get_page(virt_to_page(spte));
break;
}
@@ -4245,9 +4245,9 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
if (pud_none(*pud)) {
pud_populate(mm, pud,
(pmd_t *)((unsigned long)spte & PAGE_MASK));
+ mm_inc_nr_pmds(mm);
} else {
put_page(virt_to_page(spte));
- mm_inc_nr_pmds(mm);
}
spin_unlock(ptl);
out:
diff --git a/mm/internal.h b/mm/internal.h
index a37e5b6f9d25..2524ec880e24 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -24,7 +24,8 @@
*/
#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
- __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
+ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
+ __GFP_ATOMIC)
/* The GFP flags allowed during early boot */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 28439acda6ec..6845f9294696 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -508,7 +508,7 @@ void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
kasan_kmalloc(cache, object, cache->object_size, flags);
}
-void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
+static void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
{
unsigned long size = cache->object_size;
unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
@@ -626,7 +626,7 @@ void kasan_krealloc(const void *object, size_t size, gfp_t flags)
kasan_kmalloc(page->slab_cache, object, size, flags);
}
-void kasan_kfree(void *ptr)
+void kasan_poison_kfree(void *ptr)
{
struct page *page;
@@ -636,7 +636,7 @@ void kasan_kfree(void *ptr)
kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
KASAN_FREE_PAGE);
else
- kasan_slab_free(page->slab_cache, ptr);
+ kasan_poison_slab_free(page->slab_cache, ptr);
}
void kasan_kfree_large(const void *ptr)
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 4973505a9bdd..65793f150d1f 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -238,30 +238,23 @@ static void qlist_move_cache(struct qlist_head *from,
struct qlist_head *to,
struct kmem_cache *cache)
{
- struct qlist_node *prev = NULL, *curr;
+ struct qlist_node *curr;
if (unlikely(qlist_empty(from)))
return;
curr = from->head;
+ qlist_init(from);
while (curr) {
- struct qlist_node *qlink = curr;
- struct kmem_cache *obj_cache = qlink_to_cache(qlink);
-
- if (obj_cache == cache) {
- if (unlikely(from->head == qlink)) {
- from->head = curr->next;
- prev = curr;
- } else
- prev->next = curr->next;
- if (unlikely(from->tail == qlink))
- from->tail = curr->next;
- from->bytes -= cache->size;
- qlist_put(to, qlink, cache->size);
- } else {
- prev = curr;
- }
- curr = curr->next;
+ struct qlist_node *next = curr->next;
+ struct kmem_cache *obj_cache = qlink_to_cache(curr);
+
+ if (obj_cache == cache)
+ qlist_put(to, curr, obj_cache->size);
+ else
+ qlist_put(from, curr, obj_cache->size);
+
+ curr = next;
}
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index e6429926e957..04320d3adbef 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -307,8 +307,10 @@ static void hex_dump_object(struct seq_file *seq,
len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE);
seq_printf(seq, " hex dump (first %zu bytes):\n", len);
+ kasan_disable_current();
seq_hex_dump(seq, " ", DUMP_PREFIX_NONE, HEX_ROW_SIZE,
HEX_GROUP_SIZE, ptr, len, HEX_ASCII);
+ kasan_enable_current();
}
/*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75e74408cc8f..5339c89dff63 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4057,6 +4057,60 @@ static struct cftype mem_cgroup_legacy_files[] = {
{ }, /* terminate */
};
+/*
+ * Private memory cgroup IDR
+ *
+ * Swap-out records and page cache shadow entries need to store memcg
+ * references in constrained space, so we maintain an ID space that is
+ * limited to 16 bit (MEM_CGROUP_ID_MAX), limiting the total number of
+ * memory-controlled cgroups to 64k.
+ *
+ * However, there usually are many references to the oflline CSS after
+ * the cgroup has been destroyed, such as page cache or reclaimable
+ * slab objects, that don't need to hang on to the ID. We want to keep
+ * those dead CSS from occupying IDs, or we might quickly exhaust the
+ * relatively small ID space and prevent the creation of new cgroups
+ * even when there are much fewer than 64k cgroups - possibly none.
+ *
+ * Maintain a private 16-bit ID space for memcg, and allow the ID to
+ * be freed and recycled when it's no longer needed, which is usually
+ * when the CSS is offlined.
+ *
+ * The only exception to that are records of swapped out tmpfs/shmem
+ * pages that need to be attributed to live ancestors on swapin. But
+ * those references are manageable from userspace.
+ */
+
+static DEFINE_IDR(mem_cgroup_idr);
+
+static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+ atomic_inc(&memcg->id.ref);
+}
+
+static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+ if (atomic_dec_and_test(&memcg->id.ref)) {
+ idr_remove(&mem_cgroup_idr, memcg->id.id);
+ memcg->id.id = 0;
+
+ /* Memcg ID pins CSS */
+ css_put(&memcg->css);
+ }
+}
+
+/**
+ * mem_cgroup_from_id - look up a memcg from a memcg id
+ * @id: the memcg id to look up
+ *
+ * Caller must hold rcu_read_lock().
+ */
+struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return idr_find(&mem_cgroup_idr, id);
+}
+
static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
{
struct mem_cgroup_per_node *pn;
@@ -4116,6 +4170,12 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (!memcg)
return NULL;
+ memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
+ 1, MEM_CGROUP_ID_MAX,
+ GFP_KERNEL);
+ if (memcg->id.id < 0)
+ goto fail;
+
memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
if (!memcg->stat)
goto fail;
@@ -4142,8 +4202,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
#ifdef CONFIG_CGROUP_WRITEBACK
INIT_LIST_HEAD(&memcg->cgwb_list);
#endif
+ idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
return memcg;
fail:
+ if (memcg->id.id > 0)
+ idr_remove(&mem_cgroup_idr, memcg->id.id);
mem_cgroup_free(memcg);
return NULL;
}
@@ -4203,15 +4266,14 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
return &memcg->css;
fail:
mem_cgroup_free(memcg);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
-static int
-mem_cgroup_css_online(struct cgroup_subsys_state *css)
+static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
{
- if (css->id > MEM_CGROUP_ID_MAX)
- return -ENOSPC;
-
+ /* Online state pins memcg ID, memcg ID pins CSS */
+ mem_cgroup_id_get(mem_cgroup_from_css(css));
+ css_get(css);
return 0;
}
@@ -4234,6 +4296,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
memcg_offline_kmem(memcg);
wb_memcg_offline(memcg);
+
+ mem_cgroup_id_put(memcg);
}
static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
@@ -5544,6 +5608,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
struct mem_cgroup *memcg;
unsigned int nr_pages;
bool compound;
+ unsigned long flags;
VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
@@ -5574,10 +5639,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
commit_charge(newpage, memcg, false);
- local_irq_disable();
+ local_irq_save(flags);
mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
memcg_check_events(memcg, newpage);
- local_irq_enable();
+ local_irq_restore(flags);
}
DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
@@ -5755,6 +5820,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
if (!memcg)
return;
+ mem_cgroup_id_get(memcg);
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
mem_cgroup_swap_statistics(memcg, true);
@@ -5773,6 +5839,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
VM_BUG_ON(!irqs_disabled());
mem_cgroup_charge_statistics(memcg, page, false, -1);
memcg_check_events(memcg, page);
+
+ if (!mem_cgroup_is_root(memcg))
+ css_put(&memcg->css);
}
/*
@@ -5803,11 +5872,11 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
!page_counter_try_charge(&memcg->swap, 1, &counter))
return -ENOMEM;
+ mem_cgroup_id_get(memcg);
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
mem_cgroup_swap_statistics(memcg, true);
- css_get(&memcg->css);
return 0;
}
@@ -5836,7 +5905,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
page_counter_uncharge(&memcg->memsw, 1);
}
mem_cgroup_swap_statistics(memcg, false);
- css_put(&memcg->css);
+ mem_cgroup_id_put(memcg);
}
rcu_read_unlock();
}
diff --git a/mm/memory.c b/mm/memory.c
index 15322b73636b..9e046819e619 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2399,8 +2399,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
* Protected against the rmap code by
* the page lock.
*/
- page_move_anon_rmap(compound_head(old_page),
- vma, address);
+ page_move_anon_rmap(old_page, vma);
}
unlock_page(old_page);
return wp_page_reuse(mm, vma, address, page_table, ptl,
@@ -2877,7 +2876,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
* vm_ops->map_pages.
*/
void do_set_pte(struct vm_area_struct *vma, unsigned long address,
- struct page *page, pte_t *pte, bool write, bool anon, bool old)
+ struct page *page, pte_t *pte, bool write, bool anon)
{
pte_t entry;
@@ -2885,8 +2884,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
entry = mk_pte(page, vma->vm_page_prot);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- if (old)
- entry = pte_mkold(entry);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address, false);
@@ -2900,16 +2897,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
update_mmu_cache(vma, address, pte);
}
-/*
- * If architecture emulates "accessed" or "young" bit without HW support,
- * there is no much gain with fault_around.
- */
static unsigned long fault_around_bytes __read_mostly =
-#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
- PAGE_SIZE;
-#else
rounddown_pow_of_two(65536);
-#endif
#ifdef CONFIG_DEBUG_FS
static int fault_around_bytes_get(void *data, u64 *val)
@@ -3032,20 +3021,9 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
- if (!pte_same(*pte, orig_pte))
- goto unlock_out;
do_fault_around(vma, address, pte, pgoff, flags);
- /* Check if the fault is handled by faultaround */
- if (!pte_same(*pte, orig_pte)) {
- /*
- * Faultaround produce old pte, but the pte we've
- * handler fault for should be young.
- */
- pte_t entry = pte_mkyoung(*pte);
- if (ptep_set_access_flags(vma, address, pte, entry, 0))
- update_mmu_cache(vma, address, pte);
+ if (!pte_same(*pte, orig_pte))
goto unlock_out;
- }
pte_unmap_unlock(pte, ptl);
}
@@ -3060,7 +3038,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
put_page(fault_page);
return ret;
}
- do_set_pte(vma, address, fault_page, pte, false, false, false);
+ do_set_pte(vma, address, fault_page, pte, false, false);
unlock_page(fault_page);
unlock_out:
pte_unmap_unlock(pte, ptl);
@@ -3111,7 +3089,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
goto uncharge_out;
}
- do_set_pte(vma, address, new_page, pte, true, true, false);
+ do_set_pte(vma, address, new_page, pte, true, true);
mem_cgroup_commit_charge(new_page, memcg, false, false);
lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
@@ -3164,7 +3142,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
put_page(fault_page);
return ret;
}
- do_set_pte(vma, address, fault_page, pte, true, false, false);
+ do_set_pte(vma, address, fault_page, pte, true, false);
pte_unmap_unlock(pte, ptl);
if (set_page_dirty(fault_page))
diff --git a/mm/mempool.c b/mm/mempool.c
index 9e075f829d0d..8f65464da5de 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -104,20 +104,16 @@ static inline void poison_element(mempool_t *pool, void *element)
static void kasan_poison_element(mempool_t *pool, void *element)
{
- if (pool->alloc == mempool_alloc_slab)
- kasan_poison_slab_free(pool->pool_data, element);
- if (pool->alloc == mempool_kmalloc)
- kasan_kfree(element);
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+ kasan_poison_kfree(element);
if (pool->alloc == mempool_alloc_pages)
kasan_free_pages(element, (unsigned long)pool->pool_data);
}
static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags)
{
- if (pool->alloc == mempool_alloc_slab)
- kasan_slab_alloc(pool->pool_data, element, flags);
- if (pool->alloc == mempool_kmalloc)
- kasan_krealloc(element, (size_t)pool->pool_data, flags);
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+ kasan_unpoison_slab(element);
if (pool->alloc == mempool_alloc_pages)
kasan_alloc_pages(element, (unsigned long)pool->pool_data);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 9baf41c877ff..bd3fdc202e8b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -431,6 +431,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
return MIGRATEPAGE_SUCCESS;
}
+EXPORT_SYMBOL(migrate_page_move_mapping);
/*
* The expected number of remaining references is the same as that
@@ -586,6 +587,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
mem_cgroup_migrate(page, newpage);
}
+EXPORT_SYMBOL(migrate_page_copy);
/************************************************************
* Migration functions
diff --git a/mm/mmap.c b/mm/mmap.c
index de2c1769cc68..234edffec1d0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2943,9 +2943,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
return ((struct vm_special_mapping *)vma->vm_private_data)->name;
}
+static int special_mapping_mremap(struct vm_area_struct *new_vma)
+{
+ struct vm_special_mapping *sm = new_vma->vm_private_data;
+
+ if (sm->mremap)
+ return sm->mremap(sm, new_vma);
+ return 0;
+}
+
static const struct vm_operations_struct special_mapping_vmops = {
.close = special_mapping_close,
.fault = special_mapping_fault,
+ .mremap = special_mapping_mremap,
.name = special_mapping_name,
};
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index acbc432d1a52..ddf74487f848 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -474,13 +474,8 @@ static bool __oom_reap_task(struct task_struct *tsk)
p = find_lock_task_mm(tsk);
if (!p)
goto unlock_oom;
-
mm = p->mm;
- if (!atomic_inc_not_zero(&mm->mm_users)) {
- task_unlock(p);
- goto unlock_oom;
- }
-
+ atomic_inc(&mm->mm_users);
task_unlock(p);
if (!down_read_trylock(&mm->mmap_sem)) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6903b695ebae..8b3e1341b754 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -286,7 +286,9 @@ static inline void reset_deferred_meminit(pg_data_t *pgdat)
/* Returns true if the struct page for the pfn is uninitialised */
static inline bool __meminit early_page_uninitialised(unsigned long pfn)
{
- if (pfn >= NODE_DATA(early_pfn_to_nid(pfn))->first_deferred_pfn)
+ int nid = early_pfn_to_nid(pfn);
+
+ if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn)
return true;
return false;
@@ -1273,7 +1275,7 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
spin_lock(&early_pfn_lock);
nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
if (nid < 0)
- nid = 0;
+ nid = first_online_node;
spin_unlock(&early_pfn_lock);
return nid;
diff --git a/mm/page_owner.c b/mm/page_owner.c
index c6cda3e36212..fedeba88c9cb 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -207,13 +207,15 @@ void __dump_page_owner(struct page *page)
.nr_entries = page_ext->nr_entries,
.entries = &page_ext->trace_entries[0],
};
- gfp_t gfp_mask = page_ext->gfp_mask;
- int mt = gfpflags_to_migratetype(gfp_mask);
+ gfp_t gfp_mask;
+ int mt;
if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
}
+ gfp_mask = page_ext->gfp_mask;
+ mt = gfpflags_to_migratetype(gfp_mask);
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
pr_alert("page_owner info is not active (free page?)\n");
diff --git a/mm/percpu.c b/mm/percpu.c
index 0c59684f1ff2..9903830aaebb 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -112,7 +112,7 @@ struct pcpu_chunk {
int map_used; /* # of map entries used before the sentry */
int map_alloc; /* # of map entries allocated */
int *map; /* allocation map */
- struct work_struct map_extend_work;/* async ->map[] extension */
+ struct list_head map_extend_list;/* on pcpu_map_extend_chunks */
void *data; /* chunk data */
int first_free; /* no free below this */
@@ -162,10 +162,13 @@ static struct pcpu_chunk *pcpu_reserved_chunk;
static int pcpu_reserved_chunk_limit;
static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */
-static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */
+static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */
static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
+/* chunks which need their map areas extended, protected by pcpu_lock */
+static LIST_HEAD(pcpu_map_extend_chunks);
+
/*
* The number of empty populated pages, protected by pcpu_lock. The
* reserved chunk doesn't contribute to the count.
@@ -395,13 +398,19 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic)
{
int margin, new_alloc;
+ lockdep_assert_held(&pcpu_lock);
+
if (is_atomic) {
margin = 3;
if (chunk->map_alloc <
- chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW &&
- pcpu_async_enabled)
- schedule_work(&chunk->map_extend_work);
+ chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) {
+ if (list_empty(&chunk->map_extend_list)) {
+ list_add_tail(&chunk->map_extend_list,
+ &pcpu_map_extend_chunks);
+ pcpu_schedule_balance_work();
+ }
+ }
} else {
margin = PCPU_ATOMIC_MAP_MARGIN_HIGH;
}
@@ -435,6 +444,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
unsigned long flags;
+ lockdep_assert_held(&pcpu_alloc_mutex);
+
new = pcpu_mem_zalloc(new_size);
if (!new)
return -ENOMEM;
@@ -467,20 +478,6 @@ out_unlock:
return 0;
}
-static void pcpu_map_extend_workfn(struct work_struct *work)
-{
- struct pcpu_chunk *chunk = container_of(work, struct pcpu_chunk,
- map_extend_work);
- int new_alloc;
-
- spin_lock_irq(&pcpu_lock);
- new_alloc = pcpu_need_to_extend(chunk, false);
- spin_unlock_irq(&pcpu_lock);
-
- if (new_alloc)
- pcpu_extend_area_map(chunk, new_alloc);
-}
-
/**
* pcpu_fit_in_area - try to fit the requested allocation in a candidate area
* @chunk: chunk the candidate area belongs to
@@ -740,7 +737,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
chunk->map_used = 1;
INIT_LIST_HEAD(&chunk->list);
- INIT_WORK(&chunk->map_extend_work, pcpu_map_extend_workfn);
+ INIT_LIST_HEAD(&chunk->map_extend_list);
chunk->free_size = pcpu_unit_size;
chunk->contig_hint = pcpu_unit_size;
@@ -895,6 +892,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
return NULL;
}
+ if (!is_atomic)
+ mutex_lock(&pcpu_alloc_mutex);
+
spin_lock_irqsave(&pcpu_lock, flags);
/* serve reserved allocations from the reserved chunk if available */
@@ -967,12 +967,9 @@ restart:
if (is_atomic)
goto fail;
- mutex_lock(&pcpu_alloc_mutex);
-
if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
chunk = pcpu_create_chunk();
if (!chunk) {
- mutex_unlock(&pcpu_alloc_mutex);
err = "failed to allocate new chunk";
goto fail;
}
@@ -983,7 +980,6 @@ restart:
spin_lock_irqsave(&pcpu_lock, flags);
}
- mutex_unlock(&pcpu_alloc_mutex);
goto restart;
area_found:
@@ -993,8 +989,6 @@ area_found:
if (!is_atomic) {
int page_start, page_end, rs, re;
- mutex_lock(&pcpu_alloc_mutex);
-
page_start = PFN_DOWN(off);
page_end = PFN_UP(off + size);
@@ -1005,7 +999,6 @@ area_found:
spin_lock_irqsave(&pcpu_lock, flags);
if (ret) {
- mutex_unlock(&pcpu_alloc_mutex);
pcpu_free_area(chunk, off, &occ_pages);
err = "failed to populate";
goto fail_unlock;
@@ -1045,6 +1038,8 @@ fail:
/* see the flag handling in pcpu_blance_workfn() */
pcpu_atomic_alloc_failed = true;
pcpu_schedule_balance_work();
+ } else {
+ mutex_unlock(&pcpu_alloc_mutex);
}
return NULL;
}
@@ -1129,6 +1124,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
continue;
+ list_del_init(&chunk->map_extend_list);
list_move(&chunk->list, &to_free);
}
@@ -1146,6 +1142,25 @@ static void pcpu_balance_workfn(struct work_struct *work)
pcpu_destroy_chunk(chunk);
}
+ /* service chunks which requested async area map extension */
+ do {
+ int new_alloc = 0;
+
+ spin_lock_irq(&pcpu_lock);
+
+ chunk = list_first_entry_or_null(&pcpu_map_extend_chunks,
+ struct pcpu_chunk, map_extend_list);
+ if (chunk) {
+ list_del_init(&chunk->map_extend_list);
+ new_alloc = pcpu_need_to_extend(chunk, false);
+ }
+
+ spin_unlock_irq(&pcpu_lock);
+
+ if (new_alloc)
+ pcpu_extend_area_map(chunk, new_alloc);
+ } while (chunk);
+
/*
* Ensure there are certain number of free populated pages for
* atomic allocs. Fill up from the most packed so that atomic
@@ -1644,7 +1659,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
*/
schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
INIT_LIST_HEAD(&schunk->list);
- INIT_WORK(&schunk->map_extend_work, pcpu_map_extend_workfn);
+ INIT_LIST_HEAD(&schunk->map_extend_list);
schunk->base_addr = base_addr;
schunk->map = smap;
schunk->map_alloc = ARRAY_SIZE(smap);
@@ -1673,7 +1688,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
if (dyn_size) {
dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
INIT_LIST_HEAD(&dchunk->list);
- INIT_WORK(&dchunk->map_extend_work, pcpu_map_extend_workfn);
+ INIT_LIST_HEAD(&dchunk->map_extend_list);
dchunk->base_addr = base_addr;
dchunk->map = dmap;
dchunk->map_alloc = ARRAY_SIZE(dmap);
diff --git a/mm/rmap.c b/mm/rmap.c
index 0ea5d9071b32..701b93fea2a0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1084,23 +1084,20 @@ EXPORT_SYMBOL_GPL(page_mkclean);
* page_move_anon_rmap - move a page to our anon_vma
* @page: the page to move to our anon_vma
* @vma: the vma the page belongs to
- * @address: the user virtual address mapped
*
* When a page belongs exclusively to one process after a COW event,
* that page can be moved into the anon_vma that belongs to just that
* process, so the rmap code will not search the parent or sibling
* processes.
*/
-void page_move_anon_rmap(struct page *page,
- struct vm_area_struct *vma, unsigned long address)
+void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
+ page = compound_head(page);
+
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_VMA(!anon_vma, vma);
- if (IS_ENABLED(CONFIG_DEBUG_VM) && PageTransHuge(page))
- address &= HPAGE_PMD_MASK;
- VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
/*
@@ -1427,7 +1424,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
goto out;
}
- pte = page_check_address(page, mm, address, &ptl, 0);
+ pte = page_check_address(page, mm, address, &ptl,
+ PageTransCompound(page));
if (!pte)
goto out;
diff --git a/mm/shmem.c b/mm/shmem.c
index a36144909b28..171dee7a131f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2225,9 +2225,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
error = shmem_getpage(inode, index, &page, SGP_FALLOC);
if (error) {
/* Remove the !PageUptodate pages we added */
- shmem_undo_range(inode,
- (loff_t)start << PAGE_SHIFT,
- (loff_t)index << PAGE_SHIFT, true);
+ if (index > start) {
+ shmem_undo_range(inode,
+ (loff_t)start << PAGE_SHIFT,
+ ((loff_t)index << PAGE_SHIFT) - 1, true);
+ }
goto undone;
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a65dad7fdcd1..82317abb03ed 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -526,8 +526,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
goto out_unlock;
cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
- cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
- css->id, memcg_name_buf);
+ cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
+ css->serial_nr, memcg_name_buf);
if (!cache_name)
goto out_unlock;
diff --git a/mm/swap.c b/mm/swap.c
index 59f5fafa6e1f..90530ff8ed16 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -242,7 +242,7 @@ void rotate_reclaimable_page(struct page *page)
get_page(page);
local_irq_save(flags);
pvec = this_cpu_ptr(&lru_rotate_pvecs);
- if (!pagevec_add(pvec, page))
+ if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_move_tail(pvec);
local_irq_restore(flags);
}
@@ -296,7 +296,7 @@ void activate_page(struct page *page)
struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
get_page(page);
- if (!pagevec_add(pvec, page))
+ if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_lru_move_fn(pvec, __activate_page, NULL);
put_cpu_var(activate_page_pvecs);
}
@@ -391,9 +391,8 @@ static void __lru_cache_add(struct page *page)
struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
get_page(page);
- if (!pagevec_space(pvec))
+ if (!pagevec_add(pvec, page) || PageCompound(page))
__pagevec_lru_add(pvec);
- pagevec_add(pvec, page);
put_cpu_var(lru_add_pvec);
}
@@ -628,7 +627,7 @@ void deactivate_file_page(struct page *page)
if (likely(get_page_unless_zero(page))) {
struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
- if (!pagevec_add(pvec, page))
+ if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
put_cpu_var(lru_deactivate_file_pvecs);
}
@@ -648,7 +647,7 @@ void deactivate_page(struct page *page)
struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
get_page(page);
- if (!pagevec_add(pvec, page))
+ if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
put_cpu_var(lru_deactivate_pvecs);
}
diff --git a/mm/workingset.c b/mm/workingset.c
index 8a75f8d2916a..577277546d98 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -491,7 +491,7 @@ static int __init workingset_init(void)
max_order = fls_long(totalram_pages - 1);
if (max_order > timestamp_bits)
bucket_order = max_order - timestamp_bits;
- printk("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
+ pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
timestamp_bits, max_order, bucket_order);
ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);