summary refs log tree commit diff
path: root/mm
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2019-12-25 10:41:37 +0100
committer Ingo Molnar <mingo@kernel.org> 2019-12-25 10:41:37 +0100
commit 1e5f8a308551b9816588e12bb795aeadebe37c4a (patch)
tree bd71fc796fed24a3b7cc99df4a1d1bdaecc2b387 /mm
parent a5e37de90e67ac1072a9a44bd0cec9f5e98ded08 (diff)
parent 46cf053efec6a3a5f343fead837777efe8252a46 (diff)
Merge tag 'v5.5-rc3' into sched/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/kasan/common.c 37
-rw-r--r-- mm/ksm.c 1
-rw-r--r-- mm/memcontrol.c 52
-rw-r--r-- mm/memory.c 146
-rw-r--r-- mm/slab_common.c 12
-rw-r--r-- mm/vmalloc.c 129
-rw-r--r-- mm/vmscan.c 2
-rw-r--r-- mm/vmstat.c 60
8 files changed, 257 insertions, 182 deletions
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index df3371d5c572..c15d8ae68c96 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,6 +36,7 @@
#include <linux/bug.h>
#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include "kasan.h"
@@ -777,15 +778,17 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
return 0;
}
-int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
+int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
{
unsigned long shadow_start, shadow_end;
int ret;
- shadow_start = (unsigned long)kasan_mem_to_shadow(area->addr);
+ if (!is_vmalloc_or_module_addr((void *)addr))
+ return 0;
+
+ shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
- shadow_end = (unsigned long)kasan_mem_to_shadow(area->addr +
- area->size);
+ shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
shadow_end = ALIGN(shadow_end, PAGE_SIZE);
ret = apply_to_page_range(&init_mm, shadow_start,
@@ -796,10 +799,6 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
flush_cache_vmap(shadow_start, shadow_end);
- kasan_unpoison_shadow(area->addr, requested_size);
-
- area->flags |= VM_KASAN;
-
/*
* We need to be careful about inter-cpu effects here. Consider:
*
@@ -842,12 +841,23 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
* Poison the shadow for a vmalloc region. Called as part of the
* freeing process at the time the region is freed.
*/
-void kasan_poison_vmalloc(void *start, unsigned long size)
+void kasan_poison_vmalloc(const void *start, unsigned long size)
{
+ if (!is_vmalloc_or_module_addr(start))
+ return;
+
size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID);
}
+void kasan_unpoison_vmalloc(const void *start, unsigned long size)
+{
+ if (!is_vmalloc_or_module_addr(start))
+ return;
+
+ kasan_unpoison_shadow(start, size);
+}
+
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
void *unused)
{
@@ -947,6 +957,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
{
void *shadow_start, *shadow_end;
unsigned long region_start, region_end;
+ unsigned long size;
region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
@@ -969,9 +980,11 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
shadow_end = kasan_mem_to_shadow((void *)region_end);
if (shadow_end > shadow_start) {
- apply_to_page_range(&init_mm, (unsigned long)shadow_start,
- (unsigned long)(shadow_end - shadow_start),
- kasan_depopulate_vmalloc_pte, NULL);
+ size = shadow_end - shadow_start;
+ apply_to_existing_page_range(&init_mm,
+ (unsigned long)shadow_start,
+ size, kasan_depopulate_vmalloc_pte,
+ NULL);
flush_tlb_kernel_range((unsigned long)shadow_start,
(unsigned long)shadow_end);
}
diff --git a/mm/ksm.c b/mm/ksm.c
index 7905934cd3ad..d17c7d57d0d8 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2478,6 +2478,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
return 0;
}
+EXPORT_SYMBOL_GPL(ksm_madvise);
int __ksm_enter(struct mm_struct *mm)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bc01423277c5..c5b5f74cfd4d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -98,14 +98,6 @@ static bool do_memsw_account(void)
return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account;
}
-static const char *const mem_cgroup_lru_names[] = {
- "inactive_anon",
- "active_anon",
- "inactive_file",
- "active_file",
- "unevictable",
-};
-
#define THRESHOLDS_EVENTS_TARGET 128
#define SOFTLIMIT_EVENTS_TARGET 1024
@@ -1421,7 +1413,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
PAGE_SIZE);
for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(&s, "%s %llu\n", mem_cgroup_lru_names[i],
+ seq_buf_printf(&s, "%s %llu\n", lru_list_name(i),
(u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
PAGE_SIZE);
@@ -1434,8 +1426,10 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
/* Accumulated memory events */
- seq_buf_printf(&s, "pgfault %lu\n", memcg_events(memcg, PGFAULT));
- seq_buf_printf(&s, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGFAULT),
+ memcg_events(memcg, PGFAULT));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT),
+ memcg_events(memcg, PGMAJFAULT));
seq_buf_printf(&s, "workingset_refault %lu\n",
memcg_page_state(memcg, WORKINGSET_REFAULT));
@@ -1444,22 +1438,27 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
seq_buf_printf(&s, "workingset_nodereclaim %lu\n",
memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
- seq_buf_printf(&s, "pgrefill %lu\n", memcg_events(memcg, PGREFILL));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGREFILL),
+ memcg_events(memcg, PGREFILL));
seq_buf_printf(&s, "pgscan %lu\n",
memcg_events(memcg, PGSCAN_KSWAPD) +
memcg_events(memcg, PGSCAN_DIRECT));
seq_buf_printf(&s, "pgsteal %lu\n",
memcg_events(memcg, PGSTEAL_KSWAPD) +
memcg_events(memcg, PGSTEAL_DIRECT));
- seq_buf_printf(&s, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE));
- seq_buf_printf(&s, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE));
- seq_buf_printf(&s, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE));
- seq_buf_printf(&s, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGACTIVATE),
+ memcg_events(memcg, PGACTIVATE));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGDEACTIVATE),
+ memcg_events(memcg, PGDEACTIVATE));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREE),
+ memcg_events(memcg, PGLAZYFREE));
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREED),
+ memcg_events(memcg, PGLAZYFREED));
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- seq_buf_printf(&s, "thp_fault_alloc %lu\n",
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC),
memcg_events(memcg, THP_FAULT_ALLOC));
- seq_buf_printf(&s, "thp_collapse_alloc %lu\n",
+ seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC),
memcg_events(memcg, THP_COLLAPSE_ALLOC));
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -3742,13 +3741,6 @@ static const unsigned int memcg1_events[] = {
PGMAJFAULT,
};
-static const char *const memcg1_event_names[] = {
- "pgpgin",
- "pgpgout",
- "pgfault",
- "pgmajfault",
-};
-
static int memcg_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
@@ -3757,7 +3749,6 @@ static int memcg_stat_show(struct seq_file *m, void *v)
unsigned int i;
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
- BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
@@ -3768,11 +3759,11 @@ static int memcg_stat_show(struct seq_file *m, void *v)
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_printf(m, "%s %lu\n", memcg1_event_names[i],
+ seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]),
memcg_events_local(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++)
- seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
+ seq_printf(m, "%s %lu\n", lru_list_name(i),
memcg_page_state_local(memcg, NR_LRU_BASE + i) *
PAGE_SIZE);
@@ -3797,11 +3788,12 @@ static int memcg_stat_show(struct seq_file *m, void *v)
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
+ seq_printf(m, "total_%s %llu\n",
+ vm_event_name(memcg1_events[i]),
(u64)memcg_events(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++)
- seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
+ seq_printf(m, "total_%s %llu\n", lru_list_name(i),
(u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
PAGE_SIZE);
diff --git a/mm/memory.c b/mm/memory.c
index d56883c220f4..1c4be871a237 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -672,7 +672,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
if (pmd_devmap(pmd))
return NULL;
- if (is_zero_pfn(pfn))
+ if (is_huge_zero_pmd(pmd))
return NULL;
if (unlikely(pfn > highest_memmap_pfn))
return NULL;
@@ -2021,26 +2021,34 @@ EXPORT_SYMBOL(vm_iomap_memory);
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
pte_t *pte;
- int err;
+ int err = 0;
spinlock_t *uninitialized_var(ptl);
- pte = (mm == &init_mm) ?
- pte_alloc_kernel(pmd, addr) :
- pte_alloc_map_lock(mm, pmd, addr, &ptl);
- if (!pte)
- return -ENOMEM;
+ if (create) {
+ pte = (mm == &init_mm) ?
+ pte_alloc_kernel(pmd, addr) :
+ pte_alloc_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+ } else {
+ pte = (mm == &init_mm) ?
+ pte_offset_kernel(pmd, addr) :
+ pte_offset_map_lock(mm, pmd, addr, &ptl);
+ }
BUG_ON(pmd_huge(*pmd));
arch_enter_lazy_mmu_mode();
do {
- err = fn(pte++, addr, data);
- if (err)
- break;
+ if (create || !pte_none(*pte)) {
+ err = fn(pte++, addr, data);
+ if (err)
+ break;
+ }
} while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
@@ -2052,77 +2060,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
pmd_t *pmd;
unsigned long next;
- int err;
+ int err = 0;
BUG_ON(pud_huge(*pud));
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
+ if (create) {
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
+ } else {
+ pmd = pmd_offset(pud, addr);
+ }
do {
next = pmd_addr_end(addr, end);
- err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
- if (err)
- break;
+ if (create || !pmd_none_or_clear_bad(pmd)) {
+ err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
+ create);
+ if (err)
+ break;
+ }
} while (pmd++, addr = next, addr != end);
return err;
}
static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
pud_t *pud;
unsigned long next;
- int err;
+ int err = 0;
- pud = pud_alloc(mm, p4d, addr);
- if (!pud)
- return -ENOMEM;
+ if (create) {
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
+ return -ENOMEM;
+ } else {
+ pud = pud_offset(p4d, addr);
+ }
do {
next = pud_addr_end(addr, end);
- err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
- if (err)
- break;
+ if (create || !pud_none_or_clear_bad(pud)) {
+ err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
+ create);
+ if (err)
+ break;
+ }
} while (pud++, addr = next, addr != end);
return err;
}
static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
p4d_t *p4d;
unsigned long next;
- int err;
+ int err = 0;
- p4d = p4d_alloc(mm, pgd, addr);
- if (!p4d)
- return -ENOMEM;
+ if (create) {
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return -ENOMEM;
+ } else {
+ p4d = p4d_offset(pgd, addr);
+ }
do {
next = p4d_addr_end(addr, end);
- err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
- if (err)
- break;
+ if (create || !p4d_none_or_clear_bad(p4d)) {
+ err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
+ create);
+ if (err)
+ break;
+ }
} while (p4d++, addr = next, addr != end);
return err;
}
-/*
- * Scan a region of virtual memory, filling in page tables as necessary
- * and calling a provided function on each leaf page table.
- */
-int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
- unsigned long size, pte_fn_t fn, void *data)
+static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn,
+ void *data, bool create)
{
pgd_t *pgd;
unsigned long next;
unsigned long end = addr + size;
- int err;
+ int err = 0;
if (WARN_ON(addr >= end))
return -EINVAL;
@@ -2130,16 +2156,42 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
- err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
+ if (!create && pgd_none_or_clear_bad(pgd))
+ continue;
+ err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create);
if (err)
break;
} while (pgd++, addr = next, addr != end);
return err;
}
+
+/*
+ * Scan a region of virtual memory, filling in page tables as necessary
+ * and calling a provided function on each leaf page table.
+ */
+int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn, void *data)
+{
+ return __apply_to_page_range(mm, addr, size, fn, data, true);
+}
EXPORT_SYMBOL_GPL(apply_to_page_range);
/*
+ * Scan a region of virtual memory, calling a provided function on
+ * each leaf page table where it exists.
+ *
+ * Unlike apply_to_page_range, this does _not_ fill in page tables
+ * where they are absent.
+ */
+int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn, void *data)
+{
+ return __apply_to_page_range(mm, addr, size, fn, data, false);
+}
+EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
+
+/*
* handle_pte_fault chooses page fault handler according to an entry which was
* read non-atomically. Before making any commitment, on those architectures
* or configurations (e.g. i386 with PAE) which might give a mix of unmatched
@@ -4197,19 +4249,11 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
smp_wmb(); /* See comment in __pte_alloc */
ptl = pud_lock(mm, pud);
-#ifndef __ARCH_HAS_4LEVEL_HACK
if (!pud_present(*pud)) {
mm_inc_nr_pmds(mm);
pud_populate(mm, pud, new);
} else /* Another has populated it */
pmd_free(mm, new);
-#else
- if (!pgd_present(*pud)) {
- mm_inc_nr_pmds(mm);
- pgd_populate(mm, pud, new);
- } else /* Another has populated it */
- pmd_free(mm, new);
-#endif /* __ARCH_HAS_4LEVEL_HACK */
spin_unlock(ptl);
return 0;
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 8afa188f6e20..f0ab6d4ceb4c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -904,6 +904,18 @@ static void flush_memcg_workqueue(struct kmem_cache *s)
* previous workitems on workqueue are processed.
*/
flush_workqueue(memcg_kmem_cache_wq);
+
+ /*
+ * If we're racing with children kmem_cache deactivation, it might
+ * take another rcu grace period to complete their destruction.
+ * At this moment the corresponding percpu_ref_kill() call should be
+ * done, but it might take another rcu grace period to complete
+ * switching to the atomic mode.
+ * Please, note that we check without grabbing the slab_mutex. It's safe
+ * because at this moment the children list can't grow.
+ */
+ if (!list_empty(&s->memcg_params.children))
+ rcu_barrier();
}
#else
static inline int shutdown_memcg_caches(struct kmem_cache *s)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4d3b3d60d893..e9681dc4aa75 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1062,6 +1062,26 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
}
/*
+ * Free a region of KVA allocated by alloc_vmap_area
+ */
+static void free_vmap_area(struct vmap_area *va)
+{
+ /*
+ * Remove from the busy tree/list.
+ */
+ spin_lock(&vmap_area_lock);
+ unlink_va(va, &vmap_area_root);
+ spin_unlock(&vmap_area_lock);
+
+ /*
+ * Insert/Merge it back to the free tree/list.
+ */
+ spin_lock(&free_vmap_area_lock);
+ merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
+ spin_unlock(&free_vmap_area_lock);
+}
+
+/*
* Allocate a region of KVA of the specified size and alignment, within the
* vstart and vend.
*/
@@ -1073,6 +1093,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
struct vmap_area *va, *pva;
unsigned long addr;
int purged = 0;
+ int ret;
BUG_ON(!size);
BUG_ON(offset_in_page(size));
@@ -1139,6 +1160,7 @@ retry:
va->va_end = addr + size;
va->vm = NULL;
+
spin_lock(&vmap_area_lock);
insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
spin_unlock(&vmap_area_lock);
@@ -1147,6 +1169,12 @@ retry:
BUG_ON(va->va_start < vstart);
BUG_ON(va->va_end > vend);
+ ret = kasan_populate_vmalloc(addr, size);
+ if (ret) {
+ free_vmap_area(va);
+ return ERR_PTR(ret);
+ }
+
return va;
overflow:
@@ -1186,26 +1214,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
/*
- * Free a region of KVA allocated by alloc_vmap_area
- */
-static void free_vmap_area(struct vmap_area *va)
-{
- /*
- * Remove from the busy tree/list.
- */
- spin_lock(&vmap_area_lock);
- unlink_va(va, &vmap_area_root);
- spin_unlock(&vmap_area_lock);
-
- /*
- * Insert/Merge it back to the free tree/list.
- */
- spin_lock(&free_vmap_area_lock);
- merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
- spin_unlock(&free_vmap_area_lock);
-}
-
-/*
* Clear the pagetable entries of a given vmap_area
*/
static void unmap_vmap_area(struct vmap_area *va)
@@ -1771,6 +1779,8 @@ void vm_unmap_ram(const void *mem, unsigned int count)
BUG_ON(addr > VMALLOC_END);
BUG_ON(!PAGE_ALIGNED(addr));
+ kasan_poison_vmalloc(mem, size);
+
if (likely(count <= VMAP_MAX_ALLOC)) {
debug_check_no_locks_freed(mem, size);
vb_free(mem, size);
@@ -1821,6 +1831,9 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
addr = va->va_start;
mem = (void *)addr;
}
+
+ kasan_unpoison_vmalloc(mem, size);
+
if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
vm_unmap_ram(mem, count);
return NULL;
@@ -2075,6 +2088,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
{
struct vmap_area *va;
struct vm_struct *area;
+ unsigned long requested_size = size;
BUG_ON(in_interrupt());
size = PAGE_ALIGN(size);
@@ -2098,23 +2112,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
return NULL;
}
- setup_vmalloc_vm(area, va, flags, caller);
+ kasan_unpoison_vmalloc((void *)va->va_start, requested_size);
- /*
- * For KASAN, if we are in vmalloc space, we need to cover the shadow
- * area with real memory. If we come here through VM_ALLOC, this is
- * done by a higher level function that has access to the true size,
- * which might not be a full page.
- *
- * We assume module space comes via VM_ALLOC path.
- */
- if (is_vmalloc_addr(area->addr) && !(area->flags & VM_ALLOC)) {
- if (kasan_populate_vmalloc(area->size, area)) {
- unmap_vmap_area(va);
- kfree(area);
- return NULL;
- }
- }
+ setup_vmalloc_vm(area, va, flags, caller);
return area;
}
@@ -2293,8 +2293,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
- if (area->flags & VM_KASAN)
- kasan_poison_vmalloc(area->addr, area->size);
+ kasan_poison_vmalloc(area->addr, area->size);
vm_remove_mappings(area, deallocate_pages);
@@ -2539,7 +2538,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!size || (size >> PAGE_SHIFT) > totalram_pages())
goto fail;
- area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
+ area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
vm_flags, start, end, node, gfp_mask, caller);
if (!area)
goto fail;
@@ -2548,11 +2547,6 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!addr)
return NULL;
- if (is_vmalloc_or_module_addr(area->addr)) {
- if (kasan_populate_vmalloc(real_size, area))
- return NULL;
- }
-
/*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
* flag. It means that vm_struct is not fully initialized.
@@ -3294,7 +3288,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
struct vmap_area **vas, *va;
struct vm_struct **vms;
int area, area2, last_area, term_area;
- unsigned long base, start, size, end, last_end;
+ unsigned long base, start, size, end, last_end, orig_start, orig_end;
bool purged = false;
enum fit_type type;
@@ -3424,6 +3418,15 @@ retry:
spin_unlock(&free_vmap_area_lock);
+ /* populate the kasan shadow space */
+ for (area = 0; area < nr_vms; area++) {
+ if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
+ goto err_free_shadow;
+
+ kasan_unpoison_vmalloc((void *)vas[area]->va_start,
+ sizes[area]);
+ }
+
/* insert all vm's */
spin_lock(&vmap_area_lock);
for (area = 0; area < nr_vms; area++) {
@@ -3434,12 +3437,6 @@ retry:
}
spin_unlock(&vmap_area_lock);
- /* populate the shadow space outside of the lock */
- for (area = 0; area < nr_vms; area++) {
- /* assume success here */
- kasan_populate_vmalloc(sizes[area], vms[area]);
- }
-
kfree(vas);
return vms;
@@ -3451,8 +3448,12 @@ recovery:
* and when pcpu_get_vm_areas() is success.
*/
while (area--) {
- merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
- &free_vmap_area_list);
+ orig_start = vas[area]->va_start;
+ orig_end = vas[area]->va_end;
+ va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
+ &free_vmap_area_list);
+ kasan_release_vmalloc(orig_start, orig_end,
+ va->va_start, va->va_end);
vas[area] = NULL;
}
@@ -3487,6 +3488,28 @@ err_free2:
kfree(vas);
kfree(vms);
return NULL;
+
+err_free_shadow:
+ spin_lock(&free_vmap_area_lock);
+ /*
+ * We release all the vmalloc shadows, even the ones for regions that
+ * hadn't been successfully added. This relies on kasan_release_vmalloc
+ * being able to tolerate this case.
+ */
+ for (area = 0; area < nr_vms; area++) {
+ orig_start = vas[area]->va_start;
+ orig_end = vas[area]->va_end;
+ va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
+ &free_vmap_area_list);
+ kasan_release_vmalloc(orig_start, orig_end,
+ va->va_start, va->va_end);
+ vas[area] = NULL;
+ kfree(vms[area]);
+ }
+ spin_unlock(&free_vmap_area_lock);
+ kfree(vas);
+ kfree(vms);
+ return NULL;
}
/**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 74e8edce83ca..572fb17c6273 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -387,7 +387,7 @@ void register_shrinker_prepared(struct shrinker *shrinker)
{
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
idr_replace(&shrinker_idr, shrinker, shrinker->id);
#endif
diff --git a/mm/vmstat.c b/mm/vmstat.c
index a8222041bd44..78d53378db99 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1084,7 +1084,8 @@ int fragmentation_index(struct zone *zone, unsigned int order)
}
#endif
-#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
+ defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
@@ -1134,7 +1135,7 @@ const char * const vmstat_text[] = {
"numa_other",
#endif
- /* Node-based counters */
+ /* enum node_stat_item counters */
"nr_inactive_anon",
"nr_active_anon",
"nr_inactive_file",
@@ -1172,7 +1173,7 @@ const char * const vmstat_text[] = {
"nr_dirty_threshold",
"nr_dirty_background_threshold",
-#ifdef CONFIG_VM_EVENT_COUNTERS
+#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
/* enum vm_event_item counters */
"pgpgin",
"pgpgout",
@@ -1291,9 +1292,9 @@ const char * const vmstat_text[] = {
"swap_ra",
"swap_ra_hit",
#endif
-#endif /* CONFIG_VM_EVENTS_COUNTERS */
+#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
-#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
+#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
defined(CONFIG_PROC_FS)
@@ -1564,10 +1565,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
if (is_zone_first_populated(pgdat, zone)) {
seq_printf(m, "\n per-node stats");
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
- seq_printf(m, "\n %-12s %lu",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
- NR_VM_NUMA_STAT_ITEMS],
- node_page_state(pgdat, i));
+ seq_printf(m, "\n %-12s %lu", node_stat_name(i),
+ node_page_state(pgdat, i));
}
}
seq_printf(m,
@@ -1600,14 +1599,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
}
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- seq_printf(m, "\n %-12s %lu", vmstat_text[i],
- zone_page_state(zone, i));
+ seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
+ zone_page_state(zone, i));
#ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
- seq_printf(m, "\n %-12s %lu",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
- zone_numa_state_snapshot(zone, i));
+ seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
+ zone_numa_state_snapshot(zone, i));
#endif
seq_printf(m, "\n pagesets");
@@ -1658,31 +1656,23 @@ static const struct seq_operations zoneinfo_op = {
.show = zoneinfo_show,
};
-enum writeback_stat_item {
- NR_DIRTY_THRESHOLD,
- NR_DIRTY_BG_THRESHOLD,
- NR_VM_WRITEBACK_STAT_ITEMS,
-};
+#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
+ NR_VM_NUMA_STAT_ITEMS + \
+ NR_VM_NODE_STAT_ITEMS + \
+ NR_VM_WRITEBACK_STAT_ITEMS + \
+ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
+ NR_VM_EVENT_ITEMS : 0))
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
unsigned long *v;
- int i, stat_items_size;
+ int i;
- if (*pos >= ARRAY_SIZE(vmstat_text))
+ if (*pos >= NR_VMSTAT_ITEMS)
return NULL;
- stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
- NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
- NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
- NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
-
-#ifdef CONFIG_VM_EVENT_COUNTERS
- stat_items_size += sizeof(struct vm_event_state);
-#endif
- BUILD_BUG_ON(stat_items_size !=
- ARRAY_SIZE(vmstat_text) * sizeof(unsigned long));
- v = kmalloc(stat_items_size, GFP_KERNEL);
+ BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
+ v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
m->private = v;
if (!v)
return ERR_PTR(-ENOMEM);
@@ -1715,7 +1705,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
(*pos)++;
- if (*pos >= ARRAY_SIZE(vmstat_text))
+ if (*pos >= NR_VMSTAT_ITEMS)
return NULL;
return (unsigned long *)m->private + *pos;
}
@@ -1781,7 +1771,7 @@ int vmstat_refresh(struct ctl_table *table, int write,
val = atomic_long_read(&vm_zone_stat[i]);
if (val < 0) {
pr_warn("%s: %s %ld\n",
- __func__, vmstat_text[i], val);
+ __func__, zone_stat_name(i), val);
err = -EINVAL;
}
}
@@ -1790,7 +1780,7 @@ int vmstat_refresh(struct ctl_table *table, int write,
val = atomic_long_read(&vm_numa_stat[i]);
if (val < 0) {
pr_warn("%s: %s %ld\n",
- __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
+ __func__, numa_stat_name(i), val);
err = -EINVAL;
}
}