commit     325d0eab4f31c6240b59d5b2b8042c88f59405b5
tree       9b8db5a866fea994d5c64eca2955b17543e2e6fe
parent     c8d1a46f943877c08d1154a6f90f43a245a671cf
parent     2645d432051cd4e6c04ee8af23be07c92f1f52a2
author     Linus Torvalds <torvalds@linux-foundation.org>  2020-09-19 18:18:37 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2020-09-19 18:18:37 -0700
Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
"15 patches.
Subsystems affected by this patch series: mailmap, mm/hotfixes,
mm/thp, mm/memory-hotplug, misc, kcsan"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
kcsan: kconfig: move to menu 'Generic Kernel Debugging Instruments'
fs/fs-writeback.c: adjust dirtytime_interval_handler definition to match prototype
stackleak: let stack_erasing_sysctl take a kernel pointer buffer
ftrace: let ftrace_enable_sysctl take a kernel pointer buffer
mm/memory_hotplug: drain per-cpu pages again during memory offline
selftests/vm: fix display of page size in map_hugetlb
mm/thp: fix __split_huge_pmd_locked() for migration PMD
kprobes: fix kill kprobe which has been marked as gone
tmpfs: restore functionality of nr_inodes=0
mlock: fix unevictable_pgs event counts on THP
mm: fix check_move_unevictable_pages() on THP
mm: migration of hugetlbfs page skip memcg
ksm: reinstate memcg charge on copied pages
mailmap: add older email addresses for Kees Cook
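
Three of the patches above (the fs-writeback, stackleak and ftrace changes) drop the "__user" annotation from sysctl handlers, because the proc code now hands handlers a kernel-space buffer. As a minimal sketch only, assuming a hypothetical "example_threshold" knob, table and module that are not part of this series, a handler written against that kernel-pointer signature looks roughly like this:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sysctl.h>

static int example_threshold;	/* hypothetical tunable, not from this series */

/*
 * Note the plain 'void *buffer' parameter: with the kernel-pointer
 * prototype there is no copy_{to,from}_user() in the handler itself;
 * proc_dointvec() reads and writes the buffer directly.
 */
static int example_threshold_handler(struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (write && !ret)
		pr_info("example_threshold is now %d\n", example_threshold);
	return ret;
}

static struct ctl_table example_table[] = {
	{
		.procname	= "example_threshold",
		.data		= &example_threshold,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= example_threshold_handler,
	},
	{ }	/* sentinel */
};

static struct ctl_table_header *example_header;

static int __init example_init(void)
{
	/* exposes /proc/sys/kernel/example_threshold (illustrative only) */
	example_header = register_sysctl("kernel", example_table);
	return example_header ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
	unregister_sysctl_table(example_header);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
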
Diffstat:
 .mailmap                                 |  4
 fs/fs-writeback.c                        |  2
 include/linux/ftrace.h                   |  3
 include/linux/stackleak.h                |  2
 kernel/kprobes.c                         |  9
 kernel/stackleak.c                       |  2
 kernel/trace/ftrace.c                    |  3
 lib/Kconfig.debug                        |  4
 mm/huge_memory.c                         | 42
 mm/ksm.c                                 |  4
 mm/memory_hotplug.c                      | 14
 mm/migrate.c                             |  3
 mm/mlock.c                               | 24
 mm/page_isolation.c                      |  8
 mm/shmem.c                               | 10
 mm/swap.c                                |  6
 mm/vmscan.c                              | 10
 tools/testing/selftests/vm/map_hugetlb.c |  2
18 files changed, 102 insertions, 50 deletions
diff --git a/.mailmap b/.mailmap
--- a/.mailmap
+++ b/.mailmap
@@ -169,6 +169,10 @@ Juha Yrjola <juha.yrjola@solidboot.com>
 Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
 Kamil Konieczny <k.konieczny@samsung.com> <k.konieczny@partner.samsung.com>
 Kay Sievers <kay.sievers@vrfy.org>
+Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
+Kees Cook <keescook@chromium.org> <keescook@google.com>
+Kees Cook <keescook@chromium.org> <kees@outflux.net>
+Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
 Kenneth W Chen <kenneth.w.chen@intel.com>
 Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
 Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 149227160ff0..58b27e4070a3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2184,7 +2184,7 @@ static int __init start_dirtytime_writeback(void)
 __initcall(start_dirtytime_writeback);
 
 int dirtytime_interval_handler(struct ctl_table *table, int write,
-			       void __user *buffer, size_t *lenp, loff_t *ppos)
+			       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ce2c06f72e86..e5c2d5cc6e6a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -85,8 +85,7 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val
 extern int ftrace_enabled;
 extern int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp,
-		     loff_t *ppos);
+		     void *buffer, size_t *lenp, loff_t *ppos);
 
 struct ftrace_ops;
 
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index 3d5c3271a9a8..a59db2f08e76 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -25,7 +25,7 @@ static inline void stackleak_task_init(struct task_struct *t)
 
 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
 int stack_erasing_sysctl(struct ctl_table *table, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos);
+			void *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
 #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 287b263c9cb9..049da84e1952 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2140,6 +2140,9 @@ static void kill_kprobe(struct kprobe *p)
 
 	lockdep_assert_held(&kprobe_mutex);
 
+	if (WARN_ON_ONCE(kprobe_gone(p)))
+		return;
+
 	p->flags |= KPROBE_FLAG_GONE;
 	if (kprobe_aggrprobe(p)) {
 		/*
@@ -2419,7 +2422,10 @@ static int kprobes_module_callback(struct notifier_block *nb,
 	mutex_lock(&kprobe_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
-		hlist_for_each_entry(p, head, hlist)
+		hlist_for_each_entry(p, head, hlist) {
+			if (kprobe_gone(p))
+				continue;
+
 			if (within_module_init((unsigned long)p->addr, mod) ||
 			   (checkcore &&
 			    within_module_core((unsigned long)p->addr, mod))) {
@@ -2436,6 +2442,7 @@ static int kprobes_module_callback(struct notifier_block *nb,
 				 */
 				kill_kprobe(p);
 			}
+		}
 	}
 	if (val == MODULE_STATE_GOING)
 		remove_module_kprobe_blacklist(mod);
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index a8fc9ae1d03d..ce161a8e8d97 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -20,7 +20,7 @@
 static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
 
 int stack_erasing_sysctl(struct ctl_table *table, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
+			void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret = 0;
 	int state = !static_branch_unlikely(&stack_erasing_bypass);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 275441254bb5..e9fa580f3083 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7531,8 +7531,7 @@ static bool is_permanent_ops_registered(void)
 
 int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp,
-		     loff_t *ppos)
+		     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret = -ENODEV;
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e068c3c7189a..0c781f912f9f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -520,8 +520,8 @@ config DEBUG_FS_ALLOW_NONE
 endchoice
 
 source "lib/Kconfig.kgdb"
-
 source "lib/Kconfig.ubsan"
+source "lib/Kconfig.kcsan"
 
 endmenu
 
@@ -1620,8 +1620,6 @@ config PROVIDE_OHCI1394_DMA_INIT
 
 source "samples/Kconfig"
 
-source "lib/Kconfig.kcsan"
-
 config ARCH_HAS_DEVMEM_IS_ALLOWED
 	bool
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7ff29cc3d55c..faadc449cca5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2022,7 +2022,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		put_page(page);
 		add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
 		return;
-	} else if (is_huge_zero_pmd(*pmd)) {
+	} else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
 		/*
 		 * FIXME: Do we want to invalidate secondary mmu by calling
 		 * mmu_notifier_invalidate_range() see comments below inside
@@ -2116,30 +2116,34 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		pte = pte_offset_map(&_pmd, addr);
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, entry);
-		atomic_inc(&page[i]._mapcount);
-		pte_unmap(pte);
-	}
-
-	/*
-	 * Set PG_double_map before dropping compound_mapcount to avoid
-	 * false-negative page_mapped().
-	 */
-	if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
-		for (i = 0; i < HPAGE_PMD_NR; i++)
+		if (!pmd_migration)
 			atomic_inc(&page[i]._mapcount);
+		pte_unmap(pte);
 	}
 
-	lock_page_memcg(page);
-	if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
-		/* Last compound_mapcount is gone. */
-		__dec_lruvec_page_state(page, NR_ANON_THPS);
-		if (TestClearPageDoubleMap(page)) {
-			/* No need in mapcount reference anymore */
+	if (!pmd_migration) {
+		/*
+		 * Set PG_double_map before dropping compound_mapcount to avoid
+		 * false-negative page_mapped().
+		 */
+		if (compound_mapcount(page) > 1 &&
+		    !TestSetPageDoubleMap(page)) {
 			for (i = 0; i < HPAGE_PMD_NR; i++)
-				atomic_dec(&page[i]._mapcount);
+				atomic_inc(&page[i]._mapcount);
+		}
+
+		lock_page_memcg(page);
+		if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
+			/* Last compound_mapcount is gone. */
+			__dec_lruvec_page_state(page, NR_ANON_THPS);
+			if (TestClearPageDoubleMap(page)) {
+				/* No need in mapcount reference anymore */
+				for (i = 0; i < HPAGE_PMD_NR; i++)
+					atomic_dec(&page[i]._mapcount);
+			}
 		}
+		unlock_page_memcg(page);
 	}
-	unlock_page_memcg(page);
 
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2586,6 +2586,10 @@ struct page *ksm_might_need_to_copy(struct page *page,
 		return page;	/* let do_swap_page report the error */
 
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) {
+		put_page(new_page);
+		new_page = NULL;
+	}
 	if (new_page) {
 		copy_user_highpage(new_page, page, address, vma);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e9d5ab5d3ca0..b11a269e2356 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1575,6 +1575,20 @@ static int __ref __offline_pages(unsigned long start_pfn,
 		/* check again */
 		ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
 					    NULL, check_pages_isolated_cb);
+		/*
+		 * per-cpu pages are drained in start_isolate_page_range, but if
+		 * there are still pages that are not free, make sure that we
+		 * drain again, because when we isolated range we might
+		 * have raced with another thread that was adding pages to pcp
+		 * list.
+		 *
+		 * Forward progress should be still guaranteed because
+		 * pages on the pcp list can only belong to MOVABLE_ZONE
+		 * because has_unmovable_pages explicitly checks for
+		 * PageBuddy on freed pages on other zones.
+		 */
+		if (ret)
+			drain_all_pages(zone);
 	} while (ret);
 
 	/* Ok, all of our target is isolated.
diff --git a/mm/migrate.c b/mm/migrate.c
index 941b89383cf3..aecb1433cf3c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -668,7 +668,8 @@ void migrate_page_states(struct page *newpage, struct page *page)
 
 	copy_page_owner(page, newpage);
 
-	mem_cgroup_migrate(page, newpage);
+	if (!PageHuge(page))
+		mem_cgroup_migrate(page, newpage);
 }
 EXPORT_SYMBOL(migrate_page_states);
 
diff --git a/mm/mlock.c b/mm/mlock.c
index 93ca2bf30b4f..884b1216da6a 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -58,11 +58,14 @@ EXPORT_SYMBOL(can_do_mlock);
  */
 void clear_page_mlock(struct page *page)
 {
+	int nr_pages;
+
 	if (!TestClearPageMlocked(page))
 		return;
 
-	mod_zone_page_state(page_zone(page), NR_MLOCK, -thp_nr_pages(page));
-	count_vm_event(UNEVICTABLE_PGCLEARED);
+	nr_pages = thp_nr_pages(page);
+	mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+	count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
 	/*
 	 * The previous TestClearPageMlocked() corresponds to the smp_mb()
 	 * in __pagevec_lru_add_fn().
@@ -76,7 +79,7 @@ void clear_page_mlock(struct page *page)
 		 * We lost the race. the page already moved to evictable list.
 		 */
 		if (PageUnevictable(page))
-			count_vm_event(UNEVICTABLE_PGSTRANDED);
+			count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
 	}
 }
 
@@ -93,9 +96,10 @@ void mlock_vma_page(struct page *page)
 	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
 
 	if (!TestSetPageMlocked(page)) {
-		mod_zone_page_state(page_zone(page), NR_MLOCK,
-				    thp_nr_pages(page));
-		count_vm_event(UNEVICTABLE_PGMLOCKED);
+		int nr_pages = thp_nr_pages(page);
+
+		mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
+		count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
 		if (!isolate_lru_page(page))
 			putback_lru_page(page);
 	}
@@ -138,7 +142,7 @@ static void __munlock_isolated_page(struct page *page)
 
 	/* Did try_to_unlock() succeed or punt? */
 	if (!PageMlocked(page))
-		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+		count_vm_events(UNEVICTABLE_PGMUNLOCKED, thp_nr_pages(page));
 
 	putback_lru_page(page);
 }
@@ -154,10 +158,12 @@ static void __munlock_isolated_page(struct page *page)
  */
 static void __munlock_isolation_failed(struct page *page)
 {
+	int nr_pages = thp_nr_pages(page);
+
 	if (PageUnevictable(page))
-		__count_vm_event(UNEVICTABLE_PGSTRANDED);
+		__count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
 	else
-		__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+		__count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
 }
 
 /**
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 242c03121d73..63a3db10a8c0 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -170,6 +170,14 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * pageblocks we may have modified and return -EBUSY to caller. This
  * prevents two threads from simultaneously working on overlapping ranges.
  *
+ * Please note that there is no strong synchronization with the page allocator
+ * either. Pages might be freed while their page blocks are marked ISOLATED.
+ * In some cases pages might still end up on pcp lists and that would allow
+ * for their allocation even when they are in fact isolated already. Depending
+ * on how strong of a guarantee the caller needs drain_all_pages might be needed
+ * (e.g. __offline_pages will need to call it after check for isolated range for
+ * a next retry).
+ *
 * Return: the number of isolated pageblocks on success and -EBUSY if any part
 * of range cannot be isolated.
 */
diff --git a/mm/shmem.c b/mm/shmem.c
index 271548ca20f3..8e2b35ba93ad 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -279,11 +279,13 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 
 	if (!(sb->s_flags & SB_KERNMOUNT)) {
 		spin_lock(&sbinfo->stat_lock);
-		if (!sbinfo->free_inodes) {
-			spin_unlock(&sbinfo->stat_lock);
-			return -ENOSPC;
+		if (sbinfo->max_inodes) {
+			if (!sbinfo->free_inodes) {
+				spin_unlock(&sbinfo->stat_lock);
+				return -ENOSPC;
+			}
+			sbinfo->free_inodes--;
 		}
-		sbinfo->free_inodes--;
 		if (inop) {
 			ino = sbinfo->next_ino++;
 			if (unlikely(is_zero_ino(ino)))
diff --git a/mm/swap.c b/mm/swap.c
index d16d65d9b4e0..e7bdf094f76a 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -494,14 +494,14 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
 	unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
 
 	if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
+		int nr_pages = thp_nr_pages(page);
 		/*
 		 * We use the irq-unsafe __mod_zone_page_stat because this
 		 * counter is not modified from interrupt context, and the pte
 		 * lock is held(spinlock), which implies preemption disabled.
 		 */
-		__mod_zone_page_state(page_zone(page), NR_MLOCK,
-				    thp_nr_pages(page));
-		count_vm_event(UNEVICTABLE_PGMLOCKED);
+		__mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
+		count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
 	}
 	lru_cache_add(page);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9727dd8e2581..466fc3144fff 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4268,8 +4268,14 @@ void check_move_unevictable_pages(struct pagevec *pvec)
 	for (i = 0; i < pvec->nr; i++) {
 		struct page *page = pvec->pages[i];
 		struct pglist_data *pagepgdat = page_pgdat(page);
+		int nr_pages;
+
+		if (PageTransTail(page))
+			continue;
+
+		nr_pages = thp_nr_pages(page);
+		pgscanned += nr_pages;
 
-		pgscanned++;
 		if (pagepgdat != pgdat) {
 			if (pgdat)
 				spin_unlock_irq(&pgdat->lru_lock);
@@ -4288,7 +4294,7 @@ void check_move_unevictable_pages(struct pagevec *pvec)
 			ClearPageUnevictable(page);
 			del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
 			add_page_to_lru_list(page, lruvec, lru);
-			pgrescued++;
+			pgrescued += nr_pages;
 		}
 	}
 
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
index 6af951900aa3..312889edb84a 100644
--- a/tools/testing/selftests/vm/map_hugetlb.c
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -83,7 +83,7 @@ int main(int argc, char **argv)
 	}
 
 	if (shift)
-		printf("%u kB hugepages\n", 1 << shift);
+		printf("%u kB hugepages\n", 1 << (shift - 10));
 	else
 		printf("Default size hugepages\n");
 	printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
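
The map_hugetlb change at the end of the diff is a unit fix: shift is log2 of the huge page size in bytes, so printing 1 << shift labels a byte count as "kB", while shifting by ten fewer bits divides by 1024 and yields kilobytes. A standalone userspace sketch of the same arithmetic, with hypothetical example values rather than the selftest itself:

#include <stdio.h>

int main(void)
{
	unsigned int shift = 21;	/* e.g. 2 MB huge pages: 2^21 bytes */

	/* old output treats a byte count as kB (prints 2097152) */
	printf("buggy: %u kB hugepages\n", 1u << shift);

	/* fixed output: 2^(shift - 10) kB = bytes / 1024 (prints 2048) */
	printf("fixed: %u kB hugepages\n", 1u << (shift - 10));

	return 0;
}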