From f630c7c6f10546ebff15c3a856e7949feb7a2372 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 14 Dec 2020 19:03:14 -0800 Subject: kthread: add kthread_work tracepoints While migrating some code from wq to kthread_worker, I found that I missed the execute_start/end tracepoints. So add similar tracepoints for kthread_work. And for completeness, queue_work tracepoint (although this one differs slightly from the matching workqueue tracepoint). Link: https://lkml.kernel.org/r/20201010180323.126634-1-robdclark@gmail.com Signed-off-by: Rob Clark Cc: Rob Clark Cc: Steven Rostedt Cc: Ingo Molnar Cc: "Peter Zijlstra (Intel)" Cc: Phil Auld Cc: Valentin Schneider Cc: Thara Gopinath Cc: Randy Dunlap Cc: Vincent Donnefort Cc: Mel Gorman Cc: Jens Axboe Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: Ilias Stamatis Cc: Liang Chen Cc: Ben Dooks Cc: Peter Zijlstra Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kthread.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel') diff --git a/kernel/kthread.c b/kernel/kthread.c index 933a625621b8..34516b0a6eb7 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -704,8 +704,15 @@ repeat: raw_spin_unlock_irq(&worker->lock); if (work) { + kthread_work_func_t func = work->func; __set_current_state(TASK_RUNNING); + trace_sched_kthread_work_execute_start(work); work->func(work); + /* + * Avoid dereferencing work after this point. The trace + * event only cares about the address. + */ + trace_sched_kthread_work_execute_end(work, func); } else if (!freezing(current)) schedule(); @@ -834,6 +841,8 @@ static void kthread_insert_work(struct kthread_worker *worker, { kthread_insert_work_sanity_check(worker, work); + trace_sched_kthread_work_queue_work(worker, work); + list_add_tail(&work->node, pos); work->worker = worker; if (!worker->current_work && likely(worker->task)) -- cgit v1.2.3 From ebb2bdcef8a00d59b27d3532c423110559821e1d Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 14 Dec 2020 19:03:18 -0800 Subject: kthread_worker: document CPU hotplug handling The kthread worker API is simple. In short, it allows to create, use, and destroy workers. kthread_create_worker_on_cpu() just allows to bind a newly created worker to a given CPU. It is up to the API user how to handle CPU hotplug. They have to decide how to handle pending work items, prevent queuing new ones, and restore the functionality when the CPU goes off and on. There are few catches: + The CPU affinity gets lost when it is scheduled on an offline CPU. + The worker might not exist when the CPU was off when the user created the workers. A good practice is to implement two CPU hotplug callbacks and destroy/create the worker when CPU goes down/up. Mention this in the function description. [akpm@linux-foundation.org: grammar tweaks] Link: https://lore.kernel.org/r/20201028073031.4536-1-qiang.zhang@windriver.com Link: https://lkml.kernel.org/r/20201102101039.19227-1-pmladek@suse.com Reported-by: Zhang Qiang Signed-off-by: Petr Mladek Cc: Tejun Heo Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kthread.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kthread.c b/kernel/kthread.c index 34516b0a6eb7..97e053ade74a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -793,7 +793,25 @@ EXPORT_SYMBOL(kthread_create_worker); * A good practice is to add the cpu number also into the worker name. * For example, use kthread_create_worker_on_cpu(cpu, "helper/%d", cpu). * - * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM) + * CPU hotplug: + * The kthread worker API is simple and generic. It just provides a way + * to create, use, and destroy workers. + * + * It is up to the API user how to handle CPU hotplug. They have to decide + * how to handle pending work items, prevent queuing new ones, and + * restore the functionality when the CPU goes off and on. There are a + * few catches: + * + * - CPU affinity gets lost when it is scheduled on an offline CPU. + * + * - The worker might not exist when the CPU was off when the user + * created the workers. + * + * Good practice is to implement two CPU hotplug callbacks and to + * destroy/create the worker when the CPU goes down/up. + * + * Return: + * The pointer to the allocated worker on success, ERR_PTR(-ENOMEM) * when the needed structures could not get allocated, and ERR_PTR(-EINTR) * when the worker was SIGKILLed. */ -- cgit v1.2.3 From 57efa1fe5957694fa541c9062de0a127f0b9acb0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Dec 2020 19:05:44 -0800 Subject: mm/gup: prevent gup_fast from racing with COW during fork Since commit 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes") pages under a FOLL_PIN will not be write protected during COW for fork. This means that pages returned from pin_user_pages(FOLL_WRITE) should not become write protected while the pin is active. However, there is a small race where get_user_pages_fast(FOLL_PIN) can establish a FOLL_PIN at the same time copy_present_page() is write protecting it: CPU 0 CPU 1 get_user_pages_fast() internal_get_user_pages_fast() copy_page_range() pte_alloc_map_lock() copy_present_page() atomic_read(has_pinned) == 0 page_maybe_dma_pinned() == false atomic_set(has_pinned, 1); gup_pgd_range() gup_pte_range() pte_t pte = gup_get_pte(ptep) pte_access_permitted(pte) try_grab_compound_head() pte = pte_wrprotect(pte) set_pte_at(); pte_unmap_unlock() // GUP now returns with a write protected page The first attempt to resolve this by using the write protect caused problems (and was missing a barrrier), see commit f3c64eda3e50 ("mm: avoid early COW write protect games during fork()") Instead wrap copy_p4d_range() with the write side of a seqcount and check the read side around gup_pgd_range(). If there is a collision then get_user_pages_fast() fails and falls back to slow GUP. Slow GUP is safe against this race because copy_page_range() is only called while holding the exclusive side of the mmap_lock on the src mm_struct. [akpm@linux-foundation.org: coding style fixes] Link: https://lore.kernel.org/r/CAHk-=wi=iCnYCARbPGjkVJu9eyYeZ13N64tZYLdOB8CP5Q_PLw@mail.gmail.com Link: https://lkml.kernel.org/r/2-v4-908497cf359a+4782-gup_fork_jgg@nvidia.com Fixes: f3c64eda3e50 ("mm: avoid early COW write protect games during fork()") Signed-off-by: Jason Gunthorpe Suggested-by: Linus Torvalds Reviewed-by: John Hubbard Reviewed-by: Jan Kara Reviewed-by: Peter Xu Acked-by: "Ahmed S. Darwish" [seqcount_t parts] Cc: Andrea Arcangeli Cc: "Aneesh Kumar K.V" Cc: Christoph Hellwig Cc: Hugh Dickins Cc: Jann Horn Cc: Kirill Shutemov Cc: Kirill Tkhai Cc: Leon Romanovsky Cc: Michal Hocko Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 6d266388d380..dc55f68a6ee3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1007,6 +1007,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm->vmacache_seqnum = 0; atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); + seqcount_init(&mm->write_protect_seq); mmap_init_lock(mm); INIT_LIST_HEAD(&mm->mmlist); mm->core_state = NULL; -- cgit v1.2.3 From bef8620cd8e0a117c1a0719604052e424eb418f9 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 14 Dec 2020 19:06:49 -0800 Subject: mm: memcg: deprecate the non-hierarchical mode Patch series "mm: memcg: deprecate cgroup v1 non-hierarchical mode", v1. The non-hierarchical cgroup v1 mode is a legacy of early days of the memory controller and doesn't bring any value today. However, it complicates the code and creates many edge cases all over the memory controller code. It's a good time to deprecate it completely. This patchset removes the internal logic, adjusts the user interface and updates the documentation. The alt patch removes some bits of the cgroup core code, which become obsolete. Michal Hocko said: "All that we know today is that we have a warning in place to complain loudly when somebody relies on use_hierarchy=0 with a deeper hierarchy. For all those years we have seen _zero_ reports that would describe a sensible usecase. Moreover we (SUSE) have backported this warning into old distribution kernels (since 3.0 based kernels) to extend the coverage and didn't hear even for users who adopt new kernels only very slowly. The only report we have seen so far was a LTP test suite which doesn't really reflect any real life usecase" This patch (of 3): The non-hierarchical cgroup v1 mode is a legacy of early days of the memory controller and doesn't bring any value today. However, it complicates the code and creates many edge cases all over the memory controller code. It's a good time to deprecate it completely. Functionally this patch enabled is by default for all cgroups and forbids switching it off. Nothing changes if cgroup v2 is used: hierarchical mode was enforced from scratch. To protect the ABI memory.use_hierarchy interface is preserved with a limited functionality: reading always returns "1", writing of "1" passes silently, writing of any other value fails with -EINVAL and a warning to dmesg (on the first occasion). Link: https://lkml.kernel.org/r/20201110220800.929549-1-guro@fb.com Link: https://lkml.kernel.org/r/20201110220800.929549-2-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Acked-by: David Rientjes Acked-by: Johannes Weiner Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup/cgroup.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e41c21819ba0..80c5c34416e8 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -281,9 +281,6 @@ bool cgroup_ssid_enabled(int ssid) * - cpuset: a task can be moved into an empty cpuset, and again it takes * masks of ancestors. * - * - memcg: use_hierarchy is on by default and the cgroup file for the flag - * is not created. - * * - blkcg: blk-throttle becomes properly hierarchical. * * - debug: disallowed on the default hierarchy. @@ -5156,8 +5153,6 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, cgroup_parent(parent)) { pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", current->comm, current->pid, ss->name); - if (!strcmp(ss->name, "memory")) - pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n"); ss->warned_broken_hierarchy = true; } -- cgit v1.2.3 From 9d9d341df4d519d96e7927941d91f5785c5cea07 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 14 Dec 2020 19:06:55 -0800 Subject: cgroup: remove obsoleted broken_hierarchy and warned_broken_hierarchy With the deprecation of the non-hierarchical mode of the memory controller there are no more examples of broken hierarchies left. Let's remove the cgroup core code which was supposed to print warnings about creating of broken hierarchies. Link: https://lkml.kernel.org/r/20201110220800.929549-4-guro@fb.com Signed-off-by: Roman Gushchin Reviewed-by: Shakeel Butt Acked-by: David Rientjes Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup/cgroup.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 80c5c34416e8..16f4692dc961 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5149,13 +5149,6 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, if (err) goto err_list_del; - if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && - cgroup_parent(parent)) { - pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", - current->comm, current->pid, ss->name); - ss->warned_broken_hierarchy = true; - } - return css; err_list_del: -- cgit v1.2.3 From da3ceeff923e3bc750a8423c840462760c463926 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 14 Dec 2020 19:07:04 -0800 Subject: mm: memcg/slab: rename *_lruvec_slab_state to *_lruvec_kmem_state The *_lruvec_slab_state is also suitable for pages allocated from buddy, not just for the slab objects. But the function name seems to tell us that only slab object is applicable. So we can rename the keyword of slab to kmem. Link: https://lkml.kernel.org/r/20201117085249.24319-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Roman Gushchin Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index dc55f68a6ee3..5e7cc88eadb5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -385,7 +385,7 @@ static void account_kernel_stack(struct task_struct *tsk, int account) mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB, account * (THREAD_SIZE / 1024)); else - mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB, + mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB, account * (THREAD_SIZE / 1024)); } -- cgit v1.2.3 From d3f5ffcacd1528736471bc78f03f06da6c4551cc Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:07:45 -0800 Subject: mm: cleanup: remove unused tsk arg from __access_remote_vm Despite a comment that said that page fault accounting would be charged to whatever task_struct* was passed into __access_remote_vm(), the tsk argument was actually unused. Making page fault accounting actually use this task struct is quite a project, so there is no point in keeping the tsk argument. Delete both the comment, and the argument. [rppt@linux.ibm.com: changelog addition] Link: https://lkml.kernel.org/r/20201026074137.4147787-1-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Mike Rapoport Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 79de1294f8eb..a77d25c641e9 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -57,7 +57,7 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, return 0; } - ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); + ret = __access_remote_vm(mm, addr, buf, len, gup_flags); mmput(mm); return ret; -- cgit v1.2.3 From e89a85d63fb2e187f5afcbf83c12743132596563 Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Mon, 14 Dec 2020 19:09:09 -0800 Subject: workqueue: kasan: record workqueue stack Patch series "kasan: add workqueue stack for generic KASAN", v5. Syzbot reports many UAF issues for workqueue, see [1]. In some of these access/allocation happened in process_one_work(), we see the free stack is useless in KASAN report, it doesn't help programmers to solve UAF for workqueue issue. This patchset improves KASAN reports by making them to have workqueue queueing stack. It is useful for programmers to solve use-after-free or double-free memory issue. Generic KASAN also records the last two workqueue stacks and prints them in KASAN report. It is only suitable for generic KASAN. [1] https://groups.google.com/g/syzkaller-bugs/search?q=%22use-after-free%22+process_one_work [2] https://bugzilla.kernel.org/show_bug.cgi?id=198437 This patch (of 4): When analyzing use-after-free or double-free issue, recording the enqueuing work stacks is helpful to preserve usage history which potentially gives a hint about the affected code. For workqueue it has turned out to be useful to record the enqueuing work call stacks. Because user can see KASAN report to determine whether it is root cause. They don't need to enable debugobjects, but they have a chance to find out the root cause. Link: https://lkml.kernel.org/r/20201203022148.29754-1-walter-zh.wu@mediatek.com Link: https://lkml.kernel.org/r/20201203022442.30006-1-walter-zh.wu@mediatek.com Signed-off-by: Walter Wu Suggested-by: Marco Elver Acked-by: Marco Elver Acked-by: Tejun Heo Reviewed-by: Dmitry Vyukov Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Lai Jiangshan Cc: Marco Elver Cc: Matthias Brugger Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/workqueue.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 437935e7a199..33608a8c611e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1327,6 +1327,9 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, { struct worker_pool *pool = pwq->pool; + /* record the work call stack in order to print it in KASAN reports */ + kasan_record_aux_stack(work); + /* we own @work, set data and link */ set_work_pwq(work, pwq, extra_flags); list_add_tail(&work->entry, head); -- cgit v1.2.3 From 2abf962a8d42b32f5ffeb827826290b799c85f86 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:25 -0800 Subject: PM: hibernate: make direct map manipulations more explicit When DEBUG_PAGEALLOC or ARCH_HAS_SET_DIRECT_MAP is enabled a page may be not present in the direct map and has to be explicitly mapped before it could be copied. Introduce hibernate_map_page() and hibernation_unmap_page() that will explicitly use set_direct_map_{default,invalid}_noflush() for ARCH_HAS_SET_DIRECT_MAP case and debug_pagealloc_{map,unmap}_pages() for DEBUG_PAGEALLOC case. The remapping of the pages in safe_copy_page() presumes that it only changes protection bits in an existing PTE and so it is safe to ignore return value of set_direct_map_{default,invalid}_noflush(). Still, add a pr_warn() so that future changes in set_memory APIs will not silently break hibernation. Link: https://lkml.kernel.org/r/20201109192128.960-3-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Rafael J. Wysocki Reviewed-by: David Hildenbrand Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: Dave Hansen Cc: David Rientjes Cc: "David S. Miller" Cc: "Edgecombe, Rick P" Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/snapshot.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 46b1804c1ddf..d848377dd8dc 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -76,6 +76,40 @@ static inline void hibernate_restore_protect_page(void *page_address) {} static inline void hibernate_restore_unprotect_page(void *page_address) {} #endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */ + +/* + * The calls to set_direct_map_*() should not fail because remapping a page + * here means that we only update protection bits in an existing PTE. + * It is still worth to have a warning here if something changes and this + * will no longer be the case. + */ +static inline void hibernate_map_page(struct page *page) +{ + if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { + int ret = set_direct_map_default_noflush(page); + + if (ret) + pr_warn_once("Failed to remap page\n"); + } else { + debug_pagealloc_map_pages(page, 1); + } +} + +static inline void hibernate_unmap_page(struct page *page) +{ + if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { + unsigned long addr = (unsigned long)page_address(page); + int ret = set_direct_map_invalid_noflush(page); + + if (ret) + pr_warn_once("Failed to remap page\n"); + + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } else { + debug_pagealloc_unmap_pages(page, 1); + } +} + static int swsusp_page_is_free(struct page *); static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); @@ -1355,9 +1389,9 @@ static void safe_copy_page(void *dst, struct page *s_page) if (kernel_page_present(s_page)) { do_copy_page(dst, page_address(s_page)); } else { - kernel_map_pages(s_page, 1, 1); + hibernate_map_page(s_page); do_copy_page(dst, page_address(s_page)); - kernel_map_pages(s_page, 1, 0); + hibernate_unmap_page(s_page); } } -- cgit v1.2.3 From 03b6c9a3e8805606c0bb4ad41855fac3bf85c3b9 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:38 -0800 Subject: kernel/power: allow hibernation with page_poison sanity checking Page poisoning used to be incompatible with hibernation, as the state of poisoned pages was lost after resume, thus enabling CONFIG_HIBERNATION forces CONFIG_PAGE_POISONING_NO_SANITY. For the same reason, the poisoning with zeroes variant CONFIG_PAGE_POISONING_ZERO used to disable hibernation. The latter restriction was removed by commit 1ad1410f632d ("PM / Hibernate: allow hibernation with PAGE_POISONING_ZERO") and similarly for init_on_free by commit 18451f9f9e58 ("PM: hibernate: fix crashes with init_on_free=1") by making sure free pages are cleared after resume. We can use the same mechanism to instead poison free pages with PAGE_POISON after resume. This covers both zero and 0xAA patterns. Thus we can remove the Kconfig restriction that disables page poison sanity checking when hibernation is enabled. Link: https://lkml.kernel.org/r/20201113104033.22907-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Rafael J. Wysocki [hibernation] Reviewed-by: David Hildenbrand Cc: Mike Rapoport Cc: Alexander Potapenko Cc: Kees Cook Cc: Laura Abbott Cc: Mateusz Nosek Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/hibernate.c | 2 +- kernel/power/power.h | 2 +- kernel/power/snapshot.c | 14 +++++++++++--- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 2fc7d509a34f..da0b41914177 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -326,7 +326,7 @@ static int create_image(int platform_mode) if (!in_suspend) { events_check_enabled = false; - clear_free_pages(); + clear_or_poison_free_pages(); } platform_leave(platform_mode); diff --git a/kernel/power/power.h b/kernel/power/power.h index 24f12d534515..778bf431ec02 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -106,7 +106,7 @@ extern int create_basic_memory_bitmaps(void); extern void free_basic_memory_bitmaps(void); extern int hibernate_preallocate_memory(void); -extern void clear_free_pages(void); +extern void clear_or_poison_free_pages(void); /** * Auxiliary structure used for reading the snapshot image data and diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d848377dd8dc..d63560e1cf87 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1178,7 +1178,15 @@ void free_basic_memory_bitmaps(void) pr_debug("Basic memory bitmaps freed\n"); } -void clear_free_pages(void) +static void clear_or_poison_free_page(struct page *page) +{ + if (page_poisoning_enabled_static()) + __kernel_poison_pages(page, 1); + else if (want_init_on_free()) + clear_highpage(page); +} + +void clear_or_poison_free_pages(void) { struct memory_bitmap *bm = free_pages_map; unsigned long pfn; @@ -1186,12 +1194,12 @@ void clear_free_pages(void) if (WARN_ON(!(free_pages_map))) return; - if (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) || want_init_on_free()) { + if (page_poisoning_enabled() || want_init_on_free()) { memory_bm_position_reset(bm); pfn = memory_bm_next_pfn(bm); while (pfn != BM_END_OF_MAP) { if (pfn_valid(pfn)) - clear_highpage(pfn_to_page(pfn)); + clear_or_poison_free_page(pfn_to_page(pfn)); pfn = memory_bm_next_pfn(bm); } -- cgit v1.2.3