From 12ffc3b1513ebc1f11ae77d053948504a94a68a6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 13 Jun 2025 16:43:44 -0500 Subject: PM: Restrict swap use to later in the suspend sequence Currently swap is restricted before drivers have had a chance to do their prepare() PM callbacks. Restricting swap this early means that if a driver needs to evict some content from memory into sawp in it's prepare callback, it won't be able to. On AMD dGPUs this can lead to failed suspends under memory pressure situations as all VRAM must be evicted to system memory or swap. Move the swap restriction to right after all devices have had a chance to do the prepare() callback. If there is any problem with the sequence, restore swap in the appropriate dpm resume callbacks or error handling paths. Closes: https://github.com/ROCm/ROCK-Kernel-Driver/issues/174 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2362 Signed-off-by: Mario Limonciello Tested-by: Nat Wittstock Tested-by: Lucian Langa Link: https://patch.msgid.link/20250613214413.4127087-1-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 519fb09de5e0..9216e3b91d3b 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -423,7 +423,6 @@ int hibernation_snapshot(int platform_mode) } console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend(PMSG_FREEZE); @@ -559,7 +558,6 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); @@ -571,7 +569,6 @@ int hibernation_restore(int platform_mode) BUG_ON(!error); } dpm_resume_end(PMSG_RECOVER); - pm_restore_gfp_mask(); console_resume_all(); pm_restore_console(); return error; -- cgit v1.2.3 From 2640e819474f4a9ec78aa3cdb9063e4b5cf18ae4 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Thu, 10 Jul 2025 14:23:11 +0800 Subject: PM: hibernate: shrink shmem pages after dev_pm_ops.prepare() When hibernate with data center dGPUs, huge number of VRAM data will be moved to shmem during dev_pm_ops.prepare(). These shmem pages take a lot of system memory so that there's no enough free memory for creating the hibernation image. This will cause hibernation fail and abort. After dev_pm_ops.prepare(), call shrink_all_memory() to force move shmem pages to swap disk and reclaim the pages, so that there's enough system memory for hibernation image and less pages needed to copy to the image. This patch can only flush and free about half shmem pages. It will be better to flush and free more pages, even all of shmem pages, so that there're less pages to be copied to the hibernation image and the overall hibernation time can be reduced. Signed-off-by: Samuel Zhang Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20250710062313.3226149-4-guoqing.zhang@amd.com Signed-off-by: Mario Limonciello --- kernel/power/hibernate.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'kernel/power/hibernate.c') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 519fb09de5e0..e526237a8c0f 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -381,6 +381,23 @@ static int create_image(int platform_mode) return error; } +static void shrink_shmem_memory(void) +{ + struct sysinfo info; + unsigned long nr_shmem_pages, nr_freed_pages; + + si_meminfo(&info); + nr_shmem_pages = info.sharedram; /* current page count used for shmem */ + /* + * The intent is to reclaim all shmem pages. Though shrink_all_memory() can + * only reclaim about half of them, it's enough for creating the hibernation + * image. + */ + nr_freed_pages = shrink_all_memory(nr_shmem_pages); + pr_debug("requested to reclaim %lu shmem pages, actually freed %lu pages\n", + nr_shmem_pages, nr_freed_pages); +} + /** * hibernation_snapshot - Quiesce devices and create a hibernation image. * @platform_mode: If set, use platform driver to prepare for the transition. @@ -422,6 +439,15 @@ int hibernation_snapshot(int platform_mode) goto Thaw; } + /* + * Device drivers may move lots of data to shmem in dpm_prepare(). The shmem + * pages will use lots of system memory, causing hibernation image creation + * fail due to insufficient free memory. + * This call is to force flush the shmem pages to swap disk and reclaim + * the system memory so that image creation can succeed. + */ + shrink_shmem_memory(); + console_suspend_all(); pm_restrict_gfp_mask(); -- cgit v1.2.3