-rw-r--r--  .mailmap  3
-rw-r--r--  MAINTAINERS  33
-rw-r--r--  arch/arm64/kernel/mte.c  3
-rw-r--r--  arch/arm64/mm/fault.c  10
-rw-r--r--  arch/powerpc/Kconfig  1
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype  1
-rw-r--r--  fs/nilfs2/segment.c  7
-rw-r--r--  fs/proc/generic.c  12
-rw-r--r--  include/linux/gfp.h  3
-rw-r--r--  include/linux/huge_mm.h  55
-rw-r--r--  include/linux/mm.h  13
-rw-r--r--  kernel/Kconfig.kexec  9
-rw-r--r--  kernel/Makefile  1
-rw-r--r--  kernel/crash_core.c  2
-rw-r--r--  kernel/gcov/gcc_4_7.c  4
-rw-r--r--  kernel/kexec_handover.c  95
-rw-r--r--  kernel/kexec_handover_debug.c  25
-rw-r--r--  kernel/kexec_handover_internal.h  20
-rw-r--r--  lib/maple_tree.c  30
-rw-r--r--  lib/test_kho.c  3
-rw-r--r--  mm/Kconfig  7
-rw-r--r--  mm/damon/stat.c  9
-rw-r--r--  mm/damon/sysfs.c  10
-rw-r--r--  mm/filemap.c  33
-rw-r--r--  mm/huge_memory.c  63
-rw-r--r--  mm/kmsan/core.c  3
-rw-r--r--  mm/kmsan/hooks.c  6
-rw-r--r--  mm/kmsan/shadow.c  2
-rw-r--r--  mm/ksm.c  113
-rw-r--r--  mm/memfd.c  27
-rw-r--r--  mm/memory.c  20
-rw-r--r--  mm/mm_init.c  2
-rw-r--r--  mm/mmap_lock.c  1
-rw-r--r--  mm/mremap.c  2
-rw-r--r--  mm/secretmem.c  2
-rw-r--r--  mm/shmem.c  9
-rw-r--r--  mm/slub.c  6
-rw-r--r--  mm/swap_state.c  13
-rw-r--r--  mm/swapfile.c  4
-rw-r--r--  mm/truncate.c  37
-rwxr-xr-x  scripts/decode_stacktrace.sh  14
-rw-r--r--  tools/testing/selftests/mm/uffd-unit-tests.c  15
-rw-r--r--  tools/testing/selftests/user_events/perf_test.c  2
43 files changed, 531 insertions, 199 deletions
diff --git a/.mailmap b/.mailmap
index 017d357019ac..4ee8d87159fd 100644
--- a/.mailmap
+++ b/.mailmap
@@ -206,6 +206,7 @@ Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com>
David Brownell <david-b@pacbell.net>
David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
David Heidelberg <david@ixit.cz> <d.okias@gmail.com>
+David Hildenbrand <david@kernel.org> <david@redhat.com>
David Rheinsberg <david@readahead.eu> <dh.herrmann@gmail.com>
David Rheinsberg <david@readahead.eu> <dh.herrmann@googlemail.com>
David Rheinsberg <david@readahead.eu> <david.rheinsberg@gmail.com>
@@ -687,6 +688,8 @@ Sachin Mokashi <sachin.mokashi@intel.com> <sachinx.mokashi@intel.com>
Sachin P Sant <ssant@in.ibm.com>
Sai Prakash Ranjan <quic_saipraka@quicinc.com> <saiprakash.ranjan@codeaurora.org>
Sakari Ailus <sakari.ailus@linux.intel.com> <sakari.ailus@iki.fi>
+Sam Protsenko <semen.protsenko@linaro.org>
+Sam Protsenko <semen.protsenko@linaro.org> <semen.protsenko@globallogic.com>
Sam Ravnborg <sam@mars.ravnborg.org>
Sankeerth Billakanti <quic_sbillaka@quicinc.com> <sbillaka@codeaurora.org>
Santosh Shilimkar <santosh.shilimkar@oracle.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 0ac71a8b8b4a..bd22adb17846 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11526,7 +11526,7 @@ F: include/linux/platform_data/huawei-gaokun-ec.h
HUGETLB SUBSYSTEM
M: Muchun Song <muchun.song@linux.dev>
M: Oscar Salvador <osalvador@suse.de>
-R: David Hildenbrand <david@redhat.com>
+R: David Hildenbrand <david@kernel.org>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -13734,7 +13734,7 @@ KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@linux.ibm.com>
M: Janosch Frank <frankja@linux.ibm.com>
M: Claudio Imbrenda <imbrenda@linux.ibm.com>
-R: David Hildenbrand <david@redhat.com>
+R: David Hildenbrand <david@kernel.org>
L: kvm@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
@@ -13800,6 +13800,7 @@ F: Documentation/admin-guide/mm/kho.rst
F: Documentation/core-api/kho/*
F: include/linux/kexec_handover.h
F: kernel/kexec_handover.c
+F: lib/test_kho.c
F: tools/testing/selftests/kho/
KEYS-ENCRYPTED
@@ -16222,7 +16223,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git
F: drivers/devfreq/tegra30-devfreq.c
MEMORY HOT(UN)PLUG
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
M: Oscar Salvador <osalvador@suse.de>
L: linux-mm@kvack.org
S: Maintained
@@ -16247,7 +16248,7 @@ F: tools/mm/
MEMORY MANAGEMENT - CORE
M: Andrew Morton <akpm@linux-foundation.org>
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
@@ -16303,7 +16304,7 @@ F: mm/execmem.c
MEMORY MANAGEMENT - GUP (GET USER PAGES)
M: Andrew Morton <akpm@linux-foundation.org>
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
R: Jason Gunthorpe <jgg@nvidia.com>
R: John Hubbard <jhubbard@nvidia.com>
R: Peter Xu <peterx@redhat.com>
@@ -16319,7 +16320,7 @@ F: tools/testing/selftests/mm/gup_test.c
MEMORY MANAGEMENT - KSM (Kernel Samepage Merging)
M: Andrew Morton <akpm@linux-foundation.org>
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
R: Xu Xin <xu.xin16@zte.com.cn>
R: Chengming Zhou <chengming.zhou@linux.dev>
L: linux-mm@kvack.org
@@ -16335,7 +16336,7 @@ F: mm/mm_slot.h
MEMORY MANAGEMENT - MEMORY POLICY AND MIGRATION
M: Andrew Morton <akpm@linux-foundation.org>
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
R: Zi Yan <ziy@nvidia.com>
R: Matthew Brost <matthew.brost@intel.com>
R: Joshua Hahn <joshua.hahnjy@gmail.com>
@@ -16375,7 +16376,7 @@ F: mm/workingset.c
MEMORY MANAGEMENT - MISC
M: Andrew Morton <akpm@linux-foundation.org>
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
@@ -16463,7 +16464,7 @@ F: mm/shuffle.h
MEMORY MANAGEMENT - RECLAIM
M: Andrew Morton <akpm@linux-foundation.org>
M: Johannes Weiner <hannes@cmpxchg.org>
-R: David Hildenbrand <david@redhat.com>
+R: David Hildenbrand <david@kernel.org>
R: Michal Hocko <mhocko@kernel.org>
R: Qi Zheng <zhengqi.arch@bytedance.com>
R: Shakeel Butt <shakeel.butt@linux.dev>
@@ -16476,7 +16477,7 @@ F: mm/workingset.c
MEMORY MANAGEMENT - RMAP (REVERSE MAPPING)
M: Andrew Morton <akpm@linux-foundation.org>
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Rik van Riel <riel@surriel.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
@@ -16500,12 +16501,12 @@ F: mm/secretmem.c
MEMORY MANAGEMENT - SWAP
M: Andrew Morton <akpm@linux-foundation.org>
+M: Chris Li <chrisl@kernel.org>
+M: Kairui Song <kasong@tencent.com>
R: Kemeng Shi <shikemeng@huaweicloud.com>
-R: Kairui Song <kasong@tencent.com>
R: Nhat Pham <nphamcs@gmail.com>
R: Baoquan He <bhe@redhat.com>
R: Barry Song <baohua@kernel.org>
-R: Chris Li <chrisl@kernel.org>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/mm/swap-table.rst
@@ -16521,7 +16522,7 @@ F: mm/swapfile.c
MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE)
M: Andrew Morton <akpm@linux-foundation.org>
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Zi Yan <ziy@nvidia.com>
R: Baolin Wang <baolin.wang@linux.alibaba.com>
@@ -16623,7 +16624,7 @@ MEMORY MAPPING - MADVISE (MEMORY ADVICE)
M: Andrew Morton <akpm@linux-foundation.org>
M: Liam R. Howlett <Liam.Howlett@oracle.com>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
R: Vlastimil Babka <vbabka@suse.cz>
R: Jann Horn <jannh@google.com>
L: linux-mm@kvack.org
@@ -27090,7 +27091,7 @@ F: net/vmw_vsock/virtio_transport_common.c
VIRTIO BALLOON
M: "Michael S. Tsirkin" <mst@redhat.com>
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
L: virtualization@lists.linux.dev
S: Maintained
F: drivers/virtio/virtio_balloon.c
@@ -27245,7 +27246,7 @@ F: drivers/iommu/virtio-iommu.c
F: include/uapi/linux/virtio_iommu.h
VIRTIO MEM DRIVER
-M: David Hildenbrand <david@redhat.com>
+M: David Hildenbrand <david@kernel.org>
L: virtualization@lists.linux.dev
S: Maintained
W: https://virtio-mem.gitlab.io/
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 43f7a2f39403..32148bf09c1d 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -476,7 +476,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
folio = page_folio(page);
if (folio_test_hugetlb(folio))
- WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio));
+ WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) &&
+ !is_huge_zero_folio(folio));
else
WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index d816ff44faff..125dfa6c613b 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -969,6 +969,16 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
void tag_clear_highpage(struct page *page)
{
+ /*
+ * If MTE is not supported, fall back to clear_highpage():
+ * get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS, so
+ * post_alloc_hook() will invoke tag_clear_highpage() even without MTE.
+ */
+ if (!system_supports_mte()) {
+ clear_highpage(page);
+ return;
+ }
+
/* Newly allocated page, shouldn't have been tagged yet */
WARN_ON_ONCE(!try_page_mte_tagging(page));
mte_zero_clear_page_tags(page_address(page));
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e24f4d88885a..9537a61ebae0 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,6 +137,7 @@ config PPC
select ARCH_HAS_DMA_OPS if PPC64
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_GIGANTIC_PAGE if ARCH_SUPPORTS_HUGETLBFS
select ARCH_HAS_KCOV
select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU
select ARCH_HAS_MEMBARRIER_CALLBACKS
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 7b527d18aa5e..4c321a8ea896 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -423,7 +423,6 @@ config PPC_64S_HASH_MMU
config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
- select ARCH_HAS_GIGANTIC_PAGE
default y
help
Enable support for the Power ISA 3.0 Radix style MMU. Currently this
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index f15ca6fc400d..deee16bc9d4e 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2768,7 +2768,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
if (sci->sc_task) {
wake_up(&sci->sc_wait_daemon);
- kthread_stop(sci->sc_task);
+ if (kthread_stop(sci->sc_task)) {
+ spin_lock(&sci->sc_state_lock);
+ sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
+ spin_unlock(&sci->sc_state_lock);
+ }
}
spin_lock(&sci->sc_state_lock);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 176281112273..501889856461 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -698,6 +698,12 @@ void pde_put(struct proc_dir_entry *pde)
}
}
+static void pde_erase(struct proc_dir_entry *pde, struct proc_dir_entry *parent)
+{
+ rb_erase(&pde->subdir_node, &parent->subdir);
+ RB_CLEAR_NODE(&pde->subdir_node);
+}
+
/*
* Remove a /proc entry and free it if it's not currently in use.
*/
@@ -720,7 +726,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
WARN(1, "removing permanent /proc entry '%s'", de->name);
de = NULL;
} else {
- rb_erase(&de->subdir_node, &parent->subdir);
+ pde_erase(de, parent);
if (S_ISDIR(de->mode))
parent->nlink--;
}
@@ -764,7 +770,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
root->parent->name, root->name);
return -EINVAL;
}
- rb_erase(&root->subdir_node, &parent->subdir);
+ pde_erase(root, parent);
de = root;
while (1) {
@@ -776,7 +782,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
next->parent->name, next->name);
return -EINVAL;
}
- rb_erase(&next->subdir_node, &de->subdir);
+ pde_erase(next, de);
de = next;
continue;
}
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0ceb4e09306c..623bee335383 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -7,6 +7,7 @@
#include <linux/mmzone.h>
#include <linux/topology.h>
#include <linux/alloc_tag.h>
+#include <linux/cleanup.h>
#include <linux/sched.h>
struct vm_area_struct;
@@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
/* This should be paired with folio_put() rather than free_contig_range(). */
#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
+DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
+
#endif /* __LINUX_GFP_H */
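
The free_page cleanup class added here is what the kernel/kexec_handover.c hunks later in this patch rely on (see the __free(free_page) and no_free_ptr() uses there). A minimal sketch of the intended pattern, not part of the patch; the validate() helper is hypothetical, for illustration only:

static void *alloc_checked_page(void)
{
	/* Page is freed automatically on early return unless ownership is taken. */
	void *page __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);

	if (!page)
		return ERR_PTR(-ENOMEM);

	if (!validate(page))			/* hypothetical check */
		return ERR_PTR(-EINVAL);	/* __free(free_page) releases the page here */

	return no_free_ptr(page);		/* success: hand ownership to the caller */
}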
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f327d62fc985..71ac78b9f834 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -376,45 +376,30 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
struct list_head *list);
/*
- * try_folio_split - try to split a @folio at @page using non uniform split.
+ * try_folio_split_to_order - try to split a @folio at @page to @new_order using
+ * non uniform split.
* @folio: folio to be split
- * @page: split to order-0 at the given page
- * @list: store the after-split folios
+ * @page: split to @new_order at the given page
+ * @new_order: the target split order
*
- * Try to split a @folio at @page using non uniform split to order-0, if
- * non uniform split is not supported, fall back to uniform split.
+ * Try to split a @folio at @page using non uniform split to @new_order, if
+ * non uniform split is not supported, fall back to uniform split. After-split
+ * folios are put back to LRU list. Use min_order_for_split() to get the lower
+ * bound of @new_order.
*
* Return: 0: split is successful, otherwise split failed.
*/
-static inline int try_folio_split(struct folio *folio, struct page *page,
- struct list_head *list)
+static inline int try_folio_split_to_order(struct folio *folio,
+ struct page *page, unsigned int new_order)
{
- int ret = min_order_for_split(folio);
-
- if (ret < 0)
- return ret;
-
- if (!non_uniform_split_supported(folio, 0, false))
- return split_huge_page_to_list_to_order(&folio->page, list,
- ret);
- return folio_split(folio, ret, page, list);
+ if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
+ return split_huge_page_to_list_to_order(&folio->page, NULL,
+ new_order);
+ return folio_split(folio, new_order, page, NULL);
}
static inline int split_huge_page(struct page *page)
{
- struct folio *folio = page_folio(page);
- int ret = min_order_for_split(folio);
-
- if (ret < 0)
- return ret;
-
- /*
- * split_huge_page() locks the page before splitting and
- * expects the same page that has been split to be locked when
- * returned. split_folio(page_folio(page)) cannot be used here
- * because it converts the page to folio and passes the head
- * page to be split.
- */
- return split_huge_page_to_list_to_order(page, NULL, ret);
+ return split_huge_page_to_list_to_order(page, NULL, 0);
}
void deferred_split_folio(struct folio *folio, bool partially_mapped);
@@ -597,14 +582,20 @@ static inline int split_huge_page(struct page *page)
return -EINVAL;
}
+static inline int min_order_for_split(struct folio *folio)
+{
+ VM_WARN_ON_ONCE_FOLIO(1, folio);
+ return -EINVAL;
+}
+
static inline int split_folio_to_list(struct folio *folio, struct list_head *list)
{
VM_WARN_ON_ONCE_FOLIO(1, folio);
return -EINVAL;
}
-static inline int try_folio_split(struct folio *folio, struct page *page,
- struct list_head *list)
+static inline int try_folio_split_to_order(struct folio *folio,
+ struct page *page, unsigned int new_order)
{
VM_WARN_ON_ONCE_FOLIO(1, folio);
return -EINVAL;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d16b33bacc32..7c79b3369b82 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2074,7 +2074,7 @@ static inline unsigned long folio_nr_pages(const struct folio *folio)
return folio_large_nr_pages(folio);
}
-#if !defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE)
+#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS)
/*
* We don't expect any folios that exceed buddy sizes (and consequently
* memory sections).
@@ -2087,10 +2087,17 @@ static inline unsigned long folio_nr_pages(const struct folio *folio)
* pages are guaranteed to be contiguous.
*/
#define MAX_FOLIO_ORDER PFN_SECTION_SHIFT
-#else
+#elif defined(CONFIG_HUGETLB_PAGE)
/*
* There is no real limit on the folio size. We limit them to the maximum we
- * currently expect (e.g., hugetlb, dax).
+ * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect
+ * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit.
+ */
+#define MAX_FOLIO_ORDER get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G)
+#else
+/*
+ * Without hugetlb, gigantic folios that are bigger than a single PUD are
+ * currently impossible.
*/
#define MAX_FOLIO_ORDER PUD_ORDER
#endif
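
As a worked example of what these bounds mean (assuming x86-64 defaults: 4 KiB pages, 128 MiB sparsemem sections): PFN_SECTION_SHIFT = 27 - 12 = 15 (at most 128 MiB folios), get_order(SZ_16G) = 34 - 12 = 22 (16 GiB), and PUD_ORDER = 30 - 12 = 18 (1 GiB), matching the three branches above.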
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 422270d64820..54e581072617 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -109,6 +109,15 @@ config KEXEC_HANDOVER
to keep data or state alive across the kexec. For this to work,
both source and target kernels need to have this option enabled.
+config KEXEC_HANDOVER_DEBUG
+ bool "Enable Kexec Handover debug checks"
+ depends on KEXEC_HANDOVER
+ help
+ This option enables extra sanity checks for the Kexec Handover
+ subsystem. Since KHO performance is crucial in live update
+ scenarios and the extra code may add overhead, these checks
+ are only enabled optionally.
+
config CRASH_DUMP
bool "kernel crash dumps"
default ARCH_DEFAULT_CRASH_DUMP
diff --git a/kernel/Makefile b/kernel/Makefile
index df3dd8291bb6..9fe722305c9b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
+obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup/
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 3b1c43382eec..99dac1aa972a 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -373,7 +373,7 @@ static int __crash_shrink_memory(struct resource *old_res,
old_res->start = 0;
old_res->end = 0;
} else {
- crashk_res.end = ram_res->start - 1;
+ old_res->end = ram_res->start - 1;
}
crash_free_reserved_phys_range(ram_res->start, ram_res->end);
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index a08cc076f332..ffde93d051a4 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
#include <linux/mm.h>
#include "gcov.h"
-#if (__GNUC__ >= 14)
+#if (__GNUC__ >= 15)
+#define GCOV_COUNTERS 10
+#elif (__GNUC__ >= 14)
#define GCOV_COUNTERS 9
#elif (__GNUC__ >= 10)
#define GCOV_COUNTERS 8
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index 76f0940fb485..03d12e27189f 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) "KHO: " fmt
+#include <linux/cleanup.h>
#include <linux/cma.h>
#include <linux/count_zeros.h>
#include <linux/debugfs.h>
@@ -22,6 +23,7 @@
#include <asm/early_ioremap.h>
+#include "kexec_handover_internal.h"
/*
* KHO is tightly coupled with mm init and needs access to some of mm
* internal APIs.
@@ -67,10 +69,10 @@ early_param("kho", kho_parse_enable);
* Keep track of memory that is to be preserved across KHO.
*
* The serializing side uses two levels of xarrays to manage chunks of per-order
- * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a
- * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations
- * each bitmap will cover 16M of address space. Thus, for 16G of memory at most
- * 512K of bitmap memory will be needed for order 0.
+ * PAGE_SIZE byte bitmaps. For instance, if PAGE_SIZE = 4096, the entire 1G order
+ * of an 8TB system would fit inside a single 4096 byte bitmap. For order 0
+ * allocations each bitmap will cover 128M of address space. Thus, for 16G of
+ * memory at most 512K of bitmap memory will be needed for order 0.
*
* This approach is fully incremental, as the serialization progresses folios
* can continue be aggregated to the tracker. The final step, immediately prior
@@ -78,12 +80,14 @@ early_param("kho", kho_parse_enable);
* successor kernel to parse.
*/
-#define PRESERVE_BITS (512 * 8)
+#define PRESERVE_BITS (PAGE_SIZE * 8)
struct kho_mem_phys_bits {
DECLARE_BITMAP(preserve, PRESERVE_BITS);
};
+static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
+
struct kho_mem_phys {
/*
* Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
@@ -131,28 +135,28 @@ static struct kho_out kho_out = {
.finalized = false,
};
-static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
+static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
{
- void *elm, *res;
+ void *res = xa_load(xa, index);
+
+ if (res)
+ return res;
- elm = xa_load(xa, index);
- if (elm)
- return elm;
+ void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
- elm = kzalloc(sz, GFP_KERNEL);
if (!elm)
return ERR_PTR(-ENOMEM);
+ if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
+ return ERR_PTR(-EINVAL);
+
res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
if (xa_is_err(res))
- res = ERR_PTR(xa_err(res));
-
- if (res) {
- kfree(elm);
+ return ERR_PTR(xa_err(res));
+ else if (res)
return res;
- }
- return elm;
+ return no_free_ptr(elm);
}
static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
@@ -167,12 +171,12 @@ static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
const unsigned long pfn_high = pfn >> order;
physxa = xa_load(&track->orders, order);
- if (!physxa)
- continue;
+ if (WARN_ON_ONCE(!physxa))
+ return;
bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
- if (!bits)
- continue;
+ if (WARN_ON_ONCE(!bits))
+ return;
clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
@@ -216,8 +220,7 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
}
}
- bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS,
- sizeof(*bits));
+ bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
if (IS_ERR(bits))
return PTR_ERR(bits);
@@ -345,15 +348,19 @@ static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE);
static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
unsigned long order)
{
- struct khoser_mem_chunk *chunk;
+ struct khoser_mem_chunk *chunk __free(free_page) = NULL;
- chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ chunk = (void *)get_zeroed_page(GFP_KERNEL);
if (!chunk)
- return NULL;
+ return ERR_PTR(-ENOMEM);
+
+ if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
+ return ERR_PTR(-EINVAL);
+
chunk->hdr.order = order;
if (cur_chunk)
KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
- return chunk;
+ return no_free_ptr(chunk);
}
static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
@@ -374,14 +381,17 @@ static int kho_mem_serialize(struct kho_serialization *ser)
struct khoser_mem_chunk *chunk = NULL;
struct kho_mem_phys *physxa;
unsigned long order;
+ int err = -ENOMEM;
xa_for_each(&ser->track.orders, order, physxa) {
struct kho_mem_phys_bits *bits;
unsigned long phys;
chunk = new_chunk(chunk, order);
- if (!chunk)
+ if (IS_ERR(chunk)) {
+ err = PTR_ERR(chunk);
goto err_free;
+ }
if (!first_chunk)
first_chunk = chunk;
@@ -391,8 +401,10 @@ static int kho_mem_serialize(struct kho_serialization *ser)
if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
chunk = new_chunk(chunk, order);
- if (!chunk)
+ if (IS_ERR(chunk)) {
+ err = PTR_ERR(chunk);
goto err_free;
+ }
}
elm = &chunk->bitmaps[chunk->hdr.num_elms];
@@ -409,7 +421,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
err_free:
kho_mem_ser_free(first_chunk);
- return -ENOMEM;
+ return err;
}
static void __init deserialize_bitmap(unsigned int order,
@@ -465,8 +477,8 @@ static void __init kho_mem_deserialize(const void *fdt)
* area for early allocations that happen before page allocator is
* initialized.
*/
-static struct kho_scratch *kho_scratch;
-static unsigned int kho_scratch_cnt;
+struct kho_scratch *kho_scratch;
+unsigned int kho_scratch_cnt;
/*
* The scratch areas are scaled by default as percent of memory allocated from
@@ -752,6 +764,9 @@ int kho_preserve_folio(struct folio *folio)
const unsigned int order = folio_order(folio);
struct kho_mem_track *track = &kho_out.ser.track;
+ if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
+ return -EINVAL;
+
return __kho_preserve_order(track, pfn, order);
}
EXPORT_SYMBOL_GPL(kho_preserve_folio);
@@ -775,6 +790,11 @@ int kho_preserve_pages(struct page *page, unsigned int nr_pages)
unsigned long failed_pfn = 0;
int err = 0;
+ if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT,
+ nr_pages << PAGE_SHIFT))) {
+ return -EINVAL;
+ }
+
while (pfn < end_pfn) {
const unsigned int order =
min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
@@ -862,16 +882,17 @@ err_free:
return NULL;
}
-static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk)
+static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk,
+ unsigned short order)
{
struct kho_mem_track *track = &kho_out.ser.track;
unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
__kho_unpreserve(track, pfn, pfn + 1);
- for (int i = 0; chunk->phys[i]; i++) {
+ for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) {
pfn = PHYS_PFN(chunk->phys[i]);
- __kho_unpreserve(track, pfn, pfn + 1);
+ __kho_unpreserve(track, pfn, pfn + (1 << order));
}
}
@@ -882,7 +903,7 @@ static void kho_vmalloc_free_chunks(struct kho_vmalloc *kho_vmalloc)
while (chunk) {
struct kho_vmalloc_chunk *tmp = chunk;
- kho_vmalloc_unpreserve_chunk(chunk);
+ kho_vmalloc_unpreserve_chunk(chunk, kho_vmalloc->order);
chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
free_page((unsigned long)tmp);
@@ -992,7 +1013,7 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
while (chunk) {
struct page *page;
- for (int i = 0; chunk->phys[i]; i++) {
+ for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) {
phys_addr_t phys = chunk->phys[i];
if (idx + contig_pages > total_pages)
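
Spelling out the arithmetic behind the updated bitmap-sizing comment above (assuming PAGE_SIZE = 4096): PRESERVE_BITS = 4096 * 8 = 32768 bits per bitmap. At order 0 each bit covers one 4 KiB page, so one bitmap spans 32768 * 4 KiB = 128 MiB, and 16 GiB of memory needs 16 GiB / 128 MiB = 128 bitmaps, i.e. 128 * 4 KiB = 512 KiB of bitmap memory. At the 1 GiB order each bit covers 1 GiB, so a single bitmap spans 32 TiB, which comfortably covers the 8TB example.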
diff --git a/kernel/kexec_handover_debug.c b/kernel/kexec_handover_debug.c
new file mode 100644
index 000000000000..6efb696f5426
--- /dev/null
+++ b/kernel/kexec_handover_debug.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kexec_handover_debug.c - kexec handover optional debug functionality
+ * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#define pr_fmt(fmt) "KHO: " fmt
+
+#include "kexec_handover_internal.h"
+
+bool kho_scratch_overlap(phys_addr_t phys, size_t size)
+{
+ phys_addr_t scratch_start, scratch_end;
+ unsigned int i;
+
+ for (i = 0; i < kho_scratch_cnt; i++) {
+ scratch_start = kho_scratch[i].addr;
+ scratch_end = kho_scratch[i].addr + kho_scratch[i].size;
+
+ if (phys < scratch_end && (phys + size) > scratch_start)
+ return true;
+ }
+
+ return false;
+}
diff --git a/kernel/kexec_handover_internal.h b/kernel/kexec_handover_internal.h
new file mode 100644
index 000000000000..3c3c7148ceed
--- /dev/null
+++ b/kernel/kexec_handover_internal.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
+#define LINUX_KEXEC_HANDOVER_INTERNAL_H
+
+#include <linux/kexec_handover.h>
+#include <linux/types.h>
+
+extern struct kho_scratch *kho_scratch;
+extern unsigned int kho_scratch_cnt;
+
+#ifdef CONFIG_KEXEC_HANDOVER_DEBUG
+bool kho_scratch_overlap(phys_addr_t phys, size_t size);
+#else
+static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
+{
+ return false;
+}
+#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
+
+#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 39bb779cb311..5aa4c9500018 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -64,6 +64,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>
+#define TP_FCT tracepoint_string(__func__)
+
/*
* Kernel pointer hashing renders much of the maple tree dump useless as tagged
* pointers get hashed to arbitrary values.
@@ -2756,7 +2758,7 @@ static inline void mas_rebalance(struct ma_state *mas,
MA_STATE(l_mas, mas->tree, mas->index, mas->last);
MA_STATE(r_mas, mas->tree, mas->index, mas->last);
- trace_ma_op(__func__, mas);
+ trace_ma_op(TP_FCT, mas);
/*
* Rebalancing occurs if a node is insufficient. Data is rebalanced
@@ -2997,7 +2999,7 @@ static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
- trace_ma_op(__func__, mas);
+ trace_ma_op(TP_FCT, mas);
mast.l = &l_mas;
mast.r = &r_mas;
@@ -3172,7 +3174,7 @@ static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
return false;
}
- trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry);
+ trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry);
return true;
}
@@ -3416,7 +3418,7 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
* of data may happen.
*/
mas = wr_mas->mas;
- trace_ma_op(__func__, mas);
+ trace_ma_op(TP_FCT, mas);
if (unlikely(!mas->index && mas->last == ULONG_MAX))
return mas_new_root(mas, wr_mas->entry);
@@ -3552,7 +3554,7 @@ done:
} else {
memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
}
- trace_ma_write(__func__, mas, 0, wr_mas->entry);
+ trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
mas_update_gap(mas);
mas->end = new_end;
return;
@@ -3596,7 +3598,7 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas)
mas->offset++; /* Keep mas accurate. */
}
- trace_ma_write(__func__, mas, 0, wr_mas->entry);
+ trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
/*
* Only update gap when the new entry is empty or there is an empty
* entry in the original two ranges.
@@ -3717,7 +3719,7 @@ static inline void mas_wr_append(struct ma_wr_state *wr_mas,
mas_update_gap(mas);
mas->end = new_end;
- trace_ma_write(__func__, mas, new_end, wr_mas->entry);
+ trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry);
return;
}
@@ -3731,7 +3733,7 @@ static void mas_wr_bnode(struct ma_wr_state *wr_mas)
{
struct maple_big_node b_node;
- trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
+ trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry);
memset(&b_node, 0, sizeof(struct maple_big_node));
mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
mas_commit_b_node(wr_mas, &b_node);
@@ -5062,7 +5064,7 @@ void *mas_store(struct ma_state *mas, void *entry)
{
MA_WR_STATE(wr_mas, mas, entry);
- trace_ma_write(__func__, mas, 0, entry);
+ trace_ma_write(TP_FCT, mas, 0, entry);
#ifdef CONFIG_DEBUG_MAPLE_TREE
if (MAS_WARN_ON(mas, mas->index > mas->last))
pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last,
@@ -5163,7 +5165,7 @@ void mas_store_prealloc(struct ma_state *mas, void *entry)
}
store:
- trace_ma_write(__func__, mas, 0, entry);
+ trace_ma_write(TP_FCT, mas, 0, entry);
mas_wr_store_entry(&wr_mas);
MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
mas_destroy(mas);
@@ -5882,7 +5884,7 @@ void *mtree_load(struct maple_tree *mt, unsigned long index)
MA_STATE(mas, mt, index, index);
void *entry;
- trace_ma_read(__func__, &mas);
+ trace_ma_read(TP_FCT, &mas);
rcu_read_lock();
retry:
entry = mas_start(&mas);
@@ -5925,7 +5927,7 @@ int mtree_store_range(struct maple_tree *mt, unsigned long index,
MA_STATE(mas, mt, index, last);
int ret = 0;
- trace_ma_write(__func__, &mas, 0, entry);
+ trace_ma_write(TP_FCT, &mas, 0, entry);
if (WARN_ON_ONCE(xa_is_advanced(entry)))
return -EINVAL;
@@ -6148,7 +6150,7 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index)
void *entry = NULL;
MA_STATE(mas, mt, index, index);
- trace_ma_op(__func__, &mas);
+ trace_ma_op(TP_FCT, &mas);
mtree_lock(mt);
entry = mas_erase(&mas);
@@ -6485,7 +6487,7 @@ void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
unsigned long copy = *index;
#endif
- trace_ma_read(__func__, &mas);
+ trace_ma_read(TP_FCT, &mas);
if ((*index) > max)
return NULL;
diff --git a/lib/test_kho.c b/lib/test_kho.c
index 60cd899ea745..fff018e5548d 100644
--- a/lib/test_kho.c
+++ b/lib/test_kho.c
@@ -301,6 +301,9 @@ static int __init kho_test_init(void)
phys_addr_t fdt_phys;
int err;
+ if (!kho_is_enabled())
+ return 0;
+
err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys);
if (!err)
return kho_test_restore(fdt_phys);
diff --git a/mm/Kconfig b/mm/Kconfig
index 0e26f4fc8717..ca3f146bc705 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -908,6 +908,13 @@ config PAGE_MAPCOUNT
config PGTABLE_HAS_HUGE_LEAVES
def_bool TRANSPARENT_HUGEPAGE || HUGETLB_PAGE
+#
+# We can end up creating gigantic folios.
+#
+config HAVE_GIGANTIC_FOLIOS
+ def_bool (HUGETLB_PAGE && ARCH_HAS_GIGANTIC_PAGE) || \
+ (ZONE_DEVICE && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+
# TODO: Allow to be enabled without THP
config ARCH_SUPPORTS_HUGE_PFNMAP
def_bool n
diff --git a/mm/damon/stat.c b/mm/damon/stat.c
index d8010968bbed..bf8626859902 100644
--- a/mm/damon/stat.c
+++ b/mm/damon/stat.c
@@ -46,6 +46,8 @@ MODULE_PARM_DESC(aggr_interval_us,
static struct damon_ctx *damon_stat_context;
+static unsigned long damon_stat_last_refresh_jiffies;
+
static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c)
{
struct damon_target *t;
@@ -130,13 +132,12 @@ static void damon_stat_set_idletime_percentiles(struct damon_ctx *c)
static int damon_stat_damon_call_fn(void *data)
{
struct damon_ctx *c = data;
- static unsigned long last_refresh_jiffies;
/* avoid unnecessarily frequent stat update */
- if (time_before_eq(jiffies, last_refresh_jiffies +
+ if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies +
msecs_to_jiffies(5 * MSEC_PER_SEC)))
return 0;
- last_refresh_jiffies = jiffies;
+ damon_stat_last_refresh_jiffies = jiffies;
aggr_interval_us = c->attrs.aggr_interval;
damon_stat_set_estimated_memory_bandwidth(c);
@@ -210,6 +211,8 @@ static int damon_stat_start(void)
err = damon_start(&damon_stat_context, 1, true);
if (err)
return err;
+
+ damon_stat_last_refresh_jiffies = jiffies;
call_control.data = damon_stat_context;
return damon_call(damon_stat_context, &call_control);
}
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index cd6815ecc04e..3c0d727788c8 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1552,16 +1552,17 @@ static struct damon_ctx *damon_sysfs_build_ctx(
return ctx;
}
+static unsigned long damon_sysfs_next_update_jiffies;
+
static int damon_sysfs_repeat_call_fn(void *data)
{
struct damon_sysfs_kdamond *sysfs_kdamond = data;
- static unsigned long next_update_jiffies;
if (!sysfs_kdamond->refresh_ms)
return 0;
- if (time_before(jiffies, next_update_jiffies))
+ if (time_before(jiffies, damon_sysfs_next_update_jiffies))
return 0;
- next_update_jiffies = jiffies +
+ damon_sysfs_next_update_jiffies = jiffies +
msecs_to_jiffies(sysfs_kdamond->refresh_ms);
if (!mutex_trylock(&damon_sysfs_lock))
@@ -1607,6 +1608,9 @@ static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond)
}
kdamond->damon_ctx = ctx;
+ damon_sysfs_next_update_jiffies =
+ jiffies + msecs_to_jiffies(kdamond->refresh_ms);
+
repeat_call_control->fn = damon_sysfs_repeat_call_fn;
repeat_call_control->data = kdamond;
repeat_call_control->repeat = true;
diff --git a/mm/filemap.c b/mm/filemap.c
index 13f0259d993c..024b71da5224 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3681,8 +3681,10 @@ skip:
static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
unsigned long addr, unsigned int nr_pages,
- unsigned long *rss, unsigned short *mmap_miss)
+ unsigned long *rss, unsigned short *mmap_miss,
+ pgoff_t file_end)
{
+ struct address_space *mapping = folio->mapping;
unsigned int ref_from_caller = 1;
vm_fault_t ret = 0;
struct page *page = folio_page(folio, start);
@@ -3691,12 +3693,16 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
unsigned long addr0;
/*
- * Map the large folio fully where possible.
+ * Map the large folio fully where possible:
*
- * The folio must not cross VMA or page table boundary.
+ * - The folio is fully within the size of the file or belongs
+ * to shmem/tmpfs;
+ * - The folio doesn't cross a VMA boundary;
+ * - The folio doesn't cross a page table boundary;
*/
addr0 = addr - start * PAGE_SIZE;
- if (folio_within_vma(folio, vmf->vma) &&
+ if ((file_end >= folio_next_index(folio) || shmem_mapping(mapping)) &&
+ folio_within_vma(folio, vmf->vma) &&
(addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {
vmf->pte -= start;
page -= start;
@@ -3817,7 +3823,18 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
if (!folio)
goto out;
- if (filemap_map_pmd(vmf, folio, start_pgoff)) {
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+ end_pgoff = min(end_pgoff, file_end);
+
+ /*
+ * Do not allow mapping with a PMD across i_size, to preserve
+ * SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs, which has long been
+ * intentionally mapped with PMDs across i_size.
+ */
+ if ((file_end >= folio_next_index(folio) || shmem_mapping(mapping)) &&
+ filemap_map_pmd(vmf, folio, start_pgoff)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -3830,10 +3847,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
goto out;
}
- file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
- if (end_pgoff > file_end)
- end_pgoff = file_end;
-
folio_type = mm_counter_file(folio);
do {
unsigned long end;
@@ -3850,7 +3863,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
else
ret |= filemap_map_folio_range(vmf, folio,
xas.xa_index - folio->index, addr,
- nr_pages, &rss, &mmap_miss);
+ nr_pages, &rss, &mmap_miss, file_end);
folio_unlock(folio);
} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1d1b74950332..6cba1cb14b23 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -214,7 +214,8 @@ retry:
if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
return true;
- zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
+ zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO | __GFP_ZEROTAGS) &
+ ~__GFP_MOVABLE,
HPAGE_PMD_ORDER);
if (!zero_folio) {
count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
@@ -3263,6 +3264,14 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
caller_pins;
}
+static bool page_range_has_hwpoisoned(struct page *page, long nr_pages)
+{
+ for (; nr_pages; page++, nr_pages--)
+ if (PageHWPoison(page))
+ return true;
+ return false;
+}
+
/*
* It splits @folio into @new_order folios and copies the @folio metadata to
* all the resulting folios.
@@ -3270,17 +3279,24 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
static void __split_folio_to_order(struct folio *folio, int old_order,
int new_order)
{
+ /* Scan for poisoned pages when splitting a poisoned folio into large folios */
+ const bool handle_hwpoison = folio_test_has_hwpoisoned(folio) && new_order;
long new_nr_pages = 1 << new_order;
long nr_pages = 1 << old_order;
long i;
+ folio_clear_has_hwpoisoned(folio);
+
+ /* Check the first new_nr_pages pages, since the loop below skips them */
+ if (handle_hwpoison &&
+ page_range_has_hwpoisoned(folio_page(folio, 0), new_nr_pages))
+ folio_set_has_hwpoisoned(folio);
/*
* Skip the first new_nr_pages, since the new folio from them have all
* the flags from the original folio.
*/
for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) {
struct page *new_head = &folio->page + i;
-
/*
* Careful: new_folio is not a "real" folio before we cleared PageTail.
* Don't pass it around before clear_compound_head().
@@ -3322,6 +3338,10 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
(1L << PG_dirty) |
LRU_GEN_MASK | LRU_REFS_MASK));
+ if (handle_hwpoison &&
+ page_range_has_hwpoisoned(new_head, new_nr_pages))
+ folio_set_has_hwpoisoned(new_folio);
+
new_folio->mapping = folio->mapping;
new_folio->index = folio->index + i;
@@ -3422,8 +3442,6 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
if (folio_test_anon(folio))
mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
- folio_clear_has_hwpoisoned(folio);
-
/*
* split to new_order one order at a time. For uniform split,
* folio is split to new_order directly.
@@ -3504,7 +3522,8 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
/* order-1 is not supported for anonymous THP. */
VM_WARN_ONCE(warns && new_order == 1,
"Cannot split to order-1 folio");
- return new_order != 1;
+ if (new_order == 1)
+ return false;
} else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
/*
@@ -3535,7 +3554,8 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order,
if (folio_test_anon(folio)) {
VM_WARN_ONCE(warns && new_order == 1,
"Cannot split to order-1 folio");
- return new_order != 1;
+ if (new_order == 1)
+ return false;
} else if (new_order) {
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
@@ -3599,6 +3619,16 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
if (folio != page_folio(split_at) || folio != page_folio(lock_at))
return -EINVAL;
+ /*
+ * Folios that just got truncated cannot get split. Signal to the
+ * caller that there was a race.
+ *
+ * TODO: this will also currently refuse shmem folios that are in the
+ * swapcache.
+ */
+ if (!is_anon && !folio->mapping)
+ return -EBUSY;
+
if (new_order >= folio_order(folio))
return -EINVAL;
@@ -3639,22 +3669,8 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
gfp_t gfp;
mapping = folio->mapping;
-
- /* Truncated ? */
- /*
- * TODO: add support for large shmem folio in swap cache.
- * When shmem is in swap cache, mapping is NULL and
- * folio_test_swapcache() is true.
- */
- if (!mapping) {
- ret = -EBUSY;
- goto out;
- }
-
min_order = mapping_min_folio_order(folio->mapping);
if (new_order < min_order) {
- VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u",
- min_order);
ret = -EINVAL;
goto out;
}
@@ -3986,12 +4002,7 @@ int min_order_for_split(struct folio *folio)
int split_folio_to_list(struct folio *folio, struct list_head *list)
{
- int ret = min_order_for_split(folio);
-
- if (ret < 0)
- return ret;
-
- return split_huge_page_to_list_to_order(&folio->page, list, ret);
+ return split_huge_page_to_list_to_order(&folio->page, list, 0);
}
/*
diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c
index 8bca7fece47f..35ceaa8adb41 100644
--- a/mm/kmsan/core.c
+++ b/mm/kmsan/core.c
@@ -72,9 +72,6 @@ depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags,
nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0);
- /* Don't sleep. */
- flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM);
-
handle = stack_depot_save(entries, nr_entries, flags);
return stack_depot_set_extra_bits(handle, extra);
}
diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c
index 2cee59d89c80..8f22d1f22981 100644
--- a/mm/kmsan/hooks.c
+++ b/mm/kmsan/hooks.c
@@ -84,7 +84,8 @@ void kmsan_slab_free(struct kmem_cache *s, void *object)
if (s->ctor)
return;
kmsan_enter_runtime();
- kmsan_internal_poison_memory(object, s->object_size, GFP_KERNEL,
+ kmsan_internal_poison_memory(object, s->object_size,
+ GFP_KERNEL & ~(__GFP_RECLAIM),
KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
kmsan_leave_runtime();
}
@@ -114,7 +115,8 @@ void kmsan_kfree_large(const void *ptr)
kmsan_enter_runtime();
page = virt_to_head_page((void *)ptr);
KMSAN_WARN_ON(ptr != page_address(page));
- kmsan_internal_poison_memory((void *)ptr, page_size(page), GFP_KERNEL,
+ kmsan_internal_poison_memory((void *)ptr, page_size(page),
+ GFP_KERNEL & ~(__GFP_RECLAIM),
KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
kmsan_leave_runtime();
}
diff --git a/mm/kmsan/shadow.c b/mm/kmsan/shadow.c
index 54f3c3c962f0..55fdea199aaf 100644
--- a/mm/kmsan/shadow.c
+++ b/mm/kmsan/shadow.c
@@ -208,7 +208,7 @@ void kmsan_free_page(struct page *page, unsigned int order)
return;
kmsan_enter_runtime();
kmsan_internal_poison_memory(page_address(page), page_size(page),
- GFP_KERNEL,
+ GFP_KERNEL & ~(__GFP_RECLAIM),
KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
kmsan_leave_runtime();
}
diff --git a/mm/ksm.c b/mm/ksm.c
index 7bc726b50b2f..c4e730409949 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2455,6 +2455,95 @@ static bool should_skip_rmap_item(struct folio *folio,
return true;
}
+struct ksm_next_page_arg {
+ struct folio *folio;
+ struct page *page;
+ unsigned long addr;
+};
+
+static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct ksm_next_page_arg *private = walk->private;
+ struct vm_area_struct *vma = walk->vma;
+ pte_t *start_ptep = NULL, *ptep, pte;
+ struct mm_struct *mm = walk->mm;
+ struct folio *folio;
+ struct page *page;
+ spinlock_t *ptl;
+ pmd_t pmd;
+
+ if (ksm_test_exit(mm))
+ return 0;
+
+ cond_resched();
+
+ pmd = pmdp_get_lockless(pmdp);
+ if (!pmd_present(pmd))
+ return 0;
+
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) {
+ ptl = pmd_lock(mm, pmdp);
+ pmd = pmdp_get(pmdp);
+
+ if (!pmd_present(pmd)) {
+ goto not_found_unlock;
+ } else if (pmd_leaf(pmd)) {
+ page = vm_normal_page_pmd(vma, addr, pmd);
+ if (!page)
+ goto not_found_unlock;
+ folio = page_folio(page);
+
+ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
+ goto not_found_unlock;
+
+ page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT);
+ goto found_unlock;
+ }
+ spin_unlock(ptl);
+ }
+
+ start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!start_ptep)
+ return 0;
+
+ for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
+ pte = ptep_get(ptep);
+
+ if (!pte_present(pte))
+ continue;
+
+ page = vm_normal_page(vma, addr, pte);
+ if (!page)
+ continue;
+ folio = page_folio(page);
+
+ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
+ continue;
+ goto found_unlock;
+ }
+
+not_found_unlock:
+ spin_unlock(ptl);
+ if (start_ptep)
+ pte_unmap(start_ptep);
+ return 0;
+found_unlock:
+ folio_get(folio);
+ spin_unlock(ptl);
+ if (start_ptep)
+ pte_unmap(start_ptep);
+ private->page = page;
+ private->folio = folio;
+ private->addr = addr;
+ return 1;
+}
+
+static struct mm_walk_ops ksm_next_page_ops = {
+ .pmd_entry = ksm_next_page_pmd_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
{
struct mm_struct *mm;
@@ -2542,21 +2631,27 @@ next_mm:
ksm_scan.address = vma->vm_end;
while (ksm_scan.address < vma->vm_end) {
+ struct ksm_next_page_arg ksm_next_page_arg;
struct page *tmp_page = NULL;
- struct folio_walk fw;
struct folio *folio;
if (ksm_test_exit(mm))
break;
- folio = folio_walk_start(&fw, vma, ksm_scan.address, 0);
- if (folio) {
- if (!folio_is_zone_device(folio) &&
- folio_test_anon(folio)) {
- folio_get(folio);
- tmp_page = fw.page;
- }
- folio_walk_end(&fw, vma);
+ int found;
+
+ found = walk_page_range_vma(vma, ksm_scan.address,
+ vma->vm_end,
+ &ksm_next_page_ops,
+ &ksm_next_page_arg);
+
+ if (found > 0) {
+ folio = ksm_next_page_arg.folio;
+ tmp_page = ksm_next_page_arg.page;
+ ksm_scan.address = ksm_next_page_arg.addr;
+ } else {
+ VM_WARN_ON_ONCE(found < 0);
+ ksm_scan.address = vma->vm_end - PAGE_SIZE;
}
if (tmp_page) {
diff --git a/mm/memfd.c b/mm/memfd.c
index 1d109c1acf21..a405eaa451ee 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -96,9 +96,36 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
NULL,
gfp_mask);
if (folio) {
+ u32 hash;
+
+ /*
+ * Zero the folio to prevent information leaks to userspace.
+ * Use folio_zero_user() which is optimized for huge/gigantic
+ * pages. Pass 0 as addr_hint since this is not a faulting path
+ * and we don't have a user virtual address yet.
+ */
+ folio_zero_user(folio, 0);
+
+ /*
+ * Mark the folio uptodate before adding to page cache,
+ * as required by filemap.c and other hugetlb paths.
+ */
+ __folio_mark_uptodate(folio);
+
+ /*
+ * Serialize hugepage allocation and instantiation to prevent
+ * races with concurrent allocations, as required by all other
+ * callers of hugetlb_add_to_page_cache().
+ */
+ hash = hugetlb_fault_mutex_hash(memfd->f_mapping, idx);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
err = hugetlb_add_to_page_cache(folio,
memfd->f_mapping,
idx);
+
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+
if (err) {
folio_put(folio);
goto err_unresv;
diff --git a/mm/memory.c b/mm/memory.c
index 74b45e258323..b59ae7ce42eb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -65,6 +65,7 @@
#include <linux/gfp.h>
#include <linux/migrate.h>
#include <linux/string.h>
+#include <linux/shmem_fs.h>
#include <linux/memory-tiers.h>
#include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
@@ -5501,8 +5502,25 @@ fallback:
return ret;
}
+ if (!needs_fallback && vma->vm_file) {
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ pgoff_t file_end;
+
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+
+ /*
+ * Do not allow mapping with PTEs beyond i_size, or with a PMD
+ * across i_size, to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs, which has long been
+ * intentionally mapped with PMDs across i_size.
+ */
+ needs_fallback = !shmem_mapping(mapping) &&
+ file_end < folio_next_index(folio);
+ }
+
if (pmd_none(*vmf->pmd)) {
- if (folio_test_pmd_mappable(folio)) {
+ if (!needs_fallback && folio_test_pmd_mappable(folio)) {
ret = do_set_pmd(vmf, folio, page);
if (ret != VM_FAULT_FALLBACK)
return ret;
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3db2dea7db4c..7712d887b696 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(const char *tablename,
panic("Failed to allocate %s hash table\n", tablename);
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ tablename, 1UL << log2qty, get_order(size), size,
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
index 0a0db5849b8e..42e3dde73e74 100644
--- a/mm/mmap_lock.c
+++ b/mm/mmap_lock.c
@@ -241,6 +241,7 @@ retry:
if (PTR_ERR(vma) == -EAGAIN) {
count_vm_vma_lock_event(VMA_LOCK_MISS);
/* The area was replaced with another one */
+ mas_set(&mas, address);
goto retry;
}
diff --git a/mm/mremap.c b/mm/mremap.c
index bd7314898ec5..419a0ea0a870 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -187,7 +187,7 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr
if (!folio || !folio_test_large(folio))
return 1;
- return folio_pte_batch(folio, ptep, pte, max_nr);
+ return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE);
}
static int move_ptes(struct pagetable_move_control *pmc,
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 60137305bc20..b59350daffe3 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -82,13 +82,13 @@ retry:
__folio_mark_uptodate(folio);
err = filemap_add_folio(mapping, folio, offset, gfp);
if (unlikely(err)) {
- folio_put(folio);
/*
* If a split of large page was required, it
* already happened when we marked the page invalid
* which guarantees that this call won't fail
*/
set_direct_map_default_noflush(folio_page(folio, 0));
+ folio_put(folio);
if (err == -EEXIST)
goto retry;
diff --git a/mm/shmem.c b/mm/shmem.c
index b9081b817d28..58701d14dd96 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1882,6 +1882,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
struct shmem_inode_info *info = SHMEM_I(inode);
unsigned long suitable_orders = 0;
struct folio *folio = NULL;
+ pgoff_t aligned_index;
long pages;
int error, order;
@@ -1895,10 +1896,12 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
order = highest_order(suitable_orders);
while (suitable_orders) {
pages = 1UL << order;
- index = round_down(index, pages);
- folio = shmem_alloc_folio(gfp, order, info, index);
- if (folio)
+ aligned_index = round_down(index, pages);
+ folio = shmem_alloc_folio(gfp, order, info, aligned_index);
+ if (folio) {
+ index = aligned_index;
goto allocated;
+ }
if (pages == HPAGE_PMD_NR)
count_vm_event(THP_FILE_FALLBACK);
diff --git a/mm/slub.c b/mm/slub.c
index f1a5373eee7b..1bf65c421325 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2046,7 +2046,11 @@ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts)
if (slab_exts) {
unsigned int offs = obj_to_index(obj_exts_slab->slab_cache,
obj_exts_slab, obj_exts);
- /* codetag should be NULL */
+
+ if (unlikely(is_codetag_empty(&slab_exts[offs].ref)))
+ return;
+
+ /* codetag should be NULL here */
WARN_ON(slab_exts[offs].ref.ct);
set_codetag_empty(&slab_exts[offs].ref);
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b13e9c4baa90..f4980dde5394 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -748,6 +748,8 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
blk_start_plug(&plug);
for (addr = start; addr < end; ilx++, addr += PAGE_SIZE) {
+ struct swap_info_struct *si = NULL;
+
if (!pte++) {
pte = pte_offset_map(vmf->pmd, addr);
if (!pte)
@@ -761,8 +763,19 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
continue;
pte_unmap(pte);
pte = NULL;
+ /*
+ * The readahead entry may come from a device we are not holding
+ * a reference to; try to grab a reference, or skip it.
+ */
+ if (swp_type(entry) != swp_type(targ_entry)) {
+ si = get_swap_device(entry);
+ if (!si)
+ continue;
+ }
folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
&page_allocated, false);
+ if (si)
+ put_swap_device(si);
if (!folio)
continue;
if (page_allocated) {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 10760240a3a2..a1b4b9d80e3b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2005,10 +2005,8 @@ swp_entry_t get_swap_page_of_type(int type)
local_lock(&percpu_swap_cluster.lock);
offset = cluster_alloc_swap_entry(si, 0, 1);
local_unlock(&percpu_swap_cluster.lock);
- if (offset) {
+ if (offset)
entry = swp_entry(si->type, offset);
- atomic_long_dec(&nr_swap_pages);
- }
}
put_swap_device(si);
}
diff --git a/mm/truncate.c b/mm/truncate.c
index 91eb92a5ce4f..3c5a50ae3274 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -177,6 +177,32 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
return 0;
}
+static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at,
+ unsigned long min_order)
+{
+ enum ttu_flags ttu_flags =
+ TTU_SYNC |
+ TTU_SPLIT_HUGE_PMD |
+ TTU_IGNORE_MLOCK;
+ int ret;
+
+ ret = try_folio_split_to_order(folio, split_at, min_order);
+
+ /*
+ * If the split fails, unmap the folio, so it will be refaulted
+ * with PTEs to respect SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs, which has long been
+ * intentionally mapped with PMDs across i_size.
+ */
+ if (ret && !shmem_mapping(folio->mapping)) {
+ try_to_unmap(folio, ttu_flags);
+ WARN_ON(folio_mapped(folio));
+ }
+
+ return ret;
+}
+
/*
* Handle partial folios. The folio may be entirely within the
* range if a split has raced with us. If not, we zero the part of the
@@ -194,6 +220,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
size_t size = folio_size(folio);
unsigned int offset, length;
struct page *split_at, *split_at2;
+ unsigned int min_order;
if (pos < start)
offset = start - pos;
@@ -223,8 +250,9 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
if (!folio_test_large(folio))
return true;
+ min_order = mapping_min_folio_order(folio->mapping);
split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
- if (!try_folio_split(folio, split_at, NULL)) {
+ if (!try_folio_split_or_unmap(folio, split_at, min_order)) {
/*
* try to split at offset + length to make sure folios within
* the range can be dropped, especially to avoid memory waste
@@ -248,13 +276,10 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
if (!folio_trylock(folio2))
goto out;
- /*
- * make sure folio2 is large and does not change its mapping.
- * Its split result does not matter here.
- */
+ /* make sure folio2 is large and does not change its mapping */
if (folio_test_large(folio2) &&
folio2->mapping == folio->mapping)
- try_folio_split(folio2, split_at2, NULL);
+ try_folio_split_or_unmap(folio2, split_at2, min_order);
folio_unlock(folio2);
out:
diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh
index c73cb802a0a3..8d01b741de62 100755
--- a/scripts/decode_stacktrace.sh
+++ b/scripts/decode_stacktrace.sh
@@ -277,12 +277,6 @@ handle_line() {
fi
done
- if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
- words[$last-1]="${words[$last-1]} ${words[$last]}"
- unset words[$last] spaces[$last]
- last=$(( $last - 1 ))
- fi
-
# Extract info after the symbol if present. E.g.:
# func_name+0x54/0x80 (P)
# ^^^
@@ -295,6 +289,14 @@ handle_line() {
last=$(( $last - 1 ))
fi
+ # Join module name with its build id if present, as these were
+ # split during tokenization (e.g. "[module" and "modbuildid]").
+ if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
+ words[$last-1]="${words[$last-1]} ${words[$last]}"
+ unset words[$last] spaces[$last]
+ last=$(( $last - 1 ))
+ fi
+
if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then
module=${words[$last]}
# some traces format is "(%pS)", which like "(foo+0x0/0x1 [bar])"
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 9e3be2ee7f1b..f917b4c4c943 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1758,10 +1758,15 @@ int main(int argc, char *argv[])
uffd_test_ops = mem_type->mem_ops;
uffd_test_case_ops = test->test_case_ops;
- if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
+ if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) {
gopts.page_size = default_huge_page_size();
- else
+ if (gopts.page_size == 0) {
+ uffd_test_skip("huge page size is 0, feature missing?");
+ continue;
+ }
+ } else {
gopts.page_size = psize();
+ }
/* Ensure we have at least 2 pages */
gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2)
@@ -1776,12 +1781,6 @@ int main(int argc, char *argv[])
continue;
uffd_test_start("%s on %s", test->name, mem_type->name);
- if ((mem_type->mem_flag == MEM_HUGETLB ||
- mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
- (default_huge_page_size() == 0)) {
- uffd_test_skip("huge page size is 0, feature missing?");
- continue;
- }
if (!uffd_feature_supported(test)) {
uffd_test_skip("feature missing");
continue;
diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c
index 5288e768b207..68625362add2 100644
--- a/tools/testing/selftests/user_events/perf_test.c
+++ b/tools/testing/selftests/user_events/perf_test.c
@@ -236,7 +236,7 @@ TEST_F(user, perf_empty_events) {
ASSERT_EQ(1 << reg.enable_bit, self->check);
/* Ensure write shows up at correct offset */
- ASSERT_NE(-1, write(self->data_fd, &reg.write_index,
+ ASSERT_NE(-1, write(self->data_fd, (void *)&reg.write_index,
sizeof(reg.write_index)));
val = (void *)(((char *)perf_page) + perf_page->data_offset);
ASSERT_EQ(PERF_RECORD_SAMPLE, *val);