summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2026-05-26 08:23:19 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2026-05-26 08:23:19 -0700
commit: d60ec36cab338dfe2ae40d73e9c8d6c4af70d2b8 (patch)
tree: e9a0c763a92a7f4a33c956c4d1ad6a6026e583dc
parent: e8c2f9fdadee7cbc75134dc463c1e0d856d6e5c7 (diff)
parent: 54cf41c969da6637cce790b7400da1451609db9b (diff)
Merge tag 'mm-hotfixes-stable-2026-05-25-16-22' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm (HEAD, master)
Pull misc fixes from Andrew Morton:
 "13 hotfixes. 9 are for MM. 9 are cc:stable and the remaining 4 address
  post-7.1 issues or aren't considered suitable for backporting.

  All patches are singletons - please see the individual changelogs for
  details"

* tag 'mm-hotfixes-stable-2026-05-25-16-22' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  Revert "mm: introduce a new page type for page pool in page type"
  mm/vmalloc: do not trigger BUG() on BH disabled context
  MAINTAINERS, mailmap: change email for Eugen Hristev
  mm/migrate_device: fix pgtable leak in migrate_vma_insert_huge_pmd_page
  kernel/fork: validate exit_signal in kernel_clone()
  mm: memcontrol: propagate NMI slab stats to memcg vmstats
  mm/damon/sysfs-schemes: delete tried region in regions_rmdirs()
  mm/rmap: initialize nr_pages to 1 at loop start in try_to_unmap_one
  zram: fix use-after-free in zram_writeback_endio
  memfd: deny writeable mappings when implying SEAL_WRITE
  ipc: limit next_id allocation to the valid ID range
  Revert "mm/hugetlbfs: update hugetlbfs to use mmap_prepare"
  MAINTAINERS: .mailmap: update after GEHC spin-off
-rw-r--r-- .mailmap | 6
-rw-r--r-- MAINTAINERS | 14
-rw-r--r-- drivers/block/zram/zram_drv.c | 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 2
-rw-r--r-- fs/hugetlbfs/inode.c | 46
-rw-r--r-- include/linux/hugetlb.h | 8
-rw-r--r-- include/linux/hugetlb_inline.h | 14
-rw-r--r-- include/linux/mm.h | 27
-rw-r--r-- include/linux/page-flags.h | 6
-rw-r--r-- include/net/netmem.h | 19
-rw-r--r-- ipc/util.c | 2
-rw-r--r-- kernel/fork.c | 11
-rw-r--r-- mm/damon/sysfs-schemes.c | 8
-rw-r--r-- mm/hugetlb.c | 71
-rw-r--r-- mm/memcontrol.c | 6
-rw-r--r-- mm/memfd.c | 12
-rw-r--r-- mm/migrate_device.c | 4
-rw-r--r-- mm/page_alloc.c | 13
-rw-r--r-- mm/rmap.c | 2
-rw-r--r-- mm/vmalloc.c | 2
-rw-r--r-- net/core/netmem_priv.h | 23
-rw-r--r-- net/core/page_pool.c | 24
22 files changed, 135 insertions, 191 deletions
diff --git a/.mailmap b/.mailmap
index 99dba08041fa..a009f73d7ea5 100644
--- a/.mailmap
+++ b/.mailmap
@@ -263,8 +263,9 @@ Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
Ethan Carter Edwards <ethan@ethancedwards.com> Ethan Edwards <ethancarteredwards@gmail.com>
-Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@microchip.com>
-Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@collabora.com>
+Eugen Hristev <ehristev@kernel.org> <eugen.hristev@microchip.com>
+Eugen Hristev <ehristev@kernel.org> <eugen.hristev@linaro.org>
+Eugen Hristev <ehristev@kernel.org> <eugen.hristev@collabora.com>
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
Faith Ekstrand <faith.ekstrand@collabora.com> <jason@jlekstrand.net>
@@ -339,6 +340,7 @@ Henrik Rydberg <rydberg@bitmath.org>
Herbert Xu <herbert@gondor.apana.org.au>
Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
+Ian Ray <ian.ray@gehealthcare.com> <ian.ray@ge.com>
Ignat Korchagin <ignat@linux.win> <ignat@cloudflare.com>
Igor Korotin <igor.korotin@linux.dev> <igor.korotin.linux@gmail.com>
Ike Panhc <ikepanhc@gmail.com> <ike.pan@canonical.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index b539be153f6a..461a3eed6129 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10835,7 +10835,7 @@ F: include/linux/generic-radix-tree.h
F: lib/generic-radix-tree.c
GENERIC RESISTIVE TOUCHSCREEN ADC DRIVER
-M: Eugen Hristev <eugen.hristev@microchip.com>
+M: Eugen Hristev <ehristev@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
F: drivers/input/touchscreen/resistive-adc-touch.c
@@ -16506,7 +16506,7 @@ F: drivers/usb/mtu3/
MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
M: Peter Senna Tschudin <peter.senna@gmail.com>
-M: Ian Ray <ian.ray@ge.com>
+M: Ian Ray <ian.ray@gehealthcare.com>
M: Martyn Welch <martyn.welch@collabora.co.uk>
S: Maintained
F: Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt
@@ -17345,7 +17345,7 @@ F: Documentation/devicetree/bindings/sound/mikroe,mikroe-proto.txt
F: sound/soc/atmel
MICROCHIP CSI2DC DRIVER
-M: Eugen Hristev <eugen.hristev@microchip.com>
+M: Eugen Hristev <ehristev@kernel.org>
L: linux-media@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/media/microchip,csi2dc.yaml
@@ -17372,7 +17372,7 @@ F: drivers/i2c/busses/i2c-at91-*.c
F: drivers/i2c/busses/i2c-at91.h
MICROCHIP ISC DRIVER
-M: Eugen Hristev <eugen.hristev@microchip.com>
+M: Eugen Hristev <ehristev@kernel.org>
L: linux-media@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/media/atmel,isc.yaml
@@ -17384,7 +17384,7 @@ F: drivers/staging/media/deprecated/atmel/atmel-sama*-isc*
F: include/linux/atmel-isc-media.h
MICROCHIP ISI DRIVER
-M: Eugen Hristev <eugen.hristev@microchip.com>
+M: Eugen Hristev <ehristev@kernel.org>
L: linux-media@vger.kernel.org
S: Supported
F: drivers/media/platform/atmel/atmel-isi.c
@@ -17574,7 +17574,7 @@ F: Documentation/devicetree/bindings/display/bridge/microchip,sam9x75-lvds.yaml
F: drivers/gpu/drm/bridge/microchip-lvds.c
MICROCHIP SAMA5D2-COMPATIBLE ADC DRIVER
-M: Eugen Hristev <eugen.hristev@microchip.com>
+M: Eugen Hristev <ehristev@kernel.org>
L: linux-iio@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/iio/adc/atmel,sama5d2-adc.yaml
@@ -24124,7 +24124,7 @@ F: drivers/mmc/host/sdhci*
SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) MICROCHIP DRIVER
M: Aubin Constans <aubin.constans@microchip.com>
-R: Eugen Hristev <eugen.hristev@collabora.com>
+R: Eugen Hristev <ehristev@kernel.org>
L: linux-mmc@vger.kernel.org
S: Supported
F: drivers/mmc/host/sdhci-of-at91.c
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index aebc710f0d6a..07111455eecf 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -33,6 +33,7 @@
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>
#include <linux/kernel_read_file.h>
+#include <linux/rcupdate.h>
#include "zram_drv.h"
@@ -504,6 +505,7 @@ struct zram_wb_ctl {
wait_queue_head_t done_wait;
spinlock_t done_lock;
atomic_t num_inflight;
+ struct rcu_head rcu;
};
struct zram_wb_req {
@@ -847,7 +849,7 @@ static void release_wb_ctl(struct zram_wb_ctl *wb_ctl)
release_wb_req(req);
}
- kfree(wb_ctl);
+ kfree_rcu(wb_ctl, rcu);
}
static struct zram_wb_ctl *init_wb_ctl(struct zram *zram)
@@ -964,11 +966,13 @@ static void zram_writeback_endio(struct bio *bio)
struct zram_wb_ctl *wb_ctl = bio->bi_private;
unsigned long flags;
+ rcu_read_lock();
spin_lock_irqsave(&wb_ctl->done_lock, flags);
list_add(&req->entry, &wb_ctl->done_reqs);
spin_unlock_irqrestore(&wb_ctl->done_lock, flags);
wake_up(&wb_ctl->done_wait);
+ rcu_read_unlock();
}
static void zram_submit_wb_request(struct zram *zram,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 190b8b66b3ce..d3bab198c99c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -708,7 +708,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
page = xdpi.page.page;
- /* No need to check PageNetpp() as we
+ /* No need to check page_pool_page_is_pp() as we
* know this is a page_pool page.
*/
page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8b05bec08e04..78d61bf2bd9b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -96,15 +96,8 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
#define PGOFF_LOFFT_MAX \
(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
-static int hugetlb_file_mmap_prepare_success(const struct vm_area_struct *vma)
+static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
- /* Unfortunate we have to reassign vma->vm_private_data. */
- return hugetlb_vma_lock_alloc((struct vm_area_struct *)vma);
-}
-
-static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
-{
- struct file *file = desc->file;
struct inode *inode = file_inode(file);
loff_t len, vma_len;
int ret;
@@ -119,8 +112,8 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
* way when do_mmap unwinds (may be important on powerpc
* and ia64).
*/
- vma_desc_set_flags(desc, VMA_HUGETLB_BIT, VMA_DONTEXPAND_BIT);
- desc->vm_ops = &hugetlb_vm_ops;
+ vma_set_flags(vma, VMA_HUGETLB_BIT, VMA_DONTEXPAND_BIT);
+ vma->vm_ops = &hugetlb_vm_ops;
/*
* page based offset in vm_pgoff could be sufficiently large to
@@ -129,16 +122,16 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
* sizeof(unsigned long). So, only check in those instances.
*/
if (sizeof(unsigned long) == sizeof(loff_t)) {
- if (desc->pgoff & PGOFF_LOFFT_MAX)
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
return -EINVAL;
}
/* must be huge page aligned */
- if (desc->pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
+ if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
- vma_len = (loff_t)vma_desc_size(desc);
- len = vma_len + ((loff_t)desc->pgoff << PAGE_SHIFT);
+ vma_len = (loff_t)(vma->vm_end - vma->vm_start);
+ len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
/* check for overflow */
if (len < vma_len)
return -EINVAL;
@@ -148,7 +141,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
ret = -ENOMEM;
- vma_flags = desc->vma_flags;
+ vma_flags = vma->flags;
/*
* for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
* reserving here. Note: only for SHM hugetlbfs file, the inode
@@ -158,30 +151,17 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
vma_flags_set(&vma_flags, VMA_NORESERVE_BIT);
if (hugetlb_reserve_pages(inode,
- desc->pgoff >> huge_page_order(h),
- len >> huge_page_shift(h), desc,
- vma_flags) < 0)
+ vma->vm_pgoff >> huge_page_order(h),
+ len >> huge_page_shift(h), vma,
+ vma_flags) < 0)
goto out;
ret = 0;
- if (vma_desc_test(desc, VMA_WRITE_BIT) && inode->i_size < len)
+ if (vma_test(vma, VMA_WRITE_BIT) && inode->i_size < len)
i_size_write(inode, len);
out:
inode_unlock(inode);
- if (!ret) {
- /* Allocate the VMA lock after we set it up. */
- desc->action.success_hook = hugetlb_file_mmap_prepare_success;
- /*
- * We cannot permit the rmap finding this VMA in the time
- * between the VMA being inserted into the VMA tree and the
- * completion/success hook being invoked.
- *
- * This is because we establish a per-VMA hugetlb lock which can
- * be raced by rmap.
- */
- desc->action.hide_from_rmap_until_complete = true;
- }
return ret;
}
@@ -1227,7 +1207,7 @@ static void init_once(void *foo)
static const struct file_operations hugetlbfs_file_operations = {
.read_iter = hugetlbfs_read_iter,
- .mmap_prepare = hugetlbfs_file_mmap_prepare,
+ .mmap = hugetlbfs_file_mmap,
.fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
.llseek = default_llseek,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 93418625d3c5..5957bc25efa8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -148,7 +148,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
long hugetlb_reserve_pages(struct inode *inode, long from, long to,
- struct vm_area_desc *desc, vma_flags_t vma_flags);
+ struct vm_area_struct *vma, vma_flags_t vma_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
@@ -276,7 +276,6 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
-int hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
unsigned int arch_hugetlb_cma_order(void);
@@ -469,11 +468,6 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
-static inline int hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
-{
- return 0;
-}
-
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h
index 565b473fd135..5c29cd3223a1 100644
--- a/include/linux/hugetlb_inline.h
+++ b/include/linux/hugetlb_inline.h
@@ -6,23 +6,13 @@
#ifdef CONFIG_HUGETLB_PAGE
-static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
-{
- return !!(vm_flags & VM_HUGETLB);
-}
-
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
{
- return vma_flags_test_any(flags, VMA_HUGETLB_BIT);
+ return vma_flags_test(flags, VMA_HUGETLB_BIT);
}
#else
-static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
-{
- return false;
-}
-
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
{
return false;
@@ -32,7 +22,7 @@ static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
static inline bool is_vm_hugetlb_page(const struct vm_area_struct *vma)
{
- return is_vm_hugetlb_flags(vma->vm_flags);
+ return is_vma_hugetlb_flags(&vma->flags);
}
#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index af23453e9dbd..06bbe9eba636 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -5174,9 +5174,10 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
* DMA mapping IDs for page_pool
*
* When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
- * stashes it in the upper bits of page->pp_magic. Non-PP pages can have
- * arbitrary kernel pointers stored in the same field as pp_magic (since
- * it overlaps with page->lru.next), so we must ensure that we cannot
+ * stashes it in the upper bits of page->pp_magic. We always want to be able to
+ * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
+ * pages can have arbitrary kernel pointers stored in the same field as pp_magic
+ * (since it overlaps with page->lru.next), so we must ensure that we cannot
* mistake a valid kernel pointer with any of the values we write into this
* field.
*
@@ -5211,6 +5212,26 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
PP_DMA_INDEX_SHIFT)
+/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
+ * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
+ * the head page of compound page and bit 1 for pfmemalloc page, as well as the
+ * bits used for the DMA index. page_is_pfmemalloc() is checked in
+ * __page_pool_put_page() to avoid recycling the pfmemalloc page.
+ */
+#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
+
+#ifdef CONFIG_PAGE_POOL
+static inline bool page_pool_page_is_pp(const struct page *page)
+{
+ return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
+}
+#else
+static inline bool page_pool_page_is_pp(const struct page *page)
+{
+ return false;
+}
+#endif
+
#define PAGE_SNAPSHOT_FAITHFUL (1 << 0)
#define PAGE_SNAPSHOT_PG_BUDDY (1 << 1)
#define PAGE_SNAPSHOT_PG_IDLE (1 << 2)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0e03d816e8b9..7223f6f4e2b4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -923,7 +923,6 @@ enum pagetype {
PGTY_zsmalloc = 0xf6,
PGTY_unaccepted = 0xf7,
PGTY_large_kmalloc = 0xf8,
- PGTY_netpp = 0xf9,
PGTY_mapcount_underflow = 0xff
};
@@ -1056,11 +1055,6 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted)
PAGE_TYPE_OPS(LargeKmalloc, large_kmalloc, large_kmalloc)
-/*
- * Marks page_pool allocated pages.
- */
-PAGE_TYPE_OPS(Netpp, netpp, netpp)
-
/**
* PageHuge - Determine if the page belongs to hugetlbfs
* @page: The page to test.
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 78fe51e5756b..bccacd21b6c3 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -94,20 +94,10 @@ enum net_iov_type {
*/
struct net_iov {
struct netmem_desc desc;
- unsigned int page_type;
enum net_iov_type type;
struct net_iov_area *owner;
};
-/* Make sure 'the offset of page_type in struct page == the offset of
- * type in struct net_iov'.
- */
-#define NET_IOV_ASSERT_OFFSET(pg, iov) \
- static_assert(offsetof(struct page, pg) == \
- offsetof(struct net_iov, iov))
-NET_IOV_ASSERT_OFFSET(page_type, page_type);
-#undef NET_IOV_ASSERT_OFFSET
-
struct net_iov_area {
/* Array of net_iovs for this area. */
struct net_iov *niovs;
@@ -127,11 +117,7 @@ static inline unsigned int net_iov_idx(const struct net_iov *niov)
return niov - net_iov_owner(niov)->niovs;
}
-/* Initialize a niov: stamp the owning area, the memory provider type,
- * and the page_type "no type" sentinel expected by the page-type API
- * (see PAGE_TYPE_OPS in <linux/page-flags.h>) so that
- * page_pool_set_pp_info() can later call __SetPageNetpp() on a niov
- * cast to struct page.
+/* Initialize a niov: stamp the owning area, the memory provider type.
*/
static inline void net_iov_init(struct net_iov *niov,
struct net_iov_area *owner,
@@ -139,7 +125,6 @@ static inline void net_iov_init(struct net_iov *niov,
{
niov->owner = owner;
niov->type = type;
- niov->page_type = UINT_MAX;
}
/* netmem */
@@ -245,7 +230,7 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
*/
#define pp_page_to_nmdesc(p) \
({ \
- DEBUG_NET_WARN_ON_ONCE(!PageNetpp(p)); \
+ DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \
__pp_page_to_nmdesc(p); \
})
diff --git a/ipc/util.c b/ipc/util.c
index 9eb89820594e..1737d776bc08 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -253,7 +253,7 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
} else {
new->seq = ipcid_to_seqx(next_id);
idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id),
- 0, GFP_NOWAIT);
+ ipc_mni, GFP_NOWAIT);
}
if (idx >= 0)
new->id = (new->seq << ipcmni_seq_shift()) + idx;
diff --git a/kernel/fork.c b/kernel/fork.c
index 5f3fdfdb14c7..8ac38beae360 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2664,8 +2664,6 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
*
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
- *
- * args->exit_signal is expected to be checked for sanity by the caller.
*/
pid_t kernel_clone(struct kernel_clone_args *args)
{
@@ -2700,6 +2698,9 @@ pid_t kernel_clone(struct kernel_clone_args *args)
(args->pidfd == args->parent_tid))
return -EINVAL;
+ if (!valid_signal(args->exit_signal))
+ return -EINVAL;
+
/*
* Determine whether and which event to report to ptracer. When
* called from kernel_thread or CLONE_UNTRACED is explicitly
@@ -2898,11 +2899,9 @@ static noinline int copy_clone_args_from_user(struct kernel_clone_args *kargs,
return -EINVAL;
/*
- * Verify that higher 32bits of exit_signal are unset and that
- * it is a valid signal
+ * Verify that higher 32bits of exit_signal are unset
*/
- if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) ||
- !valid_signal(args.exit_signal)))
+ if (unlikely(args.exit_signal & ~((u64)CSIGNAL)))
return -EINVAL;
if ((args.flags & CLONE_INTO_CGROUP) &&
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 04746cbb3327..a8014780edae 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -88,7 +88,6 @@ static void damon_sysfs_scheme_region_release(struct kobject *kobj)
struct damon_sysfs_scheme_region *region = container_of(kobj,
struct damon_sysfs_scheme_region, kobj);
- list_del(&region->list);
kfree(region);
}
@@ -164,7 +163,7 @@ static void damon_sysfs_scheme_regions_rm_dirs(
struct damon_sysfs_scheme_region *r, *next;
list_for_each_entry_safe(r, next, &regions->regions_list, list) {
- /* release function deletes it from the list */
+ list_del(&r->list);
kobject_put(&r->kobj);
regions->nr_regions--;
}
@@ -2928,14 +2927,15 @@ void damos_sysfs_populate_region_dir(struct damon_sysfs_schemes *sysfs_schemes,
if (!region)
return;
region->sz_filter_passed = sz_filter_passed;
- list_add_tail(&region->list, &sysfs_regions->regions_list);
- sysfs_regions->nr_regions++;
if (kobject_init_and_add(&region->kobj,
&damon_sysfs_scheme_region_ktype,
&sysfs_regions->kobj, "%d",
sysfs_regions->nr_regions++)) {
kobject_put(&region->kobj);
+ return;
}
+ list_add_tail(&region->list, &sysfs_regions->regions_list);
+ sysfs_regions->nr_regions++;
}
int damon_sysfs_schemes_clear_regions(
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f24bf49be047..4b80b167cc9c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -116,6 +116,7 @@ struct mutex *hugetlb_fault_mutex_table __ro_after_init;
/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
+static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start, unsigned long end, bool take_locks);
@@ -413,21 +414,17 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
}
}
-/*
- * vma specific semaphore used for pmd sharing and fault/truncation
- * synchronization
- */
-int hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
+static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
{
struct hugetlb_vma_lock *vma_lock;
/* Only establish in (flags) sharable vmas */
if (!vma || !(vma->vm_flags & VM_MAYSHARE))
- return 0;
+ return;
/* Should never get here with non-NULL vm_private_data */
if (vma->vm_private_data)
- return -EINVAL;
+ return;
vma_lock = kmalloc_obj(*vma_lock);
if (!vma_lock) {
@@ -442,15 +439,13 @@ int hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
* allocation failure.
*/
pr_warn_once("HugeTLB: unable to allocate vma specific lock\n");
- return -EINVAL;
+ return;
}
kref_init(&vma_lock->refs);
init_rwsem(&vma_lock->rw_sema);
vma_lock->vma = vma;
vma->vm_private_data = vma_lock;
-
- return 0;
}
/* Helper that removes a struct file_region from the resv_map cache and returns
@@ -1147,28 +1142,20 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
}
}
-static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
+static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
{
VM_WARN_ON_ONCE_VMA(!is_vm_hugetlb_page(vma), vma);
- VM_WARN_ON_ONCE_VMA(vma->vm_flags & VM_MAYSHARE, vma);
+ VM_WARN_ON_ONCE_VMA(vma_test(vma, VMA_MAYSHARE_BIT), vma);
- set_vma_private_data(vma, get_vma_private_data(vma) | flags);
+ set_vma_private_data(vma, (unsigned long)map);
}
-static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map)
-{
- VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
- VM_WARN_ON_ONCE(vma_desc_test(desc, VMA_MAYSHARE_BIT));
-
- desc->private_data = map;
-}
-
-static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags)
+static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
{
- VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
- VM_WARN_ON_ONCE(vma_desc_test(desc, VMA_MAYSHARE_BIT));
+ VM_WARN_ON_ONCE_VMA(!is_vm_hugetlb_page(vma), vma);
+ VM_WARN_ON_ONCE_VMA(vma_test(vma, VMA_MAYSHARE_BIT), vma);
- desc->private_data = (void *)((unsigned long)desc->private_data | flags);
+ set_vma_private_data(vma, get_vma_private_data(vma) | flags);
}
static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
@@ -1178,13 +1165,6 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
return (get_vma_private_data(vma) & flag) != 0;
}
-static bool is_vma_desc_resv_set(struct vm_area_desc *desc, unsigned long flag)
-{
- VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
-
- return ((unsigned long)desc->private_data) & flag;
-}
-
bool __vma_private_lock(struct vm_area_struct *vma)
{
return !(vma->vm_flags & VM_MAYSHARE) &&
@@ -6553,7 +6533,7 @@ next:
long hugetlb_reserve_pages(struct inode *inode,
long from, long to,
- struct vm_area_desc *desc,
+ struct vm_area_struct *vma,
vma_flags_t vma_flags)
{
long chg = -1, add = -1, spool_resv, gbl_resv;
@@ -6571,6 +6551,12 @@ long hugetlb_reserve_pages(struct inode *inode,
}
/*
+ * vma specific semaphore used for pmd sharing and fault/truncation
+ * synchronization
+ */
+ hugetlb_vma_lock_alloc(vma);
+
+ /*
* Only apply hugepage reservation if asked. At fault time, an
* attempt will be made for VM_NORESERVE to allocate a page
* without using reserves
@@ -6582,9 +6568,9 @@ long hugetlb_reserve_pages(struct inode *inode,
* Shared mappings base their reservation on the number of pages that
* are already allocated on behalf of the file. Private mappings need
* to reserve the full area even if read-only as mprotect() may be
- * called to make the mapping read-write. Assume !desc is a shm mapping
+ * called to make the mapping read-write. Assume !vma is a shm mapping
*/
- if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT)) {
+ if (!vma || vma_test(vma, VMA_MAYSHARE_BIT)) {
/*
* resv_map can not be NULL as hugetlb_reserve_pages is only
* called for inodes for which resv_maps were created (see
@@ -6603,8 +6589,8 @@ long hugetlb_reserve_pages(struct inode *inode,
chg = to - from;
- set_vma_desc_resv_map(desc, resv_map);
- set_vma_desc_resv_flags(desc, HPAGE_RESV_OWNER);
+ set_vma_resv_map(vma, resv_map);
+ set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
}
if (chg < 0) {
@@ -6618,7 +6604,7 @@ long hugetlb_reserve_pages(struct inode *inode,
if (err < 0)
goto out_err;
- if (desc && !vma_desc_test(desc, VMA_MAYSHARE_BIT) && h_cg) {
+ if (vma && !vma_test(vma, VMA_MAYSHARE_BIT) && h_cg) {
/* For private mappings, the hugetlb_cgroup uncharge info hangs
* of the resv_map.
*/
@@ -6655,7 +6641,7 @@ long hugetlb_reserve_pages(struct inode *inode,
* consumed reservations are stored in the map. Hence, nothing
* else has to be done for private mappings here
*/
- if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT)) {
+ if (!vma || vma_test(vma, VMA_MAYSHARE_BIT)) {
add = region_add(resv_map, from, to, regions_needed, h, h_cg);
if (unlikely(add < 0)) {
@@ -6719,15 +6705,16 @@ out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
out_err:
- if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT))
+ hugetlb_vma_lock_free(vma);
+ if (!vma || vma_test(vma, VMA_MAYSHARE_BIT))
/* Only call region_abort if the region_chg succeeded but the
* region_add failed or didn't run.
*/
if (chg >= 0 && add < 0)
region_abort(resv_map, from, to, regions_needed);
- if (desc && is_vma_desc_resv_set(desc, HPAGE_RESV_OWNER)) {
+ if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
kref_put(&resv_map->refs, resv_map_release);
- set_vma_desc_resv_map(desc, NULL);
+ set_vma_resv_map(vma, NULL);
}
return err;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 749c128b4fad..177732fef010 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4352,6 +4352,9 @@ static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
lstats->state[index] += slab;
if (plstats)
plstats->state_pending[index] += slab;
+ memcg->vmstats->state[index] += slab;
+ if (parent)
+ parent->vmstats->state_pending[index] += slab;
}
if (atomic_read(&pn->slab_unreclaimable)) {
int slab = atomic_xchg(&pn->slab_unreclaimable, 0);
@@ -4360,6 +4363,9 @@ static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
lstats->state[index] += slab;
if (plstats)
plstats->state_pending[index] += slab;
+ memcg->vmstats->state[index] += slab;
+ if (parent)
+ parent->vmstats->state_pending[index] += slab;
}
}
}
diff --git a/mm/memfd.c b/mm/memfd.c
index fb425f4e315f..abe13b291ddc 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -283,6 +283,12 @@ int memfd_add_seals(struct file *file, unsigned int seals)
goto unlock;
}
+ /*
+ * SEAL_EXEC implies SEAL_WRITE, making W^X from the start.
+ */
+ if (seals & F_SEAL_EXEC && inode->i_mode & 0111)
+ seals |= F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE;
+
if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) {
error = mapping_deny_writable(file->f_mapping);
if (error)
@@ -295,12 +301,6 @@ int memfd_add_seals(struct file *file, unsigned int seals)
}
}
- /*
- * SEAL_EXEC implies SEAL_WRITE, making W^X from the start.
- */
- if (seals & F_SEAL_EXEC && inode->i_mode & 0111)
- seals |= F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE;
-
*file_seals |= seals;
error = 0;
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index ab49d4dcdb60..19cd14b34114 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -840,7 +840,7 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
} else {
if (folio_is_zone_device(folio) &&
!folio_is_device_coherent(folio)) {
- goto abort;
+ goto free_abort;
}
entry = folio_mk_pmd(folio, vma->vm_page_prot);
if (vma->vm_flags & VM_WRITE)
@@ -893,6 +893,8 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
unlock_abort:
spin_unlock(ptl);
+free_abort:
+ pte_free(vma->vm_mm, pgtable);
abort:
for (i = 0; i < HPAGE_PMD_NR; i++)
src[i] &= ~MIGRATE_PFN_MIGRATE;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 23c7298d3be2..d49c254174da 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1035,6 +1035,7 @@ static inline bool page_expected_state(struct page *page,
#ifdef CONFIG_MEMCG
page->memcg_data |
#endif
+ page_pool_page_is_pp(page) |
(page->flags.f & check_flags)))
return false;
@@ -1061,6 +1062,8 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
if (unlikely(page->memcg_data))
bad_reason = "page still charged to cgroup";
#endif
+ if (unlikely(page_pool_page_is_pp(page)))
+ bad_reason = "page_pool leak";
return bad_reason;
}
@@ -1377,17 +1380,9 @@ __always_inline bool __free_pages_prepare(struct page *page,
mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
folio->mapping = NULL;
}
- if (unlikely(page_has_type(page))) {
- /* networking expects to clear its page type before releasing */
- if (is_check_pages_enabled()) {
- if (unlikely(PageNetpp(page))) {
- bad_page(page, "page_pool leak");
- return false;
- }
- }
+ if (unlikely(page_has_type(page)))
/* Reset the page_type (which overlays _mapcount) */
page->page_type = UINT_MAX;
- }
if (is_check_pages_enabled()) {
if (free_page_is_bad(page))
diff --git a/mm/rmap.c b/mm/rmap.c
index 78b7fb5f367c..99e1b3dc390b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2030,6 +2030,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
+ nr_pages = 1;
+
/*
* If the folio is in an mlock()d vma, we must not swap it out.
*/
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c31a8615a832..bb6ae08d18f5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3203,7 +3203,7 @@ struct vm_struct *__get_vm_area_node(unsigned long size,
struct vm_struct *area;
unsigned long requested_size = size;
- BUG_ON(in_interrupt());
+ BUG_ON(in_nmi() || in_hardirq());
size = ALIGN(size, 1ul << shift);
if (unlikely(!size))
return NULL;
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
index 3e6fde8f1726..23175cb2bd86 100644
--- a/net/core/netmem_priv.h
+++ b/net/core/netmem_priv.h
@@ -8,18 +8,21 @@ static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
return netmem_to_nmdesc(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
}
-static inline bool netmem_is_pp(netmem_ref netmem)
+static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
+{
+ netmem_to_nmdesc(netmem)->pp_magic |= pp_magic;
+}
+
+static inline void netmem_clear_pp_magic(netmem_ref netmem)
{
- struct page *page;
+ WARN_ON_ONCE(netmem_to_nmdesc(netmem)->pp_magic & PP_DMA_INDEX_MASK);
- /* XXX: Now that the offset of page_type is shared between
- * struct page and net_iov, just cast the netmem to struct page
- * unconditionally by clearing NET_IOV if any, no matter whether
- * it comes from struct net_iov or struct page. This should be
- * adjusted once the offset is no longer shared.
- */
- page = (struct page *)((__force unsigned long)netmem & ~NET_IOV);
- return PageNetpp(page);
+ netmem_to_nmdesc(netmem)->pp_magic = 0;
+}
+
+static inline bool netmem_is_pp(netmem_ref netmem)
+{
+ return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE;
}
static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 6e576dec80db..8171d1173221 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -707,18 +707,8 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
{
- struct page *page;
-
netmem_set_pp(netmem, pool);
-
- /* XXX: Now that the offset of page_type is shared between
- * struct page and net_iov, just cast the netmem to struct page
- * unconditionally by clearing NET_IOV if any, no matter whether
- * it comes from struct net_iov or struct page. This should be
- * adjusted once the offset is no longer shared.
- */
- page = (struct page *)((__force unsigned long)netmem & ~NET_IOV);
- __SetPageNetpp(page);
+ netmem_or_pp_magic(netmem, PP_SIGNATURE);
/* Ensuring all pages have been split into one fragment initially:
* page_pool_set_pp_info() is only called once for every page when it
@@ -733,17 +723,7 @@ void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
void page_pool_clear_pp_info(netmem_ref netmem)
{
- struct page *page;
-
- /* XXX: Now that the offset of page_type is shared between
- * struct page and net_iov, just cast the netmem to struct page
- * unconditionally by clearing NET_IOV if any, no matter whether
- * it comes from struct net_iov or struct page. This should be
- * adjusted once the offset is no longer shared.
- */
- page = (struct page *)((__force unsigned long)netmem & ~NET_IOV);
- __ClearPageNetpp(page);
-
+ netmem_clear_pp_magic(netmem);
netmem_set_pp(netmem, NULL);
}