summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-04-14 16:30:35 -0700
committerJakub Kicinski <kuba@kernel.org>2025-04-14 16:30:36 -0700
commit63ce43f2d7da1f863f43fb1bcc9422466887dc6c (patch)
treef45d18f63f5071e158df2f9cee8d6f1c7a4ca17a /include/linux
parent452446f812867230cad0bfbb00afb77e3412bbf3 (diff)
parentee62ce7a1d909ccba0399680a03c2dee83bcae95 (diff)
Merge branch 'fix-late-dma-unmap-crash-for-page-pool'
Toke Høiland-Jørgensen says: ==================== Fix late DMA unmap crash for page pool This series fixes the late dma_unmap crash for page pool first reported by Yonglong Liu in [0]. It is an alternative approach to the one submitted by Yunsheng Lin, most recently in [1]. The first commit just wraps some tests in a helper function, in preparation of the main change in patch 2. See the commit message of patch 2 for the details. [0] https://lore.kernel.org/8067f204-1380-4d37-8ffd-007fc6f26738@kernel.org [1] https://lore.kernel.org/20250307092356.638242-1-linyunsheng@huawei.com v8: https://lore.kernel.org/20250407-page-pool-track-dma-v8-0-da9500d4ba21@redhat.com v7: https://lore.kernel.org/20250404-page-pool-track-dma-v7-0-ad34f069bc18@redhat.com v6: https://lore.kernel.org/20250401-page-pool-track-dma-v6-0-8b83474870d4@redhat.com v5: https://lore.kernel.org/20250328-page-pool-track-dma-v5-0-55002af683ad@redhat.com v4: https://lore.kernel.org/20250327-page-pool-track-dma-v4-0-b380dc6706d0@redhat.com v3: https://lore.kernel.org/20250326-page-pool-track-dma-v3-0-8e464016e0ac@redhat.com v2: https://lore.kernel.org/20250325-page-pool-track-dma-v2-0-113ebc1946f3@redhat.com v1: https://lore.kernel.org/20250314-page-pool-track-dma-v1-0-c212e57a74c2@redhat.com ==================== Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-0-6a9ef2e0cba8@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/mm.h58
-rw-r--r--include/linux/poison.h4
2 files changed, 62 insertions, 0 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b7f13f087954..130d3c9d2ee4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4248,4 +4248,62 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
#define VM_SEALED_SYSMAP VM_NONE
#endif
+/*
+ * DMA mapping IDs for page_pool
+ *
+ * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
+ * stashes it in the upper bits of page->pp_magic. We always want to be able to
+ * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
+ * pages can have arbitrary kernel pointers stored in the same field as pp_magic
+ * (since it overlaps with page->lru.next), so we must ensure that we cannot
+ * mistake a valid kernel pointer with any of the values we write into this
+ * field.
+ *
+ * On architectures that set POISON_POINTER_DELTA, this is already ensured,
+ * since this value becomes part of PP_SIGNATURE; meaning we can just use the
+ * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the
+ * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is
+ * 0, we make sure that we leave the two topmost bits empty, as that guarantees
+ * we won't mistake a valid kernel pointer for a value we set, regardless of the
+ * VMSPLIT setting.
+ *
+ * Altogether, this means that the number of bits available is constrained by
+ * the size of an unsigned long (at the upper end, subtracting two bits per the
+ * above), and the definition of PP_SIGNATURE (with or without
+ * POISON_POINTER_DELTA).
+ */
+#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
+#if POISON_POINTER_DELTA > 0
+/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
+ * index to not overlap with that if set
+ */
+#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
+#else
+/* Always leave out the topmost two; see above. */
+#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2)
+#endif
+
+#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
+ PP_DMA_INDEX_SHIFT)
+
+/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
+ * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
+ * the head page of compound page and bit 1 for pfmemalloc page, as well as the
+ * bits used for the DMA index. page_is_pfmemalloc() is checked in
+ * __page_pool_put_page() to avoid recycling the pfmemalloc page.
+ */
+#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
+
+#ifdef CONFIG_PAGE_POOL
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+ return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
+}
+#else
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+ return false;
+}
+#endif
+
#endif /* _LINUX_MM_H */
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 331a9a996fa8..8ca2235f78d5 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -70,6 +70,10 @@
#define KEY_DESTROY 0xbd
/********** net/core/page_pool.c **********/
+/*
+ * page_pool uses additional free bits within this value to store data, see the
+ * definition of PP_DMA_INDEX_MASK in mm.h
+ */
#define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA)
/********** net/core/skbuff.c **********/