| author | Jakub Kicinski <kuba@kernel.org> | 2025-04-14 16:30:35 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-04-14 16:30:36 -0700 |
| commit | 63ce43f2d7da1f863f43fb1bcc9422466887dc6c | |
| tree | f45d18f63f5071e158df2f9cee8d6f1c7a4ca17a | /include/linux |
| parent | 452446f812867230cad0bfbb00afb77e3412bbf3 | |
| parent | ee62ce7a1d909ccba0399680a03c2dee83bcae95 | |
Merge branch 'fix-late-dma-unmap-crash-for-page-pool'
Toke Høiland-Jørgensen says:
====================
Fix late DMA unmap crash for page pool
This series fixes the late dma_unmap crash for page pool, first reported
by Yonglong Liu in [0]. It is an alternative approach to the one
submitted by Yunsheng Lin, most recently in [1]. The first commit just
wraps some tests in a helper function, in preparation for the main change
in patch 2. See the commit message of patch 2 for the details (a
schematic model of the problem and the fix follows the trailers below).
[0] https://lore.kernel.org/8067f204-1380-4d37-8ffd-007fc6f26738@kernel.org
[1] https://lore.kernel.org/20250307092356.638242-1-linyunsheng@huawei.com
v8: https://lore.kernel.org/20250407-page-pool-track-dma-v8-0-da9500d4ba21@redhat.com
v7: https://lore.kernel.org/20250404-page-pool-track-dma-v7-0-ad34f069bc18@redhat.com
v6: https://lore.kernel.org/20250401-page-pool-track-dma-v6-0-8b83474870d4@redhat.com
v5: https://lore.kernel.org/20250328-page-pool-track-dma-v5-0-55002af683ad@redhat.com
v4: https://lore.kernel.org/20250327-page-pool-track-dma-v4-0-b380dc6706d0@redhat.com
v3: https://lore.kernel.org/20250326-page-pool-track-dma-v3-0-8e464016e0ac@redhat.com
v2: https://lore.kernel.org/20250325-page-pool-track-dma-v2-0-113ebc1946f3@redhat.com
v1: https://lore.kernel.org/20250314-page-pool-track-dma-v1-0-c212e57a74c2@redhat.com
====================
Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-0-6a9ef2e0cba8@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
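To make the lifecycle problem concrete, here is a toy userspace model; it is purely illustrative, and the toy_* names are invented here, not the kernel API. The scheme the series adopts is that the pool tracks every DMA mapping it owns under a small ID, so teardown can unmap them all while the device is still present, and a page returned late simply finds its mapping already gone instead of triggering an unmap against torn-down DMA state:

```c
/*
 * Toy userspace model of the page_pool late-unmap problem and the fix.
 * Everything here is invented for illustration; it is not the kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

#define POOL_CAP 4

struct toy_pool {
	void *mapped[POOL_CAP]; /* stands in for the xarray of live DMA mappings */
	int next_id;
	bool destroyed;
};

/* "DMA-map" a buffer: record it under an ID so it can be found again later. */
static int toy_map(struct toy_pool *p, void *buf)
{
	p->mapped[p->next_id] = buf;
	return p->next_id++;
}

/* "DMA-unmap" by ID, if the mapping is still live. Returns true if unmapped. */
static bool toy_unmap(struct toy_pool *p, int id)
{
	if (!p->mapped[id])
		return false;   /* already unmapped at destroy time */
	p->mapped[id] = NULL;   /* real code would call dma_unmap_page() here */
	return true;
}

/*
 * The fix, modelled: destroy walks the tracked mappings and unmaps them all
 * while the device is still around. A page returned after this point finds
 * its ID already gone and skips the unmap, instead of touching a device
 * whose DMA state has been torn down (the crash this series fixes).
 */
static void toy_destroy(struct toy_pool *p)
{
	for (int id = 0; id < p->next_id; id++)
		if (toy_unmap(p, id))
			printf("destroy: unmapped id %d\n", id);
	p->destroyed = true;
}

int main(void)
{
	struct toy_pool pool = { 0 };
	char a[64], b[64];
	int id_a = toy_map(&pool, a);
	int id_b = toy_map(&pool, b);

	printf("return id %d before destroy: %s\n", id_a,
	       toy_unmap(&pool, id_a) ? "unmapped" : "skipped");
	toy_destroy(&pool);     /* e.g. driver unbind */
	printf("return id %d after destroy: %s\n", id_b,
	       toy_unmap(&pool, id_b) ? "unmapped" : "skipped");
	return 0;
}
```

The real implementation stores the ID in the upper bits of page->pp_magic and keeps the mappings in an xarray; the bit-layout machinery for that is what the mm.h hunk below defines.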
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/mm.h | 58 |
| -rw-r--r-- | include/linux/poison.h | 4 |
2 files changed, 62 insertions, 0 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b7f13f087954..130d3c9d2ee4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4248,4 +4248,62 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
 #define VM_SEALED_SYSMAP	VM_NONE
 #endif
 
+/*
+ * DMA mapping IDs for page_pool
+ *
+ * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
+ * stashes it in the upper bits of page->pp_magic. We always want to be able to
+ * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
+ * pages can have arbitrary kernel pointers stored in the same field as pp_magic
+ * (since it overlaps with page->lru.next), so we must ensure that we cannot
+ * mistake a valid kernel pointer for any of the values we write into this
+ * field.
+ *
+ * On architectures that set POISON_POINTER_DELTA, this is already ensured,
+ * since this value becomes part of PP_SIGNATURE; meaning we can just use the
+ * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the
+ * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is
+ * 0, we make sure that we leave the two topmost bits empty, as that guarantees
+ * we won't mistake a valid kernel pointer for a value we set, regardless of the
+ * VMSPLIT setting.
+ *
+ * Altogether, this means that the number of bits available is constrained by
+ * the size of an unsigned long (at the upper end, subtracting two bits per the
+ * above), and the definition of PP_SIGNATURE (with or without
+ * POISON_POINTER_DELTA).
+ */
+#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
+#if POISON_POINTER_DELTA > 0
+/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
+ * index to not overlap with that if set
+ */
+#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
+#else
+/* Always leave out the topmost two; see above. */
+#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2)
+#endif
+
+#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
+				  PP_DMA_INDEX_SHIFT)
+
+/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
+ * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
+ * the head page of compound page and bit 1 for pfmemalloc page, as well as the
+ * bits used for the DMA index. page_is_pfmemalloc() is checked in
+ * __page_pool_put_page() to avoid recycling the pfmemalloc page.
+ */
+#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
+
+#ifdef CONFIG_PAGE_POOL
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+	return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
+}
+#else
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+	return false;
+}
+#endif
+
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 331a9a996fa8..8ca2235f78d5 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -70,6 +70,10 @@
 #define KEY_DESTROY		0xbd
 
 /********** net/core/page_pool.c **********/
+/*
+ * page_pool uses additional free bits within this value to store data, see the
+ * definition of PP_DMA_INDEX_MASK in mm.h
+ */
 #define PP_SIGNATURE		(0x40 + POISON_POINTER_DELTA)
 
 /********** net/core/skbuff.c **********/
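To make the bit arithmetic above concrete, here is a standalone userspace sketch that evaluates the new macros for one assumed configuration: x86_64, where CONFIG_ILLEGAL_POINTER_VALUE makes POISON_POINTER_DELTA equal to 0xdead000000000000. fls_long(), ffs_long(), GENMASK and MIN are local stand-ins for the kernel's __fls(), __ffs(), GENMASK() and MIN():

```c
/*
 * Sketch of the PP_DMA_INDEX_* arithmetic, assuming x86_64 with
 * POISON_POINTER_DELTA == 0xdead000000000000 (CONFIG_ILLEGAL_POINTER_VALUE).
 */
#include <stdio.h>

#define POISON_POINTER_DELTA 0xdead000000000000UL
#define PP_SIGNATURE         (0x40UL + POISON_POINTER_DELTA)

/* Position of the most/least significant set bit (undefined for x == 0). */
static unsigned long fls_long(unsigned long x) { return 63 - __builtin_clzl(x); }
static unsigned long ffs_long(unsigned long x) { return __builtin_ctzl(x); }

#define GENMASK(h, l) ((~0UL << (l)) & (~0UL >> (63 - (h))))
#define MIN(a, b)     ((a) < (b) ? (a) : (b))

int main(void)
{
	/* PP_SIGNATURE - POISON_POINTER_DELTA == 0x40, so __fls() gives 6 */
	unsigned long shift = 1 + fls_long(PP_SIGNATURE - POISON_POINTER_DELTA);
	/* __ffs(0xdead000000000000) == 48; 48 - 7 == 41, capped at 32 */
	unsigned long bits = MIN(32, ffs_long(POISON_POINTER_DELTA) - shift);
	unsigned long index_mask = GENMASK(bits + shift - 1, shift);
	unsigned long magic_mask = ~(index_mask | 0x3UL);

	printf("PP_DMA_INDEX_SHIFT = %lu\n", shift);           /* 7 */
	printf("PP_DMA_INDEX_BITS  = %lu\n", bits);            /* 32 */
	printf("PP_DMA_INDEX_MASK  = 0x%016lx\n", index_mask); /* bits 38..7 */
	printf("PP_MAGIC_MASK      = 0x%016lx\n", magic_mask); /* 0xffffff800000007c */

	/* A pp_magic carrying DMA index 42 still matches the signature check: */
	unsigned long pp_magic = PP_SIGNATURE | (42UL << shift);
	printf("page_is_pp         = %d\n",
	       (pp_magic & magic_mask) == PP_SIGNATURE);       /* 1 */
	return 0;
}
```

For a 32-bit arch with POISON_POINTER_DELTA == 0, the #else branch applies instead: PP_DMA_INDEX_SHIFT is still 7 (from __fls(0x40)), and PP_DMA_INDEX_BITS becomes MIN(32, 32 - 7 - 2) == 23, leaving the two topmost bits clear as the comment in the hunk requires.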
