| author | Balbir Singh <balbirs@nvidia.com> | 2025-10-01 16:56:59 +1000 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2025-11-24 15:08:48 -0800 |
| commit | 4964099163d0524a769d039ffa886bb4515136d0 | |
| tree | fb5a0526beaf45c2d857f8b23f1e411d00da36ce | |
| parent | a30b48bf1b244f11bf9b6d20cdccfe0c2264130c | |
mm/memory/fault: add THP fault handling for zone device private pages
Implement CPU fault handling for zone device private THP entries through
do_huge_pmd_device_private(), enabling transparent migration of
device-private large pages back to system memory on CPU access.

When the CPU accesses such an entry, the fault handler calls the device
driver's migrate_to_ram() callback to migrate the entire large page back
to system memory.
Link: https://lkml.kernel.org/r/20251001065707.920170-9-balbirs@nvidia.com
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | include/linux/huge_mm.h | 7 |
| -rw-r--r-- | mm/huge_memory.c | 38 |
| -rw-r--r-- | mm/memory.c | 5 |

3 files changed, 48 insertions, 2 deletions
```diff
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index fee4cf7fa300..82408c90b396 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -481,6 +481,8 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
 
 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
 
+vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf);
+
 extern struct folio *huge_zero_folio;
 extern unsigned long huge_zero_pfn;
 
@@ -662,6 +664,11 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	return 0;
 }
 
+static inline vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf)
+{
+	return 0;
+}
+
 static inline bool is_huge_zero_folio(const struct folio *folio)
 {
 	return false;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 23db562cde07..ded707a50af8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1288,6 +1288,44 @@ release:
 
 }
 
+vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	vm_fault_t ret = 0;
+	spinlock_t *ptl;
+	swp_entry_t swp_entry;
+	struct page *page;
+	struct folio *folio;
+
+	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+
+	ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) {
+		spin_unlock(ptl);
+		return 0;
+	}
+
+	swp_entry = pmd_to_swp_entry(vmf->orig_pmd);
+	page = pfn_swap_entry_to_page(swp_entry);
+	folio = page_folio(page);
+	vmf->page = page;
+	vmf->pte = NULL;
+	if (folio_trylock(folio)) {
+		folio_get(folio);
+		spin_unlock(ptl);
+		ret = page_pgmap(page)->ops->migrate_to_ram(vmf);
+		folio_unlock(folio);
+		folio_put(folio);
+	} else {
+		spin_unlock(ptl);
+	}
+
+	return ret;
+}
+
 /*
  * always:		directly stall for all thp allocations
  * defer:		wake kswapd and fail if not immediately available
diff --git a/mm/memory.c b/mm/memory.c
index 27bc457b32c2..732414852570 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6345,8 +6345,9 @@ retry_pud:
 		vmf.orig_pmd = pmdp_get_lockless(vmf.pmd);
 
 		if (unlikely(is_swap_pmd(vmf.orig_pmd))) {
-			VM_BUG_ON(thp_migration_supported() &&
-				  !is_pmd_migration_entry(vmf.orig_pmd));
+			if (is_pmd_device_private_entry(vmf.orig_pmd))
+				return do_huge_pmd_device_private(&vmf);
+
 			if (is_pmd_migration_entry(vmf.orig_pmd))
 				pmd_migration_entry_wait(mm, vmf.pmd);
 			return 0;
```
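
The new fault path ends in the driver's migrate_to_ram() callback, which is not part of this patch. Below is a minimal, hypothetical driver-side sketch of how a device-private (ZONE_DEVICE) driver might implement that callback for a PMD-sized folio; my_drv_migrate_to_ram(), my_drv_migrate_range_to_ram() and the ops wiring are illustrative assumptions, not code from this series.

```c
/*
 * Hypothetical driver-side sketch, NOT part of this patch: one way a
 * device-private driver might implement the migrate_to_ram() callback
 * that do_huge_pmd_device_private() invokes.  my_drv_migrate_range_to_ram()
 * is an assumed driver helper; the names are illustrative only.
 */
#include <linux/memremap.h>
#include <linux/migrate.h>
#include <linux/mm.h>

/* Assumed helper: migrate [start, end) of the faulting VMA back to RAM. */
vm_fault_t my_drv_migrate_range_to_ram(struct vm_fault *vmf,
				       unsigned long start, unsigned long end);

static vm_fault_t my_drv_migrate_to_ram(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	unsigned long start, end;

	/*
	 * For a PMD-mapped device-private folio the fault handler hands us
	 * the faulting page with the folio locked and a reference held.
	 * Migrate the whole folio-aligned range so the large page moves
	 * back to system memory in one operation instead of being split.
	 */
	start = ALIGN_DOWN(vmf->address, folio_size(folio));
	end = start + folio_size(folio);

	return my_drv_migrate_range_to_ram(vmf,
					   max(start, vmf->vma->vm_start),
					   min(end, vmf->vma->vm_end));
}

static const struct dev_pagemap_ops my_drv_pagemap_ops = {
	/* Called from the CPU fault path for device-private entries. */
	.migrate_to_ram	= my_drv_migrate_to_ram,
	/* .page_free = ..., as required by the driver. */
};
```

In a real driver the helper would typically collect the device pages with migrate_vma_setup(), allocate and copy into system pages, then call migrate_vma_pages() and migrate_vma_finalize(); the point relevant to this patch is only that the entire PMD-sized folio can be handled in a single callback invocation.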
