author     Daiane Angolini <daiane.angolini@foundries.io>    2022-11-08 10:49:49 -0300
committer  Daiane Angolini <daiane.angolini@foundries.io>    2022-11-08 10:49:56 -0300
commit     5958c271a9cc7d8ae60c534d79ed5947f34117d7 (patch)
tree       76109653ad61c1ab448037b84d5a384eacb42cb4 /mm
parent     e4a2946594228aec1c0e369d6544c7b971c1a5cc (diff)
parent     17aac9b7af2bc5f7b4426603940e92ae8aa73d5d (diff)
Merge tag 'v5.15.73' into 5.15-2.1.x-imx
This is the 5.15.73 stable release

Signed-off-by: Daiane Angolini <daiane.angolini@foundries.io>
Diffstat (limited to 'mm')
-rw-r--r--    mm/gup.c            34
-rw-r--r--    mm/huge_memory.c    13
-rw-r--r--    mm/khugepaged.c     10
3 files changed, 42 insertions, 15 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 05068d3d2557..1a23cd0b4fba 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2266,8 +2266,28 @@ static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
}
#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
-static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
- unsigned int flags, struct page **pages, int *nr)
+/*
+ * Fast-gup relies on pte change detection to avoid concurrent pgtable
+ * operations.
+ *
+ * To pin the page, fast-gup needs to do below in order:
+ * (1) pin the page (by prefetching pte), then (2) check pte not changed.
+ *
+ * For the rest of pgtable operations where pgtable updates can be racy
+ * with fast-gup, we need to do (1) clear pte, then (2) check whether page
+ * is pinned.
+ *
+ * Above will work for all pte-level operations, including THP split.
+ *
+ * For THP collapse, it's a bit more complicated because fast-gup may be
+ * walking a pgtable page that is being freed (pte is still valid but pmd
+ * can be cleared already). To avoid race in such condition, we need to
+ * also check pmd here to make sure pmd doesn't change (corresponds to
+ * pmdp_collapse_flush() in the THP collapse code path).
+ */
+static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, unsigned int flags,
+ struct page **pages, int *nr)
{
struct dev_pagemap *pgmap = NULL;
int nr_start = *nr, ret = 0;
@@ -2312,7 +2332,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
goto pte_unmap;
}
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+ if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) ||
+ unlikely(pte_val(pte) != pte_val(*ptep))) {
put_compound_head(head, 1, flags);
goto pte_unmap;
}
@@ -2357,8 +2378,9 @@ pte_unmap:
* get_user_pages_fast_only implementation that can pin pages. Thus it's still
* useful to have gup_huge_pmd even if we can't operate on ptes.
*/
-static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
- unsigned int flags, struct page **pages, int *nr)
+static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, unsigned int flags,
+ struct page **pages, int *nr)
{
return 0;
}
@@ -2667,7 +2689,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
PMD_SHIFT, next, flags, pages, nr))
return 0;
- } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
+ } else if (!gup_pte_range(pmd, pmdp, addr, next, flags, pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
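The comment added above boils the protocol down to an ordering rule: fast GUP snapshots the entries, takes its reference, and only then re-checks that nothing moved underneath it; the new pmd re-check extends that rule to THP collapse, which can free the PTE page while the PTEs themselves still look valid. Below is a minimal user-space C sketch of that reader-side ordering. It is not kernel code, and fake_pmd, fake_pte, pincount and try_pin_fast() are invented names used only to illustrate the pattern.

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long fake_pmd = 0x1000;  /* stand-in for the pmd entry */
static _Atomic unsigned long fake_pte = 0x2000;  /* stand-in for the pte entry */
static _Atomic int pincount;                     /* stand-in for the page refcount */

/* Reader side: (1) pin the page, then (2) verify pmd and pte did not change. */
static int try_pin_fast(void (*racer)(void))
{
	unsigned long pmd = atomic_load(&fake_pmd);
	unsigned long pte = atomic_load(&fake_pte);

	atomic_fetch_add(&pincount, 1);              /* (1) take the pin */
	if (racer)
		racer();                             /* a concurrent collapse lands here */

	if (pmd != atomic_load(&fake_pmd) ||         /* (2) re-check pmd (THP collapse) */
	    pte != atomic_load(&fake_pte)) {         /*     and pte (pte-level changes) */
		atomic_fetch_sub(&pincount, 1);      /* back off; caller takes slow path */
		return 0;
	}
	return 1;
}

static void simulate_collapse(void)
{
	atomic_store(&fake_pmd, 0);                  /* like pmdp_collapse_flush() */
}

int main(void)
{
	printf("no race:          pinned=%d\n", try_pin_fast(NULL));
	printf("racing collapse:  pinned=%d\n", try_pin_fast(simulate_collapse));
	return 0;
}

Built with any C11 compiler, the first call pins successfully and the second backs off, because the simulated collapse changed the pmd between the snapshot and the re-check.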
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8cc150a88361..07941a1540cb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2879,14 +2879,15 @@ static void split_huge_pages_all(void)
unsigned long total = 0, split = 0;
pr_debug("Split all THPs\n");
- for_each_populated_zone(zone) {
+ for_each_zone(zone) {
+ if (!managed_zone(zone))
+ continue;
max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
- if (!pfn_valid(pfn))
- continue;
+ int nr_pages;
- page = pfn_to_page(pfn);
- if (!get_page_unless_zero(page))
+ page = pfn_to_online_page(pfn);
+ if (!page || !get_page_unless_zero(page))
continue;
if (zone != page_zone(page))
@@ -2897,8 +2898,10 @@ static void split_huge_pages_all(void)
total++;
lock_page(page);
+ nr_pages = thp_nr_pages(page);
if (!split_huge_page(page))
split++;
+ pfn += nr_pages - 1;
unlock_page(page);
next:
put_page(page);
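The split_huge_pages_all() hunk above makes two scanning changes: it uses managed_zone() and pfn_to_online_page() so unmanaged or offline ranges are skipped safely, and after noting thp_nr_pages() it advances pfn past the THP's tail pages instead of revisiting each one. The following stand-alone C sketch (not kernel code; the order[] array and the 16-pfn range are invented for the example) shows just that skip-the-tails pattern.

#include <stdio.h>

int main(void)
{
	/* Order of the compound page starting at each pfn; 0 means a base page.
	 * Here pfn 4 is the head of an order-2 (4-page) compound covering 4..7. */
	int order[16] = { [4] = 2 };
	int visited = 0;

	for (unsigned long pfn = 0; pfn < 16; pfn++) {
		int nr_pages = 1 << order[pfn];

		visited++;
		/* ...the real loop would lock the page and split it here... */

		/* Skip the tail pfns; the loop's ++ lands on the next head. */
		pfn += nr_pages - 1;
	}

	printf("pfns visited: %d of 16\n", visited);
	return 0;
}

With pfn 4 as the head of a 4-page compound, the loop touches 13 of the 16 pfns, since the three tail pfns are never revisited.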
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 8a8b3aa92937..dd069afd9cb9 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1146,10 +1146,12 @@ static void collapse_huge_page(struct mm_struct *mm,
pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
/*
- * After this gup_fast can't run anymore. This also removes
- * any huge TLB entry from the CPU so we won't allow
- * huge and small TLB entries for the same virtual address
- * to avoid the risk of CPU bugs in that area.
+ * This removes any huge TLB entry from the CPU so we won't allow
+ * huge and small TLB entries for the same virtual address to
+ * avoid the risk of CPU bugs in that area.
+ *
+ * Parallel fast GUP is fine since fast GUP will back off when
+ * it detects PMD is changed.
*/
_pmd = pmdp_collapse_flush(vma, address, pmd);
spin_unlock(pmd_ptl);
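The reworded comment above states the other half of the contract from the gup.c change: khugepaged may clear the PMD while fast GUP is walking precisely because fast GUP will notice the changed PMD and back off. Sketched below, again as a user-space illustration rather than kernel code (fake_pmd, pincount and try_collapse() are invented names), is that writer-side ordering: clear the entry first, then look for pins.

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long fake_pmd = 0x1000;  /* stand-in for the pmd entry */
static _Atomic int pincount;                     /* stand-in for page refcounts */

/* Writer side: (1) clear the pmd, then (2) check for existing pins. */
static int try_collapse(void)
{
	atomic_store(&fake_pmd, 0);                  /* like pmdp_collapse_flush() */

	if (atomic_load(&pincount) != 0) {           /* pinned before the clear: bail */
		atomic_store(&fake_pmd, 0x1000);     /* reinstall the old entry */
		return 0;
	}
	return 1;                                    /* later fast GUP sees pmd changed */
}

int main(void)
{
	printf("no pins held:  collapsed=%d\n", try_collapse());
	atomic_store(&fake_pmd, 0x1000);             /* reset state for the second run */
	atomic_fetch_add(&pincount, 1);              /* simulate an existing pin */
	printf("one pin held:  collapsed=%d\n", try_collapse());
	return 0;
}

Either the pin landed before the clear, in which case the collapse sees it and bails out, or the racing fast-GUP walker re-checks after the clear and drops its pin; that is why no extra synchronization against fast GUP is needed at this point.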