diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Kconfig | 7 | ||||
| -rw-r--r-- | mm/filemap.c | 27 | ||||
| -rw-r--r-- | mm/huge_memory.c | 28 | ||||
| -rw-r--r-- | mm/memblock.c | 3 | ||||
| -rw-r--r-- | mm/memfd.c | 27 | ||||
| -rw-r--r-- | mm/mempool.c | 32 | ||||
| -rw-r--r-- | mm/mmap_lock.c | 1 | ||||
| -rw-r--r-- | mm/page_alloc.c | 9 | ||||
| -rw-r--r-- | mm/shmem.c | 15 | ||||
| -rw-r--r-- | mm/slub.c | 8 | ||||
| -rw-r--r-- | mm/swap_state.c | 13 | ||||
| -rw-r--r-- | mm/swapfile.c | 4 |
12 files changed, 120 insertions, 54 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 0e26f4fc8717..ca3f146bc705 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -908,6 +908,13 @@ config PAGE_MAPCOUNT config PGTABLE_HAS_HUGE_LEAVES def_bool TRANSPARENT_HUGEPAGE || HUGETLB_PAGE +# +# We can end up creating gigantic folio. +# +config HAVE_GIGANTIC_FOLIOS + def_bool (HUGETLB_PAGE && ARCH_HAS_GIGANTIC_PAGE) || \ + (ZONE_DEVICE && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + # TODO: Allow to be enabled without THP config ARCH_SUPPORTS_HUGE_PFNMAP def_bool n diff --git a/mm/filemap.c b/mm/filemap.c index 2f1e7e283a51..024b71da5224 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3682,8 +3682,9 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, struct folio *folio, unsigned long start, unsigned long addr, unsigned int nr_pages, unsigned long *rss, unsigned short *mmap_miss, - bool can_map_large) + pgoff_t file_end) { + struct address_space *mapping = folio->mapping; unsigned int ref_from_caller = 1; vm_fault_t ret = 0; struct page *page = folio_page(folio, start); @@ -3692,12 +3693,16 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, unsigned long addr0; /* - * Map the large folio fully where possible. + * Map the large folio fully where possible: * - * The folio must not cross VMA or page table boundary. + * - The folio is fully within size of the file or belong + * to shmem/tmpfs; + * - The folio doesn't cross VMA boundary; + * - The folio doesn't cross page table boundary; */ addr0 = addr - start * PAGE_SIZE; - if (can_map_large && folio_within_vma(folio, vmf->vma) && + if ((file_end >= folio_next_index(folio) || shmem_mapping(mapping)) && + folio_within_vma(folio, vmf->vma) && (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) { vmf->pte -= start; page -= start; @@ -3812,7 +3817,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, unsigned long rss = 0; unsigned int nr_pages = 0, folio_type; unsigned short mmap_miss = 0, mmap_miss_saved; - bool can_map_large; rcu_read_lock(); folio = next_uptodate_folio(&xas, mapping, end_pgoff); @@ -3823,16 +3827,14 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, end_pgoff = min(end_pgoff, file_end); /* - * Do not allow to map with PTEs beyond i_size and with PMD - * across i_size to preserve SIGBUS semantics. + * Do not allow to map with PMD across i_size to preserve + * SIGBUS semantics. * * Make an exception for shmem/tmpfs that for long time * intentionally mapped with PMDs across i_size. */ - can_map_large = shmem_mapping(mapping) || - file_end >= folio_next_index(folio); - - if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) { + if ((file_end >= folio_next_index(folio) || shmem_mapping(mapping)) && + filemap_map_pmd(vmf, folio, start_pgoff)) { ret = VM_FAULT_NOPAGE; goto out; } @@ -3861,8 +3863,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, else ret |= filemap_map_folio_range(vmf, folio, xas.xa_index - folio->index, addr, - nr_pages, &rss, &mmap_miss, - can_map_large); + nr_pages, &rss, &mmap_miss, file_end); folio_unlock(folio); } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 323654fb4f8c..6cba1cb14b23 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3522,7 +3522,8 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, /* order-1 is not supported for anonymous THP. */ VM_WARN_ONCE(warns && new_order == 1, "Cannot split to order-1 folio"); - return new_order != 1; + if (new_order == 1) + return false; } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !mapping_large_folio_support(folio->mapping)) { /* @@ -3553,7 +3554,8 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order, if (folio_test_anon(folio)) { VM_WARN_ONCE(warns && new_order == 1, "Cannot split to order-1 folio"); - return new_order != 1; + if (new_order == 1) + return false; } else if (new_order) { if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !mapping_large_folio_support(folio->mapping)) { @@ -3617,6 +3619,16 @@ static int __folio_split(struct folio *folio, unsigned int new_order, if (folio != page_folio(split_at) || folio != page_folio(lock_at)) return -EINVAL; + /* + * Folios that just got truncated cannot get split. Signal to the + * caller that there was a race. + * + * TODO: this will also currently refuse shmem folios that are in the + * swapcache. + */ + if (!is_anon && !folio->mapping) + return -EBUSY; + if (new_order >= folio_order(folio)) return -EINVAL; @@ -3657,18 +3669,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order, gfp_t gfp; mapping = folio->mapping; - - /* Truncated ? */ - /* - * TODO: add support for large shmem folio in swap cache. - * When shmem is in swap cache, mapping is NULL and - * folio_test_swapcache() is true. - */ - if (!mapping) { - ret = -EBUSY; - goto out; - } - min_order = mapping_min_folio_order(folio->mapping); if (new_order < min_order) { ret = -EINVAL; diff --git a/mm/memblock.c b/mm/memblock.c index e23e16618e9b..f0f2dc66e9a2 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1826,7 +1826,8 @@ phys_addr_t __init_memblock memblock_reserved_kern_size(phys_addr_t limit, int n */ unsigned long __init memblock_estimated_nr_free_pages(void) { - return PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size()); + return PHYS_PFN(memblock_phys_mem_size() - + memblock_reserved_kern_size(MEMBLOCK_ALLOC_ANYWHERE, NUMA_NO_NODE)); } /* lowest address */ diff --git a/mm/memfd.c b/mm/memfd.c index 1d109c1acf21..a405eaa451ee 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -96,9 +96,36 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) NULL, gfp_mask); if (folio) { + u32 hash; + + /* + * Zero the folio to prevent information leaks to userspace. + * Use folio_zero_user() which is optimized for huge/gigantic + * pages. Pass 0 as addr_hint since this is not a faulting path + * and we don't have a user virtual address yet. + */ + folio_zero_user(folio, 0); + + /* + * Mark the folio uptodate before adding to page cache, + * as required by filemap.c and other hugetlb paths. + */ + __folio_mark_uptodate(folio); + + /* + * Serialize hugepage allocation and instantiation to prevent + * races with concurrent allocations, as required by all other + * callers of hugetlb_add_to_page_cache(). + */ + hash = hugetlb_fault_mutex_hash(memfd->f_mapping, idx); + mutex_lock(&hugetlb_fault_mutex_table[hash]); + err = hugetlb_add_to_page_cache(folio, memfd->f_mapping, idx); + + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + if (err) { folio_put(folio); goto err_unresv; diff --git a/mm/mempool.c b/mm/mempool.c index 1c38e873e546..d7bbf1189db9 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -68,10 +68,20 @@ static void check_element(mempool_t *pool, void *element) } else if (pool->free == mempool_free_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; - void *addr = kmap_local_page((struct page *)element); - __check_element(pool, addr, 1UL << (PAGE_SHIFT + order)); - kunmap_local(addr); +#ifdef CONFIG_HIGHMEM + for (int i = 0; i < (1 << order); i++) { + struct page *page = (struct page *)element; + void *addr = kmap_local_page(page + i); + + __check_element(pool, addr, PAGE_SIZE); + kunmap_local(addr); + } +#else + void *addr = page_address((struct page *)element); + + __check_element(pool, addr, PAGE_SIZE << order); +#endif } } @@ -97,10 +107,20 @@ static void poison_element(mempool_t *pool, void *element) } else if (pool->alloc == mempool_alloc_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; - void *addr = kmap_local_page((struct page *)element); - __poison_element(addr, 1UL << (PAGE_SHIFT + order)); - kunmap_local(addr); +#ifdef CONFIG_HIGHMEM + for (int i = 0; i < (1 << order); i++) { + struct page *page = (struct page *)element; + void *addr = kmap_local_page(page + i); + + __poison_element(addr, PAGE_SIZE); + kunmap_local(addr); + } +#else + void *addr = page_address((struct page *)element); + + __poison_element(addr, PAGE_SIZE << order); +#endif } } #else /* CONFIG_SLUB_DEBUG_ON */ diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index 0a0db5849b8e..42e3dde73e74 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -241,6 +241,7 @@ retry: if (PTR_ERR(vma) == -EAGAIN) { count_vm_vma_lock_event(VMA_LOCK_MISS); /* The area was replaced with another one */ + mas_set(&mas, address); goto retry; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 600d9e981c23..ed82ee55e66a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1822,14 +1822,9 @@ inline void post_alloc_hook(struct page *page, unsigned int order, * If memory tags should be zeroed * (which happens only when memory should be initialized as well). */ - if (zero_tags) { - /* Initialize both memory and memory tags. */ - for (i = 0; i != 1 << order; ++i) - tag_clear_highpage(page + i); + if (zero_tags) + init = !tag_clear_highpages(page, 1 << order); - /* Take note that memory was initialized by the loop above. */ - init = false; - } if (!should_skip_kasan_unpoison(gfp_flags) && kasan_unpoison_pages(page, order, init)) { /* Take note that memory was initialized by KASAN. */ diff --git a/mm/shmem.c b/mm/shmem.c index 58701d14dd96..5a3f0f754dc0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -131,8 +131,7 @@ struct shmem_options { #define SHMEM_SEEN_INODES 2 #define SHMEM_SEEN_HUGE 4 #define SHMEM_SEEN_INUMS 8 -#define SHMEM_SEEN_NOSWAP 16 -#define SHMEM_SEEN_QUOTA 32 +#define SHMEM_SEEN_QUOTA 16 }; #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -4680,7 +4679,6 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) "Turning off swap in unprivileged tmpfs mounts unsupported"); } ctx->noswap = true; - ctx->seen |= SHMEM_SEEN_NOSWAP; break; case Opt_quota: if (fc->user_ns != &init_user_ns) @@ -4830,14 +4828,15 @@ static int shmem_reconfigure(struct fs_context *fc) err = "Current inum too high to switch to 32-bit inums"; goto out; } - if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) { + + /* + * "noswap" doesn't use fsparam_flag_no, i.e. there's no "swap" + * counterpart for (re-)enabling swap. + */ + if (ctx->noswap && !sbinfo->noswap) { err = "Cannot disable swap on remount"; goto out; } - if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) { - err = "Cannot enable swap on remount if it was disabled on first mount"; - goto out; - } if (ctx->seen & SHMEM_SEEN_QUOTA && !sb_any_quota_loaded(fc->root->d_sb)) { diff --git a/mm/slub.c b/mm/slub.c index 1bf65c421325..a0b905c2a557 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6336,8 +6336,6 @@ next_remote_batch: if (unlikely(!slab_free_hook(s, p[i], init, false))) { p[i] = p[--size]; - if (!size) - goto flush_remote; continue; } @@ -6352,6 +6350,9 @@ next_remote_batch: i++; } + if (!size) + goto flush_remote; + next_batch: if (!local_trylock(&s->cpu_sheaves->lock)) goto fallback; @@ -6406,6 +6407,9 @@ do_free: goto next_batch; } + if (remote_nr) + goto flush_remote; + return; no_empty: diff --git a/mm/swap_state.c b/mm/swap_state.c index b13e9c4baa90..f4980dde5394 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -748,6 +748,8 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, blk_start_plug(&plug); for (addr = start; addr < end; ilx++, addr += PAGE_SIZE) { + struct swap_info_struct *si = NULL; + if (!pte++) { pte = pte_offset_map(vmf->pmd, addr); if (!pte) @@ -761,8 +763,19 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, continue; pte_unmap(pte); pte = NULL; + /* + * Readahead entry may come from a device that we are not + * holding a reference to, try to grab a reference, or skip. + */ + if (swp_type(entry) != swp_type(targ_entry)) { + si = get_swap_device(entry); + if (!si) + continue; + } folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); + if (si) + put_swap_device(si); if (!folio) continue; if (page_allocated) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 10760240a3a2..a1b4b9d80e3b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2005,10 +2005,8 @@ swp_entry_t get_swap_page_of_type(int type) local_lock(&percpu_swap_cluster.lock); offset = cluster_alloc_swap_entry(si, 0, 1); local_unlock(&percpu_swap_cluster.lock); - if (offset) { + if (offset) entry = swp_entry(si->type, offset); - atomic_long_dec(&nr_swap_pages); - } } put_swap_device(si); } |
