diff options
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/damon/core.c | 41 | ||||
| -rw-r--r-- | mm/damon/sysfs-schemes.c | 10 | ||||
| -rw-r--r-- | mm/damon/sysfs.c | 9 | ||||
| -rw-r--r-- | mm/damon/vaddr.c | 2 | ||||
| -rw-r--r-- | mm/hugetlb.c | 16 | ||||
| -rw-r--r-- | mm/kasan/common.c | 32 | ||||
| -rw-r--r-- | mm/kasan/hw_tags.c | 2 | ||||
| -rw-r--r-- | mm/kasan/shadow.c | 4 | ||||
| -rw-r--r-- | mm/kmsan/shadow.c | 2 | ||||
| -rw-r--r-- | mm/ksm.c | 2 | ||||
| -rw-r--r-- | mm/memcontrol.c | 4 | ||||
| -rw-r--r-- | mm/memory-failure.c | 29 | ||||
| -rw-r--r-- | mm/memremap.c | 2 | ||||
| -rw-r--r-- | mm/numa_memblks.c | 2 | ||||
| -rw-r--r-- | mm/page_alloc.c | 83 | ||||
| -rw-r--r-- | mm/page_owner.c | 2 | ||||
| -rw-r--r-- | mm/shmem.c | 32 | ||||
| -rw-r--r-- | mm/slub.c | 2 | ||||
| -rw-r--r-- | mm/vma.c | 111 | ||||
| -rw-r--r-- | mm/vma.h | 3 | ||||
| -rw-r--r-- | mm/vmalloc.c | 10 | ||||
| -rw-r--r-- | mm/zswap.c | 2 |
22 files changed, 287 insertions, 115 deletions
diff --git a/mm/damon/core.c b/mm/damon/core.c index f9fc0375890a..84f80a20f233 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1431,6 +1431,35 @@ bool damon_is_running(struct damon_ctx *ctx) return running; } +/* + * damon_call_handle_inactive_ctx() - handle DAMON call request that added to + * an inactive context. + * @ctx: The inactive DAMON context. + * @control: Control variable of the call request. + * + * This function is called in a case that @control is added to @ctx but @ctx is + * not running (inactive). See if @ctx handled @control or not, and cleanup + * @control if it was not handled. + * + * Returns 0 if @control was handled by @ctx, negative error code otherwise. + */ +static int damon_call_handle_inactive_ctx( + struct damon_ctx *ctx, struct damon_call_control *control) +{ + struct damon_call_control *c; + + mutex_lock(&ctx->call_controls_lock); + list_for_each_entry(c, &ctx->call_controls, list) { + if (c == control) { + list_del(&control->list); + mutex_unlock(&ctx->call_controls_lock); + return -EINVAL; + } + } + mutex_unlock(&ctx->call_controls_lock); + return 0; +} + /** * damon_call() - Invoke a given function on DAMON worker thread (kdamond). * @ctx: DAMON context to call the function for. @@ -1461,7 +1490,7 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control) list_add_tail(&control->list, &ctx->call_controls); mutex_unlock(&ctx->call_controls_lock); if (!damon_is_running(ctx)) - return -EINVAL; + return damon_call_handle_inactive_ctx(ctx, control); if (control->repeat) return 0; wait_for_completion(&control->completion); @@ -2051,13 +2080,15 @@ static unsigned long damos_get_node_memcg_used_bp( rcu_read_lock(); memcg = mem_cgroup_from_id(goal->memcg_id); - rcu_read_unlock(); - if (!memcg) { + if (!memcg || !mem_cgroup_tryget(memcg)) { + rcu_read_unlock(); if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) return 0; else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */ return 10000; } + rcu_read_unlock(); + mem_cgroup_flush_stats(memcg); lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(goal->nid)); used_pages = lruvec_page_state(lruvec, NR_ACTIVE_ANON); @@ -2065,6 +2096,8 @@ static unsigned long damos_get_node_memcg_used_bp( used_pages += lruvec_page_state(lruvec, NR_ACTIVE_FILE); used_pages += lruvec_page_state(lruvec, NR_INACTIVE_FILE); + mem_cgroup_put(memcg); + si_meminfo_node(&i, goal->nid); if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) numerator = used_pages; @@ -2751,13 +2784,13 @@ done: if (ctx->ops.cleanup) ctx->ops.cleanup(ctx); kfree(ctx->regions_score_histogram); + kdamond_call(ctx, true); pr_debug("kdamond (%d) finishes\n", current->pid); mutex_lock(&ctx->kdamond_lock); ctx->kdamond = NULL; mutex_unlock(&ctx->kdamond_lock); - kdamond_call(ctx, true); damos_walk_cancel(ctx); mutex_lock(&damon_lock); diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 30d20f5b3192..3a699dcd5a7f 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -2152,13 +2152,13 @@ static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme) return err; err = damos_sysfs_set_dests(scheme); if (err) - goto put_access_pattern_out; + goto rmdir_put_access_pattern_out; err = damon_sysfs_scheme_set_quotas(scheme); if (err) goto put_dests_out; err = damon_sysfs_scheme_set_watermarks(scheme); if (err) - goto put_quotas_access_pattern_out; + goto rmdir_put_quotas_access_pattern_out; err = damos_sysfs_set_filter_dirs(scheme); if (err) goto put_watermarks_quotas_access_pattern_out; @@ -2183,13 +2183,15 @@ put_filters_watermarks_quotas_access_pattern_out: put_watermarks_quotas_access_pattern_out: kobject_put(&scheme->watermarks->kobj); scheme->watermarks = NULL; -put_quotas_access_pattern_out: +rmdir_put_quotas_access_pattern_out: + damon_sysfs_quotas_rm_dirs(scheme->quotas); kobject_put(&scheme->quotas->kobj); scheme->quotas = NULL; put_dests_out: kobject_put(&scheme->dests->kobj); scheme->dests = NULL; -put_access_pattern_out: +rmdir_put_access_pattern_out: + damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); kobject_put(&scheme->access_pattern->kobj); scheme->access_pattern = NULL; return err; diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index e2bd2d7becdd..95fd9375a7d8 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -792,7 +792,7 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs) nr_regions_range = damon_sysfs_ul_range_alloc(10, 1000); if (!nr_regions_range) { err = -ENOMEM; - goto put_intervals_out; + goto rmdir_put_intervals_out; } err = kobject_init_and_add(&nr_regions_range->kobj, @@ -806,6 +806,8 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs) put_nr_regions_intervals_out: kobject_put(&nr_regions_range->kobj); attrs->nr_regions_range = NULL; +rmdir_put_intervals_out: + damon_sysfs_intervals_rm_dirs(intervals); put_intervals_out: kobject_put(&intervals->kobj); attrs->intervals = NULL; @@ -948,7 +950,7 @@ static int damon_sysfs_context_add_dirs(struct damon_sysfs_context *context) err = damon_sysfs_context_set_targets(context); if (err) - goto put_attrs_out; + goto rmdir_put_attrs_out; err = damon_sysfs_context_set_schemes(context); if (err) @@ -958,7 +960,8 @@ static int damon_sysfs_context_add_dirs(struct damon_sysfs_context *context) put_targets_attrs_out: kobject_put(&context->targets->kobj); context->targets = NULL; -put_attrs_out: +rmdir_put_attrs_out: + damon_sysfs_attrs_rm_dirs(context->attrs); kobject_put(&context->attrs->kobj); context->attrs = NULL; return err; diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 2750c88e7225..23ed738a0bd6 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -743,7 +743,7 @@ huge_out: if (!folio) continue; if (damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL)) - return 0; + continue; damos_va_migrate_dests_add(folio, walk->vma, addr, dests, migration_lists); nr = folio_nr_pages(folio); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 51273baec9e5..e0ab14020513 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4286,6 +4286,11 @@ static int __init hugepages_setup(char *s) unsigned long tmp; char *p = s; + if (!hugepages_supported()) { + pr_warn("HugeTLB: hugepages unsupported, ignoring hugepages=%s cmdline\n", s); + return 0; + } + if (!parsed_valid_hugepagesz) { pr_warn("HugeTLB: hugepages=%s does not follow a valid hugepagesz, ignoring\n", s); parsed_valid_hugepagesz = true; @@ -4366,6 +4371,11 @@ static int __init hugepagesz_setup(char *s) unsigned long size; struct hstate *h; + if (!hugepages_supported()) { + pr_warn("HugeTLB: hugepages unsupported, ignoring hugepagesz=%s cmdline\n", s); + return 0; + } + parsed_valid_hugepagesz = false; size = (unsigned long)memparse(s, NULL); @@ -4414,6 +4424,12 @@ static int __init default_hugepagesz_setup(char *s) unsigned long size; int i; + if (!hugepages_supported()) { + pr_warn("HugeTLB: hugepages unsupported, ignoring default_hugepagesz=%s cmdline\n", + s); + return 0; + } + parsed_valid_hugepagesz = false; if (parsed_default_hugepagesz) { pr_err("HugeTLB: default_hugepagesz previously specified, ignoring %s\n", s); diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 1d27f1bd260b..ed489a14dddf 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -28,6 +28,7 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/bug.h> +#include <linux/vmalloc.h> #include "kasan.h" #include "../slab.h" @@ -575,3 +576,34 @@ bool __kasan_check_byte(const void *address, unsigned long ip) } return true; } + +#ifdef CONFIG_KASAN_VMALLOC +void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, + kasan_vmalloc_flags_t flags) +{ + unsigned long size; + void *addr; + int area; + u8 tag; + + /* + * If KASAN_VMALLOC_KEEP_TAG was set at this point, all vms[] pointers + * would be unpoisoned with the KASAN_TAG_KERNEL which would disable + * KASAN checks down the line. + */ + if (WARN_ON_ONCE(flags & KASAN_VMALLOC_KEEP_TAG)) + return; + + size = vms[0]->size; + addr = vms[0]->addr; + vms[0]->addr = __kasan_unpoison_vmalloc(addr, size, flags); + tag = get_tag(vms[0]->addr); + + for (area = 1 ; area < nr_vms ; area++) { + size = vms[area]->size; + addr = set_tag(vms[area]->addr, tag); + vms[area]->addr = + __kasan_unpoison_vmalloc(addr, size, flags | KASAN_VMALLOC_KEEP_TAG); + } +} +#endif diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 1c373cc4b3fa..cbef5e450954 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -361,7 +361,7 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, return (void *)start; } - tag = kasan_random_tag(); + tag = (flags & KASAN_VMALLOC_KEEP_TAG) ? get_tag(start) : kasan_random_tag(); start = set_tag(start, tag); /* Unpoison and initialize memory up to size. */ diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 29a751a8a08d..32fbdf759ea2 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -631,7 +631,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, !(flags & KASAN_VMALLOC_PROT_NORMAL)) return (void *)start; - start = set_tag(start, kasan_random_tag()); + if (unlikely(!(flags & KASAN_VMALLOC_KEEP_TAG))) + start = set_tag(start, kasan_random_tag()); + kasan_unpoison(start, size, false); return (void *)start; } diff --git a/mm/kmsan/shadow.c b/mm/kmsan/shadow.c index e7f554a31bb4..9e1c5f2b7a41 100644 --- a/mm/kmsan/shadow.c +++ b/mm/kmsan/shadow.c @@ -207,7 +207,7 @@ void kmsan_free_page(struct page *page, unsigned int order) if (!kmsan_enabled || kmsan_in_runtime()) return; kmsan_enter_runtime(); - kmsan_internal_poison_memory(page_address(page), page_size(page), + kmsan_internal_poison_memory(page_address(page), PAGE_SIZE << order, GFP_KERNEL & ~(__GFP_RECLAIM), KMSAN_POISON_CHECK | KMSAN_POISON_FREE); kmsan_leave_runtime(); @@ -650,7 +650,7 @@ static int break_ksm_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long en } } out_unlock: - pte_unmap_unlock(ptep, ptl); + pte_unmap_unlock(start_ptep, ptl); return found; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index be810c1fbfc3..86f43b7e5f71 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5638,6 +5638,6 @@ void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg) memcg = root_mem_cgroup; pr_warn("Memory cgroup min protection %lukB -- low protection %lukB", - K(atomic_long_read(&memcg->memory.children_min_usage)*PAGE_SIZE), - K(atomic_long_read(&memcg->memory.children_low_usage)*PAGE_SIZE)); + K(atomic_long_read(&memcg->memory.children_min_usage)), + K(atomic_long_read(&memcg->memory.children_low_usage))); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index fbc5a01260c8..c80c2907da33 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2161,6 +2161,9 @@ int register_pfn_address_space(struct pfn_address_space *pfn_space) { guard(mutex)(&pfn_space_lock); + if (!pfn_space->pfn_to_vma_pgoff) + return -EINVAL; + if (interval_tree_iter_first(&pfn_space_itree, pfn_space->node.start, pfn_space->node.last)) @@ -2183,10 +2186,10 @@ void unregister_pfn_address_space(struct pfn_address_space *pfn_space) } EXPORT_SYMBOL_GPL(unregister_pfn_address_space); -static void add_to_kill_pfn(struct task_struct *tsk, - struct vm_area_struct *vma, - struct list_head *to_kill, - unsigned long pfn) +static void add_to_kill_pgoff(struct task_struct *tsk, + struct vm_area_struct *vma, + struct list_head *to_kill, + pgoff_t pgoff) { struct to_kill *tk; @@ -2197,12 +2200,12 @@ static void add_to_kill_pfn(struct task_struct *tsk, } /* Check for pgoff not backed by struct page */ - tk->addr = vma_address(vma, pfn, 1); + tk->addr = vma_address(vma, pgoff, 1); tk->size_shift = PAGE_SHIFT; if (tk->addr == -EFAULT) pr_info("Unable to find address %lx in %s\n", - pfn, tsk->comm); + pgoff, tsk->comm); get_task_struct(tsk); tk->tsk = tsk; @@ -2212,11 +2215,12 @@ static void add_to_kill_pfn(struct task_struct *tsk, /* * Collect processes when the error hit a PFN not backed by struct page. */ -static void collect_procs_pfn(struct address_space *mapping, +static void collect_procs_pfn(struct pfn_address_space *pfn_space, unsigned long pfn, struct list_head *to_kill) { struct vm_area_struct *vma; struct task_struct *tsk; + struct address_space *mapping = pfn_space->mapping; i_mmap_lock_read(mapping); rcu_read_lock(); @@ -2226,9 +2230,12 @@ static void collect_procs_pfn(struct address_space *mapping, t = task_early_kill(tsk, true); if (!t) continue; - vma_interval_tree_foreach(vma, &mapping->i_mmap, pfn, pfn) { - if (vma->vm_mm == t->mm) - add_to_kill_pfn(t, vma, to_kill, pfn); + vma_interval_tree_foreach(vma, &mapping->i_mmap, 0, ULONG_MAX) { + pgoff_t pgoff; + + if (vma->vm_mm == t->mm && + !pfn_space->pfn_to_vma_pgoff(vma, pfn, &pgoff)) + add_to_kill_pgoff(t, vma, to_kill, pgoff); } } rcu_read_unlock(); @@ -2264,7 +2271,7 @@ static int memory_failure_pfn(unsigned long pfn, int flags) struct pfn_address_space *pfn_space = container_of(node, struct pfn_address_space, node); - collect_procs_pfn(pfn_space->mapping, pfn, &tokill); + collect_procs_pfn(pfn_space, pfn, &tokill); mf_handled = true; } diff --git a/mm/memremap.c b/mm/memremap.c index 4c2e0d68eb27..63c6ab4fdf08 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -427,8 +427,6 @@ void free_zone_device_folio(struct folio *folio) if (folio_test_anon(folio)) { for (i = 0; i < nr; i++) __ClearPageAnonExclusive(folio_page(folio, i)); - } else { - VM_WARN_ON_ONCE(folio_test_large(folio)); } /* diff --git a/mm/numa_memblks.c b/mm/numa_memblks.c index 5b009a9cd8b4..8f5735fda0a2 100644 --- a/mm/numa_memblks.c +++ b/mm/numa_memblks.c @@ -7,6 +7,8 @@ #include <linux/numa.h> #include <linux/numa_memblks.h> +#include <asm/numa.h> + int numa_distance_cnt; static u8 *numa_distance; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 822e05f1a964..f65c4edf199d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -167,6 +167,33 @@ static inline void __pcp_trylock_noop(unsigned long *flags) { } pcp_trylock_finish(UP_flags); \ }) +/* + * With the UP spinlock implementation, when we spin_lock(&pcp->lock) (for i.e. + * a potentially remote cpu drain) and get interrupted by an operation that + * attempts pcp_spin_trylock(), we can't rely on the trylock failure due to UP + * spinlock assumptions making the trylock a no-op. So we have to turn that + * spin_lock() to a spin_lock_irqsave(). This works because on UP there are no + * remote cpu's so we can only be locking the only existing local one. + */ +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) +static inline void __flags_noop(unsigned long *flags) { } +#define pcp_spin_lock_maybe_irqsave(ptr, flags) \ +({ \ + __flags_noop(&(flags)); \ + spin_lock(&(ptr)->lock); \ +}) +#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \ +({ \ + spin_unlock(&(ptr)->lock); \ + __flags_noop(&(flags)); \ +}) +#else +#define pcp_spin_lock_maybe_irqsave(ptr, flags) \ + spin_lock_irqsave(&(ptr)->lock, flags) +#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \ + spin_unlock_irqrestore(&(ptr)->lock, flags) +#endif + #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); EXPORT_PER_CPU_SYMBOL(numa_node); @@ -914,6 +941,17 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn, NULL) != NULL; } +static void change_pageblock_range(struct page *pageblock_page, + int start_order, int migratetype) +{ + int nr_pageblocks = 1 << (start_order - pageblock_order); + + while (nr_pageblocks--) { + set_pageblock_migratetype(pageblock_page, migratetype); + pageblock_page += pageblock_nr_pages; + } +} + /* * Freeing function for a buddy system allocator. * @@ -1000,7 +1038,7 @@ static inline void __free_one_page(struct page *page, * expand() down the line puts the sub-blocks * on the right freelists. */ - set_pageblock_migratetype(buddy, migratetype); + change_pageblock_range(buddy, order, migratetype); } combined_pfn = buddy_pfn & pfn; @@ -2147,17 +2185,6 @@ bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *pag #endif /* CONFIG_MEMORY_ISOLATION */ -static void change_pageblock_range(struct page *pageblock_page, - int start_order, int migratetype) -{ - int nr_pageblocks = 1 << (start_order - pageblock_order); - - while (nr_pageblocks--) { - set_pageblock_migratetype(pageblock_page, migratetype); - pageblock_page += pageblock_nr_pages; - } -} - static inline bool boost_watermark(struct zone *zone) { unsigned long max_boost; @@ -2556,6 +2583,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) { int high_min, to_drain, to_drain_batched, batch; + unsigned long UP_flags; bool todo = false; high_min = READ_ONCE(pcp->high_min); @@ -2575,9 +2603,9 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) to_drain = pcp->count - pcp->high; while (to_drain > 0) { to_drain_batched = min(to_drain, batch); - spin_lock(&pcp->lock); + pcp_spin_lock_maybe_irqsave(pcp, UP_flags); free_pcppages_bulk(zone, to_drain_batched, pcp, 0); - spin_unlock(&pcp->lock); + pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); todo = true; to_drain -= to_drain_batched; @@ -2594,14 +2622,15 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) */ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { + unsigned long UP_flags; int to_drain, batch; batch = READ_ONCE(pcp->batch); to_drain = min(pcp->count, batch); if (to_drain > 0) { - spin_lock(&pcp->lock); + pcp_spin_lock_maybe_irqsave(pcp, UP_flags); free_pcppages_bulk(zone, to_drain, pcp, 0); - spin_unlock(&pcp->lock); + pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); } } #endif @@ -2612,10 +2641,11 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) static void drain_pages_zone(unsigned int cpu, struct zone *zone) { struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); + unsigned long UP_flags; int count; do { - spin_lock(&pcp->lock); + pcp_spin_lock_maybe_irqsave(pcp, UP_flags); count = pcp->count; if (count) { int to_drain = min(count, @@ -2624,7 +2654,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) free_pcppages_bulk(zone, to_drain, pcp, 0); count -= to_drain; } - spin_unlock(&pcp->lock); + pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); } while (count); } @@ -5924,7 +5954,7 @@ static int zone_batchsize(struct zone *zone) * recycled, this leads to the once large chunks of space being * fragmented and becoming unavailable for high-order allocations. */ - return 0; + return 1; #endif } @@ -6109,6 +6139,7 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu) { struct per_cpu_pages *pcp; struct cpu_cacheinfo *cci; + unsigned long UP_flags; pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); cci = get_cpu_cacheinfo(cpu); @@ -6119,12 +6150,12 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu) * This can reduce zone lock contention without hurting * cache-hot pages sharing. */ - spin_lock(&pcp->lock); + pcp_spin_lock_maybe_irqsave(pcp, UP_flags); if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch) pcp->flags |= PCPF_FREE_HIGH_BATCH; else pcp->flags &= ~PCPF_FREE_HIGH_BATCH; - spin_unlock(&pcp->lock); + pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); } void setup_pcp_cacheinfo(unsigned int cpu) @@ -6667,11 +6698,19 @@ static int percpu_pagelist_high_fraction_sysctl_handler(const struct ctl_table * int old_percpu_pagelist_high_fraction; int ret; + /* + * Avoid using pcp_batch_high_lock for reads as the value is read + * atomically and a race with offlining is harmless. + */ + + if (!write) + return proc_dointvec_minmax(table, write, buffer, length, ppos); + mutex_lock(&pcp_batch_high_lock); old_percpu_pagelist_high_fraction = percpu_pagelist_high_fraction; ret = proc_dointvec_minmax(table, write, buffer, length, ppos); - if (!write || ret < 0) + if (ret < 0) goto out; /* Sanity checking to avoid pcp imbalance */ diff --git a/mm/page_owner.c b/mm/page_owner.c index a70245684206..b3260f0c17ba 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -952,7 +952,7 @@ static const struct file_operations page_owner_stack_fops = { .open = page_owner_stack_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; static int page_owner_threshold_get(void *data, u64 *val) diff --git a/mm/shmem.c b/mm/shmem.c index b329b5302c48..ec6c01378e9d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4019,22 +4019,10 @@ static int shmem_whiteout(struct mnt_idmap *idmap, whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); if (!whiteout) return -ENOMEM; - error = shmem_mknod(idmap, old_dir, whiteout, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); dput(whiteout); - if (error) - return error; - - /* - * Cheat and hash the whiteout while the old dentry is still in - * place, instead of playing games with FS_RENAME_DOES_D_MOVE. - * - * d_lookup() will consistently find one of them at this point, - * not sure which one, but that isn't even important. - */ - d_rehash(whiteout); - return 0; + return error; } /* @@ -4050,6 +4038,7 @@ static int shmem_rename2(struct mnt_idmap *idmap, { struct inode *inode = d_inode(old_dentry); int they_are_dirs = S_ISDIR(inode->i_mode); + bool had_offset = false; int error; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) @@ -4062,16 +4051,23 @@ static int shmem_rename2(struct mnt_idmap *idmap, if (!simple_empty(new_dentry)) return -ENOTEMPTY; + error = simple_offset_add(shmem_get_offset_ctx(new_dir), new_dentry); + if (error == -EBUSY) + had_offset = true; + else if (unlikely(error)) + return error; + if (flags & RENAME_WHITEOUT) { error = shmem_whiteout(idmap, old_dir, old_dentry); - if (error) + if (error) { + if (!had_offset) + simple_offset_remove(shmem_get_offset_ctx(new_dir), + new_dentry); return error; + } } - error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry); - if (error) - return error; - + simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry); if (d_really_is_positive(new_dentry)) { (void) shmem_unlink(new_dir, new_dentry); if (they_are_dirs) { diff --git a/mm/slub.c b/mm/slub.c index f21b2f0c6f5a..861592ac5425 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6539,6 +6539,8 @@ static void defer_free(struct kmem_cache *s, void *head) guard(preempt)(); + head = kasan_reset_tag(head); + df = this_cpu_ptr(&defer_free_objects); if (llist_add(head + s->offset, &df->objects)) irq_work_queue(&df->work); @@ -67,18 +67,13 @@ struct mmap_state { .state = VMA_MERGE_START, \ } -/* - * If, at any point, the VMA had unCoW'd mappings from parents, it will maintain - * more than one anon_vma_chain connecting it to more than one anon_vma. A merge - * would mean a wider range of folios sharing the root anon_vma lock, and thus - * potential lock contention, we do not wish to encourage merging such that this - * scales to a problem. - */ -static bool vma_had_uncowed_parents(struct vm_area_struct *vma) +/* Was this VMA ever forked from a parent, i.e. maybe contains CoW mappings? */ +static bool vma_is_fork_child(struct vm_area_struct *vma) { /* * The list_is_singular() test is to avoid merging VMA cloned from - * parents. This can improve scalability caused by anon_vma lock. + * parents. This can improve scalability caused by the anon_vma root + * lock. */ return vma && vma->anon_vma && !list_is_singular(&vma->anon_vma_chain); } @@ -115,11 +110,19 @@ static bool is_mergeable_anon_vma(struct vma_merge_struct *vmg, bool merge_next) VM_WARN_ON(src && src_anon != src->anon_vma); /* Case 1 - we will dup_anon_vma() from src into tgt. */ - if (!tgt_anon && src_anon) - return !vma_had_uncowed_parents(src); + if (!tgt_anon && src_anon) { + struct vm_area_struct *copied_from = vmg->copied_from; + + if (vma_is_fork_child(src)) + return false; + if (vma_is_fork_child(copied_from)) + return false; + + return true; + } /* Case 2 - we will simply use tgt's anon_vma. */ if (tgt_anon && !src_anon) - return !vma_had_uncowed_parents(tgt); + return !vma_is_fork_child(tgt); /* Case 3 - the anon_vma's are already shared. */ return src_anon == tgt_anon; } @@ -829,6 +832,8 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( VM_WARN_ON_VMG(middle && !(vma_iter_addr(vmg->vmi) >= middle->vm_start && vma_iter_addr(vmg->vmi) < middle->vm_end), vmg); + /* An existing merge can never be used by the mremap() logic. */ + VM_WARN_ON_VMG(vmg->copied_from, vmg); vmg->state = VMA_MERGE_NOMERGE; @@ -1099,6 +1104,33 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg) } /* + * vma_merge_copied_range - Attempt to merge a VMA that is being copied by + * mremap() + * + * @vmg: Describes the VMA we are adding, in the copied-to range @vmg->start to + * @vmg->end (exclusive), which we try to merge with any adjacent VMAs if + * possible. + * + * vmg->prev, next, start, end, pgoff should all be relative to the COPIED TO + * range, i.e. the target range for the VMA. + * + * Returns: In instances where no merge was possible, NULL. Otherwise, a pointer + * to the VMA we expanded. + * + * ASSUMPTIONS: Same as vma_merge_new_range(), except vmg->middle must contain + * the copied-from VMA. + */ +static struct vm_area_struct *vma_merge_copied_range(struct vma_merge_struct *vmg) +{ + /* We must have a copied-from VMA. */ + VM_WARN_ON_VMG(!vmg->middle, vmg); + + vmg->copied_from = vmg->middle; + vmg->middle = NULL; + return vma_merge_new_range(vmg); +} + +/* * vma_expand - Expand an existing VMA * * @vmg: Describes a VMA expansion operation. @@ -1117,46 +1149,52 @@ struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg) int vma_expand(struct vma_merge_struct *vmg) { struct vm_area_struct *anon_dup = NULL; - bool remove_next = false; struct vm_area_struct *target = vmg->target; struct vm_area_struct *next = vmg->next; + bool remove_next = false; vm_flags_t sticky_flags; - - sticky_flags = vmg->vm_flags & VM_STICKY; - sticky_flags |= target->vm_flags & VM_STICKY; - - VM_WARN_ON_VMG(!target, vmg); + int ret = 0; mmap_assert_write_locked(vmg->mm); - vma_start_write(target); - if (next && (target != next) && (vmg->end == next->vm_end)) { - int ret; - sticky_flags |= next->vm_flags & VM_STICKY; + if (next && target != next && vmg->end == next->vm_end) remove_next = true; - /* This should already have been checked by this point. */ - VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg); - vma_start_write(next); - /* - * In this case we don't report OOM, so vmg->give_up_on_mm is - * safe. - */ - ret = dup_anon_vma(target, next, &anon_dup); - if (ret) - return ret; - } + /* We must have a target. */ + VM_WARN_ON_VMG(!target, vmg); + /* This should have already been checked by this point. */ + VM_WARN_ON_VMG(remove_next && !can_merge_remove_vma(next), vmg); /* Not merging but overwriting any part of next is not handled. */ VM_WARN_ON_VMG(next && !remove_next && next != target && vmg->end > next->vm_start, vmg); - /* Only handles expanding */ + /* Only handles expanding. */ VM_WARN_ON_VMG(target->vm_start < vmg->start || target->vm_end > vmg->end, vmg); + sticky_flags = vmg->vm_flags & VM_STICKY; + sticky_flags |= target->vm_flags & VM_STICKY; if (remove_next) - vmg->__remove_next = true; + sticky_flags |= next->vm_flags & VM_STICKY; + + /* + * If we are removing the next VMA or copying from a VMA + * (e.g. mremap()'ing), we must propagate anon_vma state. + * + * Note that, by convention, callers ignore OOM for this case, so + * we don't need to account for vmg->give_up_on_mm here. + */ + if (remove_next) + ret = dup_anon_vma(target, next, &anon_dup); + if (!ret && vmg->copied_from) + ret = dup_anon_vma(target, vmg->copied_from, &anon_dup); + if (ret) + return ret; + if (remove_next) { + vma_start_write(next); + vmg->__remove_next = true; + } if (commit_merge(vmg)) goto nomem; @@ -1828,10 +1866,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (new_vma && new_vma->vm_start < addr + len) return NULL; /* should never get here */ - vmg.middle = NULL; /* New VMA range. */ vmg.pgoff = pgoff; vmg.next = vma_iter_next_rewind(&vmi, NULL); - new_vma = vma_merge_new_range(&vmg); + new_vma = vma_merge_copied_range(&vmg); if (new_vma) { /* @@ -106,6 +106,9 @@ struct vma_merge_struct { struct anon_vma_name *anon_name; enum vma_merge_state state; + /* If copied from (i.e. mremap()'d) the VMA from which we are copying. */ + struct vm_area_struct *copied_from; + /* Flags which callers can use to modify merge behaviour: */ /* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ecbac900c35f..628f96e83b11 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4248,7 +4248,7 @@ void *vzalloc_node_noprof(unsigned long size, int node) EXPORT_SYMBOL(vzalloc_node_noprof); /** - * vrealloc_node_align_noprof - reallocate virtually contiguous memory; contents + * vrealloc_node_align - reallocate virtually contiguous memory; contents * remain unchanged * @p: object to reallocate memory for * @size: the size to reallocate @@ -4331,7 +4331,9 @@ void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align */ if (size <= alloced_size) { kasan_unpoison_vmalloc(p + old_size, size - old_size, - KASAN_VMALLOC_PROT_NORMAL); + KASAN_VMALLOC_PROT_NORMAL | + KASAN_VMALLOC_VM_ALLOC | + KASAN_VMALLOC_KEEP_TAG); /* * No need to zero memory here, as unused memory will have * already been zeroed at initial allocation time or during @@ -5025,9 +5027,7 @@ retry: * With hardware tag-based KASAN, marking is skipped for * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). */ - for (area = 0; area < nr_vms; area++) - vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, - vms[area]->size, KASAN_VMALLOC_PROT_NORMAL); + kasan_unpoison_vmap_areas(vms, nr_vms, KASAN_VMALLOC_PROT_NORMAL); kfree(vas); return vms; diff --git a/mm/zswap.c b/mm/zswap.c index 5d0f8b13a958..ac9b7a60736b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -787,7 +787,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) return 0; fail: - if (acomp) + if (!IS_ERR_OR_NULL(acomp)) crypto_free_acomp(acomp); kfree(buffer); return ret; |
