diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 2 | ||||
-rw-r--r-- | mm/gup.c | 4 | ||||
-rw-r--r-- | mm/kfence/core.c | 51 | ||||
-rw-r--r-- | mm/memcontrol.c | 7 | ||||
-rw-r--r-- | mm/memory-failure.c | 11 | ||||
-rw-r--r-- | mm/memory.c | 10 | ||||
-rw-r--r-- | mm/oom_kill.c | 14 | ||||
-rw-r--r-- | mm/page_alloc.c | 1 | ||||
-rw-r--r-- | mm/percpu.c | 8 | ||||
-rw-r--r-- | mm/slab.c | 9 | ||||
-rw-r--r-- | mm/slab_common.c | 34 | ||||
-rw-r--r-- | mm/slob.c | 14 | ||||
-rw-r--r-- | mm/sparse.c | 2 | ||||
-rw-r--r-- | mm/vmalloc.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 2 |
15 files changed, 133 insertions, 40 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index c71e86c12418..cc86c5a127b9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2617,7 +2617,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(!iov_iter_count(iter))) return 0; - iov_iter_truncate(iter, inode->i_sb->s_maxbytes); + iov_iter_truncate(iter, inode->i_sb->s_maxbytes - iocb->ki_pos); pagevec_init(&pvec); do { @@ -1768,8 +1768,8 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) } while (start != end); mmap_read_unlock(mm); - if (size > (unsigned long)uaddr - start) - return size - ((unsigned long)uaddr - start); + if (size > start - (unsigned long)uaddr) + return size - (start - (unsigned long)uaddr); return 0; } EXPORT_SYMBOL(fault_in_safe_writeable); diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 3eab72fb3d8c..c49bc76b3a38 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -21,6 +21,9 @@ #include <linux/log2.h> #include <linux/memblock.h> #include <linux/moduleparam.h> +#include <linux/nodemask.h> +#include <linux/notifier.h> +#include <linux/panic_notifier.h> #include <linux/random.h> #include <linux/rcupdate.h> #include <linux/sched/clock.h> @@ -89,6 +92,14 @@ module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_inte static unsigned long kfence_skip_covered_thresh __read_mostly = 75; module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644); +/* If true, use a deferrable timer. */ +static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE); +module_param_named(deferrable, kfence_deferrable, bool, 0444); + +/* If true, check all canary bytes on panic. */ +static bool kfence_check_on_panic __read_mostly; +module_param_named(check_on_panic, kfence_check_on_panic, bool, 0444); + /* The pool of pages used for guard pages and objects. */ char *__kfence_pool __ro_after_init; EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */ @@ -693,8 +704,35 @@ static int kfence_debugfs_init(void) late_initcall(kfence_debugfs_init); +/* === Panic Notifier ====================================================== */ + +static void kfence_check_all_canary(void) +{ + int i; + + for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { + struct kfence_metadata *meta = &kfence_metadata[i]; + + if (meta->state == KFENCE_OBJECT_ALLOCATED) + for_each_canary(meta, check_canary_byte); + } +} + +static int kfence_check_canary_callback(struct notifier_block *nb, + unsigned long reason, void *arg) +{ + kfence_check_all_canary(); + return NOTIFY_OK; +} + +static struct notifier_block kfence_check_canary_notifier = { + .notifier_call = kfence_check_canary_callback, +}; + /* === Allocation Gate Timer ================================================ */ +static struct delayed_work kfence_timer; + #ifdef CONFIG_KFENCE_STATIC_KEYS /* Wait queue to wake up allocation-gate timer task. */ static DECLARE_WAIT_QUEUE_HEAD(allocation_wait); @@ -717,7 +755,6 @@ static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer); * avoids IPIs, at the cost of not immediately capturing allocations if the * instructions remain cached. */ -static struct delayed_work kfence_timer; static void toggle_allocation_gate(struct work_struct *work) { if (!READ_ONCE(kfence_enabled)) @@ -745,7 +782,6 @@ static void toggle_allocation_gate(struct work_struct *work) queue_delayed_work(system_unbound_wq, &kfence_timer, msecs_to_jiffies(kfence_sample_interval)); } -static DECLARE_DELAYED_WORK(kfence_timer, toggle_allocation_gate); /* === Public interface ===================================================== */ @@ -774,8 +810,18 @@ void __init kfence_init(void) if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS)) static_branch_enable(&kfence_allocation_key); + + if (kfence_deferrable) + INIT_DEFERRABLE_WORK(&kfence_timer, toggle_allocation_gate); + else + INIT_DELAYED_WORK(&kfence_timer, toggle_allocation_gate); + + if (kfence_check_on_panic) + atomic_notifier_chain_register(&panic_notifier_list, &kfence_check_canary_notifier); + WRITE_ONCE(kfence_enabled, true); queue_delayed_work(system_unbound_wq, &kfence_timer, 0); + pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE, CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool, (void *)(__kfence_pool + KFENCE_POOL_SIZE)); @@ -861,6 +907,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) * properties (e.g. reside in DMAable memory). */ if ((flags & GFP_ZONEMASK) || + ((flags & __GFP_THISNODE) && num_online_nodes() > 1) || (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) { atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); return NULL; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6dd32ed164ea..804f7be74a65 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1194,6 +1194,7 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, { struct mem_cgroup *iter; int ret = 0; + int i = 0; BUG_ON(memcg == root_mem_cgroup); @@ -1202,8 +1203,12 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, struct task_struct *task; css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it); - while (!ret && (task = css_task_iter_next(&it))) + while (!ret && (task = css_task_iter_next(&it))) { + /* Avoid potential softlockup warning */ + if ((++i & 1023) == 0) + cond_resched(); ret = fn(task, arg); + } css_task_iter_end(&it); if (ret) { mem_cgroup_iter_break(memcg, iter); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8be244f6c320..7e39a4c9e0df 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -707,12 +707,17 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn, mmap_read_lock(p->mm); ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops, (void *)&priv); + /* + * ret = 1 when CMCI wins, regardless of whether try_to_unmap() + * succeeds or fails, then kill the process with SIGBUS. + * ret = 0 when poison page is a clean page and it's dropped, no + * SIGBUS is needed. + */ if (ret == 1 && priv.tk.addr) kill_proc(&priv.tk, pfn, flags); - else - ret = 0; mmap_read_unlock(p->mm); - return ret > 0 ? -EHWPOISON : -EFAULT; + + return ret > 0 ? -EHWPOISON : 0; } static const char *action_name[] = { diff --git a/mm/memory.c b/mm/memory.c index 4785aecca9a8..4998b4c49052 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2570,11 +2570,11 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, if (fn) { do { if (create || !pte_none(*pte)) { - err = fn(pte++, addr, data); + err = fn(pte, addr, data); if (err) break; } - } while (addr += PAGE_SIZE, addr != end); + } while (pte++, addr += PAGE_SIZE, addr != end); } *mask |= PGTBL_PTE_MODIFIED; @@ -2713,8 +2713,10 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, next = pgd_addr_end(addr, end); if (pgd_none(*pgd) && !create) continue; - if (WARN_ON_ONCE(pgd_leaf(*pgd))) - return -EINVAL; + if (WARN_ON_ONCE(pgd_leaf(*pgd))) { + err = -EINVAL; + break; + } if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { if (!create) continue; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 262f752d3d51..5fd826f3f3da 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -44,6 +44,8 @@ #include <linux/kthread.h> #include <linux/init.h> #include <linux/mmu_notifier.h> +#include <linux/cred.h> +#include <linux/nmi.h> #include <asm/tlb.h> #include "internal.h" @@ -432,10 +434,15 @@ static void dump_tasks(struct oom_control *oc) mem_cgroup_scan_tasks(oc->memcg, dump_task, oc); else { struct task_struct *p; + int i = 0; rcu_read_lock(); - for_each_process(p) + for_each_process(p) { + /* Avoid potential softlockup warning */ + if ((++i & 1023) == 0) + touch_softlockup_watchdog(); dump_task(p, oc); + } rcu_read_unlock(); } } @@ -725,6 +732,7 @@ static inline void queue_oom_reaper(struct task_struct *tsk) */ static void mark_oom_victim(struct task_struct *tsk) { + const struct cred *cred; struct mm_struct *mm = tsk->mm; WARN_ON(oom_killer_disabled); @@ -746,7 +754,9 @@ static void mark_oom_victim(struct task_struct *tsk) */ __thaw_task(tsk); atomic_inc(&oom_victims); - trace_mark_victim(tsk->pid); + cred = get_task_cred(tsk); + trace_mark_victim(tsk, cred->uid.val); + put_cred(cred); } /** diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f0accb740cda..c177d3ab3bd0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4967,6 +4967,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, restart: compaction_retries = 0; no_progress_loops = 0; + compact_result = COMPACT_SKIPPED; compact_priority = DEF_COMPACT_PRIORITY; cpuset_mems_cookie = read_mems_allowed_begin(); zonelist_iter_cookie = zonelist_iter_begin(); diff --git a/mm/percpu.c b/mm/percpu.c index e0a986818903..f58318cb04c0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2472,7 +2472,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, */ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) { - memblock_free_early(__pa(ai), ai->__ai_size); + memblock_free(__pa(ai), ai->__ai_size); } /** @@ -3134,7 +3134,7 @@ out_free_areas: out_free: pcpu_free_alloc_info(ai); if (areas) - memblock_free_early(__pa(areas), areas_size); + memblock_free(__pa(areas), areas_size); return rc; } #endif /* BUILD_EMBED_FIRST_CHUNK */ @@ -3256,7 +3256,7 @@ enomem: free_fn(page_address(pages[j]), PAGE_SIZE); rc = -ENOMEM; out_free_ar: - memblock_free_early(__pa(pages), pages_size); + memblock_free(__pa(pages), pages_size); pcpu_free_alloc_info(ai); return rc; } @@ -3286,7 +3286,7 @@ static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_dfl_fc_free(void *ptr, size_t size) { - memblock_free_early(__pa(ptr), size); + memblock_free(__pa(ptr), size); } void __init setup_per_cpu_areas(void) diff --git a/mm/slab.c b/mm/slab.c index f5b2246f832d..e53e50d6c29b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4226,11 +4226,14 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, #endif /* CONFIG_HARDENED_USERCOPY */ /** - * __ksize -- Uninstrumented ksize. + * __ksize -- Report full size of underlying allocation * @objp: pointer to the object * - * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same - * safety checks as ksize() with KASAN instrumentation enabled. + * This should only be used internally to query the true size of allocations. + * It is not meant to be a way to discover the usable size of an allocation + * after the fact. Instead, use kmalloc_size_roundup(). Using memory beyond + * the originally requested allocation size may trigger KASAN, UBSAN_BOUNDS, + * and/or FORTIFY_SOURCE. * * Return: size of the actual memory used by @objp in bytes */ diff --git a/mm/slab_common.c b/mm/slab_common.c index f684b06649c3..315e83f5daea 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -758,6 +758,26 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) return kmalloc_caches[kmalloc_type(flags)][index]; } +size_t kmalloc_size_roundup(size_t size) +{ + struct kmem_cache *c; + + /* Short-circuit the 0 size case. */ + if (unlikely(size == 0)) + return 0; + /* Short-circuit saturated "too-large" case. */ + if (unlikely(size == SIZE_MAX)) + return SIZE_MAX; + /* Above the smaller buckets, size is a multiple of page size. */ + if (size > KMALLOC_MAX_CACHE_SIZE) + return PAGE_SIZE << get_order(size); + + /* The flags don't matter since size_index is common to all. */ + c = kmalloc_slab(size, GFP_KERNEL); + return c ? c->object_size : 0; +} +EXPORT_SYMBOL(kmalloc_size_roundup); + #ifdef CONFIG_ZONE_DMA #define KMALLOC_DMA_NAME(sz) .name[KMALLOC_DMA] = "dma-kmalloc-" #sz, #else @@ -1285,20 +1305,6 @@ void kfree_sensitive(const void *p) } EXPORT_SYMBOL(kfree_sensitive); -/** - * ksize - get the actual amount of memory allocated for a given object - * @objp: Pointer to the object - * - * kmalloc may internally round up allocations and return more memory - * than requested. ksize() can be used to determine the actual amount of - * memory allocated. The caller may use this additional memory, even though - * a smaller amount of memory was initially specified with the kmalloc call. - * The caller must guarantee that objp points to a valid object previously - * allocated with either kmalloc() or kmem_cache_alloc(). The object - * must not be freed during the duration of the call. - * - * Return: size of the actual memory used by @objp in bytes - */ size_t ksize(const void *objp) { size_t size; diff --git a/mm/slob.c b/mm/slob.c index f3fc15df971a..d4c80bf1930d 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -567,6 +567,20 @@ void kfree(const void *block) } EXPORT_SYMBOL(kfree); +size_t kmalloc_size_roundup(size_t size) +{ + /* Short-circuit the 0 size case. */ + if (unlikely(size == 0)) + return 0; + /* Short-circuit saturated "too-large" case. */ + if (unlikely(size == SIZE_MAX)) + return SIZE_MAX; + + return ALIGN(size, ARCH_KMALLOC_MINALIGN); +} + +EXPORT_SYMBOL(kmalloc_size_roundup); + /* can't use ksize for kmem_cache_alloc memory, only kmalloc */ size_t __ksize(const void *block) { diff --git a/mm/sparse.c b/mm/sparse.c index 27092badd15b..df9f8459043e 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -451,7 +451,7 @@ static void *sparsemap_buf_end __meminitdata; static inline void __meminit sparse_buffer_free(unsigned long size) { WARN_ON(!sparsemap_buf || size == 0); - memblock_free_early(__pa(sparsemap_buf), size); + memblock_free(__pa(sparsemap_buf), size); } static void __init sparse_buffer_init(unsigned long size, int nid) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4c51af6ec255..d35cbf52f7c8 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -557,13 +557,13 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end, mask |= PGTBL_PGD_MODIFIED; err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask); if (err) - return err; + break; } while (pgd++, addr = next, addr != end); if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); - return 0; + return err; } /* diff --git a/mm/vmscan.c b/mm/vmscan.c index f162e9159296..7ec7559acc8e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4581,7 +4581,7 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) return NODE_RECLAIM_NOSCAN; ret = __node_reclaim(pgdat, gfp_mask, order); - clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags); + clear_bit_unlock(PGDAT_RECLAIM_LOCKED, &pgdat->flags); if (!ret) count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED); |