diff options
Diffstat (limited to 'kernel/fork.c')
| -rw-r--r-- | kernel/fork.c | 65 |
1 files changed, 45 insertions, 20 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 5f3fdfdb14c7..13e38e89a1f3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -23,6 +23,7 @@ #include <linux/sched/task_stack.h> #include <linux/sched/cputime.h> #include <linux/sched/ext.h> +#include <linux/sched/exec_state.h> #include <linux/seq_file.h> #include <linux/rtmutex.h> #include <linux/init.h> @@ -204,7 +205,7 @@ static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); * accounting is performed by the code assigning/releasing stacks to tasks. * We need a zeroed memory without __GFP_ACCOUNT. */ -#define GFP_VMAP_STACK (GFP_KERNEL | __GFP_ZERO) +#define GFP_VMAP_STACK (GFP_KERNEL | __GFP_ZERO | __GFP_SKIP_KASAN) struct vm_stack { struct rcu_head rcu; @@ -342,7 +343,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) } /* Reset stack metadata. */ - kasan_unpoison_range(vm_area->addr, THREAD_SIZE); + if (!kasan_hw_tags_enabled()) + kasan_unpoison_range(vm_area->addr, THREAD_SIZE); stack = kasan_reset_tag(vm_area->addr); @@ -555,6 +557,7 @@ void free_task(struct task_struct *tsk) if (tsk->flags & PF_KTHREAD) free_kthread_struct(tsk); bpf_task_storage_free(tsk); + put_task_exec_state(rcu_access_pointer(tsk->exec_state)); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -726,12 +729,12 @@ void __mmdrop(struct mm_struct *mm) cleanup_lazy_tlbs(mm); WARN_ON_ONCE(mm == current->active_mm); + mm_destroy_sched(mm); mm_free_pgd(mm); mm_free_id(mm); destroy_context(mm); mmu_notifier_subscriptions_destroy(mm); check_mm(mm); - put_user_ns(mm->user_ns); mm_pasid_drop(mm); mm_destroy_cid(mm); percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS); @@ -946,6 +949,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->seccomp.filter = NULL; #endif + RCU_INIT_POINTER(tsk->exec_state, NULL); + setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); @@ -1072,8 +1077,7 @@ static void mmap_init_lock(struct mm_struct *mm) #endif } -static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, - struct user_namespace *user_ns) +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) { mt_init_flags(&mm->mm_mt, MM_MT_FLAGS); mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock); @@ -1101,6 +1105,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, #endif mm_init_uprobes_state(mm); hugetlb_count_init(mm); + futex_mm_init(mm); mm_flags_clear_all(mm); if (current->mm) { @@ -1113,11 +1118,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm->def_flags = 0; } - if (futex_mm_init(mm)) - goto fail_mm_init; - if (mm_alloc_pgd(mm)) - goto fail_nopgd; + goto fail_mm_init; if (mm_alloc_id(mm)) goto fail_noid; @@ -1128,15 +1130,19 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, if (mm_alloc_cid(mm, p)) goto fail_cid; + if (mm_alloc_sched(mm)) + goto fail_sched; + if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT, NR_MM_COUNTERS)) goto fail_pcpu; - mm->user_ns = get_user_ns(user_ns); lru_gen_init_mm(mm); return mm; fail_pcpu: + mm_destroy_sched(mm); +fail_sched: mm_destroy_cid(mm); fail_cid: destroy_context(mm); @@ -1144,8 +1150,6 @@ fail_nocontext: mm_free_id(mm); fail_noid: mm_free_pgd(mm); -fail_nopgd: - futex_hash_free(mm); fail_mm_init: free_mm(mm); return NULL; @@ -1163,7 +1167,7 @@ struct mm_struct *mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - return mm_init(mm, current, current_user_ns()); + return mm_init(mm, current); } EXPORT_SYMBOL_IF_KUNIT(mm_alloc); @@ -1527,7 +1531,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk, memcpy(mm, oldmm, sizeof(*mm)); - if (!mm_init(mm, tsk, mm->user_ns)) + if (!mm_init(mm, tsk)) goto fail_nomem; uprobe_start_dup_mmap(); @@ -1593,6 +1597,22 @@ static int copy_mm(u64 clone_flags, struct task_struct *tsk) return 0; } +static int copy_exec_state(u64 clone_flags, struct task_struct *tsk) +{ + struct task_exec_state *exec_state; + + /* CLONE_VM siblings refcount-share the parent's exec_state. */ + if (clone_flags & CLONE_VM) { + exec_state = rcu_dereference_protected(current->exec_state, true); + refcount_inc(&exec_state->count); + rcu_assign_pointer(tsk->exec_state, exec_state); + return 0; + } + + /* Everyone else inherits a fresh copy. */ + return task_exec_state_copy(tsk); +} + static int copy_fs(u64 clone_flags, struct task_struct *tsk) { struct fs_struct *fs = current->fs; @@ -2090,6 +2110,9 @@ __latent_entropy struct task_struct *copy_process( p = dup_task_struct(current, node); if (!p) goto fork_out; + retval = copy_exec_state(clone_flags, p); + if (retval) + goto bad_fork_free; p->flags &= ~PF_KTHREAD; if (args->kthread) p->flags |= PF_KTHREAD; @@ -2218,6 +2241,7 @@ __latent_entropy struct task_struct *copy_process( lockdep_init_task(p); p->blocked_on = NULL; /* not blocked yet */ + p->blocked_donor = NULL; /* nobody is boosting p yet */ #ifdef CONFIG_BCACHE p->sequential_io = 0; @@ -2314,6 +2338,7 @@ __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_BLOCK p->plug = NULL; + p->flags &= ~PF_BLOCK_TS; #endif futex_init_task(p); @@ -2664,8 +2689,6 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. - * - * args->exit_signal is expected to be checked for sanity by the caller. */ pid_t kernel_clone(struct kernel_clone_args *args) { @@ -2700,6 +2723,9 @@ pid_t kernel_clone(struct kernel_clone_args *args) (args->pidfd == args->parent_tid)) return -EINVAL; + if (!valid_signal(args->exit_signal)) + return -EINVAL; + /* * Determine whether and which event to report to ptracer. When * called from kernel_thread or CLONE_UNTRACED is explicitly @@ -2898,11 +2924,9 @@ static noinline int copy_clone_args_from_user(struct kernel_clone_args *kargs, return -EINVAL; /* - * Verify that higher 32bits of exit_signal are unset and that - * it is a valid signal + * Verify that higher 32bits of exit_signal are unset */ - if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) || - !valid_signal(args.exit_signal))) + if (unlikely(args.exit_signal & ~((u64)CSIGNAL))) return -EINVAL; if ((args.flags & CLONE_INTO_CGROUP) && @@ -3098,6 +3122,7 @@ void __init proc_caches_init(void) sizeof(struct signal_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + exec_state_init(); files_cachep = kmem_cache_create("files_cache", sizeof(struct files_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, |
