summary | refs | log | tree | commit | diff
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 65
1 files changed, 45 insertions, 20 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 5f3fdfdb14c7..13e38e89a1f3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -23,6 +23,7 @@
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sched/ext.h>
+#include <linux/sched/exec_state.h>
#include <linux/seq_file.h>
#include <linux/rtmutex.h>
#include <linux/init.h>
@@ -204,7 +205,7 @@ static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
* accounting is performed by the code assigning/releasing stacks to tasks.
* We need a zeroed memory without __GFP_ACCOUNT.
*/
-#define GFP_VMAP_STACK (GFP_KERNEL | __GFP_ZERO)
+#define GFP_VMAP_STACK (GFP_KERNEL | __GFP_ZERO | __GFP_SKIP_KASAN)
struct vm_stack {
struct rcu_head rcu;
@@ -342,7 +343,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
}
/* Reset stack metadata. */
- kasan_unpoison_range(vm_area->addr, THREAD_SIZE);
+ if (!kasan_hw_tags_enabled())
+ kasan_unpoison_range(vm_area->addr, THREAD_SIZE);
stack = kasan_reset_tag(vm_area->addr);
@@ -555,6 +557,7 @@ void free_task(struct task_struct *tsk)
if (tsk->flags & PF_KTHREAD)
free_kthread_struct(tsk);
bpf_task_storage_free(tsk);
+ put_task_exec_state(rcu_access_pointer(tsk->exec_state));
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
@@ -726,12 +729,12 @@ void __mmdrop(struct mm_struct *mm)
cleanup_lazy_tlbs(mm);
WARN_ON_ONCE(mm == current->active_mm);
+ mm_destroy_sched(mm);
mm_free_pgd(mm);
mm_free_id(mm);
destroy_context(mm);
mmu_notifier_subscriptions_destroy(mm);
check_mm(mm);
- put_user_ns(mm->user_ns);
mm_pasid_drop(mm);
mm_destroy_cid(mm);
percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
@@ -946,6 +949,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->seccomp.filter = NULL;
#endif
+ RCU_INIT_POINTER(tsk->exec_state, NULL);
+
setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
clear_tsk_need_resched(tsk);
@@ -1072,8 +1077,7 @@ static void mmap_init_lock(struct mm_struct *mm)
#endif
}
-static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
- struct user_namespace *user_ns)
+static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
{
mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
@@ -1101,6 +1105,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
#endif
mm_init_uprobes_state(mm);
hugetlb_count_init(mm);
+ futex_mm_init(mm);
mm_flags_clear_all(mm);
if (current->mm) {
@@ -1113,11 +1118,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm->def_flags = 0;
}
- if (futex_mm_init(mm))
- goto fail_mm_init;
-
if (mm_alloc_pgd(mm))
- goto fail_nopgd;
+ goto fail_mm_init;
if (mm_alloc_id(mm))
goto fail_noid;
@@ -1128,15 +1130,19 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
if (mm_alloc_cid(mm, p))
goto fail_cid;
+ if (mm_alloc_sched(mm))
+ goto fail_sched;
+
if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT,
NR_MM_COUNTERS))
goto fail_pcpu;
- mm->user_ns = get_user_ns(user_ns);
lru_gen_init_mm(mm);
return mm;
fail_pcpu:
+ mm_destroy_sched(mm);
+fail_sched:
mm_destroy_cid(mm);
fail_cid:
destroy_context(mm);
@@ -1144,8 +1150,6 @@ fail_nocontext:
mm_free_id(mm);
fail_noid:
mm_free_pgd(mm);
-fail_nopgd:
- futex_hash_free(mm);
fail_mm_init:
free_mm(mm);
return NULL;
@@ -1163,7 +1167,7 @@ struct mm_struct *mm_alloc(void)
return NULL;
memset(mm, 0, sizeof(*mm));
- return mm_init(mm, current, current_user_ns());
+ return mm_init(mm, current);
}
EXPORT_SYMBOL_IF_KUNIT(mm_alloc);
@@ -1527,7 +1531,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,
memcpy(mm, oldmm, sizeof(*mm));
- if (!mm_init(mm, tsk, mm->user_ns))
+ if (!mm_init(mm, tsk))
goto fail_nomem;
uprobe_start_dup_mmap();
@@ -1593,6 +1597,22 @@ static int copy_mm(u64 clone_flags, struct task_struct *tsk)
return 0;
}
+static int copy_exec_state(u64 clone_flags, struct task_struct *tsk)
+{
+ struct task_exec_state *exec_state;
+
+ /* CLONE_VM: tsk shares current's exec_state; take a reference on it and return 0. */
+ if (clone_flags & CLONE_VM) {
+ exec_state = rcu_dereference_protected(current->exec_state, true);
+ refcount_inc(&exec_state->count);
+ rcu_assign_pointer(tsk->exec_state, exec_state);
+ return 0;
+ }
+
+ /* No CLONE_VM: tsk inherits a fresh copy; task_exec_state_copy()'s result is returned as-is. */
+ return task_exec_state_copy(tsk);
+}
+
static int copy_fs(u64 clone_flags, struct task_struct *tsk)
{
struct fs_struct *fs = current->fs;
@@ -2090,6 +2110,9 @@ __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
+ retval = copy_exec_state(clone_flags, p);
+ if (retval)
+ goto bad_fork_free;
p->flags &= ~PF_KTHREAD;
if (args->kthread)
p->flags |= PF_KTHREAD;
@@ -2218,6 +2241,7 @@ __latent_entropy struct task_struct *copy_process(
lockdep_init_task(p);
p->blocked_on = NULL; /* not blocked yet */
+ p->blocked_donor = NULL; /* nobody is boosting p yet */
#ifdef CONFIG_BCACHE
p->sequential_io = 0;
@@ -2314,6 +2338,7 @@ __latent_entropy struct task_struct *copy_process(
#ifdef CONFIG_BLOCK
p->plug = NULL;
+ p->flags &= ~PF_BLOCK_TS;
#endif
futex_init_task(p);
@@ -2664,8 +2689,6 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
*
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
- *
- * args->exit_signal is expected to be checked for sanity by the caller.
*/
pid_t kernel_clone(struct kernel_clone_args *args)
{
@@ -2700,6 +2723,9 @@ pid_t kernel_clone(struct kernel_clone_args *args)
(args->pidfd == args->parent_tid))
return -EINVAL;
+ if (!valid_signal(args->exit_signal))
+ return -EINVAL;
+
/*
* Determine whether and which event to report to ptracer. When
* called from kernel_thread or CLONE_UNTRACED is explicitly
@@ -2898,11 +2924,9 @@ static noinline int copy_clone_args_from_user(struct kernel_clone_args *kargs,
return -EINVAL;
/*
- * Verify that higher 32bits of exit_signal are unset and that
- * it is a valid signal
+ * Verify that exit_signal has no bits set outside of CSIGNAL
*/
- if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) ||
- !valid_signal(args.exit_signal)))
+ if (unlikely(args.exit_signal & ~((u64)CSIGNAL)))
return -EINVAL;
if ((args.flags & CLONE_INTO_CGROUP) &&
@@ -3098,6 +3122,7 @@ void __init proc_caches_init(void)
sizeof(struct signal_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
NULL);
+ exec_state_init();
files_cachep = kmem_cache_create("files_cache",
sizeof(struct files_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,