Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c  157
1 file changed, 97 insertions(+), 60 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 490495a39c7e..ddafdfac9456 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -29,7 +29,7 @@
#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
-#include <linux/cpuset.h>
+#include <linux/cgroup.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
@@ -50,6 +50,7 @@
#include <linux/taskstats_kern.h>
#include <linux/random.h>
#include <linux/tty.h>
+#include <linux/proc_fs.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -116,7 +117,7 @@ EXPORT_SYMBOL(free_task);
void __put_task_struct(struct task_struct *tsk)
{
- WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
+ WARN_ON(!tsk->exit_state);
WARN_ON(atomic_read(&tsk->usage));
WARN_ON(tsk == current);
@@ -205,7 +206,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
}
#ifdef CONFIG_MMU
-static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
struct vm_area_struct *mpnt, *tmp, **pprev;
struct rb_node **rb_link, *rb_parent;
@@ -268,7 +269,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
get_file(file);
if (tmp->vm_flags & VM_DENYWRITE)
atomic_dec(&inode->i_writecount);
-
+
/* insert tmp into the share list, just after mpnt */
spin_lock(&file->f_mapping->i_mmap_lock);
tmp->vm_truncate_count = mpnt->vm_truncate_count;
@@ -331,7 +332,7 @@ static inline void mm_free_pgd(struct mm_struct * mm)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
@@ -583,7 +584,7 @@ fail_nomem:
return retval;
}
-static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
{
struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
/* We don't need to lock fs - think why ;-) */
@@ -615,7 +616,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
EXPORT_SYMBOL_GPL(copy_fs_struct);
-static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
{
if (clone_flags & CLONE_FS) {
atomic_inc(&current->fs->count);
@@ -738,8 +739,8 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
/* compute the remainder to be cleared */
size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
- /* This is long word aligned thus could use a optimized version */
- memset(new_fds, 0, size);
+ /* This is long word aligned thus could use a optimized version */
+ memset(new_fds, 0, size);
if (new_fdt->max_fds > open_files) {
int left = (new_fdt->max_fds-open_files)/8;
@@ -818,7 +819,7 @@ int unshare_files(void)
EXPORT_SYMBOL(unshare_files);
-static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
{
struct sighand_struct *sig;
@@ -841,7 +842,7 @@ void __cleanup_sighand(struct sighand_struct *sighand)
kmem_cache_free(sighand_cachep, sighand);
}
-static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
{
struct signal_struct *sig;
int ret;
@@ -923,7 +924,7 @@ void __cleanup_signal(struct signal_struct *sig)
kmem_cache_free(signal_cachep, sig);
}
-static inline void cleanup_signal(struct task_struct *tsk)
+static void cleanup_signal(struct task_struct *tsk)
{
struct signal_struct *sig = tsk->signal;
@@ -933,7 +934,7 @@ static inline void cleanup_signal(struct task_struct *tsk)
__cleanup_signal(sig);
}
-static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+static void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;
@@ -942,16 +943,17 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
if (!(clone_flags & CLONE_PTRACE))
p->ptrace = 0;
p->flags = new_flags;
+ clear_freeze_flag(p);
}
asmlinkage long sys_set_tid_address(int __user *tidptr)
{
current->clear_child_tid = tidptr;
- return current->pid;
+ return task_pid_vnr(current);
}
-static inline void rt_mutex_init_task(struct task_struct *p)
+static void rt_mutex_init_task(struct task_struct *p)
{
spin_lock_init(&p->pi_lock);
#ifdef CONFIG_RT_MUTEXES
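[Annotation: for orientation, a sketch of the pid accessor family that the task_pid_vnr() change above leans on. These declarations paraphrase linux/sched.h of this era and are not part of the patch; the one-line glosses are a reading of the call sites, not kernel documentation.]

	pid_t task_pid_nr(struct task_struct *tsk);	/* number in the initial namespace */
	pid_t task_pid_nr_ns(struct task_struct *tsk,
			     struct pid_namespace *ns);	/* number as seen from ns */
	pid_t task_pid_vnr(struct task_struct *tsk);	/* number as seen from current's namespace */

With nested pid namespaces, sys_set_tid_address() must report the caller's pid as the caller itself observes it, hence the switch away from the raw current->pid.
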
@@ -972,12 +974,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
- int __user *parent_tidptr,
int __user *child_tidptr,
struct pid *pid)
{
int retval;
- struct task_struct *p = NULL;
+ struct task_struct *p;
+ int cgroup_callbacks_done = 0;
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
@@ -1041,12 +1043,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->did_exec = 0;
delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
copy_flags(clone_flags, p);
- p->pid = pid_nr(pid);
- retval = -EFAULT;
- if (clone_flags & CLONE_PARENT_SETTID)
- if (put_user(p->pid, parent_tidptr))
- goto bad_fork_cleanup_delays_binfmt;
-
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
p->vfork_done = NULL;
@@ -1058,6 +1054,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->utime = cputime_zero;
p->stime = cputime_zero;
p->gtime = cputime_zero;
+ p->utimescaled = cputime_zero;
+ p->stimescaled = cputime_zero;
#ifdef CONFIG_TASK_XACCT
p->rchar = 0; /* I/O counter: bytes read */
@@ -1068,12 +1066,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
task_io_accounting_init(p);
acct_clear_integrals(p);
- p->it_virt_expires = cputime_zero;
+ p->it_virt_expires = cputime_zero;
p->it_prof_expires = cputime_zero;
- p->it_sched_expires = 0;
- INIT_LIST_HEAD(&p->cpu_timers[0]);
- INIT_LIST_HEAD(&p->cpu_timers[1]);
- INIT_LIST_HEAD(&p->cpu_timers[2]);
+ p->it_sched_expires = 0;
+ INIT_LIST_HEAD(&p->cpu_timers[0]);
+ INIT_LIST_HEAD(&p->cpu_timers[1]);
+ INIT_LIST_HEAD(&p->cpu_timers[2]);
p->lock_depth = -1; /* -1 = no lock */
do_posix_clock_monotonic_gettime(&p->start_time);
@@ -1083,15 +1081,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->security = NULL;
#endif
p->io_context = NULL;
- p->io_wait = NULL;
p->audit_context = NULL;
- cpuset_fork(p);
+ cgroup_fork(p);
#ifdef CONFIG_NUMA
p->mempolicy = mpol_copy(p->mempolicy);
if (IS_ERR(p->mempolicy)) {
retval = PTR_ERR(p->mempolicy);
p->mempolicy = NULL;
- goto bad_fork_cleanup_cpuset;
+ goto bad_fork_cleanup_cgroup;
}
mpol_fix_fork_child_flag(p);
#endif
@@ -1124,10 +1121,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->blocked_on = NULL; /* not blocked yet */
#endif
- p->tgid = p->pid;
- if (clone_flags & CLONE_THREAD)
- p->tgid = current->tgid;
-
if ((retval = security_task_alloc(p)))
goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
@@ -1153,6 +1146,24 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (retval)
goto bad_fork_cleanup_namespaces;
+ if (pid != &init_struct_pid) {
+ retval = -ENOMEM;
+ pid = alloc_pid(task_active_pid_ns(p));
+ if (!pid)
+ goto bad_fork_cleanup_namespaces;
+
+ if (clone_flags & CLONE_NEWPID) {
+ retval = pid_ns_prepare_proc(task_active_pid_ns(p));
+ if (retval < 0)
+ goto bad_fork_free_pid;
+ }
+ }
+
+ p->pid = pid_nr(pid);
+ p->tgid = p->pid;
+ if (clone_flags & CLONE_THREAD)
+ p->tgid = current->tgid;
+
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
* Clear TID on mm_release()?
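[Annotation: the pid is now allocated inside copy_process(), after copy_namespaces(), so a CLONE_NEWPID child draws its number from the namespace it was just given; pid_ns_prepare_proc() sets up that namespace's /proc instance. A sketch of the interfaces this hunk relies on — signatures inferred from the call sites above; the glosses are a reading, not documentation.]

	struct pid *alloc_pid(struct pid_namespace *ns);	/* NULL on failure -> -ENOMEM above */
	pid_t pid_nr(struct pid *pid);				/* global number backing p->pid */
	void free_pid(struct pid *pid);				/* unwind partner, see bad_fork_free_pid */
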
@@ -1202,6 +1213,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
+ /* Now that the task is set up, run cgroup callbacks if
+ * necessary. We need to run them before the task is visible
+ * on the tasklist. */
+ cgroup_fork_callbacks(p);
+ cgroup_callbacks_done = 1;
+
/* Need tasklist lock for parent etc handling! */
write_lock_irq(&tasklist_lock);
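[Annotation: the cgroup hooks now bracket the whole fork path. A sketch of the lifecycle as this patch wires it up; the glosses are a reading of the call sites, not cgroup documentation.]

	cgroup_fork(p);			/* early: point the child at the parent's cgroups, no callbacks */
	cgroup_fork_callbacks(p);	/* subsystem fork callbacks, before tasklist insertion */
	cgroup_post_fork(p);		/* after the task is visible on the tasklist */
	cgroup_exit(p, callbacks_done);	/* error unwind; the flag records whether callbacks ran */
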
@@ -1239,12 +1256,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* A fatal signal pending means that current will exit, so the new
* thread can't slip out of an OOM kill (or normal SIGKILL).
*/
- recalc_sigpending();
+ recalc_sigpending();
if (signal_pending(current)) {
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -ERESTARTNOINTR;
- goto bad_fork_cleanup_namespaces;
+ goto bad_fork_free_pid;
}
if (clone_flags & CLONE_THREAD) {
@@ -1273,11 +1290,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
__ptrace_link(p, current->parent);
if (thread_group_leader(p)) {
- p->signal->tty = current->signal->tty;
- p->signal->pgrp = process_group(current);
- set_signal_session(p->signal, process_session(current));
- attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
- attach_pid(p, PIDTYPE_SID, task_session(current));
+ if (clone_flags & CLONE_NEWPID) {
+ p->nsproxy->pid_ns->child_reaper = p;
+ p->signal->tty = NULL;
+ set_task_pgrp(p, p->pid);
+ set_task_session(p, p->pid);
+ attach_pid(p, PIDTYPE_PGID, pid);
+ attach_pid(p, PIDTYPE_SID, pid);
+ } else {
+ p->signal->tty = current->signal->tty;
+ set_task_pgrp(p, task_pgrp_nr(current));
+ set_task_session(p, task_session_nr(current));
+ attach_pid(p, PIDTYPE_PGID,
+ task_pgrp(current));
+ attach_pid(p, PIDTYPE_SID,
+ task_session(current));
+ }
list_add_tail_rcu(&p->tasks, &init_task.tasks);
__get_cpu_var(process_counts)++;
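[Annotation: in the CLONE_NEWPID branch above, the first task of a new pid namespace becomes its own child_reaper, process group and session. A minimal userspace sketch of the resulting behaviour — not part of the patch; assumes a pid-namespace-enabled kernel and CAP_SYS_ADMIN.]

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

static char stack[64 * 1024];

static int child(void *arg)
{
	/* Inside the new namespace this task is pid 1: the
	 * namespace's init and child_reaper. */
	printf("child sees pid %d\n", (int)getpid());
	return 0;
}

int main(void)
{
	pid_t pid = clone(child, stack + sizeof(stack),
			  CLONE_NEWPID | SIGCHLD, NULL);
	if (pid < 0) {
		perror("clone");	/* typically EPERM without CAP_SYS_ADMIN */
		return 1;
	}
	/* The parent names the same task by a number from its
	 * own namespace. */
	printf("parent sees pid %d\n", (int)pid);
	waitpid(pid, NULL, 0);
	return 0;
}
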
@@ -1290,8 +1318,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
+ cgroup_post_fork(p);
return p;
+bad_fork_free_pid:
+ if (pid != &init_struct_pid)
+ free_pid(pid);
bad_fork_cleanup_namespaces:
exit_task_namespaces(p);
bad_fork_cleanup_keys:
@@ -1316,10 +1348,9 @@ bad_fork_cleanup_security:
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
mpol_free(p->mempolicy);
-bad_fork_cleanup_cpuset:
+bad_fork_cleanup_cgroup:
#endif
- cpuset_exit(p);
-bad_fork_cleanup_delays_binfmt:
+ cgroup_exit(p, cgroup_callbacks_done);
delayacct_tsk_free(p);
if (p->binfmt)
module_put(p->binfmt->module);
@@ -1346,7 +1377,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
struct task_struct *task;
struct pt_regs regs;
- task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL,
+ task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
&init_struct_pid);
if (!IS_ERR(task))
init_idle(task, cpu);
@@ -1354,7 +1385,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
return task;
}
-static inline int fork_traceflag (unsigned clone_flags)
+static int fork_traceflag(unsigned clone_flags)
{
if (clone_flags & CLONE_UNTRACED)
return 0;
@@ -1385,19 +1416,16 @@ long do_fork(unsigned long clone_flags,
{
struct task_struct *p;
int trace = 0;
- struct pid *pid = alloc_pid();
long nr;
- if (!pid)
- return -EAGAIN;
- nr = pid->nr;
if (unlikely(current->ptrace)) {
trace = fork_traceflag (clone_flags);
if (trace)
clone_flags |= CLONE_PTRACE;
}
- p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+ p = copy_process(clone_flags, stack_start, regs, stack_size,
+ child_tidptr, NULL);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
@@ -1405,6 +1433,17 @@ long do_fork(unsigned long clone_flags,
if (!IS_ERR(p)) {
struct completion vfork;
+ /*
+ * this is enough to call pid_nr_ns here, but this if
+ * improves optimisation of regular fork()
+ */
+ nr = (clone_flags & CLONE_NEWPID) ?
+ task_pid_nr_ns(p, current->nsproxy->pid_ns) :
+ task_pid_vnr(p);
+
+ if (clone_flags & CLONE_PARENT_SETTID)
+ put_user(nr, parent_tidptr);
+
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
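[Annotation: the CLONE_PARENT_SETTID store moved here from copy_process() because the value written must be the child's pid as the parent's namespace sees it, which is exactly the nr computed above. A small sketch of the userspace-visible contract — not from the patch; glibc's clone() forwards &ptid as the kernel's parent_tidptr.]

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>

static char stack[16 * 1024];

static int noop(void *arg) { return 0; }

int main(void)
{
	pid_t ptid = 0;
	pid_t pid = clone(noop, stack + sizeof(stack),
			  CLONE_PARENT_SETTID | SIGCHLD, NULL, &ptid);
	if (pid < 0)
		return 1;
	/* Both numbers are expressed in the parent's pid namespace. */
	printf("clone returned %d, *parent_tidptr = %d\n",
	       (int)pid, (int)ptid);
	waitpid(pid, NULL, 0);
	return 0;
}
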
@@ -1438,7 +1477,6 @@ long do_fork(unsigned long clone_flags,
}
}
} else {
- free_pid(pid);
nr = PTR_ERR(p);
}
return nr;
@@ -1483,7 +1521,7 @@ void __init proc_caches_init(void)
* Check constraints on flags passed to the unshare system call and
* force unsharing of additional process context as appropriate.
*/
-static inline void check_unshare_flags(unsigned long *flags_ptr)
+static void check_unshare_flags(unsigned long *flags_ptr)
{
/*
* If unsharing a thread from a thread group, must also
@@ -1615,7 +1653,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
struct files_struct *fd, *new_fd = NULL;
struct sem_undo_list *new_ulist = NULL;
- struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+ struct nsproxy *new_nsproxy = NULL;
check_unshare_flags(&unshare_flags);
@@ -1645,14 +1683,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) {
- task_lock(current);
-
if (new_nsproxy) {
- old_nsproxy = current->nsproxy;
- current->nsproxy = new_nsproxy;
- new_nsproxy = old_nsproxy;
+ switch_task_namespaces(current, new_nsproxy);
+ new_nsproxy = NULL;
}
+ task_lock(current);
+
if (new_fs) {
fs = current->fs;
current->fs = new_fs;