From cf475ad28ac35cc9ba612d67158f29b73b38b05d Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 29 Apr 2008 01:00:16 -0700 Subject: cgroups: add an owner to the mm_struct Remove the mem_cgroup member from mm_struct and instead adds an owner. This approach was suggested by Paul Menage. The advantage of this approach is that, once the mm->owner is known, using the subsystem id, the cgroup can be determined. It also allows several control groups that are virtually grouped by mm_struct, to exist independent of the memory controller i.e., without adding mem_cgroup's for each controller, to mm_struct. A new config option CONFIG_MM_OWNER is added and the memory resource controller selects this config option. This patch also adds cgroup callbacks to notify subsystems when mm->owner changes. The mm_cgroup_changed callback is called with the task_lock() of the new task held and is called just prior to changing the mm->owner. I am indebted to Paul Menage for the several reviews of this patchset and helping me make it lighter and simpler. This patch was tested on a powerpc box, it was compiled with both the MM_OWNER config turned on and off. After the thread group leader exits, it's moved to init_css_state by cgroup_exit(), thus all future charges from runnings threads would be redirected to the init_css_set's subsystem. Signed-off-by: Balbir Singh Cc: Pavel Emelianov Cc: Hugh Dickins Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: Hirokazu Takahashi Cc: David Rientjes , Cc: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Acked-by: Pekka Enberg Reviewed-by: Paul Menage Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 2a9d98c641ac..ae0f2c4e452b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk) EXPORT_SYMBOL_GPL(exit_fs); +#ifdef CONFIG_MM_OWNER +/* + * Task p is exiting and it owned mm, lets find a new owner for it + */ +static inline int +mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) +{ + /* + * If there are other users of the mm and the owner (us) is exiting + * we need to find a new owner to take on the responsibility. + */ + if (!mm) + return 0; + if (atomic_read(&mm->mm_users) <= 1) + return 0; + if (mm->owner != p) + return 0; + return 1; +} + +void mm_update_next_owner(struct mm_struct *mm) +{ + struct task_struct *c, *g, *p = current; + +retry: + if (!mm_need_new_owner(mm, p)) + return; + + read_lock(&tasklist_lock); + /* + * Search in the children + */ + list_for_each_entry(c, &p->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search in the siblings + */ + list_for_each_entry(c, &p->parent->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search through everything else. We should not get + * here often + */ + do_each_thread(g, c) { + if (c->mm == mm) + goto assign_new_owner; + } while_each_thread(g, c); + + read_unlock(&tasklist_lock); + return; + +assign_new_owner: + BUG_ON(c == p); + get_task_struct(c); + /* + * The task_lock protects c->mm from changing. + * We always want mm->owner->mm == mm + */ + task_lock(c); + /* + * Delay read_unlock() till we have the task_lock() + * to ensure that c does not slip away underneath us + */ + read_unlock(&tasklist_lock); + if (c->mm != mm) { + task_unlock(c); + put_task_struct(c); + goto retry; + } + cgroup_mm_owner_callbacks(mm->owner, c); + mm->owner = c; + task_unlock(c); + put_task_struct(c); +} +#endif /* CONFIG_MM_OWNER */ + /* * Turn us into a lazy TLB process if we * aren't already.. @@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk) /* We don't want this task to be frozen prematurely */ clear_freeze_flag(tsk); task_unlock(tsk); + mm_update_next_owner(mm); mmput(mm); } -- cgit v1.2.3 From bfc4b0890af566940de6e7aeb4b5faf46d3c3513 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:52:36 -0700 Subject: signals: do_group_exit(): use signal_group_exit() more consistently do_group_exit() checks SIGNAL_GROUP_EXIT to avoid taking sighand->siglock. Since ed5d2cac114202fe2978a9cbcab8f5032796d538 exec() doesn't set this flag, we should use signal_group_exit(). This is not needed for correctness, but can speedup the multithreaded exec and makes the code more consistent. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index ae0f2c4e452b..6d019aa8522e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1115,12 +1115,13 @@ asmlinkage long sys_exit(int error_code) NORET_TYPE void do_group_exit(int exit_code) { + struct signal_struct *sig = current->signal; + BUG_ON(exit_code & 0x80); /* core dumps don't get here */ - if (current->signal->flags & SIGNAL_GROUP_EXIT) - exit_code = current->signal->group_exit_code; + if (signal_group_exit(sig)) + exit_code = sig->group_exit_code; else if (!thread_group_empty(current)) { - struct signal_struct *const sig = current->signal; struct sighand_struct *const sighand = current->sighand; spin_lock_irq(&sighand->siglock); if (signal_group_exit(sig)) -- cgit v1.2.3 From d839fd4d2e95a5fbc4d50aa9d17eed6a5f2094e6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:11 -0700 Subject: ptrace: introduce task_detached() helper exit.c has numerous "->exit_signal == -1" comparisons, this check is subtle and deserves a helper. Imho makes the code more parseable for humans. At least it's surely more greppable. Also, a couple of whitespace cleanups. No functional changes. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 6d019aa8522e..4035d391a0d3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -52,6 +52,11 @@ static void exit_mm(struct task_struct * tsk); +static inline int task_detached(struct task_struct *p) +{ + return p->exit_signal == -1; +} + static void __unhash_process(struct task_struct *p) { nr_threads--; @@ -160,7 +165,7 @@ repeat: zap_leader = 0; leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { - BUG_ON(leader->exit_signal == -1); + BUG_ON(task_detached(leader)); do_notify_parent(leader, leader->exit_signal); /* * If we were the last child thread and the leader has @@ -170,7 +175,7 @@ repeat: * do_notify_parent() will have marked it self-reaping in * that case. */ - zap_leader = (leader->exit_signal == -1); + zap_leader = task_detached(leader); } write_unlock_irq(&tasklist_lock); @@ -721,14 +726,14 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) return; /* We don't want people slaying init. */ - if (p->exit_signal != -1) + if (!task_detached(p)) p->exit_signal = SIGCHLD; /* If we'd notified the old parent about this child's death, * also notify the new parent. */ if (!traced && p->exit_state == EXIT_ZOMBIE && - p->exit_signal != -1 && thread_group_empty(p)) + !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); kill_orphaned_pgrp(p, father); @@ -781,18 +786,18 @@ static void forget_original_parent(struct task_struct *father) } else { /* reparent ptraced task to its real parent */ __ptrace_unlink (p); - if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 && + if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); } /* - * if the ptraced child is a zombie with exit_signal == -1 - * we must collect it before we exit, or it will remain - * zombie forever since we prevented it from self-reap itself - * while it was being traced by us, to be able to see it in wait4. + * if the ptraced child is a detached zombie we must collect + * it before we exit, or it will remain zombie forever since + * we prevented it from self-reap itself while it was being + * traced by us, to be able to see it in wait4. */ - if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1)) + if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p))) list_add(&p->ptrace_list, &ptrace_dead); } @@ -849,26 +854,26 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * we have changed execution domain as these two values started * the same after a fork. */ - if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && + if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && (tsk->parent_exec_id != tsk->real_parent->self_exec_id || - tsk->self_exec_id != tsk->parent_exec_id) - && !capable(CAP_KILL)) + tsk->self_exec_id != tsk->parent_exec_id) && + !capable(CAP_KILL)) tsk->exit_signal = SIGCHLD; - /* If something other than our normal parent is ptracing us, then * send it a SIGCHLD instead of honoring exit_signal. exit_signal * only has special meaning to our real parent. */ - if (tsk->exit_signal != -1 && thread_group_empty(tsk)) { - int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD; + if (!task_detached(tsk) && thread_group_empty(tsk)) { + int signal = (tsk->parent == tsk->real_parent) + ? tsk->exit_signal : SIGCHLD; do_notify_parent(tsk, signal); } else if (tsk->ptrace) { do_notify_parent(tsk, SIGCHLD); } state = EXIT_ZOMBIE; - if (tsk->exit_signal == -1 && likely(!tsk->ptrace)) + if (task_detached(tsk) && likely(!tsk->ptrace)) state = EXIT_DEAD; tsk->exit_state = state; @@ -1173,7 +1178,7 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, * Do not consider detached threads that are * not ptraced: */ - if (p->exit_signal == -1 && !p->ptrace) + if (task_detached(p) && !p->ptrace) return 0; /* Wait for all children (clone and not) if __WALL is set; @@ -1365,9 +1370,9 @@ static int wait_task_zombie(struct task_struct *p, int noreap, * If it's still not detached after that, don't release * it now. */ - if (p->exit_signal != -1) { + if (!task_detached(p)) { do_notify_parent(p, p->exit_signal); - if (p->exit_signal != -1) { + if (!task_detached(p)) { p->exit_state = EXIT_ZOMBIE; p = NULL; } -- cgit v1.2.3 From 376e1d2531860358c8a79fecf5f4f42994d03c4d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:12 -0700 Subject: reparent_thread: use same_thread_group() Trivial, use same_thread_group() in reparent_thread(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 4035d391a0d3..413c81ec858e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -722,7 +722,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) /* If this is a threaded reparent there is no need to * notify anyone anything has happened. */ - if (p->real_parent->group_leader == father->group_leader) + if (same_thread_group(p->real_parent, father)) return; /* We don't want people slaying init. */ -- cgit v1.2.3 From 2800d8d19e51414403df8144eaa214bb03400b87 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:12 -0700 Subject: document de_thread() with exit_notify() connection Add a couple of small comments, it is not easy to see what this code does. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 413c81ec858e..879ed6e1c883 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -877,6 +877,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) state = EXIT_DEAD; tsk->exit_state = state; + /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && tsk->signal->notify_count < 0 && tsk->signal->group_exit_task) -- cgit v1.2.3 From 53b6f9fbd3b63af14b4f6268e8b5b80d178d05bc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:13 -0700 Subject: ptrace: introduce ptrace_reparented() helper Add another trivial helper for the sake of grep. It also auto-documents the fact that ->parent != real_parent implies ->ptrace. No functional changes. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 879ed6e1c883..0da2921b1e7f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -698,7 +698,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) if (unlikely(traced)) { /* Preserve ptrace links if someone else is tracing this child. */ list_del_init(&p->ptrace_list); - if (p->parent != p->real_parent) + if (ptrace_reparented(p)) list_add(&p->ptrace_list, &p->real_parent->ptrace_children); } else { /* If this child is being traced, then we're the one tracing it @@ -865,8 +865,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * only has special meaning to our real parent. */ if (!task_detached(tsk) && thread_group_empty(tsk)) { - int signal = (tsk->parent == tsk->real_parent) - ? tsk->exit_signal : SIGCHLD; + int signal = ptrace_reparented(tsk) ? + SIGCHLD : tsk->exit_signal; do_notify_parent(tsk, signal); } else if (tsk->ptrace) { do_notify_parent(tsk, SIGCHLD); @@ -1269,8 +1269,7 @@ static int wait_task_zombie(struct task_struct *p, int noreap, return 0; } - /* traced means p->ptrace, but not vice versa */ - traced = (p->real_parent != p->parent); + traced = ptrace_reparented(p); if (likely(!traced)) { struct signal_struct *psig; -- cgit v1.2.3 From 7d8da0962eaee30b4a380ded177349bfbdd6ac46 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:54:27 -0700 Subject: pids: __set_special_pids: use change_pid() helper Use change_pid() instead of detach_pid() + attach_pid() in __set_special_pids(). This way task_session() is not NULL in between. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 0da2921b1e7f..d3ad54677f9c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -334,13 +334,11 @@ void __set_special_pids(struct pid *pid) pid_t nr = pid_nr(pid); if (task_session(curr) != pid) { - detach_pid(curr, PIDTYPE_SID); - attach_pid(curr, PIDTYPE_SID, pid); + change_pid(curr, PIDTYPE_SID, pid); set_task_session(curr, nr); } if (task_pgrp(curr) != pid) { - detach_pid(curr, PIDTYPE_PGID); - attach_pid(curr, PIDTYPE_PGID, pid); + change_pid(curr, PIDTYPE_PGID, pid); set_task_pgrp(curr, nr); } } -- cgit v1.2.3 From 9f3acc3140444a900ab280de942291959f0f615d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Apr 2008 07:44:08 -0400 Subject: [PATCH] split linux/file.h Initial splitoff of the low-level stuff; taken to fdtable.h Signed-off-by: Al Viro --- kernel/exit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index d3ad54677f9c..1510f78a0ffa 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3