From e25e2cbb4c6679bed5f52fb0f2cc381688297901 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 Dec 2011 18:12:21 -0800 Subject: cgroup: add cgroup_root_mutex cgroup wants to make threadgroup stable while modifying cgroup hierarchies which will introduce locking dependency on cred_guard_mutex from cgroup_mutex. This unfortunately completes circular dependency. A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem B. namespace_sem -> cgroup_mutex B is from cgroup_show_options() and this patch breaks it by introducing another mutex cgroup_root_mutex which nests inside cgroup_mutex and protects cgroupfs_root. Signed-off-by: Tejun Heo Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Li Zefan Cc: Oleg Nesterov --- kernel/cgroup.c | 64 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 22 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d9d5648f3cdc..6545fd61b10d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -63,7 +63,24 @@ #include +/* + * cgroup_mutex is the master lock. Any modification to cgroup or its + * hierarchy must be performed while holding it. + * + * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify + * cgroupfs_root of any cgroup hierarchy - subsys list, flags, + * release_agent_path and so on. Modifying requires both cgroup_mutex and + * cgroup_root_mutex. Readers can acquire either of the two. This is to + * break the following locking order cycle. + * + * A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem + * B. namespace_sem -> cgroup_mutex + * + * B happens only through cgroup_show_options() and using cgroup_root_mutex + * breaks it. + */ static DEFINE_MUTEX(cgroup_mutex); +static DEFINE_MUTEX(cgroup_root_mutex); /* * Generate an array of cgroup subsystem pointers. 
At boot time, this is @@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, int i; BUG_ON(!mutex_is_locked(&cgroup_mutex)); + BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); removed_bits = root->actual_subsys_bits & ~final_bits; added_bits = final_bits & ~root->actual_subsys_bits; @@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info; struct cgroup_subsys *ss; - mutex_lock(&cgroup_mutex); + mutex_lock(&cgroup_root_mutex); for_each_subsys(root, ss) seq_printf(seq, ",%s", ss->name); if (test_bit(ROOT_NOPREFIX, &root->flags)) @@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_printf(seq, ",name=%s", root->name); - mutex_unlock(&cgroup_mutex); + mutex_unlock(&cgroup_root_mutex); return 0; } @@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgroup_mutex); + mutex_lock(&cgroup_root_mutex); /* See what subsystems are wanted */ ret = parse_cgroupfs_options(data, &opts); @@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) out_unlock: kfree(opts.release_agent); kfree(opts.name); + mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex); return ret; @@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, int ret = 0; struct super_block *sb; struct cgroupfs_root *new_root; + struct inode *inode; /* First find the desired set of subsystems */ mutex_lock(&cgroup_mutex); @@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, /* We used the new root structure, so this is a new hierarchy */ struct list_head tmp_cg_links; struct cgroup *root_cgrp = &root->top_cgroup; - struct inode *inode; struct 
cgroupfs_root *existing_root; const struct cred *cred; int i; @@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, mutex_lock(&inode->i_mutex); mutex_lock(&cgroup_mutex); + mutex_lock(&cgroup_root_mutex); - if (strlen(root->name)) { - /* Check for name clashes with existing mounts */ - for_each_active_root(existing_root) { - if (!strcmp(existing_root->name, root->name)) { - ret = -EBUSY; - mutex_unlock(&cgroup_mutex); - mutex_unlock(&inode->i_mutex); - goto drop_new_super; - } - } - } + /* Check for name clashes with existing mounts */ + ret = -EBUSY; + if (strlen(root->name)) + for_each_active_root(existing_root) + if (!strcmp(existing_root->name, root->name)) + goto unlock_drop; /* * We're accessing css_set_count without locking @@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, * have some link structures left over */ ret = allocate_cg_links(css_set_count, &tmp_cg_links); - if (ret) { - mutex_unlock(&cgroup_mutex); - mutex_unlock(&inode->i_mutex); - goto drop_new_super; - } + if (ret) + goto unlock_drop; ret = rebind_subsystems(root, root->subsys_bits); if (ret == -EBUSY) { - mutex_unlock(&cgroup_mutex); - mutex_unlock(&inode->i_mutex); free_cg_links(&tmp_cg_links); - goto drop_new_super; + goto unlock_drop; } /* * There must be no failure case after here, since rebinding @@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, cred = override_creds(&init_cred); cgroup_populate_dir(root_cgrp); revert_creds(cred); + mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); } else { @@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, kfree(opts.name); return dget(sb->s_root); + unlock_drop: + mutex_unlock(&cgroup_root_mutex); + mutex_unlock(&cgroup_mutex); + mutex_unlock(&inode->i_mutex); drop_new_super: deactivate_locked_super(sb); drop_modules: @@ -1639,6 +1655,7 @@ static 
void cgroup_kill_sb(struct super_block *sb) { BUG_ON(!list_empty(&cgrp->sibling)); mutex_lock(&cgroup_mutex); + mutex_lock(&cgroup_root_mutex); /* Rebind all subsystems back to the default hierarchy */ ret = rebind_subsystems(root, 0); @@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) { root_count--; } + mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); kill_litter_super(sb); @@ -2311,7 +2329,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, return -EINVAL; if (!cgroup_lock_live_group(cgrp)) return -ENODEV; + mutex_lock(&cgroup_root_mutex); strcpy(cgrp->root->release_agent_path, buffer); + mutex_unlock(&cgroup_root_mutex); cgroup_unlock(); return 0; } -- cgit v1.2.3 From 257058ae2b971646b96ab3a15605ac69186e562a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 Dec 2011 18:12:21 -0800 Subject: threadgroup: rename signal->threadgroup_fork_lock to ->group_rwsem Make the following renames to prepare for extension of threadgroup locking. * s/signal->threadgroup_fork_lock/signal->group_rwsem/ * s/threadgroup_fork_read_lock()/threadgroup_change_begin()/ * s/threadgroup_fork_read_unlock()/threadgroup_change_end()/ * s/threadgroup_fork_write_lock()/threadgroup_lock()/ * s/threadgroup_fork_write_unlock()/threadgroup_unlock()/ This patch doesn't cause any behavior change. -v2: Rename threadgroup_change_done() to threadgroup_change_end() per KAMEZAWA's suggestion. 
Signed-off-by: Tejun Heo Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Li Zefan Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage --- kernel/cgroup.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6545fd61b10d..b409df3b2e9d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2003,8 +2003,8 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, * @cgrp: the cgroup to attach to * @leader: the threadgroup leader task_struct of the group to be attached * - * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will - * take task_lock of each thread in leader's threadgroup individually in turn. + * Call holding cgroup_mutex and the group_rwsem of the leader. Will take + * task_lock of each thread in leader's threadgroup individually in turn. */ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) { @@ -2030,8 +2030,8 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * step 0: in order to do expensive, possibly blocking operations for * every thread, we cannot iterate the thread group list, since it needs * rcu or tasklist locked. instead, build an array of all threads in the - * group - threadgroup_fork_lock prevents new threads from appearing, - * and if threads exit, this will just be an over-estimate. + * group - group_rwsem prevents new threads from appearing, and if + * threads exit, this will just be an over-estimate. */ group_size = get_nr_threads(leader); /* flex_array supports very large thread-groups better than kmalloc. */ @@ -2249,7 +2249,6 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) cgroup_unlock(); return -ESRCH; } - /* * even if we're attaching all tasks in the thread group, we * only need to check permissions on one of them. 
@@ -2273,9 +2272,9 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) } if (threadgroup) { - threadgroup_fork_write_lock(tsk); + threadgroup_lock(tsk); ret = cgroup_attach_proc(cgrp, tsk); - threadgroup_fork_write_unlock(tsk); + threadgroup_unlock(tsk); } else { ret = cgroup_attach_task(cgrp, tsk); } -- cgit v1.2.3 From cd3d095275374220921fcf0d4e0c16584b26ddbc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 Dec 2011 18:12:21 -0800 Subject: cgroup: always lock threadgroup during migration Update cgroup to take advantage of the fact that threadgroup_lock() guarantees stable threadgroup. * Lock threadgroup even if the target is a single task. This guarantees that the target tasks stay stable during migration regardless of the target type. * Remove PF_EXITING early exit optimization from attach_task_by_pid() and check it in cgroup_task_migrate() instead. The optimization was for rather cold path to begin with and PF_EXITING state can be trusted throughout migration by checking it after locking threadgroup. * Don't add PF_EXITING tasks to target task array in cgroup_attach_proc(). This ensures that task migration is performed only for live tasks. * Remove -ESRCH failure path from cgroup_task_migrate(). With the above changes, it's guaranteed to be called only for live tasks. After the changes, only live tasks are migrated and they're guaranteed to stay alive until migration is complete. This removes problems caused by exec and exit racing against cgroup migration including symmetry among cgroup attach methods and different cgroup methods racing each other. v2: Oleg pointed out that one more PF_EXITING check can be removed from cgroup_attach_proc(). Removed. 
Signed-off-by: Tejun Heo Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Frederic Weisbecker Acked-by: Li Zefan Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage --- kernel/cgroup.c | 62 +++++++++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 35 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b409df3b2e9d..d71e012e81be 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1762,7 +1762,7 @@ EXPORT_SYMBOL_GPL(cgroup_path); * * 'guarantee' is set if the caller promises that a new css_set for the task * will already exist. If not set, this function might sleep, and can fail with - * -ENOMEM. Otherwise, it can only fail with -ESRCH. + * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked. */ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, struct task_struct *tsk, bool guarantee) @@ -1800,13 +1800,9 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, } put_css_set(oldcg); - /* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */ + /* @tsk can't exit as its threadgroup is locked */ task_lock(tsk); - if (tsk->flags & PF_EXITING) { - task_unlock(tsk); - put_css_set(newcg); - return -ESRCH; - } + WARN_ON_ONCE(tsk->flags & PF_EXITING); rcu_assign_pointer(tsk->cgroups, newcg); task_unlock(tsk); @@ -1832,8 +1828,8 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, * @cgrp: the cgroup the task is attaching to * @tsk: the task to be attached * - * Call holding cgroup_mutex. May take task_lock of - * the task 'tsk' during call. + * Call with cgroup_mutex and threadgroup locked. May take task_lock of + * @tsk during call. 
*/ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { @@ -1842,6 +1838,10 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) struct cgroup *oldcgrp; struct cgroupfs_root *root = cgrp->root; + /* @tsk either already exited or can't exit until the end */ + if (tsk->flags & PF_EXITING) + return -ESRCH; + /* Nothing to do if the task is already in that cgroup */ oldcgrp = task_cgroup_from_root(tsk, root); if (cgrp == oldcgrp) @@ -2062,6 +2062,10 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) tsk = leader; i = 0; do { + /* @tsk either already exited or can't exit until the end */ + if (tsk->flags & PF_EXITING) + continue; + /* as per above, nr_threads may decrease, but not increase. */ BUG_ON(i >= group_size); get_task_struct(tsk); @@ -2116,11 +2120,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) continue; /* get old css_set pointer */ task_lock(tsk); - if (tsk->flags & PF_EXITING) { - /* ignore this task if it's going away */ - task_unlock(tsk); - continue; - } oldcg = tsk->cgroups; get_css_set(oldcg); task_unlock(tsk); @@ -2153,16 +2152,12 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) oldcgrp = task_cgroup_from_root(tsk, root); if (cgrp == oldcgrp) continue; - /* if the thread is PF_EXITING, it can just get skipped. */ retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); - if (retval == 0) { - /* attach each task to each subsystem */ - for_each_subsys(root, ss) { - if (ss->attach_task) - ss->attach_task(cgrp, tsk); - } - } else { - BUG_ON(retval != -ESRCH); + BUG_ON(retval); + /* attach each task to each subsystem */ + for_each_subsys(root, ss) { + if (ss->attach_task) + ss->attach_task(cgrp, tsk); } } /* nothing is sensitive to fork() after this point. 
*/ @@ -2215,8 +2210,8 @@ out_free_group_list: /* * Find the task_struct of the task to attach by vpid and pass it along to the - * function to attach either it or all tasks in its threadgroup. Will take - * cgroup_mutex; may take task_lock of task. + * function to attach either it or all tasks in its threadgroup. Will lock + * cgroup_mutex and threadgroup; may take task_lock of task. */ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) { @@ -2243,11 +2238,6 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) * detect it later. */ tsk = tsk->group_leader; - } else if (tsk->flags & PF_EXITING) { - /* optimization for the single-task-only case */ - rcu_read_unlock(); - cgroup_unlock(); - return -ESRCH; } /* * even if we're attaching all tasks in the thread group, we @@ -2271,13 +2261,15 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) get_task_struct(tsk); } - if (threadgroup) { - threadgroup_lock(tsk); + threadgroup_lock(tsk); + + if (threadgroup) ret = cgroup_attach_proc(cgrp, tsk); - threadgroup_unlock(tsk); - } else { + else ret = cgroup_attach_task(cgrp, tsk); - } + + threadgroup_unlock(tsk); + put_task_struct(tsk); cgroup_unlock(); return ret; -- cgit v1.2.3 From 134d33737f9015761c3832f6b268fae6274aac7f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 Dec 2011 18:12:21 -0800 Subject: cgroup: improve old cgroup handling in cgroup_attach_proc() cgroup_attach_proc() behaves differently from cgroup_attach_task() in the following aspects. * All hooks are invoked even if no task is actually being moved. * ->can_attach_task() is called for all tasks in the group whether the new cgrp is different from the current cgrp or not; however, ->attach_task() is skipped if new equals old. This makes the calls asymmetric. 
This patch improves old cgroup handling in cgroup_attach_proc() by looking up the current cgroup at the head, recording it in the flex array along with the task itself, and using it to remove the above two differences. This will also ease further changes. -v2: nr_todo renamed to nr_migrating_tasks as per Paul Menage's suggestion. Signed-off-by: Tejun Heo Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Frederic Weisbecker Acked-by: Paul Menage Acked-by: Li Zefan --- kernel/cgroup.c | 66 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 24 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d71e012e81be..0f2d00519d37 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1757,6 +1757,11 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) } EXPORT_SYMBOL_GPL(cgroup_path); +struct task_and_cgroup { + struct task_struct *task; + struct cgroup *cgrp; +}; + /* * cgroup_task_migrate - move a task from one cgroup to another. * @@ -2008,15 +2013,15 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, */ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) { - int retval, i, group_size; + int retval, i, group_size, nr_migrating_tasks; struct cgroup_subsys *ss, *failed_ss = NULL; bool cancel_failed_ss = false; /* guaranteed to be initialized later, but the compiler needs this */ - struct cgroup *oldcgrp = NULL; struct css_set *oldcg; struct cgroupfs_root *root = cgrp->root; /* threadgroup list cursor and array */ struct task_struct *tsk; + struct task_and_cgroup *tc; struct flex_array *group; /* * we need to make sure we have css_sets for all the tasks we're @@ -2035,8 +2040,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ group_size = get_nr_threads(leader); /* flex_array supports very large thread-groups better than kmalloc. 
*/ - group = flex_array_alloc(sizeof(struct task_struct *), group_size, - GFP_KERNEL); + group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL); if (!group) return -ENOMEM; /* pre-allocate to guarantee space while iterating in rcu read-side. */ @@ -2060,8 +2064,10 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) } /* take a reference on each task in the group to go in the array. */ tsk = leader; - i = 0; + i = nr_migrating_tasks = 0; do { + struct task_and_cgroup ent; + /* @tsk either already exited or can't exit until the end */ if (tsk->flags & PF_EXITING) continue; @@ -2073,14 +2079,23 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * saying GFP_ATOMIC has no effect here because we did prealloc * earlier, but it's good form to communicate our expectations. */ - retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC); + ent.task = tsk; + ent.cgrp = task_cgroup_from_root(tsk, root); + retval = flex_array_put(group, i, &ent, GFP_ATOMIC); BUG_ON(retval != 0); i++; + if (ent.cgrp != cgrp) + nr_migrating_tasks++; } while_each_thread(leader, tsk); /* remember the number of threads in the array for later. */ group_size = i; read_unlock(&tasklist_lock); + /* methods shouldn't be called if no task is actually migrating */ + retval = 0; + if (!nr_migrating_tasks) + goto out_put_tasks; + /* * step 1: check that we can legitimately attach to the cgroup. */ @@ -2096,8 +2111,10 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) if (ss->can_attach_task) { /* run on each task in the threadgroup. 
*/ for (i = 0; i < group_size; i++) { - tsk = flex_array_get_ptr(group, i); - retval = ss->can_attach_task(cgrp, tsk); + tc = flex_array_get(group, i); + if (tc->cgrp == cgrp) + continue; + retval = ss->can_attach_task(cgrp, tc->task); if (retval) { failed_ss = ss; cancel_failed_ss = true; @@ -2113,18 +2130,17 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ INIT_LIST_HEAD(&newcg_list); for (i = 0; i < group_size; i++) { - tsk = flex_array_get_ptr(group, i); + tc = flex_array_get(group, i); /* nothing to do if this task is already in the cgroup */ - oldcgrp = task_cgroup_from_root(tsk, root); - if (cgrp == oldcgrp) + if (tc->cgrp == cgrp) continue; /* get old css_set pointer */ - task_lock(tsk); - oldcg = tsk->cgroups; + task_lock(tc->task); + oldcg = tc->task->cgroups; get_css_set(oldcg); - task_unlock(tsk); + task_unlock(tc->task); /* see if the new one for us is already in the list? */ - if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) { + if (css_set_check_fetched(cgrp, tc->task, oldcg, &newcg_list)) { /* was already there, nothing to do. */ put_css_set(oldcg); } else { @@ -2147,17 +2163,16 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) ss->pre_attach(cgrp); } for (i = 0; i < group_size; i++) { - tsk = flex_array_get_ptr(group, i); + tc = flex_array_get(group, i); /* leave current thread as it is if it's already there */ - oldcgrp = task_cgroup_from_root(tsk, root); - if (cgrp == oldcgrp) + if (tc->cgrp == cgrp) continue; - retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); + retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true); BUG_ON(retval); /* attach each task to each subsystem */ for_each_subsys(root, ss) { if (ss->attach_task) - ss->attach_task(cgrp, tsk); + ss->attach_task(cgrp, tc->task); } } /* nothing is sensitive to fork() after this point. 
*/ @@ -2168,8 +2183,10 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * being moved, this call will need to be reworked to communicate that. */ for_each_subsys(root, ss) { - if (ss->attach) - ss->attach(ss, cgrp, oldcgrp, leader); + if (ss->attach) { + tc = flex_array_get(group, 0); + ss->attach(ss, cgrp, tc->cgrp, tc->task); + } } /* @@ -2198,10 +2215,11 @@ out_cancel_attach: ss->cancel_attach(ss, cgrp, leader); } } +out_put_tasks: /* clean up the array of referenced threads in the group. */ for (i = 0; i < group_size; i++) { - tsk = flex_array_get_ptr(group, i); - put_task_struct(tsk); + tc = flex_array_get(group, i); + put_task_struct(tc->task); } out_free_group_list: flex_array_free(group); -- cgit v1.2.3 From 2f7ee5691eecb67c8108b92001a85563ea336ac5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 Dec 2011 18:12:21 -0800 Subject: cgroup: introduce cgroup_taskset and use it in subsys->can_attach(), cancel_attach() and attach() Currently, there's no way to pass multiple tasks to cgroup_subsys methods necessitating the need for separate per-process and per-task methods. This patch introduces cgroup_taskset which can be used to pass multiple tasks and their associated cgroups to cgroup_subsys methods. Three methods - can_attach(), cancel_attach() and attach() - are converted to use cgroup_taskset. This unifies passed parameters so that all methods have access to all information. Conversions in this patchset are identical and don't introduce any behavior change. -v2: documentation updated as per Paul Menage's suggestion. 
Signed-off-by: Tejun Heo Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Frederic Weisbecker Acked-by: Paul Menage Acked-by: Li Zefan Cc: Balbir Singh Cc: Daisuke Nishimura Cc: KAMEZAWA Hiroyuki Cc: James Morris --- kernel/cgroup.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 10 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0f2d00519d37..41ee01e392e6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1757,11 +1757,85 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) } EXPORT_SYMBOL_GPL(cgroup_path); +/* + * Control Group taskset + */ struct task_and_cgroup { struct task_struct *task; struct cgroup *cgrp; }; +struct cgroup_taskset { + struct task_and_cgroup single; + struct flex_array *tc_array; + int tc_array_len; + int idx; + struct cgroup *cur_cgrp; +}; + +/** + * cgroup_taskset_first - reset taskset and return the first task + * @tset: taskset of interest + * + * @tset iteration is initialized and the first task is returned. + */ +struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset) +{ + if (tset->tc_array) { + tset->idx = 0; + return cgroup_taskset_next(tset); + } else { + tset->cur_cgrp = tset->single.cgrp; + return tset->single.task; + } +} +EXPORT_SYMBOL_GPL(cgroup_taskset_first); + +/** + * cgroup_taskset_next - iterate to the next task in taskset + * @tset: taskset of interest + * + * Return the next task in @tset. Iteration must have been initialized + * with cgroup_taskset_first(). 
+ */ +struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset) +{ + struct task_and_cgroup *tc; + + if (!tset->tc_array || tset->idx >= tset->tc_array_len) + return NULL; + + tc = flex_array_get(tset->tc_array, tset->idx++); + tset->cur_cgrp = tc->cgrp; + return tc->task; +} +EXPORT_SYMBOL_GPL(cgroup_taskset_next); + +/** + * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task + * @tset: taskset of interest + * + * Return the cgroup for the current (last returned) task of @tset. This + * function must be preceded by either cgroup_taskset_first() or + * cgroup_taskset_next(). + */ +struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset) +{ + return tset->cur_cgrp; +} +EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup); + +/** + * cgroup_taskset_size - return the number of tasks in taskset + * @tset: taskset of interest + */ +int cgroup_taskset_size(struct cgroup_taskset *tset) +{ + return tset->tc_array ? tset->tc_array_len : 1; +} +EXPORT_SYMBOL_GPL(cgroup_taskset_size); + + /* * cgroup_task_migrate - move a task from one cgroup to another. 
* @@ -1842,6 +1916,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) struct cgroup_subsys *ss, *failed_ss = NULL; struct cgroup *oldcgrp; struct cgroupfs_root *root = cgrp->root; + struct cgroup_taskset tset = { }; /* @tsk either already exited or can't exit until the end */ if (tsk->flags & PF_EXITING) @@ -1852,9 +1927,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) if (cgrp == oldcgrp) return 0; + tset.single.task = tsk; + tset.single.cgrp = oldcgrp; + for_each_subsys(root, ss) { if (ss->can_attach) { - retval = ss->can_attach(ss, cgrp, tsk); + retval = ss->can_attach(ss, cgrp, &tset); if (retval) { /* * Remember on which subsystem the can_attach() @@ -1885,7 +1963,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) if (ss->attach_task) ss->attach_task(cgrp, tsk); if (ss->attach) - ss->attach(ss, cgrp, oldcgrp, tsk); + ss->attach(ss, cgrp, &tset); } synchronize_rcu(); @@ -1907,7 +1985,7 @@ out: */ break; if (ss->cancel_attach) - ss->cancel_attach(ss, cgrp, tsk); + ss->cancel_attach(ss, cgrp, &tset); } } return retval; @@ -2023,6 +2101,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) struct task_struct *tsk; struct task_and_cgroup *tc; struct flex_array *group; + struct cgroup_taskset tset = { }; /* * we need to make sure we have css_sets for all the tasks we're * going to move -before- we actually start moving them, so that in @@ -2089,6 +2168,8 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) } while_each_thread(leader, tsk); /* remember the number of threads in the array for later. 
*/ group_size = i; + tset.tc_array = group; + tset.tc_array_len = group_size; read_unlock(&tasklist_lock); /* methods shouldn't be called if no task is actually migrating */ @@ -2101,7 +2182,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ for_each_subsys(root, ss) { if (ss->can_attach) { - retval = ss->can_attach(ss, cgrp, leader); + retval = ss->can_attach(ss, cgrp, &tset); if (retval) { failed_ss = ss; goto out_cancel_attach; @@ -2183,10 +2264,8 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * being moved, this call will need to be reworked to communicate that. */ for_each_subsys(root, ss) { - if (ss->attach) { - tc = flex_array_get(group, 0); - ss->attach(ss, cgrp, tc->cgrp, tc->task); - } + if (ss->attach) + ss->attach(ss, cgrp, &tset); } /* @@ -2208,11 +2287,11 @@ out_cancel_attach: for_each_subsys(root, ss) { if (ss == failed_ss) { if (cancel_failed_ss && ss->cancel_attach) - ss->cancel_attach(ss, cgrp, leader); + ss->cancel_attach(ss, cgrp, &tset); break; } if (ss->cancel_attach) - ss->cancel_attach(ss, cgrp, leader); + ss->cancel_attach(ss, cgrp, &tset); } } out_put_tasks: -- cgit v1.2.3 From 494c167cf76d02000adf740c215adc69a824ecc9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 Dec 2011 18:12:22 -0800 Subject: cgroup: kill subsys->can_attach_task(), pre_attach() and attach_task() These three methods are no longer used. Kill them. 
Signed-off-by: Tejun Heo Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Frederic Weisbecker Acked-by: Paul Menage Cc: Li Zefan --- kernel/cgroup.c | 52 +++++----------------------------------------------- 1 file changed, 5 insertions(+), 47 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 41ee01e392e6..1b3b84174ead 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1944,13 +1944,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) goto out; } } - if (ss->can_attach_task) { - retval = ss->can_attach_task(cgrp, tsk); - if (retval) { - failed_ss = ss; - goto out; - } - } } retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); @@ -1958,10 +1951,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) goto out; for_each_subsys(root, ss) { - if (ss->pre_attach) - ss->pre_attach(cgrp); - if (ss->attach_task) - ss->attach_task(cgrp, tsk); if (ss->attach) ss->attach(ss, cgrp, &tset); } @@ -2093,7 +2082,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) { int retval, i, group_size, nr_migrating_tasks; struct cgroup_subsys *ss, *failed_ss = NULL; - bool cancel_failed_ss = false; /* guaranteed to be initialized later, but the compiler needs this */ struct css_set *oldcg; struct cgroupfs_root *root = cgrp->root; @@ -2188,21 +2176,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) goto out_cancel_attach; } } - /* a callback to be run on every thread in the threadgroup. */ - if (ss->can_attach_task) { - /* run on each task in the threadgroup. 
*/ - for (i = 0; i < group_size; i++) { - tc = flex_array_get(group, i); - if (tc->cgrp == cgrp) - continue; - retval = ss->can_attach_task(cgrp, tc->task); - if (retval) { - failed_ss = ss; - cancel_failed_ss = true; - goto out_cancel_attach; - } - } - } } /* @@ -2234,15 +2207,10 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) } /* - * step 3: now that we're guaranteed success wrt the css_sets, proceed - * to move all tasks to the new cgroup, calling ss->attach_task for each - * one along the way. there are no failure cases after here, so this is - * the commit point. + * step 3: now that we're guaranteed success wrt the css_sets, + * proceed to move all tasks to the new cgroup. There are no + * failure cases after here, so this is the commit point. */ - for_each_subsys(root, ss) { - if (ss->pre_attach) - ss->pre_attach(cgrp); - } for (i = 0; i < group_size; i++) { tc = flex_array_get(group, i); /* leave current thread as it is if it's already there */ @@ -2250,18 +2218,11 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) continue; retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true); BUG_ON(retval); - /* attach each task to each subsystem */ - for_each_subsys(root, ss) { - if (ss->attach_task) - ss->attach_task(cgrp, tc->task); - } } /* nothing is sensitive to fork() after this point. */ /* - * step 4: do expensive, non-thread-specific subsystem callbacks. - * TODO: if ever a subsystem needs to know the oldcgrp for each task - * being moved, this call will need to be reworked to communicate that. + * step 4: do subsystem attach callbacks. 
*/ for_each_subsys(root, ss) { if (ss->attach) @@ -2285,11 +2246,8 @@ out_cancel_attach: /* same deal as in cgroup_attach_task */ if (retval) { for_each_subsys(root, ss) { - if (ss == failed_ss) { - if (cancel_failed_ss && ss->cancel_attach) - ss->cancel_attach(ss, cgrp, &tset); + if (ss == failed_ss) break; - } if (ss->cancel_attach) ss->cancel_attach(ss, cgrp, &tset); } -- cgit v1.2.3 From 29e21368b9baf9c4b25060d65062da2dda926c70 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Thu, 15 Dec 2011 14:21:26 -0800 Subject: cgroups: remove redundant get/put of css_set from css_set_check_fetched() We already have a reference to all elements in newcg_list. Signed-off-by: Mandeep Singh Baines Reviewed-by: Li Zefan Signed-off-by: Tejun Heo Cc: containers@lists.linux-foundation.org Cc: cgroups@vger.kernel.org Cc: Paul Menage --- kernel/cgroup.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1b3b84174ead..bc3caff138d8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2025,23 +2025,17 @@ static bool css_set_check_fetched(struct cgroup *cgrp, read_lock(&css_set_lock); newcg = find_existing_css_set(cg, cgrp, template); - if (newcg) - get_css_set(newcg); read_unlock(&css_set_lock); /* doesn't exist at all? 
*/ if (!newcg) return false; /* see if it's already in the list */ - list_for_each_entry(cg_entry, newcg_list, links) { - if (cg_entry->cg == newcg) { - put_css_set(newcg); + list_for_each_entry(cg_entry, newcg_list, links) + if (cg_entry->cg == newcg) return true; - } - } /* not found */ - put_css_set(newcg); return false; } -- cgit v1.2.3 From 7e381b0eb1e1a9805c37335562e8dc02e7d7848c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 21 Dec 2011 20:03:19 +0100 Subject: cgroup: Drop task_lock(parent) on cgroup_fork() We don't need to hold the parent task_lock() on the parent in cgroup_fork() because we are already synchronized against the two places that may change the parent css_set concurrently: - cgroup_exit(), but the parent obviously can't exit concurrently - cgroup migration: we are synchronized against threadgroup_lock() So we can safely remove the task_lock() there. Signed-off-by: Frederic Weisbecker Reviewed-by: Li Zefan Signed-off-by: Tejun Heo Cc: Containers Cc: Cgroups Cc: KAMEZAWA Hiroyuki Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage Cc: Mandeep Singh Baines --- kernel/cgroup.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index bc3caff138d8..dae50d0d8e4b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4556,20 +4556,31 @@ static const struct file_operations proc_cgroupstats_operations = { * * A pointer to the shared css_set was automatically copied in * fork.c by dup_task_struct(). However, we ignore that copy, since - * it was not made under the protection of RCU or cgroup_mutex, so - * might no longer be a valid cgroup pointer. cgroup_attach_task() might - * have already changed current->cgroups, allowing the previously - * referenced cgroup group to be removed and freed. 
+ * it was not made under the protection of RCU, cgroup_mutex or + * threadgroup_change_begin(), so it might no longer be a valid + * cgroup pointer. cgroup_attach_task() might have already changed + * current->cgroups, allowing the previously referenced cgroup + * group to be removed and freed. + * + * Outside the pointer validity we also need to process the css_set + * inheritance between threadgroup_change_begin() and + * threadgroup_change_end(), this way there is no leak in any process + * wide migration performed by cgroup_attach_proc() that could otherwise + * miss a thread because it is too early or too late in the fork stage. * * At the point that cgroup_fork() is called, 'current' is the parent * task, and the passed argument 'child' points to the child task. */ void cgroup_fork(struct task_struct *child) { - task_lock(current); + /* + * We don't need to task_lock() current because current->cgroups + * can't be changed concurrently here. The parent obviously hasn't + * exited and called cgroup_exit(), and we are synchronized against + * cgroup migration through threadgroup_change_begin(). + */ child->cgroups = current->cgroups; get_css_set(child->cgroups); - task_unlock(current); INIT_LIST_HEAD(&child->cg_list); } -- cgit v1.2.3 From c84cdf75ccb2845f690579e838f13f7e744e3d23 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 21 Dec 2011 20:03:18 +0100 Subject: cgroup: Remove unnecessary task_lock before fetching css_set on migration When we fetch the css_set of the tasks on cgroup migration, we no longer need to synchronize against cgroup_exit() that could swap the old one with init_css_set. Now that we are using threadgroup_lock() during the migrations, we don't need to worry about it anymore.
Signed-off-by: Frederic Weisbecker Reviewed-by: Mandeep Singh Baines Reviewed-by: Li Zefan Signed-off-by: Tejun Heo Cc: Containers Cc: Cgroups Cc: KAMEZAWA Hiroyuki Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage --- kernel/cgroup.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index dae50d0d8e4b..4936d8886b4f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1850,14 +1850,14 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, struct css_set *newcg; /* - * get old css_set. we need to take task_lock and refcount it, because - * an exiting task can change its css_set to init_css_set and drop its - * old one without taking cgroup_mutex. + * get old css_set. We are synchronized through threadgroup_lock() + * against PF_EXITING setting such that we can't race against + * cgroup_exit() changing the css_set to init_css_set and dropping the + * old one. */ - task_lock(tsk); + WARN_ON_ONCE(tsk->flags & PF_EXITING); oldcg = tsk->cgroups; get_css_set(oldcg); - task_unlock(tsk); /* locate or allocate a new css_set for this task. */ if (guarantee) { @@ -1879,9 +1879,7 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, } put_css_set(oldcg); - /* @tsk can't exit as its threadgroup is locked */ task_lock(tsk); - WARN_ON_ONCE(tsk->flags & PF_EXITING); rcu_assign_pointer(tsk->cgroups, newcg); task_unlock(tsk); @@ -2182,11 +2180,13 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) /* nothing to do if this task is already in the cgroup */ if (tc->cgrp == cgrp) continue; - /* get old css_set pointer */ - task_lock(tc->task); + /* + * get old css_set pointer. threadgroup is locked so this is + * safe against concurrent cgroup_exit() changing this to + * init_css_set. 
+ */ oldcg = tc->task->cgroups; get_css_set(oldcg); - task_unlock(tc->task); /* see if the new one for us is already in the list? */ if (css_set_check_fetched(cgrp, tc->task, oldcg, &newcg_list)) { /* was already there, nothing to do. */ -- cgit v1.2.3 From 026085ef5ae07c3197f2baacc091ce067b86ed11 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Wed, 21 Dec 2011 20:18:35 -0800 Subject: cgroup: remove redundant get/put of old css_set from migrate We can now assume that the css_set reference held by the task will not go away for an exiting task. PF_EXITING state can be trusted throughout migration by checking it after locking threadgroup. Changes in V4: * https://lkml.org/lkml/2011/12/20/368 (Tejun Heo) * Fix typo in commit message * Undid the rename of css_set_check_fetched * https://lkml.org/lkml/2011/12/20/427 (Li Zefan) * Fix comment in cgroup_task_migrate() Changes in V3: * https://lkml.org/lkml/2011/12/20/255 (Frederic Weisbecker) * Fixed to put error in retval Changes in V2: * https://lkml.org/lkml/2011/12/19/289 (Tejun Heo) * Updated commit message -tj: removed stale patch description about dropped function rename. Signed-off-by: Mandeep Singh Baines Acked-by: Li Zefan Signed-off-by: Tejun Heo Cc: containers@lists.linux-foundation.org Cc: cgroups@vger.kernel.org Cc: KAMEZAWA Hiroyuki Cc: Frederic Weisbecker Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage --- kernel/cgroup.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4936d8886b4f..82288088f6a5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1850,14 +1850,12 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, struct css_set *newcg; /* - * get old css_set. 
We are synchronized through threadgroup_lock() - * against PF_EXITING setting such that we can't race against - * cgroup_exit() changing the css_set to init_css_set and dropping the - * old one. + * We are synchronized through threadgroup_lock() against PF_EXITING + * setting such that we can't race against cgroup_exit() changing the + * css_set to init_css_set and dropping the old one. */ WARN_ON_ONCE(tsk->flags & PF_EXITING); oldcg = tsk->cgroups; - get_css_set(oldcg); /* locate or allocate a new css_set for this task. */ if (guarantee) { @@ -1872,12 +1870,9 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, might_sleep(); /* find_css_set will give us newcg already referenced. */ newcg = find_css_set(oldcg, cgrp); - if (!newcg) { - put_css_set(oldcg); + if (!newcg) return -ENOMEM; - } } - put_css_set(oldcg); task_lock(tsk); rcu_assign_pointer(tsk->cgroups, newcg); @@ -2186,18 +2181,11 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * init_css_set. */ oldcg = tc->task->cgroups; - get_css_set(oldcg); - /* see if the new one for us is already in the list? */ - if (css_set_check_fetched(cgrp, tc->task, oldcg, &newcg_list)) { - /* was already there, nothing to do. */ - put_css_set(oldcg); - } else { - /* we don't already have it. get new one. */ - retval = css_set_prefetch(cgrp, oldcg, &newcg_list); - put_css_set(oldcg); - if (retval) + + /* if we don't already have it in the list get a new one */ + if (!css_set_check_fetched(cgrp, tc->task, oldcg, &newcg_list)) + if (retval = css_set_prefetch(cgrp, oldcg, &newcg_list)) goto out_list_teardown; - } } /* -- cgit v1.2.3 From b07ef7741122a83575499c11417e514877941e76 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Wed, 21 Dec 2011 20:18:36 -0800 Subject: cgroup: remove redundant get/put of task struct threadgroup_lock() guarantees that the target threadgroup will remain stable - no new task will be added, no new PF_EXITING will be set and exec won't happen. 
Changes in V2: * https://lkml.org/lkml/2011/12/20/369 (Tejun Heo) * Undo incorrect removal of get/put from attach_task_by_pid() * Author * Remove a comment which is made stale by this change Signed-off-by: Mandeep Singh Baines Acked-by: Li Zefan Signed-off-by: Tejun Heo Cc: containers@lists.linux-foundation.org Cc: cgroups@vger.kernel.org Cc: KAMEZAWA Hiroyuki Cc: Frederic Weisbecker Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage --- kernel/cgroup.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 82288088f6a5..a85a7002ca33 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2116,7 +2116,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) retval = -EAGAIN; goto out_free_group_list; } - /* take a reference on each task in the group to go in the array. */ + tsk = leader; i = nr_migrating_tasks = 0; do { @@ -2128,7 +2128,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) /* as per above, nr_threads may decrease, but not increase. */ BUG_ON(i >= group_size); - get_task_struct(tsk); /* * saying GFP_ATOMIC has no effect here because we did prealloc * earlier, but it's good form to communicate our expectations. @@ -2150,7 +2149,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) /* methods shouldn't be called if no task is actually migrating */ retval = 0; if (!nr_migrating_tasks) - goto out_put_tasks; + goto out_free_group_list; /* * step 1: check that we can legitimately attach to the cgroup. @@ -2234,12 +2233,6 @@ out_cancel_attach: ss->cancel_attach(ss, cgrp, &tset); } } -out_put_tasks: - /* clean up the array of referenced threads in the group. 
*/ - for (i = 0; i < group_size; i++) { - tc = flex_array_get(group, i); - put_task_struct(tc->task); - } out_free_group_list: flex_array_free(group); return retval; -- cgit v1.2.3 From 892a2b90ba15cb7dbee40979f23fdb492913abf8 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Wed, 21 Dec 2011 20:18:37 -0800 Subject: cgroup: only need to check oldcgrp==newgrp once In cgroup_attach_proc it is now sufficient to only check that oldcgrp==newcgrp once. Now that we are using threadgroup_lock() during the migrations, oldcgrp will not change. Signed-off-by: Mandeep Singh Baines Acked-by: Li Zefan Signed-off-by: Tejun Heo Cc: containers@lists.linux-foundation.org Cc: cgroups@vger.kernel.org Cc: KAMEZAWA Hiroyuki Cc: Frederic Weisbecker Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage --- kernel/cgroup.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a85a7002ca33..1042b3c41314 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2067,7 +2067,7 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, */ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) { - int retval, i, group_size, nr_migrating_tasks; + int retval, i, group_size; struct cgroup_subsys *ss, *failed_ss = NULL; /* guaranteed to be initialized later, but the compiler needs this */ struct css_set *oldcg; @@ -2118,7 +2118,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) } tsk = leader; - i = nr_migrating_tasks = 0; + i = 0; do { struct task_and_cgroup ent; @@ -2134,11 +2134,12 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ ent.task = tsk; ent.cgrp = task_cgroup_from_root(tsk, root); + /* nothing to do if this task is already in the cgroup */ + if (ent.cgrp == cgrp) + continue; retval = flex_array_put(group, i, &ent, GFP_ATOMIC); BUG_ON(retval != 0); i++; - if (ent.cgrp != cgrp) - 
nr_migrating_tasks++; } while_each_thread(leader, tsk); /* remember the number of threads in the array for later. */ group_size = i; @@ -2148,7 +2149,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) /* methods shouldn't be called if no task is actually migrating */ retval = 0; - if (!nr_migrating_tasks) + if (!group_size) goto out_free_group_list; /* @@ -2171,14 +2172,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) INIT_LIST_HEAD(&newcg_list); for (i = 0; i < group_size; i++) { tc = flex_array_get(group, i); - /* nothing to do if this task is already in the cgroup */ - if (tc->cgrp == cgrp) - continue; - /* - * get old css_set pointer. threadgroup is locked so this is - * safe against concurrent cgroup_exit() changing this to - * init_css_set. - */ oldcg = tc->task->cgroups; /* if we don't already have it in the list get a new one */ @@ -2194,9 +2187,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ for (i = 0; i < group_size; i++) { tc = flex_array_get(group, i); - /* leave current thread as it is if it's already there */ - if (tc->cgrp == cgrp) - continue; retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true); BUG_ON(retval); } -- cgit v1.2.3 From 1c6c3fad81787e8cb4c85ddfd573b0d8442fe630 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 27 Dec 2011 07:46:25 +0200 Subject: cgroup: mark cgroup_rmdir_waitq and cgroup_attach_proc() as static Signed-off-by: Kirill A. 
Shutemov Acked-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1042b3c41314..421557fcbfe4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -938,7 +938,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) * * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; */ -DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); +static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) { @@ -2065,7 +2065,7 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, * Call holding cgroup_mutex and the group_rwsem of the leader. Will take * task_lock of each thread in leader's threadgroup individually in turn. */ -int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) +static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) { int retval, i, group_size; struct cgroup_subsys *ss, *failed_ss = NULL; -- cgit v1.2.3 From c6ca57500c23d57a4ccec9874b6a3c99c297e1b5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 27 Dec 2011 07:46:26 +0200 Subject: cgroup: add sparse annotation to cgroup_iter_start() and cgroup_iter_end() Signed-off-by: Kirill A. 
Shutemov Acked-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cgroup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 421557fcbfe4..c6bd67b3fcf6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2825,6 +2825,7 @@ static void cgroup_enable_task_cg_lists(void) } void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) + __acquires(css_set_lock) { /* * The first time anyone tries to iterate across a cgroup, @@ -2864,6 +2865,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, } void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) + __releases(css_set_lock) { read_unlock(&css_set_lock); } -- cgit v1.2.3 From 7e3aa30ac8c904a706518b725c451bb486daaae9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 23 Dec 2011 04:25:23 +0100 Subject: cgroup: Remove task_lock() from cgroup_post_fork() cgroup_post_fork() is protected between threadgroup_change_begin() and threadgroup_change_end() against concurrent changes of the child's css_set in cgroup_task_migrate(). Also the child can't exit and call cgroup_exit() at this stage; this means its css_set can't be changed with init_css_set concurrently. For these reasons, we don't need to hold task_lock() on the child because its css_set can only remain stable in this place. Let's remove the lock there.
v2: Update comment to explain that we are safe against cgroup_exit() Signed-off-by: Frederic Weisbecker Acked-by: Li Zefan Signed-off-by: Tejun Heo Cc: Containers Cc: Cgroups Cc: KAMEZAWA Hiroyuki Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage Cc: Mandeep Singh Baines --- kernel/cgroup.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c6bd67b3fcf6..548d8d4e86d0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4595,10 +4595,19 @@ void cgroup_post_fork(struct task_struct *child) { if (use_task_css_set_links) { write_lock(&css_set_lock); - task_lock(child); - if (list_empty(&child->cg_list)) + if (list_empty(&child->cg_list)) { + /* + * It's safe to use child->cgroups without task_lock() + * here because we are protected through + * threadgroup_change_begin() against concurrent + * css_set change in cgroup_task_migrate(). Also + * the task can't exit at that point until + * wake_up_new_task() is called, so we are protected + * against cgroup_exit() setting child->cgroup to + * init_css_set. 
+ */ list_add(&child->cg_list, &child->cgroups->tasks); - task_unlock(child); + } write_unlock(&css_set_lock); } } -- cgit v1.2.3 From 305f3c8b20ba1ca94829329acdbf22e689304dba Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jan 2012 10:24:29 +0300 Subject: cgroup: move assignement out of condition in cgroup_attach_proc() Gcc complains about this: "kernel/cgroup.c:2179:4: warning: suggest parentheses around assignment used as truth value [-Wparentheses]" Signed-off-by: Dan Carpenter Signed-off-by: Tejun Heo --- kernel/cgroup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 548d8d4e86d0..bab5c17e7781 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2175,9 +2175,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) oldcg = tc->task->cgroups; /* if we don't already have it in the list get a new one */ - if (!css_set_check_fetched(cgrp, tc->task, oldcg, &newcg_list)) - if (retval = css_set_prefetch(cgrp, oldcg, &newcg_list)) + if (!css_set_check_fetched(cgrp, tc->task, oldcg, + &newcg_list)) { + retval = css_set_prefetch(cgrp, oldcg, &newcg_list); + if (retval) goto out_list_teardown; + } } /* -- cgit v1.2.3 From 0d19ea866562e46989412a0676412fa0983c9ce7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 27 Dec 2011 14:25:55 +0800 Subject: cgroup: fix to allow mounting a hierarchy by name If we mount a hierarchy with a specified name, the name is unique, and we can use it to mount the hierarchy without specifying its set of subsystem names. This feature is documented in Documentation/cgroups/cgroups.txt section 2.3 Here's an example: # mount -t cgroup -o cpuset,name=myhier xxx /cgroup1 # mount -t cgroup -o name=myhier xxx /cgroup2 But it was broken by commit 32a8cf235e2f192eb002755076994525cdbaa35a (cgroup: make the mount options parsing more accurate) This fixes the regression.
Signed-off-by: Li Zefan Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- kernel/cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index bab5c17e7781..39c7caef085a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1193,10 +1193,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) /* * If the 'all' option was specified select all the subsystems, - * otherwise 'all, 'none' and a subsystem name options were not - * specified, let's default to 'all' + * otherwise if 'none', 'name=' and a subsystem name options + * were not specified, let's default to 'all' */ - if (all_ss || (!all_ss && !one_ss && !opts->none)) { + if (all_ss || (!one_ss && !opts->none && !opts->name)) { for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; if (ss == NULL) -- cgit v1.2.3