diff options
-rw-r--r-- | include/linux/cgroup-defs.h | 3 | ||||
-rw-r--r-- | include/linux/cgroup.h | 4 | ||||
-rw-r--r-- | kernel/cgroup.c | 87 |
3 files changed, 80 insertions, 14 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 17444505c870..62413c3e2f4b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -211,6 +211,9 @@ struct css_set { */ struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + /* all css_task_iters currently walking this cset */ + struct list_head task_iters; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bdfdb3a1a83c..a9dcf0e76865 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -42,6 +42,10 @@ struct css_task_iter { struct list_head *task_pos; struct list_head *tasks_head; struct list_head *mg_tasks_head; + + struct css_set *cur_cset; + struct task_struct *cur_task; + struct list_head iters_node; /* css_set->task_iters */ }; extern struct cgroup_root cgrp_dfl_root; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 56e2b772b143..0c5b0e605c47 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -216,6 +216,7 @@ static struct cftype cgroup_legacy_base_files[]; static int rebind_subsystems(struct cgroup_root *dst_root, unsigned long ss_mask); +static void css_task_iter_advance(struct css_task_iter *it); static int cgroup_destroy_locked(struct cgroup *cgrp); static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, bool visible); @@ -593,6 +594,7 @@ struct css_set init_css_set = { .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), + .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), }; static int css_set_count = 1; /* 1 for init_css_set */ @@ -675,8 +677,9 @@ static void css_set_update_populated(struct css_set *cset, bool populated) * css_set, @from_cset can be NULL. If @task is being disassociated * instead of moved, @to_cset can be NULL. * - * This function automatically handles populated_cnt updates but the caller - * is responsible for managing @from_cset and @to_cset's reference counts. + * This function automatically handles populated_cnt updates and + * css_task_iter adjustments but the caller is responsible for managing + * @from_cset and @to_cset's reference counts. */ static void css_set_move_task(struct task_struct *task, struct css_set *from_cset, struct css_set *to_cset, @@ -685,7 +688,22 @@ static void css_set_move_task(struct task_struct *task, lockdep_assert_held(&css_set_rwsem); if (from_cset) { + struct css_task_iter *it, *pos; + WARN_ON_ONCE(list_empty(&task->cg_list)); + + /* + * @task is leaving, advance task iterators which are + * pointing to it so that they can resume at the next + * position. Advancing an iterator might remove it from + * the list, use safe walk. See css_task_iter_advance*() + * for details. + */ + list_for_each_entry_safe(it, pos, &from_cset->task_iters, + iters_node) + if (it->task_pos == &task->cg_list) + css_task_iter_advance(it); + list_del_init(&task->cg_list); if (!css_set_populated(from_cset)) css_set_update_populated(from_cset, false); @@ -1019,6 +1037,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, INIT_LIST_HEAD(&cset->mg_tasks); INIT_LIST_HEAD(&cset->mg_preload_node); INIT_LIST_HEAD(&cset->mg_node); + INIT_LIST_HEAD(&cset->task_iters); INIT_HLIST_NODE(&cset->hlist); /* Copy the set of subsystem state objects generated in @@ -3804,6 +3823,8 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) struct cgrp_cset_link *link; struct css_set *cset; + lockdep_assert_held(&css_set_rwsem); + /* Advance to the next non-empty css_set */ do { l = l->next; @@ -3831,12 +3852,36 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; + + /* + * We don't keep css_sets locked across iteration steps and thus + * need to take steps to ensure that iteration can be resumed after + * the lock is re-acquired. Iteration is performed at two levels - + * css_sets and tasks in them. + * + * Once created, a css_set never leaves its cgroup lists, so a + * pinned css_set is guaranteed to stay put and we can resume + * iteration afterwards. + * + * Tasks may leave @cset across iteration steps. This is resolved + * by registering each iterator with the css_set currently being + * walked and making css_set_move_task() advance iterators whose + * next task is leaving. + */ + if (it->cur_cset) { + list_del(&it->iters_node); + put_css_set_locked(it->cur_cset); + } + get_css_set(cset); + it->cur_cset = cset; + list_add(&it->iters_node, &cset->task_iters); } static void css_task_iter_advance(struct css_task_iter *it) { struct list_head *l = it->task_pos; + lockdep_assert_held(&css_set_rwsem); WARN_ON_ONCE(!l); /* @@ -3864,19 +3909,16 @@ static void css_task_iter_advance(struct css_task_iter *it) * css_task_iter_next() to walk through the tasks until the function * returns NULL. On completion of iteration, css_task_iter_end() must be * called. - * - * Note that this function acquires a lock which is released when the - * iteration finishes. The caller can't sleep while iteration is in - * progress. */ void css_task_iter_start(struct cgroup_subsys_state *css, struct css_task_iter *it) - __acquires(css_set_rwsem) { /* no one should try to iterate before mounting cgroups */ WARN_ON_ONCE(!use_task_css_set_links); - down_read(&css_set_rwsem); + memset(it, 0, sizeof(*it)); + + down_write(&css_set_rwsem); it->ss = css->ss; @@ -3888,6 +3930,8 @@ void css_task_iter_start(struct cgroup_subsys_state *css, it->cset_head = it->cset_pos; css_task_iter_advance_css_set(it); + + up_write(&css_set_rwsem); } /** @@ -3900,14 +3944,22 @@ void css_task_iter_start(struct cgroup_subsys_state *css, */ struct task_struct *css_task_iter_next(struct css_task_iter *it) { - struct task_struct *res; - if (!it->cset_pos) return NULL; - res = list_entry(it->task_pos, struct task_struct, cg_list); + if (it->cur_task) + put_task_struct(it->cur_task); + + down_write(&css_set_rwsem); + + it->cur_task = list_entry(it->task_pos, struct task_struct, cg_list); + get_task_struct(it->cur_task); + css_task_iter_advance(it); - return res; + + up_write(&css_set_rwsem); + + return it->cur_task; } /** @@ -3917,9 +3969,16 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) * Finish task iteration started by css_task_iter_start(). */ void css_task_iter_end(struct css_task_iter *it) - __releases(css_set_rwsem) { - up_read(&css_set_rwsem); + if (it->cur_cset) { + down_write(&css_set_rwsem); + list_del(&it->iters_node); + put_css_set_locked(it->cur_cset); + up_write(&css_set_rwsem); + } + + if (it->cur_task) + put_task_struct(it->cur_task); } /** |