From c29adf24e0c443fb4433efb6a62bd91fdb739c34 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:56 -0400 Subject: cgroup: reorganize cgroup_subtree_control_write() Make the following two reorganizations to cgroup_subtree_control_write(). These are to prepare for future changes and shouldn't cause any functional difference. * Move availability above css offlining wait. * Move cgrp->child_subsys_mask update above new css creation. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- kernel/cgroup.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7868fc3c0bc5..a46d7e2012b4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2613,6 +2613,14 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, continue; } + /* unavailable or not enabled on the parent? */ + if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) || + (cgroup_parent(cgrp) && + !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) { + ret = -ENOENT; + goto out_unlock; + } + /* * Because css offlining is asynchronous, userland * might try to re-enable the same controller while @@ -2635,14 +2643,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return restart_syscall(); } - - /* unavailable or not enabled on the parent? */ - if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) || - (cgroup_parent(cgrp) && - !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) { - ret = -ENOENT; - goto out_unlock; - } } else if (disable & (1 << ssid)) { if (!(cgrp->child_subsys_mask & (1 << ssid))) { disable &= ~(1 << ssid); @@ -2673,12 +2673,10 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } - /* - * Create csses for enables and update child_subsys_mask. This - * changes cgroup_e_css() results which in turn makes the - * subsequent cgroup_update_dfl_csses() associate all tasks in the - * subtree to the updated csses. - */ + cgrp->child_subsys_mask |= enable; + cgrp->child_subsys_mask &= ~disable; + + /* create new csses */ for_each_subsys(ss, ssid) { if (!(enable & (1 << ssid))) continue; @@ -2690,9 +2688,11 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } } - cgrp->child_subsys_mask |= enable; - cgrp->child_subsys_mask &= ~disable; - + /* + * At this point, cgroup_e_css() results reflect the new csses + * making the following cgroup_update_dfl_csses() properly update + * css associations of all tasks in the subtree. + */ ret = cgroup_update_dfl_csses(cgrp); if (ret) goto err_undo_css; -- cgit v1.2.3 From 667c24917144e34880f821486bf0a6e4d05a3a14 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:56 -0400 Subject: cgroup: introduce cgroup->subtree_control cgroup is implementing support for subsystem dependency which would require a way to enable a subsystem even when it's not directly configured through "cgroup.subtree_control". Previously, cgroup->child_subsys_mask directly reflected "cgroup.subtree_control" and the enabled subsystems in the child cgroups. This patch adds cgroup->subtree_control which "cgroup.subtree_control" operates on. cgroup->child_subsys_mask is now calculated from cgroup->subtree_control by cgroup_refresh_child_subsys_mask(), which sets it identical to cgroup->subtree_control for now. This will allow using cgroup->child_subsys_mask for all the enabled subsystems including the implicit ones and ->subtree_control for tracking the explicitly requested ones. This patch keeps the two masks identical and doesn't introduce any behavior changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- kernel/cgroup.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a46d7e2012b4..14a9d88eacf9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1036,6 +1036,11 @@ static void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) +{ + cgrp->child_subsys_mask = cgrp->subtree_control; +} + /** * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods * @kn: the kernfs_node being serviced @@ -1208,12 +1213,15 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask) up_write(&css_set_rwsem); src_root->subsys_mask &= ~(1 << ssid); - src_root->cgrp.child_subsys_mask &= ~(1 << ssid); + src_root->cgrp.subtree_control &= ~(1 << ssid); + cgroup_refresh_child_subsys_mask(&src_root->cgrp); /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; - if (dst_root != &cgrp_dfl_root) - dst_root->cgrp.child_subsys_mask |= 1 << ssid; + if (dst_root != &cgrp_dfl_root) { + dst_root->cgrp.subtree_control |= 1 << ssid; + cgroup_refresh_child_subsys_mask(&dst_root->cgrp); + } if (ss->bind) ss->bind(css); @@ -2454,7 +2462,7 @@ static int cgroup_controllers_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask); + cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->subtree_control); return 0; } @@ -2463,7 +2471,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - cgroup_print_ss_mask(seq, cgrp->child_subsys_mask); + cgroup_print_ss_mask(seq, cgrp->subtree_control); return 0; } @@ -2608,7 +2616,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, for_each_subsys(ss, ssid) { if (enable & (1 << ssid)) { - if (cgrp->child_subsys_mask & (1 << ssid)) { + if (cgrp->subtree_control & (1 << ssid)) { enable &= ~(1 << ssid); continue; } @@ -2616,7 +2624,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, /* unavailable or not enabled on the parent? */ if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) || (cgroup_parent(cgrp) && - !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) { + !(cgroup_parent(cgrp)->subtree_control & (1 << ssid)))) { ret = -ENOENT; goto out_unlock; } @@ -2644,14 +2652,14 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return restart_syscall(); } } else if (disable & (1 << ssid)) { - if (!(cgrp->child_subsys_mask & (1 << ssid))) { + if (!(cgrp->subtree_control & (1 << ssid))) { disable &= ~(1 << ssid); continue; } /* a child has it enabled? */ cgroup_for_each_live_child(child, cgrp) { - if (child->child_subsys_mask & (1 << ssid)) { + if (child->subtree_control & (1 << ssid)) { ret = -EBUSY; goto out_unlock; } @@ -2665,7 +2673,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } /* - * Except for the root, child_subsys_mask must be zero for a cgroup + * Except for the root, subtree_control must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { @@ -2673,8 +2681,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } - cgrp->child_subsys_mask |= enable; - cgrp->child_subsys_mask &= ~disable; + cgrp->subtree_control |= enable; + cgrp->subtree_control &= ~disable; + cgroup_refresh_child_subsys_mask(cgrp); /* create new csses */ for_each_subsys(ss, ssid) { @@ -2713,8 +2722,9 @@ out_unlock: return ret ?: nbytes; err_undo_css: - cgrp->child_subsys_mask &= ~enable; - cgrp->child_subsys_mask |= disable; + cgrp->subtree_control &= ~enable; + cgrp->subtree_control |= disable; + cgroup_refresh_child_subsys_mask(cgrp); for_each_subsys(ss, ssid) { if (!(enable & (1 << ssid))) @@ -4428,10 +4438,12 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* * On the default hierarchy, a child doesn't automatically inherit - * child_subsys_mask from the parent. Each is configured manually. + * subtree_control from the parent. Each is configured manually. */ - if (!cgroup_on_dfl(cgrp)) - cgrp->child_subsys_mask = parent->child_subsys_mask; + if (!cgroup_on_dfl(cgrp)) { + cgrp->subtree_control = parent->subtree_control; + cgroup_refresh_child_subsys_mask(cgrp); + } kernfs_activate(kn); -- cgit v1.2.3 From f63070d350e3562baa6196f1043e01cd8da2509a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:57 -0400 Subject: cgroup: make interface files visible iff enabled on cgroup->subtree_control cgroup is implementing support for subsystem dependency which would require a way to enable a subsystem even when it's not directly configured through "cgroup.subtree_control". The preceding patch distinguished cgroup->subtree_control and ->child_subsys_mask where the former is the subsystems explicitly configured by the userland and the latter is all enabled subsystems currently is equal to the former but will include subsystems implicitly enabled through dependency. Subsystems which are enabled due to dependency shouldn't be visible to userland. This patch updates cgroup_subtree_control_write() and create_css() such that interface files are not created for implicitly enabled subsytems. * @visible paramter is added to create_css(). Interface files are created only when true. * If an already implicitly enabled subsystem is turned on through "cgroup.subtree_control", the existing css should be used. css draining is skipped. * cgroup_subtree_control_write() computes the new target cgroup->child_subsys_mask and create/kill or show/hide csses accordingly. As the two subsystem masks are still kept identical, this patch doesn't introduce any behavior changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- kernel/cgroup.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 14a9d88eacf9..331fa296c7e0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -186,7 +186,8 @@ static void cgroup_put(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask); static int cgroup_destroy_locked(struct cgroup *cgrp); -static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss); +static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, + bool visible); static void css_release(struct percpu_ref *ref); static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], @@ -2577,6 +2578,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, loff_t off) { unsigned int enable = 0, disable = 0; + unsigned int css_enable, css_disable, old_ctrl, new_ctrl; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -2629,6 +2631,13 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } + /* + * @ss is already enabled through dependency and + * we'll just make it visible. Skip draining. + */ + if (cgrp->child_subsys_mask & (1 << ssid)) + continue; + /* * Because css offlining is asynchronous, userland * might try to re-enable the same controller while @@ -2681,17 +2690,39 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } + /* + * Update subsys masks and calculate what needs to be done. More + * subsystems than specified may need to be enabled or disabled + * depending on subsystem dependencies. + */ cgrp->subtree_control |= enable; cgrp->subtree_control &= ~disable; + + old_ctrl = cgrp->child_subsys_mask; cgroup_refresh_child_subsys_mask(cgrp); + new_ctrl = cgrp->child_subsys_mask; + + css_enable = ~old_ctrl & new_ctrl; + css_disable = old_ctrl & ~new_ctrl; + enable |= css_enable; + disable |= css_disable; - /* create new csses */ + /* + * Create new csses or make the existing ones visible. A css is + * created invisible if it's being implicitly enabled through + * dependency. An invisible css is made visible when the userland + * explicitly enables it. + */ for_each_subsys(ss, ssid) { if (!(enable & (1 << ssid))) continue; cgroup_for_each_live_child(child, cgrp) { - ret = create_css(child, ss); + if (css_enable & (1 << ssid)) + ret = create_css(child, ss, + cgrp->subtree_control & (1 << ssid)); + else + ret = cgroup_populate_dir(child, 1 << ssid); if (ret) goto err_undo_css; } @@ -2706,13 +2737,21 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, if (ret) goto err_undo_css; - /* all tasks are now migrated away from the old csses, kill them */ + /* + * All tasks are migrated out of disabled csses. Kill or hide + * them. A css is hidden when the userland requests it to be + * disabled while other subsystems are still depending on it. + */ for_each_subsys(ss, ssid) { if (!(disable & (1 << ssid))) continue; - cgroup_for_each_live_child(child, cgrp) - kill_css(cgroup_css(child, ss)); + cgroup_for_each_live_child(child, cgrp) { + if (css_disable & (1 << ssid)) + kill_css(cgroup_css(child, ss)); + else + cgroup_clear_dir(child, 1 << ssid); + } } kernfs_activate(cgrp->kn); @@ -2732,8 +2771,14 @@ err_undo_css: cgroup_for_each_live_child(child, cgrp) { struct cgroup_subsys_state *css = cgroup_css(child, ss); - if (css) + + if (!css) + continue; + + if (css_enable & (1 << ssid)) kill_css(css); + else + cgroup_clear_dir(child, 1 << ssid); } } goto out_unlock; @@ -4282,12 +4327,14 @@ static void offline_css(struct cgroup_subsys_state *css) * create_css - create a cgroup_subsys_state * @cgrp: the cgroup new css will be associated with * @ss: the subsys of new css + * @visible: whether to create control knobs for the new css or not * * Create a new css associated with @cgrp - @ss pair. On success, the new - * css is online and installed in @cgrp with all interface files created. - * Returns 0 on success, -errno on failure. + * css is online and installed in @cgrp with all interface files created if + * @visible. Returns 0 on success, -errno on failure. */ -static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) +static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, + bool visible) { struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss); @@ -4311,9 +4358,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) goto err_free_percpu_ref; css->id = err; - err = cgroup_populate_dir(cgrp, 1 << ss->id); - if (err) - goto err_free_id; + if (visible) { + err = cgroup_populate_dir(cgrp, 1 << ss->id); + if (err) + goto err_free_id; + } /* @css is ready to be brought online now, make it visible */ list_add_tail_rcu(&css->sibling, &parent_css->children); @@ -4430,7 +4479,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* let's create and online css's */ for_each_subsys(ss, ssid) { if (parent->child_subsys_mask & (1 << ssid)) { - ret = create_css(cgrp, ss); + ret = create_css(cgrp, ss, + parent->subtree_control & (1 << ssid)); if (ret) goto out_destroy; } -- cgit v1.2.3 From b4536f0cab2b18414e26101a2b9d484c5cbea0c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:57 -0400 Subject: cgroup: implement cgroup_subsys->css_reset() cgroup is implementing support for subsystem dependency which would require a way to enable a subsystem even when it's not directly configured through "cgroup.subtree_control". The previous patches added support for explicitly and implicitly enabled subsystems and showing/hiding their interface files. An explicitly enabled subsystem may become implicitly enabled if it's turned off through "cgroup.subtree_control" but there are subsystems depending on it. In such cases, the subsystem, as it's turned off when seen from userland, shouldn't enforce any resource control. Also, the subsystem may be explicitly turned on later again and its interface files should be as close to the intial state as possible. This patch adds cgroup_subsys->css_reset() which is invoked when a css is hidden. The callback should disable resource control and reset the state to the vanilla state. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- kernel/cgroup.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 331fa296c7e0..3a6b77d7ba4a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2740,17 +2740,25 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, /* * All tasks are migrated out of disabled csses. Kill or hide * them. A css is hidden when the userland requests it to be - * disabled while other subsystems are still depending on it. + * disabled while other subsystems are still depending on it. The + * css must not actively control resources and be in the vanilla + * state if it's made visible again later. Controllers which may + * be depended upon should provide ->css_reset() for this purpose. */ for_each_subsys(ss, ssid) { if (!(disable & (1 << ssid))) continue; cgroup_for_each_live_child(child, cgrp) { - if (css_disable & (1 << ssid)) - kill_css(cgroup_css(child, ss)); - else + struct cgroup_subsys_state *css = cgroup_css(child, ss); + + if (css_disable & (1 << ssid)) { + kill_css(css); + } else { cgroup_clear_dir(child, 1 << ssid); + if (ss->css_reset) + ss->css_reset(css); + } } } -- cgit v1.2.3 From af0ba6789c8e43518635606d0af1ff475ba7471a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:57 -0400 Subject: cgroup: implement cgroup_subsys->depends_on Currently, the blkio subsystem attributes all of writeback IOs to the root. One of the issues is that there's no way to tell who originated a writeback IO from block layer. Those IOs are usually issued asynchronously from a task which didn't have anything to do with actually generating the dirty pages. The memory subsystem, when enabled, already keeps track of the ownership of each dirty page and it's desirable for blkio to piggyback instead of adding its own per-page tag. blkio piggybacking on memory is an implementation detail which preferably should be handled automatically without requiring explicit userland action. To achieve that, this patch implements cgroup_subsys->depends_on which contains the mask of subsystems which should be enabled together when the subsystem is enabled. The previous patches already implemented the support for enabled but invisible subsystems and cgroup_subsys->depends_on can be easily implemented by updating cgroup_refresh_child_subsys_mask() so that it calculates cgroup->child_subsys_mask considering cgroup_subsys->depends_on of the explicitly enabled subsystems. Documentation/cgroups/unified-hierarchy.txt is updated to explain that subsystems may not become immediately available after being unused from userland and that dependency could be a factor in it. As subsystems may already keep residual references, this doesn't significantly change how subsystem rebinding can be used. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- kernel/cgroup.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3a6b77d7ba4a..cd02e99d5d3b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1037,9 +1037,56 @@ static void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +/** + * cgroup_refresh_child_subsys_mask - update child_subsys_mask + * @cgrp: the target cgroup + * + * On the default hierarchy, a subsystem may request other subsystems to be + * enabled together through its ->depends_on mask. In such cases, more + * subsystems than specified in "cgroup.subtree_control" may be enabled. + * + * This function determines which subsystems need to be enabled given the + * current @cgrp->subtree_control and records it in + * @cgrp->child_subsys_mask. The resulting mask is always a superset of + * @cgrp->subtree_control and follows the usual hierarchy rules. + */ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) { - cgrp->child_subsys_mask = cgrp->subtree_control; + struct cgroup *parent = cgroup_parent(cgrp); + unsigned int cur_ss_mask = cgrp->subtree_control; + struct cgroup_subsys *ss; + int ssid; + + lockdep_assert_held(&cgroup_mutex); + + if (!cgroup_on_dfl(cgrp)) { + cgrp->child_subsys_mask = cur_ss_mask; + return; + } + + while (true) { + unsigned int new_ss_mask = cur_ss_mask; + + for_each_subsys(ss, ssid) + if (cur_ss_mask & (1 << ssid)) + new_ss_mask |= ss->depends_on; + + /* + * Mask out subsystems which aren't available. This can + * happen only if some depended-upon subsystems were bound + * to non-default hierarchies. + */ + if (parent) + new_ss_mask &= parent->child_subsys_mask; + else + new_ss_mask &= cgrp->root->subsys_mask; + + if (new_ss_mask == cur_ss_mask) + break; + cur_ss_mask = new_ss_mask; + } + + cgrp->child_subsys_mask = cur_ss_mask; } /** -- cgit v1.2.3 From 7450e90bbb8d834c190cc8100d1cc41888358c7c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jul 2014 10:08:07 -0400 Subject: cgroup: remove CGRP_ROOT_OPTION_MASK cgroup_root->flags only contains CGRP_ROOT_* flags and there's no reason to mask the flags. Remove CGRP_ROOT_OPTION_MASK. This doesn't cause any behavior differences. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- kernel/cgroup.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index cd02e99d5d3b..5411fffa4b70 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1490,11 +1490,10 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) removed_mask = root->subsys_mask & ~opts.subsys_mask; /* Don't allow flags or name to change at remount */ - if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) || + if ((opts.flags ^ root->flags) || (opts.name && strcmp(opts.name, root->name))) { pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", - opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "", - root->flags & CGRP_ROOT_OPTION_MASK, root->name); + opts.flags, opts.name ?: "", root->flags, root->name); ret = -EINVAL; goto out_unlock; } @@ -1762,7 +1761,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, goto out_unlock; } - if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) { + if (root->flags ^ opts.flags) { if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) { pr_err("sane_behavior: new mount options should match the existing superblock\n"); ret = -EINVAL; -- cgit v1.2.3 From c1d5d42efdb3e0470c1cfd2fcb50bc3eae813283 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jul 2014 10:08:08 -0400 Subject: cgroup: make interface file "cgroup.sane_behavior" legacy-only "cgroup.sane_behavior" is added to help distinguishing whether sane_behavior is in effect or not. We now have the default hierarchy where the flag is always in effect and are planning to remove supporting sane behavior on the legacy hierarchies making this file on the default hierarchy rather pointless. Let's make it legacy only and thus always zero. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- kernel/cgroup.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5411fffa4b70..0ea54af6b133 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2470,9 +2470,7 @@ static int cgroup_release_agent_show(struct seq_file *seq, void *v) static int cgroup_sane_behavior_show(struct seq_file *seq, void *v) { - struct cgroup *cgrp = seq_css(seq)->cgroup; - - seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp)); + seq_puts(seq, "0\n"); return 0; } @@ -4126,7 +4124,7 @@ static struct cftype cgroup_base_files[] = { }, { .name = "cgroup.sane_behavior", - .flags = CFTYPE_ONLY_ON_ROOT, + .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, .seq_show = cgroup_sane_behavior_show, }, { -- cgit v1.2.3 From aa6ec29bee8692ce232132f1a1ea2a1f9196610e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jul 2014 10:08:08 -0400 Subject: cgroup: remove sane_behavior support on non-default hierarchies sane_behavior has been used as a development vehicle for the default unified hierarchy. Now that the default hierarchy is in place, the flag became redundant and confusing as its usage is allowed on all hierarchies. There are gonna be either the default hierarchy or legacy ones. Let's make that clear by removing sane_behavior support on non-default hierarchies. This patch replaces cgroup_sane_behavior() with cgroup_on_dfl(). The comment on top of CGRP_ROOT_SANE_BEHAVIOR is moved to on top of cgroup_on_dfl() with sane_behavior specific part dropped. On the default and legacy hierarchies w/o sane_behavior, this shouldn't cause any behavior differences. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko --- kernel/cgroup.c | 19 ++++++++++--------- kernel/cpuset.c | 33 +++++++++++++++------------------ 2 files changed, 25 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0ea54af6b133..fb07c6d43aff 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1470,8 +1470,8 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) struct cgroup_sb_opts opts; unsigned int added_mask, removed_mask; - if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) { - pr_err("sane_behavior: remount is not allowed\n"); + if (root == &cgrp_dfl_root) { + pr_err("remount is not allowed\n"); return -EINVAL; } @@ -2943,9 +2943,9 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, /* * This isn't a proper migration and its usefulness is very - * limited. Disallow if sane_behavior. + * limited. Disallow on the default hierarchy. */ - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) return -EPERM; /* @@ -3031,7 +3031,7 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], /* does cft->flags tell us to skip this file on @cgrp? */ if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; - if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp)) + if ((cft->flags & CFTYPE_INSANE) && cgroup_on_dfl(cgrp)) continue; if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp)) continue; @@ -3764,8 +3764,9 @@ after: * * All this extra complexity was caused by the original implementation * committing to an entirely unnecessary property. In the long term, we - * want to do away with it. Explicitly scramble sort order if - * sane_behavior so that no such expectation exists in the new interface. + * want to do away with it. Explicitly scramble sort order if on the + * default hierarchy so that no such expectation exists in the new + * interface. * * Scrambling is done by swapping every two consecutive bits, which is * non-identity one-to-one mapping which disturbs sort order sufficiently. @@ -3780,7 +3781,7 @@ static pid_t pid_fry(pid_t pid) static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid) { - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) return pid_fry(pid); else return pid; @@ -3883,7 +3884,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, css_task_iter_end(&it); length = n; /* now sort & (if procs) strip out duplicates */ - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) sort(array, length, sizeof(pid_t), fried_cmppid, NULL); else sort(array, length, sizeof(pid_t), cmppid, NULL); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f6b33c696224..f9d4807c869f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1383,12 +1383,9 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, mutex_lock(&cpuset_mutex); - /* - * We allow to move tasks into an empty cpuset if sane_behavior - * flag is set. - */ + /* allow moving tasks into an empty cpuset if on default hierarchy */ ret = -ENOSPC; - if (!cgroup_sane_behavior(css->cgroup) && + if (!cgroup_on_dfl(css->cgroup) && (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; @@ -2030,7 +2027,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) static cpumask_t off_cpus; static nodemask_t off_mems; bool is_empty; - bool sane = cgroup_sane_behavior(cs->css.cgroup); + bool on_dfl = cgroup_on_dfl(cs->css.cgroup); retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2054,12 +2051,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' cpumask - * for empty cpuset to take on ancestor's cpumask. Otherwise, don't - * call update_tasks_cpumask() if the cpuset becomes empty, as - * the tasks in it will be migrated to an ancestor. + * If on_dfl, we need to update tasks' cpumask for empty cpuset to + * take on ancestor's cpumask. Otherwise, don't call + * update_tasks_cpumask() if the cpuset becomes empty, as the tasks + * in it will be migrated to an ancestor. */ - if ((sane && cpumask_empty(cs->cpus_allowed)) || + if ((on_dfl && cpumask_empty(cs->cpus_allowed)) || (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) update_tasks_cpumask(cs); @@ -2068,12 +2065,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' nodemask - * for empty cpuset to take on ancestor's nodemask. Otherwise, don't - * call update_tasks_nodemask() if the cpuset becomes empty, as - * the tasks in it will be migratd to an ancestor. + * If on_dfl, we need to update tasks' nodemask for empty cpuset to + * take on ancestor's nodemask. Otherwise, don't call + * update_tasks_nodemask() if the cpuset becomes empty, as the + * tasks in it will be migratd to an ancestor. */ - if ((sane && nodes_empty(cs->mems_allowed)) || + if ((on_dfl && nodes_empty(cs->mems_allowed)) || (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) update_tasks_nodemask(cs); @@ -2083,13 +2080,13 @@ retry: mutex_unlock(&cpuset_mutex); /* - * If sane_behavior flag is set, we'll keep tasks in empty cpusets. + * If on_dfl, we'll keep tasks in empty cpusets. * * Otherwise move tasks to the nearest ancestor with execution * resources. This is full cgroup operation which will * also call back into cpuset. Should be done outside any lock. */ - if (!sane && is_empty) + if (!on_dfl && is_empty) remove_tasks_in_empty_cpuset(cs); } -- cgit v1.2.3 From 7b9a6ba56e9519ed5413a002dc0b0f01aa598bb5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jul 2014 10:08:08 -0400 Subject: cgroup: clean up sane_behavior handling After the previous patch to remove sane_behavior support from non-default hierarchies, CGRP_ROOT_SANE_BEHAVIOR is used only to indicate the default hierarchy while parsing mount options. This patch makes the following cleanups around it. * Don't show it in the mount option. Eventually the default hierarchy will be assigned a different filesystem type. * As sane_behavior is no longer effective on non-default hierarchies and the default hierarchy doesn't accept any mount options, parse_cgroupfs_options() can consider sane_behavior mount option as indicating the default hierarchy and fail if any other options are specified with it. While at it, remove one of the double blank lines in the function. * cgroup_mount() can now simply test CGRP_ROOT_SANE_BEHAVIOR to tell whether to mount the default hierarchy or not. * As CGROUP_ROOT_SANE_BEHAVIOR's only role now is indicating whether to select the default hierarchy or not during mount, it doesn't need to be set in the default hierarchy itself. cgroup_init_early() updated accordingly. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- kernel/cgroup.c | 66 +++++++++++++++++++++++---------------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fb07c6d43aff..28f7d47c1d4a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1289,8 +1289,6 @@ static int cgroup_show_options(struct seq_file *seq, for_each_subsys(ss, ssid) if (root->subsys_mask & (1 << ssid)) seq_printf(seq, ",%s", ss->name); - if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) - seq_puts(seq, ",sane_behavior"); if (root->flags & CGRP_ROOT_NOPREFIX) seq_puts(seq, ",noprefix"); if (root->flags & CGRP_ROOT_XATTR) @@ -1324,6 +1322,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) bool all_ss = false, one_ss = false; unsigned int mask = -1U; struct cgroup_subsys *ss; + int nr_opts = 0; int i; #ifdef CONFIG_CPUSETS @@ -1333,6 +1332,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) memset(opts, 0, sizeof(*opts)); while ((token = strsep(&o, ",")) != NULL) { + nr_opts++; + if (!*token) return -EINVAL; if (!strcmp(token, "none")) { @@ -1417,36 +1418,32 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) return -ENOENT; } - /* Consistency checks */ - if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) { pr_warn("sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n"); - - if ((opts->flags & (CGRP_ROOT_NOPREFIX | CGRP_ROOT_XATTR)) || - opts->cpuset_clone_children || opts->release_agent || - opts->name) { - pr_err("sane_behavior: noprefix, xattr, clone_children, release_agent and name are not allowed\n"); + if (nr_opts != 1) { + pr_err("sane_behavior: no other mount options allowed\n"); return -EINVAL; } - } else { - /* - * If the 'all' option was specified select all the - * subsystems, otherwise if 'none', 'name=' and a subsystem - * name options were not specified, let's default to 'all' - */ - if (all_ss || (!one_ss && !opts->none && !opts->name)) - for_each_subsys(ss, i) - if (!ss->disabled) - opts->subsys_mask |= (1 << i); - - /* - * We either have to specify by name or by subsystems. (So - * all empty hierarchies must have a name). - */ - if (!opts->subsys_mask && !opts->name) - return -EINVAL; + return 0; } + /* + * If the 'all' option was specified select all the subsystems, + * otherwise if 'none', 'name=' and a subsystem name options were + * not specified, let's default to 'all' + */ + if (all_ss || (!one_ss && !opts->none && !opts->name)) + for_each_subsys(ss, i) + if (!ss->disabled) + opts->subsys_mask |= (1 << i); + + /* + * We either have to specify by name or by subsystems. (So all + * empty hierarchies must have a name). + */ + if (!opts->subsys_mask && !opts->name) + return -EINVAL; + /* * Option noprefix was introduced just for backward compatibility * with the old cpuset, so we allow noprefix only if mounting just @@ -1455,7 +1452,6 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask)) return -EINVAL; - /* Can't specify "none" and some subsystems */ if (opts->subsys_mask && opts->none) return -EINVAL; @@ -1724,7 +1720,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, goto out_unlock; /* look for a matching existing root */ - if (!opts.subsys_mask && !opts.none && !opts.name) { + if (opts.flags & CGRP_ROOT_SANE_BEHAVIOR) { cgrp_dfl_root_visible = true; root = &cgrp_dfl_root; cgroup_get(&root->cgrp); @@ -1761,15 +1757,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, goto out_unlock; } - if (root->flags ^ opts.flags) { - if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) { - pr_err("sane_behavior: new mount options should match the existing superblock\n"); - ret = -EINVAL; - goto out_unlock; - } else { - pr_warn("new mount options do not match the existing superblock, will be ignored\n"); - } - } + if (root->flags ^ opts.flags) + pr_warn("new mount options do not match the existing superblock, will be ignored\n"); /* * A root's lifetime is governed by its root cgroup. @@ -4809,8 +4798,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) */ int __init cgroup_init_early(void) { - static struct cgroup_sb_opts __initdata opts = - { .flags = CGRP_ROOT_SANE_BEHAVIOR }; + static struct cgroup_sb_opts __initdata opts; struct cgroup_subsys *ss; int i; -- cgit v1.2.3 From e2b9a3d7d8f4ab2f3491b8ed2ac6af692a2269b2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:03 +0800 Subject: cpuset: add cs->effective_cpus and cs->effective_mems We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierachy, effective_mask and configured_mask are the same, so we won't break old interfaces. This patch adds the effective masks to struct cpuset and initializes them. The effective masks of the top cpuset is the same with configured masks, and a child cpuset inherits its parent's effective masks. This won't introduce behavior change. v2: - s/real_{mems,cpus}_allowed/effective_{mems,cpus}, suggested by Tejun. - don't init effective masks in cpuset_css_online() if !cgroup_on_dfl. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f9d4807c869f..ef0974c73b4b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -76,8 +76,14 @@ struct cpuset { struct cgroup_subsys_state css; unsigned long flags; /* "unsigned long" so bitops work */ - cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ - nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ + + /* user-configured CPUs and Memory Nodes allow to tasks */ + cpumask_var_t cpus_allowed; + nodemask_t mems_allowed; + + /* effective CPUs and Memory Nodes allow to tasks */ + cpumask_var_t effective_cpus; + nodemask_t effective_mems; /* * This is old Memory Nodes tasks took on. @@ -376,13 +382,20 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) if (!trial) return NULL; - if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) { - kfree(trial); - return NULL; - } - cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); + if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) + goto free_cs; + if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL)) + goto free_cpus; + cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); + cpumask_copy(trial->effective_cpus, cs->effective_cpus); return trial; + +free_cpus: + free_cpumask_var(trial->cpus_allowed); +free_cs: + kfree(trial); + return NULL; } /** @@ -391,6 +404,7 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) */ static void free_trial_cpuset(struct cpuset *trial) { + free_cpumask_var(trial->effective_cpus); free_cpumask_var(trial->cpus_allowed); kfree(trial); } @@ -1848,18 +1862,26 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); - if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) { - kfree(cs); - return ERR_PTR(-ENOMEM); - } + if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) + goto free_cs; + if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) + goto free_cpus; set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpumask_clear(cs->cpus_allowed); nodes_clear(cs->mems_allowed); + cpumask_clear(cs->effective_cpus); + nodes_clear(cs->effective_mems); fmeter_init(&cs->fmeter); cs->relax_domain_level = -1; return &cs->css; + +free_cpus: + free_cpumask_var(cs->cpus_allowed); +free_cs: + kfree(cs); + return ERR_PTR(-ENOMEM); } static int cpuset_css_online(struct cgroup_subsys_state *css) @@ -1882,6 +1904,13 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpuset_inc(); + mutex_lock(&callback_mutex); + if (cgroup_on_dfl(cs->css.cgroup)) { + cpumask_copy(cs->effective_cpus, parent->effective_cpus); + cs->effective_mems = parent->effective_mems; + } + mutex_unlock(&callback_mutex); + if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; @@ -1941,6 +1970,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); + free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->cpus_allowed); kfree(cs); } @@ -1969,9 +1999,13 @@ int __init cpuset_init(void) if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)) BUG(); + if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)) + BUG(); cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); + cpumask_setall(top_cpuset.effective_cpus); + nodes_setall(top_cpuset.effective_mems); fmeter_init(&top_cpuset.fmeter); set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); @@ -2207,6 +2241,9 @@ void __init cpuset_init_smp(void) top_cpuset.mems_allowed = node_states[N_MEMORY]; top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; + cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); + top_cpuset.effective_mems = node_states[N_MEMORY]; + register_hotmemory_notifier(&cpuset_track_online_nodes_nb); } -- cgit v1.2.3 From 1344ab9c2991b45bacfd2e26a8800a62663ae427 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:16 +0800 Subject: cpuset: update cpuset->effective_{cpus,mems} at hotplug We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierarchy, effective_mask and configured_mask are the same, so we won't break old interfaces. To make cs->effective_{cpus,mems} to be effective masks, we need to - update the effective masks at hotplug - update the effective masks at config change - take on ancestor's mask when the effective mask is empty The first item is done here. This won't introduce behavior change. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ef0974c73b4b..94f651d2eee5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2082,6 +2082,7 @@ retry: mutex_lock(&callback_mutex); cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); + cpumask_andnot(cs->effective_cpus, cs->effective_cpus, &off_cpus); mutex_unlock(&callback_mutex); /* @@ -2096,6 +2097,7 @@ retry: mutex_lock(&callback_mutex); nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); + nodes_andnot(cs->effective_mems, cs->effective_mems, off_mems); mutex_unlock(&callback_mutex); /* @@ -2159,6 +2161,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) if (cpus_updated) { mutex_lock(&callback_mutex); cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); + cpumask_copy(top_cpuset.effective_cpus, &new_cpus); mutex_unlock(&callback_mutex); /* we don't mess with cpumasks of tasks in top_cpuset */ } @@ -2167,6 +2170,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) if (mems_updated) { mutex_lock(&callback_mutex); top_cpuset.mems_allowed = new_mems; + top_cpuset.effective_mems = new_mems; mutex_unlock(&callback_mutex); update_tasks_nodemask(&top_cpuset); } -- cgit v1.2.3 From 734d45130cb4f668fb33d182f6943523628582ef Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:29 +0800 Subject: cpuset: update cs->effective_{cpus, mems} when config changes We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierarchy, effective_mask and configured_mask are the same, so we won't break old interfaces. To make cs->effective_{cpus,mems} to be effective masks, we need to - update the effective masks at hotplug - update the effective masks at config change - take on ancestor's mask when the effective mask is empty The second item is done here. We don't need to treat root_cs specially in update_cpumasks_hier(). This won't introduce behavior change. v3: - add a WARN_ON() to check if effective masks are the same with configured masks on legacy hierarchy. - pass trialcs->cpus_allowed to update_cpumasks_hier() and add a comment for it. Similar change for update_nodemasks_hier(). Suggested by Tejun. v2: - revise the comment in update_{cpu,node}masks_hier(), suggested by Tejun. - fix to use @cp instead of @cs in these two functions. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 88 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 94f651d2eee5..da766c3736c4 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -855,36 +855,45 @@ static void update_tasks_cpumask(struct cpuset *cs) } /* - * update_tasks_cpumask_hier - Update the cpumasks of tasks in the hierarchy. - * @root_cs: the root cpuset of the hierarchy - * @update_root: update root cpuset or not? + * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree + * @cs: the cpuset to consider + * @new_cpus: temp variable for calculating new effective_cpus + * + * When congifured cpumask is changed, the effective cpumasks of this cpuset + * and all its descendants need to be updated. * - * This will update cpumasks of tasks in @root_cs and all other empty cpusets - * which take on cpumask of @root_cs. + * On legacy hierachy, effective_cpus will be the same with cpu_allowed. * * Called with cpuset_mutex held */ -static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root) +static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; rcu_read_lock(); - cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { - if (cp == root_cs) { - if (!update_root) - continue; - } else { - /* skip the whole subtree if @cp have some CPU */ - if (!cpumask_empty(cp->cpus_allowed)) { - pos_css = css_rightmost_descendant(pos_css); - continue; - } + cpuset_for_each_descendant_pre(cp, pos_css, cs) { + struct cpuset *parent = parent_cs(cp); + + cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus); + + /* Skip the whole subtree if the cpumask remains the same. */ + if (cpumask_equal(new_cpus, cp->effective_cpus)) { + pos_css = css_rightmost_descendant(pos_css); + continue; } + if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); + mutex_lock(&callback_mutex); + cpumask_copy(cp->effective_cpus, new_cpus); + mutex_unlock(&callback_mutex); + + WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && + !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); + update_tasks_cpumask(cp); rcu_read_lock(); @@ -940,7 +949,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); mutex_unlock(&callback_mutex); - update_tasks_cpumask_hier(cs, true); + /* use trialcs->cpus_allowed as a temp variable */ + update_cpumasks_hier(cs, trialcs->cpus_allowed); if (is_load_balanced) rebuild_sched_domains_locked(); @@ -1091,36 +1101,45 @@ static void update_tasks_nodemask(struct cpuset *cs) } /* - * update_tasks_nodemask_hier - Update the nodemasks of tasks in the hierarchy. - * @cs: the root cpuset of the hierarchy - * @update_root: update the root cpuset or not? + * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree + * @cs: the cpuset to consider + * @new_mems: a temp variable for calculating new effective_mems + * + * When configured nodemask is changed, the effective nodemasks of this cpuset + * and all its descendants need to be updated. * - * This will update nodemasks of tasks in @root_cs and all other empty cpusets - * which take on nodemask of @root_cs. + * On legacy hiearchy, effective_mems will be the same with mems_allowed. * * Called with cpuset_mutex held */ -static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root) +static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; rcu_read_lock(); - cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { - if (cp == root_cs) { - if (!update_root) - continue; - } else { - /* skip the whole subtree if @cp have some CPU */ - if (!nodes_empty(cp->mems_allowed)) { - pos_css = css_rightmost_descendant(pos_css); - continue; - } + cpuset_for_each_descendant_pre(cp, pos_css, cs) { + struct cpuset *parent = parent_cs(cp); + + nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); + + /* Skip the whole subtree if the nodemask remains the same. */ + if (nodes_equal(*new_mems, cp->effective_mems)) { + pos_css = css_rightmost_descendant(pos_css); + continue; } + if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); + mutex_lock(&callback_mutex); + cp->effective_mems = *new_mems; + mutex_unlock(&callback_mutex); + + WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && + nodes_equal(cp->mems_allowed, cp->effective_mems)); + update_tasks_nodemask(cp); rcu_read_lock(); @@ -1188,7 +1207,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, cs->mems_allowed = trialcs->mems_allowed; mutex_unlock(&callback_mutex); - update_tasks_nodemask_hier(cs, true); + /* use trialcs->mems_allowed as a temp variable */ + update_nodemasks_hier(cs, &cs->mems_allowed); done: return retval; } -- cgit v1.2.3 From 554b0d1c845e42ef01d7f6f5f24b3e4c6129ce8f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:41 +0800 Subject: cpuset: inherit ancestor's masks if effective_{cpus, mems} becomes empty We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierarchy, effective_mask and configured_mask are the same, so we won't break old interfaces. To make cs->effective_{cpus,mems} to be effective masks, we need to - update the effective masks at hotplug - update the effective masks at config change - take on ancestor's mask when the effective mask is empty The last item is done here. This won't introduce behavior change. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index da766c3736c4..f8340026d01c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -877,6 +877,13 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) cpumask_and(new_cpus, cp->cpus_allowed, parent->effective_cpus); + /* + * If it becomes empty, inherit the effective mask of the + * parent, which is guaranteed to have some CPUs. + */ + if (cpumask_empty(new_cpus)) + cpumask_copy(new_cpus, parent->effective_cpus); + /* Skip the whole subtree if the cpumask remains the same. */ if (cpumask_equal(new_cpus, cp->effective_cpus)) { pos_css = css_rightmost_descendant(pos_css); @@ -1123,6 +1130,13 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); + /* + * If it becomes empty, inherit the effective mask of the + * parent, which is guaranteed to have some MEMs. + */ + if (nodes_empty(*new_mems)) + *new_mems = parent->effective_mems; + /* Skip the whole subtree if the nodemask remains the same. */ if (nodes_equal(*new_mems, cp->effective_mems)) { pos_css = css_rightmost_descendant(pos_css); @@ -2102,7 +2116,11 @@ retry: mutex_lock(&callback_mutex); cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); + + /* Inherit the effective mask of the parent, if it becomes empty. */ cpumask_andnot(cs->effective_cpus, cs->effective_cpus, &off_cpus); + if (on_dfl && cpumask_empty(cs->effective_cpus)) + cpumask_copy(cs->effective_cpus, parent_cs(cs)->effective_cpus); mutex_unlock(&callback_mutex); /* @@ -2117,7 +2135,11 @@ retry: mutex_lock(&callback_mutex); nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); + + /* Inherit the effective mask of the parent, if it becomes empty */ nodes_andnot(cs->effective_mems, cs->effective_mems, off_mems); + if (on_dfl && nodes_empty(cs->effective_mems)) + cs->effective_mems = parent_cs(cs)->effective_mems; mutex_unlock(&callback_mutex); /* -- cgit v1.2.3 From 8b5f1c52dcd1accd3a940cfcb148bef6de589524 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:47:50 +0800 Subject: cpuset: use effective cpumask to build sched domains We're going to have separate user-configured masks and effective ones. Eventually configured masks can only be changed by writing cpuset.cpus and cpuset.mems, and they won't be restricted by parent cpuset. While effective masks reflect cpu/memory hotplug and hierachical restriction, and these are the real masks that apply to the tasks in the cpuset. We calculate effective mask this way: - top cpuset's effective_mask == online_mask, otherwise - cpuset's effective_mask == configured_mask & parent effective_mask, if the result is empty, it inherits parent effective mask. Those behavior changes are for default hierarchy only. For legacy hierarchy, effective_mask and configured_mask are the same, so we won't break old interfaces. We should partition sched domains according to effective_cpus, which is the real cpulist that takes effects on tasks in the cpuset. This won't introduce behavior change. v2: - Add a comment for the call of rebuild_sched_domains(), suggested by Tejun. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f8340026d01c..60577ccdbfc7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -494,11 +494,11 @@ out: #ifdef CONFIG_SMP /* * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping cpus_allowed masks? + * Do cpusets a, b have overlapping effective cpus_allowed masks? */ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) { - return cpumask_intersects(a->cpus_allowed, b->cpus_allowed); + return cpumask_intersects(a->effective_cpus, b->effective_cpus); } static void @@ -615,7 +615,7 @@ static int generate_sched_domains(cpumask_var_t **domains, *dattr = SD_ATTR_INIT; update_domain_attr_tree(dattr, &top_cpuset); } - cpumask_copy(doms[0], top_cpuset.cpus_allowed); + cpumask_copy(doms[0], top_cpuset.effective_cpus); goto done; } @@ -719,7 +719,7 @@ restart: struct cpuset *b = csa[j]; if (apn == b->pn) { - cpumask_or(dp, dp, b->cpus_allowed); + cpumask_or(dp, dp, b->effective_cpus); if (dattr) update_domain_attr_tree(dattr + nslot, b); @@ -771,7 +771,7 @@ static void rebuild_sched_domains_locked(void) * passing doms with offlined cpu to partition_sched_domains(). * Anyways, hotplug work item will rebuild sched domains. */ - if (!cpumask_equal(top_cpuset.cpus_allowed, cpu_active_mask)) + if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) goto out; /* Generate domain masks and attrs */ @@ -870,6 +870,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; + bool need_rebuild_sched_domains = false; rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, cs) { @@ -903,10 +904,21 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) update_tasks_cpumask(cp); + /* + * If the effective cpumask of any non-empty cpuset is changed, + * we need to rebuild sched domains. + */ + if (!cpumask_empty(cp->cpus_allowed) && + is_sched_load_balance(cp)) + need_rebuild_sched_domains = true; + rcu_read_lock(); css_put(&cp->css); } rcu_read_unlock(); + + if (need_rebuild_sched_domains) + rebuild_sched_domains_locked(); } /** @@ -919,7 +931,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; - int is_load_balanced; /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ if (cs == &top_cpuset) @@ -950,17 +961,12 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) return retval; - is_load_balanced = is_sched_load_balance(trialcs); - mutex_lock(&callback_mutex); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); mutex_unlock(&callback_mutex); /* use trialcs->cpus_allowed as a temp variable */ update_cpumasks_hier(cs, trialcs->cpus_allowed); - - if (is_load_balanced) - rebuild_sched_domains_locked(); return 0; } -- cgit v1.2.3 From 39bd0d15eca5af15ee1492964f317ecdb024a9d6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:48:01 +0800 Subject: cpuset: initialize top_cpuset's configured masks at mount We now have to support different behaviors for default hierachy and legacy hiearchy, top_cpuset's configured masks need to be initialized accordingly. Suppose we've offlined cpu1. On default hierarchy: # mount -t cgroup -o __DEVEL__sane_behavior xxx /cpuset # cat /cpuset/cpuset.cpus 0-15 On legacy hierarchy: # mount -t cgroup xxx /cpuset # cat /cpuset/cpuset.cpus 0,2-15 Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 60577ccdbfc7..e4c31e6b8716 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2015,16 +2015,35 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) kfree(cs); } +static void cpuset_bind(struct cgroup_subsys_state *root_css) +{ + mutex_lock(&cpuset_mutex); + mutex_lock(&callback_mutex); + + if (cgroup_on_dfl(root_css->cgroup)) { + cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); + top_cpuset.mems_allowed = node_possible_map; + } else { + cpumask_copy(top_cpuset.cpus_allowed, + top_cpuset.effective_cpus); + top_cpuset.mems_allowed = top_cpuset.effective_mems; + } + + mutex_unlock(&callback_mutex); + mutex_unlock(&cpuset_mutex); +} + struct cgroup_subsys cpuset_cgrp_subsys = { - .css_alloc = cpuset_css_alloc, - .css_online = cpuset_css_online, - .css_offline = cpuset_css_offline, - .css_free = cpuset_css_free, - .can_attach = cpuset_can_attach, - .cancel_attach = cpuset_cancel_attach, - .attach = cpuset_attach, - .base_cftypes = files, - .early_init = 1, + .css_alloc = cpuset_css_alloc, + .css_online = cpuset_css_online, + .css_offline = cpuset_css_offline, + .css_free = cpuset_css_free, + .can_attach = cpuset_can_attach, + .cancel_attach = cpuset_cancel_attach, + .attach = cpuset_attach, + .bind = cpuset_bind, + .base_cftypes = files, + .early_init = 1, }; /** -- cgit v1.2.3 From ae1c802382f7af60aa54879fb4f5920a9df1ff48 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:48:32 +0800 Subject: cpuset: apply cs->effective_{cpus,mems} Now we can use cs->effective_{cpus,mems} as effective masks. It's used whenever: - we update tasks' cpus_allowed/mems_allowed, - we want to retrieve tasks_cs(tsk)'s cpus_allowed/mems_allowed. They actually replace effective_{cpu,node}mask_cpuset(). effective_mask == configured_mask & parent effective_mask except when the reault is empty, in which case it inherits parent effective_mask. The result equals the mask computed from effective_{cpu,node}mask_cpuset(). This won't affect the original legacy hierarchy, because in this case we make sure the effective masks are always the same with user-configured masks. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 83 ++++++++++----------------------------------------------- 1 file changed, 14 insertions(+), 69 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e4c31e6b8716..820870a715f8 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -313,9 +313,9 @@ static struct file_system_type cpuset_fs_type = { */ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) { - while (!cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) + while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) cs = parent_cs(cs); - cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); + cpumask_and(pmask, cs->effective_cpus, cpu_online_mask); } /* @@ -331,9 +331,9 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) */ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) { - while (!nodes_intersects(cs->mems_allowed, node_states[N_MEMORY])) + while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) cs = parent_cs(cs); - nodes_and(*pmask, cs->mems_allowed, node_states[N_MEMORY]); + nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); } /* @@ -795,45 +795,6 @@ void rebuild_sched_domains(void) mutex_unlock(&cpuset_mutex); } -/* - * effective_cpumask_cpuset - return nearest ancestor with non-empty cpus - * @cs: the cpuset in interest - * - * A cpuset's effective cpumask is the cpumask of the nearest ancestor - * with non-empty cpus. We use effective cpumask whenever: - * - we update tasks' cpus_allowed. (they take on the ancestor's cpumask - * if the cpuset they reside in has no cpus) - * - we want to retrieve task_cs(tsk)'s cpus_allowed. - * - * Called with cpuset_mutex held. cpuset_cpus_allowed_fallback() is an - * exception. See comments there. - */ -static struct cpuset *effective_cpumask_cpuset(struct cpuset *cs) -{ - while (cpumask_empty(cs->cpus_allowed)) - cs = parent_cs(cs); - return cs; -} - -/* - * effective_nodemask_cpuset - return nearest ancestor with non-empty mems - * @cs: the cpuset in interest - * - * A cpuset's effective nodemask is the nodemask of the nearest ancestor - * with non-empty memss. We use effective nodemask whenever: - * - we update tasks' mems_allowed. (they take on the ancestor's nodemask - * if the cpuset they reside in has no mems) - * - we want to retrieve task_cs(tsk)'s mems_allowed. - * - * Called with cpuset_mutex held. - */ -static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) -{ - while (nodes_empty(cs->mems_allowed)) - cs = parent_cs(cs); - return cs; -} - /** * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed @@ -844,13 +805,12 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) */ static void update_tasks_cpumask(struct cpuset *cs) { - struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); struct css_task_iter it; struct task_struct *task; css_task_iter_start(&cs->css, &it); while ((task = css_task_iter_next(&it))) - set_cpus_allowed_ptr(task, cpus_cs->cpus_allowed); + set_cpus_allowed_ptr(task, cs->effective_cpus); css_task_iter_end(&it); } @@ -988,15 +948,13 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to) { struct task_struct *tsk = current; - struct cpuset *mems_cs; tsk->mems_allowed = *to; do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); rcu_read_lock(); - mems_cs = effective_nodemask_cpuset(task_cs(tsk)); - guarantee_online_mems(mems_cs, &tsk->mems_allowed); + guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed); rcu_read_unlock(); } @@ -1065,13 +1023,12 @@ static void *cpuset_being_rebound; static void update_tasks_nodemask(struct cpuset *cs) { static nodemask_t newmems; /* protected by cpuset_mutex */ - struct cpuset *mems_cs = effective_nodemask_cpuset(cs); struct css_task_iter it; struct task_struct *task; cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ - guarantee_online_mems(mems_cs, &newmems); + guarantee_online_mems(cs, &newmems); /* * The mpol_rebind_mm() call takes mmap_sem, which we couldn't @@ -1497,8 +1454,6 @@ static void cpuset_attach(struct cgroup_subsys_state *css, struct task_struct *leader = cgroup_taskset_first(tset); struct cpuset *cs = css_cs(css); struct cpuset *oldcs = cpuset_attach_old_cs; - struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); - struct cpuset *mems_cs = effective_nodemask_cpuset(cs); mutex_lock(&cpuset_mutex); @@ -1506,9 +1461,9 @@ static void cpuset_attach(struct cgroup_subsys_state *css, if (cs == &top_cpuset) cpumask_copy(cpus_attach, cpu_possible_mask); else - guarantee_online_cpus(cpus_cs, cpus_attach); + guarantee_online_cpus(cs, cpus_attach); - guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to); + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cgroup_taskset_for_each(task, tset) { /* @@ -1525,11 +1480,9 @@ static void cpuset_attach(struct cgroup_subsys_state *css, * Change mm, possibly for multiple threads in a threadgroup. This is * expensive and may sleep. */ - cpuset_attach_nodemask_to = cs->mems_allowed; + cpuset_attach_nodemask_to = cs->effective_mems; mm = get_task_mm(leader); if (mm) { - struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); - mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); /* @@ -1540,7 +1493,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, * mm from. */ if (is_memory_migrate(cs)) { - cpuset_migrate_mm(mm, &mems_oldcs->old_mems_allowed, + cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); } mmput(mm); @@ -2331,23 +2284,17 @@ void __init cpuset_init_smp(void) void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { - struct cpuset *cpus_cs; - mutex_lock(&callback_mutex); rcu_read_lock(); - cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); - guarantee_online_cpus(cpus_cs, pmask); + guarantee_online_cpus(task_cs(tsk), pmask); rcu_read_unlock(); mutex_unlock(&callback_mutex); } void cpuset_cpus_allowed_fallback(struct task_struct *tsk) { - struct cpuset *cpus_cs; - rcu_read_lock(); - cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); - do_set_cpus_allowed(tsk, cpus_cs->cpus_allowed); + do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus); rcu_read_unlock(); /* @@ -2386,13 +2333,11 @@ void cpuset_init_current_mems_allowed(void) nodemask_t cpuset_mems_allowed(struct task_struct *tsk) { - struct cpuset *mems_cs; nodemask_t mask; mutex_lock(&callback_mutex); rcu_read_lock(); - mems_cs = effective_nodemask_cpuset(task_cs(tsk)); - guarantee_online_mems(mems_cs, &mask); + guarantee_online_mems(task_cs(tsk), &mask); rcu_read_unlock(); mutex_unlock(&callback_mutex); -- cgit v1.2.3 From 7e88291beefbb758fa3b27e500ee2e0c888d6e44 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:48:42 +0800 Subject: cpuset: make cs->{cpus, mems}_allowed as user-configured masks Now we've used effective cpumasks to enforce hierarchical manner, we can use cs->{cpus,mems}_allowed as configured masks. Configured masks can be changed by writing cpuset.cpus and cpuset.mems only. The new behaviors are: - They won't be changed by hotplug anymore. - They won't be limited by its parent's masks. This ia a behavior change, but won't take effect unless mount with sane_behavior. v2: - Add comments to explain the differences between configured masks and effective masks. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 820870a715f8..4b409d2ecbb9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -77,6 +77,26 @@ struct cpuset { unsigned long flags; /* "unsigned long" so bitops work */ + /* + * On default hierarchy: + * + * The user-configured masks can only be changed by writing to + * cpuset.cpus and cpuset.mems, and won't be limited by the + * parent masks. + * + * The effective masks is the real masks that apply to the tasks + * in the cpuset. They may be changed if the configured masks are + * changed or hotplug happens. + * + * effective_mask == configured_mask & parent's effective_mask, + * and if it ends up empty, it will inherit the parent's mask. + * + * + * On legacy hierachy: + * + * The user-configured masks are always the same with effective masks. + */ + /* user-configured CPUs and Memory Nodes allow to tasks */ cpumask_var_t cpus_allowed; nodemask_t mems_allowed; @@ -450,9 +470,9 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) par = parent_cs(cur); - /* We must be a subset of our parent cpuset */ + /* On legacy hiearchy, we must be a subset of our parent cpuset. */ ret = -EACCES; - if (!is_cpuset_subset(trial, par)) + if (!cgroup_on_dfl(cur->css.cgroup) && !is_cpuset_subset(trial, par)) goto out; /* @@ -2167,6 +2187,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated, mems_updated; + bool on_dfl = cgroup_on_dfl(top_cpuset.css.cgroup); mutex_lock(&cpuset_mutex); @@ -2174,13 +2195,14 @@ static void cpuset_hotplug_workfn(struct work_struct *work) cpumask_copy(&new_cpus, cpu_active_mask); new_mems = node_states[N_MEMORY]; - cpus_updated = !cpumask_equal(top_cpuset.cpus_allowed, &new_cpus); - mems_updated = !nodes_equal(top_cpuset.mems_allowed, new_mems); + cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus); + mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { mutex_lock(&callback_mutex); - cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); + if (!on_dfl) + cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); cpumask_copy(top_cpuset.effective_cpus, &new_cpus); mutex_unlock(&callback_mutex); /* we don't mess with cpumasks of tasks in top_cpuset */ @@ -2189,7 +2211,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work) /* synchronize mems_allowed to N_MEMORY */ if (mems_updated) { mutex_lock(&callback_mutex); - top_cpuset.mems_allowed = new_mems; + if (!on_dfl) + top_cpuset.mems_allowed = new_mems; top_cpuset.effective_mems = new_mems; mutex_unlock(&callback_mutex); update_tasks_nodemask(&top_cpuset); -- cgit v1.2.3 From 390a36aadf39e241c83035469aae48ed1a144088 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:48:54 +0800 Subject: cpuset: refactor cpuset_hotplug_update_tasks() We mix the handling for both default hierarchy and legacy hierarchy in the same function, and it's quite messy, so split into two functions. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 121 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 66 insertions(+), 55 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4b409d2ecbb9..41822e2027c1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2080,6 +2080,65 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) } } +static void hotplug_update_tasks_legacy(struct cpuset *cs, + struct cpumask *off_cpus, + nodemask_t *off_mems) +{ + bool is_empty; + + mutex_lock(&callback_mutex); + cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, off_cpus); + cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); + nodes_andnot(cs->mems_allowed, cs->mems_allowed, *off_mems); + nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); + mutex_unlock(&callback_mutex); + + /* + * Don't call update_tasks_cpumask() if the cpuset becomes empty, + * as the tasks will be migratecd to an ancestor. + */ + if (!cpumask_empty(off_cpus) && !cpumask_empty(cs->cpus_allowed)) + update_tasks_cpumask(cs); + if (!nodes_empty(*off_mems) && !nodes_empty(cs->mems_allowed)) + update_tasks_nodemask(cs); + + is_empty = cpumask_empty(cs->cpus_allowed) || + nodes_empty(cs->mems_allowed); + + mutex_unlock(&cpuset_mutex); + + /* + * Move tasks to the nearest ancestor with execution resources, + * This is full cgroup operation which will also call back into + * cpuset. Should be done outside any lock. + */ + if (is_empty) + remove_tasks_in_empty_cpuset(cs); + + mutex_lock(&cpuset_mutex); +} + +static void hotplug_update_tasks(struct cpuset *cs, + struct cpumask *off_cpus, + nodemask_t *off_mems) +{ + mutex_lock(&callback_mutex); + cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); + if (cpumask_empty(cs->effective_cpus)) + cpumask_copy(cs->effective_cpus, + parent_cs(cs)->effective_cpus); + + nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); + if (nodes_empty(cs->effective_mems)) + cs->effective_mems = parent_cs(cs)->effective_mems; + mutex_unlock(&callback_mutex); + + if (!cpumask_empty(off_cpus)) + update_tasks_cpumask(cs); + if (!nodes_empty(*off_mems)) + update_tasks_nodemask(cs); +} + /** * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug * @cs: cpuset in interest @@ -2092,9 +2151,6 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) { static cpumask_t off_cpus; static nodemask_t off_mems; - bool is_empty; - bool on_dfl = cgroup_on_dfl(cs->css.cgroup); - retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2109,61 +2165,16 @@ retry: goto retry; } - cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); - nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); - - mutex_lock(&callback_mutex); - cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus); - - /* Inherit the effective mask of the parent, if it becomes empty. */ - cpumask_andnot(cs->effective_cpus, cs->effective_cpus, &off_cpus); - if (on_dfl && cpumask_empty(cs->effective_cpus)) - cpumask_copy(cs->effective_cpus, parent_cs(cs)->effective_cpus); - mutex_unlock(&callback_mutex); - - /* - * If on_dfl, we need to update tasks' cpumask for empty cpuset to - * take on ancestor's cpumask. Otherwise, don't call - * update_tasks_cpumask() if the cpuset becomes empty, as the tasks - * in it will be migrated to an ancestor. - */ - if ((on_dfl && cpumask_empty(cs->cpus_allowed)) || - (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) - update_tasks_cpumask(cs); - - mutex_lock(&callback_mutex); - nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); + cpumask_andnot(&off_cpus, cs->effective_cpus, + top_cpuset.effective_cpus); + nodes_andnot(off_mems, cs->effective_mems, top_cpuset.effective_mems); - /* Inherit the effective mask of the parent, if it becomes empty */ - nodes_andnot(cs->effective_mems, cs->effective_mems, off_mems); - if (on_dfl && nodes_empty(cs->effective_mems)) - cs->effective_mems = parent_cs(cs)->effective_mems; - mutex_unlock(&callback_mutex); - - /* - * If on_dfl, we need to update tasks' nodemask for empty cpuset to - * take on ancestor's nodemask. Otherwise, don't call - * update_tasks_nodemask() if the cpuset becomes empty, as the - * tasks in it will be migratd to an ancestor. - */ - if ((on_dfl && nodes_empty(cs->mems_allowed)) || - (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) - update_tasks_nodemask(cs); - - is_empty = cpumask_empty(cs->cpus_allowed) || - nodes_empty(cs->mems_allowed); + if (cgroup_on_dfl(cs->css.cgroup)) + hotplug_update_tasks(cs, &off_cpus, &off_mems); + else + hotplug_update_tasks_legacy(cs, &off_cpus, &off_mems); mutex_unlock(&cpuset_mutex); - - /* - * If on_dfl, we'll keep tasks in empty cpusets. - * - * Otherwise move tasks to the nearest ancestor with execution - * resources. This is full cgroup operation which will - * also call back into cpuset. Should be done outside any lock. - */ - if (!on_dfl && is_empty) - remove_tasks_in_empty_cpuset(cs); } /** -- cgit v1.2.3 From be4c9dd7aee5ecf3e748da68c27b38bdca70d444 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:49:04 +0800 Subject: cpuset: enable onlined cpu/node in effective masks Firstly offline cpu1: # echo 0-1 > cpuset.cpus # echo 0 > /sys/devices/system/cpu/cpu1/online # cat cpuset.cpus 0-1 # cat cpuset.effective_cpus 0 Then online it: # echo 1 > /sys/devices/system/cpu/cpu1/online # cat cpuset.cpus 0-1 # cat cpuset.effective_cpus 0-1 And cpuset will bring it back to the effective mask. The implementation is quite straightforward. Instead of calculating the offlined cpus/mems and do updates, we just set the new effective_mask to online_mask & congifured_mask. This is a behavior change for default hierarchy, so legacy hierarchy won't be affected. v2: - make refactoring of cpuset_hotplug_update_tasks() as seperate patch, suggested by Tejun. - make hotplug_update_tasks_insane() use @new_cpus and @new_mems as hotplug_update_tasks_sane() does. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 65 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 41822e2027c1..c47cb940712e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2080,26 +2080,27 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) } } -static void hotplug_update_tasks_legacy(struct cpuset *cs, - struct cpumask *off_cpus, - nodemask_t *off_mems) +static void +hotplug_update_tasks_legacy(struct cpuset *cs, + struct cpumask *new_cpus, nodemask_t *new_mems, + bool cpus_updated, bool mems_updated) { bool is_empty; mutex_lock(&callback_mutex); - cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, off_cpus); - cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); - nodes_andnot(cs->mems_allowed, cs->mems_allowed, *off_mems); - nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); + cpumask_copy(cs->cpus_allowed, new_cpus); + cpumask_copy(cs->effective_cpus, new_cpus); + cs->mems_allowed = *new_mems; + cs->effective_mems = *new_mems; mutex_unlock(&callback_mutex); /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, * as the tasks will be migratecd to an ancestor. */ - if (!cpumask_empty(off_cpus) && !cpumask_empty(cs->cpus_allowed)) + if (cpus_updated && !cpumask_empty(cs->cpus_allowed)) update_tasks_cpumask(cs); - if (!nodes_empty(*off_mems) && !nodes_empty(cs->mems_allowed)) + if (mems_updated && !nodes_empty(cs->mems_allowed)) update_tasks_nodemask(cs); is_empty = cpumask_empty(cs->cpus_allowed) || @@ -2118,24 +2119,24 @@ static void hotplug_update_tasks_legacy(struct cpuset *cs, mutex_lock(&cpuset_mutex); } -static void hotplug_update_tasks(struct cpuset *cs, - struct cpumask *off_cpus, - nodemask_t *off_mems) +static void +hotplug_update_tasks(struct cpuset *cs, + struct cpumask *new_cpus, nodemask_t *new_mems, + bool cpus_updated, bool mems_updated) { + if (cpumask_empty(new_cpus)) + cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus); + if (nodes_empty(*new_mems)) + *new_mems = parent_cs(cs)->effective_mems; + mutex_lock(&callback_mutex); - cpumask_andnot(cs->effective_cpus, cs->effective_cpus, off_cpus); - if (cpumask_empty(cs->effective_cpus)) - cpumask_copy(cs->effective_cpus, - parent_cs(cs)->effective_cpus); - - nodes_andnot(cs->effective_mems, cs->effective_mems, *off_mems); - if (nodes_empty(cs->effective_mems)) - cs->effective_mems = parent_cs(cs)->effective_mems; + cpumask_copy(cs->effective_cpus, new_cpus); + cs->effective_mems = *new_mems; mutex_unlock(&callback_mutex); - if (!cpumask_empty(off_cpus)) + if (cpus_updated) update_tasks_cpumask(cs); - if (!nodes_empty(*off_mems)) + if (mems_updated) update_tasks_nodemask(cs); } @@ -2149,8 +2150,10 @@ static void hotplug_update_tasks(struct cpuset *cs, */ static void cpuset_hotplug_update_tasks(struct cpuset *cs) { - static cpumask_t off_cpus; - static nodemask_t off_mems; + static cpumask_t new_cpus; + static nodemask_t new_mems; + bool cpus_updated; + bool mems_updated; retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2165,14 +2168,18 @@ retry: goto retry; } - cpumask_andnot(&off_cpus, cs->effective_cpus, - top_cpuset.effective_cpus); - nodes_andnot(off_mems, cs->effective_mems, top_cpuset.effective_mems); + cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus); + nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); + + cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); + mems_updated = !nodes_equal(new_mems, cs->effective_mems); if (cgroup_on_dfl(cs->css.cgroup)) - hotplug_update_tasks(cs, &off_cpus, &off_mems); + hotplug_update_tasks(cs, &new_cpus, &new_mems, + cpus_updated, mems_updated); else - hotplug_update_tasks_legacy(cs, &off_cpus, &off_mems); + hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, + cpus_updated, mems_updated); mutex_unlock(&cpuset_mutex); } -- cgit v1.2.3 From 5d8ba82c3a1f10b77bf39ee3e7670d6789a8d149 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:49:12 +0800 Subject: cpuset: allow writing offlined masks to cpuset.cpus/mems As the configured masks won't be limited by its parent, and the top cpuset's masks won't change when hotplug happens, it's natural to allow writing offlined masks to the configured masks. If on default hierarchy: # echo 0 > /sys/devices/system/cpu/cpu1/online # mkdir /cpuset/sub # echo 1 > /cpuset/sub/cpuset.cpus # cat /cpuset/sub/cpuset.cpus 1 If on legacy hierarchy: # echo 0 > /sys/devices/system/cpu/cpu1/online # mkdir /cpuset/sub # echo 1 > /cpuset/sub/cpuset.cpus -bash: echo: write error: Invalid argument Note the checks don't need to be gated by cgroup_on_dfl, because we've initialized top_cpuset.{cpus,mems}_allowed accordingly in cpuset_bind(). Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c47cb940712e..65878a74a66b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -929,7 +929,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) return retval; - if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask)) + if (!cpumask_subset(trialcs->cpus_allowed, + top_cpuset.cpus_allowed)) return -EINVAL; } @@ -1186,8 +1187,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, goto done; if (!nodes_subset(trialcs->mems_allowed, - node_states[N_MEMORY])) { - retval = -EINVAL; + top_cpuset.mems_allowed)) { + retval = -EINVAL; goto done; } } -- cgit v1.2.3 From afd1a8b3e0bc4d045d762dfdbc4d0cee189893a4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 9 Jul 2014 16:49:25 +0800 Subject: cpuset: export effective masks to userspace cpuset.cpus and cpuset.mems are the configured masks, and we need to export effective masks to userspace, so users know the real cpus_allowed and mems_allowed that apply to the tasks in a cpuset. v2: - export those masks unconditionally, suggested by Tejun. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 65878a74a66b..53a9bbf16391 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1535,6 +1535,8 @@ typedef enum { FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, + FILE_EFFECTIVE_CPULIST, + FILE_EFFECTIVE_MEMLIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, FILE_MEM_HARDWALL, @@ -1701,6 +1703,12 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) case FILE_MEMLIST: s += nodelist_scnprintf(s, count, cs->mems_allowed); break; + case FILE_EFFECTIVE_CPULIST: + s += cpulist_scnprintf(s, count, cs->effective_cpus); + break; + case FILE_EFFECTIVE_MEMLIST: + s += nodelist_scnprintf(s, count, cs->effective_mems); + break; default: ret = -EINVAL; goto out_unlock; @@ -1785,6 +1793,18 @@ static struct cftype files[] = { .private = FILE_MEMLIST, }, + { + .name = "effective_cpus", + .seq_show = cpuset_common_seq_show, + .private = FILE_EFFECTIVE_CPULIST, + }, + + { + .name = "effective_mems", + .seq_show = cpuset_common_seq_show, + .private = FILE_EFFECTIVE_MEMLIST, + }, + { .name = "cpu_exclusive", .read_u64 = cpuset_read_u64, -- cgit v1.2.3 From a14c6874be09a05a48093df8df87ca021f310332 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:09 -0400 Subject: cgroup: split cgroup_base_files[] into cgroup_{dfl|legacy}_base_files[] Currently cgroup_base_files[] contains the cgroup core interface files for both legacy and default hierarchies with each file tagged with CFTYPE_INSANE and CFTYPE_ONLY_ON_DFL. This is difficult to read. Let's separate it out to two separate tables, cgroup_dfl_base_files[] and cgroup_legacy_base_files[], and use the appropriate one in cgroup_mkdir() depending on the hierarchy type. This makes tagging each file unnecessary. This patch doesn't introduce any behavior changes. v2: cgroup_dfl_base_files[] was missing the termination entry triggering WARN in cgroup_init_cftypes() for 0day kernel testing robot. Fixed. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Jet Chen --- kernel/cgroup.c | 76 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 28f7d47c1d4a..7e5fee5d6422 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -180,7 +180,8 @@ static u64 css_serial_nr_next = 1; */ static int need_forkexit_callback __read_mostly; -static struct cftype cgroup_base_files[]; +static struct cftype cgroup_dfl_base_files[]; +static struct cftype cgroup_legacy_base_files[]; static void cgroup_put(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, @@ -1614,6 +1615,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; + struct cftype *base_files; struct css_set *cset; int i, ret; @@ -1651,7 +1653,12 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) } root_cgrp->kn = root->kf_root->kn; - ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true); + if (root == &cgrp_dfl_root) + base_files = cgroup_dfl_base_files; + else + base_files = cgroup_legacy_base_files; + + ret = cgroup_addrm_files(root_cgrp, base_files, true); if (ret) goto destroy_root; @@ -4095,7 +4102,8 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css, return 0; } -static struct cftype cgroup_base_files[] = { +/* cgroup core interface files for the default hierarchy */ +static struct cftype cgroup_dfl_base_files[] = { { .name = "cgroup.procs", .seq_start = cgroup_pidlist_start, @@ -4106,47 +4114,53 @@ static struct cftype cgroup_base_files[] = { .write = cgroup_procs_write, .mode = S_IRUGO | S_IWUSR, }, - { - .name = "cgroup.clone_children", - .flags = CFTYPE_INSANE, - .read_u64 = cgroup_clone_children_read, - .write_u64 = cgroup_clone_children_write, - }, - { - .name = "cgroup.sane_behavior", - .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, - .seq_show = cgroup_sane_behavior_show, - }, { .name = "cgroup.controllers", - .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_ONLY_ON_ROOT, + .flags = CFTYPE_ONLY_ON_ROOT, .seq_show = cgroup_root_controllers_show, }, { .name = "cgroup.controllers", - .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT, + .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cgroup_controllers_show, }, { .name = "cgroup.subtree_control", - .flags = CFTYPE_ONLY_ON_DFL, .seq_show = cgroup_subtree_control_show, .write = cgroup_subtree_control_write, }, { .name = "cgroup.populated", - .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT, + .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cgroup_populated_show, }, + { } /* terminate */ +}; - /* - * Historical crazy stuff. These don't have "cgroup." prefix and - * don't exist if sane_behavior. If you're depending on these, be - * prepared to be burned. - */ +/* cgroup core interface files for the legacy hierarchies */ +static struct cftype cgroup_legacy_base_files[] = { + { + .name = "cgroup.procs", + .seq_start = cgroup_pidlist_start, + .seq_next = cgroup_pidlist_next, + .seq_stop = cgroup_pidlist_stop, + .seq_show = cgroup_pidlist_show, + .private = CGROUP_FILE_PROCS, + .write = cgroup_procs_write, + .mode = S_IRUGO | S_IWUSR, + }, + { + .name = "cgroup.clone_children", + .read_u64 = cgroup_clone_children_read, + .write_u64 = cgroup_clone_children_write, + }, + { + .name = "cgroup.sane_behavior", + .flags = CFTYPE_ONLY_ON_ROOT, + .seq_show = cgroup_sane_behavior_show, + }, { .name = "tasks", - .flags = CFTYPE_INSANE, /* use "procs" instead */ .seq_start = cgroup_pidlist_start, .seq_next = cgroup_pidlist_next, .seq_stop = cgroup_pidlist_stop, @@ -4157,13 +4171,12 @@ static struct cftype cgroup_base_files[] = { }, { .name = "notify_on_release", - .flags = CFTYPE_INSANE, .read_u64 = cgroup_read_notify_on_release, .write_u64 = cgroup_write_notify_on_release, }, { .name = "release_agent", - .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, + .flags = CFTYPE_ONLY_ON_ROOT, .seq_show = cgroup_release_agent_show, .write = cgroup_release_agent_write, .max_write_len = PATH_MAX - 1, @@ -4444,6 +4457,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, struct cgroup_root *root; struct cgroup_subsys *ss; struct kernfs_node *kn; + struct cftype *base_files; int ssid, ret; parent = cgroup_kn_lock_live(parent_kn); @@ -4514,7 +4528,12 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (ret) goto out_destroy; - ret = cgroup_addrm_files(cgrp, cgroup_base_files, true); + if (cgroup_on_dfl(cgrp)) + base_files = cgroup_dfl_base_files; + else + base_files = cgroup_legacy_base_files; + + ret = cgroup_addrm_files(cgrp, base_files, true); if (ret) goto out_destroy; @@ -4836,7 +4855,8 @@ int __init cgroup_init(void) unsigned long key; int ssid, err; - BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); + BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); + BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); mutex_lock(&cgroup_mutex); -- cgit v1.2.3 From 5577964e64692e17cc498854b7e0833e6532cd64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:09 -0400 Subject: cgroup: rename cgroup_subsys->base_cftypes to ->legacy_cftypes Currently, cgroup_subsys->base_cftypes is used for both the unified default hierarchy and legacy ones and subsystems can mark each file with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE if it has to appear only on one of them. This is quite hairy and error-prone. Also, we may end up exposing interface files to the default hierarchy without thinking it through. cgroup_subsys will grow two separate cftype arrays and apply each only on the hierarchies of the matching type. This will allow organizing cftypes in a lot clearer way and encourage subsystems to scrutinize the interface which is being exposed in the new default hierarchy. In preparation, this patch renames cgroup_subsys->base_cftypes to cgroup_subsys->legacy_cftypes. This patch is pure rename. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Vivek Goyal Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Aristeu Rozanski Cc: Aneesh Kumar K.V --- kernel/cgroup.c | 4 ++-- kernel/cgroup_freezer.c | 2 +- kernel/cpuset.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/cpuacct.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7e5fee5d6422..6496a83b0314 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4890,7 +4890,7 @@ int __init cgroup_init(void) */ if (!ss->disabled) { cgrp_dfl_root.subsys_mask |= 1 << ss->id; - WARN_ON(cgroup_add_cftypes(ss, ss->base_cftypes)); + WARN_ON(cgroup_add_cftypes(ss, ss->legacy_cftypes)); } } @@ -5480,6 +5480,6 @@ static struct cftype debug_files[] = { struct cgroup_subsys debug_cgrp_subsys = { .css_alloc = debug_css_alloc, .css_free = debug_css_free, - .base_cftypes = debug_files, + .legacy_cftypes = debug_files, }; #endif /* CONFIG_CGROUP_DEBUG */ diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index a79e40f9d700..92b98cc0ee76 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -480,5 +480,5 @@ struct cgroup_subsys freezer_cgrp_subsys = { .css_free = freezer_css_free, .attach = freezer_attach, .fork = freezer_fork, - .base_cftypes = files, + .legacy_cftypes = files, }; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 53a9bbf16391..f337f42a07ac 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2036,7 +2036,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, .bind = cpuset_bind, - .base_cftypes = files, + .legacy_cftypes = files, .early_init = 1, }; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b494fe..6628e8014824 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8088,7 +8088,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, .exit = cpu_cgroup_exit, - .base_cftypes = cpu_files, + .legacy_cftypes = cpu_files, .early_init = 1, }; diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 9cf350c94ec4..dd7cbb55bbf2 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -278,6 +278,6 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val) struct cgroup_subsys cpuacct_cgrp_subsys = { .css_alloc = cpuacct_css_alloc, .css_free = cpuacct_css_free, - .base_cftypes = files, + .legacy_cftypes = files, .early_init = 1, }; -- cgit v1.2.3 From 2cf669a58dc08fa065a8bd0dca866c0e6cb358cc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:09 -0400 Subject: cgroup: replace cgroup_add_cftypes() with cgroup_add_legacy_cftypes() Currently, cftypes added by cgroup_add_cftypes() are used for both the unified default hierarchy and legacy ones and subsystems can mark each file with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE if it has to appear only on one of them. This is quite hairy and error-prone. Also, we may end up exposing interface files to the default hierarchy without thinking it through. cgroup_subsys will grow two separate cftype addition functions and apply each only on the hierarchies of the matching type. This will allow organizing cftypes in a lot clearer way and encourage subsystems to scrutinize the interface which is being exposed in the new default hierarchy. In preparation, this patch adds cgroup_add_legacy_cftypes() which currently is a simple wrapper around cgroup_add_cftypes() and replaces all cgroup_add_cftypes() usages with it. While at it, this patch drops a completely spurious return from __hugetlb_cgroup_file_init(). This patch doesn't introduce any functional differences. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Aneesh Kumar K.V --- kernel/cgroup.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6496a83b0314..c275aa439a6f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3170,7 +3170,7 @@ int cgroup_rm_cftypes(struct cftype *cfts) * function currently returns 0 as long as @cfts registration is successful * even if some file creation attempts on existing cgroups fail. */ -int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) +static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { int ret; @@ -3195,6 +3195,11 @@ int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) return ret; } +int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) +{ + return cgroup_add_cftypes(ss, cfts); +} + /** * cgroup_task_count - count the number of tasks in a cgroup. * @cgrp: the cgroup in question -- cgit v1.2.3 From a8ddc8215e1a4cd9dc5d6210811cfc381a489ec2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:10 -0400 Subject: cgroup: distinguish the default and legacy hierarchies when handling cftypes Until now, cftype arrays carried files for both the default and legacy hierarchies and the files which needed to be used on only one of them were flagged with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE. This gets confusing very quickly and we may end up exposing interface files to the default hierarchy without thinking it through. This patch makes cgroup core provide separate sets of interfaces for cftype handling so that the cftypes for the default and legacy hierarchies are clearly distinguished. The previous two patches renamed the existing ones so that they clearly indicate that they're for the legacy hierarchies. This patch adds the interface for the default hierarchy and apply them selectively depending on the hierarchy type. * cftypes added through cgroup_subsys->dfl_cftypes and cgroup_add_dfl_cftypes() only show up on the default hierarchy. * cftypes added through cgroup_subsys->legacy_cftypes and cgroup_add_legacy_cftypes() only show up on the legacy hierarchies. * cgroup_subsys->dfl_cftypes and ->legacy_cftypes can point to the same array for the cases where the interface files are identical on both types of hierarchies. * This makes all the existing subsystem interface files legacy-only by default and all subsystems will have no interface file created when enabled on the default hierarchy. Each subsystem should explicitly review and compose the interface for the default hierarchy. * A boot param "cgroup__DEVEL__legacy_files_on_dfl" is added which makes subsystems which haven't decided the interface files for the default hierarchy to present the legacy files on the default hierarchy so that its behavior on the default hierarchy can be tested. As the awkward name suggests, this is for development only. * memcg's CFTYPE_INSANE on "use_hierarchy" is noop now as the whole array isn't used on the default hierarchy. The flag is removed. v2: Updated documentation for cgroup__DEVEL__legacy_files_on_dfl. v3: Clear CFTYPE_ONLY_ON_DFL and CFTYPE_INSANE when cfts are removed as suggested by Li. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Vivek Goyal Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Aristeu Rozanski Cc: Aneesh Kumar K.V --- kernel/cgroup.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c275aa439a6f..374ebdf74f35 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -149,6 +149,12 @@ struct cgroup_root cgrp_dfl_root; */ static bool cgrp_dfl_root_visible; +/* + * Set by the boot param of the same name and makes subsystems with NULL + * ->dfl_files to use ->legacy_files on the default hierarchy. + */ +static bool cgroup_legacy_files_on_dfl; + /* some controllers are not supported in the default hierarchy */ static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0 #ifdef CONFIG_CGROUP_DEBUG @@ -3085,6 +3091,9 @@ static void cgroup_exit_cftypes(struct cftype *cfts) kfree(cft->kf_ops); cft->kf_ops = NULL; cft->ss = NULL; + + /* revert flags set by cgroup core while adding @cfts */ + cft->flags &= ~(CFTYPE_ONLY_ON_DFL | CFTYPE_INSANE); } } @@ -3195,8 +3204,37 @@ static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) return ret; } +/** + * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy + * @ss: target cgroup subsystem + * @cfts: zero-length name terminated array of cftypes + * + * Similar to cgroup_add_cftypes() but the added files are only used for + * the default hierarchy. + */ +int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) +{ + struct cftype *cft; + + for (cft = cfts; cft && cft->name[0] != '\0'; cft++) + cft->flags |= CFTYPE_ONLY_ON_DFL; + return cgroup_add_cftypes(ss, cfts); +} + +/** + * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies + * @ss: target cgroup subsystem + * @cfts: zero-length name terminated array of cftypes + * + * Similar to cgroup_add_cftypes() but the added files are only used for + * the legacy hierarchies. + */ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { + struct cftype *cft; + + for (cft = cfts; cft && cft->name[0] != '\0'; cft++) + cft->flags |= CFTYPE_INSANE; return cgroup_add_cftypes(ss, cfts); } @@ -4893,9 +4931,19 @@ int __init cgroup_init(void) * disabled flag and cftype registration needs kmalloc, * both of which aren't available during early_init. */ - if (!ss->disabled) { - cgrp_dfl_root.subsys_mask |= 1 << ss->id; - WARN_ON(cgroup_add_cftypes(ss, ss->legacy_cftypes)); + if (ss->disabled) + continue; + + cgrp_dfl_root.subsys_mask |= 1 << ss->id; + + if (cgroup_legacy_files_on_dfl && !ss->dfl_cftypes) + ss->dfl_cftypes = ss->legacy_cftypes; + + if (ss->dfl_cftypes == ss->legacy_cftypes) { + WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); + } else { + WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes)); + WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes)); } } @@ -5291,6 +5339,14 @@ static int __init cgroup_disable(char *str) } __setup("cgroup_disable=", cgroup_disable); +static int __init cgroup_set_legacy_files_on_dfl(char *str) +{ + printk("cgroup: using legacy files on the default hierarchy\n"); + cgroup_legacy_files_on_dfl = true; + return 0; +} +__setup("cgroup__DEVEL__legacy_files_on_dfl", cgroup_set_legacy_files_on_dfl); + /** * css_tryget_online_from_dir - get corresponding css from a cgroup dentry * @dentry: directory dentry of interest -- cgit v1.2.3 From 05ebb6e60f044a9cef2549b6204559276500f363 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:10 -0400 Subject: cgroup: make CFTYPE_ONLY_ON_DFL and CFTYPE_NO_ internal to cgroup core cgroup now distinguishes cftypes for the default and legacy hierarchies more explicitly by using separate arrays and CFTYPE_ONLY_ON_DFL and CFTYPE_INSANE should be and are used only inside cgroup core proper. Let's make it clear that the flags are internal by prefixing them with double underscores. CFTYPE_INSANE is renamed to __CFTYPE_NOT_ON_DFL for consistency. The two flags are also collected and assigned bits >= 16 so that they aren't mixed with the published flags. v2: Convert the extra ones in cgroup_exit_cftypes() which are added by revision to the previous patch. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- kernel/cgroup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 374ebdf74f35..f41d164a3d54 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3031,9 +3031,9 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], for (cft = cfts; cft->name[0] != '\0'; cft++) { /* does cft->flags tell us to skip this file on @cgrp? */ - if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) + if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; - if ((cft->flags & CFTYPE_INSANE) && cgroup_on_dfl(cgrp)) + if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp)) continue; if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp)) continue; @@ -3093,7 +3093,7 @@ static void cgroup_exit_cftypes(struct cftype *cfts) cft->ss = NULL; /* revert flags set by cgroup core while adding @cfts */ - cft->flags &= ~(CFTYPE_ONLY_ON_DFL | CFTYPE_INSANE); + cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL); } } @@ -3217,7 +3217,7 @@ int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) struct cftype *cft; for (cft = cfts; cft && cft->name[0] != '\0'; cft++) - cft->flags |= CFTYPE_ONLY_ON_DFL; + cft->flags |= __CFTYPE_ONLY_ON_DFL; return cgroup_add_cftypes(ss, cfts); } @@ -3234,7 +3234,7 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) struct cftype *cft; for (cft = cfts; cft && cft->name[0] != '\0'; cft++) - cft->flags |= CFTYPE_INSANE; + cft->flags |= __CFTYPE_NOT_ON_DFL; return cgroup_add_cftypes(ss, cfts); } -- cgit v1.2.3 From 5de4fa13c4df302db41e80ca679df24fdad0d661 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:10 -0400 Subject: cgroup: initialize cgrp_dfl_root_inhibit_ss_mask from !->dfl_files test cgrp_dfl_root_inhibit_ss_mask determines which subsystems are not supported on the default hierarchy and is currently initialized statically and just includes the debug subsystem. Now that there's cgroup_subsys->dfl_files, we can easily tell which subsystems support the default hierarchy or not. Let's initialize cgrp_dfl_root_inhibit_ss_mask by testing whether cgroup_subsys->dfl_files is NULL. After all, subsystems with NULL ->dfl_files aren't useable on the default hierarchy anyway. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- kernel/cgroup.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f41d164a3d54..f2a6795e695b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -156,11 +156,7 @@ static bool cgrp_dfl_root_visible; static bool cgroup_legacy_files_on_dfl; /* some controllers are not supported in the default hierarchy */ -static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0 -#ifdef CONFIG_CGROUP_DEBUG - | (1 << debug_cgrp_id) -#endif - ; +static unsigned int cgrp_dfl_root_inhibit_ss_mask; /* The list of hierarchy roots */ @@ -4939,6 +4935,9 @@ int __init cgroup_init(void) if (cgroup_legacy_files_on_dfl && !ss->dfl_cftypes) ss->dfl_cftypes = ss->legacy_cftypes; + if (!ss->dfl_cftypes) + cgrp_dfl_root_inhibit_ss_mask |= 1 << ss->id; + if (ss->dfl_cftypes == ss->legacy_cftypes) { WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); } else { -- cgit v1.2.3 From a13812683f1118ee4deed88d8d9bc2c268358b2e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Jul 2014 15:07:13 +0800 Subject: cpuset: fix the WARN_ON() in update_nodemasks_hier() The WARN_ON() is used to check if we break the legal hierarchy, on which the effective mems should be equal to configured mems. Reported-by: Mike Qiu Tested-by: Mike Qiu Signed-off-by: Li Zefan --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f337f42a07ac..9d7264beb74f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1136,7 +1136,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) mutex_unlock(&callback_mutex); WARN_ON(!cgroup_on_dfl(cp->css.cgroup) && - nodes_equal(cp->mems_allowed, cp->effective_mems)); + !nodes_equal(cp->mems_allowed, cp->effective_mems)); update_tasks_nodemask(cp); -- cgit v1.2.3