summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/cgroup.h | 15
-rw-r--r-- kernel/cgroup.c | 65
2 files changed, 76 insertions, 4 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ada239253ec7..4b38e2d6110d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -154,6 +154,14 @@ struct cgroup {
/* the number of attached css's */
int nr_css;
+ /*
+ * If this cgroup contains any tasks, it contributes one to
+ * populated_cnt. All children with non-zero populated_cnt of
+ * their own contribute one. The count is zero iff there's no task
+ * in this cgroup or its subtree.
+ */
+ int populated_cnt;
+
atomic_t refcnt;
/*
@@ -166,6 +174,7 @@ struct cgroup {
struct cgroup *parent; /* my parent */
struct kernfs_node *kn; /* cgroup kernfs entry */
struct kernfs_node *control_kn; /* kn for "cgroup.subtree_control" */
+ struct kernfs_node *populated_kn; /* kn for "cgroup.populated" */
/*
* Monotonically increasing unique serial number which defines a
@@ -264,6 +273,12 @@ enum {
*
* - "cgroup.clone_children" is removed.
*
+ * - "cgroup.populated" is available. Its value is 0 if
+ * the cgroup and its descendants contain no task; otherwise, 1.
+ * The file also generates a kernfs notification, which can be
+ * monitored through poll and [di]notify, when the value of the
+ * file changes.
+ *
* - If mount is requested with sane_behavior but without any
* subsystem, the default unified hierarchy is mounted.
*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 809dd903ceb8..0f986f7afee4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -411,6 +411,43 @@ static struct css_set init_css_set = {
static int css_set_count = 1; /* 1 for init_css_set */
+/**
+ * cgroup_update_populated - update populated count of a cgroup
+ * @cgrp: the target cgroup
+ * @populated: true to increment the populated count, false to decrement it
+ *
+ * @cgrp is either getting the first task (css_set) or losing the last.
+ * Update @cgrp->populated_cnt accordingly. The count is propagated
+ * towards root so that a given cgroup's populated_cnt is zero iff the
+ * cgroup and all its descendants are empty.
+ *
+ * @cgrp's interface file "cgroup.populated" is zero if
+ * @cgrp->populated_cnt is zero and 1 otherwise. When @cgrp->populated_cnt
+ * changes from or to zero, userland is notified that the content of the
+ * interface file has changed. This can be used to detect when @cgrp and
+ * its descendants become populated or empty.
+ */
+static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
+{
+ lockdep_assert_held(&css_set_rwsem);
+
+ do {
+ bool trigger;
+
+ if (populated)
+ trigger = !cgrp->populated_cnt++;
+ else
+ trigger = !--cgrp->populated_cnt;
+
+ if (!trigger)
+ break;
+
+ if (cgrp->populated_kn)
+ kernfs_notify(cgrp->populated_kn);
+ cgrp = cgrp->parent;
+ } while (cgrp);
+}
+
/*
* hash table for cgroup groups. This improves the performance to find
* an existing css_set. This hash doesn't (currently) take into
@@ -456,10 +493,13 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
list_del(&link->cgrp_link);
/* @cgrp can't go away while we're holding css_set_rwsem */
- if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) {
- if (taskexit)
- set_bit(CGRP_RELEASABLE, &cgrp->flags);
- check_for_release(cgrp);
+ if (list_empty(&cgrp->cset_links)) {
+ cgroup_update_populated(cgrp, false);
+ if (notify_on_release(cgrp)) {
+ if (taskexit)
+ set_bit(CGRP_RELEASABLE, &cgrp->flags);
+ check_for_release(cgrp);
+ }
}
kfree(link);
@@ -668,7 +708,11 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
link->cset = cset;
link->cgrp = cgrp;
+
+ if (list_empty(&cgrp->cset_links))
+ cgroup_update_populated(cgrp, true);
list_move(&link->cset_link, &cgrp->cset_links);
+
/*
* Always add links to the tail of the list so that the list
* is sorted by order of hierarchy creation
@@ -2643,6 +2687,12 @@ err_undo_css:
goto out_unlock;
}
+static int cgroup_populated_show(struct seq_file *seq, void *v)
+{
+ seq_printf(seq, "%d\n", (bool)seq_css(seq)->cgroup->populated_cnt);
+ return 0;
+}
+
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
@@ -2809,6 +2859,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
if (cft->seq_show == cgroup_subtree_control_show)
cgrp->control_kn = kn;
+ else if (cft->seq_show == cgroup_populated_show)
+ cgrp->populated_kn = kn;
return 0;
}
@@ -3918,6 +3970,11 @@ static struct cftype cgroup_base_files[] = {
.seq_show = cgroup_subtree_control_show,
.write_string = cgroup_subtree_control_write,
},
+ {
+ .name = "cgroup.populated",
+ .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
+ .seq_show = cgroup_populated_show,
+ },
/*
* Historical crazy stuff. These don't have "cgroup." prefix and