 Documentation/admin-guide/cgroup-v2.rst                   |   9
 Documentation/admin-guide/kernel-parameters.txt           |   8
 include/linux/cgroup-defs.h                               |  21
 include/linux/llist.h                                     |   6
 include/trace/events/cgroup.h                             |  47
 kernel/cgroup/cgroup-v1.c                                 |  14
 kernel/cgroup/rstat.c                                     | 195
 mm/memcontrol.c                                           |  10
 tools/testing/selftests/cgroup/lib/cgroup_util.c          |   4
 tools/testing/selftests/cgroup/lib/include/cgroup_util.h  |   5
 tools/testing/selftests/cgroup/test_core.c                |  84
 tools/testing/selftests/cgroup/test_cpu.c                 |  63
 tools/testing/selftests/cgroup/test_kmem.c                |   5
 tools/testing/selftests/cgroup/test_zswap.c               |   2
 14 files changed, 266 insertions(+), 207 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index bd98ea3175ec..d9d3cc7df348 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -435,6 +435,15 @@ both cgroups.
Controlling Controllers
-----------------------
+Availability
+~~~~~~~~~~~~
+
+A controller is available in a cgroup when it is supported by the kernel (i.e.,
+compiled in, not disabled, and not attached to a v1 hierarchy) and listed in the
+"cgroup.controllers" file. Availability means the controller's interface files
+are exposed in the cgroup's directory, allowing the distribution of the target
+resource to be observed or controlled within that cgroup.
+
Enabling and Disabling
~~~~~~~~~~~~~~~~~~~~~~
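
As a quick illustration of the availability rule documented above, userspace can check whether a controller's interface files will appear in a cgroup by looking for its name in that cgroup's "cgroup.controllers" file. Below is a minimal sketch, assuming a v2 hierarchy mounted at /sys/fs/cgroup; the helper name and path are illustrative, not part of any kernel API:

#include <stdio.h>
#include <string.h>

/*
 * Return 1 if @ctrl is listed in @cgroup_dir/cgroup.controllers, 0 if it is
 * not, -1 on error. Illustrative helper, not part of any kernel or libc API.
 */
static int controller_available(const char *cgroup_dir, const char *ctrl)
{
	char path[4096], word[64];
	FILE *f;
	int found = 0;

	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_dir);
	f = fopen(path, "r");
	if (!f)
		return -1;

	/* The file is a single space-separated list of controller names. */
	while (fscanf(f, "%63s", word) == 1) {
		if (!strcmp(word, ctrl)) {
			found = 1;
			break;
		}
	}
	fclose(f);
	return found;
}

int main(void)
{
	int ret = controller_available("/sys/fs/cgroup", "memory");

	if (ret < 0)
		perror("cgroup.controllers");
	else
		printf("memory controller %savailable here\n", ret ? "" : "not ");
	return 0;
}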
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2e369a6a5641..8981ae1c9355 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -633,6 +633,14 @@
named mounts. Specifying both "all" and "named" disables
all v1 hierarchies.
+ cgroup_v1_proc= [KNL] Also show unavailable controllers in /proc/cgroups
+ Format: { "true" | "false" }
+ By default, /proc/cgroups lists only v1 controllers.
+ This compatibility option additionally lists v2
+ controllers (whose v1 code is not compiled in), so that
+ semi-legacy software can check this file to decide
+ whether to use v2 controllers.
+
cgroup_favordynmods= [KNL] Enable or Disable favordynmods.
Format: { "true" | "false" }
Defaults to the value of CONFIG_CGROUP_FAVOR_DYNMODS.
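
For context on what the semi-legacy software mentioned above actually reads: /proc/cgroups is a tab-separated table whose header line names the columns subsys_name, hierarchy, num_cgroups and enabled. The sketch below shows one plausible check (the helper name is made up for illustration; with cgroup_v1_proc=true, v2-only controllers show up in this file as well):

#include <stdio.h>
#include <string.h>

/*
 * Return 1 if @ctrl appears in /proc/cgroups, 0 if not, -1 on error.
 * Illustrative only: with cgroup_v1_proc=true, v2-only controllers are
 * listed here too, even though their v1 code is not compiled in.
 */
static int proc_cgroups_lists(const char *ctrl)
{
	char name[64];
	int hierarchy, num_cgroups, enabled, found = 0;
	FILE *f = fopen("/proc/cgroups", "r");

	if (!f)
		return -1;

	/* Skip the "#subsys_name hierarchy num_cgroups enabled" header. */
	fscanf(f, "%*[^\n]");

	while (fscanf(f, "%63s %d %d %d",
		      name, &hierarchy, &num_cgroups, &enabled) == 4) {
		if (!strcmp(name, ctrl)) {
			found = 1;
			break;
		}
	}
	fclose(f);
	return found;
}

int main(void)
{
	printf("cpu listed in /proc/cgroups: %d\n", proc_cgroups_lists("cpu"));
	return 0;
}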
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index e61687d5e496..6b93a64115fe 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -375,15 +375,12 @@ struct css_rstat_cpu {
* Child cgroups with stat updates on this cpu since the last read
* are linked on the parent's ->updated_children through
* ->updated_next. updated_children is terminated by its container css.
- *
- * In addition to being more compact, singly-linked list pointing to
- * the css makes it unnecessary for each per-cpu struct to point back
- * to the associated css.
- *
- * Protected by per-cpu css->ss->rstat_ss_cpu_lock.
*/
struct cgroup_subsys_state *updated_children;
struct cgroup_subsys_state *updated_next; /* NULL if not on the list */
+
+ struct llist_node lnode; /* lockless list for update */
+ struct cgroup_subsys_state *owner; /* back pointer */
};
/*
@@ -821,7 +818,7 @@ struct cgroup_subsys {
unsigned int depends_on;
spinlock_t rstat_ss_lock;
- raw_spinlock_t __percpu *rstat_ss_cpu_lock;
+ struct llist_head __percpu *lhead; /* lockless update list head */
};
extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
@@ -898,14 +895,12 @@ static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
#endif
}
+#ifdef CONFIG_CGROUP_NET_CLASSID
static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
{
-#ifdef CONFIG_CGROUP_NET_CLASSID
return READ_ONCE(skcd->classid);
-#else
- return 0;
-#endif
}
+#endif
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
u16 prioidx)
@@ -915,13 +910,13 @@ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
#endif
}
+#ifdef CONFIG_CGROUP_NET_CLASSID
static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
u32 classid)
{
-#ifdef CONFIG_CGROUP_NET_CLASSID
WRITE_ONCE(skcd->classid, classid);
-#endif
}
+#endif
#else /* CONFIG_SOCK_CGROUP_DATA */
diff --git a/include/linux/llist.h b/include/linux/llist.h
index 27b17f64bcee..607b2360c938 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -83,7 +83,7 @@ static inline void init_llist_head(struct llist_head *list)
*/
static inline void init_llist_node(struct llist_node *node)
{
- node->next = node;
+ WRITE_ONCE(node->next, node);
}
/**
@@ -97,7 +97,7 @@ static inline void init_llist_node(struct llist_node *node)
*/
static inline bool llist_on_list(const struct llist_node *node)
{
- return node->next != node;
+ return READ_ONCE(node->next) != node;
}
/**
@@ -220,7 +220,7 @@ static inline bool llist_empty(const struct llist_head *head)
static inline struct llist_node *llist_next(struct llist_node *node)
{
- return node->next;
+ return READ_ONCE(node->next);
}
/**
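
The READ_ONCE()/WRITE_ONCE() conversions above matter because init_llist_node() and llist_on_list() use "node points to itself" to mean "not on any list", and rstat now reads and writes that field concurrently. Below is a rough userspace analogue of the idiom using C11 atomics; it is a simplified sketch, not the kernel's implementation, and only covers the off-list marker and a lock-free push (the kernel additionally consumes nodes via llist_del_all()/llist_del_first_init()):

#include <stdatomic.h>
#include <stdio.h>

struct lnode {
	_Atomic(struct lnode *) next;
};

struct lhead {
	_Atomic(struct lnode *) first;
};

/* By convention, a node that points to itself is not on any list. */
static void init_lnode(struct lnode *node)
{
	atomic_store_explicit(&node->next, node, memory_order_relaxed);
}

static int on_list(struct lnode *node)
{
	return atomic_load_explicit(&node->next, memory_order_relaxed) != node;
}

/* Lock-free push, analogous to llist_add(): retry a cmpxchg on the head. */
static void push(struct lnode *node, struct lhead *head)
{
	struct lnode *first = atomic_load_explicit(&head->first,
						   memory_order_relaxed);

	do {
		atomic_store_explicit(&node->next, first, memory_order_relaxed);
	} while (!atomic_compare_exchange_weak(&head->first, &first, node));
}

int main(void)
{
	struct lhead head = { .first = NULL };
	struct lnode a;

	init_lnode(&a);
	printf("on list before push: %d\n", on_list(&a));	/* 0 */
	push(&a, &head);
	printf("on list after push:  %d\n", on_list(&a));	/* 1 */
	return 0;
}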
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index 7d332387be6c..ba9229af9a34 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -257,53 +257,6 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock,
TP_ARGS(cgrp, cpu, contended)
);
-/*
- * Related to per CPU locks:
- * global rstat_base_cpu_lock for base stats
- * cgroup_subsys::rstat_ss_cpu_lock for subsystem stats
- */
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended_fastpath,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked_fastpath,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
-DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath,
-
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
-
- TP_ARGS(cgrp, cpu, contended)
-);
-
#endif /* _TRACE_CGROUP_H */
/* This part must be outside protection */
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index fa24c032ed6f..2a4a387f867a 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -32,6 +32,9 @@ static u16 cgroup_no_v1_mask;
/* disable named v1 mounts */
static bool cgroup_no_v1_named;
+/* Show unavailable controllers in /proc/cgroups */
+static bool proc_show_all;
+
/*
* pidlist destructions need to be flushed on cgroup destruction. Use a
* separate workqueue as flush domain.
@@ -683,10 +686,11 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)
*/
for_each_subsys(ss, i) {
- if (cgroup1_subsys_absent(ss))
- continue;
cgrp_v1_visible |= ss->root != &cgrp_dfl_root;
+ if (!proc_show_all && cgroup1_subsys_absent(ss))
+ continue;
+
seq_printf(m, "%s\t%d\t%d\t%d\n",
ss->legacy_name, ss->root->hierarchy_id,
atomic_read(&ss->root->nr_cgrps),
@@ -1359,3 +1363,9 @@ static int __init cgroup_no_v1(char *str)
return 1;
}
__setup("cgroup_no_v1=", cgroup_no_v1);
+
+static int __init cgroup_v1_proc(char *str)
+{
+ return (kstrtobool(str, &proc_show_all) == 0);
+}
+__setup("cgroup_v1_proc=", cgroup_v1_proc);
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index cbeaa499a96a..981e2f77ad4e 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -10,7 +10,7 @@
#include <trace/events/cgroup.h>
static DEFINE_SPINLOCK(rstat_base_lock);
-static DEFINE_PER_CPU(raw_spinlock_t, rstat_base_cpu_lock);
+static DEFINE_PER_CPU(struct llist_head, rstat_backlog_list);
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
@@ -45,84 +45,11 @@ static spinlock_t *ss_rstat_lock(struct cgroup_subsys *ss)
return &rstat_base_lock;
}
-static raw_spinlock_t *ss_rstat_cpu_lock(struct cgroup_subsys *ss, int cpu)
+static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu)
{
- if (ss) {
- /*
- * Depending on config, the subsystem per-cpu lock type may be an
- * empty struct. In enviromnents where this is the case, allocation
- * of this field is not performed in ss_rstat_init(). Avoid a
- * cpu-based offset relative to NULL by returning early. When the
- * lock type is zero in size, the corresponding lock functions are
- * no-ops so passing them NULL is acceptable.
- */
- if (sizeof(*ss->rstat_ss_cpu_lock) == 0)
- return NULL;
-
- return per_cpu_ptr(ss->rstat_ss_cpu_lock, cpu);
- }
-
- return per_cpu_ptr(&rstat_base_cpu_lock, cpu);
-}
-
-/*
- * Helper functions for rstat per CPU locks.
- *
- * This makes it easier to diagnose locking issues and contention in
- * production environments. The parameter @fast_path determine the
- * tracepoints being added, allowing us to diagnose "flush" related
- * operations without handling high-frequency fast-path "update" events.
- */
-static __always_inline
-unsigned long _css_rstat_cpu_lock(struct cgroup_subsys_state *css, int cpu,
- const bool fast_path)
-{
- struct cgroup *cgrp = css->cgroup;
- raw_spinlock_t *cpu_lock;
- unsigned long flags;
- bool contended;
-
- /*
- * The _irqsave() is needed because the locks used for flushing are
- * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring this lock
- * with the _irq() suffix only disables interrupts on a non-PREEMPT_RT
- * kernel. The raw_spinlock_t below disables interrupts on both
- * configurations. The _irqsave() ensures that interrupts are always
- * disabled and later restored.
- */
- cpu_lock = ss_rstat_cpu_lock(css->ss, cpu);
- contended = !raw_spin_trylock_irqsave(cpu_lock, flags);
- if (contended) {
- if (fast_path)
- trace_cgroup_rstat_cpu_lock_contended_fastpath(cgrp, cpu, contended);
- else
- trace_cgroup_rstat_cpu_lock_contended(cgrp, cpu, contended);
-
- raw_spin_lock_irqsave(cpu_lock, flags);
- }
-
- if (fast_path)
- trace_cgroup_rstat_cpu_locked_fastpath(cgrp, cpu, contended);
- else
- trace_cgroup_rstat_cpu_locked(cgrp, cpu, contended);
-
- return flags;
-}
-
-static __always_inline
-void _css_rstat_cpu_unlock(struct cgroup_subsys_state *css, int cpu,
- unsigned long flags, const bool fast_path)
-{
- struct cgroup *cgrp = css->cgroup;
- raw_spinlock_t *cpu_lock;
-
- if (fast_path)
- trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false);
- else
- trace_cgroup_rstat_cpu_unlock(cgrp, cpu, false);
-
- cpu_lock = ss_rstat_cpu_lock(css->ss, cpu);
- raw_spin_unlock_irqrestore(cpu_lock, flags);
+ if (ss)
+ return per_cpu_ptr(ss->lhead, cpu);
+ return per_cpu_ptr(&rstat_backlog_list, cpu);
}
/**
@@ -130,13 +57,22 @@ void _css_rstat_cpu_unlock(struct cgroup_subsys_state *css, int cpu,
* @css: target cgroup subsystem state
* @cpu: cpu on which rstat_cpu was updated
*
- * @css's rstat_cpu on @cpu was updated. Put it on the parent's matching
- * rstat_cpu->updated_children list. See the comment on top of
- * css_rstat_cpu definition for details.
+ * Atomically inserts the css into the ss's llist for the given cpu. This is
+ * reentrancy safe, i.e. safe against softirq, hardirq and nmi. The ss's llist
+ * will be processed at flush time to create the update tree.
+ *
+ * NOTE: if the user needs the guarantee that the updater either adds itself to
+ * the lockless list or the concurrent flusher flushes its updated stats, a
+ * memory barrier is needed before the call to css_rstat_updated(), i.e. a
+ * barrier after updating the per-cpu stats and before calling
+ * css_rstat_updated().
*/
__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
- unsigned long flags;
+ struct llist_head *lhead;
+ struct css_rstat_cpu *rstatc;
+ struct css_rstat_cpu __percpu *rstatc_pcpu;
+ struct llist_node *self;
/*
* Since bpf programs can call this function, prevent access to
@@ -145,19 +81,49 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
if (!css_uses_rstat(css))
return;
+ lockdep_assert_preemption_disabled();
+
+ /*
+ * For archs without nmi-safe cmpxchg or percpu ops support, ignore
+ * requests from nmi context.
+ */
+ if ((!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) ||
+ !IS_ENABLED(CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS)) && in_nmi())
+ return;
+
+ rstatc = css_rstat_cpu(css, cpu);
/*
- * Speculative already-on-list test. This may race leading to
- * temporary inaccuracies, which is fine.
+ * Return early if already on the list. This check is racy; smp_mb() is
+ * needed to pair with the smp_mb() in css_process_update_tree() if the
+ * guarantee that updated stats are visible to a concurrent flusher is
+ * required.
+ */
+ if (llist_on_list(&rstatc->lnode))
+ return;
+
+ /*
+ * This function can be re-entered by irqs and nmis for the same cgroup
+ * and may try to insert the same per-cpu lnode into the llist. Note
+ * that llist_add() does not protect against such scenarios.
*
- * Because @parent's updated_children is terminated with @parent
- * instead of NULL, we can tell whether @css is on the list by
- * testing the next pointer for NULL.
+ * To protect against such stacked contexts of irqs/nmis, we use the
+ * fact that lnode points to itself when not on a list and then use
+ * this_cpu_cmpxchg() to atomically flip lnode.next from self to NULL to
+ * select the winner, which will call llist_add(). The losers can assume
+ * the insertion is successful as the winner will eventually add the
+ * per-cpu lnode to the llist.
*/
- if (data_race(css_rstat_cpu(css, cpu)->updated_next))
+ self = &rstatc->lnode;
+ rstatc_pcpu = css->rstat_cpu;
+ if (this_cpu_cmpxchg(rstatc_pcpu->lnode.next, self, NULL) != self)
return;
- flags = _css_rstat_cpu_lock(css, cpu, true);
+ lhead = ss_lhead_cpu(css->ss, cpu);
+ llist_add(&rstatc->lnode, lhead);
+}
+static void __css_process_update_tree(struct cgroup_subsys_state *css, int cpu)
+{
/* put @css and all ancestors on the corresponding updated lists */
while (true) {
struct css_rstat_cpu *rstatc = css_rstat_cpu(css, cpu);
@@ -183,8 +149,34 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
css = parent;
}
+}
+
+static void css_process_update_tree(struct cgroup_subsys *ss, int cpu)
+{
+ struct llist_head *lhead = ss_lhead_cpu(ss, cpu);
+ struct llist_node *lnode;
+
+ while ((lnode = llist_del_first_init(lhead))) {
+ struct css_rstat_cpu *rstatc;
+
+ /*
+ * smp_mb() is needed here (more specifically, in between
+ * init_llist_node() and the per-cpu stats flushing) if the
+ * guarantee is required by a rstat user that either the
+ * updater adds itself to the lockless list or the flusher
+ * flushes the stats updated by an updater that has observed
+ * it is already on the list. The corresponding barrier pair
+ * for this one should be placed before css_rstat_updated()
+ * by the user.
+ *
+ * For now there is no such user, so the barrier is not added
+ * here, but if such a use case arises, please add smp_mb()
+ * here.
+ */
- _css_rstat_cpu_unlock(css, cpu, flags, true);
+ rstatc = container_of(lnode, struct css_rstat_cpu, lnode);
+ __css_process_update_tree(rstatc->owner, cpu);
+ }
}
/**
@@ -288,13 +280,12 @@ static struct cgroup_subsys_state *css_rstat_updated_list(
{
struct css_rstat_cpu *rstatc = css_rstat_cpu(root, cpu);
struct cgroup_subsys_state *head = NULL, *parent, *child;
- unsigned long flags;
- flags = _css_rstat_cpu_lock(root, cpu, false);
+ css_process_update_tree(root->ss, cpu);
/* Return NULL if this subtree is not on-list */
if (!rstatc->updated_next)
- goto unlock_ret;
+ return NULL;
/*
* Unlink @root from its parent. As the updated_children list is
@@ -326,8 +317,7 @@ static struct cgroup_subsys_state *css_rstat_updated_list(
rstatc->updated_children = root;
if (child != root)
head = css_rstat_push_children(head, child, cpu);
-unlock_ret:
- _css_rstat_cpu_unlock(root, cpu, flags, false);
+
return head;
}
@@ -468,7 +458,8 @@ int css_rstat_init(struct cgroup_subsys_state *css)
for_each_possible_cpu(cpu) {
struct css_rstat_cpu *rstatc = css_rstat_cpu(css, cpu);
- rstatc->updated_children = css;
+ rstatc->owner = rstatc->updated_children = css;
+ init_llist_node(&rstatc->lnode);
if (is_self) {
struct cgroup_rstat_base_cpu *rstatbc;
@@ -522,19 +513,15 @@ int __init ss_rstat_init(struct cgroup_subsys *ss)
{
int cpu;
- /*
- * Depending on config, the subsystem per-cpu lock type may be an empty
- * struct. Avoid allocating a size of zero in this case.
- */
- if (ss && sizeof(*ss->rstat_ss_cpu_lock)) {
- ss->rstat_ss_cpu_lock = alloc_percpu(raw_spinlock_t);
- if (!ss->rstat_ss_cpu_lock)
+ if (ss) {
+ ss->lhead = alloc_percpu(struct llist_head);
+ if (!ss->lhead)
return -ENOMEM;
}
spin_lock_init(ss_rstat_lock(ss));
for_each_possible_cpu(cpu)
- raw_spin_lock_init(ss_rstat_cpu_lock(ss, cpu));
+ init_llist_head(ss_lhead_cpu(ss, cpu));
return 0;
}
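
The core update-side trick in the rstat.c changes above is that an off-list lnode points to itself, so a single cmpxchg from "self" to NULL selects exactly one winner among a task, an IRQ and an NMI that all race to queue the same per-cpu node; only the winner calls llist_add(), and the losers may assume the insertion will happen. A simplified, single-threaded C11 sketch of that winner selection (the kernel uses this_cpu_cmpxchg() on per-cpu data; the names below are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
	_Atomic(struct node *) next;	/* points to itself while off-list */
};

/* Returns true for the one context that must perform the list insertion. */
static bool claim_for_insert(struct node *node)
{
	struct node *self = node;

	/*
	 * Only one caller can flip next from "self" to NULL; every nested
	 * or repeated caller sees the node as already claimed/queued and
	 * returns false, trusting the winner to do the actual insertion.
	 */
	return atomic_compare_exchange_strong(&node->next, &self, NULL);
}

int main(void)
{
	struct node n;

	atomic_store(&n.next, &n);	/* initialize as off-list */

	printf("first caller wins:     %d\n", claim_for_insert(&n));	/* 1 */
	printf("reentrant caller wins: %d\n", claim_for_insert(&n));	/* 0 */
	return 0;
}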
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index de7d737fe011..8dd7fbed5a94 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -570,9 +570,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val,
if (!val)
return;
- /* TODO: add to cgroup update tree once it is nmi-safe. */
- if (!in_nmi())
- css_rstat_updated(&memcg->css, cpu);
+ css_rstat_updated(&memcg->css, cpu);
statc_pcpu = memcg->vmstats_percpu;
for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) {
statc = this_cpu_ptr(statc_pcpu);
@@ -2527,7 +2525,8 @@ static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
} else {
struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id];
- /* TODO: add to cgroup update tree once it is nmi-safe. */
+ /* preemption is disabled when in_nmi(). */
+ css_rstat_updated(&memcg->css, smp_processor_id());
if (idx == NR_SLAB_RECLAIMABLE_B)
atomic_add(nr, &pn->slab_reclaimable);
else
@@ -2750,7 +2749,8 @@ static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
if (likely(!in_nmi())) {
mod_memcg_state(memcg, MEMCG_KMEM, val);
} else {
- /* TODO: add to cgroup update tree once it is nmi-safe. */
+ /* preemption is disabled when in_nmi(). */
+ css_rstat_updated(&memcg->css, smp_processor_id());
atomic_add(val, &memcg->kmem_stat);
}
}
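
The memcontrol.c hunks above follow a recurring pattern: the normal path updates counters directly, the NMI path falls back to a plain atomic that is folded in later, and both paths now queue the cgroup for flushing via css_rstat_updated(). A loose userspace analogue of that "atomic fallback for unsafe contexts" idea is sketched below; it is illustrative only and not the memcg implementation:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Fast-path counter (a stand-in for a per-cpu counter) plus an atomic side
 * counter for contexts where touching the fast path is not safe.
 */
static long fast_count;
static atomic_long deferred_count;

static void account(long val, bool unsafe_context)
{
	if (!unsafe_context)
		fast_count += val;			/* normal path */
	else
		atomic_fetch_add(&deferred_count, val);	/* e.g. NMI path */
}

/* Flush-time read folds the deferred atomic part into the total. */
static long read_total(void)
{
	return fast_count + atomic_load(&deferred_count);
}

int main(void)
{
	account(5, false);
	account(3, true);
	printf("total = %ld\n", read_total());	/* 8 */
	return 0;
}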
diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index 8832f3d1cb61..0e89fcff4d05 100644
--- a/tools/testing/selftests/cgroup/lib/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -19,6 +19,8 @@
#include "cgroup_util.h"
#include "../../clone3/clone3_selftests.h"
+bool cg_test_v1_named;
+
/* Returns read len on success, or -errno on failure. */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
@@ -361,7 +363,7 @@ int cg_enter_current(const char *cgroup)
int cg_enter_current_thread(const char *cgroup)
{
- return cg_write(cgroup, "cgroup.threads", "0");
+ return cg_write(cgroup, CG_THREADS_FILE, "0");
}
int cg_run(const char *cgroup,
diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index adb2bc193183..c69cab66254b 100644
--- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -13,6 +13,10 @@
#define TEST_UID 65534 /* usually nobody, any !root is fine */
+#define CG_THREADS_FILE (!cg_test_v1_named ? "cgroup.threads" : "tasks")
+#define CG_NAMED_NAME "selftest"
+#define CG_PATH_FORMAT (!cg_test_v1_named ? "0::%s" : (":name=" CG_NAMED_NAME ":%s"))
+
/*
* Checks if two given values differ by less than err% of their sum.
*/
@@ -65,3 +69,4 @@ extern int dirfd_open_opath(const char *dir);
extern int cg_prepare_for_wait(const char *cgroup);
extern int memcg_prepare_for_wait(const char *cgroup);
extern int cg_wait_for(int fd);
+extern bool cg_test_v1_named;
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index a5672a91d273..a360e2eb2eef 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -5,6 +5,8 @@
#include <linux/sched.h>
#include <sys/types.h>
#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
@@ -19,6 +21,9 @@
#include "cgroup_util.h"
static bool nsdelegate;
+#ifndef CLONE_NEWCGROUP
+#define CLONE_NEWCGROUP 0
+#endif
static int touch_anon(char *buf, size_t size)
{
@@ -148,6 +153,9 @@ static int test_cgcore_populated(const char *root)
int cgroup_fd = -EBADF;
pid_t pid;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
cg_test_a = cg_name(root, "cg_test_a");
cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
@@ -277,6 +285,9 @@ static int test_cgcore_invalid_domain(const char *root)
int ret = KSFT_FAIL;
char *grandparent = NULL, *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
grandparent = cg_name(root, "cg_test_grandparent");
parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
@@ -339,6 +350,9 @@ static int test_cgcore_parent_becomes_threaded(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -378,7 +392,8 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
- if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
+ if (cg_test_v1_named ||
+ cg_read_strstr(root, "cgroup.controllers", "cpu") ||
cg_write(root, "cgroup.subtree_control", "+cpu")) {
ret = KSFT_SKIP;
goto cleanup;
@@ -430,6 +445,9 @@ static int test_cgcore_top_down_constraint_enable(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -465,6 +483,9 @@ static int test_cgcore_top_down_constraint_disable(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -506,6 +527,9 @@ static int test_cgcore_internal_process_constraint(const char *root)
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
+ if (cg_test_v1_named)
+ return KSFT_SKIP;
+
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
@@ -573,7 +597,7 @@ static int test_cgcore_proc_migration(const char *root)
}
cg_enter_current(dst);
- if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1)
+ if (cg_read_lc(dst, CG_THREADS_FILE) != n_threads + 1)
goto cleanup;
ret = KSFT_PASS;
@@ -605,7 +629,7 @@ static void *migrating_thread_fn(void *arg)
char lines[3][PATH_MAX];
for (g = 1; g < 3; ++g)
- snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0]));
+ snprintf(lines[g], sizeof(lines[g]), CG_PATH_FORMAT, grps[g] + strlen(grps[0]));
for (i = 0; i < n_iterations; ++i) {
cg_enter_current_thread(grps[(i % 2) + 1]);
@@ -642,10 +666,12 @@ static int test_cgcore_thread_migration(const char *root)
if (cg_create(grps[2]))
goto cleanup;
- if (cg_write(grps[1], "cgroup.type", "threaded"))
- goto cleanup;
- if (cg_write(grps[2], "cgroup.type", "threaded"))
- goto cleanup;
+ if (!cg_test_v1_named) {
+ if (cg_write(grps[1], "cgroup.type", "threaded"))
+ goto cleanup;
+ if (cg_write(grps[2], "cgroup.type", "threaded"))
+ goto cleanup;
+ }
if (cg_enter_current(grps[1]))
goto cleanup;
@@ -659,7 +685,7 @@ static int test_cgcore_thread_migration(const char *root)
if (retval)
goto cleanup;
- snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0]));
+ snprintf(line, sizeof(line), CG_PATH_FORMAT, grps[1] + strlen(grps[0]));
if (proc_read_strstr(0, 1, "cgroup", line))
goto cleanup;
@@ -842,6 +868,38 @@ cleanup:
return ret;
}
+static int setup_named_v1_root(char *root, size_t len, const char *name)
+{
+ char options[PATH_MAX];
+ int r;
+
+ r = snprintf(root, len, "/mnt/cg_selftest");
+ if (r < 0)
+ return r;
+
+ r = snprintf(options, sizeof(options), "none,name=%s", name);
+ if (r < 0)
+ return r;
+
+ r = mkdir(root, 0755);
+ if (r < 0 && errno != EEXIST)
+ return r;
+
+ r = mount("none", root, "cgroup", 0, options);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static void cleanup_named_v1_root(char *root)
+{
+ if (!cg_test_v1_named)
+ return;
+ umount(root);
+ rmdir(root);
+}
+
#define T(x) { x, #x }
struct corecg_test {
int (*fn)(const char *root);
@@ -867,13 +925,18 @@ int main(int argc, char *argv[])
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
- if (cg_find_unified_root(root, sizeof(root), &nsdelegate))
- ksft_exit_skip("cgroup v2 isn't mounted\n");
+ if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) {
+ if (setup_named_v1_root(root, sizeof(root), CG_NAMED_NAME))
+ ksft_exit_skip("cgroup v2 isn't mounted and could not setup named v1 hierarchy\n");
+ cg_test_v1_named = true;
+ goto post_v2_setup;
+ }
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
+post_v2_setup:
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
@@ -889,5 +952,6 @@ int main(int argc, char *argv[])
}
}
+ cleanup_named_v1_root(root);
return ret;
}
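
CG_PATH_FORMAT exists because /proc/<pid>/cgroup lines differ between the two hierarchies the tests now support: "0::<path>" on v2 versus "<hierarchy-id>:name=selftest:<path>" on the named v1 mount created by setup_named_v1_root(). A tiny illustrative program that just dumps the current task's lines so both shapes can be inspected:

#include <stdio.h>

int main(void)
{
	/*
	 * On cgroup v2 a line looks like "0::/some/path"; on a named v1
	 * hierarchy it looks like "<id>:name=selftest:/some/path", which is
	 * why the tests match against CG_PATH_FORMAT rather than a fixed
	 * "0::%s".
	 */
	char line[4096];
	FILE *f = fopen("/proc/self/cgroup", "r");

	if (!f) {
		perror("/proc/self/cgroup");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}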
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index a2b50af8e9ee..2a60e6c41940 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -2,6 +2,7 @@
#define _GNU_SOURCE
#include <linux/limits.h>
+#include <sys/param.h>
#include <sys/sysinfo.h>
#include <sys/wait.h>
#include <errno.h>
@@ -645,10 +646,16 @@ test_cpucg_nested_weight_underprovisioned(const char *root)
static int test_cpucg_max(const char *root)
{
int ret = KSFT_FAIL;
- long usage_usec, user_usec;
- long usage_seconds = 1;
- long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+ long quota_usec = 1000;
+ long default_period_usec = 100000; /* cpu.max's default period */
+ long duration_seconds = 1;
+
+ long duration_usec = duration_seconds * USEC_PER_SEC;
+ long usage_usec, n_periods, remainder_usec, expected_usage_usec;
char *cpucg;
+ char quota_buf[32];
+
+ snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec);
cpucg = cg_name(root, "cpucg_test");
if (!cpucg)
@@ -657,13 +664,13 @@ static int test_cpucg_max(const char *root)
if (cg_create(cpucg))
goto cleanup;
- if (cg_write(cpucg, "cpu.max", "1000"))
+ if (cg_write(cpucg, "cpu.max", quota_buf))
goto cleanup;
struct cpu_hog_func_param param = {
.nprocs = 1,
.ts = {
- .tv_sec = usage_seconds,
+ .tv_sec = duration_seconds,
.tv_nsec = 0,
},
.clock_type = CPU_HOG_CLOCK_WALL,
@@ -672,14 +679,19 @@ static int test_cpucg_max(const char *root)
goto cleanup;
usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
- user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
- if (user_usec <= 0)
+ if (usage_usec <= 0)
goto cleanup;
- if (user_usec >= expected_usage_usec)
- goto cleanup;
+ /*
+ * The following calculation applies only because
+ * the cpu hog is configured to run against wall-clock time
+ */
+ n_periods = duration_usec / default_period_usec;
+ remainder_usec = duration_usec - n_periods * default_period_usec;
+ expected_usage_usec
+ = n_periods * quota_usec + MIN(remainder_usec, quota_usec);
- if (values_close(usage_usec, expected_usage_usec, 95))
+ if (!values_close(usage_usec, expected_usage_usec, 10))
goto cleanup;
ret = KSFT_PASS;
@@ -698,10 +710,16 @@ cleanup:
static int test_cpucg_max_nested(const char *root)
{
int ret = KSFT_FAIL;
- long usage_usec, user_usec;
- long usage_seconds = 1;
- long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+ long quota_usec = 1000;
+ long default_period_usec = 100000; /* cpu.max's default period */
+ long duration_seconds = 1;
+
+ long duration_usec = duration_seconds * USEC_PER_SEC;
+ long usage_usec, n_periods, remainder_usec, expected_usage_usec;
char *parent, *child;
+ char quota_buf[32];
+
+ snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec);
parent = cg_name(root, "cpucg_parent");
child = cg_name(parent, "cpucg_child");
@@ -717,13 +735,13 @@ static int test_cpucg_max_nested(const char *root)
if (cg_create(child))
goto cleanup;
- if (cg_write(parent, "cpu.max", "1000"))
+ if (cg_write(parent, "cpu.max", quota_buf))
goto cleanup;
struct cpu_hog_func_param param = {
.nprocs = 1,
.ts = {
- .tv_sec = usage_seconds,
+ .tv_sec = duration_seconds,
.tv_nsec = 0,
},
.clock_type = CPU_HOG_CLOCK_WALL,
@@ -732,14 +750,19 @@ static int test_cpucg_max_nested(const char *root)
goto cleanup;
usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
- user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
- if (user_usec <= 0)
+ if (usage_usec <= 0)
goto cleanup;
- if (user_usec >= expected_usage_usec)
- goto cleanup;
+ /*
+ * The following calculation applies only because
+ * the cpu hog is configured to run against wall-clock time
+ */
+ n_periods = duration_usec / default_period_usec;
+ remainder_usec = duration_usec - n_periods * default_period_usec;
+ expected_usage_usec
+ = n_periods * quota_usec + MIN(remainder_usec, quota_usec);
- if (values_close(usage_usec, expected_usage_usec, 95))
+ if (!values_close(usage_usec, expected_usage_usec, 10))
goto cleanup;
ret = KSFT_PASS;
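
To make the expected-usage arithmetic above concrete: with the values used by the test (1000us quota, the default 100000us period, a 1s wall-clock hog), the run spans 10 full periods with no remainder, so the expected usage is 10 * 1000us = 10000us and the measured usage_usec must land within 10% of it. A standalone sketch mirroring that formula (assumed values only, not code taken verbatim from the test):

#include <stdio.h>

#define USEC_PER_SEC	1000000L
#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	long quota_usec = 1000;			/* cpu.max quota */
	long period_usec = 100000;		/* cpu.max default period */
	long duration_usec = 1 * USEC_PER_SEC;	/* wall-clock hog runtime */

	long n_periods = duration_usec / period_usec;
	long remainder_usec = duration_usec - n_periods * period_usec;
	long expected_usage_usec =
		n_periods * quota_usec + MIN(remainder_usec, quota_usec);

	/* 10 periods * 1000us + min(0us, 1000us) = 10000us */
	printf("expected usage: %ld usec\n", expected_usage_usec);
	return 0;
}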
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 96693d8772be..63b3c9aad399 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -308,6 +308,7 @@ static int test_kmem_dead_cgroups(const char *root)
char *parent;
long dead;
int i;
+ int max_time = 20;
parent = cg_name(root, "kmem_dead_cgroups_test");
if (!parent)
@@ -322,7 +323,7 @@ static int test_kmem_dead_cgroups(const char *root)
if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
goto cleanup;
- for (i = 0; i < 5; i++) {
+ for (i = 0; i < max_time; i++) {
dead = cg_read_key_long(parent, "cgroup.stat",
"nr_dying_descendants ");
if (dead == 0) {
@@ -334,6 +335,8 @@ static int test_kmem_dead_cgroups(const char *root)
* let's wait a bit and repeat.
*/
sleep(1);
+ if (i > 5)
+ printf("Waiting time longer than 5s; wait: %ds (dead: %ld)\n", i, dead);
}
cleanup:
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index 40de679248b8..e1f578ca2841 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -338,7 +338,7 @@ static int test_zswap_writeback_one(const char *cgroup, bool wb)
return -1;
if (wb != !!zswpwb_after) {
- ksft_print_msg("zswpwb_after is %ld while wb is %s",
+ ksft_print_msg("zswpwb_after is %ld while wb is %s\n",
zswpwb_after, wb ? "enabled" : "disabled");
return -1;
}