summaryrefslogtreecommitdiff
path: root/kernel/cgroup
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup')
-rw-r--r--kernel/cgroup/cgroup.c2
-rw-r--r--kernel/cgroup/cpuset.c21
-rw-r--r--kernel/cgroup/rstat.c13
3 files changed, 25 insertions, 11 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e717208cfb18..554a02ee298b 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5847,7 +5847,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
int ret;
/* allocate the cgroup and its ID, 0 is reserved for the root */
- cgrp = kzalloc(struct_size(cgrp, ancestors, (level + 1)), GFP_KERNEL);
+ cgrp = kzalloc(struct_size(cgrp, _low_ancestors, level), GFP_KERNEL);
if (!cgrp)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 6e6eb09b8db6..3e8cc34d8d50 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1668,7 +1668,14 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
{
WARN_ON_ONCE(!is_remote_partition(cs));
- WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus));
+ /*
+ * When a CPU is offlined, top_cpuset may end up with no available CPUs,
+ * which should clear subpartitions_cpus. We should not emit a warning for this
+ * scenario: the hierarchy is updated from top to bottom, so subpartitions_cpus
+ * may already be cleared when disabling the partition.
+ */
+ WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus) &&
+ !cpumask_empty(subpartitions_cpus));
spin_lock_irq(&callback_lock);
cs->remote_partition = false;
@@ -3976,8 +3983,9 @@ retry:
if (remote || (is_partition_valid(cs) && is_partition_valid(parent)))
compute_partition_effective_cpumask(cs, &new_cpus);
- if (remote && cpumask_empty(&new_cpus) &&
- partition_is_populated(cs, NULL)) {
+ if (remote && (cpumask_empty(subpartitions_cpus) ||
+ (cpumask_empty(&new_cpus) &&
+ partition_is_populated(cs, NULL)))) {
cs->prs_err = PERR_HOTPLUG;
remote_partition_disable(cs, tmp);
compute_effective_cpumask(&new_cpus, cs, parent);
@@ -3990,9 +3998,12 @@ retry:
* 1) empty effective cpus but not valid empty partition.
* 2) parent is invalid or doesn't grant any cpus to child
* partitions.
+ * 3) subpartitions_cpus is empty.
*/
- if (is_local_partition(cs) && (!is_partition_valid(parent) ||
- tasks_nocpu_error(parent, cs, &new_cpus)))
+ if (is_local_partition(cs) &&
+ (!is_partition_valid(parent) ||
+ tasks_nocpu_error(parent, cs, &new_cpus) ||
+ cpumask_empty(subpartitions_cpus)))
partcmd = partcmd_invalidate;
/*
* On the other hand, an invalid partition root may be transitioned
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index a198e40c799b..150e5871e66f 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -71,7 +71,6 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
struct llist_head *lhead;
struct css_rstat_cpu *rstatc;
- struct css_rstat_cpu __percpu *rstatc_pcpu;
struct llist_node *self;
/*
@@ -104,18 +103,22 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
/*
* This function can be renentered by irqs and nmis for the same cgroup
* and may try to insert the same per-cpu lnode into the llist. Note
- * that llist_add() does not protect against such scenarios.
+ * that llist_add() does not protect against such scenarios. In addition
+ * this same per-cpu lnode can be modified through init_llist_node()
+ * from css_rstat_flush() running on a different CPU.
*
* To protect against such stacked contexts of irqs/nmis, we use the
* fact that lnode points to itself when not on a list and then use
- * this_cpu_cmpxchg() to atomically set to NULL to select the winner
+ * try_cmpxchg() to atomically set to NULL to select the winner
* which will call llist_add(). The losers can assume the insertion is
* successful and the winner will eventually add the per-cpu lnode to
* the llist.
+ *
+ * Please note that we can not use this_cpu_cmpxchg() here as on some
+ * archs it is not safe against modifications from multiple CPUs.
*/
self = &rstatc->lnode;
- rstatc_pcpu = css->rstat_cpu;
- if (this_cpu_cmpxchg(rstatc_pcpu->lnode.next, self, NULL) != self)
+ if (!try_cmpxchg(&rstatc->lnode.next, &self, NULL))
return;
lhead = ss_lhead_cpu(css->ss, cpu);