summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorFrederic Weisbecker <frederic@kernel.org>2025-05-28 18:05:32 +0200
committerFrederic Weisbecker <frederic@kernel.org>2026-02-03 15:23:34 +0100
commit03ff7351016983653bab5b38e58529d0e38a28cf (patch)
treee6d77f3863adc54129dab92ca5d7c7951677b757 /kernel
parent27c3a5967f054ab666704cb28f2aeb18ca07cab7 (diff)
cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
Until now, HK_TYPE_DOMAIN used to only include boot defined isolated CPUs passed through isolcpus= boot option. Users interested in also knowing the runtime defined isolated CPUs through cpuset must use different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc... There are many drawbacks to that approach: 1) Most interested subsystems want to know about all isolated CPUs, not just those defined on boot time. 2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with concurrent cpuset changes. 3) Further cpuset modifications are not propagated to subsystems Solve 1) and 2) and centralize all isolated CPUs within the HK_TYPE_DOMAIN housekeeping cpumask. Subsystems can rely on RCU to synchronize against concurrent changes. The propagation mentioned in 3) will be handled in further patches. [Chen Ridong: Fix cpu_hotplug_lock deadlock and use correct static branch API] Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Reviewed-by: Waiman Long <longman@redhat.com> Reviewed-by: Chen Ridong <chenridong@huawei.com> Signed-off-by: Chen Ridong <chenridong@huawei.com> Cc: "Michal Koutný" <mkoutny@suse.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Marco Crivellari <marco.crivellari@suse.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tejun Heo <tj@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Waiman Long <longman@redhat.com> Cc: cgroups@vger.kernel.org
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup/cpuset.c5
-rw-r--r--kernel/sched/isolation.c75
-rw-r--r--kernel/sched/sched.h1
3 files changed, 73 insertions, 8 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 5e2e3514c22e..e146e1f34bf9 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1482,14 +1482,15 @@ static void update_isolation_cpumasks(void)
if (!isolated_cpus_updating)
return;
- lockdep_assert_cpus_held();
-
ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
WARN_ON_ONCE(ret < 0);
ret = tmigr_isolated_exclude_cpumask(isolated_cpus);
WARN_ON_ONCE(ret < 0);
+ ret = housekeeping_update(isolated_cpus);
+ WARN_ON_ONCE(ret < 0);
+
isolated_cpus_updating = false;
}
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 6f77289c14c3..674a02891b38 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -29,18 +29,48 @@ static struct housekeeping housekeeping;
bool housekeeping_enabled(enum hk_type type)
{
- return !!(housekeeping.flags & BIT(type));
+ return !!(READ_ONCE(housekeeping.flags) & BIT(type));
}
EXPORT_SYMBOL_GPL(housekeeping_enabled);
+static bool housekeeping_dereference_check(enum hk_type type)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) {
+ /* Cpuset isn't even writable yet? */
+ if (system_state <= SYSTEM_SCHEDULING)
+ return true;
+
+ /* CPU hotplug write locked, so cpuset partition can't be overwritten */
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
+ return true;
+
+ /* Cpuset lock held, partitions not writable */
+ if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
+ return true;
+
+ return false;
+ }
+
+ return true;
+}
+
+static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type)
+{
+ return rcu_dereference_all_check(housekeeping.cpumasks[type],
+ housekeeping_dereference_check(type));
+}
+
const struct cpumask *housekeeping_cpumask(enum hk_type type)
{
+ const struct cpumask *mask = NULL;
+
if (static_branch_unlikely(&housekeeping_overridden)) {
- if (housekeeping.flags & BIT(type)) {
- return rcu_dereference_check(housekeeping.cpumasks[type], 1);
- }
+ if (READ_ONCE(housekeeping.flags) & BIT(type))
+ mask = housekeeping_cpumask_dereference(type);
}
- return cpu_possible_mask;
+ if (!mask)
+ mask = cpu_possible_mask;
+ return mask;
}
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
@@ -80,12 +110,45 @@ EXPORT_SYMBOL_GPL(housekeeping_affine);
bool housekeeping_test_cpu(int cpu, enum hk_type type)
{
- if (static_branch_unlikely(&housekeeping_overridden) && housekeeping.flags & BIT(type))
+ if (static_branch_unlikely(&housekeeping_overridden) &&
+ READ_ONCE(housekeeping.flags) & BIT(type))
return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
return true;
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
+int housekeeping_update(struct cpumask *isol_mask)
+{
+ struct cpumask *trial, *old = NULL;
+
+ lockdep_assert_cpus_held();
+
+ trial = kmalloc(cpumask_size(), GFP_KERNEL);
+ if (!trial)
+ return -ENOMEM;
+
+ cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask);
+ if (!cpumask_intersects(trial, cpu_online_mask)) {
+ kfree(trial);
+ return -EINVAL;
+ }
+
+ if (!housekeeping.flags)
+ static_branch_enable_cpuslocked(&housekeeping_overridden);
+
+ if (housekeeping.flags & HK_FLAG_DOMAIN)
+ old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
+ else
+ WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN);
+ rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial);
+
+ synchronize_rcu();
+
+ kfree(old);
+
+ return 0;
+}
+
void __init housekeeping_init(void)
{
enum hk_type type;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 475bdab3b8db..653e898a996a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -30,6 +30,7 @@
#include <linux/context_tracking.h>
#include <linux/cpufreq.h>
#include <linux/cpumask_api.h>
+#include <linux/cpuset.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/fs_api.h>