diff options
| author | Frederic Weisbecker <frederic@kernel.org> | 2025-05-28 18:05:32 +0200 |
|---|---|---|
| committer | Frederic Weisbecker <frederic@kernel.org> | 2026-02-03 15:23:34 +0100 |
| commit | 03ff7351016983653bab5b38e58529d0e38a28cf (patch) | |
| tree | e6d77f3863adc54129dab92ca5d7c7951677b757 /kernel | |
| parent | 27c3a5967f054ab666704cb28f2aeb18ca07cab7 (diff) | |
cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
Until now, HK_TYPE_DOMAIN used to only include boot defined isolated
CPUs passed through isolcpus= boot option. Users interested in also
knowing the runtime defined isolated CPUs through cpuset must use
different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc...
There are many drawbacks to that approach:
1) Most interested subsystems want to know about all isolated CPUs, not
just those defined on boot time.
2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
concurrent cpuset changes.
3) Further cpuset modifications are not propagated to subsystems
Solve 1) and 2) and centralize all isolated CPUs within the
HK_TYPE_DOMAIN housekeeping cpumask.
Subsystems can rely on RCU to synchronize against concurrent changes.
The propagation mentioned in 3) will be handled in further patches.
[Chen Ridong: Fix cpu_hotplug_lock deadlock and use correct static
branch API]
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Chen Ridong <chenridong@huawei.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Cc: "Michal Koutný" <mkoutny@suse.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Marco Crivellari <marco.crivellari@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: cgroups@vger.kernel.org
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cgroup/cpuset.c | 5 | ||||
| -rw-r--r-- | kernel/sched/isolation.c | 75 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 1 |
3 files changed, 73 insertions, 8 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 5e2e3514c22e..e146e1f34bf9 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1482,14 +1482,15 @@ static void update_isolation_cpumasks(void) if (!isolated_cpus_updating) return; - lockdep_assert_cpus_held(); - ret = workqueue_unbound_exclude_cpumask(isolated_cpus); WARN_ON_ONCE(ret < 0); ret = tmigr_isolated_exclude_cpumask(isolated_cpus); WARN_ON_ONCE(ret < 0); + ret = housekeeping_update(isolated_cpus); + WARN_ON_ONCE(ret < 0); + isolated_cpus_updating = false; } diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 6f77289c14c3..674a02891b38 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -29,18 +29,48 @@ static struct housekeeping housekeeping; bool housekeeping_enabled(enum hk_type type) { - return !!(housekeeping.flags & BIT(type)); + return !!(READ_ONCE(housekeeping.flags) & BIT(type)); } EXPORT_SYMBOL_GPL(housekeeping_enabled); +static bool housekeeping_dereference_check(enum hk_type type) +{ + if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) { + /* Cpuset isn't even writable yet? */ + if (system_state <= SYSTEM_SCHEDULING) + return true; + + /* CPU hotplug write locked, so cpuset partition can't be overwritten */ + if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held()) + return true; + + /* Cpuset lock held, partitions not writable */ + if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held()) + return true; + + return false; + } + + return true; +} + +static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type) +{ + return rcu_dereference_all_check(housekeeping.cpumasks[type], + housekeeping_dereference_check(type)); +} + const struct cpumask *housekeeping_cpumask(enum hk_type type) { + const struct cpumask *mask = NULL; + if (static_branch_unlikely(&housekeeping_overridden)) { - if (housekeeping.flags & BIT(type)) { - return rcu_dereference_check(housekeeping.cpumasks[type], 1); - } + if (READ_ONCE(housekeeping.flags) & BIT(type)) + mask = housekeeping_cpumask_dereference(type); } - return cpu_possible_mask; + if (!mask) + mask = cpu_possible_mask; + return mask; } EXPORT_SYMBOL_GPL(housekeeping_cpumask); @@ -80,12 +110,45 @@ EXPORT_SYMBOL_GPL(housekeeping_affine); bool housekeeping_test_cpu(int cpu, enum hk_type type) { - if (static_branch_unlikely(&housekeeping_overridden) && housekeeping.flags & BIT(type)) + if (static_branch_unlikely(&housekeeping_overridden) && + READ_ONCE(housekeeping.flags) & BIT(type)) return cpumask_test_cpu(cpu, housekeeping_cpumask(type)); return true; } EXPORT_SYMBOL_GPL(housekeeping_test_cpu); +int housekeeping_update(struct cpumask *isol_mask) +{ + struct cpumask *trial, *old = NULL; + + lockdep_assert_cpus_held(); + + trial = kmalloc(cpumask_size(), GFP_KERNEL); + if (!trial) + return -ENOMEM; + + cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask); + if (!cpumask_intersects(trial, cpu_online_mask)) { + kfree(trial); + return -EINVAL; + } + + if (!housekeeping.flags) + static_branch_enable_cpuslocked(&housekeeping_overridden); + + if (housekeeping.flags & HK_FLAG_DOMAIN) + old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN); + else + WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN); + rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial); + + synchronize_rcu(); + + kfree(old); + + return 0; +} + void __init housekeeping_init(void) { enum hk_type type; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 475bdab3b8db..653e898a996a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -30,6 +30,7 @@ #include <linux/context_tracking.h> #include <linux/cpufreq.h> #include <linux/cpumask_api.h> +#include <linux/cpuset.h> #include <linux/ctype.h> #include <linux/file.h> #include <linux/fs_api.h> |
