summaryrefslogtreecommitdiff
path: root/kernel/sched
diff options
context:
space:
mode:
authorFrederic Weisbecker <frederic@kernel.org>2026-05-08 15:16:41 +0200
committerThomas Gleixner <tglx@kernel.org>2026-06-02 21:27:25 +0200
commita5fe724e206ec7ff3ceb15b285d94316c7fe6c41 (patch)
tree0a866a6dcd56ab36cffd9b3ab127be79d93332e3 /kernel/sched
parentbd0c77cd46c63d02bc33dacdba56133ec1fe44a0 (diff)
tick/sched: Move dyntick-idle cputime accounting to cputime code
Although the dynticks-idle cputime accounting is necessarily tied to the tick subsystem, the actual related accounting code has no business residing there and should be part of the scheduler cputime code. Move away the relevant pieces and state machine to where they belong. Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Thomas Gleixner <tglx@kernel.org> Tested-by: Shrikanth Hegde <sshegde@linux.ibm.com> Link: https://patch.msgid.link/20260508131647.43868-10-frederic@kernel.org
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c6
-rw-r--r--kernel/sched/cputime.c148
2 files changed, 146 insertions, 8 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8871449d3c6..d797d6696c58 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5518,7 +5518,11 @@ void sched_exec(void)
}
DEFINE_PER_CPU(struct kernel_stat, kstat);
-DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat) = {
+#ifdef CONFIG_NO_HZ_COMMON
+ .idle_sleeptime_seq = SEQCNT_ZERO(kernel_cpustat.idle_sleeptime_seq)
+#endif
+};
EXPORT_PER_CPU_SYMBOL(kstat);
EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a5733789e0bd..4c00163b74b9 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -2,6 +2,7 @@
/*
* Simple CPU accounting cgroup controller
*/
+#include <linux/sched/clock.h>
#include <linux/sched/cputime.h>
#include <linux/tsacct_kern.h>
#include "sched.h"
@@ -420,22 +421,155 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_NO_HZ_COMMON
-void kcpustat_dyntick_start(void)
+static void kcpustat_idle_stop(struct kernel_cpustat *kc, u64 now)
{
- if (!vtime_generic_enabled_this_cpu()) {
- vtime_dyntick_start();
- __this_cpu_write(kernel_cpustat.idle_dyntick, 1);
- }
+ u64 *cpustat = kc->cpustat;
+ u64 delta;
+
+ if (!kc->idle_elapse)
+ return;
+
+ delta = now - kc->idle_entrytime;
+
+ write_seqcount_begin(&kc->idle_sleeptime_seq);
+ if (nr_iowait_cpu(smp_processor_id()) > 0)
+ cpustat[CPUTIME_IOWAIT] += delta;
+ else
+ cpustat[CPUTIME_IDLE] += delta;
+
+ kc->idle_entrytime = now;
+ kc->idle_elapse = false;
+ write_seqcount_end(&kc->idle_sleeptime_seq);
}
-void kcpustat_dyntick_stop(void)
+static void kcpustat_idle_start(struct kernel_cpustat *kc, u64 now)
{
+ write_seqcount_begin(&kc->idle_sleeptime_seq);
+ kc->idle_entrytime = now;
+ kc->idle_elapse = true;
+ write_seqcount_end(&kc->idle_sleeptime_seq);
+}
+
+void kcpustat_dyntick_stop(u64 now)
+{
+ struct kernel_cpustat *kc = kcpustat_this_cpu;
+
if (!vtime_generic_enabled_this_cpu()) {
- __this_cpu_write(kernel_cpustat.idle_dyntick, 0);
+ WARN_ON_ONCE(!kc->idle_dyntick);
+ kcpustat_idle_stop(kc, now);
+ kc->idle_dyntick = false;
vtime_dyntick_stop();
steal_account_process_time(ULONG_MAX);
}
}
+
+void kcpustat_dyntick_start(u64 now)
+{
+ struct kernel_cpustat *kc = kcpustat_this_cpu;
+
+ if (!vtime_generic_enabled_this_cpu()) {
+ vtime_dyntick_start();
+ kc->idle_dyntick = true;
+ kcpustat_idle_start(kc, now);
+ }
+}
+
+void kcpustat_irq_enter(u64 now)
+{
+ struct kernel_cpustat *kc = kcpustat_this_cpu;
+
+ if (!vtime_generic_enabled_this_cpu())
+ kcpustat_idle_stop(kc, now);
+}
+
+void kcpustat_irq_exit(u64 now)
+{
+ struct kernel_cpustat *kc = kcpustat_this_cpu;
+
+ if (!vtime_generic_enabled_this_cpu())
+ kcpustat_idle_start(kc, now);
+}
+
+static u64 get_cpu_sleep_time_us(int cpu, enum cpu_usage_stat idx,
+ bool compute_delta, u64 *last_update_time)
+{
+ struct kernel_cpustat *kc = &kcpustat_cpu(cpu);
+ u64 *cpustat = kc->cpustat;
+ unsigned int seq;
+ ktime_t now;
+ u64 idle;
+
+ now = ktime_get();
+ if (last_update_time)
+ *last_update_time = ktime_to_us(now);
+
+ if (vtime_generic_enabled_cpu(cpu)) {
+ idle = kcpustat_field(idx, cpu);
+ goto to_us;
+ }
+
+ do {
+ seq = read_seqcount_begin(&kc->idle_sleeptime_seq);
+
+ idle = cpustat[idx];
+ if (kc->idle_elapse && compute_delta && now > kc->idle_entrytime)
+ idle += (now - kc->idle_entrytime);
+ } while (read_seqcount_retry(&kc->idle_sleeptime_seq, seq));
+
+to_us:
+ do_div(idle, NSEC_PER_USEC);
+
+ return idle;
+}
+
+/**
+ * get_cpu_idle_time_us - get the total idle time of a CPU
+ * @cpu: CPU number to query
+ * @last_update_time: variable to store update time in. Do not update
+ * counters if NULL.
+ *
+ * Return the cumulative idle time (since boot) for a given
+ * CPU, in microseconds. Note that this is partially broken due to
+ * the counter of iowait tasks that can be remotely updated without
+ * any synchronization. Therefore it is possible to observe backward
+ * values within two consecutive reads.
+ *
+ * This time is measured via accounting rather than sampling,
+ * and is as accurate as ktime_get() is.
+ *
+ * Return: -1 if generic vtime is enabled, else total idle time of the @cpu
+ */
+u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
+{
+ return get_cpu_sleep_time_us(cpu, CPUTIME_IDLE,
+ !nr_iowait_cpu(cpu), last_update_time);
+}
+EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
+
+/**
+ * get_cpu_iowait_time_us - get the total iowait time of a CPU
+ * @cpu: CPU number to query
+ * @last_update_time: variable to store update time in. Do not update
+ * counters if NULL.
+ *
+ * Return the cumulative iowait time (since boot) for a given
+ * CPU, in microseconds. Note this is partially broken due to
+ * the counter of iowait tasks that can be remotely updated without
+ * any synchronization. Therefore it is possible to observe backward
+ * values within two consecutive reads.
+ *
+ * This time is measured via accounting rather than sampling,
+ * and is as accurate as ktime_get() is.
+ *
+ * Return: -1 if generic vtime is enabled, else total iowait time of @cpu
+ */
+u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
+{
+ return get_cpu_sleep_time_us(cpu, CPUTIME_IOWAIT,
+ nr_iowait_cpu(cpu), last_update_time);
+}
+EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
+
#endif /* CONFIG_NO_HZ_COMMON */
/*