diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched/core.c | 6 | ||||
| -rw-r--r-- | kernel/sched/cputime.c | 148 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 163 |
3 files changed, 178 insertions, 139 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b8871449d3c6..d797d6696c58 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5518,7 +5518,11 @@ void sched_exec(void) } DEFINE_PER_CPU(struct kernel_stat, kstat); -DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); +DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat) = { +#ifdef CONFIG_NO_HZ_COMMON + .idle_sleeptime_seq = SEQCNT_ZERO(kernel_cpustat.idle_sleeptime_seq) +#endif +}; EXPORT_PER_CPU_SYMBOL(kstat); EXPORT_PER_CPU_SYMBOL(kernel_cpustat); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a5733789e0bd..4c00163b74b9 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -2,6 +2,7 @@ /* * Simple CPU accounting cgroup controller */ +#include <linux/sched/clock.h> #include <linux/sched/cputime.h> #include <linux/tsacct_kern.h> #include "sched.h" @@ -420,22 +421,155 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_ #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ #ifdef CONFIG_NO_HZ_COMMON -void kcpustat_dyntick_start(void) +static void kcpustat_idle_stop(struct kernel_cpustat *kc, u64 now) { - if (!vtime_generic_enabled_this_cpu()) { - vtime_dyntick_start(); - __this_cpu_write(kernel_cpustat.idle_dyntick, 1); - } + u64 *cpustat = kc->cpustat; + u64 delta; + + if (!kc->idle_elapse) + return; + + delta = now - kc->idle_entrytime; + + write_seqcount_begin(&kc->idle_sleeptime_seq); + if (nr_iowait_cpu(smp_processor_id()) > 0) + cpustat[CPUTIME_IOWAIT] += delta; + else + cpustat[CPUTIME_IDLE] += delta; + + kc->idle_entrytime = now; + kc->idle_elapse = false; + write_seqcount_end(&kc->idle_sleeptime_seq); } -void kcpustat_dyntick_stop(void) +static void kcpustat_idle_start(struct kernel_cpustat *kc, u64 now) { + write_seqcount_begin(&kc->idle_sleeptime_seq); + kc->idle_entrytime = now; + kc->idle_elapse = true; + write_seqcount_end(&kc->idle_sleeptime_seq); +} + +void kcpustat_dyntick_stop(u64 now) +{ + struct kernel_cpustat *kc = kcpustat_this_cpu; + if (!vtime_generic_enabled_this_cpu()) { - __this_cpu_write(kernel_cpustat.idle_dyntick, 0); + WARN_ON_ONCE(!kc->idle_dyntick); + kcpustat_idle_stop(kc, now); + kc->idle_dyntick = false; vtime_dyntick_stop(); steal_account_process_time(ULONG_MAX); } } + +void kcpustat_dyntick_start(u64 now) +{ + struct kernel_cpustat *kc = kcpustat_this_cpu; + + if (!vtime_generic_enabled_this_cpu()) { + vtime_dyntick_start(); + kc->idle_dyntick = true; + kcpustat_idle_start(kc, now); + } +} + +void kcpustat_irq_enter(u64 now) +{ + struct kernel_cpustat *kc = kcpustat_this_cpu; + + if (!vtime_generic_enabled_this_cpu()) + kcpustat_idle_stop(kc, now); +} + +void kcpustat_irq_exit(u64 now) +{ + struct kernel_cpustat *kc = kcpustat_this_cpu; + + if (!vtime_generic_enabled_this_cpu()) + kcpustat_idle_start(kc, now); +} + +static u64 get_cpu_sleep_time_us(int cpu, enum cpu_usage_stat idx, + bool compute_delta, u64 *last_update_time) +{ + struct kernel_cpustat *kc = &kcpustat_cpu(cpu); + u64 *cpustat = kc->cpustat; + unsigned int seq; + ktime_t now; + u64 idle; + + now = ktime_get(); + if (last_update_time) + *last_update_time = ktime_to_us(now); + + if (vtime_generic_enabled_cpu(cpu)) { + idle = kcpustat_field(idx, cpu); + goto to_us; + } + + do { + seq = read_seqcount_begin(&kc->idle_sleeptime_seq); + + idle = cpustat[idx]; + if (kc->idle_elapse && compute_delta && now > kc->idle_entrytime) + idle += (now - kc->idle_entrytime); + } while (read_seqcount_retry(&kc->idle_sleeptime_seq, seq)); + +to_us: + do_div(idle, NSEC_PER_USEC); + + return idle; +} + +/** + * get_cpu_idle_time_us - get the total idle time of a CPU + * @cpu: CPU number to query + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. + * + * Return the cumulative idle time (since boot) for a given + * CPU, in microseconds. Note that this is partially broken due to + * the counter of iowait tasks that can be remotely updated without + * any synchronization. Therefore it is possible to observe backward + * values within two consecutive reads. + * + * This time is measured via accounting rather than sampling, + * and is as accurate as ktime_get() is. + * + * Return: -1 if generic vtime is enabled, else total idle time of the @cpu + */ +u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) +{ + return get_cpu_sleep_time_us(cpu, CPUTIME_IDLE, + !nr_iowait_cpu(cpu), last_update_time); +} +EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); + +/** + * get_cpu_iowait_time_us - get the total iowait time of a CPU + * @cpu: CPU number to query + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. + * + * Return the cumulative iowait time (since boot) for a given + * CPU, in microseconds. Note this is partially broken due to + * the counter of iowait tasks that can be remotely updated without + * any synchronization. Therefore it is possible to observe backward + * values within two consecutive reads. + * + * This time is measured via accounting rather than sampling, + * and is as accurate as ktime_get() is. + * + * Return: -1 if generic vtime is enabled, else total iowait time of @cpu + */ +u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) +{ + return get_cpu_sleep_time_us(cpu, CPUTIME_IOWAIT, + nr_iowait_cpu(cpu), last_update_time); +} +EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); + #endif /* CONFIG_NO_HZ_COMMON */ /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index cb235ec7d2d6..fa03cf7b3cec 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -749,126 +749,6 @@ static void tick_nohz_update_jiffies(ktime_t now) touch_softlockup_watchdog_sched(); } -static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now) -{ - u64 *cpustat = kcpustat_this_cpu->cpustat; - ktime_t delta; - - if (vtime_generic_enabled_this_cpu()) - return; - - if (WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE))) - return; - - delta = ktime_sub(now, ts->idle_entrytime); - - write_seqcount_begin(&ts->idle_sleeptime_seq); - if (nr_iowait_cpu(smp_processor_id()) > 0) - cpustat[CPUTIME_IOWAIT] = ktime_add(cpustat[CPUTIME_IOWAIT], delta); - else - cpustat[CPUTIME_IDLE] = ktime_add(cpustat[CPUTIME_IDLE], delta); - - ts->idle_entrytime = now; - tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE); - write_seqcount_end(&ts->idle_sleeptime_seq); - - sched_clock_idle_wakeup_event(); -} - -static void tick_nohz_start_idle(struct tick_sched *ts) -{ - if (vtime_generic_enabled_this_cpu()) - return; - - write_seqcount_begin(&ts->idle_sleeptime_seq); - ts->idle_entrytime = ktime_get(); - tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE); - write_seqcount_end(&ts->idle_sleeptime_seq); - sched_clock_idle_sleep_event(); -} - -static u64 get_cpu_sleep_time_us(int cpu, enum cpu_usage_stat idx, - bool compute_delta, u64 *last_update_time) -{ - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - u64 *cpustat = kcpustat_cpu(cpu).cpustat; - ktime_t now, idle; - unsigned int seq; - - now = ktime_get(); - if (last_update_time) - *last_update_time = ktime_to_us(now); - - if (vtime_generic_enabled_cpu(cpu)) { - idle = kcpustat_field(idx, cpu); - return ktime_to_us(idle); - } - - do { - ktime_t delta = 0; - - seq = read_seqcount_begin(&ts->idle_sleeptime_seq); - - if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE) && compute_delta) { - if (now > ts->idle_entrytime) - delta = ktime_sub(now, ts->idle_entrytime); - } - - idle = ktime_add(cpustat[idx], delta); - } while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq)); - - return ktime_to_us(idle); - -} - -/** - * get_cpu_idle_time_us - get the total idle time of a CPU - * @cpu: CPU number to query - * @last_update_time: variable to store update time in. Do not update - * counters if NULL. - * - * Return the cumulative idle time (since boot) for a given - * CPU, in microseconds. Note that this is partially broken due to - * the counter of iowait tasks that can be remotely updated without - * any synchronization. Therefore it is possible to observe backward - * values within two consecutive reads. - * - * This time is measured via accounting rather than sampling, - * and is as accurate as ktime_get() is. - * - * Return: -1 if generic vtime is enabled, else total idle time of the @cpu - */ -u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) -{ - return get_cpu_sleep_time_us(cpu, CPUTIME_IDLE, - !nr_iowait_cpu(cpu), last_update_time); -} -EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); - -/** - * get_cpu_iowait_time_us - get the total iowait time of a CPU - * @cpu: CPU number to query - * @last_update_time: variable to store update time in. Do not update - * counters if NULL. - * - * Return the cumulative iowait time (since boot) for a given - * CPU, in microseconds. Note this is partially broken due to - * the counter of iowait tasks that can be remotely updated without - * any synchronization. Therefore it is possible to observe backward - * values within two consecutive reads. - * - * This time is measured via accounting rather than sampling, - * and is as accurate as ktime_get() is. - * - * Return: -1 if generic vtime is enabled, else total iowait time of @cpu - */ -u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) -{ - return get_cpu_sleep_time_us(cpu, CPUTIME_IOWAIT, - nr_iowait_cpu(cpu), last_update_time); -} -EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); - /* Simplified variant of hrtimer_forward_now() */ static ktime_t tick_forward_now(ktime_t expires, ktime_t now) { @@ -1289,6 +1169,20 @@ void tick_nohz_idle_retain_tick(void) tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched)); } +static void tick_nohz_clock_sleep(struct tick_sched *ts) +{ + tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE); + sched_clock_idle_sleep_event(); +} + +static void tick_nohz_clock_wakeup(struct tick_sched *ts) +{ + if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)) { + tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE); + sched_clock_idle_wakeup_event(); + } +} + /** * tick_nohz_idle_enter - prepare for entering idle on the current CPU * @@ -1303,12 +1197,11 @@ void tick_nohz_idle_enter(void) local_irq_disable(); ts = this_cpu_ptr(&tick_cpu_sched); - WARN_ON_ONCE(ts->timer_expires_base); - tick_sched_flag_set(ts, TS_FLAG_INIDLE); - kcpustat_dyntick_start(); - tick_nohz_start_idle(ts); + ts->idle_entrytime = ktime_get(); + kcpustat_dyntick_start(ts->idle_entrytime); + tick_nohz_clock_sleep(ts); local_irq_enable(); } @@ -1336,10 +1229,13 @@ void tick_nohz_irq_exit(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); - if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) - tick_nohz_start_idle(ts); - else + if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) { + ts->idle_entrytime = ktime_get(); + kcpustat_irq_exit(ts->idle_entrytime); + tick_nohz_clock_sleep(ts); + } else { tick_nohz_full_update_tick(ts); + } } /** @@ -1484,11 +1380,11 @@ void tick_nohz_idle_exit(void) now = ktime_get(); if (idle_active) - tick_nohz_stop_idle(ts, now); + tick_nohz_clock_wakeup(ts); if (tick_stopped) tick_nohz_idle_update_tick(ts, now); - kcpustat_dyntick_stop(); + kcpustat_dyntick_stop(now); local_irq_enable(); } @@ -1545,9 +1441,14 @@ static inline void tick_nohz_irq_enter(void) if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED | TS_FLAG_IDLE_ACTIVE)) return; + now = ktime_get(); - if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)) - tick_nohz_stop_idle(ts, now); + + if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)) { + tick_nohz_clock_wakeup(ts); + kcpustat_irq_enter(now); + } + /* * If all CPUs are idle we may need to update a stale jiffies value. * Note nohz_full is a special case: a timekeeper is guaranteed to stay |
