From 2fb75e1b642f49253d8848c9e47e8942f5366221 Mon Sep 17 00:00:00 2001 From: Liu Xinpeng Date: Mon, 25 Oct 2021 11:46:26 +0800 Subject: psi: Add a missing SPDX license header Add the missing SPDX license header to include/linux/psi.h include/linux/psi_types.h kernel/sched/psi.c Signed-off-by: Liu Xinpeng Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/1635133586-84611-2-git-send-email-liuxp11@chinatelecom.cn --- include/linux/psi.h | 1 + include/linux/psi_types.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psi.h b/include/linux/psi.h index 65eb1476ac70..a70ca833c6d7 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PSI_H #define _LINUX_PSI_H diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 0a23300d49af..bf50068d5d4b 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PSI_TYPES_H #define _LINUX_PSI_TYPES_H -- cgit v1.2.3 From 4feee7d12603deca8775f9f9ae5e121093837444 Mon Sep 17 00:00:00 2001 From: Josh Don Date: Mon, 18 Oct 2021 13:34:28 -0700 Subject: sched/core: Forced idle accounting Adds accounting for "forced idle" time, which is time where a cookie'd task forces its SMT sibling to idle, despite the presence of runnable tasks. Forced idle time is one means to measure the cost of enabling core scheduling (ie. the capacity lost due to the need to force idle). Forced idle time is attributed to the thread responsible for causing the forced idle. A few details: - Forced idle time is displayed via /proc/PID/sched. It also requires that schedstats is enabled. - Forced idle is only accounted when a sibling hyperthread is held idle despite the presence of runnable tasks. No time is charged if a sibling is idle but has no runnable tasks. - Tasks with 0 cookie are never charged forced idle. - For SMT > 2, we scale the amount of forced idle charged based on the number of forced idle siblings. Additionally, we split the time up and evenly charge it to all running tasks, as each is equally responsible for the forced idle. Signed-off-by: Josh Don Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211018203428.2025792-1-joshdon@google.com --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 78c351e35fec..d2e261adb8ea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -523,7 +523,11 @@ struct sched_statistics { u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; + +#ifdef CONFIG_SCHED_CORE + u64 core_forceidle_sum; #endif +#endif /* CONFIG_SCHEDSTATS */ } ____cacheline_aligned; struct sched_entity { -- cgit v1.2.3 From cb0e52b7748737b2cf6481fdd9b920ce7e1ebbdf Mon Sep 17 00:00:00 2001 From: Brian Chen Date: Wed, 10 Nov 2021 21:33:12 +0000 Subject: psi: Fix PSI_MEM_FULL state when tasks are in memstall and doing reclaim We've noticed cases where tasks in a cgroup are stalled on memory but there is little memory FULL pressure since tasks stay on the runqueue in reclaim. A simple example involves a single threaded program that keeps leaking and touching large amounts of memory. It runs in a cgroup with swap enabled, memory.high set at 10M and cpu.max ratio set at 5%. Though there is significant CPU pressure and memory SOME, there is barely any memory FULL since the task enters reclaim and stays on the runqueue. However, this memory-bound task is effectively stalled on memory and we expect memory FULL to match memory SOME in this scenario. The code is confused about memstall && running, thinking there is a stalled task and a productive task when there's only one task: a reclaimer that's counted as both. To fix this, we redefine the condition for PSI_MEM_FULL to check that all running tasks are in an active memstall instead of checking that there are no running tasks. case PSI_MEM_FULL: - return unlikely(tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]); + return unlikely(tasks[NR_MEMSTALL] && + tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]); This will capture reclaimers. It will also capture tasks that called psi_memstall_enter() and are about to sleep, but this should be negligible noise. Signed-off-by: Brian Chen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20211110213312.310243-1-brianchen118@gmail.com --- include/linux/psi_types.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index bf50068d5d4b..516c0fe836fd 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -22,7 +22,17 @@ enum psi_task_count { * don't have to special case any state tracking for it. */ NR_ONCPU, - NR_PSI_TASK_COUNTS = 4, + /* + * For IO and CPU stalls the presence of running/oncpu tasks + * in the domain means a partial rather than a full stall. + * For memory it's not so simple because of page reclaimers: + * they are running/oncpu while representing a stall. To tell + * whether a domain has productivity left or not, we need to + * distinguish between regular running (i.e. productive) + * threads and memstall ones. + */ + NR_MEMSTALL_RUNNING, + NR_PSI_TASK_COUNTS = 5, }; /* Task state bitmasks */ @@ -30,6 +40,7 @@ enum psi_task_count { #define TSK_MEMSTALL (1 << NR_MEMSTALL) #define TSK_RUNNING (1 << NR_RUNNING) #define TSK_ONCPU (1 << NR_ONCPU) +#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING) /* Resources that workloads could be stalled on */ enum psi_res { -- cgit v1.2.3