From fa2c3254d7cfff5f7a916ab928a562d1165f17bb Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 20 Jan 2022 16:25:19 +0000 Subject: sched/tracing: Don't re-read p->state when emitting sched_switch event As of commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") the following sequence becomes possible: p->__state = TASK_INTERRUPTIBLE; __schedule() deactivate_task(p); ttwu() READ !p->on_rq p->__state=TASK_WAKING trace_sched_switch() __trace_sched_switch_state() task_state_index() return 0; TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in the trace event. Prevent this by pushing the value read from __schedule() down the trace event. Reported-by: Abhijeet Dharmapurikar Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20220120162520.570782-2-valentin.schneider@arm.com --- kernel/trace/ftrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f9feb197b2da..6762ae029fdd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7347,7 +7347,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) static void ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, - struct task_struct *prev, struct task_struct *next) + unsigned int prev_state, + struct task_struct *prev, + struct task_struct *next) { struct trace_array *tr = data; struct trace_pid_list *pid_list; -- cgit v1.2.3