diff options
| author | Ihor Solodrai <ihor.solodrai@linux.dev> | 2026-02-13 10:21:36 -0800 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2026-02-13 09:09:39 -1000 |
| commit | 0b82cc331d2e23537670878c62c19ee3f4147a93 (patch) | |
| tree | 4b02e6ee3c65ab1c91ee11aa7bc5cb6dc40f8885 /tools/testing/selftests | |
| parent | 048714d9df73a724d3f84b587f1110963e32f9b3 (diff) | |
selftests/sched_ext: Fix rt_stall flaky failure
The rt_stall test measures the runtime ratio between an EXT and an RT
task pinned to the same CPU, verifying that the deadline server prevents
RT tasks from starving SCHED_EXT tasks. It expects the EXT task to get
at least 4% of CPU time.
The test is flaky because sched_stress_test() calls sleep(RUN_TIME)
immediately after fork(), without waiting for the RT child to complete
its setup (set_affinity + set_sched). If the RT child experiences
scheduling latency before completing setup, that delay eats into the
measurement window: the RT child runs for less than RUN_TIME seconds,
and the EXT task's measured ratio drops below the 4% threshold.
For example, in the failing CI run [1]:
EXT=0.140s RT=4.750s total=4.890s (expected ~5.0s)
ratio=2.86% < 4% → FAIL
The 110ms gap (5.0 - 4.89) corresponds to the RT child's setup time
being counted inside the measurement window, during which fewer
deadline server ticks fire for the EXT task.
Fix by using pipes to synchronize: each child signals the parent after
completing its setup, and the parent waits for both signals before
starting sleep(RUN_TIME). This ensures the measurement window only
counts time when both tasks are fully configured and competing.
[1] https://github.com/kernel-patches/bpf/actions/runs/21961895809/job/63442490449
Fixes: be621a76341c ("selftests/sched_ext: Add test for sched_ext dl_server")
Assisted-by: claude-opus-4-6-v1
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'tools/testing/selftests')
| -rw-r--r-- | tools/testing/selftests/sched_ext/rt_stall.c | 49 |
1 file changed, 49 insertions, 0 deletions
diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c
index 015200f80f6e..ab772e336f86 100644
--- a/tools/testing/selftests/sched_ext/rt_stall.c
+++ b/tools/testing/selftests/sched_ext/rt_stall.c
@@ -23,6 +23,30 @@
 #define CORE_ID 0 /* CPU to pin tasks to */
 #define RUN_TIME 5 /* How long to run the test in seconds */
 
+/* Signal the parent that setup is complete by writing to a pipe */
+static void signal_ready(int fd)
+{
+	char c = 1;
+
+	if (write(fd, &c, 1) != 1) {
+		perror("write to ready pipe");
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+}
+
+/* Wait for a child to signal readiness via a pipe */
+static void wait_ready(int fd)
+{
+	char c;
+
+	if (read(fd, &c, 1) != 1) {
+		perror("read from ready pipe");
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+}
+
 /* Simple busy-wait function for test tasks */
 static void process_func(void)
 {
@@ -122,14 +146,24 @@ static bool sched_stress_test(bool is_ext)
 
 	float ext_runtime, rt_runtime, actual_ratio;
 	int ext_pid, rt_pid;
+	int ext_ready[2], rt_ready[2];
 
 	ksft_print_header();
 	ksft_set_plan(1);
 
+	if (pipe(ext_ready) || pipe(rt_ready)) {
+		perror("pipe");
+		ksft_exit_fail();
+	}
+
 	/* Create and set up a EXT task */
 	ext_pid = fork();
 	if (ext_pid == 0) {
+		close(ext_ready[0]);
+		close(rt_ready[0]);
+		close(rt_ready[1]);
 		set_affinity(CORE_ID);
+		signal_ready(ext_ready[1]);
 		process_func();
 		exit(0);
 	} else if (ext_pid < 0) {
@@ -140,8 +174,12 @@ static bool sched_stress_test(bool is_ext)
 	/* Create an RT task */
 	rt_pid = fork();
 	if (rt_pid == 0) {
+		close(ext_ready[0]);
+		close(ext_ready[1]);
+		close(rt_ready[0]);
 		set_affinity(CORE_ID);
 		set_sched(SCHED_FIFO, 50);
+		signal_ready(rt_ready[1]);
 		process_func();
 		exit(0);
 	} else if (rt_pid < 0) {
@@ -149,6 +187,17 @@ static bool sched_stress_test(bool is_ext)
 		ksft_exit_fail();
 	}
 
+	/*
+	 * Wait for both children to complete their setup (affinity and
+	 * scheduling policy) before starting the measurement window.
+	 * This prevents flaky failures caused by the RT child's setup
+	 * time eating into the measurement period.
+	 */
+	close(ext_ready[1]);
+	close(rt_ready[1]);
+	wait_ready(ext_ready[0]);
+	wait_ready(rt_ready[0]);
+
 	/* Let the processes run for the specified time */
 	sleep(RUN_TIME);
 
