diff options
| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-21 09:38:59 -0800 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-21 09:38:59 -0800 |
| commit | 4cf44657887b4c41374981d0afb2ca302b189e15 (patch) | |
| tree | f7bd3423c50ce9843335994b460969c32edac099 /tools | |
| parent | 8eb604d4ee8bf6183b00b8a96f0007b1be28ca9d (diff) | |
| parent | 640c9dc72f21f325700a4b0f839ad568ff21c697 (diff) | |
Merge tag 'sched_ext-for-7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:
- Various bug fixes for the example schedulers and selftests
* tag 'sched_ext-for-7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
tools/sched_ext: fix getopt not re-parsed on restart
tools/sched_ext: scx_userland: fix data races on shared counters
tools/sched_ext: scx_pair: fix stride == 0 crash on single-CPU systems
tools/sched_ext: scx_central: fix CPU_SET and skeleton leak on early exit
tools/sched_ext: scx_userland: fix stale data on restart
tools/sched_ext: scx_flatcg: fix potential stack overflow from VLA in fcg_read_stats
selftests/sched_ext: Fix rt_stall flaky failure
tools/sched_ext: scx_userland: fix restart and stats thread lifecycle bugs
tools/sched_ext: scx_central: fix sched_setaffinity() call with the set size
tools/sched_ext: scx_flatcg: zero-initialize stats counter array
Diffstat (limited to 'tools')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | tools/sched_ext/scx_central.c | 10 |
| -rw-r--r-- | tools/sched_ext/scx_cpu0.c | 1 |
| -rw-r--r-- | tools/sched_ext/scx_flatcg.c | 13 |
| -rw-r--r-- | tools/sched_ext/scx_pair.c | 8 |
| -rw-r--r-- | tools/sched_ext/scx_sdt.c | 1 |
| -rw-r--r-- | tools/sched_ext/scx_simple.c | 1 |
| -rw-r--r-- | tools/sched_ext/scx_userland.c | 31 |
| -rw-r--r-- | tools/testing/selftests/sched_ext/rt_stall.c | 49 |
8 files changed, 96 insertions, 18 deletions
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index 55931a4cd71c..2a805f1d6c8f 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -50,11 +50,13 @@ int main(int argc, char **argv) __u64 seq = 0, ecode; __s32 opt; cpu_set_t *cpuset; + size_t cpuset_size; libbpf_set_print(libbpf_print_fn); signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); restart: + optind = 1; skel = SCX_OPS_OPEN(central_ops, scx_central); skel->rodata->central_cpu = 0; @@ -73,6 +75,7 @@ restart: u32 central_cpu = strtoul(optarg, NULL, 0); if (central_cpu >= skel->rodata->nr_cpu_ids) { fprintf(stderr, "invalid central CPU id value, %u given (%u max)\n", central_cpu, skel->rodata->nr_cpu_ids); + scx_central__destroy(skel); return -1; } skel->rodata->central_cpu = (s32)central_cpu; @@ -106,9 +109,10 @@ restart: */ cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids); SCX_BUG_ON(!cpuset, "Failed to allocate cpuset"); - CPU_ZERO_S(CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset); - CPU_SET(skel->rodata->central_cpu, cpuset); - SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset), + cpuset_size = CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids); + CPU_ZERO_S(cpuset_size, cpuset); + CPU_SET_S(skel->rodata->central_cpu, cpuset_size, cpuset); + SCX_BUG_ON(sched_setaffinity(0, cpuset_size, cpuset), "Failed to affinitize to central CPU %d (max %d)", skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1); CPU_FREE(cpuset); diff --git a/tools/sched_ext/scx_cpu0.c b/tools/sched_ext/scx_cpu0.c index 1e4fa4ab8da9..a6fba9978b9c 100644 --- a/tools/sched_ext/scx_cpu0.c +++ b/tools/sched_ext/scx_cpu0.c @@ -69,6 +69,7 @@ int main(int argc, char **argv) signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); restart: + optind = 1; skel = SCX_OPS_OPEN(cpu0_ops, scx_cpu0); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c index cd85eb401179..d865c381589b 100644 --- 
a/tools/sched_ext/scx_flatcg.c +++ b/tools/sched_ext/scx_flatcg.c @@ -102,21 +102,27 @@ static float read_cpu_util(__u64 *last_sum, __u64 *last_idle) static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats) { - __u64 cnts[FCG_NR_STATS][skel->rodata->nr_cpus]; + __u64 *cnts; __u32 idx; + cnts = calloc(skel->rodata->nr_cpus, sizeof(__u64)); + if (!cnts) + return; + memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS); for (idx = 0; idx < FCG_NR_STATS; idx++) { int ret, cpu; ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), - &idx, cnts[idx]); + &idx, cnts); if (ret < 0) continue; for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++) - stats[idx] += cnts[idx][cpu]; + stats[idx] += cnts[cpu]; } + + free(cnts); } int main(int argc, char **argv) @@ -135,6 +141,7 @@ int main(int argc, char **argv) signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); restart: + optind = 1; skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); diff --git a/tools/sched_ext/scx_pair.c b/tools/sched_ext/scx_pair.c index d3e97faa6334..2e509391f3da 100644 --- a/tools/sched_ext/scx_pair.c +++ b/tools/sched_ext/scx_pair.c @@ -53,10 +53,10 @@ int main(int argc, char **argv) signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); restart: + optind = 1; skel = SCX_OPS_OPEN(pair_ops, scx_pair); skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus(); - assert(skel->rodata->nr_cpu_ids > 0); skel->rodata->pair_batch_dur_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL"); /* pair up the earlier half to the latter by default, override with -s */ @@ -76,6 +76,12 @@ restart: } } + /* Stride must be positive to pair distinct CPUs. */ + if (stride <= 0) { + fprintf(stderr, "Invalid stride %d, must be positive\n", stride); + scx_pair__destroy(skel); + return -1; + } bpf_map__set_max_entries(skel->maps.pair_ctx, skel->rodata->nr_cpu_ids / 2); /* Resize arrays so their element count is equal to cpu count. 
*/ diff --git a/tools/sched_ext/scx_sdt.c b/tools/sched_ext/scx_sdt.c index b0363363476d..d8ca9aa316a5 100644 --- a/tools/sched_ext/scx_sdt.c +++ b/tools/sched_ext/scx_sdt.c @@ -51,6 +51,7 @@ int main(int argc, char **argv) signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); restart: + optind = 1; skel = SCX_OPS_OPEN(sdt_ops, scx_sdt); while ((opt = getopt(argc, argv, "fvh")) != -1) { diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c index 06d4b13bf76b..c3b48611712b 100644 --- a/tools/sched_ext/scx_simple.c +++ b/tools/sched_ext/scx_simple.c @@ -71,6 +71,7 @@ int main(int argc, char **argv) signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); restart: + optind = 1; skel = SCX_OPS_OPEN(simple_ops, scx_simple); while ((opt = getopt(argc, argv, "fvh")) != -1) { diff --git a/tools/sched_ext/scx_userland.c b/tools/sched_ext/scx_userland.c index 10b31020f44f..3f2aba658b4a 100644 --- a/tools/sched_ext/scx_userland.c +++ b/tools/sched_ext/scx_userland.c @@ -54,6 +54,7 @@ static bool verbose; static volatile int exit_req; static int enqueued_fd, dispatched_fd; +static pthread_t stats_printer; static struct scx_userland *skel; static struct bpf_link *ops_link; @@ -156,9 +157,9 @@ static int dispatch_task(__s32 pid) err = bpf_map_update_elem(dispatched_fd, NULL, &pid, 0); if (err) { - nr_vruntime_failed++; + __atomic_add_fetch(&nr_vruntime_failed, 1, __ATOMIC_RELAXED); } else { - nr_vruntime_dispatches++; + __atomic_add_fetch(&nr_vruntime_dispatches, 1, __ATOMIC_RELAXED); } return err; @@ -201,8 +202,8 @@ static int vruntime_enqueue(const struct scx_userland_enqueued_task *bpf_task) return ENOENT; update_enqueued(curr, bpf_task); - nr_vruntime_enqueues++; - nr_curr_enqueued++; + __atomic_add_fetch(&nr_vruntime_enqueues, 1, __ATOMIC_RELAXED); + __atomic_add_fetch(&nr_curr_enqueued, 1, __ATOMIC_RELAXED); /* * Enqueue the task in a vruntime-sorted list. 
A more optimal data @@ -278,9 +279,9 @@ static void dispatch_batch(void) LIST_INSERT_HEAD(&vruntime_head, task, entries); break; } - nr_curr_enqueued--; + __atomic_sub_fetch(&nr_curr_enqueued, 1, __ATOMIC_RELAXED); } - skel->bss->nr_scheduled = nr_curr_enqueued; + skel->bss->nr_scheduled = __atomic_load_n(&nr_curr_enqueued, __ATOMIC_RELAXED); } static void *run_stats_printer(void *arg) @@ -305,9 +306,9 @@ static void *run_stats_printer(void *arg) printf("|-----------------------|\n"); printf("| VRUNTIME / USER |\n"); printf("|-----------------------|\n"); - printf("| enq: %10llu |\n", nr_vruntime_enqueues); - printf("| disp: %10llu |\n", nr_vruntime_dispatches); - printf("| failed: %10llu |\n", nr_vruntime_failed); + printf("| enq: %10llu |\n", __atomic_load_n(&nr_vruntime_enqueues, __ATOMIC_RELAXED)); + printf("| disp: %10llu |\n", __atomic_load_n(&nr_vruntime_dispatches, __ATOMIC_RELAXED)); + printf("| failed: %10llu |\n", __atomic_load_n(&nr_vruntime_failed, __ATOMIC_RELAXED)); printf("o-----------------------o\n"); printf("\n\n"); fflush(stdout); @@ -319,8 +320,6 @@ static void *run_stats_printer(void *arg) static int spawn_stats_thread(void) { - pthread_t stats_printer; - return pthread_create(&stats_printer, NULL, run_stats_printer, NULL); } @@ -375,6 +374,15 @@ static void pre_bootstrap(int argc, char **argv) static void bootstrap(char *comm) { + exit_req = 0; + min_vruntime = 0.0; + __atomic_store_n(&nr_vruntime_enqueues, 0, __ATOMIC_RELAXED); + __atomic_store_n(&nr_vruntime_dispatches, 0, __ATOMIC_RELAXED); + __atomic_store_n(&nr_vruntime_failed, 0, __ATOMIC_RELAXED); + __atomic_store_n(&nr_curr_enqueued, 0, __ATOMIC_RELAXED); + memset(tasks, 0, pid_max * sizeof(*tasks)); + LIST_INIT(&vruntime_head); + skel = SCX_OPS_OPEN(userland_ops, scx_userland); skel->rodata->num_possible_cpus = libbpf_num_possible_cpus(); @@ -428,6 +436,7 @@ restart: exit_req = 1; bpf_link__destroy(ops_link); + pthread_join(stats_printer, NULL); ecode = UEI_REPORT(skel, uei); 
scx_userland__destroy(skel); diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c index 015200f80f6e..ab772e336f86 100644 --- a/tools/testing/selftests/sched_ext/rt_stall.c +++ b/tools/testing/selftests/sched_ext/rt_stall.c @@ -23,6 +23,30 @@ #define CORE_ID 0 /* CPU to pin tasks to */ #define RUN_TIME 5 /* How long to run the test in seconds */ +/* Signal the parent that setup is complete by writing to a pipe */ +static void signal_ready(int fd) +{ + char c = 1; + + if (write(fd, &c, 1) != 1) { + perror("write to ready pipe"); + exit(EXIT_FAILURE); + } + close(fd); +} + +/* Wait for a child to signal readiness via a pipe */ +static void wait_ready(int fd) +{ + char c; + + if (read(fd, &c, 1) != 1) { + perror("read from ready pipe"); + exit(EXIT_FAILURE); + } + close(fd); +} + /* Simple busy-wait function for test tasks */ static void process_func(void) { @@ -122,14 +146,24 @@ static bool sched_stress_test(bool is_ext) float ext_runtime, rt_runtime, actual_ratio; int ext_pid, rt_pid; + int ext_ready[2], rt_ready[2]; ksft_print_header(); ksft_set_plan(1); + if (pipe(ext_ready) || pipe(rt_ready)) { + perror("pipe"); + ksft_exit_fail(); + } + /* Create and set up a EXT task */ ext_pid = fork(); if (ext_pid == 0) { + close(ext_ready[0]); + close(rt_ready[0]); + close(rt_ready[1]); set_affinity(CORE_ID); + signal_ready(ext_ready[1]); process_func(); exit(0); } else if (ext_pid < 0) { @@ -140,8 +174,12 @@ static bool sched_stress_test(bool is_ext) /* Create an RT task */ rt_pid = fork(); if (rt_pid == 0) { + close(ext_ready[0]); + close(ext_ready[1]); + close(rt_ready[0]); set_affinity(CORE_ID); set_sched(SCHED_FIFO, 50); + signal_ready(rt_ready[1]); process_func(); exit(0); } else if (rt_pid < 0) { @@ -149,6 +187,17 @@ static bool sched_stress_test(bool is_ext) ksft_exit_fail(); } + /* + * Wait for both children to complete their setup (affinity and + * scheduling policy) before starting the measurement window. 
+ * This prevents flaky failures caused by the RT child's setup + * time eating into the measurement period. + */ + close(ext_ready[1]); + close(rt_ready[1]); + wait_ready(ext_ready[0]); + wait_ready(rt_ready[0]); + /* Let the processes run for the specified time */ sleep(RUN_TIME); |
