summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-21 09:38:59 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-21 09:38:59 -0800
commit4cf44657887b4c41374981d0afb2ca302b189e15 (patch)
treef7bd3423c50ce9843335994b460969c32edac099 /tools
parent8eb604d4ee8bf6183b00b8a96f0007b1be28ca9d (diff)
parent640c9dc72f21f325700a4b0f839ad568ff21c697 (diff)
Merge tag 'sched_ext-for-7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:

 - Various bug fixes for the example schedulers and selftests

* tag 'sched_ext-for-7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  tools/sched_ext: fix getopt not re-parsed on restart
  tools/sched_ext: scx_userland: fix data races on shared counters
  tools/sched_ext: scx_pair: fix stride == 0 crash on single-CPU systems
  tools/sched_ext: scx_central: fix CPU_SET and skeleton leak on early exit
  tools/sched_ext: scx_userland: fix stale data on restart
  tools/sched_ext: scx_flatcg: fix potential stack overflow from VLA in fcg_read_stats
  selftests/sched_ext: Fix rt_stall flaky failure
  tools/sched_ext: scx_userland: fix restart and stats thread lifecycle bugs
  tools/sched_ext: scx_central: fix sched_setaffinity() call with the set size
  tools/sched_ext: scx_flatcg: zero-initialize stats counter array
Diffstat (limited to 'tools')
-rw-r--r--tools/sched_ext/scx_central.c10
-rw-r--r--tools/sched_ext/scx_cpu0.c1
-rw-r--r--tools/sched_ext/scx_flatcg.c13
-rw-r--r--tools/sched_ext/scx_pair.c8
-rw-r--r--tools/sched_ext/scx_sdt.c1
-rw-r--r--tools/sched_ext/scx_simple.c1
-rw-r--r--tools/sched_ext/scx_userland.c31
-rw-r--r--tools/testing/selftests/sched_ext/rt_stall.c49
8 files changed, 96 insertions, 18 deletions
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index 55931a4cd71c..2a805f1d6c8f 100644
--- a/tools/sched_ext/scx_central.c
+++ b/tools/sched_ext/scx_central.c
@@ -50,11 +50,13 @@ int main(int argc, char **argv)
__u64 seq = 0, ecode;
__s32 opt;
cpu_set_t *cpuset;
+ size_t cpuset_size;
libbpf_set_print(libbpf_print_fn);
signal(SIGINT, sigint_handler);
signal(SIGTERM, sigint_handler);
restart:
+ optind = 1;
skel = SCX_OPS_OPEN(central_ops, scx_central);
skel->rodata->central_cpu = 0;
@@ -73,6 +75,7 @@ restart:
u32 central_cpu = strtoul(optarg, NULL, 0);
if (central_cpu >= skel->rodata->nr_cpu_ids) {
fprintf(stderr, "invalid central CPU id value, %u given (%u max)\n", central_cpu, skel->rodata->nr_cpu_ids);
+ scx_central__destroy(skel);
return -1;
}
skel->rodata->central_cpu = (s32)central_cpu;
@@ -106,9 +109,10 @@ restart:
*/
cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
- CPU_ZERO_S(CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset);
- CPU_SET(skel->rodata->central_cpu, cpuset);
- SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset),
+ cpuset_size = CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids);
+ CPU_ZERO_S(cpuset_size, cpuset);
+ CPU_SET_S(skel->rodata->central_cpu, cpuset_size, cpuset);
+ SCX_BUG_ON(sched_setaffinity(0, cpuset_size, cpuset),
"Failed to affinitize to central CPU %d (max %d)",
skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
CPU_FREE(cpuset);
diff --git a/tools/sched_ext/scx_cpu0.c b/tools/sched_ext/scx_cpu0.c
index 1e4fa4ab8da9..a6fba9978b9c 100644
--- a/tools/sched_ext/scx_cpu0.c
+++ b/tools/sched_ext/scx_cpu0.c
@@ -69,6 +69,7 @@ int main(int argc, char **argv)
signal(SIGINT, sigint_handler);
signal(SIGTERM, sigint_handler);
restart:
+ optind = 1;
skel = SCX_OPS_OPEN(cpu0_ops, scx_cpu0);
skel->rodata->nr_cpus = libbpf_num_possible_cpus();
diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c
index cd85eb401179..d865c381589b 100644
--- a/tools/sched_ext/scx_flatcg.c
+++ b/tools/sched_ext/scx_flatcg.c
@@ -102,21 +102,27 @@ static float read_cpu_util(__u64 *last_sum, __u64 *last_idle)
static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats)
{
- __u64 cnts[FCG_NR_STATS][skel->rodata->nr_cpus];
+ __u64 *cnts;
__u32 idx;
+ cnts = calloc(skel->rodata->nr_cpus, sizeof(__u64));
+ if (!cnts)
+ return;
+
memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS);
for (idx = 0; idx < FCG_NR_STATS; idx++) {
int ret, cpu;
ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats),
- &idx, cnts[idx]);
+ &idx, cnts);
if (ret < 0)
continue;
for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++)
- stats[idx] += cnts[idx][cpu];
+ stats[idx] += cnts[cpu];
}
+
+ free(cnts);
}
int main(int argc, char **argv)
@@ -135,6 +141,7 @@ int main(int argc, char **argv)
signal(SIGINT, sigint_handler);
signal(SIGTERM, sigint_handler);
restart:
+ optind = 1;
skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
skel->rodata->nr_cpus = libbpf_num_possible_cpus();
diff --git a/tools/sched_ext/scx_pair.c b/tools/sched_ext/scx_pair.c
index d3e97faa6334..2e509391f3da 100644
--- a/tools/sched_ext/scx_pair.c
+++ b/tools/sched_ext/scx_pair.c
@@ -53,10 +53,10 @@ int main(int argc, char **argv)
signal(SIGINT, sigint_handler);
signal(SIGTERM, sigint_handler);
restart:
+ optind = 1;
skel = SCX_OPS_OPEN(pair_ops, scx_pair);
skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
- assert(skel->rodata->nr_cpu_ids > 0);
skel->rodata->pair_batch_dur_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
/* pair up the earlier half to the latter by default, override with -s */
@@ -76,6 +76,12 @@ restart:
}
}
+ /* Stride must be positive to pair distinct CPUs. */
+ if (stride <= 0) {
+ fprintf(stderr, "Invalid stride %d, must be positive\n", stride);
+ scx_pair__destroy(skel);
+ return -1;
+ }
bpf_map__set_max_entries(skel->maps.pair_ctx, skel->rodata->nr_cpu_ids / 2);
/* Resize arrays so their element count is equal to cpu count. */
diff --git a/tools/sched_ext/scx_sdt.c b/tools/sched_ext/scx_sdt.c
index b0363363476d..d8ca9aa316a5 100644
--- a/tools/sched_ext/scx_sdt.c
+++ b/tools/sched_ext/scx_sdt.c
@@ -51,6 +51,7 @@ int main(int argc, char **argv)
signal(SIGINT, sigint_handler);
signal(SIGTERM, sigint_handler);
restart:
+ optind = 1;
skel = SCX_OPS_OPEN(sdt_ops, scx_sdt);
while ((opt = getopt(argc, argv, "fvh")) != -1) {
diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c
index 06d4b13bf76b..c3b48611712b 100644
--- a/tools/sched_ext/scx_simple.c
+++ b/tools/sched_ext/scx_simple.c
@@ -71,6 +71,7 @@ int main(int argc, char **argv)
signal(SIGINT, sigint_handler);
signal(SIGTERM, sigint_handler);
restart:
+ optind = 1;
skel = SCX_OPS_OPEN(simple_ops, scx_simple);
while ((opt = getopt(argc, argv, "fvh")) != -1) {
diff --git a/tools/sched_ext/scx_userland.c b/tools/sched_ext/scx_userland.c
index 10b31020f44f..3f2aba658b4a 100644
--- a/tools/sched_ext/scx_userland.c
+++ b/tools/sched_ext/scx_userland.c
@@ -54,6 +54,7 @@ static bool verbose;
static volatile int exit_req;
static int enqueued_fd, dispatched_fd;
+static pthread_t stats_printer;
static struct scx_userland *skel;
static struct bpf_link *ops_link;
@@ -156,9 +157,9 @@ static int dispatch_task(__s32 pid)
err = bpf_map_update_elem(dispatched_fd, NULL, &pid, 0);
if (err) {
- nr_vruntime_failed++;
+ __atomic_add_fetch(&nr_vruntime_failed, 1, __ATOMIC_RELAXED);
} else {
- nr_vruntime_dispatches++;
+ __atomic_add_fetch(&nr_vruntime_dispatches, 1, __ATOMIC_RELAXED);
}
return err;
@@ -201,8 +202,8 @@ static int vruntime_enqueue(const struct scx_userland_enqueued_task *bpf_task)
return ENOENT;
update_enqueued(curr, bpf_task);
- nr_vruntime_enqueues++;
- nr_curr_enqueued++;
+ __atomic_add_fetch(&nr_vruntime_enqueues, 1, __ATOMIC_RELAXED);
+ __atomic_add_fetch(&nr_curr_enqueued, 1, __ATOMIC_RELAXED);
/*
* Enqueue the task in a vruntime-sorted list. A more optimal data
@@ -278,9 +279,9 @@ static void dispatch_batch(void)
LIST_INSERT_HEAD(&vruntime_head, task, entries);
break;
}
- nr_curr_enqueued--;
+ __atomic_sub_fetch(&nr_curr_enqueued, 1, __ATOMIC_RELAXED);
}
- skel->bss->nr_scheduled = nr_curr_enqueued;
+ skel->bss->nr_scheduled = __atomic_load_n(&nr_curr_enqueued, __ATOMIC_RELAXED);
}
static void *run_stats_printer(void *arg)
@@ -305,9 +306,9 @@ static void *run_stats_printer(void *arg)
printf("|-----------------------|\n");
printf("| VRUNTIME / USER |\n");
printf("|-----------------------|\n");
- printf("| enq: %10llu |\n", nr_vruntime_enqueues);
- printf("| disp: %10llu |\n", nr_vruntime_dispatches);
- printf("| failed: %10llu |\n", nr_vruntime_failed);
+ printf("| enq: %10llu |\n", __atomic_load_n(&nr_vruntime_enqueues, __ATOMIC_RELAXED));
+ printf("| disp: %10llu |\n", __atomic_load_n(&nr_vruntime_dispatches, __ATOMIC_RELAXED));
+ printf("| failed: %10llu |\n", __atomic_load_n(&nr_vruntime_failed, __ATOMIC_RELAXED));
printf("o-----------------------o\n");
printf("\n\n");
fflush(stdout);
@@ -319,8 +320,6 @@ static void *run_stats_printer(void *arg)
static int spawn_stats_thread(void)
{
- pthread_t stats_printer;
-
return pthread_create(&stats_printer, NULL, run_stats_printer, NULL);
}
@@ -375,6 +374,15 @@ static void pre_bootstrap(int argc, char **argv)
static void bootstrap(char *comm)
{
+ exit_req = 0;
+ min_vruntime = 0.0;
+ __atomic_store_n(&nr_vruntime_enqueues, 0, __ATOMIC_RELAXED);
+ __atomic_store_n(&nr_vruntime_dispatches, 0, __ATOMIC_RELAXED);
+ __atomic_store_n(&nr_vruntime_failed, 0, __ATOMIC_RELAXED);
+ __atomic_store_n(&nr_curr_enqueued, 0, __ATOMIC_RELAXED);
+ memset(tasks, 0, pid_max * sizeof(*tasks));
+ LIST_INIT(&vruntime_head);
+
skel = SCX_OPS_OPEN(userland_ops, scx_userland);
skel->rodata->num_possible_cpus = libbpf_num_possible_cpus();
@@ -428,6 +436,7 @@ restart:
exit_req = 1;
bpf_link__destroy(ops_link);
+ pthread_join(stats_printer, NULL);
ecode = UEI_REPORT(skel, uei);
scx_userland__destroy(skel);
diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c
index 015200f80f6e..ab772e336f86 100644
--- a/tools/testing/selftests/sched_ext/rt_stall.c
+++ b/tools/testing/selftests/sched_ext/rt_stall.c
@@ -23,6 +23,30 @@
#define CORE_ID 0 /* CPU to pin tasks to */
#define RUN_TIME 5 /* How long to run the test in seconds */
+/* Signal the parent that setup is complete by writing to a pipe */
+static void signal_ready(int fd)
+{
+ char c = 1;
+
+ if (write(fd, &c, 1) != 1) {
+ perror("write to ready pipe");
+ exit(EXIT_FAILURE);
+ }
+ close(fd);
+}
+
+/* Wait for a child to signal readiness via a pipe */
+static void wait_ready(int fd)
+{
+ char c;
+
+ if (read(fd, &c, 1) != 1) {
+ perror("read from ready pipe");
+ exit(EXIT_FAILURE);
+ }
+ close(fd);
+}
+
/* Simple busy-wait function for test tasks */
static void process_func(void)
{
@@ -122,14 +146,24 @@ static bool sched_stress_test(bool is_ext)
float ext_runtime, rt_runtime, actual_ratio;
int ext_pid, rt_pid;
+ int ext_ready[2], rt_ready[2];
ksft_print_header();
ksft_set_plan(1);
+ if (pipe(ext_ready) || pipe(rt_ready)) {
+ perror("pipe");
+ ksft_exit_fail();
+ }
+
/* Create and set up a EXT task */
ext_pid = fork();
if (ext_pid == 0) {
+ close(ext_ready[0]);
+ close(rt_ready[0]);
+ close(rt_ready[1]);
set_affinity(CORE_ID);
+ signal_ready(ext_ready[1]);
process_func();
exit(0);
} else if (ext_pid < 0) {
@@ -140,8 +174,12 @@ static bool sched_stress_test(bool is_ext)
/* Create an RT task */
rt_pid = fork();
if (rt_pid == 0) {
+ close(ext_ready[0]);
+ close(ext_ready[1]);
+ close(rt_ready[0]);
set_affinity(CORE_ID);
set_sched(SCHED_FIFO, 50);
+ signal_ready(rt_ready[1]);
process_func();
exit(0);
} else if (rt_pid < 0) {
@@ -149,6 +187,17 @@ static bool sched_stress_test(bool is_ext)
ksft_exit_fail();
}
+ /*
+ * Wait for both children to complete their setup (affinity and
+ * scheduling policy) before starting the measurement window.
+ * This prevents flaky failures caused by the RT child's setup
+ * time eating into the measurement period.
+ */
+ close(ext_ready[1]);
+ close(rt_ready[1]);
+ wait_ready(ext_ready[0]);
+ wait_ready(rt_ready[0]);
+
/* Let the processes run for the specified time */
sleep(RUN_TIME);