diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-28 16:26:11 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-28 16:26:11 -0700 |
| commit | 664f0f6be37ce4ef80992cf2ed74761cd5bbe207 (patch) | |
| tree | 7e2714963b58fa6732df54be5685588158ec2a51 /tools/testing | |
| parent | dca922e019dd758b4c1b4bec8f1d509efddeaab4 (diff) | |
| parent | d99f7a32f09dccbe396187370ec1a74a31b73d7e (diff) | |
Merge tag 'sched_ext-for-7.1-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:
"The merge window pulled in the cgroup sub-scheduler infrastructure,
and new AI reviews are accelerating bug reporting and fixing - hence
the larger than usual fixes batch:
- Use-after-frees during scheduler load/unload:
- The disable path could free the BPF scheduler while deferred
irq_work / kthread work was still in flight
- cgroup setter callbacks read the active scheduler outside the
rwsem that synchronizes against teardown
Fix both, and reuse the disable drain in the enable error paths so
the BPF JIT page can't be freed under live callbacks.
- Several BPF op invocations didn't tell the framework which runqueue
was already locked, so helper kfuncs that re-acquire the runqueue
by CPU could deadlock on the held lock
Fix the affected callsites, including recursive parent-into-child
dispatch.
- The hardlockup notifier ran from NMI but eventually took a
non-NMI-safe lock. Bounce it through irq_work.
- A handful of bugs in the new sub-scheduler hierarchy:
- helper kfuncs hard-coded the root instead of resolving the
caller's scheduler
- the enable error path tried to disable per-task state that had
never been initialized, and leaked cpus_read_lock on the way
out
- a sysfs object was leaked on every load/unload
- the dispatch fast-path used the root scheduler instead of the
task's
- a couple of CONFIG #ifdef guards were misclassified
- Verifier-time hardening: BPF programs of unrelated struct_ops types
(e.g. tcp_congestion_ops) could call sched_ext kfuncs - a semantic
bug and, once sub-sched was enabled, a KASAN out-of-bounds read.
Now rejected at load. Plus a few NULL and cross-task argument
checks on sched_ext kfuncs, and a selftest covering the new deny.
- rhashtable (Herbert): restore the insecure_elasticity toggle and
bounce the deferred-resize kick through irq_work to break a
lock-order cycle observable from raw-spinlock callers. sched_ext's
scheduler-instance hash is the first user of both.
- The bypass-mode load balancer used file-scope cpumasks; with
multiple scheduler instances now possible, those raced. Move to
per-instance cpumasks, plus a follow-up to skip tasks whose
recorded CPU is stale relative to the new owning runqueue.
- Smaller fixes:
- a dispatch queue's first-task tracking misbehaved when a parked
iterator cursor sat in the list
- the runqueue's next-class wasn't promoted on local-queue
enqueue, leaving an SCX task behind RT in edge cases
- the reference qmap scheduler stopped erroring on legitimate
cross-scheduler task-storage misses"
* tag 'sched_ext-for-7.1-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext: (26 commits)
sched_ext: Fix scx_flush_disable_work() UAF race
sched_ext: Call wakeup_preempt() in local_dsq_post_enq()
sched_ext: Release cpus_read_lock on scx_link_sched() failure in root enable
sched_ext: Reject NULL-sch callers in scx_bpf_task_set_slice/dsq_vtime
sched_ext: Refuse cross-task select_cpu_from_kfunc calls
sched_ext: Align cgroup #ifdef guards with SUB_SCHED vs GROUP_SCHED
sched_ext: Make bypass LB cpumasks per-scheduler
sched_ext: Pass held rq to SCX_CALL_OP() for core_sched_before
sched_ext: Pass held rq to SCX_CALL_OP() for dump_cpu/dump_task
sched_ext: Save and restore scx_locked_rq across SCX_CALL_OP
sched_ext: Use dsq->first_task instead of list_empty() in dispatch_enqueue() FIFO-tail
sched_ext: Resolve caller's scheduler in scx_bpf_destroy_dsq() / scx_bpf_dsq_nr_queued()
sched_ext: Read scx_root under scx_cgroup_ops_rwsem in cgroup setters
sched_ext: Don't disable tasks in scx_sub_enable_workfn() abort path
sched_ext: Skip tasks with stale task_rq in bypass_lb_cpu()
sched_ext: Guard scx_dsq_move() against NULL kit->dsq after failed iter_new
sched_ext: Unregister sub_kset on scheduler disable
sched_ext: Defer scx_hardlockup() out of NMI
sched_ext: sync disable_irq_work in bpf_scx_unreg()
sched_ext: Fix local_dsq_post_enq() to use task's scheduler in sub-sched
...
Diffstat (limited to 'tools/testing')
| -rw-r--r-- | tools/testing/selftests/sched_ext/Makefile | 1 |
| -rw-r--r-- | tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c | 44 |
| -rw-r--r-- | tools/testing/selftests/sched_ext/non_scx_kfunc_deny.c | 47 |
3 files changed, 92 insertions, 0 deletions
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 789037be44c7..5d2dffca0e91 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -175,6 +175,7 @@ auto-test-targets := \ maximal \ maybe_null \ minimal \ + non_scx_kfunc_deny \ numa \ allowed_cpus \ peek_dsq \ diff --git a/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c new file mode 100644 index 000000000000..9f16d39255e7 --- /dev/null +++ b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Verify that context-sensitive SCX kfuncs (even "unlocked" ones) are + * restricted to only SCX struct_ops programs. Non-SCX struct_ops programs, + * such as TCP congestion control programs, should be rejected by the BPF + * verifier when attempting to call these kfuncs. + * + * Copyright (C) 2026 Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw> + * Copyright (C) 2026 Cheng-Yang Chou <yphbchou0911@gmail.com> + */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* SCX kfunc from scx_kfunc_ids_any set */ +void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; + +SEC("struct_ops/ssthresh") +__u32 BPF_PROG(tcp_ca_ssthresh, struct sock *sk) +{ + /* + * This call should be rejected by the verifier because this is a + * TCP congestion control program (non-SCX struct_ops). 
+ */ + scx_bpf_kick_cpu(0, 0); + return 2; +} + +SEC("struct_ops/cong_avoid") +void BPF_PROG(tcp_ca_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) {} + +SEC("struct_ops/undo_cwnd") +__u32 BPF_PROG(tcp_ca_undo_cwnd, struct sock *sk) { return 2; } + +SEC(".struct_ops") +struct tcp_congestion_ops tcp_non_scx_ca = { + .ssthresh = (void *)tcp_ca_ssthresh, + .cong_avoid = (void *)tcp_ca_cong_avoid, + .undo_cwnd = (void *)tcp_ca_undo_cwnd, + .name = "tcp_kfunc_deny", +}; + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.c b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.c new file mode 100644 index 000000000000..1c031575fb87 --- /dev/null +++ b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.c @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Verify that context-sensitive SCX kfuncs (even "unlocked" ones) are + * restricted to only SCX struct_ops programs. Non-SCX struct_ops programs, + * such as TCP congestion control programs, should be rejected by the BPF + * verifier when attempting to call these kfuncs. 
+ * + * Copyright (C) 2026 Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw> + * Copyright (C) 2026 Cheng-Yang Chou <yphbchou0911@gmail.com> + */ + +#include <bpf/bpf.h> +#include <scx/common.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include "non_scx_kfunc_deny.bpf.skel.h" +#include "scx_test.h" + +static enum scx_test_status run(void *ctx) +{ + struct non_scx_kfunc_deny *skel; + int err; + + skel = non_scx_kfunc_deny__open(); + if (!skel) { + SCX_ERR("Failed to open skel"); + return SCX_TEST_FAIL; + } + + err = non_scx_kfunc_deny__load(skel); + non_scx_kfunc_deny__destroy(skel); + + if (err == 0) { + SCX_ERR("non-SCX BPF program loaded when it should have been rejected"); + return SCX_TEST_FAIL; + } + + return SCX_TEST_PASS; +} + +struct scx_test non_scx_kfunc_deny = { + .name = "non_scx_kfunc_deny", + .description = "Verify that non-SCX struct_ops programs cannot call SCX kfuncs", + .run = run, +}; +REGISTER_SCX_TEST(&non_scx_kfunc_deny) |
