summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-06-23 13:36:09 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-06-23 13:36:09 -0700
commit 7603d8e78023e5883e075b4625fbdf059c6384f7 (patch)
tree be2df135ed8bacad0a458e1ee01401db028945a3
parent a1a8bab74176eed204a3139ab7ad840caa3d73b8 (diff)
parent 4437ad129cf5b37c00a5bc9fa5989d1da4d64d07 (diff)
Merge tag 'sched_ext-for-7.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext tree reorg from Tejun Heo:
 "Pure source reorganization with no functional change:
   - the kernel/sched/ext* files move into a new kernel/sched/ext/ subdirectory
   - the headers and sources are made self-contained so editor tooling can parse each file on its own"
* tag 'sched_ext-for-7.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Move shared helpers from ext.c into internal.h and cid.h
  sched_ext: Make kernel/sched/ext/ sources self-contained for clangd
  sched_ext: Move sources under kernel/sched/ext/
-rw-r--r-- Documentation/scheduler/sched-ext.rst 8
-rw-r--r-- MAINTAINERS 2
-rw-r--r-- kernel/sched/build_policy.c 18
-rw-r--r-- kernel/sched/ext/arena.c (renamed from kernel/sched/ext_arena.c) 4
-rw-r--r-- kernel/sched/ext/arena.h (renamed from kernel/sched/ext_arena.h) 2
-rw-r--r-- kernel/sched/ext/cid.c (renamed from kernel/sched/ext_cid.c) 5
-rw-r--r-- kernel/sched/ext/cid.h (renamed from kernel/sched/ext_cid.h) 25
-rw-r--r-- kernel/sched/ext/ext.c (renamed from kernel/sched/ext.c) 158
-rw-r--r-- kernel/sched/ext/ext.h (renamed from kernel/sched/ext.h) 0
-rw-r--r-- kernel/sched/ext/idle.c (renamed from kernel/sched/ext_idle.c) 3
-rw-r--r-- kernel/sched/ext/idle.h (renamed from kernel/sched/ext_idle.h) 4
-rw-r--r-- kernel/sched/ext/internal.h (renamed from kernel/sched/ext_internal.h) 129
-rw-r--r-- kernel/sched/ext/types.h (renamed from kernel/sched/ext_types.h) 6
-rw-r--r-- kernel/sched/sched.h 2
-rw-r--r-- tools/sched_ext/include/scx/cid.bpf.h 6
15 files changed, 209 insertions, 163 deletions
diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst
index c4f59c08d8a4..4b1ffd03f516 100644
--- a/Documentation/scheduler/sched-ext.rst
+++ b/Documentation/scheduler/sched-ext.rst
@@ -114,7 +114,7 @@ counters. Each counter occupies one ``name value`` line:
SCX_EV_INSERT_NOT_OWNED 0
SCX_EV_SUB_BYPASS_DISPATCH 0
-The counters are described in ``kernel/sched/ext_internal.h``; briefly:
+The counters are described in ``kernel/sched/ext/internal.h``; briefly:
* ``SCX_EV_SELECT_CPU_FALLBACK``: ops.select_cpu() returned a CPU unusable by
the task and the core scheduler silently picked a fallback CPU.
@@ -496,11 +496,11 @@ Where to Look
* ``include/linux/sched/ext.h`` defines the core data structures, ops table
and constants.
-* ``kernel/sched/ext.c`` contains sched_ext core implementation and helpers.
+* ``kernel/sched/ext/ext.c`` contains sched_ext core implementation and helpers.
The functions prefixed with ``scx_bpf_`` can be called from the BPF
scheduler.
-* ``kernel/sched/ext_idle.c`` contains the built-in idle CPU selection policy.
+* ``kernel/sched/ext/idle.c`` contains the built-in idle CPU selection policy.
* ``tools/sched_ext/`` hosts example BPF scheduler implementations.
@@ -557,7 +557,7 @@ ABI Instability
The APIs provided by sched_ext to BPF schedulers programs have no stability
guarantees. This includes the ops table callbacks and constants defined in
``include/linux/sched/ext.h``, as well as the ``scx_bpf_`` kfuncs defined in
-``kernel/sched/ext.c`` and ``kernel/sched/ext_idle.c``.
+``kernel/sched/ext/ext.c`` and ``kernel/sched/ext/idle.c``.
While we will attempt to provide a relatively stable API surface when
possible, they are subject to change without warning between kernel
diff --git a/MAINTAINERS b/MAINTAINERS
index 4b32a2a7443b..8cf73cf798b2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -24204,7 +24204,7 @@ S: Maintained
W: https://github.com/sched-ext/scx
T: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git
F: include/linux/sched/ext.h
-F: kernel/sched/ext*
+F: kernel/sched/ext/
F: tools/sched_ext/
F: tools/testing/selftests/sched_ext
diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c
index 067979a7b69e..d74b54f81992 100644
--- a/kernel/sched/build_policy.c
+++ b/kernel/sched/build_policy.c
@@ -61,15 +61,15 @@
# include <linux/btf_ids.h>
# include <linux/find.h>
# include <linux/genalloc.h>
-# include "ext_types.h"
-# include "ext_internal.h"
-# include "ext_cid.h"
-# include "ext_arena.h"
-# include "ext_idle.h"
-# include "ext.c"
-# include "ext_cid.c"
-# include "ext_arena.c"
-# include "ext_idle.c"
+# include "ext/types.h"
+# include "ext/internal.h"
+# include "ext/cid.h"
+# include "ext/arena.h"
+# include "ext/idle.h"
+# include "ext/ext.c"
+# include "ext/cid.c"
+# include "ext/arena.c"
+# include "ext/idle.c"
#endif
#include "syscalls.c"
diff --git a/kernel/sched/ext_arena.c b/kernel/sched/ext/arena.c
index 493c2424f842..5783694ec21d 100644
--- a/kernel/sched/ext_arena.c
+++ b/kernel/sched/ext/arena.c
@@ -15,6 +15,10 @@
* Copyright (c) 2026 Meta Platforms, Inc. and affiliates.
* Copyright (c) 2026 Tejun Heo <tj@kernel.org>
*/
+#include <linux/genalloc.h>
+
+#include "internal.h"
+#include "arena.h"
enum scx_arena_consts {
SCX_ARENA_MIN_ORDER = 3, /* 8-byte minimum sub-allocation */
diff --git a/kernel/sched/ext_arena.h b/kernel/sched/ext/arena.h
index 4f3610160102..c378ae5fbc02 100644
--- a/kernel/sched/ext_arena.h
+++ b/kernel/sched/ext/arena.h
@@ -8,6 +8,8 @@
#ifndef _KERNEL_SCHED_EXT_ARENA_H
#define _KERNEL_SCHED_EXT_ARENA_H
+#include <linux/types.h>
+
struct scx_sched;
s32 scx_arena_pool_init(struct scx_sched *sch);
diff --git a/kernel/sched/ext_cid.c b/kernel/sched/ext/cid.c
index 66944a7ef79d..af83084ec740 100644
--- a/kernel/sched/ext_cid.c
+++ b/kernel/sched/ext/cid.c
@@ -7,6 +7,9 @@
*/
#include <linux/cacheinfo.h>
+#include "internal.h"
+#include "cid.h"
+
/*
* cid tables.
*
@@ -71,7 +74,7 @@ static s32 scx_cid_arrays_alloc(void)
* scx_cid_init - build the cid mapping
* @sch: the scx_sched being initialized; used as the scx_error() target
*
- * See "Topological CPU IDs" in ext_cid.h for the model. Walk online cpus by
+ * See "Topological CPU IDs" in cid.h for the model. Walk online cpus by
* intersection at each level (parent_scratch & this_level_mask), which keeps
* containment correct by construction and naturally splits a physical LLC
* straddling two NUMA nodes into two LLC units. The caller must hold
diff --git a/kernel/sched/ext_cid.h b/kernel/sched/ext/cid.h
index 5745e5785e89..9c4f4b907f12 100644
--- a/kernel/sched/ext_cid.h
+++ b/kernel/sched/ext/cid.h
@@ -33,6 +33,8 @@
#ifndef _KERNEL_SCHED_EXT_CID_H
#define _KERNEL_SCHED_EXT_CID_H
+#include "internal.h"
+
struct scx_sched;
/*
@@ -43,7 +45,7 @@ struct scx_sched;
* possible-but-not-online cpus and carries all-(-1) topo info (see
* scx_cid_topo); callers detect it via the -1 sentinels.
*
- * See the comment above the table definitions in ext_cid.c for the
+ * See the comment above the table definitions in cid.c for the
* memory-ordering and visibility contract.
*/
extern s16 *scx_cid_to_cpu_tbl;
@@ -268,4 +270,25 @@ static inline u32 scx_cmask_nr_used_words(const struct scx_cmask *m)
__w && ((cid) = __bs + __wi * 64 + __ffs64(__w), true); \
__w &= __w - 1)
+/*
+ * scx_cpu_arg() wraps a cpu arg being handed to an SCX op. For cid-form
+ * schedulers it resolves to the matching cid; for cpu-form it passes @cpu
+ * through. scx_cpu_ret() is the inverse for a cpu/cid returned from an op
+ * (currently only ops.select_cpu); it validates the BPF-supplied cid and
+ * triggers scx_error() on @sch if invalid.
+ */
+static inline s32 scx_cpu_arg(s32 cpu)
+{
+ if (scx_is_cid_type())
+ return __scx_cpu_to_cid(cpu);
+ return cpu;
+}
+
+static inline s32 scx_cpu_ret(struct scx_sched *sch, s32 cpu_or_cid)
+{
+ if (cpu_or_cid < 0 || !scx_is_cid_type())
+ return cpu_or_cid;
+ return scx_cid_to_cpu(sch, cpu_or_cid);
+}
+
#endif /* _KERNEL_SCHED_EXT_CID_H */
diff --git a/kernel/sched/ext.c b/kernel/sched/ext/ext.c
index 0db6fa2daea3..691d53fe0f64 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext/ext.c
@@ -6,6 +6,19 @@
* Copyright (c) 2022 Tejun Heo <tj@kernel.org>
* Copyright (c) 2022 David Vernet <dvernet@meta.com>
*/
+#include <linux/bitmap.h>
+#include <linux/btf_ids.h>
+#include <linux/rhashtable.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/isolation.h>
+#include <linux/suspend.h>
+#include <linux/sysrq.h>
+
+#include "../pelt.h"
+#include "internal.h"
+#include "cid.h"
+#include "arena.h"
+#include "idle.h"
static DEFINE_RAW_SPINLOCK(scx_sched_lock);
@@ -246,8 +259,6 @@ __printf(5, 6) bool __scx_exit(struct scx_sched *sch,
return ret;
}
-#define SCX_HAS_OP(sch, op) test_bit(SCX_OP_IDX(op), (sch)->has_op)
-
static long jiffies_delta_msecs(unsigned long at, unsigned long now)
{
if (time_after(at, now))
@@ -263,20 +274,6 @@ static bool u32_before(u32 a, u32 b)
#ifdef CONFIG_EXT_SUB_SCHED
/**
- * scx_parent - Find the parent sched
- * @sch: sched to find the parent of
- *
- * Returns the parent scheduler or %NULL if @sch is root.
- */
-static struct scx_sched *scx_parent(struct scx_sched *sch)
-{
- if (sch->level)
- return sch->ancestors[sch->level - 1];
- else
- return NULL;
-}
-
-/**
* scx_next_descendant_pre - find the next descendant for pre-order walk
* @pos: the current position (%NULL to initiate traversal)
* @root: sched whose descendants to walk
@@ -323,7 +320,6 @@ static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch)
rcu_assign_pointer(p->scx.sched, sch);
}
#else /* CONFIG_EXT_SUB_SCHED */
-static inline struct scx_sched *scx_parent(struct scx_sched *sch) { return NULL; }
static inline struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos, struct scx_sched *root) { return pos ? NULL : root; }
static inline void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch) {}
#endif /* CONFIG_EXT_SUB_SCHED */
@@ -483,123 +479,12 @@ static bool rq_is_open(struct rq *rq, u64 enq_flags)
*/
DEFINE_PER_CPU(struct rq *, scx_locked_rq_state);
-static inline void update_locked_rq(struct rq *rq)
-{
- /*
- * Check whether @rq is actually locked. This can help expose bugs
- * or incorrect assumptions about the context in which a kfunc or
- * callback is executed.
- */
- if (rq)
- lockdep_assert_rq_held(rq);
- __this_cpu_write(scx_locked_rq_state, rq);
-}
-
-/*
- * SCX ops can recurse via scx_bpf_sub_dispatch() - the inner call must not
- * clobber the outer's scx_locked_rq_state. Save it on entry, restore on exit.
- */
-#define SCX_CALL_OP(sch, op, locked_rq, args...) \
-do { \
- struct rq *__prev_locked_rq; \
- \
- if (locked_rq) { \
- __prev_locked_rq = scx_locked_rq(); \
- update_locked_rq(locked_rq); \
- } \
- (sch)->ops.op(args); \
- if (locked_rq) \
- update_locked_rq(__prev_locked_rq); \
-} while (0)
-
/*
- * Flipped on enable per sch->is_cid_type. Declared in ext_internal.h so
+ * Flipped on enable per sch->is_cid_type. Declared in internal.h so
* subsystem inlines can read it.
*/
DEFINE_STATIC_KEY_FALSE(__scx_is_cid_type);
-/*
- * scx_cpu_arg() wraps a cpu arg being handed to an SCX op. For cid-form
- * schedulers it resolves to the matching cid; for cpu-form it passes @cpu
- * through. scx_cpu_ret() is the inverse for a cpu/cid returned from an op
- * (currently only ops.select_cpu); it validates the BPF-supplied cid and
- * triggers scx_error() on @sch if invalid.
- */
-static s32 scx_cpu_arg(s32 cpu)
-{
- if (scx_is_cid_type())
- return __scx_cpu_to_cid(cpu);
- return cpu;
-}
-
-static s32 scx_cpu_ret(struct scx_sched *sch, s32 cpu_or_cid)
-{
- if (cpu_or_cid < 0 || !scx_is_cid_type())
- return cpu_or_cid;
- return scx_cid_to_cpu(sch, cpu_or_cid);
-}
-
-#define SCX_CALL_OP_RET(sch, op, locked_rq, args...) \
-({ \
- struct rq *__prev_locked_rq; \
- __typeof__((sch)->ops.op(args)) __ret; \
- \
- if (locked_rq) { \
- __prev_locked_rq = scx_locked_rq(); \
- update_locked_rq(locked_rq); \
- } \
- __ret = (sch)->ops.op(args); \
- if (locked_rq) \
- update_locked_rq(__prev_locked_rq); \
- __ret; \
-})
-
-/*
- * SCX_CALL_OP_TASK*() invokes an SCX op that takes one or two task arguments
- * and records them in current->scx.kf_tasks[] for the duration of the call. A
- * kfunc invoked from inside such an op can then use
- * scx_kf_arg_task_ok() to verify that its task argument is one of
- * those subject tasks.
- *
- * Every SCX_CALL_OP_TASK*() call site invokes its op with @p's rq lock held -
- * either via the @locked_rq argument here, or (for ops.select_cpu()) via @p's
- * pi_lock held by try_to_wake_up() with rq tracking via scx_rq.in_select_cpu.
- * So if kf_tasks[] is set, @p's scheduler-protected fields are stable.
- *
- * kf_tasks[] can not stack, so task-based SCX ops must not nest. The
- * WARN_ON_ONCE() in each macro catches a re-entry of any of the three variants
- * while a previous one is still in progress.
- */
-#define SCX_CALL_OP_TASK(sch, op, locked_rq, task, args...) \
-do { \
- WARN_ON_ONCE(current->scx.kf_tasks[0]); \
- current->scx.kf_tasks[0] = task; \
- SCX_CALL_OP((sch), op, locked_rq, task, ##args); \
- current->scx.kf_tasks[0] = NULL; \
-} while (0)
-
-#define SCX_CALL_OP_TASK_RET(sch, op, locked_rq, task, args...) \
-({ \
- __typeof__((sch)->ops.op(task, ##args)) __ret; \
- WARN_ON_ONCE(current->scx.kf_tasks[0]); \
- current->scx.kf_tasks[0] = task; \
- __ret = SCX_CALL_OP_RET((sch), op, locked_rq, task, ##args); \
- current->scx.kf_tasks[0] = NULL; \
- __ret; \
-})
-
-#define SCX_CALL_OP_2TASKS_RET(sch, op, locked_rq, task0, task1, args...) \
-({ \
- __typeof__((sch)->ops.op(task0, task1, ##args)) __ret; \
- WARN_ON_ONCE(current->scx.kf_tasks[0]); \
- current->scx.kf_tasks[0] = task0; \
- current->scx.kf_tasks[1] = task1; \
- __ret = SCX_CALL_OP_RET((sch), op, locked_rq, task0, task1, ##args); \
- current->scx.kf_tasks[0] = NULL; \
- current->scx.kf_tasks[1] = NULL; \
- __ret; \
-})
-
/**
* scx_call_op_set_cpumask - invoke ops.set_cpumask / ops_cid.set_cmask for @task
* @sch: scx_sched being invoked
@@ -608,7 +493,7 @@ do { \
* @cpumask: new cpumask
*
* For cid-form schedulers, translate @cpumask to a cmask via the per-cpu
- * scratch in ext_cid.c and dispatch through the ops_cid union view. Caller
+ * scratch in cid.c and dispatch through the ops_cid union view. Caller
* must hold @rq's rq lock so this_cpu_ptr is stable across the call.
*/
static inline void scx_call_op_set_cpumask(struct scx_sched *sch, struct rq *rq,
@@ -638,19 +523,6 @@ static inline void scx_call_op_set_cpumask(struct scx_sched *sch, struct rq *rq,
current->scx.kf_tasks[0] = NULL;
}
-/* see SCX_CALL_OP_TASK() */
-static __always_inline bool scx_kf_arg_task_ok(struct scx_sched *sch,
- struct task_struct *p)
-{
- if (unlikely((p != current->scx.kf_tasks[0] &&
- p != current->scx.kf_tasks[1]))) {
- scx_error(sch, "called on a task not being operated on");
- return false;
- }
-
- return true;
-}
-
enum scx_dsq_iter_flags {
/* iterate in the reverse dispatch order */
SCX_DSQ_ITER_REV = 1U << 16,
diff --git a/kernel/sched/ext.h b/kernel/sched/ext/ext.h
index 0b7fc46aee08..0b7fc46aee08 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext/ext.h
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext/idle.c
index 2077373d8da3..8e8c6201b7df 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext/idle.c
@@ -9,6 +9,9 @@
* Copyright (c) 2022 David Vernet <dvernet@meta.com>
* Copyright (c) 2024 Andrea Righi <arighi@nvidia.com>
*/
+#include "internal.h"
+#include "cid.h"
+#include "idle.h"
/* Enable/disable built-in idle CPU selection policy */
static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled);
diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext/idle.h
index 8d169d3bbdf9..87a0e58f1eb7 100644
--- a/kernel/sched/ext_idle.h
+++ b/kernel/sched/ext/idle.h
@@ -10,7 +10,11 @@
#ifndef _KERNEL_SCHED_EXT_IDLE_H
#define _KERNEL_SCHED_EXT_IDLE_H
+#include <linux/btf_ids.h>
+
+struct cpumask;
struct sched_ext_ops;
+struct task_struct;
extern struct btf_id_set8 scx_kfunc_ids_idle;
extern struct btf_id_set8 scx_kfunc_ids_select_cpu;
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext/internal.h
index b04701190b23..145272cb4d8a 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext/internal.h
@@ -5,6 +5,12 @@
* Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
* Copyright (c) 2025 Tejun Heo <tj@kernel.org>
*/
+#ifndef _KERNEL_SCHED_EXT_INTERNAL_H
+#define _KERNEL_SCHED_EXT_INTERNAL_H
+
+#include "../sched.h"
+#include "types.h"
+
#define SCX_OP_IDX(op) (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
#define SCX_MOFF_IDX(moff) ((moff) / sizeof(void (*)(void)))
@@ -1547,6 +1553,111 @@ static inline struct rq *scx_locked_rq(void)
return __this_cpu_read(scx_locked_rq_state);
}
+static inline void update_locked_rq(struct rq *rq)
+{
+ /*
+ * Check whether @rq is actually locked. This can help expose bugs
+ * or incorrect assumptions about the context in which a kfunc or
+ * callback is executed.
+ */
+ if (rq)
+ lockdep_assert_rq_held(rq);
+ __this_cpu_write(scx_locked_rq_state, rq);
+}
+
+#define SCX_HAS_OP(sch, op) test_bit(SCX_OP_IDX(op), (sch)->has_op)
+
+/*
+ * SCX ops can recurse via scx_bpf_sub_dispatch() - the inner call must not
+ * clobber the outer's scx_locked_rq_state. Save it on entry, restore on exit.
+ */
+#define SCX_CALL_OP(sch, op, locked_rq, args...) \
+do { \
+ struct rq *__prev_locked_rq; \
+ \
+ if (locked_rq) { \
+ __prev_locked_rq = scx_locked_rq(); \
+ update_locked_rq(locked_rq); \
+ } \
+ (sch)->ops.op(args); \
+ if (locked_rq) \
+ update_locked_rq(__prev_locked_rq); \
+} while (0)
+
+#define SCX_CALL_OP_RET(sch, op, locked_rq, args...) \
+({ \
+ struct rq *__prev_locked_rq; \
+ __typeof__((sch)->ops.op(args)) __ret; \
+ \
+ if (locked_rq) { \
+ __prev_locked_rq = scx_locked_rq(); \
+ update_locked_rq(locked_rq); \
+ } \
+ __ret = (sch)->ops.op(args); \
+ if (locked_rq) \
+ update_locked_rq(__prev_locked_rq); \
+ __ret; \
+})
+
+/*
+ * SCX_CALL_OP_TASK*() invokes an SCX op that takes one or two task arguments
+ * and records them in current->scx.kf_tasks[] for the duration of the call. A
+ * kfunc invoked from inside such an op can then use
+ * scx_kf_arg_task_ok() to verify that its task argument is one of
+ * those subject tasks.
+ *
+ * Every SCX_CALL_OP_TASK*() call site invokes its op with @p's rq lock held -
+ * either via the @locked_rq argument here, or (for ops.select_cpu()) via @p's
+ * pi_lock held by try_to_wake_up() with rq tracking via scx_rq.in_select_cpu.
+ * So if kf_tasks[] is set, @p's scheduler-protected fields are stable.
+ *
+ * kf_tasks[] can not stack, so task-based SCX ops must not nest. The
+ * WARN_ON_ONCE() in each macro catches a re-entry of any of the three variants
+ * while a previous one is still in progress.
+ */
+#define SCX_CALL_OP_TASK(sch, op, locked_rq, task, args...) \
+do { \
+ WARN_ON_ONCE(current->scx.kf_tasks[0]); \
+ current->scx.kf_tasks[0] = task; \
+ SCX_CALL_OP((sch), op, locked_rq, task, ##args); \
+ current->scx.kf_tasks[0] = NULL; \
+} while (0)
+
+#define SCX_CALL_OP_TASK_RET(sch, op, locked_rq, task, args...) \
+({ \
+ __typeof__((sch)->ops.op(task, ##args)) __ret; \
+ WARN_ON_ONCE(current->scx.kf_tasks[0]); \
+ current->scx.kf_tasks[0] = task; \
+ __ret = SCX_CALL_OP_RET((sch), op, locked_rq, task, ##args); \
+ current->scx.kf_tasks[0] = NULL; \
+ __ret; \
+})
+
+#define SCX_CALL_OP_2TASKS_RET(sch, op, locked_rq, task0, task1, args...) \
+({ \
+ __typeof__((sch)->ops.op(task0, task1, ##args)) __ret; \
+ WARN_ON_ONCE(current->scx.kf_tasks[0]); \
+ current->scx.kf_tasks[0] = task0; \
+ current->scx.kf_tasks[1] = task1; \
+ __ret = SCX_CALL_OP_RET((sch), op, locked_rq, task0, task1, ##args); \
+ current->scx.kf_tasks[0] = NULL; \
+ current->scx.kf_tasks[1] = NULL; \
+ __ret; \
+})
+
+/* see SCX_CALL_OP_TASK() */
+static __always_inline bool scx_kf_arg_task_ok(struct scx_sched *sch,
+ struct task_struct *p)
+{
+ if (unlikely((p != current->scx.kf_tasks[0] &&
+ p != current->scx.kf_tasks[1]))) {
+ scx_error(sch, "called on a task not being operated on");
+ return false;
+ }
+
+ return true;
+}
+
static inline bool scx_bypassing(struct scx_sched *sch, s32 cpu)
{
return unlikely(per_cpu_ptr(sch->pcpu, cpu)->flags &
@@ -1627,6 +1738,20 @@ static inline struct scx_sched *scx_prog_sched(const struct bpf_prog_aux *aux)
return NULL;
}
+
+/**
+ * scx_parent - Find the parent sched
+ * @sch: sched to find the parent of
+ *
+ * Returns the parent scheduler or %NULL if @sch is root.
+ */
+static inline struct scx_sched *scx_parent(struct scx_sched *sch)
+{
+ if (sch->level)
+ return sch->ancestors[sch->level - 1];
+ else
+ return NULL;
+}
#else /* CONFIG_EXT_SUB_SCHED */
static inline struct scx_sched *scx_task_sched(const struct task_struct *p)
{
@@ -1650,4 +1775,8 @@ static inline struct scx_sched *scx_prog_sched(const struct bpf_prog_aux *aux)
{
return rcu_dereference_all(scx_root);
}
+
+static inline struct scx_sched *scx_parent(struct scx_sched *sch) { return NULL; }
#endif /* CONFIG_EXT_SUB_SCHED */
+
+#endif /* _KERNEL_SCHED_EXT_INTERNAL_H */
diff --git a/kernel/sched/ext_types.h b/kernel/sched/ext/types.h
index 8b3527e21fca..bc74eafd43f1 100644
--- a/kernel/sched/ext_types.h
+++ b/kernel/sched/ext/types.h
@@ -8,6 +8,12 @@
#ifndef _KERNEL_SCHED_EXT_TYPES_H
#define _KERNEL_SCHED_EXT_TYPES_H
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <linux/overflow.h>
+#include <linux/time64.h>
+#include <linux/sched/topology.h>
+
enum scx_consts {
SCX_DSP_DFL_MAX_BATCH = 32,
SCX_DSP_MAX_LOOPS = 32,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c7c2dea65edd..56acf502ba26 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -4211,6 +4211,6 @@ DEFINE_CLASS(sched_change, struct sched_change_ctx *,
DEFINE_CLASS_IS_UNCONDITIONAL(sched_change)
-#include "ext.h"
+#include "ext/ext.h"
#endif /* _KERNEL_SCHED_SCHED_H */
diff --git a/tools/sched_ext/include/scx/cid.bpf.h b/tools/sched_ext/include/scx/cid.bpf.h
index 9d89bb57e201..db247e42fb45 100644
--- a/tools/sched_ext/include/scx/cid.bpf.h
+++ b/tools/sched_ext/include/scx/cid.bpf.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * BPF-side helpers for cids and cmasks. See kernel/sched/ext_cid.h for the
+ * BPF-side helpers for cids and cmasks. See kernel/sched/ext/cid.h for the
* authoritative layout and semantics. The BPF-side helpers use the cmask_*
* naming (no scx_ prefix); cmask is the SCX bitmap type so the prefix is
* redundant in BPF code. Atomics use __sync_val_compare_and_swap and every
@@ -33,7 +33,7 @@
#endif
/*
- * Mirrors SCX_CMASK_NR_WORDS in kernel/sched/ext_types.h. The u64 cast keeps
+ * Mirrors SCX_CMASK_NR_WORDS in kernel/sched/ext/types.h. The u64 cast keeps
* the +63 from wrapping when @nr_cids is near U32_MAX, so cmask_reframe()
* bounds-checking the result against alloc_words catches the overflow instead
* of seeing a small value.
@@ -281,7 +281,7 @@ static __always_inline void cmask_zero(struct scx_cmask __arena *m)
/*
* BPF_-prefixed to avoid colliding with the kernel's anonymous CMASK_OP_*
- * enum in ext_cid.c, which is exported via BTF and reachable through
+ * enum in ext/cid.c, which is exported via BTF and reachable through
* vmlinux.h.
*/
enum {