summaryrefslogtreecommitdiff
path: root/kernel/sched/ext_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/ext_internal.h')
-rw-r--r--kernel/sched/ext_internal.h462
1 files changed, 422 insertions, 40 deletions
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 386c677e4c9a..a075732d4430 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -6,6 +6,7 @@
* Copyright (c) 2025 Tejun Heo <tj@kernel.org>
*/
#define SCX_OP_IDX(op) (offsetof(struct sched_ext_ops, op) / sizeof(void (*)(void)))
+#define SCX_MOFF_IDX(moff) ((moff) / sizeof(void (*)(void)))
enum scx_consts {
SCX_DSP_DFL_MAX_BATCH = 32,
@@ -24,10 +25,16 @@ enum scx_consts {
*/
SCX_TASK_ITER_BATCH = 32,
+ SCX_BYPASS_HOST_NTH = 2,
+
SCX_BYPASS_LB_DFL_INTV_US = 500 * USEC_PER_MSEC,
SCX_BYPASS_LB_DONOR_PCT = 125,
SCX_BYPASS_LB_MIN_DELTA_DIV = 4,
SCX_BYPASS_LB_BATCH = 256,
+
+ SCX_REENQ_LOCAL_MAX_REPEAT = 256,
+
+ SCX_SUB_MAX_DEPTH = 4,
};
enum scx_exit_kind {
@@ -38,6 +45,7 @@ enum scx_exit_kind {
SCX_EXIT_UNREG_BPF, /* BPF-initiated unregistration */
SCX_EXIT_UNREG_KERN, /* kernel-initiated unregistration */
SCX_EXIT_SYSRQ, /* requested by 'S' sysrq */
+ SCX_EXIT_PARENT, /* parent exiting */
SCX_EXIT_ERROR = 1024, /* runtime error, error msg contains details */
SCX_EXIT_ERROR_BPF, /* ERROR but triggered through scx_bpf_error() */
@@ -62,6 +70,7 @@ enum scx_exit_kind {
enum scx_exit_code {
/* Reasons */
SCX_ECODE_RSN_HOTPLUG = 1LLU << 32,
+ SCX_ECODE_RSN_CGROUP_OFFLINE = 2LLU << 32,
/* Actions */
SCX_ECODE_ACT_RESTART = 1LLU << 48,
@@ -74,7 +83,7 @@ enum scx_exit_flags {
* info communication. The following flag indicates whether ops.init()
* finished successfully.
*/
- SCX_EFLAG_INITIALIZED,
+ SCX_EFLAG_INITIALIZED = 1LLU << 0,
};
/*
@@ -175,9 +184,10 @@ enum scx_ops_flags {
SCX_OPS_BUILTIN_IDLE_PER_NODE = 1LLU << 6,
/*
- * CPU cgroup support flags
+ * If set, %SCX_ENQ_IMMED is assumed to be set on all local DSQ
+ * enqueues.
*/
- SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */
+ SCX_OPS_ALWAYS_ENQ_IMMED = 1LLU << 7,
SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE |
SCX_OPS_ENQ_LAST |
@@ -186,7 +196,7 @@ enum scx_ops_flags {
SCX_OPS_ALLOW_QUEUED_WAKEUP |
SCX_OPS_SWITCH_PARTIAL |
SCX_OPS_BUILTIN_IDLE_PER_NODE |
- SCX_OPS_HAS_CGROUP_WEIGHT,
+ SCX_OPS_ALWAYS_ENQ_IMMED,
/* high 8 bits are internal, don't include in SCX_OPS_ALL_FLAGS */
__SCX_OPS_INTERNAL_MASK = 0xffLLU << 56,
@@ -213,7 +223,7 @@ struct scx_exit_task_args {
bool cancelled;
};
-/* argument container for ops->cgroup_init() */
+/* argument container for ops.cgroup_init() */
struct scx_cgroup_init_args {
/* the weight of the cgroup [1..10000] */
u32 weight;
@@ -236,12 +246,12 @@ enum scx_cpu_preempt_reason {
};
/*
- * Argument container for ops->cpu_acquire(). Currently empty, but may be
+ * Argument container for ops.cpu_acquire(). Currently empty, but may be
* expanded in the future.
*/
struct scx_cpu_acquire_args {};
-/* argument container for ops->cpu_release() */
+/* argument container for ops.cpu_release() */
struct scx_cpu_release_args {
/* the reason the CPU was preempted */
enum scx_cpu_preempt_reason reason;
@@ -250,9 +260,7 @@ struct scx_cpu_release_args {
struct task_struct *task;
};
-/*
- * Informational context provided to dump operations.
- */
+/* informational context provided to dump operations */
struct scx_dump_ctx {
enum scx_exit_kind kind;
s64 exit_code;
@@ -261,6 +269,18 @@ struct scx_dump_ctx {
u64 at_jiffies;
};
+/* argument container for ops.sub_attach() */
+struct scx_sub_attach_args {
+ struct sched_ext_ops *ops;
+ char *cgroup_path;
+};
+
+/* argument container for ops.sub_detach() */
+struct scx_sub_detach_args {
+ struct sched_ext_ops *ops;
+ char *cgroup_path;
+};
+
/**
* struct sched_ext_ops - Operation table for BPF scheduler implementation
*
@@ -721,6 +741,20 @@ struct sched_ext_ops {
#endif /* CONFIG_EXT_GROUP_SCHED */
+ /**
+ * @sub_attach: Attach a sub-scheduler
+ * @args: argument container, see the struct definition
+ *
+ * Return 0 to accept the sub-scheduler. -errno to reject.
+ */
+ s32 (*sub_attach)(struct scx_sub_attach_args *args);
+
+ /**
+ * @sub_detach: Detach a sub-scheduler
+ * @args: argument container, see the struct definition
+ */
+ void (*sub_detach)(struct scx_sub_detach_args *args);
+
/*
* All online ops must come before ops.cpu_online().
*/
@@ -762,6 +796,10 @@ struct sched_ext_ops {
*/
void (*exit)(struct scx_exit_info *info);
+ /*
+	 * Data fields must come after all ops fields.
+ */
+
/**
* @dispatch_max_batch: Max nr of tasks that dispatch() can dispatch
*/
@@ -797,6 +835,12 @@ struct sched_ext_ops {
u64 hotplug_seq;
/**
+	 * @sub_cgroup_id: When >1, attach the scheduler as a sub-scheduler on the
+ * specified cgroup.
+ */
+ u64 sub_cgroup_id;
+
+ /**
* @name: BPF scheduler's name
*
* Must be a non-zero valid BPF object name including only isalnum(),
@@ -806,7 +850,7 @@ struct sched_ext_ops {
char name[SCX_OPS_NAME_LEN];
/* internal use only, must be NULL */
- void *priv;
+ void __rcu *priv;
};
enum scx_opi {
@@ -854,6 +898,24 @@ struct scx_event_stats {
s64 SCX_EV_ENQ_SKIP_MIGRATION_DISABLED;
/*
+ * The number of times a task, enqueued on a local DSQ with
+ * SCX_ENQ_IMMED, was re-enqueued because the CPU was not available for
+ * immediate execution.
+ */
+ s64 SCX_EV_REENQ_IMMED;
+
+ /*
+ * The number of times a reenq of local DSQ caused another reenq of
+ * local DSQ. This can happen when %SCX_ENQ_IMMED races against a higher
+ * priority class task even if the BPF scheduler always satisfies the
+ * prerequisites for %SCX_ENQ_IMMED at the time of enqueue. However,
+ * that scenario is very unlikely and this count going up regularly
+ * indicates that the BPF scheduler is handling %SCX_ENQ_REENQ
+ * incorrectly causing recursive reenqueues.
+ */
+ s64 SCX_EV_REENQ_LOCAL_REPEAT;
+
+ /*
* Total number of times a task's time slice was refilled with the
* default value (SCX_SLICE_DFL).
*/
@@ -873,15 +935,77 @@ struct scx_event_stats {
* The number of times the bypassing mode has been activated.
*/
s64 SCX_EV_BYPASS_ACTIVATE;
+
+ /*
+ * The number of times the scheduler attempted to insert a task that it
+ * doesn't own into a DSQ. Such attempts are ignored.
+ *
+ * As BPF schedulers are allowed to ignore dequeues, it's difficult to
+ * tell whether such an attempt is from a scheduler malfunction or an
+ * ignored dequeue around sub-sched enabling. If this count keeps going
+ * up regardless of sub-sched enabling, it likely indicates a bug in the
+ * scheduler.
+ */
+ s64 SCX_EV_INSERT_NOT_OWNED;
+
+ /*
+ * The number of times tasks from bypassing descendants are scheduled
+ * from sub_bypass_dsq's.
+ */
+ s64 SCX_EV_SUB_BYPASS_DISPATCH;
+};
+
+struct scx_sched;
+
+enum scx_sched_pcpu_flags {
+ SCX_SCHED_PCPU_BYPASSING = 1LLU << 0,
+};
+
+/* dispatch buf */
+struct scx_dsp_buf_ent {
+ struct task_struct *task;
+ unsigned long qseq;
+ u64 dsq_id;
+ u64 enq_flags;
+};
+
+struct scx_dsp_ctx {
+ struct rq *rq;
+ u32 cursor;
+ u32 nr_tasks;
+ struct scx_dsp_buf_ent buf[];
+};
+
+struct scx_deferred_reenq_local {
+ struct list_head node;
+ u64 flags;
+ u64 seq;
+ u32 cnt;
};
struct scx_sched_pcpu {
+ struct scx_sched *sch;
+ u64 flags; /* protected by rq lock */
+
/*
* The event counters are in a per-CPU variable to minimize the
* accounting overhead. A system-wide view on the event counter is
* constructed when requested by scx_bpf_events().
*/
struct scx_event_stats event_stats;
+
+ struct scx_deferred_reenq_local deferred_reenq_local;
+ struct scx_dispatch_q bypass_dsq;
+#ifdef CONFIG_EXT_SUB_SCHED
+ u32 bypass_host_seq;
+#endif
+
+ /* must be the last entry - contains flex array */
+ struct scx_dsp_ctx dsp_ctx;
+};
+
+struct scx_sched_pnode {
+ struct scx_dispatch_q global_dsq;
};
struct scx_sched {
@@ -897,15 +1021,50 @@ struct scx_sched {
* per-node split isn't sufficient, it can be further split.
*/
struct rhashtable dsq_hash;
- struct scx_dispatch_q **global_dsqs;
+ struct scx_sched_pnode **pnode;
struct scx_sched_pcpu __percpu *pcpu;
+ u64 slice_dfl;
+ u64 bypass_timestamp;
+ s32 bypass_depth;
+
+ /* bypass dispatch path enable state, see bypass_dsp_enabled() */
+ unsigned long bypass_dsp_claim;
+ atomic_t bypass_dsp_enable_depth;
+
+ bool aborting;
+ bool dump_disabled; /* protected by scx_dump_lock */
+ u32 dsp_max_batch;
+ s32 level;
+
/*
* Updates to the following warned bitfields can race causing RMW issues
* but it doesn't really matter.
*/
bool warned_zero_slice:1;
bool warned_deprecated_rq:1;
+ bool warned_unassoc_progs:1;
+
+ struct list_head all;
+
+#ifdef CONFIG_EXT_SUB_SCHED
+ struct rhash_head hash_node;
+
+ struct list_head children;
+ struct list_head sibling;
+ struct cgroup *cgrp;
+ char *cgrp_path;
+ struct kset *sub_kset;
+
+ bool sub_attached;
+#endif /* CONFIG_EXT_SUB_SCHED */
+
+ /*
+ * The maximum amount of time in jiffies that a task may be runnable
+ * without being scheduled on a CPU. If this timeout is exceeded, it
+ * will trigger scx_error().
+ */
+ unsigned long watchdog_timeout;
atomic_t exit_kind;
struct scx_exit_info *exit_info;
@@ -913,9 +1072,15 @@ struct scx_sched {
struct kobject kobj;
struct kthread_worker *helper;
- struct irq_work error_irq_work;
+ struct irq_work disable_irq_work;
struct kthread_work disable_work;
+ struct timer_list bypass_lb_timer;
+ cpumask_var_t bypass_lb_donee_cpumask;
+ cpumask_var_t bypass_lb_resched_cpumask;
struct rcu_work rcu_work;
+
+ /* all ancestors including self */
+ struct scx_sched *ancestors[];
};
enum scx_wake_flags {
@@ -942,13 +1107,27 @@ enum scx_enq_flags {
SCX_ENQ_PREEMPT = 1LLU << 32,
/*
- * The task being enqueued was previously enqueued on the current CPU's
- * %SCX_DSQ_LOCAL, but was removed from it in a call to the
- * scx_bpf_reenqueue_local() kfunc. If scx_bpf_reenqueue_local() was
- * invoked in a ->cpu_release() callback, and the task is again
- * dispatched back to %SCX_LOCAL_DSQ by this current ->enqueue(), the
- * task will not be scheduled on the CPU until at least the next invocation
- * of the ->cpu_acquire() callback.
+ * Only allowed on local DSQs. Guarantees that the task either gets
+ * on the CPU immediately and stays on it, or gets reenqueued back
+ * to the BPF scheduler. It will never linger on a local DSQ or be
+ * silently put back after preemption.
+ *
+ * The protection persists until the next fresh enqueue - it
+ * survives SAVE/RESTORE cycles, slice extensions and preemption.
+ * If the task can't stay on the CPU for any reason, it gets
+ * reenqueued back to the BPF scheduler.
+ *
+ * Exiting and migration-disabled tasks bypass ops.enqueue() and
+ * are placed directly on a local DSQ without IMMED protection
+ * unless %SCX_OPS_ENQ_EXITING and %SCX_OPS_ENQ_MIGRATION_DISABLED
+ * are set respectively.
+ */
+ SCX_ENQ_IMMED = 1LLU << 33,
+
+ /*
+ * The task being enqueued was previously enqueued on a DSQ, but was
+ * removed and is being re-enqueued. See SCX_TASK_REENQ_* flags to find
+ * out why a given task is being reenqueued.
*/
SCX_ENQ_REENQ = 1LLU << 40,
@@ -969,6 +1148,7 @@ enum scx_enq_flags {
SCX_ENQ_CLEAR_OPSS = 1LLU << 56,
SCX_ENQ_DSQ_PRIQ = 1LLU << 57,
SCX_ENQ_NESTED = 1LLU << 58,
+ SCX_ENQ_GDSQ_FALLBACK = 1LLU << 59, /* fell back to global DSQ */
};
enum scx_deq_flags {
@@ -982,6 +1162,28 @@ enum scx_deq_flags {
* it hasn't been dispatched yet. Dequeue from the BPF side.
*/
SCX_DEQ_CORE_SCHED_EXEC = 1LLU << 32,
+
+ /*
+ * The task is being dequeued due to a property change (e.g.,
+ * sched_setaffinity(), sched_setscheduler(), set_user_nice(),
+ * etc.).
+ */
+ SCX_DEQ_SCHED_CHANGE = 1LLU << 33,
+};
+
+enum scx_reenq_flags {
+ /* low 16bits determine which tasks should be reenqueued */
+ SCX_REENQ_ANY = 1LLU << 0, /* all tasks */
+
+ __SCX_REENQ_FILTER_MASK = 0xffffLLU,
+
+ __SCX_REENQ_USER_MASK = SCX_REENQ_ANY,
+
+ /* bits 32-35 used by task_should_reenq() */
+ SCX_REENQ_TSR_RQ_OPEN = 1LLU << 32,
+ SCX_REENQ_TSR_NOT_FIRST = 1LLU << 33,
+
+ __SCX_REENQ_TSR_MASK = 0xfLLU << 32,
};
enum scx_pick_idle_cpu_flags {
@@ -1035,26 +1237,108 @@ static const char *scx_enable_state_str[] = {
};
/*
- * sched_ext_entity->ops_state
+ * Task Ownership State Machine (sched_ext_entity->ops_state)
*
- * Used to track the task ownership between the SCX core and the BPF scheduler.
- * State transitions look as follows:
+ * The sched_ext core uses this state machine to track task ownership
+ * between the SCX core and the BPF scheduler. This allows the BPF
+ * scheduler to dispatch tasks without strict ordering requirements, while
+ * the SCX core safely rejects invalid dispatches.
*
- * NONE -> QUEUEING -> QUEUED -> DISPATCHING
- * ^ | |
- * | v v
- * \-------------------------------/
+ * State Transitions
*
- * QUEUEING and DISPATCHING states can be waited upon. See wait_ops_state() call
- * sites for explanations on the conditions being waited upon and why they are
- * safe. Transitions out of them into NONE or QUEUED must store_release and the
- * waiters should load_acquire.
+ * .------------> NONE (owned by SCX core)
+ * | | ^
+ * | enqueue | | direct dispatch
+ * | v |
+ * | QUEUEING -------'
+ * | |
+ * | enqueue |
+ * | completes |
+ * | v
+ * | QUEUED (owned by BPF scheduler)
+ * | |
+ * | dispatch |
+ * | |
+ * | v
+ * | DISPATCHING
+ * | |
+ * | dispatch |
+ * | completes |
+ * `---------------'
*
- * Tracking scx_ops_state enables sched_ext core to reliably determine whether
- * any given task can be dispatched by the BPF scheduler at all times and thus
- * relaxes the requirements on the BPF scheduler. This allows the BPF scheduler
- * to try to dispatch any task anytime regardless of its state as the SCX core
- * can safely reject invalid dispatches.
+ * State Descriptions
+ *
+ * - %SCX_OPSS_NONE:
+ * Task is owned by the SCX core. It's either on a run queue, running,
+ * or being manipulated by the core scheduler. The BPF scheduler has no
+ * claim on this task.
+ *
+ * - %SCX_OPSS_QUEUEING:
+ * Transitional state while transferring a task from the SCX core to
+ * the BPF scheduler. The task's rq lock is held during this state.
+ * Since QUEUEING is both entered and exited under the rq lock, dequeue
+ * can never observe this state (it would be a BUG). When finishing a
+ * dispatch, if the task is still in %SCX_OPSS_QUEUEING the completion
+ * path busy-waits for it to leave this state (via wait_ops_state())
+ * before retrying.
+ *
+ * - %SCX_OPSS_QUEUED:
+ * Task is owned by the BPF scheduler. It's on a DSQ (dispatch queue)
+ * and the BPF scheduler is responsible for dispatching it. A QSEQ
+ * (queue sequence number) is embedded in this state to detect
+ * dispatch/dequeue races: if a task is dequeued and re-enqueued, the
+ * QSEQ changes and any in-flight dispatch operations targeting the old
+ * QSEQ are safely ignored.
+ *
+ * - %SCX_OPSS_DISPATCHING:
+ * Transitional state while transferring a task from the BPF scheduler
+ * back to the SCX core. This state indicates the BPF scheduler has
+ * selected the task for execution. When dequeue needs to take the task
+ * off a DSQ and it is still in %SCX_OPSS_DISPATCHING, the dequeue path
+ * busy-waits for it to leave this state (via wait_ops_state()) before
+ * proceeding. Exits to %SCX_OPSS_NONE when dispatch completes.
+ *
+ * Memory Ordering
+ *
+ * Transitions out of %SCX_OPSS_QUEUEING and %SCX_OPSS_DISPATCHING into
+ * %SCX_OPSS_NONE or %SCX_OPSS_QUEUED must use atomic_long_set_release()
+ * and waiters must use atomic_long_read_acquire(). This ensures proper
+ * synchronization between concurrent operations.
+ *
+ * Cross-CPU Task Migration
+ *
+ * When moving a task in the %SCX_OPSS_DISPATCHING state, we can't simply
+ * grab the target CPU's rq lock because a concurrent dequeue might be
+ * waiting on %SCX_OPSS_DISPATCHING while holding the source rq lock
+ * (deadlock).
+ *
+ * The sched_ext core uses a "lock dancing" protocol coordinated by
+ * p->scx.holding_cpu. When moving a task to a different rq:
+ *
+ * 1. Verify task can be moved (CPU affinity, migration_disabled, etc.)
+ * 2. Set p->scx.holding_cpu to the current CPU
+ * 3. Set task state to %SCX_OPSS_NONE; dequeue waits while DISPATCHING
+ * is set, so clearing DISPATCHING first prevents the circular wait
+ * (safe to lock the rq we need)
+ * 4. Unlock the current CPU's rq
+ * 5. Lock src_rq (where the task currently lives)
+ * 6. Verify p->scx.holding_cpu == current CPU, if not, dequeue won the
+ * race (dequeue clears holding_cpu to -1 when it takes the task), in
+ * this case migration is aborted
+ * 7. If src_rq == dst_rq: clear holding_cpu and enqueue directly
+ * into dst_rq's local DSQ (no lock swap needed)
+ * 8. Otherwise: call move_remote_task_to_local_dsq(), which releases
+ * src_rq, locks dst_rq, and performs the deactivate/activate
+ * migration cycle (dst_rq is held on return)
+ * 9. Unlock dst_rq and re-lock the current CPU's rq to restore
+ * the lock state expected by the caller
+ *
+ * If any verification fails, abort the migration.
+ *
+ * This state tracking allows the BPF scheduler to try to dispatch any task
+ * at any time regardless of its state. The SCX core can safely
+ * reject/ignore invalid dispatches, simplifying the BPF scheduler
+ * implementation.
*/
enum scx_ops_state {
SCX_OPSS_NONE, /* owned by the SCX core */
@@ -1079,8 +1363,11 @@ enum scx_ops_state {
#define SCX_OPSS_STATE_MASK ((1LU << SCX_OPSS_QSEQ_SHIFT) - 1)
#define SCX_OPSS_QSEQ_MASK (~SCX_OPSS_STATE_MASK)
+extern struct scx_sched __rcu *scx_root;
DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
+int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id);
+
/*
* Return the rq currently locked from an scx callback, or NULL if no rq is
* locked.
@@ -1090,12 +1377,107 @@ static inline struct rq *scx_locked_rq(void)
return __this_cpu_read(scx_locked_rq_state);
}
-static inline bool scx_kf_allowed_if_unlocked(void)
+static inline bool scx_bypassing(struct scx_sched *sch, s32 cpu)
+{
+ return unlikely(per_cpu_ptr(sch->pcpu, cpu)->flags &
+ SCX_SCHED_PCPU_BYPASSING);
+}
+
+#ifdef CONFIG_EXT_SUB_SCHED
+/**
+ * scx_task_sched - Find scx_sched scheduling a task
+ * @p: task of interest
+ *
+ * Return @p's scheduler instance. Must be called with @p's pi_lock or rq lock
+ * held.
+ */
+static inline struct scx_sched *scx_task_sched(const struct task_struct *p)
+{
+ return rcu_dereference_protected(p->scx.sched,
+ lockdep_is_held(&p->pi_lock) ||
+ lockdep_is_held(__rq_lockp(task_rq(p))));
+}
+
+/**
+ * scx_task_sched_rcu - Find scx_sched scheduling a task
+ * @p: task of interest
+ *
+ * Return @p's scheduler instance. The returned scx_sched is RCU protected.
+ */
+static inline struct scx_sched *scx_task_sched_rcu(const struct task_struct *p)
+{
+ return rcu_dereference_all(p->scx.sched);
+}
+
+/**
+ * scx_task_on_sched - Is a task on the specified sched?
+ * @sch: sched to test against
+ * @p: task of interest
+ *
+ * Returns %true if @p is on @sch, %false otherwise.
+ */
+static inline bool scx_task_on_sched(struct scx_sched *sch,
+ const struct task_struct *p)
+{
+ return rcu_access_pointer(p->scx.sched) == sch;
+}
+
+/**
+ * scx_prog_sched - Find scx_sched associated with a BPF prog
+ * @aux: aux passed in from BPF to a kfunc
+ *
+ * To be called from kfuncs. Return the scheduler instance associated with the
+ * BPF program given the implicit kfunc argument aux. The returned scx_sched is
+ * RCU protected.
+ */
+static inline struct scx_sched *scx_prog_sched(const struct bpf_prog_aux *aux)
+{
+ struct sched_ext_ops *ops;
+ struct scx_sched *root;
+
+ ops = bpf_prog_get_assoc_struct_ops(aux);
+ if (likely(ops))
+ return rcu_dereference_all(ops->priv);
+
+ root = rcu_dereference_all(scx_root);
+ if (root) {
+ /*
+ * COMPAT-v6.19: Schedulers built before sub-sched support was
+ * introduced may have unassociated non-struct_ops programs.
+ */
+ if (!root->ops.sub_attach)
+ return root;
+
+ if (!root->warned_unassoc_progs) {
+ printk_deferred(KERN_WARNING "sched_ext: Unassociated program %s (id %d)\n",
+ aux->name, aux->id);
+ root->warned_unassoc_progs = true;
+ }
+ }
+
+ return NULL;
+}
+#else /* CONFIG_EXT_SUB_SCHED */
+static inline struct scx_sched *scx_task_sched(const struct task_struct *p)
+{
+ return rcu_dereference_protected(scx_root,
+ lockdep_is_held(&p->pi_lock) ||
+ lockdep_is_held(__rq_lockp(task_rq(p))));
+}
+
+static inline struct scx_sched *scx_task_sched_rcu(const struct task_struct *p)
+{
+ return rcu_dereference_all(scx_root);
+}
+
+static inline bool scx_task_on_sched(struct scx_sched *sch,
+ const struct task_struct *p)
{
- return !current->scx.kf_mask;
+ return true;
}
-static inline bool scx_rq_bypassing(struct rq *rq)
+static inline struct scx_sched *scx_prog_sched(const struct bpf_prog_aux *aux)
{
- return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
+ return rcu_dereference_all(scx_root);
}
+#endif /* CONFIG_EXT_SUB_SCHED */