Diffstat (limited to 'kernel/sched/ext_idle.c')
 kernel/sched/ext_idle.c | 246
 1 file changed, 167 insertions(+), 79 deletions(-)
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index ba298ac3ce6c..7468560a6d80 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -368,7 +368,7 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops)
 	/*
 	 * Enable NUMA optimization only when there are multiple NUMA domains
-	 * among the online CPUs and the NUMA domains don't perfectly overlaps
+	 * among the online CPUs and the NUMA domains don't perfectly overlap
 	 * with the LLC domains.
 	 *
 	 * If all CPUs belong to the same NUMA node and the same LLC domain,
@@ -424,18 +424,24 @@ static inline bool task_affinity_all(const struct task_struct *p)
  *    - prefer the last used CPU to take advantage of cached data (L1, L2) and
  *      branch prediction optimizations.
  *
- * 3. Pick a CPU within the same LLC (Last-Level Cache):
+ * 3. Prefer @prev_cpu's SMT sibling:
+ *    - if @prev_cpu is busy and no fully idle core is available, try to
+ *      place the task on an idle SMT sibling of @prev_cpu; keeping the
+ *      task on the same core makes migration cheaper, preserves L1 cache
+ *      locality and reduces wakeup latency.
+ *
+ * 4. Pick a CPU within the same LLC (Last-Level Cache):
  *    - if the above conditions aren't met, pick a CPU that shares the same
  *      LLC, if the LLC domain is a subset of @cpus_allowed, to maintain
  *      cache locality.
  *
- * 4. Pick a CPU within the same NUMA node, if enabled:
+ * 5. Pick a CPU within the same NUMA node, if enabled:
  *    - choose a CPU from the same NUMA node, if the node cpumask is a
  *      subset of @cpus_allowed, to reduce memory access latency.
  *
- * 5. Pick any idle CPU within the @cpus_allowed domain.
+ * 6. Pick any idle CPU within the @cpus_allowed domain.
  *
- * Step 3 and 4 are performed only if the system has, respectively,
+ * Step 4 and 5 are performed only if the system has, respectively,
  * multiple LLCs / multiple NUMA nodes (see scx_selcpu_topo_llc and
  * scx_selcpu_topo_numa) and they don't contain the same subset of CPUs.
  *
@@ -543,7 +549,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 		 * piled up on it even if there is an idle core elsewhere on
 		 * the system.
 		 */
-		waker_node = cpu_to_node(cpu);
+		waker_node = scx_cpu_node_if_enabled(cpu);
 		if (!(current->flags & PF_EXITING) &&
 		    cpu_rq(cpu)->scx.local_dsq.nr == 0 &&
 		    (!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) &&
@@ -616,6 +622,20 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 			goto out_unlock;
 	}
 
+#ifdef CONFIG_SCHED_SMT
+	/*
+	 * Use @prev_cpu's sibling if it's idle.
+	 */
+	if (sched_smt_active()) {
+		for_each_cpu_and(cpu, cpu_smt_mask(prev_cpu), allowed) {
+			if (cpu == prev_cpu)
+				continue;
+			if (scx_idle_test_and_clear_cpu(cpu))
+				goto out_unlock;
+		}
+	}
+#endif
+
 	/*
 	 * Search for any idle CPU in the same LLC domain.
 	 */
@@ -767,8 +787,9 @@ void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
 	 * either enqueue() sees the idle bit or update_idle() sees the task
 	 * that enqueue() queued.
 	 */
-	if (SCX_HAS_OP(sch, update_idle) && do_notify && !scx_rq_bypassing(rq))
-		SCX_CALL_OP(sch, SCX_KF_REST, update_idle, rq, cpu_of(rq), idle);
+	if (SCX_HAS_OP(sch, update_idle) && do_notify &&
+	    !scx_bypassing(sch, cpu_of(rq)))
+		SCX_CALL_OP(sch, update_idle, rq, cpu_of(rq), idle);
 }
 
 static void reset_idle_masks(struct sched_ext_ops *ops)
@@ -860,33 +881,40 @@ static bool check_builtin_idle_enabled(struct scx_sched *sch)
  * code.
  *
  * We can't simply check whether @p->migration_disabled is set in a
- * sched_ext callback, because migration is always disabled for the current
- * task while running BPF code.
+ * sched_ext callback, because the BPF prolog (__bpf_prog_enter) may disable
+ * migration for the current task while running BPF code.
+ *
+ * Since the BPF prolog calls migrate_disable() only when CONFIG_PREEMPT_RCU
+ * is enabled (via rcu_read_lock_dont_migrate()), migration_disabled == 1 for
+ * the current task is ambiguous only in that case: it could be from the BPF
+ * prolog rather than a real migrate_disable() call.
  *
- * The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) respectively
- * disable and re-enable migration. For this reason, the current task
- * inside a sched_ext callback is always a migration-disabled task.
+ * Without CONFIG_PREEMPT_RCU, the BPF prolog never calls migrate_disable(),
+ * so migration_disabled == 1 always means the task is truly
+ * migration-disabled.
  *
- * Therefore, when @p->migration_disabled == 1, check whether @p is the
- * current task or not: if it is, then migration was not disabled before
- * entering the callback, otherwise migration was disabled.
+ * Therefore, when migration_disabled == 1 and CONFIG_PREEMPT_RCU is enabled,
+ * check whether @p is the current task or not: if it is, then migration was
+ * not disabled before entering the callback, otherwise migration was disabled.
  *
  * Returns true if @p is migration-disabled, false otherwise.
  */
 static bool is_bpf_migration_disabled(const struct task_struct *p)
 {
-	if (p->migration_disabled == 1)
-		return p != current;
-	else
-		return p->migration_disabled;
+	if (p->migration_disabled == 1) {
+		if (IS_ENABLED(CONFIG_PREEMPT_RCU))
+			return p != current;
+		return true;
+	}
+	return p->migration_disabled;
 }
 
 static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
 				 s32 prev_cpu, u64 wake_flags,
 				 const struct cpumask *allowed, u64 flags)
 {
-	struct rq *rq;
-	struct rq_flags rf;
+	unsigned long irq_flags;
+	bool we_locked = false;
 	s32 cpu;
 
 	if (!ops_cpu_valid(sch, prev_cpu, NULL))
@@ -896,29 +924,32 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
 		return -EBUSY;
 
 	/*
-	 * If called from an unlocked context, acquire the task's rq lock,
-	 * so that we can safely access p->cpus_ptr and p->nr_cpus_allowed.
+	 * Accessing p->cpus_ptr / p->nr_cpus_allowed needs either @p's rq
+	 * lock or @p's pi_lock. Three cases:
+	 *
+	 * - inside ops.select_cpu(): try_to_wake_up() holds the wake-up
+	 *   task's pi_lock; the wake-up task is recorded in kf_tasks[0]
+	 *   by SCX_CALL_OP_TASK_RET().
+	 * - other rq-locked SCX op: scx_locked_rq() points at the held rq.
+	 * - truly unlocked (UNLOCKED ops, SYSCALL, non-SCX struct_ops):
+	 *   nothing held, take pi_lock ourselves.
 	 *
-	 * Otherwise, allow to use this kfunc only from ops.select_cpu()
-	 * and ops.select_enqueue().
+	 * In the first two cases, BPF schedulers may pass an arbitrary task
+	 * that the held lock doesn't cover. Refuse those.
 	 */
-	if (scx_kf_allowed_if_unlocked()) {
-		rq = task_rq_lock(p, &rf);
+	if (this_rq()->scx.in_select_cpu) {
+		if (!scx_kf_arg_task_ok(sch, p))
+			return -EINVAL;
+		lockdep_assert_held(&p->pi_lock);
+	} else if (scx_locked_rq()) {
+		if (task_rq(p) != scx_locked_rq())
+			goto cross_task;
 	} else {
-		if (!scx_kf_allowed(sch, SCX_KF_SELECT_CPU | SCX_KF_ENQUEUE))
-			return -EPERM;
-		rq = scx_locked_rq();
+		raw_spin_lock_irqsave(&p->pi_lock, irq_flags);
+		we_locked = true;
 	}
 
 	/*
-	 * Validate locking correctness to access p->cpus_ptr and
-	 * p->nr_cpus_allowed: if we're holding an rq lock, we're safe;
-	 * otherwise, assert that p->pi_lock is held.
-	 */
-	if (!rq)
-		lockdep_assert_held(&p->pi_lock);
-
-	/*
 	 * This may also be called from ops.enqueue(), so we need to handle
 	 * per-CPU tasks as well. For these tasks, we can skip all idle CPU
 	 * selection optimizations and simply check whether the previously
@@ -935,24 +966,30 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
 					   allowed ?: p->cpus_ptr, flags);
 	}
 
-	if (scx_kf_allowed_if_unlocked())
-		task_rq_unlock(rq, p, &rf);
+	if (we_locked)
+		raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags);
 
 	return cpu;
+
+cross_task:
+	scx_error(sch, "select_cpu kfunc called cross-task on %s[%d]",
+		  p->comm, p->pid);
+	return -EINVAL;
 }
 
 /**
  * scx_bpf_cpu_node - Return the NUMA node the given @cpu belongs to, or
  * trigger an error if @cpu is invalid
  * @cpu: target CPU
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
  */
-__bpf_kfunc int scx_bpf_cpu_node(s32 cpu)
+__bpf_kfunc s32 scx_bpf_cpu_node(s32 cpu, const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch) || !ops_cpu_valid(sch, cpu, NULL))
 		return NUMA_NO_NODE;
 
 	return cpu_to_node(cpu);
@@ -964,6 +1001,7 @@ __bpf_kfunc int scx_bpf_cpu_node(s32 cpu)
  * @prev_cpu: CPU @p was on previously
  * @wake_flags: %SCX_WAKE_* flags
  * @is_idle: out parameter indicating whether the returned CPU is idle
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
  *
  * Can be called from ops.select_cpu(), ops.enqueue(), or from an unlocked
  * context such as a BPF test_run() call, as long as built-in CPU selection
@@ -974,14 +1012,15 @@ __bpf_kfunc int scx_bpf_cpu_node(s32 cpu)
  * currently idle and thus a good candidate for direct dispatching.
  */
 __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
-				       u64 wake_flags, bool *is_idle)
+				       u64 wake_flags, bool *is_idle,
+				       const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 	s32 cpu;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return -ENODEV;
 
@@ -1009,6 +1048,7 @@ struct scx_bpf_select_cpu_and_args {
 * @args->prev_cpu: CPU @p was on previously
 * @args->wake_flags: %SCX_WAKE_* flags
 * @args->flags: %SCX_PICK_IDLE* flags
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Wrapper kfunc that takes arguments via struct to work around BPF's 5 argument
 * limit. BPF programs should use scx_bpf_select_cpu_and() which is provided
@@ -1027,13 +1067,14 @@ struct scx_bpf_select_cpu_and_args {
  */
 __bpf_kfunc s32 __scx_bpf_select_cpu_and(struct task_struct *p,
 					 const struct cpumask *cpus_allowed,
-					 struct scx_bpf_select_cpu_and_args *args)
+					 struct scx_bpf_select_cpu_and_args *args,
+					 const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return -ENODEV;
 
@@ -1055,6 +1096,17 @@ __bpf_kfunc s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64
 	if (unlikely(!sch))
 		return -ENODEV;
 
+#ifdef CONFIG_EXT_SUB_SCHED
+	/*
+	 * Disallow if any sub-scheds are attached. There is no way to tell
+	 * which scheduler called us, just error out @p's scheduler.
+	 */
+	if (unlikely(!list_empty(&sch->children))) {
+		scx_error(scx_task_sched(p), "__scx_bpf_select_cpu_and() must be used");
+		return -EINVAL;
+	}
+#endif
+
 	return select_cpu_from_kfunc(sch, p, prev_cpu, wake_flags,
 				     cpus_allowed, flags);
 }
@@ -1063,18 +1115,20 @@ __bpf_kfunc s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64
 * scx_bpf_get_idle_cpumask_node - Get a referenced kptr to the
 * idle-tracking per-CPU cpumask of a target NUMA node.
 * @node: target NUMA node
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Returns an empty cpumask if idle tracking is not enabled, if @node is
 * not valid, or running on a UP kernel. In this case the actual error will
 * be reported to the BPF scheduler via scx_error().
 */
-__bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask_node(int node)
+__bpf_kfunc const struct cpumask *
+scx_bpf_get_idle_cpumask_node(s32 node, const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return cpu_none_mask;
 
@@ -1088,17 +1142,18 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask_node(int node)
 /**
 * scx_bpf_get_idle_cpumask - Get a referenced kptr to the idle-tracking
 * per-CPU cpumask.
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Returns an empty mask if idle tracking is not enabled, or running on a
 * UP kernel.
 */
-__bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
+__bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return cpu_none_mask;
 
@@ -1118,18 +1173,20 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
 * idle-tracking, per-physical-core cpumask of a target NUMA node. Can be
 * used to determine if an entire physical core is free.
 * @node: target NUMA node
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Returns an empty cpumask if idle tracking is not enabled, if @node is
 * not valid, or running on a UP kernel. In this case the actual error will
 * be reported to the BPF scheduler via scx_error().
 */
-__bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask_node(int node)
+__bpf_kfunc const struct cpumask *
+scx_bpf_get_idle_smtmask_node(s32 node, const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return cpu_none_mask;
 
@@ -1147,17 +1204,18 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask_node(int node)
 * scx_bpf_get_idle_smtmask - Get a referenced kptr to the idle-tracking,
 * per-physical-core cpumask. Can be used to determine if an entire physical
 * core is free.
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Returns an empty mask if idle tracking is not enabled, or running on a
 * UP kernel.
 */
-__bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(void)
+__bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return cpu_none_mask;
 
@@ -1193,6 +1251,7 @@ __bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask)
 /**
 * scx_bpf_test_and_clear_cpu_idle - Test and clear @cpu's idle state
 * @cpu: cpu to test and clear idle for
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Returns %true if @cpu was idle and its idle state was successfully cleared.
 * %false otherwise.
@@ -1200,13 +1259,13 @@ __bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask)
 * Unavailable if ops.update_idle() is implemented and
 * %SCX_OPS_KEEP_BUILTIN_IDLE is not set.
 */
-__bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
+__bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu, const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return false;
 
@@ -1224,6 +1283,7 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
 * @cpus_allowed: Allowed cpumask
 * @node: target NUMA node
 * @flags: %SCX_PICK_IDLE_* flags
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Pick and claim an idle cpu in @cpus_allowed from the NUMA node @node.
 *
@@ -1239,13 +1299,14 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
 * %SCX_OPS_BUILTIN_IDLE_PER_NODE is not set.
 */
 __bpf_kfunc s32 scx_bpf_pick_idle_cpu_node(const struct cpumask *cpus_allowed,
-					   int node, u64 flags)
+					   s32 node, u64 flags,
+					   const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return -ENODEV;
 
@@ -1260,6 +1321,7 @@ __bpf_kfunc s32 scx_bpf_pick_idle_cpu_node(const struct cpumask *cpus_allowed,
 * scx_bpf_pick_idle_cpu - Pick and claim an idle cpu
 * @cpus_allowed: Allowed cpumask
 * @flags: %SCX_PICK_IDLE_CPU_* flags
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Pick and claim an idle cpu in @cpus_allowed. Returns the picked idle cpu
 * number on success. -%EBUSY if no matching cpu was found.
@@ -1279,13 +1341,13 @@ __bpf_kfunc s32 scx_bpf_pick_idle_cpu_node(const struct cpumask *cpus_allowed,
 * scx_bpf_pick_idle_cpu_node() instead.
 */
 __bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed,
-				      u64 flags)
+				      u64 flags, const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return -ENODEV;
 
@@ -1306,6 +1368,7 @@ __bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed,
 * @cpus_allowed: Allowed cpumask
 * @node: target NUMA node
 * @flags: %SCX_PICK_IDLE_CPU_* flags
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any
 * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked idle cpu
@@ -1322,14 +1385,15 @@ __bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed,
 * CPU.
 */
 __bpf_kfunc s32 scx_bpf_pick_any_cpu_node(const struct cpumask *cpus_allowed,
-					  int node, u64 flags)
+					  s32 node, u64 flags,
+					  const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 	s32 cpu;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return -ENODEV;
 
@@ -1355,6 +1419,7 @@ __bpf_kfunc s32 scx_bpf_pick_any_cpu_node(const struct cpumask *cpus_allowed,
 * scx_bpf_pick_any_cpu - Pick and claim an idle cpu if available or pick any CPU
 * @cpus_allowed: Allowed cpumask
 * @flags: %SCX_PICK_IDLE_CPU_* flags
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
 *
 * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any
 * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked idle cpu
@@ -1369,14 +1434,14 @@ __bpf_kfunc s32 scx_bpf_pick_any_cpu_node(const struct cpumask *cpus_allowed,
 * scx_bpf_pick_any_cpu_node() instead.
 */
 __bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed,
-				     u64 flags)
+				     u64 flags, const struct bpf_prog_aux *aux)
 {
 	struct scx_sched *sch;
 	s32 cpu;
 
 	guard(rcu)();
 
-	sch = rcu_dereference(scx_root);
+	sch = scx_prog_sched(aux);
 	if (unlikely(!sch))
 		return -ENODEV;
 
@@ -1401,25 +1466,46 @@ __bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed,
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(scx_kfunc_ids_idle)
-BTF_ID_FLAGS(func, scx_bpf_cpu_node)
-BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask_node, KF_ACQUIRE)
-BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE)
-BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask_node, KF_ACQUIRE)
-BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE)
+BTF_ID_FLAGS(func, scx_bpf_cpu_node, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask_node, KF_IMPLICIT_ARGS | KF_ACQUIRE)
+BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_IMPLICIT_ARGS | KF_ACQUIRE)
+BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask_node, KF_IMPLICIT_ARGS | KF_ACQUIRE)
+BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_IMPLICIT_ARGS | KF_ACQUIRE)
 BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE)
-BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle)
-BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu_node, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu_node, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU)
-BTF_ID_FLAGS(func, __scx_bpf_select_cpu_and, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_select_cpu_and, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle, KF_IMPLICIT_ARGS)
+BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu_node, KF_IMPLICIT_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_IMPLICIT_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu_node, KF_IMPLICIT_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_IMPLICIT_ARGS | KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_idle)
 
 static const struct btf_kfunc_id_set scx_kfunc_set_idle = {
 	.owner = THIS_MODULE,
 	.set = &scx_kfunc_ids_idle,
+	.filter = scx_kfunc_context_filter,
+};
+
+/*
+ * The select_cpu kfuncs internally call task_rq_lock() when invoked from an
+ * rq-unlocked context, and thus cannot be safely called from arbitrary tracing
+ * contexts where @p's pi_lock state is unknown. Keep them out of
+ * BPF_PROG_TYPE_TRACING by registering them in their own set which is exposed
+ * only to STRUCT_OPS and SYSCALL programs.
+ *
+ * These kfuncs are also members of scx_kfunc_ids_unlocked (see ext.c) because
+ * they're callable from unlocked contexts in addition to ops.select_cpu() and
+ * ops.enqueue().
+ */
+BTF_KFUNCS_START(scx_kfunc_ids_select_cpu)
+BTF_ID_FLAGS(func, __scx_bpf_select_cpu_and, KF_IMPLICIT_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_select_cpu_and, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_IMPLICIT_ARGS | KF_RCU)
+BTF_KFUNCS_END(scx_kfunc_ids_select_cpu)
+
+static const struct btf_kfunc_id_set scx_kfunc_set_select_cpu = {
+	.owner = THIS_MODULE,
+	.set = &scx_kfunc_ids_select_cpu,
+	.filter = scx_kfunc_context_filter,
 };
 
 int scx_idle_init(void)
@@ -1428,7 +1514,9 @@ int scx_idle_init(void)
 
 	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_idle) ||
 	      register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &scx_kfunc_set_idle) ||
-	      register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &scx_kfunc_set_idle);
+	      register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &scx_kfunc_set_idle) ||
+	      register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_select_cpu) ||
+	      register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &scx_kfunc_set_select_cpu);
 
 	return ret;
 }
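
Note on the signature changes above: the new "const struct bpf_prog_aux *aux" parameters are implicit kfunc arguments (KF_IMPLICIT_ARGS) supplied by the verifier, so BPF schedulers keep calling these kfuncs with their original argument lists. As a rough illustration only, a minimal ops.select_cpu() modeled on the scx example schedulers might look like the sketch below (it assumes the usual scx common BPF headers; the callback name is illustrative and not part of this patch):

s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	bool is_idle = false;
	s32 cpu;

	/*
	 * Built-in idle CPU selection. The bpf_prog_aux argument added by
	 * this patch is hidden from the BPF side, so the call keeps its
	 * four-argument form.
	 */
	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
	if (is_idle)
		/* The picked CPU is idle: dispatch @p directly to its local DSQ. */
		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);

	return cpu;
}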
