From ec2444530612a886b406e2830d7f314d1a07d4bb Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 19 Jan 2022 14:39:39 -0800 Subject: psi: Fix "no previous prototype" warnings when CONFIG_CGROUPS=n When CONFIG_CGROUPS is disabled psi code generates the following warnings: kernel/sched/psi.c:1112:21: warning: no previous prototype for 'psi_trigger_create' [-Wmissing-prototypes] 1112 | struct psi_trigger *psi_trigger_create(struct psi_group *group, | ^~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1182:6: warning: no previous prototype for 'psi_trigger_destroy' [-Wmissing-prototypes] 1182 | void psi_trigger_destroy(struct psi_trigger *t) | ^~~~~~~~~~~~~~~~~~~ kernel/sched/psi.c:1249:10: warning: no previous prototype for 'psi_trigger_poll' [-Wmissing-prototypes] 1249 | __poll_t psi_trigger_poll(void **trigger_ptr, | ^~~~~~~~~~~~~~~~ Change declarations of these functions in the header to provide the prototypes even when they are unused. Fixes: 0e94682b73bf ("psi: introduce psi monitor") Reported-by: kernel test robot Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220119223940.787748-2-surenb@google.com --- include/linux/psi.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/psi.h b/include/linux/psi.h index a70ca833c6d7..827970278d62 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -25,18 +25,17 @@ void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); - -#ifdef CONFIG_CGROUPS -int psi_cgroup_alloc(struct cgroup *cgrp); -void psi_cgroup_free(struct cgroup *cgrp); -void cgroup_move_task(struct task_struct *p, struct css_set *to); - struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); + +#ifdef CONFIG_CGROUPS +int psi_cgroup_alloc(struct cgroup *cgrp); +void psi_cgroup_free(struct cgroup *cgrp); +void cgroup_move_task(struct task_struct *p, struct css_set *to); #endif #else /* CONFIG_PSI */ -- cgit v1.2.3 From c8eaf6ac76f40f6c59fc7d056e2e08c4a57ea9c7 Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Fri, 28 Jan 2022 17:50:25 +0800 Subject: sched: move autogroup sysctls into its own file move autogroup sysctls to autogroup.c and use the new register_sysctl_init() to register the sysctl interface. Signed-off-by: Zhen Ni Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220128095025.8745-1-nizhen@uniontech.com --- include/linux/sched/sysctl.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c19dd5a2c05c..3f2b70f8d32c 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -45,10 +45,6 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default; extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif -#ifdef CONFIG_SCHED_AUTOGROUP -extern unsigned int sysctl_sched_autogroup_enabled; -#endif - extern int sysctl_sched_rr_timeslice; extern int sched_rr_timeslice; -- cgit v1.2.3 From e496132ebedd870b67f1f6d2428f9bb9d7ae27fd Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 8 Feb 2022 09:43:34 +0000 Subject: sched/fair: Adjust the allowed NUMA imbalance when SD_NUMA spans multiple LLCs Commit 7d2b5dd0bcc4 ("sched/numa: Allow a floating imbalance between NUMA nodes") allowed an imbalance between NUMA nodes such that communicating tasks would not be pulled apart by the load balancer. This works fine when there is a 1:1 relationship between LLC and node but can be suboptimal for multiple LLCs if independent tasks prematurely use CPUs sharing cache. Zen* has multiple LLCs per node with local memory channels and due to the allowed imbalance, it's far harder to tune some workloads to run optimally than it is on hardware that has 1 LLC per node. This patch allows an imbalance to exist up to the point where LLCs should be balanced between nodes. On a Zen3 machine running STREAM parallelised with OMP to have on instance per LLC the results and without binding, the results are 5.17.0-rc0 5.17.0-rc0 vanilla sched-numaimb-v6 MB/sec copy-16 162596.94 ( 0.00%) 580559.74 ( 257.05%) MB/sec scale-16 136901.28 ( 0.00%) 374450.52 ( 173.52%) MB/sec add-16 157300.70 ( 0.00%) 564113.76 ( 258.62%) MB/sec triad-16 151446.88 ( 0.00%) 564304.24 ( 272.61%) STREAM can use directives to force the spread if the OpenMP is new enough but that doesn't help if an application uses threads and it's not known in advance how many threads will be created. Coremark is a CPU and cache intensive benchmark parallelised with threads. When running with 1 thread per core, the vanilla kernel allows threads to contend on cache. With the patch; 5.17.0-rc0 5.17.0-rc0 vanilla sched-numaimb-v5 Min Score-16 368239.36 ( 0.00%) 389816.06 ( 5.86%) Hmean Score-16 388607.33 ( 0.00%) 427877.08 * 10.11%* Max Score-16 408945.69 ( 0.00%) 481022.17 ( 17.62%) Stddev Score-16 15247.04 ( 0.00%) 24966.82 ( -63.75%) CoeffVar Score-16 3.92 ( 0.00%) 5.82 ( -48.48%) It can also make a big difference for semi-realistic workloads like specjbb which can execute arbitrary numbers of threads without advance knowledge of how they should be placed. Even in cases where the average performance is neutral, the results are more stable. 5.17.0-rc0 5.17.0-rc0 vanilla sched-numaimb-v6 Hmean tput-1 71631.55 ( 0.00%) 73065.57 ( 2.00%) Hmean tput-8 582758.78 ( 0.00%) 556777.23 ( -4.46%) Hmean tput-16 1020372.75 ( 0.00%) 1009995.26 ( -1.02%) Hmean tput-24 1416430.67 ( 0.00%) 1398700.11 ( -1.25%) Hmean tput-32 1687702.72 ( 0.00%) 1671357.04 ( -0.97%) Hmean tput-40 1798094.90 ( 0.00%) 2015616.46 * 12.10%* Hmean tput-48 1972731.77 ( 0.00%) 2333233.72 ( 18.27%) Hmean tput-56 2386872.38 ( 0.00%) 2759483.38 ( 15.61%) Hmean tput-64 2909475.33 ( 0.00%) 2925074.69 ( 0.54%) Hmean tput-72 2585071.36 ( 0.00%) 2962443.97 ( 14.60%) Hmean tput-80 2994387.24 ( 0.00%) 3015980.59 ( 0.72%) Hmean tput-88 3061408.57 ( 0.00%) 3010296.16 ( -1.67%) Hmean tput-96 3052394.82 ( 0.00%) 2784743.41 ( -8.77%) Hmean tput-104 2997814.76 ( 0.00%) 2758184.50 ( -7.99%) Hmean tput-112 2955353.29 ( 0.00%) 2859705.09 ( -3.24%) Hmean tput-120 2889770.71 ( 0.00%) 2764478.46 ( -4.34%) Hmean tput-128 2871713.84 ( 0.00%) 2750136.73 ( -4.23%) Stddev tput-1 5325.93 ( 0.00%) 2002.53 ( 62.40%) Stddev tput-8 6630.54 ( 0.00%) 10905.00 ( -64.47%) Stddev tput-16 25608.58 ( 0.00%) 6851.16 ( 73.25%) Stddev tput-24 12117.69 ( 0.00%) 4227.79 ( 65.11%) Stddev tput-32 27577.16 ( 0.00%) 8761.05 ( 68.23%) Stddev tput-40 59505.86 ( 0.00%) 2048.49 ( 96.56%) Stddev tput-48 168330.30 ( 0.00%) 93058.08 ( 44.72%) Stddev tput-56 219540.39 ( 0.00%) 30687.02 ( 86.02%) Stddev tput-64 121750.35 ( 0.00%) 9617.36 ( 92.10%) Stddev tput-72 223387.05 ( 0.00%) 34081.13 ( 84.74%) Stddev tput-80 128198.46 ( 0.00%) 22565.19 ( 82.40%) Stddev tput-88 136665.36 ( 0.00%) 27905.97 ( 79.58%) Stddev tput-96 111925.81 ( 0.00%) 99615.79 ( 11.00%) Stddev tput-104 146455.96 ( 0.00%) 28861.98 ( 80.29%) Stddev tput-112 88740.49 ( 0.00%) 58288.23 ( 34.32%) Stddev tput-120 186384.86 ( 0.00%) 45812.03 ( 75.42%) Stddev tput-128 78761.09 ( 0.00%) 57418.48 ( 27.10%) Similarly, for embarassingly parallel problems like NPB-ep, there are improvements due to better spreading across LLC when the machine is not fully utilised. vanilla sched-numaimb-v6 Min ep.D 31.79 ( 0.00%) 26.11 ( 17.87%) Amean ep.D 31.86 ( 0.00%) 26.17 * 17.86%* Stddev ep.D 0.07 ( 0.00%) 0.05 ( 24.41%) CoeffVar ep.D 0.22 ( 0.00%) 0.20 ( 7.97%) Max ep.D 31.93 ( 0.00%) 26.21 ( 17.91%) Signed-off-by: Mel Gorman Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Gautham R. Shenoy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20220208094334.16379-3-mgorman@techsingularity.net --- include/linux/sched/topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 8054641c0a7b..56cffe42abbc 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -93,6 +93,7 @@ struct sched_domain { unsigned int busy_factor; /* less balancing by factor if busy */ unsigned int imbalance_pct; /* No balance until over watermark */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ + unsigned int imb_numa_nr; /* Nr running tasks that allows a NUMA imbalance */ int nohz_idle; /* NOHZ IDLE status */ int flags; /* See SD_* */ -- cgit v1.2.3 From e6df4ead85d9da1b07dd40bd4c6d2182f3e210c4 Mon Sep 17 00:00:00 2001 From: Zhaoyang Huang Date: Tue, 25 Jan 2022 14:56:58 +0800 Subject: psi: fix possible trigger missing in the window When a new threshold breaching stall happens after a psi event was generated and within the window duration, the new event is not generated because the events are rate-limited to one per window. If after that no new stall is recorded then the event will not be generated even after rate-limiting duration has passed. This is happening because with no new stall, window_update will not be called even though threshold was previously breached. To fix this, record threshold breaching occurrence and generate the event once window duration is passed. Suggested-by: Suren Baghdasaryan Signed-off-by: Zhaoyang Huang Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Acked-by: Suren Baghdasaryan Link: https://lore.kernel.org/r/1643093818-19835-1-git-send-email-huangzhaoyang@gmail.com --- include/linux/psi_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 516c0fe836fd..dc3ec5e4b9ee 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -144,6 +144,9 @@ struct psi_trigger { /* Refcounting to prevent premature destruction */ struct kref refcount; + + /* Deferred event(s) from previous ratelimit window */ + bool pending_event; }; struct psi_group { -- cgit v1.2.3 From 04d4e665a60902cf36e7ad39af1179cb5df542ad Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Feb 2022 16:59:06 +0100 Subject: sched/isolation: Use single feature type while referring to housekeeping cpumask Refer to housekeeping APIs using single feature types instead of flags. This prevents from passing multiple isolation features at once to housekeeping interfaces, which soon won't be possible anymore as each isolation features will have their own cpumask. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juri Lelli Reviewed-by: Phil Auld Link: https://lore.kernel.org/r/20220207155910.527133-5-frederic@kernel.org --- include/linux/sched/isolation.h | 43 +++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index cc9f393e2a70..8c15abd67aed 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -5,54 +5,55 @@ #include #include -enum hk_flags { - HK_FLAG_TIMER = 1, - HK_FLAG_RCU = (1 << 1), - HK_FLAG_MISC = (1 << 2), - HK_FLAG_SCHED = (1 << 3), - HK_FLAG_TICK = (1 << 4), - HK_FLAG_DOMAIN = (1 << 5), - HK_FLAG_WQ = (1 << 6), - HK_FLAG_MANAGED_IRQ = (1 << 7), - HK_FLAG_KTHREAD = (1 << 8), +enum hk_type { + HK_TYPE_TIMER, + HK_TYPE_RCU, + HK_TYPE_MISC, + HK_TYPE_SCHED, + HK_TYPE_TICK, + HK_TYPE_DOMAIN, + HK_TYPE_WQ, + HK_TYPE_MANAGED_IRQ, + HK_TYPE_KTHREAD, + HK_TYPE_MAX }; #ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overridden); -extern int housekeeping_any_cpu(enum hk_flags flags); -extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); -extern bool housekeeping_enabled(enum hk_flags flags); -extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); -extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); +extern int housekeeping_any_cpu(enum hk_type type); +extern const struct cpumask *housekeeping_cpumask(enum hk_type type); +extern bool housekeeping_enabled(enum hk_type type); +extern void housekeeping_affine(struct task_struct *t, enum hk_type type); +extern bool housekeeping_test_cpu(int cpu, enum hk_type type); extern void __init housekeeping_init(void); #else -static inline int housekeeping_any_cpu(enum hk_flags flags) +static inline int housekeeping_any_cpu(enum hk_type type) { return smp_processor_id(); } -static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) +static inline const struct cpumask *housekeeping_cpumask(enum hk_type type) { return cpu_possible_mask; } -static inline bool housekeeping_enabled(enum hk_flags flags) +static inline bool housekeeping_enabled(enum hk_type type) { return false; } static inline void housekeeping_affine(struct task_struct *t, - enum hk_flags flags) { } + enum hk_type type) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ -static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) +static inline bool housekeeping_cpu(int cpu, enum hk_type type) { #ifdef CONFIG_CPU_ISOLATION if (static_branch_unlikely(&housekeeping_overridden)) - return housekeeping_test_cpu(cpu, flags); + return housekeeping_test_cpu(cpu, type); #endif return true; } -- cgit v1.2.3 From 8a69fe0be143b0a1af829f85f0e9a1ae7d6a04db Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Feb 2022 16:52:11 +0000 Subject: sched/preempt: Refactor sched_dynamic_update() Currently sched_dynamic_update needs to open-code the enabled/disabled function names for each preemption model it supports, when in practice this is a boolean enabled/disabled state for each function. Make this clearer and avoid repetition by defining the enabled/disabled states at the function definition, and using helper macros to perform the static_call_update(). Where x86 currently overrides the enabled function, it is made to provide both the enabled and disabled states for consistency, with defaults provided by the core code otherwise. In subsequent patches this will allow us to support PREEMPT_DYNAMIC without static calls. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220214165216.2231574-3-mark.rutland@arm.com --- include/linux/entry-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 2e2b8d6140ed..a01ac1a0a292 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -456,6 +456,8 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); */ void irqentry_exit_cond_resched(void); #ifdef CONFIG_PREEMPT_DYNAMIC +#define irqentry_exit_cond_resched_dynamic_enabled irqentry_exit_cond_resched +#define irqentry_exit_cond_resched_dynamic_disabled NULL DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched); #endif -- cgit v1.2.3 From 4624a14f4daa8ab4578d274555fd8847254ce339 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Feb 2022 16:52:12 +0000 Subject: sched/preempt: Simplify irqentry_exit_cond_resched() callers Currently callers of irqentry_exit_cond_resched() need to be aware of whether the function should be indirected via a static call, leading to ugly ifdeffery in callers. Save them the hassle with a static inline wrapper that does the right thing. The raw_irqentry_exit_cond_resched() will also be useful in subsequent patches which will add conditional wrappers for preemption functions. Note: in arch/x86/entry/common.c, xen_pv_evtchn_do_upcall() always calls irqentry_exit_cond_resched() directly, even when PREEMPT_DYNAMIC is in use. I believe this is a latent bug (which this patch corrects), but I'm not entirely certain this wasn't deliberate. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220214165216.2231574-4-mark.rutland@arm.com --- include/linux/entry-common.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index a01ac1a0a292..dfd84c59b144 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -454,11 +454,14 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); * * Conditional reschedule with additional sanity checks. */ -void irqentry_exit_cond_resched(void); +void raw_irqentry_exit_cond_resched(void); #ifdef CONFIG_PREEMPT_DYNAMIC -#define irqentry_exit_cond_resched_dynamic_enabled irqentry_exit_cond_resched +#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched #define irqentry_exit_cond_resched_dynamic_disabled NULL -DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched); +DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); +#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() +#else +#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() #endif /** -- cgit v1.2.3 From 99cf983cc8bca4adb461b519664c939a565cfd4d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Feb 2022 16:52:14 +0000 Subject: sched/preempt: Add PREEMPT_DYNAMIC using static keys Where an architecture selects HAVE_STATIC_CALL but not HAVE_STATIC_CALL_INLINE, each static call has an out-of-line trampoline which will either branch to a callee or return to the caller. On such architectures, a number of constraints can conspire to make those trampolines more complicated and potentially less useful than we'd like. For example: * Hardware and software control flow integrity schemes can require the addition of "landing pad" instructions (e.g. `BTI` for arm64), which will also be present at the "real" callee. * Limited branch ranges can require that trampolines generate or load an address into a register and perform an indirect branch (or at least have a slow path that does so). This loses some of the benefits of having a direct branch. * Interaction with SW CFI schemes can be complicated and fragile, e.g. requiring that we can recognise idiomatic codegen and remove indirections understand, at least until clang proves more helpful mechanisms for dealing with this. For PREEMPT_DYNAMIC, we don't need the full power of static calls, as we really only need to enable/disable specific preemption functions. We can achieve the same effect without a number of the pain points above by using static keys to fold early returns into the preemption functions themselves rather than in an out-of-line trampoline, effectively inlining the trampoline into the start of the function. For arm64, this results in good code generation. For example, the dynamic_cond_resched() wrapper looks as follows when enabled. When disabled, the first `B` is replaced with a `NOP`, resulting in an early return. | : | bti c | b // or `nop` | mov w0, #0x0 | ret | mrs x0, sp_el0 | ldr x0, [x0, #8] | cbnz x0, | paciasp | stp x29, x30, [sp, #-16]! | mov x29, sp | bl | mov w0, #0x1 | ldp x29, x30, [sp], #16 | autiasp | ret ... compared to the regular form of the function: | <__cond_resched>: | bti c | mrs x0, sp_el0 | ldr x1, [x0, #8] | cbz x1, <__cond_resched+0x18> | mov w0, #0x0 | ret | paciasp | stp x29, x30, [sp, #-16]! | mov x29, sp | bl | mov w0, #0x1 | ldp x29, x30, [sp], #16 | autiasp | ret Any architecture which implements static keys should be able to use this to implement PREEMPT_DYNAMIC with similar cost to non-inlined static calls. Since this is likely to have greater overhead than (inlined) static calls, PREEMPT_DYNAMIC is only defaulted to enabled when HAVE_PREEMPT_DYNAMIC_CALL is selected. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220214165216.2231574-6-mark.rutland@arm.com --- include/linux/entry-common.h | 10 ++++++++-- include/linux/kernel.h | 7 ++++++- include/linux/sched.h | 10 +++++++++- 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index dfd84c59b144..141952f4fee8 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -456,13 +456,19 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); */ void raw_irqentry_exit_cond_resched(void); #ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) #define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched #define irqentry_exit_cond_resched_dynamic_disabled NULL DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); #define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() -#else -#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +void dynamic_irqentry_exit_cond_resched(void); +#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() #endif +#else /* CONFIG_PREEMPT_DYNAMIC */ +#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() +#endif /* CONFIG_PREEMPT_DYNAMIC */ /** * irqentry_exit - Handle return from exception that used irqentry_enter() diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 33f47a996513..a890428bcc1a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -99,7 +99,7 @@ struct user; extern int __cond_resched(void); # define might_resched() __cond_resched() -#elif defined(CONFIG_PREEMPT_DYNAMIC) +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) extern int __cond_resched(void); @@ -110,6 +110,11 @@ static __always_inline void might_resched(void) static_call_mod(might_resched)(); } +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) + +extern int dynamic_might_resched(void); +# define might_resched() dynamic_might_resched() + #else # define might_resched() do { } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 508b91d57470..de03ddeb064b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2020,7 +2020,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) extern int __cond_resched(void); -#ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) DECLARE_STATIC_CALL(cond_resched, __cond_resched); @@ -2029,6 +2029,14 @@ static __always_inline int _cond_resched(void) return static_call_mod(cond_resched)(); } +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +extern int dynamic_cond_resched(void); + +static __always_inline int _cond_resched(void) +{ + return dynamic_cond_resched(); +} + #else static inline int _cond_resched(void) -- cgit v1.2.3 From d0b9d6dcaa5ac480c272683919f387cc6d82b638 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Sep 2021 20:42:44 +0200 Subject: sched/headers: Fix header to build standalone: Uses various kernel types that don't build standalone. Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra --- include/linux/sched_clock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 835ee87ed792..cb41c5edb4d4 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -5,6 +5,8 @@ #ifndef LINUX_SCHED_CLOCK #define LINUX_SCHED_CLOCK +#include + #ifdef CONFIG_GENERIC_SCHED_CLOCK /** * struct clock_read_data - data required to read from sched_clock() -- cgit v1.2.3 From 669f45f19cf7feea7066dce86a0ce89dfc8a2065 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Feb 2022 13:23:59 +0100 Subject: sched/headers: Add initial new headers as identity mappings This allows code sharing between fast-headers tree and the vanilla scheduler tree. Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra --- include/linux/cgroup_api.h | 1 + include/linux/cpumask_api.h | 1 + include/linux/fs_api.h | 1 + include/linux/gfp_api.h | 1 + include/linux/hashtable_api.h | 1 + include/linux/hrtimer_api.h | 1 + include/linux/kobject_api.h | 1 + include/linux/kref_api.h | 1 + include/linux/ktime_api.h | 1 + include/linux/llist_api.h | 1 + include/linux/lockdep_api.h | 1 + include/linux/mm_api.h | 1 + include/linux/mutex_api.h | 1 + include/linux/perf_event_api.h | 1 + include/linux/pgtable_api.h | 1 + include/linux/ptrace_api.h | 1 + include/linux/rcuwait_api.h | 1 + include/linux/refcount_api.h | 1 + include/linux/sched/affinity.h | 1 + include/linux/sched/cond_resched.h | 1 + include/linux/sched/posix-timers.h | 1 + include/linux/sched/rseq_api.h | 1 + include/linux/sched/task_flags.h | 1 + include/linux/sched/thread_info_api.h | 1 + include/linux/seqlock_api.h | 1 + include/linux/softirq.h | 1 + include/linux/spinlock_api.h | 1 + include/linux/swait_api.h | 1 + include/linux/syscalls_api.h | 1 + include/linux/u64_stats_sync_api.h | 1 + include/linux/wait_api.h | 1 + include/linux/workqueue_api.h | 1 + 32 files changed, 32 insertions(+) create mode 100644 include/linux/cgroup_api.h create mode 100644 include/linux/cpumask_api.h create mode 100644 include/linux/fs_api.h create mode 100644 include/linux/gfp_api.h create mode 100644 include/linux/hashtable_api.h create mode 100644 include/linux/hrtimer_api.h create mode 100644 include/linux/kobject_api.h create mode 100644 include/linux/kref_api.h create mode 100644 include/linux/ktime_api.h create mode 100644 include/linux/llist_api.h create mode 100644 include/linux/lockdep_api.h create mode 100644 include/linux/mm_api.h create mode 100644 include/linux/mutex_api.h create mode 100644 include/linux/perf_event_api.h create mode 100644 include/linux/pgtable_api.h create mode 100644 include/linux/ptrace_api.h create mode 100644 include/linux/rcuwait_api.h create mode 100644 include/linux/refcount_api.h create mode 100644 include/linux/sched/affinity.h create mode 100644 include/linux/sched/cond_resched.h create mode 100644 include/linux/sched/posix-timers.h create mode 100644 include/linux/sched/rseq_api.h create mode 100644 include/linux/sched/task_flags.h create mode 100644 include/linux/sched/thread_info_api.h create mode 100644 include/linux/seqlock_api.h create mode 100644 include/linux/softirq.h create mode 100644 include/linux/spinlock_api.h create mode 100644 include/linux/swait_api.h create mode 100644 include/linux/syscalls_api.h create mode 100644 include/linux/u64_stats_sync_api.h create mode 100644 include/linux/wait_api.h create mode 100644 include/linux/workqueue_api.h (limited to 'include/linux') diff --git a/include/linux/cgroup_api.h b/include/linux/cgroup_api.h new file mode 100644 index 000000000000..d0cfe8025111 --- /dev/null +++ b/include/linux/cgroup_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/cpumask_api.h b/include/linux/cpumask_api.h new file mode 100644 index 000000000000..83bd3ebe82b0 --- /dev/null +++ b/include/linux/cpumask_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/fs_api.h b/include/linux/fs_api.h new file mode 100644 index 000000000000..83be38d6d413 --- /dev/null +++ b/include/linux/fs_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/gfp_api.h b/include/linux/gfp_api.h new file mode 100644 index 000000000000..5a05a2764a86 --- /dev/null +++ b/include/linux/gfp_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/hashtable_api.h b/include/linux/hashtable_api.h new file mode 100644 index 000000000000..c268ac2c5c0e --- /dev/null +++ b/include/linux/hashtable_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/hrtimer_api.h b/include/linux/hrtimer_api.h new file mode 100644 index 000000000000..8d9700894468 --- /dev/null +++ b/include/linux/hrtimer_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/kobject_api.h b/include/linux/kobject_api.h new file mode 100644 index 000000000000..6e36a054c2d6 --- /dev/null +++ b/include/linux/kobject_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/kref_api.h b/include/linux/kref_api.h new file mode 100644 index 000000000000..d67e554721d2 --- /dev/null +++ b/include/linux/kref_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/ktime_api.h b/include/linux/ktime_api.h new file mode 100644 index 000000000000..f697d493960f --- /dev/null +++ b/include/linux/ktime_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/llist_api.h b/include/linux/llist_api.h new file mode 100644 index 000000000000..625bec0393a1 --- /dev/null +++ b/include/linux/llist_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/lockdep_api.h b/include/linux/lockdep_api.h new file mode 100644 index 000000000000..907e66979ab2 --- /dev/null +++ b/include/linux/lockdep_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/mm_api.h b/include/linux/mm_api.h new file mode 100644 index 000000000000..a5ace2b198b8 --- /dev/null +++ b/include/linux/mm_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/mutex_api.h b/include/linux/mutex_api.h new file mode 100644 index 000000000000..85ab9491e13e --- /dev/null +++ b/include/linux/mutex_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/perf_event_api.h b/include/linux/perf_event_api.h new file mode 100644 index 000000000000..c2fd6048b790 --- /dev/null +++ b/include/linux/perf_event_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/pgtable_api.h b/include/linux/pgtable_api.h new file mode 100644 index 000000000000..ff367a4ba8c4 --- /dev/null +++ b/include/linux/pgtable_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/ptrace_api.h b/include/linux/ptrace_api.h new file mode 100644 index 000000000000..26e7d275ad8d --- /dev/null +++ b/include/linux/ptrace_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/rcuwait_api.h b/include/linux/rcuwait_api.h new file mode 100644 index 000000000000..f962e28544dd --- /dev/null +++ b/include/linux/rcuwait_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/refcount_api.h b/include/linux/refcount_api.h new file mode 100644 index 000000000000..5f032589f568 --- /dev/null +++ b/include/linux/refcount_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/affinity.h b/include/linux/sched/affinity.h new file mode 100644 index 000000000000..227f5be81bcd --- /dev/null +++ b/include/linux/sched/affinity.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/cond_resched.h b/include/linux/sched/cond_resched.h new file mode 100644 index 000000000000..227f5be81bcd --- /dev/null +++ b/include/linux/sched/cond_resched.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/posix-timers.h b/include/linux/sched/posix-timers.h new file mode 100644 index 000000000000..523a381d6c88 --- /dev/null +++ b/include/linux/sched/posix-timers.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/rseq_api.h b/include/linux/sched/rseq_api.h new file mode 100644 index 000000000000..cf2af72693e1 --- /dev/null +++ b/include/linux/sched/rseq_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/task_flags.h b/include/linux/sched/task_flags.h new file mode 100644 index 000000000000..227f5be81bcd --- /dev/null +++ b/include/linux/sched/task_flags.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/sched/thread_info_api.h b/include/linux/sched/thread_info_api.h new file mode 100644 index 000000000000..2c60fbc16c08 --- /dev/null +++ b/include/linux/sched/thread_info_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/seqlock_api.h b/include/linux/seqlock_api.h new file mode 100644 index 000000000000..be91e7d3b826 --- /dev/null +++ b/include/linux/seqlock_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/softirq.h b/include/linux/softirq.h new file mode 100644 index 000000000000..c73d7dcb4cb5 --- /dev/null +++ b/include/linux/softirq.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/spinlock_api.h b/include/linux/spinlock_api.h new file mode 100644 index 000000000000..6338b27f98df --- /dev/null +++ b/include/linux/spinlock_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/swait_api.h b/include/linux/swait_api.h new file mode 100644 index 000000000000..1eeaaaaa5ea7 --- /dev/null +++ b/include/linux/swait_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/syscalls_api.h b/include/linux/syscalls_api.h new file mode 100644 index 000000000000..23e012b04db4 --- /dev/null +++ b/include/linux/syscalls_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/u64_stats_sync_api.h b/include/linux/u64_stats_sync_api.h new file mode 100644 index 000000000000..c72ca63da44b --- /dev/null +++ b/include/linux/u64_stats_sync_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/wait_api.h b/include/linux/wait_api.h new file mode 100644 index 000000000000..4e930548935a --- /dev/null +++ b/include/linux/wait_api.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/workqueue_api.h b/include/linux/workqueue_api.h new file mode 100644 index 000000000000..77debb5d2760 --- /dev/null +++ b/include/linux/workqueue_api.h @@ -0,0 +1 @@ +#include -- cgit v1.2.3 From fbed5664b73830dcb1a19af4f7f1f1b424f54609 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Feb 2022 13:28:20 +0100 Subject: sched/headers: Make the header build standalone This header depends on various scheduler definitions. Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra --- include/linux/sched/deadline.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 1aff00b65f3c..7c83d4d5a971 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -6,6 +6,8 @@ * NORMAL/BATCH tasks. */ +#include + #define MAX_DL_PRIO 0 static inline int dl_prio(int prio) -- cgit v1.2.3 From dc6e0818bc9a0336d9accf3ea35d146d72aa7a18 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sun, 20 Feb 2022 13:14:25 +0800 Subject: sched/cpuacct: Optimize away RCU read lock Since cpuacct_charge() is called from the scheduler update_curr(), we must already have rq lock held, then the RCU read lock can be optimized away. And do the same thing in it's wrapper cgroup_account_cputime(), but we can't use lockdep_assert_rq_held() there, which defined in kernel/sched/sched.h. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220220051426.5274-2-zhouchengming@bytedance.com --- include/linux/cgroup.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 75c151413fda..9a109c6ac0e0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -791,11 +791,9 @@ static inline void cgroup_account_cputime(struct task_struct *task, cpuacct_charge(task, delta_exec); - rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime(cgrp, delta_exec); - rcu_read_unlock(); } static inline void cgroup_account_cputime_field(struct task_struct *task, -- cgit v1.2.3 From 3eba0505d03a9c1eb30d40c2330c0880b22d1b3a Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sun, 20 Feb 2022 13:14:26 +0800 Subject: sched/cpuacct: Remove redundant RCU read lock The cpuacct_account_field() and it's cgroup v2 wrapper cgroup_account_cputime_field() is only called from cputime in task_group_account_field(), which is already in RCU read-side critical section. So remove these redundant RCU read lock. Suggested-by: Tejun Heo Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220220051426.5274-3-zhouchengming@bytedance.com --- include/linux/cgroup.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9a109c6ac0e0..1e356c222756 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -804,11 +804,9 @@ static inline void cgroup_account_cputime_field(struct task_struct *task, cpuacct_account_field(task, index, delta_exec); - rcu_read_lock(); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime_field(cgrp, index, delta_exec); - rcu_read_unlock(); } #else /* CONFIG_CGROUPS */ -- cgit v1.2.3 From fa2c3254d7cfff5f7a916ab928a562d1165f17bb Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 20 Jan 2022 16:25:19 +0000 Subject: sched/tracing: Don't re-read p->state when emitting sched_switch event As of commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") the following sequence becomes possible: p->__state = TASK_INTERRUPTIBLE; __schedule() deactivate_task(p); ttwu() READ !p->on_rq p->__state=TASK_WAKING trace_sched_switch() __trace_sched_switch_state() task_state_index() return 0; TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in the trace event. Prevent this by pushing the value read from __schedule() down the trace event. Reported-by: Abhijeet Dharmapurikar Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20220120162520.570782-2-valentin.schneider@arm.com --- include/linux/sched.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f00132e7ef6e..457c8a058b77 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1620,10 +1620,10 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -static inline unsigned int task_state_index(struct task_struct *tsk) +static inline unsigned int __task_state_index(unsigned int tsk_state, + unsigned int tsk_exit_state) { - unsigned int tsk_state = READ_ONCE(tsk->__state); - unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; + unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT; BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); @@ -1633,6 +1633,11 @@ static inline unsigned int task_state_index(struct task_struct *tsk) return fls(state); } +static inline unsigned int task_state_index(struct task_struct *tsk) +{ + return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state); +} + static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; -- cgit v1.2.3 From 25795ef6299f07ce3838f3253a9cb34f64efcfae Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 20 Jan 2022 16:25:20 +0000 Subject: sched/tracing: Report TASK_RTLOCK_WAIT tasks as TASK_UNINTERRUPTIBLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TASK_RTLOCK_WAIT currently isn't part of TASK_REPORT, thus a task blocking on an rtlock will appear as having a task state == 0, IOW TASK_RUNNING. The actual state is saved in p->saved_state, but reading it after reading p->__state has a few issues: o that could still be TASK_RUNNING in the case of e.g. rt_spin_lock o ttwu_state_match() might have changed that to TASK_RUNNING As pointed out by Eric, adding TASK_RTLOCK_WAIT to TASK_REPORT implies exposing a new state to userspace tools which way not know what to do with them. The only information that needs to be conveyed here is that a task is waiting on an rt_mutex, which matches TASK_UNINTERRUPTIBLE - there's no need for a new state. Reported-by: Uwe Kleine-König Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20220120162520.570782-3-valentin.schneider@arm.com --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 457c8a058b77..78b606c9ab38 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1630,6 +1630,14 @@ static inline unsigned int __task_state_index(unsigned int tsk_state, if (tsk_state == TASK_IDLE) state = TASK_REPORT_IDLE; + /* + * We're lying here, but rather than expose a completely new task state + * to userspace, we can make this appear as if the task has gone through + * a regular rt_mutex_lock() call. + */ + if (tsk_state == TASK_RTLOCK_WAIT) + state = TASK_UNINTERRUPTIBLE; + return fls(state); } -- cgit v1.2.3 From f2aa197e4794bf4c2c0c9570684f86e6fa103e8b Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sat, 5 Mar 2022 11:41:03 +0800 Subject: cgroup: Fix suspicious rcu_dereference_check() usage warning task_css_set_check() will use rcu_dereference_check() to check for rcu_read_lock_held() on the read-side, which is not true after commit dc6e0818bc9a ("sched/cpuacct: Optimize away RCU read lock"). This commit drop explicit rcu_read_lock(), change to RCU-sched read-side critical section. So fix the RCU warning by adding check for rcu_read_lock_sched_held(). Fixes: dc6e0818bc9a ("sched/cpuacct: Optimize away RCU read lock") Reported-by: Linux Kernel Functional Testing Reported-by: syzbot+16e3f2c77e7c5a0113f9@syzkaller.appspotmail.com Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Tested-by: Zhouyi Zhou Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220305034103.57123-1-zhouchengming@bytedance.com --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1e356c222756..0d1ada8968d7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -450,6 +450,7 @@ extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ + rcu_read_lock_sched_held() || \ lockdep_is_held(&cgroup_mutex) || \ lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) -- cgit v1.2.3 From 3387ce4d8a5f2956fab827edf499fe6780e83faa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Mar 2022 11:05:50 +0100 Subject: headers/prep: Fix header to build standalone: Add the dependency to , because cgroup_move_task() will dereference 'struct css_set'. ( Only older toolchains are affected, due to variations in the implementation of rcu_assign_pointer() et al. ) Cc: Peter Zijlstra Cc: Linus Torvalds Reported-by: Sachin Sant Reported-by: Andrew Morton Reported-by: Borislav Petkov Signed-off-by: Ingo Molnar --- include/linux/psi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/psi.h b/include/linux/psi.h index 7f7d1d88c3bb..89784763d19e 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -6,6 +6,7 @@ #include #include #include +#include struct seq_file; struct css_set; -- cgit v1.2.3