diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2021-02-22 21:35:15 -0800 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2021-02-22 21:35:15 -0800 |
| commit | cbecf716ca618fd44feda6bd9a64a8179d031fc5 (patch) | |
| tree | 186c9f69f0d11f773253c440dac85087f67288b7 /include/linux/sched | |
| parent | 6524d8eac258452e547f8a49c8a965ac6dd8a161 (diff) | |
| parent | 4c47097f8514e4b35a31e04e33172d0193cb38ed (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 5.12 merge window.
Diffstat (limited to 'include/linux/sched')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | include/linux/sched/coredump.h | 1 |
| -rw-r--r-- | include/linux/sched/cpufreq.h | 5 |
| -rw-r--r-- | include/linux/sched/debug.h | 2 |
| -rw-r--r-- | include/linux/sched/hotplug.h | 2 |
| -rw-r--r-- | include/linux/sched/jobctl.h | 4 |
| -rw-r--r-- | include/linux/sched/mm.h | 88 |
| -rw-r--r-- | include/linux/sched/sd_flags.h | 156 |
| -rw-r--r-- | include/linux/sched/signal.h | 29 |
| -rw-r--r-- | include/linux/sched/task.h | 4 |
| -rw-r--r-- | include/linux/sched/topology.h | 45 |
10 files changed, 262 insertions, 74 deletions
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ecdc6542070f..dfd82eab2902 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -72,6 +72,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ +#define MMF_MULTIPROCESS 27 /* mm is shared between processes */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 3ed5aa18593f..6205578ab6ee 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -28,6 +28,11 @@ static inline unsigned long map_util_freq(unsigned long util, { return (freq + (freq >> 2)) * util / cap; } + +static inline unsigned long map_util_perf(unsigned long util) +{ + return util + (util >> 2); +} #endif /* CONFIG_CPU_FREQ */ #endif /* _LINUX_SCHED_CPUFREQ_H */ diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 00c45a0e6abe..ae51f4529fc9 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -43,7 +43,7 @@ extern void proc_sched_set_task(struct task_struct *p); #endif /* Attach to any functions which should be ignored in wchan output. 
*/ -#define __sched __attribute__((__section__(".sched.text"))) +#define __sched __section(".sched.text") /* Linker adds these: start and end of __sched functions */ extern char __sched_text_start[], __sched_text_end[]; diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 9a62ffdd296f..412cdaba33eb 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu); extern int sched_cpu_deactivate(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_wait_empty(unsigned int cpu); extern int sched_cpu_dying(unsigned int cpu); #else +# define sched_cpu_wait_empty NULL # define sched_cpu_dying NULL #endif diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index d2b4204ba4d3..fa067de9f1a9 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -19,7 +19,6 @@ struct task_struct; #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ -#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -29,10 +28,9 @@ struct task_struct; #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) -#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); diff --git a/include/linux/sched/mm.h 
b/include/linux/sched/mm.h index f889e332912f..1ae08b8462a4 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } -/* - * This has to be called after a get_task_mm()/mmget_not_zero() - * followed by taking the mmap_lock for writing before modifying the - * vmas or anything the coredump pretends not to change from under it. - * - * It also has to be called when mmgrab() is used in the context of - * the process, but then the mm_count refcount is transferred outside - * the context of the process to run down_write() on that pinned mm. - * - * NOTE: find_extend_vma() called from GUP context is the only place - * that can modify the "mm" (notably the vm_start/end) under mmap_lock - * for reading and outside the context of the process, so it is also - * the only case that holds the mmap_lock for reading that must call - * this function. Generally if the mmap_lock is hold for reading - * there's no need of this check after get_task_mm()/mmget_not_zero(). - * - * This function can be obsoleted and the check can be removed, after - * the coredump code will hold the mmap_lock for writing before - * invoking the ->core_dump methods. - */ -static inline bool mmget_still_valid(struct mm_struct *mm) -{ - return likely(!mm->core_state); -} - /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. @@ -206,6 +181,22 @@ static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif /** + * might_alloc - Mark possible allocation sites + * @gfp_mask: gfp_t flags that would be used to allocate + * + * Similar to might_sleep() and other annotations, this can be used in functions + * that might allocate, but often don't. Compiles to nothing without + * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp allows blocking. 
+ */ +static inline void might_alloc(gfp_t gfp_mask) +{ + fs_reclaim_acquire(gfp_mask); + fs_reclaim_release(gfp_mask); + + might_sleep_if(gfpflags_allow_blocking(gfp_mask)); +} + +/** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * * This functions marks the beginning of the GFP_NOIO allocation scope. @@ -304,39 +295,38 @@ static inline void memalloc_nocma_restore(unsigned int flags) #endif #ifdef CONFIG_MEMCG +DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); /** - * memalloc_use_memcg - Starts the remote memcg charging scope. + * set_active_memcg - Starts the remote memcg charging scope. * @memcg: memcg to charge. * * This function marks the beginning of the remote memcg charging scope. All the * __GFP_ACCOUNT allocations till the end of the scope will be charged to the * given memcg. * - * NOTE: This function is not nesting safe. + * NOTE: This function can nest. Users must save the return value and + * reset the previous value after their own charging scope is over. */ -static inline void memalloc_use_memcg(struct mem_cgroup *memcg) +static inline struct mem_cgroup * +set_active_memcg(struct mem_cgroup *memcg) { - WARN_ON_ONCE(current->active_memcg); - current->active_memcg = memcg; -} + struct mem_cgroup *old; + + if (in_interrupt()) { + old = this_cpu_read(int_active_memcg); + this_cpu_write(int_active_memcg, memcg); + } else { + old = current->active_memcg; + current->active_memcg = memcg; + } -/** - * memalloc_unuse_memcg - Ends the remote memcg charging scope. - * - * This function marks the end of the remote memcg charging scope started by - * memalloc_use_memcg(). 
- */ -static inline void memalloc_unuse_memcg(void) -{ - current->active_memcg = NULL; + return old; } #else -static inline void memalloc_use_memcg(struct mem_cgroup *memcg) -{ -} - -static inline void memalloc_unuse_memcg(void) +static inline struct mem_cgroup * +set_active_memcg(struct mem_cgroup *memcg) { + return NULL; } #endif @@ -348,10 +338,13 @@ enum { MEMBARRIER_STATE_GLOBAL_EXPEDITED = (1U << 3), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY = (1U << 4), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE = (1U << 5), + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY = (1U << 6), + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ = (1U << 7), }; enum { MEMBARRIER_FLAG_SYNC_CORE = (1U << 0), + MEMBARRIER_FLAG_RSEQ = (1U << 1), }; #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS @@ -370,6 +363,8 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) extern void membarrier_exec_mmap(struct mm_struct *mm); +extern void membarrier_update_current_mm(struct mm_struct *next_mm); + #else #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS static inline void membarrier_arch_switch_mm(struct mm_struct *prev, @@ -384,6 +379,9 @@ static inline void membarrier_exec_mmap(struct mm_struct *mm) static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { } +static inline void membarrier_update_current_mm(struct mm_struct *next_mm) +{ +} #endif #endif /* _LINUX_SCHED_MM_H */ diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h new file mode 100644 index 000000000000..34b21e971d77 --- /dev/null +++ b/include/linux/sched/sd_flags.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * sched-domains (multiprocessor balancing) flag declarations. + */ + +#ifndef SD_FLAG +# error "Incorrect import of SD flags definitions" +#endif + +/* + * Hierarchical metaflags + * + * SHARED_CHILD: These flags are meant to be set from the base domain upwards. + * If a domain has this flag set, all of its children should have it set. 
This + * is usually because the flag describes some shared resource (all CPUs in that + * domain share the same resource), or because they are tied to a scheduling + * behaviour that we want to disable at some point in the hierarchy for + * scalability reasons. + * + * In those cases it doesn't make sense to have the flag set for a domain but + * not have it in (some of) its children: sched domains ALWAYS span their child + * domains, so operations done with parent domains will cover CPUs in the lower + * child domains. + * + * + * SHARED_PARENT: These flags are meant to be set from the highest domain + * downwards. If a domain has this flag set, all of its parents should have it + * set. This is usually for topology properties that start to appear above a + * certain level (e.g. domain starts spanning CPUs outside of the base CPU's + * socket). + */ +#define SDF_SHARED_CHILD 0x1 +#define SDF_SHARED_PARENT 0x2 + +/* + * Behavioural metaflags + * + * NEEDS_GROUPS: These flags are only relevant if the domain they are set on has + * more than one group. This is usually for balancing flags (load balancing + * involves equalizing a metric between groups), or for flags describing some + * shared resource (which would be shared between groups). + */ +#define SDF_NEEDS_GROUPS 0x4 + +/* + * Balance when about to become idle + * + * SHARED_CHILD: Set from the base domain up to cpuset.sched_relax_domain_level. + * NEEDS_GROUPS: Load balancing flag. + */ +SD_FLAG(SD_BALANCE_NEWIDLE, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) + +/* + * Balance on exec + * + * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level. + * NEEDS_GROUPS: Load balancing flag. + */ +SD_FLAG(SD_BALANCE_EXEC, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) + +/* + * Balance on fork, clone + * + * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level. + * NEEDS_GROUPS: Load balancing flag. 
+ */ +SD_FLAG(SD_BALANCE_FORK, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) + +/* + * Balance on wakeup + * + * SHARED_CHILD: Set from the base domain up to cpuset.sched_relax_domain_level. + * NEEDS_GROUPS: Load balancing flag. + */ +SD_FLAG(SD_BALANCE_WAKE, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) + +/* + * Consider waking task on waking CPU. + * + * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level. + */ +SD_FLAG(SD_WAKE_AFFINE, SDF_SHARED_CHILD) + +/* + * Domain members have different CPU capacities + * + * SHARED_PARENT: Set from the topmost domain down to the first domain where + * asymmetry is detected. + * NEEDS_GROUPS: Per-CPU capacity is asymmetric between groups. + */ +SD_FLAG(SD_ASYM_CPUCAPACITY, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) + +/* + * Domain members share CPU capacity (i.e. SMT) + * + * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share + * CPU capacity. + * NEEDS_GROUPS: Capacity is shared between groups. + */ +SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) + +/* + * Domain members share CPU package resources (i.e. caches) + * + * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share + * the same cache(s). + * NEEDS_GROUPS: Caches are shared between groups. + */ +SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) + +/* + * Only a single load balancing instance + * + * SHARED_PARENT: Set for all NUMA levels above NODE. Could be set from a + * different level upwards, but it doesn't change that if a + * domain has this flag set, then all of its parents need to have + * it too (otherwise the serialization doesn't make sense). + * NEEDS_GROUPS: No point in preserving domain if it has a single group. + */ +SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) + +/* + * Place busy tasks earlier in the domain + * + * SHARED_CHILD: Usually set on the SMT level. 
Technically could be set further + * up, but currently assumed to be set from the base domain + * upwards (see update_top_cache_domain()). + * NEEDS_GROUPS: Load balancing flag. + */ +SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) + +/* + * Prefer to place tasks in a sibling domain + * + * Set up until domains start spanning NUMA nodes. Close to being a SHARED_CHILD + * flag, but cleared below domains with SD_ASYM_CPUCAPACITY. + * + * NEEDS_GROUPS: Load balancing flag. + */ +SD_FLAG(SD_PREFER_SIBLING, SDF_NEEDS_GROUPS) + +/* + * sched_groups of this level overlap + * + * SHARED_PARENT: Set for all NUMA levels above NODE. + * NEEDS_GROUPS: Overlaps can only exist with more than one group. + */ +SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) + +/* + * Cross-node balancing + * + * SHARED_PARENT: Set for all NUMA levels above NODE. + * NEEDS_GROUPS: No point in preserving domain if it has a single group. + */ +SD_FLAG(SD_NUMA, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1bad18a1d8ba..3f6a0fcaa10c 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -228,12 +228,13 @@ struct signal_struct { * credential calculations * (notably. ptrace) * Deprecated do not use in new code. - * Use exec_update_mutex instead. - */ - struct mutex exec_update_mutex; /* Held while task_struct is being - * updated during exec, and may have - * inconsistent permissions. + * Use exec_update_lock instead. */ + struct rw_semaphore exec_update_lock; /* Held while task_struct is + * being updated during exec, + * and may have inconsistent + * permissions. 
+ */ } __randomize_layout; /* @@ -353,11 +354,23 @@ static inline int restart_syscall(void) return -ERESTARTNOINTR; } -static inline int signal_pending(struct task_struct *p) +static inline int task_sigpending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } +static inline int signal_pending(struct task_struct *p) +{ + /* + * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same + * behavior in terms of ensuring that we break out of wait loops + * so that notify signal callbacks can be processed. + */ + if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) + return 1; + return task_sigpending(p); +} + static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); @@ -365,7 +378,7 @@ static inline int __fatal_signal_pending(struct task_struct *p) static inline int fatal_signal_pending(struct task_struct *p) { - return signal_pending(p) && __fatal_signal_pending(p); + return task_sigpending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(long state, struct task_struct *p) @@ -502,7 +515,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); + WARN_ON(!signal_pending(current)); else restore_saved_sigmask(); } diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a98965007eef..c0f71f2e7160 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -47,9 +47,7 @@ extern spinlock_t mmlist_lock; extern union thread_union init_thread_union; extern struct task_struct init_task; -#ifdef CONFIG_PROVE_RCU extern int lockdep_tasklist_lock_is_held(void); -#endif /* #ifdef CONFIG_PROVE_RCU */ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); @@ -83,7 +81,7 @@ extern void 
do_group_exit(int); extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -extern long _do_fork(struct kernel_clone_args *kargs); +extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 820511289857..8f0f778b7c91 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -11,20 +11,29 @@ */ #ifdef CONFIG_SMP -#define SD_BALANCE_NEWIDLE 0x0001 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 0x0002 /* Balance on exec */ -#define SD_BALANCE_FORK 0x0004 /* Balance on fork, clone */ -#define SD_BALANCE_WAKE 0x0008 /* Balance on wakeup */ -#define SD_WAKE_AFFINE 0x0010 /* Wake task to waking CPU */ -#define SD_ASYM_CPUCAPACITY 0x0020 /* Domain members have different CPU capacities */ -#define SD_SHARE_CPUCAPACITY 0x0040 /* Domain members share CPU capacity */ -#define SD_SHARE_POWERDOMAIN 0x0080 /* Domain members share power domain */ -#define SD_SHARE_PKG_RESOURCES 0x0100 /* Domain members share CPU pkg resources */ -#define SD_SERIALIZE 0x0200 /* Only a single load balancing instance */ -#define SD_ASYM_PACKING 0x0400 /* Place busy groups earlier in the domain */ -#define SD_PREFER_SIBLING 0x0800 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x1000 /* sched_domains of this level overlap */ -#define SD_NUMA 0x2000 /* cross-node balancing */ +/* Generate SD flag indexes */ +#define SD_FLAG(name, mflags) __##name, +enum { + #include <linux/sched/sd_flags.h> + __SD_FLAG_CNT, +}; +#undef SD_FLAG +/* Generate SD flag bits */ +#define SD_FLAG(name, mflags) name = 1 << __##name, +enum { + #include <linux/sched/sd_flags.h> +}; +#undef SD_FLAG + +#ifdef CONFIG_SCHED_DEBUG + +struct sd_flag_debug { + unsigned int meta_flags; + char *name; 
+}; +extern const struct sd_flag_debug sd_flag_debug[]; + +#endif #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) @@ -216,6 +225,14 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) #endif /* !CONFIG_SMP */ +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) +extern void rebuild_sched_domains_energy(void); +#else +static inline void rebuild_sched_domains_energy(void) +{ +} +#endif + #ifndef arch_scale_cpu_capacity /** * arch_scale_cpu_capacity - get the capacity scale factor of a given CPU. |
