summaryrefslogtreecommitdiff
path: root/include/linux/sched
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2017-03-17 20:34:30 +0100
committerThomas Gleixner <tglx@linutronix.de>2017-03-17 20:34:30 +0100
commit79a21d572cf66968a2272fdf9711f835518256d9 (patch)
tree5fe3e4692fb8375faf8e1aeea1c2eae38c342250 /include/linux/sched
parentd1eb98143c56f24fef125f5bbed49ae0b52fb7d6 (diff)
parent822f5845f710e57d7e2df1fd1ee00d6e19d334fe (diff)
Merge tag 'efi-urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi into efi/urgent
Pull a single UEFI fix from Ard: - Reduce the severity of the notice that appears when the ESRT table points to memory that is not covered by the memory map. It is scaring our users and interfering with their nice splash screens. Note that the ESRT may still be perfectly usable, and is currently (to my knowledge) not widely used to begin with.
Diffstat (limited to 'include/linux/sched')
-rw-r--r--include/linux/sched/autogroup.h31
-rw-r--r--include/linux/sched/clock.h104
-rw-r--r--include/linux/sched/coredump.h74
-rw-r--r--include/linux/sched/cpufreq.h27
-rw-r--r--include/linux/sched/cputime.h187
-rw-r--r--include/linux/sched/deadline.h8
-rw-r--r--include/linux/sched/debug.h50
-rw-r--r--include/linux/sched/hotplug.h24
-rw-r--r--include/linux/sched/idle.h86
-rw-r--r--include/linux/sched/init.h11
-rw-r--r--include/linux/sched/jobctl.h36
-rw-r--r--include/linux/sched/loadavg.h31
-rw-r--r--include/linux/sched/mm.h174
-rw-r--r--include/linux/sched/nohz.h43
-rw-r--r--include/linux/sched/numa_balancing.h46
-rw-r--r--include/linux/sched/prio.h6
-rw-r--r--include/linux/sched/rt.h10
-rw-r--r--include/linux/sched/signal.h613
-rw-r--r--include/linux/sched/stat.h40
-rw-r--r--include/linux/sched/sysctl.h10
-rw-r--r--include/linux/sched/task.h139
-rw-r--r--include/linux/sched/task_stack.h121
-rw-r--r--include/linux/sched/topology.h226
-rw-r--r--include/linux/sched/user.h61
-rw-r--r--include/linux/sched/wake_q.h53
-rw-r--r--include/linux/sched/xacct.h48
26 files changed, 2246 insertions, 13 deletions
diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h
new file mode 100644
index 000000000000..55cd496df884
--- /dev/null
+++ b/include/linux/sched/autogroup.h
@@ -0,0 +1,31 @@
+#ifndef _LINUX_SCHED_AUTOGROUP_H
+#define _LINUX_SCHED_AUTOGROUP_H
+
+struct signal_struct;
+struct task_struct;
+struct task_group;
+struct seq_file;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern void sched_autogroup_create_attach(struct task_struct *p);
+extern void sched_autogroup_detach(struct task_struct *p);
+extern void sched_autogroup_fork(struct signal_struct *sig);
+extern void sched_autogroup_exit(struct signal_struct *sig);
+extern void sched_autogroup_exit_task(struct task_struct *p);
+#ifdef CONFIG_PROC_FS
+extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
+#endif
+#else
+static inline void sched_autogroup_create_attach(struct task_struct *p) { }
+static inline void sched_autogroup_detach(struct task_struct *p) { }
+static inline void sched_autogroup_fork(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit_task(struct task_struct *p) { }
+#endif
+
+#ifdef CONFIG_CGROUP_SCHED
+extern struct task_group root_task_group;
+#endif /* CONFIG_CGROUP_SCHED */
+
+#endif /* _LINUX_SCHED_AUTOGROUP_H */
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
new file mode 100644
index 000000000000..4a68c6791207
--- /dev/null
+++ b/include/linux/sched/clock.h
@@ -0,0 +1,104 @@
+#ifndef _LINUX_SCHED_CLOCK_H
+#define _LINUX_SCHED_CLOCK_H
+
+#include <linux/smp.h>
+
+/*
+ * Do not use outside of architecture code which knows its limitations.
+ *
+ * sched_clock() has no promise of monotonicity or bounded drift between
+ * CPUs, use (which you should not) requires disabling IRQs.
+ *
+ * Please use one of the three interfaces below.
+ */
+extern unsigned long long notrace sched_clock(void);
+
+/*
+ * See the comment in kernel/sched/clock.c
+ */
+extern u64 running_clock(void);
+extern u64 sched_clock_cpu(int cpu);
+
+
+extern void sched_clock_init(void);
+
+#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline void sched_clock_init_late(void)
+{
+}
+
+static inline void sched_clock_tick(void)
+{
+}
+
+static inline void clear_sched_clock_stable(void)
+{
+}
+
+static inline void sched_clock_idle_sleep_event(void)
+{
+}
+
+static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+}
+
+static inline u64 cpu_clock(int cpu)
+{
+ return sched_clock();
+}
+
+static inline u64 local_clock(void)
+{
+ return sched_clock();
+}
+#else
+extern void sched_clock_init_late(void);
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+extern int sched_clock_stable(void);
+extern void clear_sched_clock_stable(void);
+
+extern void sched_clock_tick(void);
+extern void sched_clock_idle_sleep_event(void);
+extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+/*
+ * As outlined in clock.c, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !! #
+ * ####################################################################
+ */
+static inline u64 cpu_clock(int cpu)
+{
+ return sched_clock_cpu(cpu);
+}
+
+static inline u64 local_clock(void)
+{
+ return sched_clock_cpu(raw_smp_processor_id());
+}
+#endif
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+/*
+ * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
+ * The reason for this explicit opt-in is not to have perf penalty with
+ * slow sched_clocks.
+ */
+extern void enable_sched_clock_irqtime(void);
+extern void disable_sched_clock_irqtime(void);
+#else
+static inline void enable_sched_clock_irqtime(void) {}
+static inline void disable_sched_clock_irqtime(void) {}
+#endif
+
+#endif /* _LINUX_SCHED_CLOCK_H */
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
new file mode 100644
index 000000000000..69eedcef8f03
--- /dev/null
+++ b/include/linux/sched/coredump.h
@@ -0,0 +1,74 @@
+#ifndef _LINUX_SCHED_COREDUMP_H
+#define _LINUX_SCHED_COREDUMP_H
+
+#include <linux/mm_types.h>
+
+#define SUID_DUMP_DISABLE 0 /* No setuid dumping */
+#define SUID_DUMP_USER 1 /* Dump as user of process */
+#define SUID_DUMP_ROOT 2 /* Dump as root */
+
+/* mm flags */
+
+/* for SUID_DUMP_* above */
+#define MMF_DUMPABLE_BITS 2
+#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
+
+extern void set_dumpable(struct mm_struct *mm, int value);
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
+static inline int __get_dumpable(unsigned long mm_flags)
+{
+ return mm_flags & MMF_DUMPABLE_MASK;
+}
+
+static inline int get_dumpable(struct mm_struct *mm)
+{
+ return __get_dumpable(mm->flags);
+}
+
+/* coredump filter bits */
+#define MMF_DUMP_ANON_PRIVATE 2
+#define MMF_DUMP_ANON_SHARED 3
+#define MMF_DUMP_MAPPED_PRIVATE 4
+#define MMF_DUMP_MAPPED_SHARED 5
+#define MMF_DUMP_ELF_HEADERS 6
+#define MMF_DUMP_HUGETLB_PRIVATE 7
+#define MMF_DUMP_HUGETLB_SHARED 8
+#define MMF_DUMP_DAX_PRIVATE 9
+#define MMF_DUMP_DAX_SHARED 10
+
+#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
+#define MMF_DUMP_FILTER_BITS 9
+#define MMF_DUMP_FILTER_MASK \
+ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
+#define MMF_DUMP_FILTER_DEFAULT \
+ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
+ (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+
+#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS)
+#else
+# define MMF_DUMP_MASK_DEFAULT_ELF 0
+#endif
+ /* leave room for more dump flags */
+#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
+#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
+/*
+ * This one-shot flag is dropped due to necessity of changing exe once again
+ * on NFS restore
+ */
+//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
+
+#define MMF_HAS_UPROBES 19 /* has uprobes */
+#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
+#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */
+#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */
+#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
+
+#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
+
+#endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
new file mode 100644
index 000000000000..d2be2ccbb372
--- /dev/null
+++ b/include/linux/sched/cpufreq.h
@@ -0,0 +1,27 @@
+#ifndef _LINUX_SCHED_CPUFREQ_H
+#define _LINUX_SCHED_CPUFREQ_H
+
+#include <linux/types.h>
+
+/*
+ * Interface between cpufreq drivers and the scheduler:
+ */
+
+#define SCHED_CPUFREQ_RT (1U << 0)
+#define SCHED_CPUFREQ_DL (1U << 1)
+#define SCHED_CPUFREQ_IOWAIT (1U << 2)
+
+#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
+
+#ifdef CONFIG_CPU_FREQ
+struct update_util_data {
+ void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
+};
+
+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
+ void (*func)(struct update_util_data *data, u64 time,
+ unsigned int flags));
+void cpufreq_remove_update_util_hook(int cpu);
+#endif /* CONFIG_CPU_FREQ */
+
+#endif /* _LINUX_SCHED_CPUFREQ_H */
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
new file mode 100644
index 000000000000..4c5b9735c1ae
--- /dev/null
+++ b/include/linux/sched/cputime.h
@@ -0,0 +1,187 @@
+#ifndef _LINUX_SCHED_CPUTIME_H
+#define _LINUX_SCHED_CPUTIME_H
+
+#include <linux/sched/signal.h>
+
+/*
+ * cputime accounting APIs:
+ */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#include <asm/cputime.h>
+
+#ifndef cputime_to_nsecs
+# define cputime_to_nsecs(__ct) \
+ (cputime_to_usecs(__ct) * NSEC_PER_USEC)
+#endif
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void task_cputime(struct task_struct *t,
+ u64 *utime, u64 *stime);
+extern u64 task_gtime(struct task_struct *t);
+#else
+static inline void task_cputime(struct task_struct *t,
+ u64 *utime, u64 *stime)
+{
+ *utime = t->utime;
+ *stime = t->stime;
+}
+
+static inline u64 task_gtime(struct task_struct *t)
+{
+ return t->gtime;
+}
+#endif
+
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+static inline void task_cputime_scaled(struct task_struct *t,
+ u64 *utimescaled,
+ u64 *stimescaled)
+{
+ *utimescaled = t->utimescaled;
+ *stimescaled = t->stimescaled;
+}
+#else
+static inline void task_cputime_scaled(struct task_struct *t,
+ u64 *utimescaled,
+ u64 *stimescaled)
+{
+ task_cputime(t, utimescaled, stimescaled);
+}
+#endif
+
+extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
+extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
+
+
+/*
+ * Thread group CPU time accounting.
+ */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
+
+
+/*
+ * The following are functions that support scheduler-internal time accounting.
+ * These functions are generally called at the timer tick. None of this depends
+ * on CONFIG_SCHEDSTATS.
+ */
+
+/**
+ * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running
+ *
+ * @tsk: Pointer to target task.
+ */
+#ifdef CONFIG_POSIX_TIMERS
+static inline
+struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk)
+{
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+
+ /* Check if cputimer isn't running. This is accessed without locking. */
+ if (!READ_ONCE(cputimer->running))
+ return NULL;
+
+ /*
+ * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime
+ * in __exit_signal(), we won't account to the signal struct further
+ * cputime consumed by that task, even though the task can still be
+ * ticking after __exit_signal().
+ *
+ * In order to keep a consistent behaviour between thread group cputime
+ * and thread group cputimer accounting, lets also ignore the cputime
+ * elapsing after __exit_signal() in any thread group timer running.
+ *
+ * This makes sure that POSIX CPU clocks and timers are synchronized, so
+ * that a POSIX CPU timer won't expire while the corresponding POSIX CPU
+ * clock delta is behind the expiring timer value.
+ */
+ if (unlikely(!tsk->sighand))
+ return NULL;
+
+ return cputimer;
+}
+#else
+static inline
+struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk)
+{
+ return NULL;
+}
+#endif
+
+/**
+ * account_group_user_time - Maintain utime for a thread group.
+ *
+ * @tsk: Pointer to task structure.
+ * @cputime: Time value by which to increment the utime field of the
+ * thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void account_group_user_time(struct task_struct *tsk,
+ u64 cputime)
+{
+ struct thread_group_cputimer *cputimer = get_running_cputimer(tsk);
+
+ if (!cputimer)
+ return;
+
+ atomic64_add(cputime, &cputimer->cputime_atomic.utime);
+}
+
+/**
+ * account_group_system_time - Maintain stime for a thread group.
+ *
+ * @tsk: Pointer to task structure.
+ * @cputime: Time value by which to increment the stime field of the
+ * thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void account_group_system_time(struct task_struct *tsk,
+ u64 cputime)
+{
+ struct thread_group_cputimer *cputimer = get_running_cputimer(tsk);
+
+ if (!cputimer)
+ return;
+
+ atomic64_add(cputime, &cputimer->cputime_atomic.stime);
+}
+
+/**
+ * account_group_exec_runtime - Maintain exec runtime for a thread group.
+ *
+ * @tsk: Pointer to task structure.
+ * @ns: Time value by which to increment the sum_exec_runtime field
+ * of the thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+ unsigned long long ns)
+{
+ struct thread_group_cputimer *cputimer = get_running_cputimer(tsk);
+
+ if (!cputimer)
+ return;
+
+ atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime);
+}
+
+static inline void prev_cputime_init(struct prev_cputime *prev)
+{
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ prev->utime = prev->stime = 0;
+ raw_spin_lock_init(&prev->lock);
+#endif
+}
+
+extern unsigned long long
+task_sched_runtime(struct task_struct *task);
+
+#endif /* _LINUX_SCHED_CPUTIME_H */
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 9089a2ae913d..975be862e083 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -1,5 +1,7 @@
-#ifndef _SCHED_DEADLINE_H
-#define _SCHED_DEADLINE_H
+#ifndef _LINUX_SCHED_DEADLINE_H
+#define _LINUX_SCHED_DEADLINE_H
+
+#include <linux/sched.h>
/*
* SCHED_DEADLINE tasks has negative priorities, reflecting
@@ -26,4 +28,4 @@ static inline bool dl_time_before(u64 a, u64 b)
return (s64)(a - b) < 0;
}
-#endif /* _SCHED_DEADLINE_H */
+#endif /* _LINUX_SCHED_DEADLINE_H */
diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
new file mode 100644
index 000000000000..e0eaee54c5a4
--- /dev/null
+++ b/include/linux/sched/debug.h
@@ -0,0 +1,50 @@
+#ifndef _LINUX_SCHED_DEBUG_H
+#define _LINUX_SCHED_DEBUG_H
+
+/*
+ * Various scheduler/task debugging interfaces:
+ */
+
+struct task_struct;
+
+extern void dump_cpu_task(int cpu);
+
+/*
+ * Only dump TASK_* tasks. (0 for all tasks)
+ */
+extern void show_state_filter(unsigned long state_filter);
+
+static inline void show_state(void)
+{
+ show_state_filter(0);
+}
+
+struct pt_regs;
+
+extern void show_regs(struct pt_regs *);
+
+/*
+ * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
+ * task), SP is the stack pointer of the first frame that should be shown in the back
+ * trace (or NULL if the entire call-chain of the task should be shown).
+ */
+extern void show_stack(struct task_struct *task, unsigned long *sp);
+
+extern void sched_show_task(struct task_struct *p);
+
+#ifdef CONFIG_SCHED_DEBUG
+struct seq_file;
+extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
+extern void proc_sched_set_task(struct task_struct *p);
+#endif
+
+/* Attach to any functions which should be ignored in wchan output. */
+#define __sched __attribute__((__section__(".sched.text")))
+
+/* Linker adds these: start and end of __sched functions */
+extern char __sched_text_start[], __sched_text_end[];
+
+/* Is this address in the __sched functions? */
+extern int in_sched_functions(unsigned long addr);
+
+#endif /* _LINUX_SCHED_DEBUG_H */
diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h
new file mode 100644
index 000000000000..752ac7e628d7
--- /dev/null
+++ b/include/linux/sched/hotplug.h
@@ -0,0 +1,24 @@
+#ifndef _LINUX_SCHED_HOTPLUG_H
+#define _LINUX_SCHED_HOTPLUG_H
+
+/*
+ * Scheduler interfaces for hotplug CPU support:
+ */
+
+extern int sched_cpu_starting(unsigned int cpu);
+extern int sched_cpu_activate(unsigned int cpu);
+extern int sched_cpu_deactivate(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_dying(unsigned int cpu);
+#else
+# define sched_cpu_dying NULL
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void idle_task_exit(void);
+#else
+static inline void idle_task_exit(void) {}
+#endif
+
+#endif /* _LINUX_SCHED_HOTPLUG_H */
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
new file mode 100644
index 000000000000..5ca63ebad6b4
--- /dev/null
+++ b/include/linux/sched/idle.h
@@ -0,0 +1,86 @@
+#ifndef _LINUX_SCHED_IDLE_H
+#define _LINUX_SCHED_IDLE_H
+
+#include <linux/sched.h>
+
+enum cpu_idle_type {
+ CPU_IDLE,
+ CPU_NOT_IDLE,
+ CPU_NEWLY_IDLE,
+ CPU_MAX_IDLE_TYPES
+};
+
+extern void wake_up_if_idle(int cpu);
+
+/*
+ * Idle thread specific functions to determine the need_resched
+ * polling state.
+ */
+#ifdef TIF_POLLING_NRFLAG
+
+static inline void __current_set_polling(void)
+{
+ set_thread_flag(TIF_POLLING_NRFLAG);
+}
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ __current_set_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_curr()
+ */
+ smp_mb__after_atomic();
+
+ return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
+{
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_curr()
+ */
+ smp_mb__after_atomic();
+
+ return unlikely(tif_need_resched());
+}
+
+#else
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ return unlikely(tif_need_resched());
+}
+#endif
+
+static inline void current_clr_polling(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
+ * Once the bit is cleared, we'll get IPIs with every new
+ * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
+ * fold.
+ */
+ smp_mb(); /* paired with resched_curr() */
+
+ preempt_fold_need_resched();
+}
+
+#endif /* _LINUX_SCHED_IDLE_H */
diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h
new file mode 100644
index 000000000000..127215045285
--- /dev/null
+++ b/include/linux/sched/init.h
@@ -0,0 +1,11 @@
+#ifndef _LINUX_SCHED_INIT_H
+#define _LINUX_SCHED_INIT_H
+
+/*
+ * Scheduler init related prototypes:
+ */
+
+extern void sched_init(void);
+extern void sched_init_smp(void);
+
+#endif /* _LINUX_SCHED_INIT_H */
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
new file mode 100644
index 000000000000..016afa0fb3bb
--- /dev/null
+++ b/include/linux/sched/jobctl.h
@@ -0,0 +1,36 @@
+#ifndef _LINUX_SCHED_JOBCTL_H
+#define _LINUX_SCHED_JOBCTL_H
+
+#include <linux/types.h>
+
+struct task_struct;
+
+/*
+ * task->jobctl flags
+ */
+#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */
+
+#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */
+#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */
+#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */
+#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */
+#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */
+#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
+#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
+
+#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
+#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
+#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT)
+#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT)
+#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT)
+#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
+#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
+
+#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
+#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
+
+extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask);
+extern void task_clear_jobctl_trapping(struct task_struct *task);
+extern void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask);
+
+#endif /* _LINUX_SCHED_JOBCTL_H */
diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h
new file mode 100644
index 000000000000..4264bc6b2c27
--- /dev/null
+++ b/include/linux/sched/loadavg.h
@@ -0,0 +1,31 @@
+#ifndef _LINUX_SCHED_LOADAVG_H
+#define _LINUX_SCHED_LOADAVG_H
+
+/*
+ * These are the constant used to fake the fixed-point load-average
+ * counting. Some notes:
+ * - 11 bit fractions expand to 22 bits by the multiplies: this gives
+ * a load-average precision of 10 bits integer + 11 bits fractional
+ * - if you want to count load-averages more often, you need more
+ * precision, or rounding will get you. With 2-second counting freq,
+ * the EXP_n values would be 1981, 2034 and 2043 if still using only
+ * 11 bit fractions.
+ */
+extern unsigned long avenrun[]; /* Load averages */
+extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
+
+#define FSHIFT 11 /* nr of bits of precision */
+#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
+#define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */
+#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
+#define EXP_5 2014 /* 1/exp(5sec/5min) */
+#define EXP_15 2037 /* 1/exp(5sec/15min) */
+
+#define CALC_LOAD(load,exp,n) \
+ load *= exp; \
+ load += n*(FIXED_1-exp); \
+ load >>= FSHIFT;
+
+extern void calc_global_load(unsigned long ticks);
+
+#endif /* _LINUX_SCHED_LOADAVG_H */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
new file mode 100644
index 000000000000..830953ebb391
--- /dev/null
+++ b/include/linux/sched/mm.h
@@ -0,0 +1,174 @@
+#ifndef _LINUX_SCHED_MM_H
+#define _LINUX_SCHED_MM_H
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+#include <linux/gfp.h>
+
+/*
+ * Routines for handling mm_structs
+ */
+extern struct mm_struct * mm_alloc(void);
+
+/**
+ * mmgrab() - Pin a &struct mm_struct.
+ * @mm: The &struct mm_struct to pin.
+ *
+ * Make sure that @mm will not get freed even after the owning task
+ * exits. This doesn't guarantee that the associated address space
+ * will still exist later on and mmget_not_zero() has to be used before
+ * accessing it.
+ *
+ * This is a preferred way to to pin @mm for a longer/unbounded amount
+ * of time.
+ *
+ * Use mmdrop() to release the reference acquired by mmgrab().
+ *
+ * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * of &mm_struct.mm_count vs &mm_struct.mm_users.
+ */
+static inline void mmgrab(struct mm_struct *mm)
+{
+ atomic_inc(&mm->mm_count);
+}
+
+/* mmdrop drops the mm and the page tables */
+extern void __mmdrop(struct mm_struct *);
+static inline void mmdrop(struct mm_struct *mm)
+{
+ if (unlikely(atomic_dec_and_test(&mm->mm_count)))
+ __mmdrop(mm);
+}
+
+static inline void mmdrop_async_fn(struct work_struct *work)
+{
+ struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
+ __mmdrop(mm);
+}
+
+static inline void mmdrop_async(struct mm_struct *mm)
+{
+ if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
+ INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
+ schedule_work(&mm->async_put_work);
+ }
+}
+
+/**
+ * mmget() - Pin the address space associated with a &struct mm_struct.
+ * @mm: The address space to pin.
+ *
+ * Make sure that the address space of the given &struct mm_struct doesn't
+ * go away. This does not protect against parts of the address space being
+ * modified or freed, however.
+ *
+ * Never use this function to pin this address space for an
+ * unbounded/indefinite amount of time.
+ *
+ * Use mmput() to release the reference acquired by mmget().
+ *
+ * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * of &mm_struct.mm_count vs &mm_struct.mm_users.
+ */
+static inline void mmget(struct mm_struct *mm)
+{
+ atomic_inc(&mm->mm_users);
+}
+
+static inline bool mmget_not_zero(struct mm_struct *mm)
+{
+ return atomic_inc_not_zero(&mm->mm_users);
+}
+
+/* mmput gets rid of the mappings and all user-space */
+extern void mmput(struct mm_struct *);
+#ifdef CONFIG_MMU
+/* same as above but performs the slow path from the async context. Can
+ * be called from the atomic context as well
+ */
+extern void mmput_async(struct mm_struct *);
+#endif
+
+/* Grab a reference to a task's mm, if it is not already going away */
+extern struct mm_struct *get_task_mm(struct task_struct *task);
+/*
+ * Grab a reference to a task's mm, if it is not already going away
+ * and ptrace_may_access with the mode parameter passed to it
+ * succeeds.
+ */
+extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
+/* Remove the current tasks stale references to the old mm_struct */
+extern void mm_release(struct task_struct *, struct mm_struct *);
+
+#ifdef CONFIG_MEMCG
+extern void mm_update_next_owner(struct mm_struct *mm);
+#else
+static inline void mm_update_next_owner(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_MEMCG */
+
+#ifdef CONFIG_MMU
+extern void arch_pick_mmap_layout(struct mm_struct *mm);
+extern unsigned long
+arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
+ unsigned long, unsigned long);
+extern unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
+#else
+static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
+#endif
+
+static inline bool in_vfork(struct task_struct *tsk)
+{
+ bool ret;
+
+ /*
+ * need RCU to access ->real_parent if CLONE_VM was used along with
+ * CLONE_PARENT.
+ *
+ * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
+ * imply CLONE_VM
+ *
+ * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
+ * ->real_parent is not necessarily the task doing vfork(), so in
+ * theory we can't rely on task_lock() if we want to dereference it.
+ *
+ * And in this case we can't trust the real_parent->mm == tsk->mm
+ * check, it can be false negative. But we do not care, if init or
+ * another oom-unkillable task does this it should blame itself.
+ */
+ rcu_read_lock();
+ ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags
+ * __GFP_FS is also cleared as it implies __GFP_IO.
+ */
+static inline gfp_t memalloc_noio_flags(gfp_t flags)
+{
+ if (unlikely(current->flags & PF_MEMALLOC_NOIO))
+ flags &= ~(__GFP_IO | __GFP_FS);
+ return flags;
+}
+
+static inline unsigned int memalloc_noio_save(void)
+{
+ unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
+ current->flags |= PF_MEMALLOC_NOIO;
+ return flags;
+}
+
+static inline void memalloc_noio_restore(unsigned int flags)
+{
+ current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
+}
+
+#endif /* _LINUX_SCHED_MM_H */
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
new file mode 100644
index 000000000000..4995b717500b
--- /dev/null
+++ b/include/linux/sched/nohz.h
@@ -0,0 +1,43 @@
+#ifndef _LINUX_SCHED_NOHZ_H
+#define _LINUX_SCHED_NOHZ_H
+
+/*
+ * This is the interface between the scheduler and nohz/dyntics:
+ */
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+extern void cpu_load_update_nohz_start(void);
+extern void cpu_load_update_nohz_stop(void);
+#else
+static inline void cpu_load_update_nohz_start(void) { }
+static inline void cpu_load_update_nohz_stop(void) { }
+#endif
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+extern void nohz_balance_enter_idle(int cpu);
+extern void set_cpu_sd_state_idle(void);
+extern int get_nohz_timer_target(void);
+#else
+static inline void nohz_balance_enter_idle(int cpu) { }
+static inline void set_cpu_sd_state_idle(void) { }
+#endif
+
+#ifdef CONFIG_NO_HZ_COMMON
+void calc_load_enter_idle(void);
+void calc_load_exit_idle(void);
+#else
+static inline void calc_load_enter_idle(void) { }
+static inline void calc_load_exit_idle(void) { }
+#endif /* CONFIG_NO_HZ_COMMON */
+
+#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
+extern void wake_up_nohz_cpu(int cpu);
+#else
+static inline void wake_up_nohz_cpu(int cpu) { }
+#endif
+
+#ifdef CONFIG_NO_HZ_FULL
+extern u64 scheduler_tick_max_deferment(void);
+#endif
+
+#endif /* _LINUX_SCHED_NOHZ_H */
diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
new file mode 100644
index 000000000000..35d5fc77b4be
--- /dev/null
+++ b/include/linux/sched/numa_balancing.h
@@ -0,0 +1,46 @@
+#ifndef _LINUX_SCHED_NUMA_BALANCING_H
+#define _LINUX_SCHED_NUMA_BALANCING_H
+
+/*
+ * This is the interface between the scheduler and the MM that
+ * implements memory access pattern based NUMA-balancing:
+ */
+
+#include <linux/sched.h>
+
+#define TNF_MIGRATED 0x01
+#define TNF_NO_GROUP 0x02
+#define TNF_SHARED 0x04
+#define TNF_FAULT_LOCAL 0x08
+#define TNF_MIGRATE_FAIL 0x10
+
+#ifdef CONFIG_NUMA_BALANCING
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
+extern pid_t task_numa_group_id(struct task_struct *p);
+extern void set_numabalancing_state(bool enabled);
+extern void task_numa_free(struct task_struct *p);
+extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
+ int src_nid, int dst_cpu);
+#else
+static inline void task_numa_fault(int last_node, int node, int pages,
+ int flags)
+{
+}
+static inline pid_t task_numa_group_id(struct task_struct *p)
+{
+ return 0;
+}
+static inline void set_numabalancing_state(bool enabled)
+{
+}
+static inline void task_numa_free(struct task_struct *p)
+{
+}
+static inline bool should_numa_migrate_memory(struct task_struct *p,
+ struct page *page, int src_nid, int dst_cpu)
+{
+ return true;
+}
+#endif
+
+#endif /* _LINUX_SCHED_NUMA_BALANCING_H */
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
index d9cf5a5762d9..2cc450f6ec54 100644
--- a/include/linux/sched/prio.h
+++ b/include/linux/sched/prio.h
@@ -1,5 +1,5 @@
-#ifndef _SCHED_PRIO_H
-#define _SCHED_PRIO_H
+#ifndef _LINUX_SCHED_PRIO_H
+#define _LINUX_SCHED_PRIO_H
#define MAX_NICE 19
#define MIN_NICE -20
@@ -57,4 +57,4 @@ static inline long rlimit_to_nice(long prio)
return (MAX_NICE - prio + 1);
}
-#endif /* _SCHED_PRIO_H */
+#endif /* _LINUX_SCHED_PRIO_H */
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index a30b172df6e1..3bd668414f61 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -1,7 +1,9 @@
-#ifndef _SCHED_RT_H
-#define _SCHED_RT_H
+#ifndef _LINUX_SCHED_RT_H
+#define _LINUX_SCHED_RT_H
-#include <linux/sched/prio.h>
+#include <linux/sched.h>
+
+struct task_struct;
static inline int rt_prio(int prio)
{
@@ -57,4 +59,4 @@ extern void normalize_rt_tasks(void);
*/
#define RR_TIMESLICE (100 * HZ / 1000)
-#endif /* _SCHED_RT_H */
+#endif /* _LINUX_SCHED_RT_H */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
new file mode 100644
index 000000000000..2cf446704cd4
--- /dev/null
+++ b/include/linux/sched/signal.h
@@ -0,0 +1,613 @@
+#ifndef _LINUX_SCHED_SIGNAL_H
+#define _LINUX_SCHED_SIGNAL_H
+
+#include <linux/rculist.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/sched/jobctl.h>
+#include <linux/sched/task.h>
+#include <linux/cred.h>
+
+/*
+ * Types defining task->signal and task->sighand and APIs using them:
+ */
+
+struct sighand_struct {
+ atomic_t count;
+ struct k_sigaction action[_NSIG];
+ spinlock_t siglock;
+ wait_queue_head_t signalfd_wqh;
+};
+
+/*
+ * Per-process accounting stats:
+ */
+struct pacct_struct {
+ int ac_flag;
+ long ac_exitcode;
+ unsigned long ac_mem;
+ u64 ac_utime, ac_stime;
+ unsigned long ac_minflt, ac_majflt;
+};
+
+struct cpu_itimer {
+ u64 expires;
+ u64 incr;
+};
+
+/*
+ * This is the atomic variant of task_cputime, which can be used for
+ * storing and updating task_cputime statistics without locking.
+ */
+struct task_cputime_atomic {
+ atomic64_t utime;
+ atomic64_t stime;
+ atomic64_t sum_exec_runtime;
+};
+
+#define INIT_CPUTIME_ATOMIC \
+ (struct task_cputime_atomic) { \
+ .utime = ATOMIC64_INIT(0), \
+ .stime = ATOMIC64_INIT(0), \
+ .sum_exec_runtime = ATOMIC64_INIT(0), \
+ }
+/**
+ * struct thread_group_cputimer - thread group interval timer counts
+ * @cputime_atomic: atomic thread group interval timers.
+ * @running: true when there are timers running and
+ * @cputime_atomic receives updates.
+ * @checking_timer: true when a thread in the group is in the
+ * process of checking for thread group timers.
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU timer calculations.
+ */
+struct thread_group_cputimer {
+ struct task_cputime_atomic cputime_atomic;
+ bool running;
+ bool checking_timer;
+};
+
+/*
+ * NOTE! "signal_struct" does not have its own
+ * locking, because a shared signal_struct always
+ * implies a shared sighand_struct, so locking
+ * sighand_struct is always a proper superset of
+ * the locking of signal_struct.
+ */
+struct signal_struct {
+ atomic_t sigcnt;
+ atomic_t live;
+ int nr_threads;
+ struct list_head thread_head;
+
+ wait_queue_head_t wait_chldexit; /* for wait4() */
+
+ /* current thread group signal load-balancing target: */
+ struct task_struct *curr_target;
+
+ /* shared signal handling: */
+ struct sigpending shared_pending;
+
+ /* thread group exit support */
+ int group_exit_code;
+ /* overloaded:
+ * - notify group_exit_task when ->count is equal to notify_count
+ * - everyone except group_exit_task is stopped during signal delivery
+ * of fatal signals, group_exit_task processes the signal.
+ */
+ int notify_count;
+ struct task_struct *group_exit_task;
+
+ /* thread group stop support, overloads group_exit_code too */
+ int group_stop_count;
+ unsigned int flags; /* see SIGNAL_* flags below */
+
+ /*
+ * PR_SET_CHILD_SUBREAPER marks a process, like a service
+ * manager, to re-parent orphan (double-forking) child processes
+ * to this process instead of 'init'. The service manager is
+ * able to receive SIGCHLD signals and is able to investigate
+ * the process until it calls wait(). All children of this
+ * process will inherit a flag if they should look for a
+ * child_subreaper process at exit.
+ */
+ unsigned int is_child_subreaper:1;
+ unsigned int has_child_subreaper:1;
+
+#ifdef CONFIG_POSIX_TIMERS
+
+ /* POSIX.1b Interval Timers */
+ int posix_timer_id;
+ struct list_head posix_timers;
+
+ /* ITIMER_REAL timer for the process */
+ struct hrtimer real_timer;
+ ktime_t it_real_incr;
+
+ /*
+ * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use
+ * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these
+ * values are defined to 0 and 1 respectively
+ */
+ struct cpu_itimer it[2];
+
+ /*
+ * Thread group totals for process CPU timers.
+ * See thread_group_cputimer(), et al, for details.
+ */
+ struct thread_group_cputimer cputimer;
+
+ /* Earliest-expiration cache. */
+ struct task_cputime cputime_expires;
+
+ struct list_head cpu_timers[3];
+
+#endif
+
+ struct pid *leader_pid;
+
+#ifdef CONFIG_NO_HZ_FULL
+ atomic_t tick_dep_mask;
+#endif
+
+ struct pid *tty_old_pgrp;
+
+ /* boolean value for session group leader */
+ int leader;
+
+ struct tty_struct *tty; /* NULL if no tty */
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+ struct autogroup *autogroup;
+#endif
+ /*
+ * Cumulative resource counters for dead threads in the group,
+ * and for reaped dead child processes forked by this group.
+ * Live threads maintain their own counters and add to these
+ * in __exit_signal, except for the group leader.
+ */
+ seqlock_t stats_lock;
+ u64 utime, stime, cutime, cstime;
+ u64 gtime;
+ u64 cgtime;
+ struct prev_cputime prev_cputime;
+ unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
+ unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
+ unsigned long inblock, oublock, cinblock, coublock;
+ unsigned long maxrss, cmaxrss;
+ struct task_io_accounting ioac;
+
+ /*
+ * Cumulative ns of schedule CPU time fo dead threads in the
+ * group, not including a zombie group leader, (This only differs
+ * from jiffies_to_ns(utime + stime) if sched_clock uses something
+ * other than jiffies.)
+ */
+ unsigned long long sum_sched_runtime;
+
+ /*
+ * We don't bother to synchronize most readers of this at all,
+ * because there is no reader checking a limit that actually needs
+ * to get both rlim_cur and rlim_max atomically, and either one
+ * alone is a single word that can safely be read normally.
+ * getrlimit/setrlimit use task_lock(current->group_leader) to
+ * protect this instead of the siglock, because they really
+ * have no need to disable irqs.
+ */
+ struct rlimit rlim[RLIM_NLIMITS];
+
+#ifdef CONFIG_BSD_PROCESS_ACCT
+ struct pacct_struct pacct; /* per-process accounting information */
+#endif
+#ifdef CONFIG_TASKSTATS
+ struct taskstats *stats;
+#endif
+#ifdef CONFIG_AUDIT
+ unsigned audit_tty;
+ struct tty_audit_buf *tty_audit_buf;
+#endif
+
+ /*
+ * Thread is the potential origin of an oom condition; kill first on
+ * oom
+ */
+ bool oom_flag_origin;
+ short oom_score_adj; /* OOM kill score adjustment */
+ short oom_score_adj_min; /* OOM kill score adjustment min value.
+ * Only settable by CAP_SYS_RESOURCE. */
+ struct mm_struct *oom_mm; /* recorded mm when the thread group got
+ * killed by the oom killer */
+
+ struct mutex cred_guard_mutex; /* guard against foreign influences on
+ * credential calculations
+ * (notably. ptrace) */
+};
+
+/*
+ * Bits in flags field of signal_struct.
+ */
+#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */
+#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */
+#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */
+#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */
+/*
+ * Pending notifications to parent.
+ */
+#define SIGNAL_CLD_STOPPED 0x00000010
+#define SIGNAL_CLD_CONTINUED 0x00000020
+#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED)
+
+#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */
+
+#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \
+ SIGNAL_STOP_CONTINUED)
+
+static inline void signal_set_stop_flags(struct signal_struct *sig,
+ unsigned int flags)
+{
+ WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
+ sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
+}
+
+/* If true, all threads except ->group_exit_task have pending SIGKILL */
+static inline int signal_group_exit(const struct signal_struct *sig)
+{
+ return (sig->flags & SIGNAL_GROUP_EXIT) ||
+ (sig->group_exit_task != NULL);
+}
+
+extern void flush_signals(struct task_struct *);
+extern void ignore_signals(struct task_struct *);
+extern void flush_signal_handlers(struct task_struct *, int force_default);
+extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
+
+static inline int kernel_dequeue_signal(siginfo_t *info)
+{
+ struct task_struct *tsk = current;
+ siginfo_t __info;
+ int ret;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info);
+ spin_unlock_irq(&tsk->sighand->siglock);
+
+ return ret;
+}
+
+static inline void kernel_signal_stop(void)
+{
+ spin_lock_irq(&current->sighand->siglock);
+ if (current->jobctl & JOBCTL_STOP_DEQUEUED)
+ __set_current_state(TASK_STOPPED);
+ spin_unlock_irq(&current->sighand->siglock);
+
+ schedule();
+}
+extern int send_sig_info(int, struct siginfo *, struct task_struct *);
+extern int force_sigsegv(int, struct task_struct *);
+extern int force_sig_info(int, struct siginfo *, struct task_struct *);
+extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
+extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
+extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *,
+ const struct cred *, u32);
+extern int kill_pgrp(struct pid *pid, int sig, int priv);
+extern int kill_pid(struct pid *pid, int sig, int priv);
+extern int kill_proc_info(int, struct siginfo *, pid_t);
+extern __must_check bool do_notify_parent(struct task_struct *, int);
+extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
+extern void force_sig(int, struct task_struct *);
+extern int send_sig(int, struct task_struct *, int);
+extern int zap_other_threads(struct task_struct *p);
+extern struct sigqueue *sigqueue_alloc(void);
+extern void sigqueue_free(struct sigqueue *);
+extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group);
+extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
+
+static inline int restart_syscall(void)
+{
+ set_tsk_thread_flag(current, TIF_SIGPENDING);
+ return -ERESTARTNOINTR;
+}
+
+static inline int signal_pending(struct task_struct *p)
+{
+ return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
+}
+
+static inline int __fatal_signal_pending(struct task_struct *p)
+{
+ return unlikely(sigismember(&p->pending.signal, SIGKILL));
+}
+
+static inline int fatal_signal_pending(struct task_struct *p)
+{
+ return signal_pending(p) && __fatal_signal_pending(p);
+}
+
+static inline int signal_pending_state(long state, struct task_struct *p)
+{
+ if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
+ return 0;
+ if (!signal_pending(p))
+ return 0;
+
+ return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
+}
+
+/*
+ * Reevaluate whether the task has signals pending delivery.
+ * Wake the task if so.
+ * This is required every time the blocked sigset_t changes.
+ * callers must hold sighand->siglock.
+ */
+extern void recalc_sigpending_and_wake(struct task_struct *t);
+extern void recalc_sigpending(void);
+
+extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
+
+static inline void signal_wake_up(struct task_struct *t, bool resume)
+{
+ signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
+}
+static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
+{
+ signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
+}
+
+#ifdef TIF_RESTORE_SIGMASK
+/*
+ * Legacy restore_sigmask accessors. These are inefficient on
+ * SMP architectures because they require atomic operations.
+ */
+
+/**
+ * set_restore_sigmask() - make sure saved_sigmask processing gets done
+ *
+ * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code
+ * will run before returning to user mode, to process the flag. For
+ * all callers, TIF_SIGPENDING is already set or it's no harm to set
+ * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the
+ * arch code will notice on return to user mode, in case those bits
+ * are scarce. We set TIF_SIGPENDING here to ensure that the arch
+ * signal code always gets run when TIF_RESTORE_SIGMASK is set.
+ */
+static inline void set_restore_sigmask(void)
+{
+ set_thread_flag(TIF_RESTORE_SIGMASK);
+ WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+}
+static inline void clear_restore_sigmask(void)
+{
+ clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_restore_sigmask(void)
+{
+ return test_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
+
+#else /* TIF_RESTORE_SIGMASK */
+
+/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */
+static inline void set_restore_sigmask(void)
+{
+ current->restore_sigmask = true;
+ WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+}
+static inline void clear_restore_sigmask(void)
+{
+ current->restore_sigmask = false;
+}
+static inline bool test_restore_sigmask(void)
+{
+ return current->restore_sigmask;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ if (!current->restore_sigmask)
+ return false;
+ current->restore_sigmask = false;
+ return true;
+}
+#endif
+
+static inline void restore_saved_sigmask(void)
+{
+ if (test_and_clear_restore_sigmask())
+ __set_current_blocked(&current->saved_sigmask);
+}
+
+static inline sigset_t *sigmask_to_save(void)
+{
+ sigset_t *res = &current->blocked;
+ if (unlikely(test_restore_sigmask()))
+ res = &current->saved_sigmask;
+ return res;
+}
+
+static inline int kill_cad_pid(int sig, int priv)
+{
+ return kill_pid(cad_pid, sig, priv);
+}
+
+/* These can be the second arg to send_sig_info/send_group_sig_info. */
+#define SEND_SIG_NOINFO ((struct siginfo *) 0)
+#define SEND_SIG_PRIV ((struct siginfo *) 1)
+#define SEND_SIG_FORCED ((struct siginfo *) 2)
+
+/*
+ * True if we are on the alternate signal stack.
+ */
+static inline int on_sig_stack(unsigned long sp)
+{
+ /*
+ * If the signal stack is SS_AUTODISARM then, by construction, we
+ * can't be on the signal stack unless user code deliberately set
+ * SS_AUTODISARM when we were already on it.
+ *
+ * This improves reliability: if user state gets corrupted such that
+ * the stack pointer points very close to the end of the signal stack,
+ * then this check will enable the signal to be handled anyway.
+ */
+ if (current->sas_ss_flags & SS_AUTODISARM)
+ return 0;
+
+#ifdef CONFIG_STACK_GROWSUP
+ return sp >= current->sas_ss_sp &&
+ sp - current->sas_ss_sp < current->sas_ss_size;
+#else
+ return sp > current->sas_ss_sp &&
+ sp - current->sas_ss_sp <= current->sas_ss_size;
+#endif
+}
+
+static inline int sas_ss_flags(unsigned long sp)
+{
+ if (!current->sas_ss_size)
+ return SS_DISABLE;
+
+ return on_sig_stack(sp) ? SS_ONSTACK : 0;
+}
+
+static inline void sas_ss_reset(struct task_struct *p)
+{
+ p->sas_ss_sp = 0;
+ p->sas_ss_size = 0;
+ p->sas_ss_flags = SS_DISABLE;
+}
+
+static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
+{
+ if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp))
+#ifdef CONFIG_STACK_GROWSUP
+ return current->sas_ss_sp;
+#else
+ return current->sas_ss_sp + current->sas_ss_size;
+#endif
+ return sp;
+}
+
+extern void __cleanup_sighand(struct sighand_struct *);
+extern void flush_itimer_signals(void);
+
+#define tasklist_empty() \
+ list_empty(&init_task.tasks)
+
+#define next_task(p) \
+ list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
+
+#define for_each_process(p) \
+ for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+
+extern bool current_is_single_threaded(void);
+
+/*
+ * Careful: do_each_thread/while_each_thread is a double loop so
+ * 'break' will not work as expected - use goto instead.
+ */
+#define do_each_thread(g, t) \
+ for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
+
+#define while_each_thread(g, t) \
+ while ((t = next_thread(t)) != g)
+
+#define __for_each_thread(signal, t) \
+ list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t) \
+ __for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t) \
+ for_each_process(p) for_each_thread(p, t)
+
+typedef int (*proc_visitor)(struct task_struct *p, void *data);
+void walk_process_tree(struct task_struct *top, proc_visitor, void *);
+
+static inline int get_nr_threads(struct task_struct *tsk)
+{
+ return tsk->signal->nr_threads;
+}
+
+static inline bool thread_group_leader(struct task_struct *p)
+{
+ return p->exit_signal >= 0;
+}
+
+/* Do to the insanities of de_thread it is possible for a process
+ * to have the pid of the thread group leader without actually being
+ * the thread group leader. For iteration through the pids in proc
+ * all we care about is that we have a task with the appropriate
+ * pid, we don't actually care if we have the right task.
+ */
+static inline bool has_group_leader_pid(struct task_struct *p)
+{
+ return task_pid(p) == p->signal->leader_pid;
+}
+
+static inline
+bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
+{
+ return p1->signal == p2->signal;
+}
+
+static inline struct task_struct *next_thread(const struct task_struct *p)
+{
+ return list_entry_rcu(p->thread_group.next,
+ struct task_struct, thread_group);
+}
+
+static inline int thread_group_empty(struct task_struct *p)
+{
+ return list_empty(&p->thread_group);
+}
+
+#define delay_group_leader(p) \
+ (thread_group_leader(p) && !thread_group_empty(p))
+
+extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
+ unsigned long *flags);
+
+static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
+ unsigned long *flags)
+{
+ struct sighand_struct *ret;
+
+ ret = __lock_task_sighand(tsk, flags);
+ (void)__cond_lock(&tsk->sighand->siglock, ret);
+ return ret;
+}
+
+static inline void unlock_task_sighand(struct task_struct *tsk,
+ unsigned long *flags)
+{
+ spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
+}
+
+static inline unsigned long task_rlimit(const struct task_struct *tsk,
+ unsigned int limit)
+{
+ return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
+}
+
+static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
+ unsigned int limit)
+{
+ return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
+}
+
+static inline unsigned long rlimit(unsigned int limit)
+{
+ return task_rlimit(current, limit);
+}
+
+static inline unsigned long rlimit_max(unsigned int limit)
+{
+ return task_rlimit_max(current, limit);
+}
+
+#endif /* _LINUX_SCHED_SIGNAL_H */
diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
new file mode 100644
index 000000000000..141b74c53fad
--- /dev/null
+++ b/include/linux/sched/stat.h
@@ -0,0 +1,40 @@
+#ifndef _LINUX_SCHED_STAT_H
+#define _LINUX_SCHED_STAT_H
+
+#include <linux/percpu.h>
+
+/*
+ * Various counters maintained by the scheduler and fork(),
+ * exposed via /proc, sys.c or used by drivers via these APIs.
+ *
+ * ( Note that all these values are aquired without locking,
+ * so they can only be relied on in narrow circumstances. )
+ */
+
+extern unsigned long total_forks;
+extern int nr_threads;
+DECLARE_PER_CPU(unsigned long, process_counts);
+extern int nr_processes(void);
+extern unsigned long nr_running(void);
+extern bool single_task_running(void);
+extern unsigned long nr_iowait(void);
+extern unsigned long nr_iowait_cpu(int cpu);
+extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
+
+static inline int sched_info_on(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+ return 1;
+#elif defined(CONFIG_TASK_DELAY_ACCT)
+ extern int delayacct_on;
+ return delayacct_on;
+#else
+ return 0;
+#endif
+}
+
+#ifdef CONFIG_SCHEDSTATS
+void force_schedstat_enabled(void);
+#endif
+
+#endif /* _LINUX_SCHED_STAT_H */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 49308e142aae..0f5ecd4d298e 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -1,5 +1,9 @@
-#ifndef _SCHED_SYSCTL_H
-#define _SCHED_SYSCTL_H
+#ifndef _LINUX_SCHED_SYSCTL_H
+#define _LINUX_SCHED_SYSCTL_H
+
+#include <linux/types.h>
+
+struct ctl_table;
#ifdef CONFIG_DETECT_HUNG_TASK
extern int sysctl_hung_task_check_count;
@@ -78,4 +82,4 @@ extern int sysctl_schedstats(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
-#endif /* _SCHED_SYSCTL_H */
+#endif /* _LINUX_SCHED_SYSCTL_H */
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
new file mode 100644
index 000000000000..a978d7189cfd
--- /dev/null
+++ b/include/linux/sched/task.h
@@ -0,0 +1,139 @@
+#ifndef _LINUX_SCHED_TASK_H
+#define _LINUX_SCHED_TASK_H
+
+/*
+ * Interface between the scheduler and various task lifetime (fork()/exit())
+ * functionality:
+ */
+
+#include <linux/sched.h>
+
+struct task_struct;
+union thread_union;
+
+/*
+ * This serializes "schedule()" and also protects
+ * the run-queue from deletions/modifications (but
+ * _adding_ to the beginning of the run-queue has
+ * a separate lock).
+ */
+extern rwlock_t tasklist_lock;
+extern spinlock_t mmlist_lock;
+
+extern union thread_union init_thread_union;
+extern struct task_struct init_task;
+
+#ifdef CONFIG_PROVE_RCU
+extern int lockdep_tasklist_lock_is_held(void);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
+extern asmlinkage void schedule_tail(struct task_struct *prev);
+extern void init_idle(struct task_struct *idle, int cpu);
+extern void init_idle_bootup_task(struct task_struct *idle);
+
+extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
+extern void sched_dead(struct task_struct *p);
+
+void __noreturn do_task_dead(void);
+
+extern void proc_caches_init(void);
+
+extern void release_task(struct task_struct * p);
+
+#ifdef CONFIG_HAVE_COPY_THREAD_TLS
+extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
+ struct task_struct *, unsigned long);
+#else
+extern int copy_thread(unsigned long, unsigned long, unsigned long,
+ struct task_struct *);
+
+/* Architectures that haven't opted into copy_thread_tls get the tls argument
+ * via pt_regs, so ignore the tls argument passed via C. */
+static inline int copy_thread_tls(
+ unsigned long clone_flags, unsigned long sp, unsigned long arg,
+ struct task_struct *p, unsigned long tls)
+{
+ return copy_thread(clone_flags, sp, arg, p);
+}
+#endif
+extern void flush_thread(void);
+
+#ifdef CONFIG_HAVE_EXIT_THREAD
+extern void exit_thread(struct task_struct *tsk);
+#else
+static inline void exit_thread(struct task_struct *tsk)
+{
+}
+#endif
+extern void do_group_exit(int);
+
+extern void exit_files(struct task_struct *);
+extern void exit_itimers(struct signal_struct *);
+
+extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
+extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
+struct task_struct *fork_idle(int);
+extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+extern void free_task(struct task_struct *tsk);
+
+/* sched_exec is called by processes performing an exec */
+#ifdef CONFIG_SMP
+extern void sched_exec(void);
+#else
+#define sched_exec() {}
+#endif
+
+#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
+
+extern void __put_task_struct(struct task_struct *t);
+
+static inline void put_task_struct(struct task_struct *t)
+{
+ if (atomic_dec_and_test(&t->usage))
+ __put_task_struct(t);
+}
+
+struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+struct task_struct *try_get_task_struct(struct task_struct **ptask);
+
+
+#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
+extern int arch_task_struct_size __read_mostly;
+#else
+# define arch_task_struct_size (sizeof(struct task_struct))
+#endif
+
+#ifdef CONFIG_VMAP_STACK
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+ return t->stack_vm_area;
+}
+#else
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+ return NULL;
+}
+#endif
+
+/*
+ * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
+ * subscriptions and synchronises with wait4(). Also used in procfs. Also
+ * pins the final release of task.io_context. Also protects ->cpuset and
+ * ->cgroup.subsys[]. And ->vfork_done.
+ *
+ * Nests both inside and outside of read_lock(&tasklist_lock).
+ * It must not be nested with write_lock_irq(&tasklist_lock),
+ * neither inside nor outside.
+ */
+static inline void task_lock(struct task_struct *p)
+{
+ spin_lock(&p->alloc_lock);
+}
+
+static inline void task_unlock(struct task_struct *p)
+{
+ spin_unlock(&p->alloc_lock);
+}
+
+#endif /* _LINUX_SCHED_TASK_H */
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
new file mode 100644
index 000000000000..df6ea6665b31
--- /dev/null
+++ b/include/linux/sched/task_stack.h
@@ -0,0 +1,121 @@
+#ifndef _LINUX_SCHED_TASK_STACK_H
+#define _LINUX_SCHED_TASK_STACK_H
+
+/*
+ * task->stack (kernel stack) handling interfaces:
+ */
+
+#include <linux/sched.h>
+#include <linux/magic.h>
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+
+/*
+ * When accessing the stack of a non-current task that might exit, use
+ * try_get_task_stack() instead. task_stack_page will return a pointer
+ * that could get freed out from under you.
+ */
+static inline void *task_stack_page(const struct task_struct *task)
+{
+ return task->stack;
+}
+
+#define setup_thread_stack(new,old) do { } while(0)
+
+static inline unsigned long *end_of_stack(const struct task_struct *task)
+{
+ return task->stack;
+}
+
+#elif !defined(__HAVE_THREAD_FUNCTIONS)
+
+#define task_stack_page(task) ((void *)(task)->stack)
+
+static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
+{
+ *task_thread_info(p) = *task_thread_info(org);
+ task_thread_info(p)->task = p;
+}
+
+/*
+ * Return the address of the last usable long on the stack.
+ *
+ * When the stack grows down, this is just above the thread
+ * info struct. Going any lower will corrupt the threadinfo.
+ *
+ * When the stack grows up, this is the highest address.
+ * Beyond that position, we corrupt data on the next page.
+ */
+static inline unsigned long *end_of_stack(struct task_struct *p)
+{
+#ifdef CONFIG_STACK_GROWSUP
+ return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1;
+#else
+ return (unsigned long *)(task_thread_info(p) + 1);
+#endif
+}
+
+#endif
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+ return atomic_inc_not_zero(&tsk->stack_refcount) ?
+ task_stack_page(tsk) : NULL;
+}
+
+extern void put_task_stack(struct task_struct *tsk);
+#else
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+ return task_stack_page(tsk);
+}
+
+static inline void put_task_stack(struct task_struct *tsk) {}
+#endif
+
+#define task_stack_end_corrupted(task) \
+ (*(end_of_stack(task)) != STACK_END_MAGIC)
+
+static inline int object_is_on_stack(void *obj)
+{
+ void *stack = task_stack_page(current);
+
+ return (obj >= stack) && (obj < (stack + THREAD_SIZE));
+}
+
+extern void thread_stack_cache_init(void);
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long stack_not_used(struct task_struct *p)
+{
+ unsigned long *n = end_of_stack(p);
+
+ do { /* Skip over canary */
+# ifdef CONFIG_STACK_GROWSUP
+ n--;
+# else
+ n++;
+# endif
+ } while (!*n);
+
+# ifdef CONFIG_STACK_GROWSUP
+ return (unsigned long)end_of_stack(p) - (unsigned long)n;
+# else
+ return (unsigned long)n - (unsigned long)end_of_stack(p);
+# endif
+}
+#endif
+extern void set_task_stack_end_magic(struct task_struct *tsk);
+
+#ifndef __HAVE_ARCH_KSTACK_END
+static inline int kstack_end(void *addr)
+{
+ /* Reliable end of stack detection:
+ * Some APM bios versions misalign the stack
+ */
+ return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
+}
+#endif
+
+#endif /* _LINUX_SCHED_TASK_STACK_H */
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
new file mode 100644
index 000000000000..7d065abc7a47
--- /dev/null
+++ b/include/linux/sched/topology.h
@@ -0,0 +1,226 @@
+#ifndef _LINUX_SCHED_TOPOLOGY_H
+#define _LINUX_SCHED_TOPOLOGY_H
+
+#include <linux/topology.h>
+
+#include <linux/sched/idle.h>
+
+/*
+ * sched-domains (multiprocessor balancing) declarations:
+ */
+#ifdef CONFIG_SMP
+
+#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */
+#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
+#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
+#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
+#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
+#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
+#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */
+#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */
+#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
+#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
+#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
+#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
+#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
+#define SD_NUMA 0x4000 /* cross-node balancing */
+
+/*
+ * Increase resolution of cpu_capacity calculations
+ */
+#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
+#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
+
+#ifdef CONFIG_SCHED_SMT
+static inline int cpu_smt_flags(void)
+{
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_SCHED_MC
+static inline int cpu_core_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_NUMA
+static inline int cpu_numa_flags(void)
+{
+ return SD_NUMA;
+}
+#endif
+
+extern int arch_asym_cpu_priority(int cpu);
+
+struct sched_domain_attr {
+ int relax_domain_level;
+};
+
+#define SD_ATTR_INIT (struct sched_domain_attr) { \
+ .relax_domain_level = -1, \
+}
+
+extern int sched_domain_level_max;
+
+struct sched_group;
+
+struct sched_domain_shared {
+ atomic_t ref;
+ atomic_t nr_busy_cpus;
+ int has_idle_cores;
+};
+
+struct sched_domain {
+ /* These fields must be setup */
+ struct sched_domain *parent; /* top domain must be null terminated */
+ struct sched_domain *child; /* bottom domain must be null terminated */
+ struct sched_group *groups; /* the balancing groups of the domain */
+ unsigned long min_interval; /* Minimum balance interval ms */
+ unsigned long max_interval; /* Maximum balance interval ms */
+ unsigned int busy_factor; /* less balancing by factor if busy */
+ unsigned int imbalance_pct; /* No balance until over watermark */
+ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
+ unsigned int busy_idx;
+ unsigned int idle_idx;
+ unsigned int newidle_idx;
+ unsigned int wake_idx;
+ unsigned int forkexec_idx;
+ unsigned int smt_gain;
+
+ int nohz_idle; /* NOHZ IDLE status */
+ int flags; /* See SD_* */
+ int level;
+
+ /* Runtime fields. */
+ unsigned long last_balance; /* init to jiffies. units in jiffies */
+ unsigned int balance_interval; /* initialise to 1. units in ms. */
+ unsigned int nr_balance_failed; /* initialise to 0 */
+
+ /* idle_balance() stats */
+ u64 max_newidle_lb_cost;
+ unsigned long next_decay_max_lb_cost;
+
+ u64 avg_scan_cost; /* select_idle_sibling */
+
+#ifdef CONFIG_SCHEDSTATS
+ /* load_balance() stats */
+ unsigned int lb_count[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
+
+ /* Active load balancing */
+ unsigned int alb_count;
+ unsigned int alb_failed;
+ unsigned int alb_pushed;
+
+ /* SD_BALANCE_EXEC stats */
+ unsigned int sbe_count;
+ unsigned int sbe_balanced;
+ unsigned int sbe_pushed;
+
+ /* SD_BALANCE_FORK stats */
+ unsigned int sbf_count;
+ unsigned int sbf_balanced;
+ unsigned int sbf_pushed;
+
+ /* try_to_wake_up() stats */
+ unsigned int ttwu_wake_remote;
+ unsigned int ttwu_move_affine;
+ unsigned int ttwu_move_balance;
+#endif
+#ifdef CONFIG_SCHED_DEBUG
+ char *name;
+#endif
+ union {
+ void *private; /* used during construction */
+ struct rcu_head rcu; /* used during destruction */
+ };
+ struct sched_domain_shared *shared;
+
+ unsigned int span_weight;
+ /*
+ * Span of all CPUs in this domain.
+ *
+ * NOTE: this field is variable length. (Allocated dynamically
+ * by attaching extra space to the end of the structure,
+ * depending on how many CPUs the kernel has booted up with)
+ */
+ unsigned long span[0];
+};
+
+static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
+{
+ return to_cpumask(sd->span);
+}
+
+extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+ struct sched_domain_attr *dattr_new);
+
+/* Allocate an array of sched domains, for partition_sched_domains(). */
+cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
+void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
+
+bool cpus_share_cache(int this_cpu, int that_cpu);
+
+typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+typedef int (*sched_domain_flags_f)(void);
+
+#define SDTL_OVERLAP 0x01
+
+struct sd_data {
+ struct sched_domain **__percpu sd;
+ struct sched_domain_shared **__percpu sds;
+ struct sched_group **__percpu sg;
+ struct sched_group_capacity **__percpu sgc;
+};
+
+struct sched_domain_topology_level {
+ sched_domain_mask_f mask;
+ sched_domain_flags_f sd_flags;
+ int flags;
+ int numa_level;
+ struct sd_data data;
+#ifdef CONFIG_SCHED_DEBUG
+ char *name;
+#endif
+};
+
+extern void set_sched_topology(struct sched_domain_topology_level *tl);
+
+#ifdef CONFIG_SCHED_DEBUG
+# define SD_INIT_NAME(type) .name = #type
+#else
+# define SD_INIT_NAME(type)
+#endif
+
+#else /* CONFIG_SMP */
+
+struct sched_domain_attr;
+
+static inline void
+partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+ struct sched_domain_attr *dattr_new)
+{
+}
+
+static inline bool cpus_share_cache(int this_cpu, int that_cpu)
+{
+ return true;
+}
+
+#endif /* !CONFIG_SMP */
+
+static inline int task_node(const struct task_struct *p)
+{
+ return cpu_to_node(task_cpu(p));
+}
+
+#endif /* _LINUX_SCHED_TOPOLOGY_H */
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
new file mode 100644
index 000000000000..5d5415e129d4
--- /dev/null
+++ b/include/linux/sched/user.h
@@ -0,0 +1,61 @@
+#ifndef _LINUX_SCHED_USER_H
+#define _LINUX_SCHED_USER_H
+
+#include <linux/uidgid.h>
+#include <linux/atomic.h>
+
+struct key;
+
+/*
+ * Some day this will be a full-fledged user tracking system..
+ */
+struct user_struct {
+ atomic_t __count; /* reference count */
+ atomic_t processes; /* How many processes does this user have? */
+ atomic_t sigpending; /* How many pending signals does this user have? */
+#ifdef CONFIG_FANOTIFY
+ atomic_t fanotify_listeners;
+#endif
+#ifdef CONFIG_EPOLL
+ atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
+#endif
+#ifdef CONFIG_POSIX_MQUEUE
+ /* protected by mq_lock */
+ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
+#endif
+ unsigned long locked_shm; /* How many pages of mlocked shm ? */
+ unsigned long unix_inflight; /* How many files in flight in unix sockets */
+ atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */
+
+#ifdef CONFIG_KEYS
+ struct key *uid_keyring; /* UID specific keyring */
+ struct key *session_keyring; /* UID's default session keyring */
+#endif
+
+ /* Hash table maintenance information */
+ struct hlist_node uidhash_node;
+ kuid_t uid;
+
+#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
+ atomic_long_t locked_vm;
+#endif
+};
+
+extern int uids_sysfs_init(void);
+
+extern struct user_struct *find_user(kuid_t);
+
+extern struct user_struct root_user;
+#define INIT_USER (&root_user)
+
+
+/* per-UID process charging. */
+extern struct user_struct * alloc_uid(kuid_t);
+static inline struct user_struct *get_uid(struct user_struct *u)
+{
+ atomic_inc(&u->__count);
+ return u;
+}
+extern void free_uid(struct user_struct *);
+
+#endif /* _LINUX_SCHED_USER_H */
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
new file mode 100644
index 000000000000..d03d8a9047dc
--- /dev/null
+++ b/include/linux/sched/wake_q.h
@@ -0,0 +1,53 @@
+#ifndef _LINUX_SCHED_WAKE_Q_H
+#define _LINUX_SCHED_WAKE_Q_H
+
+/*
+ * Wake-queues are lists of tasks with a pending wakeup, whose
+ * callers have already marked the task as woken internally,
+ * and can thus carry on. A common use case is being able to
+ * do the wakeups once the corresponding user lock as been
+ * released.
+ *
+ * We hold reference to each task in the list across the wakeup,
+ * thus guaranteeing that the memory is still valid by the time
+ * the actual wakeups are performed in wake_up_q().
+ *
+ * One per task suffices, because there's never a need for a task to be
+ * in two wake queues simultaneously; it is forbidden to abandon a task
+ * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
+ * already in a wake queue, the wakeup will happen soon and the second
+ * waker can just skip it.
+ *
+ * The DEFINE_WAKE_Q macro declares and initializes the list head.
+ * wake_up_q() does NOT reinitialize the list; it's expected to be
+ * called near the end of a function. Otherwise, the list can be
+ * re-initialized for later re-use by wake_q_init().
+ *
+ * Note that this can cause spurious wakeups. schedule() callers
+ * must ensure the call is done inside a loop, confirming that the
+ * wakeup condition has in fact occurred.
+ */
+
+#include <linux/sched.h>
+
+struct wake_q_head {
+ struct wake_q_node *first;
+ struct wake_q_node **lastp;
+};
+
+#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
+
+#define DEFINE_WAKE_Q(name) \
+ struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+
+static inline void wake_q_init(struct wake_q_head *head)
+{
+ head->first = WAKE_Q_TAIL;
+ head->lastp = &head->first;
+}
+
+extern void wake_q_add(struct wake_q_head *head,
+ struct task_struct *task);
+extern void wake_up_q(struct wake_q_head *head);
+
+#endif /* _LINUX_SCHED_WAKE_Q_H */
diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h
new file mode 100644
index 000000000000..a28156a0d34a
--- /dev/null
+++ b/include/linux/sched/xacct.h
@@ -0,0 +1,48 @@
+#ifndef _LINUX_SCHED_XACCT_H
+#define _LINUX_SCHED_XACCT_H
+
+/*
+ * Extended task accounting methods:
+ */
+
+#include <linux/sched.h>
+
+#ifdef CONFIG_TASK_XACCT
+static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
+{
+ tsk->ioac.rchar += amt;
+}
+
+static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
+{
+ tsk->ioac.wchar += amt;
+}
+
+static inline void inc_syscr(struct task_struct *tsk)
+{
+ tsk->ioac.syscr++;
+}
+
+static inline void inc_syscw(struct task_struct *tsk)
+{
+ tsk->ioac.syscw++;
+}
+#else
+static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
+{
+}
+
+static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
+{
+}
+
+static inline void inc_syscr(struct task_struct *tsk)
+{
+}
+
+static inline void inc_syscw(struct task_struct *tsk)
+{
+}
+#endif
+
+#endif /* _LINUX_SCHED_XACCT_H */