From 7897986bad8f6cd50d6149345aca7f6480f49464 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:13 -0700 Subject: [PATCH] sched: balance timers Do CPU load averaging over a number of different intervals. Allow each interval to be chosen by sending a parameter to source_load and target_load. 0 is instantaneous, idx > 0 returns a decaying average with the most recent sample weighted at 2^(idx-1). To a maximum of 3 (could be easily increased). So generally a higher number will result in more conservative balancing. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c69682b0444..664981ac1fb6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -488,6 +488,10 @@ struct sched_domain { unsigned long long cache_hot_time; /* Task considered cache hot (ns) */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ unsigned int per_cpu_gain; /* CPU % gained by adding domain cpus */ + unsigned int busy_idx; + unsigned int idle_idx; + unsigned int newidle_idx; + unsigned int wake_idx; int flags; /* See SD_* */ /* Runtime fields. */ -- cgit v1.2.3 From 147cbb4bbe991452698f0772d8292f22825710ba Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:19 -0700 Subject: [PATCH] sched: balance on fork Reimplement the balance on exec balancing to be sched-domains aware. Use this to also do balance on fork balancing. Make x86_64 do balance on fork over the NUMA domain. The problem that the non sched domains aware blancing became apparent on dual core, multi socket opterons. What we want is for the new tasks to be sent to a different socket, but more often than not, we would first load up our sibling core, or fill two cores of a single remote socket before selecting a new one. This gives large improvements to STREAM on such systems. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 664981ac1fb6..613491d3a875 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -460,10 +460,11 @@ enum idle_type #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */ #define SD_BALANCE_NEWIDLE 2 /* Balance when about to become idle */ #define SD_BALANCE_EXEC 4 /* Balance on exec */ -#define SD_WAKE_IDLE 8 /* Wake to idle CPU on task wakeup */ -#define SD_WAKE_AFFINE 16 /* Wake task to waking CPU */ -#define SD_WAKE_BALANCE 32 /* Perform balancing at task wakeup */ -#define SD_SHARE_CPUPOWER 64 /* Domain members share cpu power */ +#define SD_BALANCE_FORK 8 /* Balance on fork, clone */ +#define SD_WAKE_IDLE 16 /* Wake to idle CPU on task wakeup */ +#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */ +#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */ +#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ struct sched_group { struct sched_group *next; /* Must be a circular list */ @@ -492,6 +493,7 @@ struct sched_domain { unsigned int idle_idx; unsigned int newidle_idx; unsigned int wake_idx; + unsigned int forkexec_idx; int flags; /* See SD_* */ /* Runtime fields. */ -- cgit v1.2.3 From 68767a0ae428801649d510d9a65bb71feed44dd1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:20 -0700 Subject: [PATCH] sched: schedstats update for balance on fork Add SCHEDSTAT statistics for sched-balance-fork. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 613491d3a875..36a10781c3f3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -517,10 +517,16 @@ struct sched_domain { unsigned long alb_failed; unsigned long alb_pushed; - /* sched_balance_exec() stats */ - unsigned long sbe_attempts; + /* SD_BALANCE_EXEC stats */ + unsigned long sbe_cnt; + unsigned long sbe_balanced; unsigned long sbe_pushed; + /* SD_BALANCE_FORK stats */ + unsigned long sbf_cnt; + unsigned long sbf_balanced; + unsigned long sbf_pushed; + /* try_to_wake_up() stats */ unsigned long ttwu_wake_remote; unsigned long ttwu_move_affine; -- cgit v1.2.3 From 4866cde064afbb6c2a488c265e696879de616daa Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:23 -0700 Subject: [PATCH] sched: cleanup context switch locking Instead of requiring architecture code to interact with the scheduler's locking implementation, provide a couple of defines that can be used by the architecture to request runqueue unlocked context switches, and ask for interrupts to be enabled over the context switch. Also replaces the "switch_lock" used by these architectures with an oncpu flag (note, not a potentially slow bitflag). This eliminates one bus locked memory operation when context switching, and simplifies the task_running function. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 36a10781c3f3..d27be9337425 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -368,6 +368,11 @@ struct signal_struct { #endif }; +/* Context switch must be unlocked if interrupts are to be enabled */ +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW +# define __ARCH_WANT_UNLOCKED_CTXSW +#endif + /* * Bits in flags field of signal_struct. */ @@ -594,6 +599,9 @@ struct task_struct { int lock_depth; /* BKL lock depth */ +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + int oncpu; +#endif int prio, static_prio; struct list_head run_list; prio_array_t *array; @@ -716,8 +724,6 @@ struct task_struct { spinlock_t alloc_lock; /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */ spinlock_t proc_lock; -/* context-switch lock */ - spinlock_t switch_lock; /* journalling filesystem info */ void *journal_info; -- cgit v1.2.3 From 476d139c218e44e045e4bc6d4cc02b010b343939 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:29 -0700 Subject: [PATCH] sched: consolidate sbe sbf Consolidate balance-on-exec with balance-on-fork. This is made easy by the sched-domains RCU patches. As well as the general goodness of code reduction, this allows the runqueues to be unlocked during balance-on-fork. schedstats is a problem. Maybe just have balance-on-event instead of distinguishing fork and exec? Signed-off-by: Nick Piggin Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index d27be9337425..edb2c69a8873 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -930,7 +930,7 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk, #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void FASTCALL(sched_fork(task_t * p)); +extern void FASTCALL(sched_fork(task_t * p, int clone_flags)); extern void FASTCALL(sched_exit(task_t * p)); extern int in_group_p(gid_t); -- cgit v1.2.3 From 1a20ff27ef75d866730ee796acd811a925af762f Mon Sep 17 00:00:00 2001 From: Dinakar Guniguntala Date: Sat, 25 Jun 2005 14:57:33 -0700 Subject: [PATCH] Dynamic sched domains: sched changes The following patches add dynamic sched domains functionality that was extensively discussed on lkml and lse-tech. I would like to see this added to -mm o The main advantage with this feature is that it ensures that the scheduler load balacing code only balances against the cpus that are in the sched domain as defined by an exclusive cpuset and not all of the cpus in the system. This removes any overhead due to load balancing code trying to pull tasks outside of the cpu exclusive cpuset only to be prevented by the tasks' cpus_allowed mask. o cpu exclusive cpusets are useful for servers running orthogonal workloads such as RT applications requiring low latency and HPC applications that are throughput sensitive o It provides a new API partition_sched_domains in sched.c that makes dynamic sched domains possible. o cpu_exclusive cpusets sets are now associated with a sched domain. Which means that the users can dynamically modify the sched domains through the cpuset file system interface o ia64 sched domain code has been updated to support this feature as well o Currently, this does not support hotplug. (However some of my tests indicate hotplug+preempt is currently broken) o I have tested it extensively on x86. o This should have very minimal impact on performance as none of the fast paths are affected Signed-off-by: Dinakar Guniguntala Acked-by: Paul Jackson Acked-by: Nick Piggin Acked-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index edb2c69a8873..98c109e4f43d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -539,6 +539,8 @@ struct sched_domain { #endif }; +extern void partition_sched_domains(cpumask_t *partition1, + cpumask_t *partition2); #ifdef ARCH_HAS_SCHED_DOMAIN /* Useful helpers that arch setup code may use. Defined in kernel/sched.c */ extern cpumask_t cpu_isolated_map; -- cgit v1.2.3 From 3e1d1d28d99dabe63c64f7f40f1ca1d646de1f73 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 24 Jun 2005 23:13:50 -0700 Subject: [PATCH] Cleanup patch for process freezing 1. Establish a simple API for process freezing defined in linux/include/sched.h: frozen(process) Check for frozen process freezing(process) Check if a process is being frozen freeze(process) Tell a process to freeze (go to refrigerator) thaw_process(process) Restart process frozen_process(process) Process is frozen now 2. Remove all references to PF_FREEZE and PF_FROZEN from all kernel sources except sched.h 3. Fix numerous locations where try_to_freeze is manually done by a driver 4. Remove the argument that is no longer necessary from two function calls. 5. Some whitespace cleanup 6. Clear potential race in refrigerator (provides an open window of PF_FREEZE cleared before setting PF_FROZEN, recalc_sigpending does not check PF_FROZEN). This patch does not address the problem of freeze_processes() violating the rule that a task may only modify its own flags by setting PF_FREEZE. This is not clean in an SMP environment. freeze(process) is therefore not SMP safe! Signed-off-by: Christoph Lameter Signed-off-by: Linus Torvalds --- include/linux/sched.h | 73 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 14 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c69682b0444..e7fd09b0557f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1245,33 +1245,78 @@ extern void normalize_rt_tasks(void); #endif -/* try_to_freeze - * - * Checks whether we need to enter the refrigerator - * and returns 1 if we did so. - */ #ifdef CONFIG_PM -extern void refrigerator(unsigned long); +/* + * Check if a process has been frozen + */ +static inline int frozen(struct task_struct *p) +{ + return p->flags & PF_FROZEN; +} + +/* + * Check if there is a request to freeze a process + */ +static inline int freezing(struct task_struct *p) +{ + return p->flags & PF_FREEZE; +} + +/* + * Request that a process be frozen + * FIXME: SMP problem. We may not modify other process' flags! + */ +static inline void freeze(struct task_struct *p) +{ + p->flags |= PF_FREEZE; +} + +/* + * Wake up a frozen process + */ +static inline int thaw_process(struct task_struct *p) +{ + if (frozen(p)) { + p->flags &= ~PF_FROZEN; + wake_up_process(p); + return 1; + } + return 0; +} + +/* + * freezing is complete, mark process as frozen + */ +static inline void frozen_process(struct task_struct *p) +{ + p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN; +} + +extern void refrigerator(void); extern int freeze_processes(void); extern void thaw_processes(void); -static inline int try_to_freeze(unsigned long refrigerator_flags) +static inline int try_to_freeze(void) { - if (unlikely(current->flags & PF_FREEZE)) { - refrigerator(refrigerator_flags); + if (freezing(current)) { + refrigerator(); return 1; } else return 0; } #else -static inline void refrigerator(unsigned long flag) {} +static inline int frozen(struct task_struct *p) { return 0; } +static inline int freezing(struct task_struct *p) { return 0; } +static inline void freeze(struct task_struct *p) { BUG(); } +static inline int thaw_process(struct task_struct *p) { return 1; } +static inline void frozen_process(struct task_struct *p) { BUG(); } + +static inline void refrigerator(void) {} static inline int freeze_processes(void) { BUG(); return 0; } static inline void thaw_processes(void) {} -static inline int try_to_freeze(unsigned long refrigerator_flags) -{ - return 0; -} +static inline int try_to_freeze(void) { return 0; } + #endif /* CONFIG_PM */ #endif /* __KERNEL__ */ -- cgit v1.2.3