From 7ddf96b02fe8dd441f452deef879040def5f7b34 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 24 May 2012 19:46:55 +0530 Subject: cpusets, hotplug: Restructure functions that are invoked during hotplug Separate out the cpuset related handling for CPU/Memory online/offline. This also helps us exploit the most obvious and basic level of optimization that any notification mechanism (CPU/Mem online/offline) has to offer us: "We *know* why we have been invoked. So stop pretending that we are lost, and do only the necessary amount of processing!". And while at it, rename scan_for_empty_cpusets() to scan_cpusets_upon_hotplug(), which is more appropriate considering how it is restructured. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20120524141650.3692.48637.stgit@srivatsabhat.in.ibm.com Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 668f66baac7b..838320fc3d1d 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -20,7 +20,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern void cpuset_update_active_cpus(void); +extern void cpuset_update_active_cpus(bool cpu_online); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); @@ -124,7 +124,7 @@ static inline void set_mems_allowed(nodemask_t nodemask) static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline void cpuset_update_active_cpus(void) +static inline void cpuset_update_active_cpus(bool cpu_online) { partition_sched_domains(1, NULL, NULL); } -- cgit v1.2.3 From 970e178985cadbca660feb02f4d2ee3a09f7fdda Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 12 Jun 2012 05:18:32 +0200 Subject: sched: Improve scalability via 'CPU buddies', which withstand random perturbations Traversing an entire package is not only expensive, it also leads to tasks bouncing all over a partially idle and possible quite large package. Fix that up by assigning a 'buddy' CPU to try to motivate. Each buddy may try to motivate that one other CPU, if it's busy, tough, it may then try its SMT sibling, but that's all this optimization is allowed to cost. Sibling cache buddies are cross-wired to prevent bouncing. 4 socket 40 core + SMT Westmere box, single 30 sec tbench runs, higher is better: clients 1 2 4 8 16 32 64 128 .......................................................................... pre 30 41 118 645 3769 6214 12233 14312 post 299 603 1211 2418 4697 6847 11606 14557 A nice increase in performance. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1339471112.7352.32.camel@marge.simpson.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a1f493e0fef..bc9952991710 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -949,6 +949,7 @@ struct sched_domain { unsigned int smt_gain; int flags; /* See SD_* */ int level; + int idle_buddy; /* cpu assigned to select_idle_sibling() */ /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ -- cgit v1.2.3 From 8323f26ce3425460769605a6aece7a174edaa7d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jun 2012 13:36:05 +0200 Subject: sched: Fix race in task_group() Stefan reported a crash on a kernel before a3e5d1091c1 ("sched: Don't call task_group() too many times in set_task_rq()"), he found the reason to be that the multiple task_group() invocations in set_task_rq() returned different values. Looking at all that I found a lack of serialization and plain wrong comments. The below tries to fix it using an extra pointer which is updated under the appropriate scheduler locks. Its not pretty, but I can't really see another way given how all the cgroup stuff works. Reported-and-tested-by: Stefan Bader Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1340364965.18025.71.camel@twins Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 12 +++++++++++- include/linux/sched.h | 5 ++++- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9e65eff6af3b..b806b821e735 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -123,8 +123,17 @@ extern struct group_info init_groups; extern struct cred init_cred; +extern struct task_group root_task_group; + +#ifdef CONFIG_CGROUP_SCHED +# define INIT_CGROUP_SCHED(tsk) \ + .sched_task_group = &root_task_group, +#else +# define INIT_CGROUP_SCHED(tsk) +#endif + #ifdef CONFIG_PERF_EVENTS -# define INIT_PERF_EVENTS(tsk) \ +# define INIT_PERF_EVENTS(tsk) \ .perf_event_mutex = \ __MUTEX_INITIALIZER(tsk.perf_event_mutex), \ .perf_event_list = LIST_HEAD_INIT(tsk.perf_event_list), @@ -161,6 +170,7 @@ extern struct cred init_cred; }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ INIT_PUSHABLE_TASKS(tsk) \ + INIT_CGROUP_SCHED(tsk) \ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &tsk, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index bc9952991710..fd9436a3a545 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1245,6 +1245,9 @@ struct task_struct { const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; +#ifdef CONFIG_CGROUP_SCHED + struct task_group *sched_task_group; +#endif #ifdef CONFIG_PREEMPT_NOTIFIERS /* list of struct preempt_notifier: */ @@ -2724,7 +2727,7 @@ extern int sched_group_set_rt_period(struct task_group *tg, extern long sched_group_rt_period(struct task_group *tg); extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); #endif -#endif +#endif /* CONFIG_CGROUP_SCHED */ extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); -- cgit v1.2.3