summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/asm-x86_64/topology.h 2
-rw-r--r-- include/linux/sched.h 10
-rw-r--r-- include/linux/topology.h 2
-rw-r--r-- kernel/sched.c 164
4 files changed, 119 insertions, 59 deletions
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 9cb7459ce722..802d09b9c99f 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -44,9 +44,11 @@ extern int __node_distance(int, int);
.idle_idx = 2, \
.newidle_idx = 1, \
.wake_idx = 1, \
+ .forkexec_idx = 1, \
.per_cpu_gain = 100, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
+ | SD_BALANCE_FORK \
| SD_BALANCE_EXEC \
| SD_WAKE_BALANCE, \
.last_balance = jiffies, \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 664981ac1fb6..613491d3a875 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -460,10 +460,11 @@ enum idle_type
#define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */
#define SD_BALANCE_NEWIDLE 2 /* Balance when about to become idle */
#define SD_BALANCE_EXEC 4 /* Balance on exec */
-#define SD_WAKE_IDLE 8 /* Wake to idle CPU on task wakeup */
-#define SD_WAKE_AFFINE 16 /* Wake task to waking CPU */
-#define SD_WAKE_BALANCE 32 /* Perform balancing at task wakeup */
-#define SD_SHARE_CPUPOWER 64 /* Domain members share cpu power */
+#define SD_BALANCE_FORK 8 /* Balance on fork, clone */
+#define SD_WAKE_IDLE 16 /* Wake to idle CPU on task wakeup */
+#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */
+#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */
struct sched_group {
struct sched_group *next; /* Must be a circular list */
@@ -492,6 +493,7 @@ struct sched_domain {
unsigned int idle_idx;
unsigned int newidle_idx;
unsigned int wake_idx;
+ unsigned int forkexec_idx;
int flags; /* See SD_* */
/* Runtime fields. */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index b23ec64df7f1..665597207def 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -93,6 +93,7 @@
.idle_idx = 0, \
.newidle_idx = 0, \
.wake_idx = 0, \
+ .forkexec_idx = 0, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
@@ -123,6 +124,7 @@
.idle_idx = 0, \
.newidle_idx = 1, \
.wake_idx = 1, \
+ .forkexec_idx = 0, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
diff --git a/kernel/sched.c b/kernel/sched.c
index 396724a2519f..7ecc237e2aab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -893,6 +893,79 @@ static inline unsigned long target_load(int cpu, int type)
return max(rq->cpu_load[type-1], load_now);
}
+/*
+ * find_idlest_group - pick the least loaded group of @sd other than the one holding
+ * @this_cpu; returns NULL if there is none or the local group is too lightly loaded.
+ */
+static struct sched_group *
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
+{
+ struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
+ unsigned long min_load = ULONG_MAX, this_load = 0;
+ int load_idx = sd->forkexec_idx;
+ int imbalance = 100 + (sd->imbalance_pct-100)/2; /* 100 plus half of the domain's margin over 100 */
+
+ do {
+ unsigned long load, avg_load;
+ int local_group;
+ int i;
+
+ local_group = cpu_isset(this_cpu, group->cpumask); /* non-zero if this group holds this_cpu */
+ /* XXX: put a cpus allowed check; @p is not consulted anywhere in this function yet */
+
+ /* Tally up the load of all CPUs in the group */
+ avg_load = 0; /* holds a plain sum until it is scaled below */
+
+ for_each_cpu_mask(i, group->cpumask) {
+ /* Bias balancing toward cpus of our domain: source_load() locally, target_load() elsewhere */
+ if (local_group)
+ load = source_load(i, load_idx);
+ else
+ load = target_load(i, load_idx);
+
+ avg_load += load;
+ }
+
+ /* Adjust by relative CPU power of the group */
+ avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+
+ if (local_group) {
+ this_load = avg_load;
+ this = group; /* stored but never read in this function */
+ } else if (avg_load < min_load) { /* lowest non-local group seen so far */
+ min_load = avg_load;
+ idlest = group;
+ }
+ group = group->next;
+ } while (group != sd->groups); /* stop once the walk is back at the first group */
+
+ if (!idlest || 100*this_load < imbalance*min_load)
+ return NULL; /* no other group, or local load is below min_load scaled by imbalance/100 */
+ return idlest;
+}
+
+/*
+ * find_idlest_cpu - return the cpu in @group with the lowest source_load(cpu, 0).
+ */
+static int find_idlest_cpu(struct sched_group *group, int this_cpu)
+{
+ unsigned long load, min_load = ULONG_MAX;
+ int idlest = -1; /* returned unchanged if the loop below selects no cpu */
+ int i;
+
+ for_each_cpu_mask(i, group->cpumask) {
+ load = source_load(i, 0); /* fixed index 0, not sd->forkexec_idx */
+
+ if (load < min_load || (load == min_load && i == this_cpu)) { /* on a tie, this_cpu wins */
+ min_load = load;
+ idlest = i;
+ }
+ }
+
+ return idlest;
+}
+
+
#endif
/*
@@ -1107,11 +1180,6 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
return try_to_wake_up(p, state, 0);
}
-#ifdef CONFIG_SMP
-static int find_idlest_cpu(struct task_struct *p, int this_cpu,
- struct sched_domain *sd);
-#endif
-
/*
* Perform scheduler related setup for a newly forked process p.
* p is forked by current.
@@ -1181,12 +1249,38 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
unsigned long flags;
int this_cpu, cpu;
runqueue_t *rq, *this_rq;
+#ifdef CONFIG_SMP
+ struct sched_domain *tmp, *sd = NULL;
+#endif
rq = task_rq_lock(p, &flags);
- cpu = task_cpu(p);
+ BUG_ON(p->state != TASK_RUNNING);
this_cpu = smp_processor_id();
+ cpu = task_cpu(p);
- BUG_ON(p->state != TASK_RUNNING);
+#ifdef CONFIG_SMP
+ for_each_domain(cpu, tmp)
+ if (tmp->flags & SD_BALANCE_FORK)
+ sd = tmp;
+
+ if (sd) {
+ struct sched_group *group;
+
+ cpu = task_cpu(p);
+ group = find_idlest_group(sd, p, cpu);
+ if (group) {
+ int new_cpu;
+ new_cpu = find_idlest_cpu(group, cpu);
+ if (new_cpu != -1 && new_cpu != cpu &&
+ cpu_isset(new_cpu, p->cpus_allowed)) {
+ set_task_cpu(p, new_cpu);
+ task_rq_unlock(rq, &flags);
+ rq = task_rq_lock(p, &flags);
+ cpu = task_cpu(p);
+ }
+ }
+ }
+#endif
/*
* We decrease the sleep average of forking parents
@@ -1481,51 +1575,6 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
}
/*
- * find_idlest_cpu - find the least busy runqueue.
- */
-static int find_idlest_cpu(struct task_struct *p, int this_cpu,
- struct sched_domain *sd)
-{
- unsigned long load, min_load, this_load;
- int i, min_cpu;
- cpumask_t mask;
-
- min_cpu = UINT_MAX;
- min_load = ULONG_MAX;
-
- cpus_and(mask, sd->span, p->cpus_allowed);
-
- for_each_cpu_mask(i, mask) {
- load = target_load(i, sd->wake_idx);
-
- if (load < min_load) {
- min_cpu = i;
- min_load = load;
-
- /* break out early on an idle CPU: */
- if (!min_load)
- break;
- }
- }
-
- /* add +1 to account for the new task */
- this_load = source_load(this_cpu, sd->wake_idx) + SCHED_LOAD_SCALE;
-
- /*
- * Would with the addition of the new task to the
- * current CPU there be an imbalance between this
- * CPU and the idlest CPU?
- *
- * Use half of the balancing threshold - new-context is
- * a good opportunity to balance.
- */
- if (min_load*(100 + (sd->imbalance_pct-100)/2) < this_load*100)
- return min_cpu;
-
- return this_cpu;
-}
-
-/*
* If dest_cpu is allowed for this process, migrate the task to it.
* This is accomplished by forcing the cpu_allowed mask to only
* allow dest_cpu, which will force the cpu onto dest_cpu. Then
@@ -1578,8 +1627,15 @@ void sched_exec(void)
sd = tmp;
if (sd) {
+ struct sched_group *group;
schedstat_inc(sd, sbe_attempts);
- new_cpu = find_idlest_cpu(current, this_cpu, sd);
+ group = find_idlest_group(sd, current, this_cpu);
+ if (!group)
+ goto out;
+ new_cpu = find_idlest_cpu(group, this_cpu);
+ if (new_cpu == -1)
+ goto out;
+
if (new_cpu != this_cpu) {
schedstat_inc(sd, sbe_pushed);
put_cpu();
@@ -1792,12 +1848,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
if (local_group) {
this_load = avg_load;
this = group;
- goto nextgroup;
} else if (avg_load > max_load) {
max_load = avg_load;
busiest = group;
}
-nextgroup:
group = group->next;
} while (group != sd->groups);