diff options
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 31 |
1 files changed, 28 insertions, 3 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index df77c605c7a6..e8b652ebe027 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1000,7 +1000,7 @@ struct numa_stats { */ static void update_numa_stats(struct numa_stats *ns, int nid) { - int cpu; + int cpu, cpus = 0; memset(ns, 0, sizeof(*ns)); for_each_cpu(cpu, cpumask_of_node(nid)) { @@ -1009,8 +1009,21 @@ static void update_numa_stats(struct numa_stats *ns, int nid) ns->nr_running += rq->nr_running; ns->load += weighted_cpuload(cpu); ns->power += power_of(cpu); + + cpus++; } + /* + * If we raced with hotplug and there are no CPUs left in our mask + * the @ns structure is NULL'ed and task_numa_compare() will + * not find this node attractive. + * + * We'll either bail at !has_capacity, or we'll detect a huge imbalance + * and bail there. + */ + if (!cpus) + return; + ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power; ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE); ns->has_capacity = (ns->nr_running < ns->capacity); @@ -1201,9 +1214,21 @@ static int task_numa_migrate(struct task_struct *p) */ rcu_read_lock(); sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu)); - env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2; + if (sd) + env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2; rcu_read_unlock(); + /* + * Cpusets can break the scheduler domain tree into smaller + * balance domains, some of which do not cross NUMA boundaries. + * Tasks that are "trapped" in such domains cannot be migrated + * elsewhere, so there is no point in (re)trying. + */ + if (unlikely(!sd)) { + p->numa_preferred_nid = cpu_to_node(task_cpu(p)); + return -EINVAL; + } + taskweight = task_weight(p, env.src_nid); groupweight = group_weight(p, env.src_nid); update_numa_stats(&env.src_stats, env.src_nid); @@ -2153,7 +2178,7 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa, long contrib; /* The fraction of a cpu used by this cfs_rq */ - contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT, + contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT, sa->runnable_avg_period + 1); contrib -= cfs_rq->tg_runnable_contrib; |