diff options
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r-- | kernel/workqueue.c | 637 |
1 files changed, 195 insertions, 442 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 42fa9ad0a810..e7f3e79184dd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -41,6 +41,7 @@ #include <linux/debug_locks.h> #include <linux/lockdep.h> #include <linux/idr.h> +#include <linux/delay.h> #include "workqueue_sched.h" @@ -57,20 +58,10 @@ enum { WORKER_DIE = 1 << 1, /* die die die */ WORKER_IDLE = 1 << 2, /* is idle */ WORKER_PREP = 1 << 3, /* preparing to run works */ - WORKER_ROGUE = 1 << 4, /* not bound to any cpu */ - WORKER_REBIND = 1 << 5, /* mom is home, come back */ - WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ - WORKER_UNBOUND = 1 << 7, /* worker is unbound */ + WORKER_CPU_INTENSIVE = 1 << 4, /* cpu intensive */ + WORKER_UNBOUND = 1 << 5, /* worker is unbound */ - WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND | - WORKER_CPU_INTENSIVE | WORKER_UNBOUND, - - /* gcwq->trustee_state */ - TRUSTEE_START = 0, /* start */ - TRUSTEE_IN_CHARGE = 1, /* trustee in charge of gcwq */ - TRUSTEE_BUTCHER = 2, /* butcher workers */ - TRUSTEE_RELEASE = 3, /* release workers */ - TRUSTEE_DONE = 4, /* trustee is done */ + WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE | WORKER_UNBOUND, BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, @@ -84,7 +75,6 @@ enum { (min two ticks) */ MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ CREATE_COOLDOWN = HZ, /* time to breath after fail */ - TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ /* * Rescue workers are used only on emergencies and shared by @@ -136,7 +126,7 @@ struct worker { unsigned long last_active; /* L: last active timestamp */ unsigned int flags; /* X: flags */ int id; /* I: worker id */ - struct work_struct rebind_work; /* L: rebind worker to cpu */ + int sleeping; /* None */ }; /* @@ -163,10 +153,8 @@ struct global_cwq { struct ida worker_ida; /* L: for worker IDs */ - struct task_struct *trustee; /* L: for gcwq shutdown */ - unsigned int trustee_state; /* L: trustee state */ - wait_queue_head_t trustee_wait; /* trustee wait */ struct worker *first_idle; /* L: first idle worker */ + wait_queue_head_t idle_wait; } ____cacheline_aligned_in_smp; /* @@ -658,66 +646,58 @@ static void wake_up_worker(struct global_cwq *gcwq) } /** - * wq_worker_waking_up - a worker is waking up - * @task: task waking up - * @cpu: CPU @task is waking up to - * - * This function is called during try_to_wake_up() when a worker is - * being awoken. + * wq_worker_running - a worker is running again + * @task: task returning from sleep * - * CONTEXT: - * spin_lock_irq(rq->lock) + * This function is called when a worker returns from schedule() */ -void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) +void wq_worker_running(struct task_struct *task) { struct worker *worker = kthread_data(task); + if (!worker->sleeping) + return; if (!(worker->flags & WORKER_NOT_RUNNING)) - atomic_inc(get_gcwq_nr_running(cpu)); + atomic_inc(get_gcwq_nr_running(smp_processor_id())); + worker->sleeping = 0; } /** * wq_worker_sleeping - a worker is going to sleep * @task: task going to sleep - * @cpu: CPU in question, must be the current CPU number - * - * This function is called during schedule() when a busy worker is - * going to sleep. Worker on the same cpu can be woken up by - * returning pointer to its task. * - * CONTEXT: - * spin_lock_irq(rq->lock) - * - * RETURNS: - * Worker task on @cpu to wake up, %NULL if none. + * This function is called from schedule() when a busy worker is + * going to sleep. */ -struct task_struct *wq_worker_sleeping(struct task_struct *task, - unsigned int cpu) +void wq_worker_sleeping(struct task_struct *task) { - struct worker *worker = kthread_data(task), *to_wakeup = NULL; - struct global_cwq *gcwq = get_gcwq(cpu); - atomic_t *nr_running = get_gcwq_nr_running(cpu); + struct worker *worker = kthread_data(task); + struct global_cwq *gcwq; + int cpu; if (worker->flags & WORKER_NOT_RUNNING) - return NULL; + return; - /* this can only happen on the local cpu */ - BUG_ON(cpu != raw_smp_processor_id()); + if (WARN_ON_ONCE(worker->sleeping)) + return; + + worker->sleeping = 1; + cpu = smp_processor_id(); + gcwq = get_gcwq(cpu); + spin_lock_irq(&gcwq->lock); /* * The counterpart of the following dec_and_test, implied mb, * worklist not empty test sequence is in insert_work(). * Please read comment there. - * - * NOT_RUNNING is clear. This means that trustee is not in - * charge and we're running on the local cpu w/ rq lock held - * and preemption disabled, which in turn means that none else - * could be manipulating idle_list, so dereferencing idle_list - * without gcwq lock is safe. */ - if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist)) - to_wakeup = first_worker(gcwq); - return to_wakeup ? to_wakeup->task : NULL; + if (atomic_dec_and_test(get_gcwq_nr_running(cpu)) && + !list_empty(&gcwq->worklist)) { + worker = first_worker(gcwq); + if (worker) + wake_up_process(worker->task); + } + spin_unlock_irq(&gcwq->lock); } /** @@ -979,13 +959,38 @@ static bool is_chained_work(struct workqueue_struct *wq) return false; } -static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, - struct work_struct *work) +static void ___queue_work(struct workqueue_struct *wq, struct global_cwq *gcwq, + struct work_struct *work) { - struct global_cwq *gcwq; struct cpu_workqueue_struct *cwq; struct list_head *worklist; unsigned int work_flags; + + /* gcwq determined, get cwq and queue */ + cwq = get_cwq(gcwq->cpu, wq); + trace_workqueue_queue_work(gcwq->cpu, cwq, work); + + BUG_ON(!list_empty(&work->entry)); + + cwq->nr_in_flight[cwq->work_color]++; + work_flags = work_color_to_flags(cwq->work_color); + + if (likely(cwq->nr_active < cwq->max_active)) { + trace_workqueue_activate_work(work); + cwq->nr_active++; + worklist = gcwq_determine_ins_pos(gcwq, cwq); + } else { + work_flags |= WORK_STRUCT_DELAYED; + worklist = &cwq->delayed_works; + } + + insert_work(cwq, work, worklist, work_flags); +} + +static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, + struct work_struct *work) +{ + struct global_cwq *gcwq; unsigned long flags; debug_work_activate(work); @@ -1031,27 +1036,32 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, spin_lock_irqsave(&gcwq->lock, flags); } - /* gcwq determined, get cwq and queue */ - cwq = get_cwq(gcwq->cpu, wq); - trace_workqueue_queue_work(cpu, cwq, work); + ___queue_work(wq, gcwq, work); - BUG_ON(!list_empty(&work->entry)); + spin_unlock_irqrestore(&gcwq->lock, flags); +} - cwq->nr_in_flight[cwq->work_color]++; - work_flags = work_color_to_flags(cwq->work_color); +/** + * queue_work_on - queue work on specific cpu + * @cpu: CPU number to execute work on + * @wq: workqueue to use + * @work: work to queue + * + * Returns 0 if @work was already on a queue, non-zero otherwise. + * + * We queue the work to a specific CPU, the caller must ensure it + * can't go away. + */ +static int +__queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) +{ + int ret = 0; - if (likely(cwq->nr_active < cwq->max_active)) { - trace_workqueue_activate_work(work); - cwq->nr_active++; - worklist = gcwq_determine_ins_pos(gcwq, cwq); - } else { - work_flags |= WORK_STRUCT_DELAYED; - worklist = &cwq->delayed_works; + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + __queue_work(cpu, wq, work); + ret = 1; } - - insert_work(cwq, work, worklist, work_flags); - - spin_unlock_irqrestore(&gcwq->lock, flags); + return ret; } /** @@ -1068,34 +1078,19 @@ int queue_work(struct workqueue_struct *wq, struct work_struct *work) { int ret; - ret = queue_work_on(get_cpu(), wq, work); - put_cpu(); + ret = __queue_work_on(get_cpu_light(), wq, work); + put_cpu_light(); return ret; } EXPORT_SYMBOL_GPL(queue_work); -/** - * queue_work_on - queue work on specific cpu - * @cpu: CPU number to execute work on - * @wq: workqueue to use - * @work: work to queue - * - * Returns 0 if @work was already on a queue, non-zero otherwise. - * - * We queue the work to a specific CPU, the caller must ensure it - * can't go away. - */ int queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) { - int ret = 0; + WARN_ON(wq->flags & WQ_NON_AFFINE); - if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { - __queue_work(cpu, wq, work); - ret = 1; - } - return ret; + return __queue_work_on(cpu, wq, work); } EXPORT_SYMBOL_GPL(queue_work_on); @@ -1141,6 +1136,8 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; + WARN_ON((wq->flags & WQ_NON_AFFINE) && cpu != -1); + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { unsigned int lcpu; @@ -1206,12 +1203,13 @@ static void worker_enter_idle(struct worker *worker) /* idle_list is LIFO */ list_add(&worker->entry, &gcwq->idle_list); - if (likely(!(worker->flags & WORKER_ROGUE))) { - if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer)) - mod_timer(&gcwq->idle_timer, - jiffies + IDLE_WORKER_TIMEOUT); - } else - wake_up_all(&gcwq->trustee_wait); + if (gcwq->nr_idle == gcwq->nr_workers) + wake_up_all(&gcwq->idle_wait); + + if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer)) { + mod_timer(&gcwq->idle_timer, + jiffies + IDLE_WORKER_TIMEOUT); + } /* sanity check nr_running */ WARN_ON_ONCE(gcwq->nr_workers == gcwq->nr_idle && @@ -1288,8 +1286,14 @@ __acquires(&gcwq->lock) return false; if (task_cpu(task) == gcwq->cpu && cpumask_equal(¤t->cpus_allowed, - get_cpu_mask(gcwq->cpu))) + get_cpu_mask(gcwq->cpu))) { + /* + * Since we're binding to a particular cpu and need to + * stay there for correctness, mark us PF_THREAD_BOUND. + */ + task->flags |= PF_THREAD_BOUND; return true; + } spin_unlock_irq(&gcwq->lock); /* @@ -1303,20 +1307,15 @@ __acquires(&gcwq->lock) } } -/* - * Function for worker->rebind_work used to rebind rogue busy workers - * to the associated cpu which is coming back online. This is - * scheduled by cpu up but can race with other cpu hotplug operations - * and may be executed twice without intervening cpu down. - */ -static void worker_rebind_fn(struct work_struct *work) +static void worker_unbind_and_unlock(struct worker *worker) { - struct worker *worker = container_of(work, struct worker, rebind_work); struct global_cwq *gcwq = worker->gcwq; + struct task_struct *task = worker->task; - if (worker_maybe_bind_and_lock(worker)) - worker_clr_flags(worker, WORKER_REBIND); - + /* + * Its no longer required we're PF_THREAD_BOUND, the work is done. + */ + task->flags &= ~PF_THREAD_BOUND; spin_unlock_irq(&gcwq->lock); } @@ -1328,7 +1327,6 @@ static struct worker *alloc_worker(void) if (worker) { INIT_LIST_HEAD(&worker->entry); INIT_LIST_HEAD(&worker->scheduled); - INIT_WORK(&worker->rebind_work, worker_rebind_fn); /* on creation a worker is in !idle && prep state */ worker->flags = WORKER_PREP; } @@ -1383,15 +1381,9 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind) if (IS_ERR(worker->task)) goto fail; - /* - * A rogue worker will become a regular one if CPU comes - * online later on. Make sure every worker has - * PF_THREAD_BOUND set. - */ if (bind && !on_unbound_cpu) kthread_bind(worker->task, gcwq->cpu); else { - worker->task->flags |= PF_THREAD_BOUND; if (on_unbound_cpu) worker->flags |= WORKER_UNBOUND; } @@ -1668,13 +1660,6 @@ static bool manage_workers(struct worker *worker) gcwq->flags &= ~GCWQ_MANAGING_WORKERS; - /* - * The trustee might be waiting to take over the manager - * position, tell it we're done. - */ - if (unlikely(gcwq->trustee)) - wake_up_all(&gcwq->trustee_wait); - return ret; } @@ -2075,7 +2060,7 @@ repeat: if (keep_working(gcwq)) wake_up_worker(gcwq); - spin_unlock_irq(&gcwq->lock); + worker_unbind_and_unlock(rescuer); } schedule(); @@ -3024,7 +3009,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, if (IS_ERR(rescuer->task)) goto err; - rescuer->task->flags |= PF_THREAD_BOUND; wake_up_process(rescuer->task); } @@ -3214,171 +3198,76 @@ EXPORT_SYMBOL_GPL(work_busy); * gcwqs serve mix of short, long and very long running works making * blocked draining impractical. * - * This is solved by allowing a gcwq to be detached from CPU, running - * it with unbound (rogue) workers and allowing it to be reattached - * later if the cpu comes back online. A separate thread is created - * to govern a gcwq in such state and is called the trustee of the - * gcwq. - * - * Trustee states and their descriptions. - * - * START Command state used on startup. On CPU_DOWN_PREPARE, a - * new trustee is started with this state. - * - * IN_CHARGE Once started, trustee will enter this state after - * assuming the manager role and making all existing - * workers rogue. DOWN_PREPARE waits for trustee to - * enter this state. After reaching IN_CHARGE, trustee - * tries to execute the pending worklist until it's empty - * and the state is set to BUTCHER, or the state is set - * to RELEASE. - * - * BUTCHER Command state which is set by the cpu callback after - * the cpu has went down. Once this state is set trustee - * knows that there will be no new works on the worklist - * and once the worklist is empty it can proceed to - * killing idle workers. - * - * RELEASE Command state which is set by the cpu callback if the - * cpu down has been canceled or it has come online - * again. After recognizing this state, trustee stops - * trying to drain or butcher and clears ROGUE, rebinds - * all remaining workers back to the cpu and releases - * manager role. - * - * DONE Trustee will enter this state after BUTCHER or RELEASE - * is complete. - * - * trustee CPU draining - * took over down complete - * START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE - * | | ^ - * | CPU is back online v return workers | - * ----------------> RELEASE -------------- */ -/** - * trustee_wait_event_timeout - timed event wait for trustee - * @cond: condition to wait for - * @timeout: timeout in jiffies - * - * wait_event_timeout() for trustee to use. Handles locking and - * checks for RELEASE request. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock) which may be released and regrabbed - * multiple times. To be used by trustee. - * - * RETURNS: - * Positive indicating left time if @cond is satisfied, 0 if timed - * out, -1 if canceled. - */ -#define trustee_wait_event_timeout(cond, timeout) ({ \ - long __ret = (timeout); \ - while (!((cond) || (gcwq->trustee_state == TRUSTEE_RELEASE)) && \ - __ret) { \ - spin_unlock_irq(&gcwq->lock); \ - __wait_event_timeout(gcwq->trustee_wait, (cond) || \ - (gcwq->trustee_state == TRUSTEE_RELEASE), \ - __ret); \ - spin_lock_irq(&gcwq->lock); \ - } \ - gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret); \ -}) +static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct global_cwq *gcwq = get_gcwq(cpu); + struct worker *uninitialized_var(new_worker); + unsigned long flags; -/** - * trustee_wait_event - event wait for trustee - * @cond: condition to wait for - * - * wait_event() for trustee to use. Automatically handles locking and - * checks for CANCEL request. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock) which may be released and regrabbed - * multiple times. To be used by trustee. - * - * RETURNS: - * 0 if @cond is satisfied, -1 if canceled. - */ -#define trustee_wait_event(cond) ({ \ - long __ret1; \ - __ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\ - __ret1 < 0 ? -1 : 0; \ -}) + action &= ~CPU_TASKS_FROZEN; -static int __cpuinit trustee_thread(void *__gcwq) -{ - struct global_cwq *gcwq = __gcwq; - struct worker *worker; - struct work_struct *work; - struct hlist_node *pos; - long rc; - int i; + switch (action) { + case CPU_UP_PREPARE: + BUG_ON(gcwq->first_idle); + new_worker = create_worker(gcwq, false); + if (!new_worker) + return NOTIFY_BAD; + case CPU_UP_CANCELED: + case CPU_ONLINE: + break; + default: + return notifier_from_errno(0); + } - BUG_ON(gcwq->cpu != smp_processor_id()); + /* some are called w/ irq disabled, don't disturb irq status */ + spin_lock_irqsave(&gcwq->lock, flags); - spin_lock_irq(&gcwq->lock); - /* - * Claim the manager position and make all workers rogue. - * Trustee must be bound to the target cpu and can't be - * cancelled. - */ - BUG_ON(gcwq->cpu != smp_processor_id()); - rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS)); - BUG_ON(rc < 0); + switch (action) { + case CPU_UP_PREPARE: + BUG_ON(gcwq->first_idle); + gcwq->first_idle = new_worker; + break; - gcwq->flags |= GCWQ_MANAGING_WORKERS; + case CPU_UP_CANCELED: + destroy_worker(gcwq->first_idle); + gcwq->first_idle = NULL; + break; - list_for_each_entry(worker, &gcwq->idle_list, entry) - worker->flags |= WORKER_ROGUE; + case CPU_ONLINE: + spin_unlock_irq(&gcwq->lock); + kthread_bind(gcwq->first_idle->task, cpu); + spin_lock_irq(&gcwq->lock); + gcwq->flags |= GCWQ_MANAGE_WORKERS; + start_worker(gcwq->first_idle); + gcwq->first_idle = NULL; + break; + } - for_each_busy_worker(worker, i, pos, gcwq) - worker->flags |= WORKER_ROGUE; + spin_unlock_irqrestore(&gcwq->lock, flags); - /* - * Call schedule() so that we cross rq->lock and thus can - * guarantee sched callbacks see the rogue flag. This is - * necessary as scheduler callbacks may be invoked from other - * cpus. - */ - spin_unlock_irq(&gcwq->lock); - schedule(); - spin_lock_irq(&gcwq->lock); + return notifier_from_errno(0); +} - /* - * Sched callbacks are disabled now. Zap nr_running. After - * this, nr_running stays zero and need_more_worker() and - * keep_working() are always true as long as the worklist is - * not empty. - */ - atomic_set(get_gcwq_nr_running(gcwq->cpu), 0); +static void flush_gcwq(struct global_cwq *gcwq) +{ + struct work_struct *work, *nw; + struct worker *worker, *n; + LIST_HEAD(non_affine_works); - spin_unlock_irq(&gcwq->lock); - del_timer_sync(&gcwq->idle_timer); spin_lock_irq(&gcwq->lock); + list_for_each_entry_safe(work, nw, &gcwq->worklist, entry) { + struct workqueue_struct *wq = get_work_cwq(work)->wq; - /* - * We're now in charge. Notify and proceed to drain. We need - * to keep the gcwq running during the whole CPU down - * procedure as other cpu hotunplug callbacks may need to - * flush currently running tasks. - */ - gcwq->trustee_state = TRUSTEE_IN_CHARGE; - wake_up_all(&gcwq->trustee_wait); + if (wq->flags & WQ_NON_AFFINE) + list_move(&work->entry, &non_affine_works); + } - /* - * The original cpu is in the process of dying and may go away - * anytime now. When that happens, we and all workers would - * be migrated to other cpus. Try draining any left work. We - * want to get it over with ASAP - spam rescuers, wake up as - * many idlers as necessary and create new ones till the - * worklist is empty. Note that if the gcwq is frozen, there - * may be frozen works in freezable cwqs. Don't declare - * completion while frozen. - */ - while (gcwq->nr_workers != gcwq->nr_idle || - gcwq->flags & GCWQ_FREEZING || - gcwq->trustee_state == TRUSTEE_IN_CHARGE) { + while (!list_empty(&gcwq->worklist)) { int nr_works = 0; list_for_each_entry(work, &gcwq->worklist, entry) { @@ -3392,189 +3281,54 @@ static int __cpuinit trustee_thread(void *__gcwq) wake_up_process(worker->task); } + spin_unlock_irq(&gcwq->lock); + if (need_to_create_worker(gcwq)) { - spin_unlock_irq(&gcwq->lock); - worker = create_worker(gcwq, false); - spin_lock_irq(&gcwq->lock); - if (worker) { - worker->flags |= WORKER_ROGUE; + worker = create_worker(gcwq, true); + if (worker) start_worker(worker); - } } - /* give a breather */ - if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0) - break; - } - - /* - * Either all works have been scheduled and cpu is down, or - * cpu down has already been canceled. Wait for and butcher - * all workers till we're canceled. - */ - do { - rc = trustee_wait_event(!list_empty(&gcwq->idle_list)); - while (!list_empty(&gcwq->idle_list)) - destroy_worker(list_first_entry(&gcwq->idle_list, - struct worker, entry)); - } while (gcwq->nr_workers && rc >= 0); + wait_event_timeout(gcwq->idle_wait, + gcwq->nr_idle == gcwq->nr_workers, HZ/10); - /* - * At this point, either draining has completed and no worker - * is left, or cpu down has been canceled or the cpu is being - * brought back up. There shouldn't be any idle one left. - * Tell the remaining busy ones to rebind once it finishes the - * currently scheduled works by scheduling the rebind_work. - */ - WARN_ON(!list_empty(&gcwq->idle_list)); - - for_each_busy_worker(worker, i, pos, gcwq) { - struct work_struct *rebind_work = &worker->rebind_work; - - /* - * Rebind_work may race with future cpu hotplug - * operations. Use a separate flag to mark that - * rebinding is scheduled. - */ - worker->flags |= WORKER_REBIND; - worker->flags &= ~WORKER_ROGUE; + spin_lock_irq(&gcwq->lock); + } - /* queue rebind_work, wq doesn't matter, use the default one */ - if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, - work_data_bits(rebind_work))) - continue; + WARN_ON(gcwq->nr_workers != gcwq->nr_idle); - debug_work_activate(rebind_work); - insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work, - worker->scheduled.next, - work_color_to_flags(WORK_NO_COLOR)); - } + list_for_each_entry_safe(worker, n, &gcwq->idle_list, entry) + destroy_worker(worker); - /* relinquish manager role */ - gcwq->flags &= ~GCWQ_MANAGING_WORKERS; + WARN_ON(gcwq->nr_workers || gcwq->nr_idle); - /* notify completion */ - gcwq->trustee = NULL; - gcwq->trustee_state = TRUSTEE_DONE; - wake_up_all(&gcwq->trustee_wait); spin_unlock_irq(&gcwq->lock); - return 0; -} -/** - * wait_trustee_state - wait for trustee to enter the specified state - * @gcwq: gcwq the trustee of interest belongs to - * @state: target state to wait for - * - * Wait for the trustee to reach @state. DONE is already matched. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock) which may be released and regrabbed - * multiple times. To be used by cpu_callback. - */ -static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state) -__releases(&gcwq->lock) -__acquires(&gcwq->lock) -{ - if (!(gcwq->trustee_state == state || - gcwq->trustee_state == TRUSTEE_DONE)) { - spin_unlock_irq(&gcwq->lock); - __wait_event(gcwq->trustee_wait, - gcwq->trustee_state == state || - gcwq->trustee_state == TRUSTEE_DONE); - spin_lock_irq(&gcwq->lock); + gcwq = get_gcwq(get_cpu_light()); + spin_lock_irq(&gcwq->lock); + list_for_each_entry_safe(work, nw, &non_affine_works, entry) { + list_del_init(&work->entry); + ___queue_work(get_work_cwq(work)->wq, gcwq, work); } + spin_unlock_irq(&gcwq->lock); + put_cpu_light(); } -static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, +static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; struct global_cwq *gcwq = get_gcwq(cpu); - struct task_struct *new_trustee = NULL; - struct worker *uninitialized_var(new_worker); - unsigned long flags; action &= ~CPU_TASKS_FROZEN; - switch (action) { - case CPU_DOWN_PREPARE: - new_trustee = kthread_create(trustee_thread, gcwq, - "workqueue_trustee/%d\n", cpu); - if (IS_ERR(new_trustee)) - return notifier_from_errno(PTR_ERR(new_trustee)); - kthread_bind(new_trustee, cpu); - /* fall through */ - case CPU_UP_PREPARE: - BUG_ON(gcwq->first_idle); - new_worker = create_worker(gcwq, false); - if (!new_worker) { - if (new_trustee) - kthread_stop(new_trustee); - return NOTIFY_BAD; - } - } + switch (action) { + case CPU_DOWN_PREPARE: + flush_gcwq(gcwq); + break; + } - /* some are called w/ irq disabled, don't disturb irq status */ - spin_lock_irqsave(&gcwq->lock, flags); - - switch (action) { - case CPU_DOWN_PREPARE: - /* initialize trustee and tell it to acquire the gcwq */ - BUG_ON(gcwq->trustee || gcwq->trustee_state != TRUSTEE_DONE); - gcwq->trustee = new_trustee; - gcwq->trustee_state = TRUSTEE_START; - wake_up_process(gcwq->trustee); - wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE); - /* fall through */ - case CPU_UP_PREPARE: - BUG_ON(gcwq->first_idle); - gcwq->first_idle = new_worker; - break; - - case CPU_DYING: - /* - * Before this, the trustee and all workers except for - * the ones which are still executing works from - * before the last CPU down must be on the cpu. After - * this, they'll all be diasporas. - */ - gcwq->flags |= GCWQ_DISASSOCIATED; - break; - - case CPU_POST_DEAD: - gcwq->trustee_state = TRUSTEE_BUTCHER; - /* fall through */ - case CPU_UP_CANCELED: - destroy_worker(gcwq->first_idle); - gcwq->first_idle = NULL; - break; - - case CPU_DOWN_FAILED: - case CPU_ONLINE: - gcwq->flags &= ~GCWQ_DISASSOCIATED; - if (gcwq->trustee_state != TRUSTEE_DONE) { - gcwq->trustee_state = TRUSTEE_RELEASE; - wake_up_process(gcwq->trustee); - wait_trustee_state(gcwq, TRUSTEE_DONE); - } - - /* - * Trustee is done and there might be no worker left. - * Put the first_idle in and request a real manager to - * take a look. - */ - spin_unlock_irq(&gcwq->lock); - kthread_bind(gcwq->first_idle->task, cpu); - spin_lock_irq(&gcwq->lock); - gcwq->flags |= GCWQ_MANAGE_WORKERS; - start_worker(gcwq->first_idle); - gcwq->first_idle = NULL; - break; - } - - spin_unlock_irqrestore(&gcwq->lock, flags); return notifier_from_errno(0); } @@ -3772,7 +3526,8 @@ static int __init init_workqueues(void) unsigned int cpu; int i; - cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); + cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_ACTIVE); + hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_INACTIVE); /* initialize gcwqs */ for_each_gcwq_cpu(cpu) { @@ -3795,9 +3550,7 @@ static int __init init_workqueues(void) (unsigned long)gcwq); ida_init(&gcwq->worker_ida); - - gcwq->trustee_state = TRUSTEE_DONE; - init_waitqueue_head(&gcwq->trustee_wait); + init_waitqueue_head(&gcwq->idle_wait); } /* create the initial worker */ |