Diffstat (limited to 'block')
-rw-r--r--	block/blk-cgroup.c	|   7
-rw-r--r--	block/blk-cgroup.h	|   3
-rw-r--r--	block/blk-core.c	| 180
-rw-r--r--	block/blk-exec.c	|   2
-rw-r--r--	block/blk-flush.c	|   4
-rw-r--r--	block/blk-sysfs.c	|  11
-rw-r--r--	block/blk-throttle.c	|   9
-rw-r--r--	block/cfq-iosched.c	|  37
-rw-r--r--	block/elevator.c	|   7
-rw-r--r--	block/genhd.c	|   8
10 files changed, 163 insertions, 105 deletions
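Most of the churn in the diff below comes from __blk_run_queue() losing its force_kblockd argument; callers that previously passed true now use the new blk_run_queue_async() helper added in blk-core.c. The sketch below is illustrative only: kick_example_queue() is a made-up helper, not part of the patch, and it assumes the blk_run_queue_async() prototype is visible via linux/blkdev.h (that header change is outside this block/-only diffstat).

#include <linux/blkdev.h>
#include <linux/spinlock.h>

/*
 * Made-up helper for illustration: the two ways of kicking a queue after
 * this series. __blk_run_queue() must be called with queue_lock held and
 * interrupts disabled; blk_run_queue_async() just punts to kblockd, which
 * is what blk-flush.c now does instead of __blk_run_queue(q, true).
 */
static void kick_example_queue(struct request_queue *q, bool from_completion)
{
	if (from_completion) {
		/* completion-style context: let kblockd run the queue */
		blk_run_queue_async(q);
	} else {
		/* direct dispatch, the old __blk_run_queue(q, false) path */
		spin_lock_irq(q->queue_lock);
		__blk_run_queue(q);
		spin_unlock_irq(q->queue_lock);
	}
}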
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f0605ab2a761..471fdcc5df85 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -114,6 +114,13 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 }
 EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
 
+struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
+{
+	return container_of(task_subsys_state(tsk, blkio_subsys_id),
+			    struct blkio_cgroup, css);
+}
+EXPORT_SYMBOL_GPL(task_blkio_cgroup);
+
 static inline void blkio_update_group_weight(struct blkio_group *blkg,
 					     unsigned int weight)
 {
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 10919fae2d3a..c774930cc206 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -291,6 +291,7 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
 #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
 extern struct blkio_cgroup blkio_root_cgroup;
 extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
+extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
 extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 	struct blkio_group *blkg, void *key, dev_t dev,
 	enum blkio_policy_id plid);
@@ -314,6 +315,8 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 struct cgroup;
 static inline struct blkio_cgroup *
 cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
+static inline struct blkio_cgroup *
+task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
 
 static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 		struct blkio_group *blkg, void *key, dev_t dev,
diff --git a/block/blk-core.c b/block/blk-core.c
index 90f22cc30799..3fe00a14822a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -198,26 +198,13 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 }
 EXPORT_SYMBOL(blk_dump_rq_flags);
 
-/*
- * Make sure that plugs that were pending when this function was entered,
- * are now complete and requests pushed to the queue.
-*/
-static inline void queue_sync_plugs(struct request_queue *q)
-{
-	/*
-	 * If the current process is plugged and has barriers submitted,
-	 * we will livelock if we don't unplug first.
-	 */
-	blk_flush_plug(current);
-}
-
 static void blk_delay_work(struct work_struct *work)
 {
 	struct request_queue *q;
 
 	q = container_of(work, struct request_queue, delay_work.work);
 	spin_lock_irq(q->queue_lock);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	spin_unlock_irq(q->queue_lock);
 }
 
@@ -233,7 +220,8 @@ static void blk_delay_work(struct work_struct *work)
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-	schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
+	queue_delayed_work(kblockd_workqueue, &q->delay_work,
+			   msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_delay_queue);
 
@@ -251,7 +239,7 @@ void blk_start_queue(struct request_queue *q)
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -298,38 +286,44 @@ void blk_sync_queue(struct request_queue *q)
 {
 	del_timer_sync(&q->timeout);
 	cancel_delayed_work_sync(&q->delay_work);
-	queue_sync_plugs(q);
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
 /**
  * __blk_run_queue - run a single device queue
  * @q:	The queue to run
- * @force_kblockd: Don't run @q->request_fn directly. Use kblockd.
  *
  * Description:
  *    See @blk_run_queue. This variant must be called with the queue lock
  *    held and interrupts disabled.
- *
  */
-void __blk_run_queue(struct request_queue *q, bool force_kblockd)
+void __blk_run_queue(struct request_queue *q)
 {
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
-	/*
-	 * Only recurse once to avoid overrunning the stack, let the unplug
-	 * handling reinvoke the handler shortly if we already got there.
-	 */
-	if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else
-		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+	q->request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
 /**
+ * blk_run_queue_async - run a single device queue in workqueue context
+ * @q:	The queue to run
+ *
+ * Description:
+ *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
+ *    of us.
+ */
+void blk_run_queue_async(struct request_queue *q)
+{
+	if (likely(!blk_queue_stopped(q))) {
+		__cancel_delayed_work(&q->delay_work);
+		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+	}
+}
+EXPORT_SYMBOL(blk_run_queue_async);
+
+/**
  * blk_run_queue - run a single device queue
  * @q: The queue to run
  *
@@ -342,7 +336,7 @@ void blk_run_queue(struct request_queue *q)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
@@ -991,7 +985,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
 		blk_queue_end_tag(q, rq);
 
 	add_acct_request(q, rq, where);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_insert_request);
@@ -1311,7 +1305,15 @@ get_rq:
 
 	plug = current->plug;
 	if (plug) {
-		if (!plug->should_sort && !list_empty(&plug->list)) {
+		/*
+		 * If this is the first request added after a plug, fire
+		 * of a plug trace. If others have been added before, check
+		 * if we have multiple devices in this plug. If so, make a
+		 * note to sort the list before dispatch.
+		 */
+		if (list_empty(&plug->list))
+			trace_block_plug(q);
+		else if (!plug->should_sort) {
 			struct request *__rq;
 
 			__rq = list_entry_rq(plug->list.prev);
@@ -1327,7 +1329,7 @@ get_rq:
 	} else {
 		spin_lock_irq(q->queue_lock);
 		add_acct_request(q, req, where);
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 out_unlock:
 		spin_unlock_irq(q->queue_lock);
 	}
@@ -2644,6 +2646,7 @@ void blk_start_plug(struct blk_plug *plug)
 
 	plug->magic = PLUG_MAGIC;
 	INIT_LIST_HEAD(&plug->list);
+	INIT_LIST_HEAD(&plug->cb_list);
 	plug->should_sort = 0;
 
 	/*
@@ -2668,33 +2671,93 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
 	return !(rqa->q <= rqb->q);
 }
 
-static void flush_plug_list(struct blk_plug *plug)
+/*
+ * If 'from_schedule' is true, then postpone the dispatch of requests
+ * until a safe kblockd context. We due this to avoid accidental big
+ * additional stack usage in driver dispatch, in places where the originally
+ * plugger did not intend it.
+ */
+static void queue_unplugged(struct request_queue *q, unsigned int depth,
+			    bool from_schedule)
+	__releases(q->queue_lock)
+{
+	trace_block_unplug(q, depth, !from_schedule);
+
+	/*
+	 * If we are punting this to kblockd, then we can safely drop
+	 * the queue_lock before waking kblockd (which needs to take
+	 * this lock).
+	 */
+	if (from_schedule) {
+		spin_unlock(q->queue_lock);
+		blk_run_queue_async(q);
+	} else {
+		__blk_run_queue(q);
+		spin_unlock(q->queue_lock);
+	}
+
+}
+
+static void flush_plug_callbacks(struct blk_plug *plug)
+{
+	LIST_HEAD(callbacks);
+
+	if (list_empty(&plug->cb_list))
+		return;
+
+	list_splice_init(&plug->cb_list, &callbacks);
+
+	while (!list_empty(&callbacks)) {
+		struct blk_plug_cb *cb = list_first_entry(&callbacks,
+							  struct blk_plug_cb,
+							  list);
+		list_del(&cb->list);
+		cb->callback(cb);
+	}
+}
+
+void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct request_queue *q;
 	unsigned long flags;
 	struct request *rq;
+	LIST_HEAD(list);
+	unsigned int depth;
 
 	BUG_ON(plug->magic != PLUG_MAGIC);
 
+	flush_plug_callbacks(plug);
 	if (list_empty(&plug->list))
 		return;
 
-	if (plug->should_sort)
-		list_sort(NULL, &plug->list, plug_rq_cmp);
+	list_splice_init(&plug->list, &list);
+
+	if (plug->should_sort) {
+		list_sort(NULL, &list, plug_rq_cmp);
+		plug->should_sort = 0;
+	}
 
 	q = NULL;
+	depth = 0;
+
+	/*
+	 * Save and disable interrupts here, to avoid doing it for every
+	 * queue lock we have to take.
+	 */
 	local_irq_save(flags);
-	while (!list_empty(&plug->list)) {
-		rq = list_entry_rq(plug->list.next);
+	while (!list_empty(&list)) {
+		rq = list_entry_rq(list.next);
 		list_del_init(&rq->queuelist);
 		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
 		BUG_ON(!rq->q);
 		if (rq->q != q) {
-			if (q) {
-				__blk_run_queue(q, false);
-				spin_unlock(q->queue_lock);
-			}
+			/*
+			 * This drops the queue lock
+			 */
+			if (q)
+				queue_unplugged(q, depth, from_schedule);
 			q = rq->q;
+			depth = 0;
 			spin_lock(q->queue_lock);
 		}
 		rq->cmd_flags &= ~REQ_ON_PLUG;
@@ -2706,38 +2769,27 @@ static void flush_plug_list(struct blk_plug *plug)
 			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
 		else
 			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
-	}
 
-	if (q) {
-		__blk_run_queue(q, false);
-		spin_unlock(q->queue_lock);
+		depth++;
 	}
 
-	BUG_ON(!list_empty(&plug->list));
-	local_irq_restore(flags);
-}
-
-static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
-	flush_plug_list(plug);
+	/*
+	 * This drops the queue lock
+	 */
+	if (q)
+		queue_unplugged(q, depth, from_schedule);
 
-	if (plug == tsk->plug)
-		tsk->plug = NULL;
+	local_irq_restore(flags);
 }
 
 void blk_finish_plug(struct blk_plug *plug)
 {
-	if (plug)
-		__blk_finish_plug(current, plug);
-}
-EXPORT_SYMBOL(blk_finish_plug);
+	blk_flush_plug_list(plug, false);
 
-void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
-	__blk_finish_plug(tsk, plug);
-	tsk->plug = plug;
+	if (plug == current->plug)
+		current->plug = NULL;
 }
-EXPORT_SYMBOL(__blk_flush_plug);
+EXPORT_SYMBOL(blk_finish_plug);
 
 int __init blk_dev_init(void)
 {
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 7482b7fa863b..81e31819a597 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
 	__elv_add_request(q, rq, where);
-	__blk_run_queue(q, false);
+	__blk_run_queue(q);
 	/* the queue is stopped so it won't be plugged+unplugged */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index eba4a2790c6c..6c9b5e189e62 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -218,7 +218,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * request_fn may confuse the driver. Always use kblockd.
 	 */
 	if (queued)
-		__blk_run_queue(q, true);
+		blk_run_queue_async(q);
 }
 
 /**
@@ -274,7 +274,7 @@ static void flush_data_end_io(struct request *rq, int error)
 	 * the comment in flush_end_io().
 	 */
 	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
-		__blk_run_queue(q, true);
+		blk_run_queue_async(q);
 }
 
 /**
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 261c75c665ae..bd236313f35d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -66,14 +66,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 
 	if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
 		blk_set_queue_full(q, BLK_RW_SYNC);
-	} else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
+	} else {
 		blk_clear_queue_full(q, BLK_RW_SYNC);
 		wake_up(&rl->wait[BLK_RW_SYNC]);
 	}
 
 	if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
 		blk_set_queue_full(q, BLK_RW_ASYNC);
-	} else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
+	} else {
 		blk_clear_queue_full(q, BLK_RW_ASYNC);
 		wake_up(&rl->wait[BLK_RW_ASYNC]);
 	}
@@ -498,7 +498,6 @@ int blk_register_queue(struct gendisk *disk)
 {
 	int ret;
 	struct device *dev = disk_to_dev(disk);
-
 	struct request_queue *q = disk->queue;
 
 	if (WARN_ON(!q))
@@ -509,8 +508,10 @@ int blk_register_queue(struct gendisk *disk)
 		return ret;
 
 	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
-	if (ret < 0)
+	if (ret < 0) {
+		blk_trace_remove_sysfs(dev);
 		return ret;
+	}
 
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
@@ -521,7 +522,7 @@ int blk_register_queue(struct gendisk *disk)
 	if (ret) {
 		kobject_uevent(&q->kobj, KOBJ_REMOVE);
 		kobject_del(&q->kobj);
-		blk_trace_remove_sysfs(disk_to_dev(disk));
+		blk_trace_remove_sysfs(dev);
 		kobject_put(&dev->kobj);
 		return ret;
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 0475a22a420d..252a81a306f7 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -160,9 +160,8 @@ static void throtl_put_tg(struct throtl_grp *tg)
 }
 
 static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
-			struct cgroup *cgroup)
+			struct blkio_cgroup *blkcg)
 {
-	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
 	struct throtl_grp *tg = NULL;
 	void *key = td;
 	struct backing_dev_info *bdi = &td->queue->backing_dev_info;
@@ -229,12 +228,12 @@ done:
 
 static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
 {
-	struct cgroup *cgroup;
 	struct throtl_grp *tg = NULL;
+	struct blkio_cgroup *blkcg;
 
 	rcu_read_lock();
-	cgroup = task_cgroup(current, blkio_subsys_id);
-	tg = throtl_find_alloc_tg(td, cgroup);
+	blkcg = task_blkio_cgroup(current);
+	tg = throtl_find_alloc_tg(td, blkcg);
 	if (!tg)
 		tg = &td->root_tg;
 	rcu_read_unlock();
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3be881ec95ad..ab7a9e6a9b1c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1014,10 +1014,9 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 	cfqg->needs_update = true;
 }
 
-static struct cfq_group *
-cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
+static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
+		struct blkio_cgroup *blkcg, int create)
 {
-	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
 	struct cfq_group *cfqg = NULL;
 	void *key = cfqd;
 	int i, j;
@@ -1079,12 +1078,12 @@ done:
  */
 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
 {
-	struct cgroup *cgroup;
+	struct blkio_cgroup *blkcg;
 	struct cfq_group *cfqg = NULL;
 
 	rcu_read_lock();
-	cgroup = task_cgroup(current, blkio_subsys_id);
-	cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create);
+	blkcg = task_blkio_cgroup(current);
+	cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create);
 	if (!cfqg && create)
 		cfqg = &cfqd->root_group;
 	rcu_read_unlock();
@@ -2582,28 +2581,20 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 }
 
 /*
- * Must always be called with the rcu_read_lock() held
+ * Call func for each cic attached to this ioc.
  */
 static void
-__call_for_each_cic(struct io_context *ioc,
-		    void (*func)(struct io_context *, struct cfq_io_context *))
+call_for_each_cic(struct io_context *ioc,
+		  void (*func)(struct io_context *, struct cfq_io_context *))
 {
 	struct cfq_io_context *cic;
 	struct hlist_node *n;
 
+	rcu_read_lock();
+
 	hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
 		func(ioc, cic);
-}
 
-/*
- * Call func for each cic attached to this ioc.
- */
-static void
-call_for_each_cic(struct io_context *ioc,
-		  void (*func)(struct io_context *, struct cfq_io_context *))
-{
-	rcu_read_lock();
-	__call_for_each_cic(ioc, func);
 	rcu_read_unlock();
 }
 
@@ -2664,7 +2655,7 @@ static void cfq_free_io_context(struct io_context *ioc)
 	 * should be ok to iterate over the known list, we will see all cic's
 	 * since no new ones are added.
 	 */
-	__call_for_each_cic(ioc, cic_free_func);
+	call_for_each_cic(ioc, cic_free_func);
 }
 
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -3368,7 +3359,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		    cfqd->busy_queues > 1) {
 			cfq_del_timer(cfqd, cfqq);
 			cfq_clear_cfqq_wait_request(cfqq);
-			__blk_run_queue(cfqd->queue, false);
+			__blk_run_queue(cfqd->queue);
 		} else {
 			cfq_blkiocg_update_idle_time_stats(
 				&cfqq->cfqg->blkg);
@@ -3383,7 +3374,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
-		__blk_run_queue(cfqd->queue, false);
+		__blk_run_queue(cfqd->queue);
 	}
 }
 
@@ -3743,7 +3734,7 @@ static void cfq_kick_queue(struct work_struct *work)
 	struct request_queue *q = cfqd->queue;
 
 	spin_lock_irq(q->queue_lock);
-	__blk_run_queue(cfqd->queue, false);
+	__blk_run_queue(cfqd->queue);
 	spin_unlock_irq(q->queue_lock);
 }
 
diff --git a/block/elevator.c b/block/elevator.c
index 0cdb4e7ebab4..45ca1e34f582 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -642,7 +642,7 @@ void elv_quiesce_start(struct request_queue *q)
 	 */
 	elv_drain_elevator(q);
 	while (q->rq.elvpriv) {
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 		spin_unlock_irq(q->queue_lock);
 		msleep(10);
 		spin_lock_irq(q->queue_lock);
@@ -671,7 +671,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 			q->boundary_rq = rq;
 		}
 	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
-		    where == ELEVATOR_INSERT_SORT)
+		    (where == ELEVATOR_INSERT_SORT ||
+		     where == ELEVATOR_INSERT_SORT_MERGE))
 		where = ELEVATOR_INSERT_BACK;
 
 	switch (where) {
@@ -695,7 +696,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 * with anything.  There's no point in delaying queue
 		 * processing.
 		 */
-		__blk_run_queue(q, false);
+		__blk_run_queue(q);
 		break;
 
 	case ELEVATOR_INSERT_SORT_MERGE:
diff --git a/block/genhd.c b/block/genhd.c
index b364bd038a18..2dd988723d73 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1588,9 +1588,13 @@ static void disk_events_workfn(struct work_struct *work)
 
 	spin_unlock_irq(&ev->lock);
 
-	/* tell userland about new events */
+	/*
+	 * Tell userland about new events. Only the events listed in
+	 * @disk->events are reported. Unlisted events are processed the
+	 * same internally but never get reported to userland.
+	 */
 	for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
-		if (events & (1 << i))
+		if (events & disk->events & (1 << i))
 			envp[nr_events++] = disk_uevents[i];
 
 	if (nr_events)
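For context, a minimal sketch of how a submitter uses the on-stack plugging API whose flush path the blk-core.c changes above rework. submit_read_batch() and its bio array are invented for this page; only blk_start_plug(), submit_bio() (2.6.39-era signature: rw plus bio) and blk_finish_plug() are real interfaces. Per the diff, blk_finish_plug() now funnels into blk_flush_plug_list(plug, false): the plugged requests are spliced to a local list, sorted per queue when plug->should_sort is set, and dispatched through queue_unplugged() with a direct __blk_run_queue(); only flushes driven from the scheduler path (not shown in this block/-only diff) pass from_schedule=true and get punted to kblockd via blk_run_queue_async().

#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/fs.h>

/* Illustrative only: batch a few read bios under one on-stack plug. */
static void submit_read_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* requests now collect on current->plug */

	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);

	/*
	 * Flushes the plug via blk_flush_plug_list(&plug, false) and clears
	 * current->plug, per the blk_finish_plug() shown in this diff.
	 */
	blk_finish_plug(&plug);
}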