Diffstat (limited to 'block')
-rw-r--r--  block/blk-cgroup.c   |   7
-rw-r--r--  block/blk-cgroup.h   |   3
-rw-r--r--  block/blk-core.c     | 180
-rw-r--r--  block/blk-exec.c     |   2
-rw-r--r--  block/blk-flush.c    |   4
-rw-r--r--  block/blk-sysfs.c    |  11
-rw-r--r--  block/blk-throttle.c |   9
-rw-r--r--  block/cfq-iosched.c  |  37
-rw-r--r--  block/elevator.c     |   7
-rw-r--r--  block/genhd.c        |   8
10 files changed, 163 insertions, 105 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f0605ab2a761..471fdcc5df85 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -114,6 +114,13 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
+struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
+{
+ return container_of(task_subsys_state(tsk, blkio_subsys_id),
+ struct blkio_cgroup, css);
+}
+EXPORT_SYMBOL_GPL(task_blkio_cgroup);
+
static inline void
blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
{
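A note on usage: the new task_blkio_cgroup() wraps task_subsys_state(), which is only valid under rcu_read_lock() (or with cgroup_mutex held), so callers must hold the RCU read lock across the lookup and any dereference of the result. A minimal caller sketch, mirroring the blk-throttle and CFQ conversions later in this diff (the group-lookup helper is hypothetical):

        struct blkio_cgroup *blkcg;

        rcu_read_lock();
        blkcg = task_blkio_cgroup(current);
        /* resolve the per-device group keyed on blkcg, still under RCU */
        tg = find_alloc_group(td, blkcg);       /* hypothetical helper */
        rcu_read_unlock();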
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 10919fae2d3a..c774930cc206 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -291,6 +291,7 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
+extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, void *key, dev_t dev,
enum blkio_policy_id plid);
@@ -314,6 +315,8 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
struct cgroup;
static inline struct blkio_cgroup *
cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
+static inline struct blkio_cgroup *
+task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, void *key, dev_t dev,
diff --git a/block/blk-core.c b/block/blk-core.c
index 90f22cc30799..3fe00a14822a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -198,26 +198,13 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
}
EXPORT_SYMBOL(blk_dump_rq_flags);
-/*
- * Make sure that plugs that were pending when this function was entered,
- * are now complete and requests pushed to the queue.
-*/
-static inline void queue_sync_plugs(struct request_queue *q)
-{
- /*
- * If the current process is plugged and has barriers submitted,
- * we will livelock if we don't unplug first.
- */
- blk_flush_plug(current);
-}
-
static void blk_delay_work(struct work_struct *work)
{
struct request_queue *q;
q = container_of(work, struct request_queue, delay_work.work);
spin_lock_irq(q->queue_lock);
- __blk_run_queue(q, false);
+ __blk_run_queue(q);
spin_unlock_irq(q->queue_lock);
}
@@ -233,7 +220,8 @@ static void blk_delay_work(struct work_struct *work)
*/
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
- schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs));
+ queue_delayed_work(kblockd_workqueue, &q->delay_work,
+ msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_delay_queue);
@@ -251,7 +239,7 @@ void blk_start_queue(struct request_queue *q)
WARN_ON(!irqs_disabled());
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
- __blk_run_queue(q, false);
+ __blk_run_queue(q);
}
EXPORT_SYMBOL(blk_start_queue);
@@ -298,38 +286,44 @@ void blk_sync_queue(struct request_queue *q)
{
del_timer_sync(&q->timeout);
cancel_delayed_work_sync(&q->delay_work);
- queue_sync_plugs(q);
}
EXPORT_SYMBOL(blk_sync_queue);
/**
* __blk_run_queue - run a single device queue
* @q: The queue to run
- * @force_kblockd: Don't run @q->request_fn directly. Use kblockd.
*
* Description:
* See @blk_run_queue. This variant must be called with the queue lock
* held and interrupts disabled.
- *
*/
-void __blk_run_queue(struct request_queue *q, bool force_kblockd)
+void __blk_run_queue(struct request_queue *q)
{
if (unlikely(blk_queue_stopped(q)))
return;
- /*
- * Only recurse once to avoid overrunning the stack, let the unplug
- * handling reinvoke the handler shortly if we already got there.
- */
- if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
- q->request_fn(q);
- queue_flag_clear(QUEUE_FLAG_REENTER, q);
- } else
- queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+ q->request_fn(q);
}
EXPORT_SYMBOL(__blk_run_queue);
/**
+ * blk_run_queue_async - run a single device queue in workqueue context
+ * @q: The queue to run
+ *
+ * Description:
+ * Tells kblockd to perform the equivalent of @blk_run_queue on our
+ * behalf.
+ */
+void blk_run_queue_async(struct request_queue *q)
+{
+ if (likely(!blk_queue_stopped(q))) {
+ __cancel_delayed_work(&q->delay_work);
+ queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+ }
+}
+EXPORT_SYMBOL(blk_run_queue_async);
+
+/**
* blk_run_queue - run a single device queue
* @q: The queue to run
*
@@ -342,7 +336,7 @@ void blk_run_queue(struct request_queue *q)
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- __blk_run_queue(q, false);
+ __blk_run_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);
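To summarize the API change the call sites below keep repeating: the force_kblockd argument is gone. A conversion sketch (illustrative only; q is any request_queue whose lock the caller already holds for the synchronous call):

        /* before: __blk_run_queue(q, false) -- direct dispatch,
         * guarded against recursion via QUEUE_FLAG_REENTER */
        __blk_run_queue(q);

        /* before: __blk_run_queue(q, true) -- forced kblockd dispatch */
        blk_run_queue_async(q);

blk_run_queue_async() only cancels and requeues the delay work with a zero timeout, so it is safe from contexts that must not recurse into ->request_fn().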
@@ -991,7 +985,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
blk_queue_end_tag(q, rq);
add_acct_request(q, rq, where);
- __blk_run_queue(q, false);
+ __blk_run_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_insert_request);
@@ -1311,7 +1305,15 @@ get_rq:
plug = current->plug;
if (plug) {
- if (!plug->should_sort && !list_empty(&plug->list)) {
+ /*
+ * If this is the first request added after a plug, fire
+ * off a plug trace. If others have been added before, check
+ * if we have multiple devices in this plug. If so, make a
+ * note to sort the list before dispatch.
+ */
+ if (list_empty(&plug->list))
+ trace_block_plug(q);
+ else if (!plug->should_sort) {
struct request *__rq;
__rq = list_entry_rq(plug->list.prev);
@@ -1327,7 +1329,7 @@ get_rq:
} else {
spin_lock_irq(q->queue_lock);
add_acct_request(q, req, where);
- __blk_run_queue(q, false);
+ __blk_run_queue(q);
out_unlock:
spin_unlock_irq(q->queue_lock);
}
@@ -2644,6 +2646,7 @@ void blk_start_plug(struct blk_plug *plug)
plug->magic = PLUG_MAGIC;
INIT_LIST_HEAD(&plug->list);
+ INIT_LIST_HEAD(&plug->cb_list);
plug->should_sort = 0;
/*
@@ -2668,33 +2671,93 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
return !(rqa->q <= rqb->q);
}
-static void flush_plug_list(struct blk_plug *plug)
+/*
+ * If 'from_schedule' is true, then postpone the dispatch of requests
+ * until a safe kblockd context. We do this to avoid accidental large
+ * additional stack usage in driver dispatch, in places where the
+ * original plugger did not intend it.
+ */
+static void queue_unplugged(struct request_queue *q, unsigned int depth,
+ bool from_schedule)
+ __releases(q->queue_lock)
+{
+ trace_block_unplug(q, depth, !from_schedule);
+
+ /*
+ * If we are punting this to kblockd, then we can safely drop
+ * the queue_lock before waking kblockd (which needs to take
+ * this lock).
+ */
+ if (from_schedule) {
+ spin_unlock(q->queue_lock);
+ blk_run_queue_async(q);
+ } else {
+ __blk_run_queue(q);
+ spin_unlock(q->queue_lock);
+ }
+
+}
+
+static void flush_plug_callbacks(struct blk_plug *plug)
+{
+ LIST_HEAD(callbacks);
+
+ if (list_empty(&plug->cb_list))
+ return;
+
+ list_splice_init(&plug->cb_list, &callbacks);
+
+ while (!list_empty(&callbacks)) {
+ struct blk_plug_cb *cb = list_first_entry(&callbacks,
+ struct blk_plug_cb,
+ list);
+ list_del(&cb->list);
+ cb->callback(cb);
+ }
+}
+
+void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request_queue *q;
unsigned long flags;
struct request *rq;
+ LIST_HEAD(list);
+ unsigned int depth;
BUG_ON(plug->magic != PLUG_MAGIC);
+ flush_plug_callbacks(plug);
if (list_empty(&plug->list))
return;
- if (plug->should_sort)
- list_sort(NULL, &plug->list, plug_rq_cmp);
+ list_splice_init(&plug->list, &list);
+
+ if (plug->should_sort) {
+ list_sort(NULL, &list, plug_rq_cmp);
+ plug->should_sort = 0;
+ }
q = NULL;
+ depth = 0;
+
+ /*
+ * Save and disable interrupts here, to avoid doing it for every
+ * queue lock we have to take.
+ */
local_irq_save(flags);
- while (!list_empty(&plug->list)) {
- rq = list_entry_rq(plug->list.next);
+ while (!list_empty(&list)) {
+ rq = list_entry_rq(list.next);
list_del_init(&rq->queuelist);
BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
BUG_ON(!rq->q);
if (rq->q != q) {
- if (q) {
- __blk_run_queue(q, false);
- spin_unlock(q->queue_lock);
- }
+ /*
+ * This drops the queue lock
+ */
+ if (q)
+ queue_unplugged(q, depth, from_schedule);
q = rq->q;
+ depth = 0;
spin_lock(q->queue_lock);
}
rq->cmd_flags &= ~REQ_ON_PLUG;
@@ -2706,38 +2769,27 @@ static void flush_plug_list(struct blk_plug *plug)
__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
else
__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
- }
- if (q) {
- __blk_run_queue(q, false);
- spin_unlock(q->queue_lock);
+ depth++;
}
- BUG_ON(!list_empty(&plug->list));
- local_irq_restore(flags);
-}
-
-static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
- flush_plug_list(plug);
+ /*
+ * This drops the queue lock
+ */
+ if (q)
+ queue_unplugged(q, depth, from_schedule);
- if (plug == tsk->plug)
- tsk->plug = NULL;
+ local_irq_restore(flags);
}
void blk_finish_plug(struct blk_plug *plug)
{
- if (plug)
- __blk_finish_plug(current, plug);
-}
-EXPORT_SYMBOL(blk_finish_plug);
+ blk_flush_plug_list(plug, false);
-void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
- __blk_finish_plug(tsk, plug);
- tsk->plug = plug;
+ if (plug == current->plug)
+ current->plug = NULL;
}
-EXPORT_SYMBOL(__blk_flush_plug);
+EXPORT_SYMBOL(blk_finish_plug);
int __init blk_dev_init(void)
{
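For context, the plugging API these changes feed into, seen from a submitter (a minimal sketch; the batch-submission helper is hypothetical and not part of this patch):

        struct blk_plug plug;

        blk_start_plug(&plug);
        /* submitted requests collect on current->plug ... */
        submit_batch_of_requests();             /* hypothetical */
        /* ... and are sorted and dispatched per queue here, via
         * blk_flush_plug_list(plug, false) */
        blk_finish_plug(&plug);

When a plugged task sleeps, the scheduler flushes the same list with from_schedule=true (not shown in this diff), which punts dispatch to kblockd so the driver's stack usage does not land on the sleeping task's stack.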
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 7482b7fa863b..81e31819a597 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -55,7 +55,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
WARN_ON(irqs_disabled());
spin_lock_irq(q->queue_lock);
__elv_add_request(q, rq, where);
- __blk_run_queue(q, false);
+ __blk_run_queue(q);
/* the queue is stopped so it won't be plugged+unplugged */
if (rq->cmd_type == REQ_TYPE_PM_RESUME)
q->request_fn(q);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index eba4a2790c6c..6c9b5e189e62 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -218,7 +218,7 @@ static void flush_end_io(struct request *flush_rq, int error)
* request_fn may confuse the driver. Always use kblockd.
*/
if (queued)
- __blk_run_queue(q, true);
+ blk_run_queue_async(q);
}
/**
@@ -274,7 +274,7 @@ static void flush_data_end_io(struct request *rq, int error)
* the comment in flush_end_io().
*/
if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
- __blk_run_queue(q, true);
+ blk_run_queue_async(q);
}
/**
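As the comments in both hunks note, calling ->request_fn() directly from a request completion path may confuse the driver, so the flush machinery now always kicks the queue through kblockd. A hedged sketch of that pattern in a driver-side end_io handler (the handler and its pending-work test are hypothetical):

        static void my_end_io(struct request *rq, int error)
        {
                struct request_queue *q = rq->q;

                /* possibly deep in the driver's completion path:
                 * never recurse into ->request_fn() from here */
                if (more_requests_pending(q))   /* hypothetical */
                        blk_run_queue_async(q);
        }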
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 261c75c665ae..bd236313f35d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -66,14 +66,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
blk_set_queue_full(q, BLK_RW_SYNC);
- } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
+ } else {
blk_clear_queue_full(q, BLK_RW_SYNC);
wake_up(&rl->wait[BLK_RW_SYNC]);
}
if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
blk_set_queue_full(q, BLK_RW_ASYNC);
- } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
+ } else {
blk_clear_queue_full(q, BLK_RW_ASYNC);
wake_up(&rl->wait[BLK_RW_ASYNC]);
}
@@ -498,7 +498,6 @@ int blk_register_queue(struct gendisk *disk)
{
int ret;
struct device *dev = disk_to_dev(disk);
-
struct request_queue *q = disk->queue;
if (WARN_ON(!q))
@@ -509,8 +508,10 @@ int blk_register_queue(struct gendisk *disk)
return ret;
ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
- if (ret < 0)
+ if (ret < 0) {
+ blk_trace_remove_sysfs(dev);
return ret;
+ }
kobject_uevent(&q->kobj, KOBJ_ADD);
@@ -521,7 +522,7 @@ int blk_register_queue(struct gendisk *disk)
if (ret) {
kobject_uevent(&q->kobj, KOBJ_REMOVE);
kobject_del(&q->kobj);
- blk_trace_remove_sysfs(disk_to_dev(disk));
+ blk_trace_remove_sysfs(dev);
kobject_put(&dev->kobj);
return ret;
}
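The second blk-sysfs.c hunk closes a leak in the error path: blk_trace_init_sysfs() has already succeeded by the time kobject_add() can fail, so it must be unwound. The resulting ordering, condensed from the patched function (later steps elided):

        ret = blk_trace_init_sysfs(dev);
        if (ret)
                return ret;

        ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
        if (ret < 0) {
                blk_trace_remove_sysfs(dev);    /* undo blk_trace_init_sysfs() */
                return ret;
        }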
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 0475a22a420d..252a81a306f7 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -160,9 +160,8 @@ static void throtl_put_tg(struct throtl_grp *tg)
}
static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
- struct cgroup *cgroup)
+ struct blkio_cgroup *blkcg)
{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
struct throtl_grp *tg = NULL;
void *key = td;
struct backing_dev_info *bdi = &td->queue->backing_dev_info;
@@ -229,12 +228,12 @@ done:
static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
{
- struct cgroup *cgroup;
struct throtl_grp *tg = NULL;
+ struct blkio_cgroup *blkcg;
rcu_read_lock();
- cgroup = task_cgroup(current, blkio_subsys_id);
- tg = throtl_find_alloc_tg(td, cgroup);
+ blkcg = task_blkio_cgroup(current);
+ tg = throtl_find_alloc_tg(td, blkcg);
if (!tg)
tg = &td->root_tg;
rcu_read_unlock();
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3be881ec95ad..ab7a9e6a9b1c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1014,10 +1014,9 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
cfqg->needs_update = true;
}
-static struct cfq_group *
-cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
+static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
+ struct blkio_cgroup *blkcg, int create)
{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
struct cfq_group *cfqg = NULL;
void *key = cfqd;
int i, j;
@@ -1079,12 +1078,12 @@ done:
*/
static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
{
- struct cgroup *cgroup;
+ struct blkio_cgroup *blkcg;
struct cfq_group *cfqg = NULL;
rcu_read_lock();
- cgroup = task_cgroup(current, blkio_subsys_id);
- cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create);
+ blkcg = task_blkio_cgroup(current);
+ cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create);
if (!cfqg && create)
cfqg = &cfqd->root_group;
rcu_read_unlock();
@@ -2582,28 +2581,20 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
}
/*
- * Must always be called with the rcu_read_lock() held
+ * Call func for each cic attached to this ioc.
*/
static void
-__call_for_each_cic(struct io_context *ioc,
- void (*func)(struct io_context *, struct cfq_io_context *))
+call_for_each_cic(struct io_context *ioc,
+ void (*func)(struct io_context *, struct cfq_io_context *))
{
struct cfq_io_context *cic;
struct hlist_node *n;
+ rcu_read_lock();
+
hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
func(ioc, cic);
-}
-/*
- * Call func for each cic attached to this ioc.
- */
-static void
-call_for_each_cic(struct io_context *ioc,
- void (*func)(struct io_context *, struct cfq_io_context *))
-{
- rcu_read_lock();
- __call_for_each_cic(ioc, func);
rcu_read_unlock();
}
@@ -2664,7 +2655,7 @@ static void cfq_free_io_context(struct io_context *ioc)
* should be ok to iterate over the known list, we will see all cic's
* since no new ones are added.
*/
- __call_for_each_cic(ioc, cic_free_func);
+ call_for_each_cic(ioc, cic_free_func);
}
static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -3368,7 +3359,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfqd->busy_queues > 1) {
cfq_del_timer(cfqd, cfqq);
cfq_clear_cfqq_wait_request(cfqq);
- __blk_run_queue(cfqd->queue, false);
+ __blk_run_queue(cfqd->queue);
} else {
cfq_blkiocg_update_idle_time_stats(
&cfqq->cfqg->blkg);
@@ -3383,7 +3374,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
* this new queue is RT and the current one is BE
*/
cfq_preempt_queue(cfqd, cfqq);
- __blk_run_queue(cfqd->queue, false);
+ __blk_run_queue(cfqd->queue);
}
}
@@ -3743,7 +3734,7 @@ static void cfq_kick_queue(struct work_struct *work)
struct request_queue *q = cfqd->queue;
spin_lock_irq(q->queue_lock);
- __blk_run_queue(cfqd->queue, false);
+ __blk_run_queue(cfqd->queue);
spin_unlock_irq(q->queue_lock);
}
diff --git a/block/elevator.c b/block/elevator.c
index 0cdb4e7ebab4..45ca1e34f582 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -642,7 +642,7 @@ void elv_quiesce_start(struct request_queue *q)
*/
elv_drain_elevator(q);
while (q->rq.elvpriv) {
- __blk_run_queue(q, false);
+ __blk_run_queue(q);
spin_unlock_irq(q->queue_lock);
msleep(10);
spin_lock_irq(q->queue_lock);
@@ -671,7 +671,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
q->boundary_rq = rq;
}
} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
- where == ELEVATOR_INSERT_SORT)
+ (where == ELEVATOR_INSERT_SORT ||
+ where == ELEVATOR_INSERT_SORT_MERGE))
where = ELEVATOR_INSERT_BACK;
switch (where) {
@@ -695,7 +696,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
* with anything. There's no point in delaying queue
* processing.
*/
- __blk_run_queue(q, false);
+ __blk_run_queue(q);
break;
case ELEVATOR_INSERT_SORT_MERGE:
diff --git a/block/genhd.c b/block/genhd.c
index b364bd038a18..2dd988723d73 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1588,9 +1588,13 @@ static void disk_events_workfn(struct work_struct *work)
spin_unlock_irq(&ev->lock);
- /* tell userland about new events */
+ /*
+ * Tell userland about new events. Only the events listed in
+ * @disk->events are reported. Unlisted events are processed the
+ * same internally but never get reported to userland.
+ */
for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
- if (events & (1 << i))
+ if (events & disk->events & (1 << i))
envp[nr_events++] = disk_uevents[i];
if (nr_events)
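The practical effect for drivers: only event bits advertised in disk->events at registration time ever reach userland; unadvertised events are still polled and handled internally but produce no uevent. A hedged driver-side sketch (the surrounding probe code is assumed):

        /* advertise media-change events to userland; other events,
         * if generated, are processed internally only */
        disk->events = DISK_EVENT_MEDIA_CHANGE;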