summaryrefslogtreecommitdiff
path: root/block/blk-mq-sched.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 13:19:59 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 13:19:59 -0800
commit0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch)
tree2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /block/blk-mq-sched.c
parentb12a9124eeb71d766a3e3eb594ebbb3fefc66902 (diff)
parent00203ba40d40d7f33857416adfb18adaf0e40123 (diff)
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "This is the main pull request for block/storage for 4.21. Larger than usual, it was a busy round with lots of goodies queued up. Most notable is the removal of the old IO stack, which has been a long time coming. No new features for a while, everything coming in this week has all been fixes for things that were previously merged. This contains: - Use atomic counters instead of semaphores for mtip32xx (Arnd) - Cleanup of the mtip32xx request setup (Christoph) - Fix for circular locking dependency in loop (Jan, Tetsuo) - bcache (Coly, Guoju, Shenghui) * Optimizations for writeback caching * Various fixes and improvements - nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith) * host and target support for NVMe over TCP * Error log page support * Support for separate read/write/poll queues * Much improved polling * discard OOM fallback * Tracepoint improvements - lightnvm (Hans, Hua, Igor, Matias, Javier) * Igor added packed metadata to pblk. Now drives without metadata per LBA can be used as well. * Fix from Geert on uninitialized value on chunk metadata reads. * Fixes from Hans and Javier to pblk recovery and write path. * Fix from Hua Su to fix a race condition in the pblk recovery code. * Scan optimization added to pblk recovery from Zhoujie. * Small geometry cleanup from me. - Conversion of the last few drivers that used the legacy path to blk-mq (me) - Removal of legacy IO path in SCSI (me, Christoph) - Removal of legacy IO stack and schedulers (me) - Support for much better polling, now without interrupts at all. blk-mq adds support for multiple queue maps, which enables us to have a map per type. This in turn enables nvme to have separate completion queues for polling, which can then be interrupt-less. Also means we're ready for async polled IO, which is hopefully coming in the next release. - Killing of (now) unused block exports (Christoph) - Unification of the blk-rq-qos and blk-wbt wait handling (Josef) - Support for zoned testing with null_blk (Masato) - sx8 conversion to per-host tag sets (Christoph) - IO priority improvements (Damien) - mq-deadline zoned fix (Damien) - Ref count blkcg series (Dennis) - Lots of blk-mq improvements and speedups (me) - sbitmap scalability improvements (me) - Make core inflight IO accounting per-cpu (Mikulas) - Export timeout setting in sysfs (Weiping) - Cleanup the direct issue path (Jianchao) - Export blk-wbt internals in block debugfs for easier debugging (Ming) - Lots of other fixes and improvements" * tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits) kyber: use sbitmap add_wait_queue/list_del wait helpers sbitmap: add helpers for add/del wait queue handling block: save irq state in blkg_lookup_create() dm: don't reuse bio for flushes nvme-pci: trace SQ status on completions nvme-rdma: implement polling queue map nvme-fabrics: allow user to pass in nr_poll_queues nvme-fabrics: allow nvmf_connect_io_queue to poll nvme-core: optionally poll sync commands block: make request_to_qc_t public nvme-tcp: fix spelling mistake "attepmpt" -> "attempt" nvme-tcp: fix endianess annotations nvmet-tcp: fix endianess annotations nvme-pci: refactor nvme_poll_irqdisable to make sparse happy nvme-pci: only set nr_maps to 2 if poll queues are supported nvmet: use a macro for default error location nvmet: fix comparison of a u16 with -1 blk-mq: enable IO poll if .nr_queues of type poll > 0 blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight() blk-mq: skip zero-queue maps in blk_mq_map_swqueue ...
Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r--block/blk-mq-sched.c82
1 files changed, 46 insertions, 36 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 29bfe8017a2d..140933e4a7d1 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -31,15 +31,22 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
-void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
+void blk_mq_sched_assign_ioc(struct request *rq)
{
struct request_queue *q = rq->q;
- struct io_context *ioc = rq_ioc(bio);
+ struct io_context *ioc;
struct io_cq *icq;
- spin_lock_irq(q->queue_lock);
+ /*
+ * May not have an IO context if it's a passthrough request
+ */
+ ioc = current->io_context;
+ if (!ioc)
+ return;
+
+ spin_lock_irq(&q->queue_lock);
icq = ioc_lookup_icq(ioc, q);
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
if (!icq) {
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
@@ -54,13 +61,14 @@ void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
* Mark a hardware queue as needing a restart. For shared queues, maintain
* a count of how many hardware queues are marked for restart.
*/
-static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
+EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);
void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
@@ -85,14 +93,13 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
do {
struct request *rq;
- if (e->type->ops.mq.has_work &&
- !e->type->ops.mq.has_work(hctx))
+ if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
break;
if (!blk_mq_get_dispatch_budget(hctx))
break;
- rq = e->type->ops.mq.dispatch_request(hctx);
+ rq = e->type->ops.dispatch_request(hctx);
if (!rq) {
blk_mq_put_dispatch_budget(hctx);
break;
@@ -110,7 +117,7 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx)
{
- unsigned idx = ctx->index_hw;
+ unsigned short idx = ctx->index_hw[hctx->type];
if (++idx == hctx->nr_ctx)
idx = 0;
@@ -163,7 +170,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
- const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
+ const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
LIST_HEAD(rq_list);
/* RCU or SRCU read lock is needed before checking quiesced flag */
@@ -295,11 +302,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
* too much time checking for merges.
*/
static bool blk_mq_attempt_merge(struct request_queue *q,
+ struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx, struct bio *bio)
{
+ enum hctx_type type = hctx->type;
+
lockdep_assert_held(&ctx->lock);
- if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+ if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) {
ctx->rq_merged++;
return true;
}
@@ -311,19 +321,21 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx->cpu);
bool ret = false;
+ enum hctx_type type;
- if (e && e->type->ops.mq.bio_merge) {
+ if (e && e->type->ops.bio_merge) {
blk_mq_put_ctx(ctx);
- return e->type->ops.mq.bio_merge(hctx, bio);
+ return e->type->ops.bio_merge(hctx, bio);
}
+ type = hctx->type;
if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
- !list_empty_careful(&ctx->rq_list)) {
+ !list_empty_careful(&ctx->rq_lists[type])) {
/* default per sw-queue merge */
spin_lock(&ctx->lock);
- ret = blk_mq_attempt_merge(q, ctx, bio);
+ ret = blk_mq_attempt_merge(q, hctx, ctx, bio);
spin_unlock(&ctx->lock);
}
@@ -367,7 +379,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = rq->mq_ctx;
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
/* flush rq in flush machinery need to be dispatched directly */
if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
@@ -380,11 +392,11 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
goto run;
- if (e && e->type->ops.mq.insert_requests) {
+ if (e && e->type->ops.insert_requests) {
LIST_HEAD(list);
list_add(&rq->queuelist, &list);
- e->type->ops.mq.insert_requests(hctx, &list, at_head);
+ e->type->ops.insert_requests(hctx, &list, at_head);
} else {
spin_lock(&ctx->lock);
__blk_mq_insert_request(hctx, rq, at_head);
@@ -396,27 +408,25 @@ run:
blk_mq_run_hw_queue(hctx, async);
}
-void blk_mq_sched_insert_requests(struct request_queue *q,
+void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx,
struct list_head *list, bool run_queue_async)
{
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
- struct elevator_queue *e = hctx->queue->elevator;
+ struct elevator_queue *e;
- if (e && e->type->ops.mq.insert_requests)
- e->type->ops.mq.insert_requests(hctx, list, false);
+ e = hctx->queue->elevator;
+ if (e && e->type->ops.insert_requests)
+ e->type->ops.insert_requests(hctx, list, false);
else {
/*
* try to issue requests directly if the hw queue isn't
* busy in case of 'none' scheduler, and this way may save
* us one extra enqueue & dequeue to sw queue.
*/
- if (!hctx->dispatch_busy && !e && !run_queue_async) {
+ if (!hctx->dispatch_busy && !e && !run_queue_async)
blk_mq_try_issue_list_directly(hctx, list);
- if (list_empty(list))
- return;
- }
- blk_mq_insert_requests(hctx, ctx, list);
+ else
+ blk_mq_insert_requests(hctx, ctx, list);
}
blk_mq_run_hw_queue(hctx, run_queue_async);
@@ -489,15 +499,15 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
goto err;
}
- ret = e->ops.mq.init_sched(q, e);
+ ret = e->ops.init_sched(q, e);
if (ret)
goto err;
blk_mq_debugfs_register_sched(q);
queue_for_each_hw_ctx(q, hctx, i) {
- if (e->ops.mq.init_hctx) {
- ret = e->ops.mq.init_hctx(hctx, i);
+ if (e->ops.init_hctx) {
+ ret = e->ops.init_hctx(hctx, i);
if (ret) {
eq = q->elevator;
blk_mq_exit_sched(q, eq);
@@ -523,14 +533,14 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
queue_for_each_hw_ctx(q, hctx, i) {
blk_mq_debugfs_unregister_sched_hctx(hctx);
- if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
- e->type->ops.mq.exit_hctx(hctx, i);
+ if (e->type->ops.exit_hctx && hctx->sched_data) {
+ e->type->ops.exit_hctx(hctx, i);
hctx->sched_data = NULL;
}
}
blk_mq_debugfs_unregister_sched(q);
- if (e->type->ops.mq.exit_sched)
- e->type->ops.mq.exit_sched(e);
+ if (e->type->ops.exit_sched)
+ e->type->ops.exit_sched(e);
blk_mq_sched_tags_teardown(q);
q->elevator = NULL;
}