diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 13:19:59 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 13:19:59 -0800 |
commit | 0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch) | |
tree | 2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /block/blk-mq-sched.c | |
parent | b12a9124eeb71d766a3e3eb594ebbb3fefc66902 (diff) | |
parent | 00203ba40d40d7f33857416adfb18adaf0e40123 (diff) |
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block/storage for 4.21.
Larger than usual, it was a busy round with lots of goodies queued up.
Most notable is the removal of the old IO stack, which has been a long
time coming. No new features for a while, everything coming in this
week has all been fixes for things that were previously merged.
This contains:
- Use atomic counters instead of semaphores for mtip32xx (Arnd)
- Cleanup of the mtip32xx request setup (Christoph)
- Fix for circular locking dependency in loop (Jan, Tetsuo)
- bcache (Coly, Guoju, Shenghui)
* Optimizations for writeback caching
* Various fixes and improvements
- nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith)
* host and target support for NVMe over TCP
* Error log page support
* Support for separate read/write/poll queues
* Much improved polling
* discard OOM fallback
* Tracepoint improvements
- lightnvm (Hans, Hua, Igor, Matias, Javier)
* Igor added packed metadata to pblk. Now drives without metadata
per LBA can be used as well.
* Fix from Geert on uninitialized value on chunk metadata reads.
* Fixes from Hans and Javier to pblk recovery and write path.
* Fix from Hua Su to fix a race condition in the pblk recovery
code.
* Scan optimization added to pblk recovery from Zhoujie.
* Small geometry cleanup from me.
- Conversion of the last few drivers that used the legacy path to
blk-mq (me)
- Removal of legacy IO path in SCSI (me, Christoph)
- Removal of legacy IO stack and schedulers (me)
- Support for much better polling, now without interrupts at all.
blk-mq adds support for multiple queue maps, which enables us to
have a map per type. This in turn enables nvme to have separate
completion queues for polling, which can then be interrupt-less.
Also means we're ready for async polled IO, which is hopefully
coming in the next release.
- Killing of (now) unused block exports (Christoph)
- Unification of the blk-rq-qos and blk-wbt wait handling (Josef)
- Support for zoned testing with null_blk (Masato)
- sx8 conversion to per-host tag sets (Christoph)
- IO priority improvements (Damien)
- mq-deadline zoned fix (Damien)
- Ref count blkcg series (Dennis)
- Lots of blk-mq improvements and speedups (me)
- sbitmap scalability improvements (me)
- Make core inflight IO accounting per-cpu (Mikulas)
- Export timeout setting in sysfs (Weiping)
- Cleanup the direct issue path (Jianchao)
- Export blk-wbt internals in block debugfs for easier debugging
(Ming)
- Lots of other fixes and improvements"
* tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits)
kyber: use sbitmap add_wait_queue/list_del wait helpers
sbitmap: add helpers for add/del wait queue handling
block: save irq state in blkg_lookup_create()
dm: don't reuse bio for flushes
nvme-pci: trace SQ status on completions
nvme-rdma: implement polling queue map
nvme-fabrics: allow user to pass in nr_poll_queues
nvme-fabrics: allow nvmf_connect_io_queue to poll
nvme-core: optionally poll sync commands
block: make request_to_qc_t public
nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"
nvme-tcp: fix endianess annotations
nvmet-tcp: fix endianess annotations
nvme-pci: refactor nvme_poll_irqdisable to make sparse happy
nvme-pci: only set nr_maps to 2 if poll queues are supported
nvmet: use a macro for default error location
nvmet: fix comparison of a u16 with -1
blk-mq: enable IO poll if .nr_queues of type poll > 0
blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
blk-mq: skip zero-queue maps in blk_mq_map_swqueue
...
Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r-- | block/blk-mq-sched.c | 82 |
1 files changed, 46 insertions, 36 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 29bfe8017a2d..140933e4a7d1 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -31,15 +31,22 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); -void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio) +void blk_mq_sched_assign_ioc(struct request *rq) { struct request_queue *q = rq->q; - struct io_context *ioc = rq_ioc(bio); + struct io_context *ioc; struct io_cq *icq; - spin_lock_irq(q->queue_lock); + /* + * May not have an IO context if it's a passthrough request + */ + ioc = current->io_context; + if (!ioc) + return; + + spin_lock_irq(&q->queue_lock); icq = ioc_lookup_icq(ioc, q); - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); if (!icq) { icq = ioc_create_icq(ioc, q, GFP_ATOMIC); @@ -54,13 +61,14 @@ void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio) * Mark a hardware queue as needing a restart. For shared queues, maintain * a count of how many hardware queues are marked for restart. */ -static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) +void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) { if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) return; set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } +EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx); void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) { @@ -85,14 +93,13 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) do { struct request *rq; - if (e->type->ops.mq.has_work && - !e->type->ops.mq.has_work(hctx)) + if (e->type->ops.has_work && !e->type->ops.has_work(hctx)) break; if (!blk_mq_get_dispatch_budget(hctx)) break; - rq = e->type->ops.mq.dispatch_request(hctx); + rq = e->type->ops.dispatch_request(hctx); if (!rq) { blk_mq_put_dispatch_budget(hctx); break; @@ -110,7 +117,7 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { - unsigned idx = ctx->index_hw; + unsigned short idx = ctx->index_hw[hctx->type]; if (++idx == hctx->nr_ctx) idx = 0; @@ -163,7 +170,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct elevator_queue *e = q->elevator; - const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request; + const bool has_sched_dispatch = e && e->type->ops.dispatch_request; LIST_HEAD(rq_list); /* RCU or SRCU read lock is needed before checking quiesced flag */ @@ -295,11 +302,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge); * too much time checking for merges. */ static bool blk_mq_attempt_merge(struct request_queue *q, + struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct bio *bio) { + enum hctx_type type = hctx->type; + lockdep_assert_held(&ctx->lock); - if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) { + if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) { ctx->rq_merged++; return true; } @@ -311,19 +321,21 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) { struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx->cpu); bool ret = false; + enum hctx_type type; - if (e && e->type->ops.mq.bio_merge) { + if (e && e->type->ops.bio_merge) { blk_mq_put_ctx(ctx); - return e->type->ops.mq.bio_merge(hctx, bio); + return e->type->ops.bio_merge(hctx, bio); } + type = hctx->type; if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) && - !list_empty_careful(&ctx->rq_list)) { + !list_empty_careful(&ctx->rq_lists[type])) { /* default per sw-queue merge */ spin_lock(&ctx->lock); - ret = blk_mq_attempt_merge(q, ctx, bio); + ret = blk_mq_attempt_merge(q, hctx, ctx, bio); spin_unlock(&ctx->lock); } @@ -367,7 +379,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx = rq->mq_ctx; - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; /* flush rq in flush machinery need to be dispatched directly */ if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) { @@ -380,11 +392,11 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) goto run; - if (e && e->type->ops.mq.insert_requests) { + if (e && e->type->ops.insert_requests) { LIST_HEAD(list); list_add(&rq->queuelist, &list); - e->type->ops.mq.insert_requests(hctx, &list, at_head); + e->type->ops.insert_requests(hctx, &list, at_head); } else { spin_lock(&ctx->lock); __blk_mq_insert_request(hctx, rq, at_head); @@ -396,27 +408,25 @@ run: blk_mq_run_hw_queue(hctx, async); } -void blk_mq_sched_insert_requests(struct request_queue *q, +void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list, bool run_queue_async) { - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); - struct elevator_queue *e = hctx->queue->elevator; + struct elevator_queue *e; - if (e && e->type->ops.mq.insert_requests) - e->type->ops.mq.insert_requests(hctx, list, false); + e = hctx->queue->elevator; + if (e && e->type->ops.insert_requests) + e->type->ops.insert_requests(hctx, list, false); else { /* * try to issue requests directly if the hw queue isn't * busy in case of 'none' scheduler, and this way may save * us one extra enqueue & dequeue to sw queue. */ - if (!hctx->dispatch_busy && !e && !run_queue_async) { + if (!hctx->dispatch_busy && !e && !run_queue_async) blk_mq_try_issue_list_directly(hctx, list); - if (list_empty(list)) - return; - } - blk_mq_insert_requests(hctx, ctx, list); + else + blk_mq_insert_requests(hctx, ctx, list); } blk_mq_run_hw_queue(hctx, run_queue_async); @@ -489,15 +499,15 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) goto err; } - ret = e->ops.mq.init_sched(q, e); + ret = e->ops.init_sched(q, e); if (ret) goto err; blk_mq_debugfs_register_sched(q); queue_for_each_hw_ctx(q, hctx, i) { - if (e->ops.mq.init_hctx) { - ret = e->ops.mq.init_hctx(hctx, i); + if (e->ops.init_hctx) { + ret = e->ops.init_hctx(hctx, i); if (ret) { eq = q->elevator; blk_mq_exit_sched(q, eq); @@ -523,14 +533,14 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) queue_for_each_hw_ctx(q, hctx, i) { blk_mq_debugfs_unregister_sched_hctx(hctx); - if (e->type->ops.mq.exit_hctx && hctx->sched_data) { - e->type->ops.mq.exit_hctx(hctx, i); + if (e->type->ops.exit_hctx && hctx->sched_data) { + e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } } blk_mq_debugfs_unregister_sched(q); - if (e->type->ops.mq.exit_sched) - e->type->ops.mq.exit_sched(e); + if (e->type->ops.exit_sched) + e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q); q->elevator = NULL; } |