summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYu Kuai <yukuai@fnnas.com>2026-02-03 16:19:47 +0800
committerJens Axboe <axboe@kernel.dk>2026-02-03 07:45:36 -0700
commit988bb1b9ededab9aed83df8c1f5be0232b71ded3 (patch)
tree1e00498044a0a110a4ce41e465533086223affb4
parent8cbe62f4d8c37b74947569c7b874848f39f09a22 (diff)
mq-deadline: covert to use request_queue->async_depth
In downstream kernel, we test with mq-deadline with many fio workloads, and we found a performance regression after commit 39823b47bbd4 ("block/mq-deadline: Fix the tag reservation code") with following test: [global] rw=randread direct=1 ramp_time=1 ioengine=libaio iodepth=1024 numjobs=24 bs=1024k group_reporting=1 runtime=60 [job1] filename=/dev/sda Root cause is that mq-deadline now support configuring async_depth, although the default value is nr_request, however the minimal value is 1, hence min_shallow_depth is set to 1, causing wake_batch to be 1. For consequence, sbitmap_queue will be waken up after each IO instead of 8 IO. In this test case, sda is HDD and max_sectors is 128k, hence each submitted 1M io will be splited into 8 sequential 128k requests, however due to there are 24 jobs and total tags are exhausted, the 8 requests are unlikely to be dispatched sequentially, and changing wake_batch to 1 will make this much worse, accounting blktrace D stage, the percentage of sequential io is decreased from 8% to 0.8%. Fix this problem by converting to request_queue->async_depth, where min_shallow_depth is set each time async_depth is updated. Noted elevator attribute async_depth is now removed, queue attribute with the same name is used instead. Fixes: 39823b47bbd4 ("block/mq-deadline: Fix the tag reservation code") Signed-off-by: Yu Kuai <yukuai@fnnas.com> Reviewed-by: Nilay Shroff <nilay@linux.ibm.com> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/mq-deadline.c39
1 files changed, 5 insertions, 34 deletions
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 29d00221fbea..95917a88976f 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -98,7 +98,6 @@ struct deadline_data {
int fifo_batch;
int writes_starved;
int front_merges;
- u32 async_depth;
int prio_aging_expire;
spinlock_t lock;
@@ -486,32 +485,16 @@ unlock:
return rq;
}
-/*
- * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
- * function is used by __blk_mq_get_tag().
- */
static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
{
- struct deadline_data *dd = data->q->elevator->elevator_data;
-
- /* Do not throttle synchronous reads. */
- if (blk_mq_is_sync_read(opf))
- return;
-
- /*
- * Throttle asynchronous requests and writes such that these requests
- * do not block the allocation of synchronous requests.
- */
- data->shallow_depth = dd->async_depth;
+ if (!blk_mq_is_sync_read(opf))
+ data->shallow_depth = data->q->async_depth;
}
-/* Called by blk_mq_update_nr_requests(). */
+/* Called by blk_mq_init_sched() and blk_mq_update_nr_requests(). */
static void dd_depth_updated(struct request_queue *q)
{
- struct deadline_data *dd = q->elevator->elevator_data;
-
- dd->async_depth = q->nr_requests;
- blk_mq_set_min_shallow_depth(q, 1);
+ blk_mq_set_min_shallow_depth(q, q->async_depth);
}
static void dd_exit_sched(struct elevator_queue *e)
@@ -576,6 +559,7 @@ static int dd_init_sched(struct request_queue *q, struct elevator_queue *eq)
blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
q->elevator = eq;
+ q->async_depth = q->nr_requests;
dd_depth_updated(q);
return 0;
}
@@ -763,7 +747,6 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
-SHOW_INT(deadline_async_depth_show, dd->async_depth);
SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
#undef SHOW_INT
#undef SHOW_JIFFIES
@@ -793,7 +776,6 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA
STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
-STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
#undef STORE_FUNCTION
#undef STORE_INT
@@ -807,7 +789,6 @@ static const struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(write_expire),
DD_ATTR(writes_starved),
DD_ATTR(front_merges),
- DD_ATTR(async_depth),
DD_ATTR(fifo_batch),
DD_ATTR(prio_aging_expire),
__ATTR_NULL
@@ -894,15 +875,6 @@ static int deadline_starved_show(void *data, struct seq_file *m)
return 0;
}
-static int dd_async_depth_show(void *data, struct seq_file *m)
-{
- struct request_queue *q = data;
- struct deadline_data *dd = q->elevator->elevator_data;
-
- seq_printf(m, "%u\n", dd->async_depth);
- return 0;
-}
-
static int dd_queued_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
@@ -1002,7 +974,6 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
DEADLINE_NEXT_RQ_ATTR(write2),
{"batching", 0400, deadline_batching_show},
{"starved", 0400, deadline_starved_show},
- {"async_depth", 0400, dd_async_depth_show},
{"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops},
{"owned_by_driver", 0400, dd_owned_by_driver_show},
{"queued", 0400, dd_queued_show},