summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/blk-mq-sched.c65
-rw-r--r--block/blk-mq-sched.h4
-rw-r--r--block/blk-mq.c16
-rw-r--r--block/blk.h4
-rw-r--r--block/elevator.c15
5 files changed, 89 insertions, 15 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 2d6d1ebdd8fb..e2ce4a28e6c9 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -427,6 +427,32 @@ void blk_mq_free_sched_tags(struct elevator_tags *et,
kfree(et);
}
+void blk_mq_free_sched_tags_batch(struct xarray *et_table,
+ struct blk_mq_tag_set *set)
+{
+ struct request_queue *q;
+ struct elevator_tags *et;
+
+ lockdep_assert_held_write(&set->update_nr_hwq_lock);
+
+ list_for_each_entry(q, &set->tag_list, tag_set_list) {
+ /*
+ * Accessing q->elevator without holding q->elevator_lock is
+ * safe because we're holding here set->update_nr_hwq_lock in
+ * the writer context. So, scheduler update/switch code (which
+ * acquires the same lock but in the reader context) can't run
+ * concurrently.
+ */
+ if (q->elevator) {
+ et = xa_load(et_table, q->id);
+ if (unlikely(!et))
+ WARN_ON_ONCE(1);
+ else
+ blk_mq_free_sched_tags(et, set);
+ }
+ }
+}
+
struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
unsigned int nr_hw_queues)
{
@@ -477,6 +503,45 @@ out:
return NULL;
}
+int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
+ struct blk_mq_tag_set *set, unsigned int nr_hw_queues)
+{
+ struct request_queue *q;
+ struct elevator_tags *et;
+ gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+
+ lockdep_assert_held_write(&set->update_nr_hwq_lock);
+
+ list_for_each_entry(q, &set->tag_list, tag_set_list) {
+ /*
+ * Accessing q->elevator without holding q->elevator_lock is
+ * safe because we're holding here set->update_nr_hwq_lock in
+ * the writer context. So, scheduler update/switch code (which
+ * acquires the same lock but in the reader context) can't run
+ * concurrently.
+ */
+ if (q->elevator) {
+ et = blk_mq_alloc_sched_tags(set, nr_hw_queues);
+ if (!et)
+ goto out_unwind;
+ if (xa_insert(et_table, q->id, et, gfp))
+ goto out_free_tags;
+ }
+ }
+ return 0;
+out_free_tags:
+ blk_mq_free_sched_tags(et, set);
+out_unwind:
+ list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) {
+ if (q->elevator) {
+ et = xa_load(et_table, q->id);
+ if (et)
+ blk_mq_free_sched_tags(et, set);
+ }
+ }
+ return -ENOMEM;
+}
+
/* caller must have a reference to @e, will grab another one if successful */
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e,
struct elevator_tags *et)
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 0cde00cd1c47..b554e1d55950 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -25,8 +25,12 @@ void blk_mq_sched_free_rqs(struct request_queue *q);
struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
unsigned int nr_hw_queues);
+int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
+ struct blk_mq_tag_set *set, unsigned int nr_hw_queues);
void blk_mq_free_sched_tags(struct elevator_tags *et,
struct blk_mq_tag_set *set);
+void blk_mq_free_sched_tags_batch(struct xarray *et_table,
+ struct blk_mq_tag_set *set);
static inline void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9692fa4c3ef2..b67d6c02eceb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4974,12 +4974,13 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
* Switch back to the elevator type stored in the xarray.
*/
static void blk_mq_elv_switch_back(struct request_queue *q,
- struct xarray *elv_tbl)
+ struct xarray *elv_tbl, struct xarray *et_tbl)
{
struct elevator_type *e = xa_load(elv_tbl, q->id);
+ struct elevator_tags *t = xa_load(et_tbl, q->id);
/* The elv_update_nr_hw_queues unfreezes the queue. */
- elv_update_nr_hw_queues(q, e);
+ elv_update_nr_hw_queues(q, e, t);
/* Drop the reference acquired in blk_mq_elv_switch_none. */
if (e)
@@ -5031,7 +5032,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
int prev_nr_hw_queues = set->nr_hw_queues;
unsigned int memflags;
int i;
- struct xarray elv_tbl;
+ struct xarray elv_tbl, et_tbl;
lockdep_assert_held(&set->tag_list_lock);
@@ -5044,6 +5045,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
memflags = memalloc_noio_save();
+ xa_init(&et_tbl);
+ if (blk_mq_alloc_sched_tags_batch(&et_tbl, set, nr_hw_queues) < 0)
+ goto out_memalloc_restore;
+
xa_init(&elv_tbl);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
@@ -5087,7 +5092,7 @@ fallback:
switch_back:
/* The blk_mq_elv_switch_back unfreezes queue for us. */
list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_elv_switch_back(q, &elv_tbl);
+ blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_sysfs_register_hctxs(q);
@@ -5098,7 +5103,8 @@ switch_back:
}
xa_destroy(&elv_tbl);
-
+ xa_destroy(&et_tbl);
+out_memalloc_restore:
memalloc_noio_restore(memflags);
/* Free the excess tags when nr_hw_queues shrink. */
diff --git a/block/blk.h b/block/blk.h
index 76901a39997f..0a2eccf28ca4 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -12,6 +12,7 @@
#include "blk-crypto-internal.h"
struct elevator_type;
+struct elevator_tags;
/*
* Default upper limit for the software max_sectors limit used for regular I/Os.
@@ -330,7 +331,8 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
bool blk_insert_flush(struct request *rq);
-void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e);
+void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
+ struct elevator_tags *t);
void elevator_set_default(struct request_queue *q);
void elevator_set_none(struct request_queue *q);
diff --git a/block/elevator.c b/block/elevator.c
index e9dc837b7b70..fe96c6f4753c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -705,7 +705,8 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
* The I/O scheduler depends on the number of hardware queues, this forces a
* reattachment when nr_hw_queues changes.
*/
-void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e)
+void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
+ struct elevator_tags *t)
{
struct blk_mq_tag_set *set = q->tag_set;
struct elv_change_ctx ctx = {};
@@ -715,25 +716,21 @@ void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e)
if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
ctx.name = e->elevator_name;
- ctx.et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues);
- if (!ctx.et) {
- WARN_ON_ONCE(1);
- goto unfreeze;
- }
+ ctx.et = t;
+
mutex_lock(&q->elevator_lock);
/* force to reattach elevator after nr_hw_queue is updated */
ret = elevator_switch(q, &ctx);
mutex_unlock(&q->elevator_lock);
}
-unfreeze:
blk_mq_unfreeze_queue_nomemrestore(q);
if (!ret)
WARN_ON_ONCE(elevator_change_done(q, &ctx));
/*
* Free sched tags if it's allocated but we couldn't switch elevator.
*/
- if (ctx.et && !ctx.new)
- blk_mq_free_sched_tags(ctx.et, set);
+ if (t && !ctx.new)
+ blk_mq_free_sched_tags(t, set);
}
/*