summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorJason Liu <jason.hui.liu@nxp.com>2022-06-29 12:58:02 -0500
committerJason Liu <jason.hui.liu@nxp.com>2022-06-29 12:58:02 -0500
commit7928826df2e302fb0b9756e1b256ea269059a3a9 (patch)
treebca8b8524415a880b602bf9e7c8a8c62438c99e0 /block
parenteba369f0f66db8e57d52d788f455ebf80b52efa1 (diff)
parent18a33c8dabb88b50b860e0177a73933f2c0ddf68 (diff)
Merge tag 'v5.15.50' into lf-5.15.y
This is the 5.15.50 stable release * tag 'v5.15.50': (1395 commits) Linux 5.15.50 arm64: mm: Don't invalidate FROM_DEVICE buffers at start of DMA transfer serial: core: Initialize rs485 RTS polarity already on probe ... Signed-off-by: Jason Liu <jason.hui.liu@nxp.com> Conflicts: drivers/bus/fsl-mc/fsl-mc-bus.c drivers/crypto/caam/ctrl.c drivers/pci/controller/dwc/pci-imx6.c drivers/spi/spi-fsl-qspi.c drivers/tty/serial/fsl_lpuart.c include/uapi/linux/dma-buf.h
Diffstat (limited to 'block')
-rw-r--r--block/bfq-cgroup.c111
-rw-r--r--block/bfq-iosched.c64
-rw-r--r--block/bfq-iosched.h7
-rw-r--r--block/bio.c9
-rw-r--r--block/blk-cgroup.c8
-rw-r--r--block/blk-iolatency.c122
-rw-r--r--block/blk-mq.c9
7 files changed, 184 insertions, 146 deletions
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index e37af3f8a733..e2e765a54fe9 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -555,6 +555,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
*/
bfqg->bfqd = bfqd;
bfqg->active_entities = 0;
+ bfqg->online = true;
bfqg->rq_pos_tree = RB_ROOT;
}
@@ -583,28 +584,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg,
entity->sched_data = &parent->sched_data;
}
-static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
- struct blkcg *blkcg)
+static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
{
- struct blkcg_gq *blkg;
-
- blkg = blkg_lookup(blkcg, bfqd->queue);
- if (likely(blkg))
- return blkg_to_bfqg(blkg);
- return NULL;
-}
-
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
- struct blkcg *blkcg)
-{
- struct bfq_group *bfqg, *parent;
+ struct bfq_group *parent;
struct bfq_entity *entity;
- bfqg = bfq_lookup_bfqg(bfqd, blkcg);
-
- if (unlikely(!bfqg))
- return NULL;
-
/*
* Update chain of bfq_groups as we might be handling a leaf group
* which, along with some of its relatives, has not been hooked yet
@@ -621,8 +605,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
bfq_group_set_parent(curr_bfqg, parent);
}
}
+}
- return bfqg;
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
+{
+ struct blkcg_gq *blkg = bio->bi_blkg;
+ struct bfq_group *bfqg;
+
+ while (blkg) {
+ bfqg = blkg_to_bfqg(blkg);
+ if (bfqg->online) {
+ bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
+ return bfqg;
+ }
+ blkg = blkg->parent;
+ }
+ bio_associate_blkg_from_css(bio,
+ &bfqg_to_blkg(bfqd->root_group)->blkcg->css);
+ return bfqd->root_group;
}
/**
@@ -704,25 +704,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* Move bic to blkcg, assuming that bfqd->lock is held; which makes
* sure that the reference to cgroup is valid across the call (see
* comments in bfq_bic_update_cgroup on this issue)
- *
- * NOTE: an alternative approach might have been to store the current
- * cgroup in bfqq and getting a reference to it, reducing the lookup
- * time here, at the price of slightly more complex code.
*/
-static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
- struct bfq_io_cq *bic,
- struct blkcg *blkcg)
+static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bfq_group *bfqg)
{
struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
- struct bfq_group *bfqg;
struct bfq_entity *entity;
- bfqg = bfq_find_set_group(bfqd, blkcg);
-
- if (unlikely(!bfqg))
- bfqg = bfqd->root_group;
-
if (async_bfqq) {
entity = &async_bfqq->entity;
@@ -733,9 +723,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
}
if (sync_bfqq) {
- entity = &sync_bfqq->entity;
- if (entity->sched_data != &bfqg->sched_data)
- bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+ /* We are the only user of this bfqq, just move it */
+ if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ } else {
+ struct bfq_queue *bfqq;
+
+ /*
+ * The queue was merged to a different queue. Check
+ * that the merge chain still belongs to the same
+ * cgroup.
+ */
+ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+ if (bfqq->entity.sched_data !=
+ &bfqg->sched_data)
+ break;
+ if (bfqq) {
+ /*
+ * Some queue changed cgroup so the merge is
+ * not valid anymore. We cannot easily just
+ * cancel the merge (by clearing new_bfqq) as
+ * there may be other processes using this
+ * queue and holding refs to all queues below
+ * sync_bfqq->new_bfqq. Similarly if the merge
+ * already happened, we need to detach from
+ * bfqq now so that we cannot merge bio to a
+ * request from the old cgroup.
+ */
+ bfq_put_cooperator(sync_bfqq);
+ bfq_release_process_ref(bfqd, sync_bfqq);
+ bic_set_bfqq(bic, NULL, 1);
+ }
+ }
}
return bfqg;
@@ -744,20 +764,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
- struct bfq_group *bfqg = NULL;
+ struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
uint64_t serial_nr;
- rcu_read_lock();
- serial_nr = __bio_blkcg(bio)->css.serial_nr;
+ serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;
/*
* Check whether blkcg has changed. The condition may trigger
* spuriously on a newly created cic but there's no harm.
*/
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
- goto out;
+ return;
- bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+ /*
+ * New cgroup for this process. Make sure it is linked to bfq internal
+ * cgroup hierarchy.
+ */
+ bfq_link_bfqg(bfqd, bfqg);
+ __bfq_bic_change_cgroup(bfqd, bic, bfqg);
/*
* Update blkg_path for bfq_log_* functions. We cache this
* path, and update it here, for the following
@@ -810,8 +834,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
*/
blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
bic->blkcg_serial_nr = serial_nr;
-out:
- rcu_read_unlock();
}
/**
@@ -939,6 +961,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
put_async_queues:
bfq_put_async_queues(bfqd, bfqg);
+ bfqg->online = false;
spin_unlock_irqrestore(&bfqd->lock, flags);
/*
@@ -1428,7 +1451,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
return bfqd->root_group;
}
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 63d2d66dece5..4b862f18f4b2 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2022,9 +2022,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfqd->last_completed_rq_bfqq ||
bfqd->last_completed_rq_bfqq == bfqq ||
bfq_bfqq_has_short_ttime(bfqq) ||
- bfqq->dispatched > 0 ||
- now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
- bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
+ now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
return;
if (bfqd->last_completed_rq_bfqq !=
@@ -2084,7 +2082,7 @@ static void bfq_add_request(struct request *rq)
bfqq->queued[rq_is_sync(rq)]++;
bfqd->queued++;
- if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
+ if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
bfq_check_waker(bfqd, bfqq, now_ns);
/*
@@ -2337,10 +2335,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
spin_lock_irq(&bfqd->lock);
- if (bic)
+ if (bic) {
+ /*
+ * Make sure cgroup info is uptodate for current process before
+ * considering the merge.
+ */
+ bfq_bic_update_cgroup(bic, bio);
+
bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
- else
+ } else {
bfqd->bio_bfqq = NULL;
+ }
bfqd->bio_bic = bic;
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
@@ -2370,8 +2375,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
return ELEVATOR_NO_MERGE;
}
-static struct bfq_queue *bfq_init_rq(struct request *rq);
-
static void bfq_request_merged(struct request_queue *q, struct request *req,
enum elv_merge type)
{
@@ -2380,7 +2383,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
blk_rq_pos(req) <
blk_rq_pos(container_of(rb_prev(&req->rb_node),
struct request, rb_node))) {
- struct bfq_queue *bfqq = bfq_init_rq(req);
+ struct bfq_queue *bfqq = RQ_BFQQ(req);
struct bfq_data *bfqd;
struct request *prev, *next_rq;
@@ -2432,8 +2435,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
static void bfq_requests_merged(struct request_queue *q, struct request *rq,
struct request *next)
{
- struct bfq_queue *bfqq = bfq_init_rq(rq),
- *next_bfqq = bfq_init_rq(next);
+ struct bfq_queue *bfqq = RQ_BFQQ(rq),
+ *next_bfqq = RQ_BFQQ(next);
if (!bfqq)
goto remove;
@@ -2638,6 +2641,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
if (process_refs == 0 || new_process_refs == 0)
return NULL;
+ /*
+ * Make sure merged queues belong to the same parent. Parents could
+ * have changed since the time we decided the two queues are suitable
+ * for merging.
+ */
+ if (new_bfqq->entity.parent != bfqq->entity.parent)
+ return NULL;
+
bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
new_bfqq->pid);
@@ -2775,9 +2786,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct bfq_queue *new_bfqq =
bfq_setup_merge(bfqq, stable_merge_bfqq);
- bic->stably_merged = true;
- if (new_bfqq && new_bfqq->bic)
- new_bfqq->bic->stably_merged = true;
+ if (new_bfqq) {
+ bic->stably_merged = true;
+ if (new_bfqq->bic)
+ new_bfqq->bic->stably_merged =
+ true;
+ }
return new_bfqq;
} else
return NULL;
@@ -5184,7 +5198,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void bfq_put_cooperator(struct bfq_queue *bfqq)
+void bfq_put_cooperator(struct bfq_queue *bfqq)
{
struct bfq_queue *__bfqq, *next;
@@ -5590,14 +5604,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
struct bfq_queue *bfqq;
struct bfq_group *bfqg;
- rcu_read_lock();
-
- bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
- if (!bfqg) {
- bfqq = &bfqd->oom_bfqq;
- goto out;
- }
-
+ bfqg = bfq_bio_bfqg(bfqd, bio);
if (!is_sync) {
async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
ioprio);
@@ -5643,8 +5650,6 @@ out:
if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn)
bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic);
-
- rcu_read_unlock();
return bfqq;
}
@@ -5975,6 +5980,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
unsigned int cmd_flags) {}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */
+static struct bfq_queue *bfq_init_rq(struct request *rq);
+
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
{
@@ -5990,18 +5997,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bfqg_stats_update_legacy_io(q, rq);
#endif
spin_lock_irq(&bfqd->lock);
+ bfqq = bfq_init_rq(rq);
if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
spin_unlock_irq(&bfqd->lock);
blk_mq_free_requests(&free);
return;
}
- spin_unlock_irq(&bfqd->lock);
-
trace_block_rq_insert(rq);
- spin_lock_irq(&bfqd->lock);
- bfqq = bfq_init_rq(rq);
if (!bfqq || at_head) {
if (at_head)
list_add(&rq->queuelist, &bfqd->dispatch);
@@ -6422,6 +6426,7 @@ static void bfq_finish_requeue_request(struct request *rq)
bfq_completed_request(bfqq, bfqd);
}
bfq_finish_requeue_request_body(bfqq);
+ RQ_BIC(rq)->requests--;
spin_unlock_irqrestore(&bfqd->lock, flags);
/*
@@ -6643,6 +6648,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
bfqq->allocated++;
bfqq->ref++;
+ bic->requests++;
bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
rq, bfqq, bfqq->ref);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index a73488eec8a4..2bd696aaf02c 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -466,6 +466,7 @@ struct bfq_io_cq {
struct bfq_queue *stable_merge_bfqq;
bool stably_merged; /* non splittable if true */
+ unsigned int requests; /* Number of requests this process has in flight */
};
/**
@@ -925,6 +926,8 @@ struct bfq_group {
/* reference counter (see comments in bfq_bic_update_cgroup) */
int ref;
+ /* Is bfq_group still online? */
+ bool online;
struct bfq_entity entity;
struct bfq_sched_data sched_data;
@@ -976,6 +979,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
+void bfq_put_cooperator(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
@@ -1003,8 +1007,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
void bfq_end_wr_async(struct bfq_data *bfqd);
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
- struct blkcg *blkcg);
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio);
struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
diff --git a/block/bio.c b/block/bio.c
index 8906c9856a7d..8381c6690dd6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -665,6 +665,7 @@ static void bio_alloc_cache_destroy(struct bio_set *bs)
bio_alloc_cache_prune(cache, -1U);
}
free_percpu(bs->cache);
+ bs->cache = NULL;
}
/**
@@ -1289,10 +1290,12 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
- void *src_buf;
+ void *src_buf = bvec_kmap_local(&src_bv);
+ void *dst_buf = bvec_kmap_local(&dst_bv);
- src_buf = bvec_kmap_local(&src_bv);
- memcpy_to_bvec(&dst_bv, src_buf);
+ memcpy(dst_buf, src_buf, bytes);
+
+ kunmap_local(dst_buf);
kunmap_local(src_buf);
bio_advance_iter_single(src, src_iter, bytes);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 07a2524e6efd..ce5858dadca5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1886,12 +1886,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg);
*/
void bio_clone_blkg_association(struct bio *dst, struct bio *src)
{
- if (src->bi_blkg) {
- if (dst->bi_blkg)
- blkg_put(dst->bi_blkg);
- blkg_get(src->bi_blkg);
- dst->bi_blkg = src->bi_blkg;
- }
+ if (src->bi_blkg)
+ bio_associate_blkg_from_css(dst, &bio_blkcg(src)->css);
}
EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index c0545f9da549..ce3847499d85 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -86,7 +86,17 @@ struct iolatency_grp;
struct blk_iolatency {
struct rq_qos rqos;
struct timer_list timer;
- atomic_t enabled;
+
+ /*
+ * ->enabled is the master enable switch gating the throttling logic and
+ * inflight tracking. The number of cgroups which have iolat enabled is
+ * tracked in ->enable_cnt, and ->enable is flipped on/off accordingly
+ * from ->enable_work with the request_queue frozen. For details, See
+ * blkiolatency_enable_work_fn().
+ */
+ bool enabled;
+ atomic_t enable_cnt;
+ struct work_struct enable_work;
};
static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
@@ -94,11 +104,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
return container_of(rqos, struct blk_iolatency, rqos);
}
-static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
-{
- return atomic_read(&blkiolat->enabled) > 0;
-}
-
struct child_latency_info {
spinlock_t lock;
@@ -463,7 +468,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
struct blkcg_gq *blkg = bio->bi_blkg;
bool issue_as_root = bio_issue_as_root_blkg(bio);
- if (!blk_iolatency_enabled(blkiolat))
+ if (!blkiolat->enabled)
return;
while (blkg && blkg->parent) {
@@ -593,7 +598,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
u64 window_start;
u64 now;
bool issue_as_root = bio_issue_as_root_blkg(bio);
- bool enabled = false;
int inflight = 0;
blkg = bio->bi_blkg;
@@ -604,8 +608,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
if (!iolat)
return;
- enabled = blk_iolatency_enabled(iolat->blkiolat);
- if (!enabled)
+ if (!iolat->blkiolat->enabled)
return;
now = ktime_to_ns(ktime_get());
@@ -644,6 +647,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
del_timer_sync(&blkiolat->timer);
+ flush_work(&blkiolat->enable_work);
blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
kfree(blkiolat);
}
@@ -715,6 +719,44 @@ next:
rcu_read_unlock();
}
+/**
+ * blkiolatency_enable_work_fn - Enable or disable iolatency on the device
+ * @work: enable_work of the blk_iolatency of interest
+ *
+ * iolatency needs to keep track of the number of in-flight IOs per cgroup. This
+ * is relatively expensive as it involves walking up the hierarchy twice for
+ * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we
+ * want to disable the in-flight tracking.
+ *
+ * We have to make sure that the counting is balanced - we don't want to leak
+ * the in-flight counts by disabling accounting in the completion path while IOs
+ * are in flight. This is achieved by ensuring that no IO is in flight by
+ * freezing the queue while flipping ->enabled. As this requires a sleepable
+ * context, ->enabled flipping is punted to this work function.
+ */
+static void blkiolatency_enable_work_fn(struct work_struct *work)
+{
+ struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
+ enable_work);
+ bool enabled;
+
+ /*
+ * There can only be one instance of this function running for @blkiolat
+ * and it's guaranteed to be executed at least once after the latest
+ * ->enabled_cnt modification. Acting on the latest ->enable_cnt is
+ * sufficient.
+ *
+ * Also, we know @blkiolat is safe to access as ->enable_work is flushed
+ * in blkcg_iolatency_exit().
+ */
+ enabled = atomic_read(&blkiolat->enable_cnt);
+ if (enabled != blkiolat->enabled) {
+ blk_mq_freeze_queue(blkiolat->rqos.q);
+ blkiolat->enabled = enabled;
+ blk_mq_unfreeze_queue(blkiolat->rqos.q);
+ }
+}
+
int blk_iolatency_init(struct request_queue *q)
{
struct blk_iolatency *blkiolat;
@@ -740,17 +782,15 @@ int blk_iolatency_init(struct request_queue *q)
}
timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+ INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
return 0;
}
-/*
- * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
- * return 0.
- */
-static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
{
struct iolatency_grp *iolat = blkg_to_lat(blkg);
+ struct blk_iolatency *blkiolat = iolat->blkiolat;
u64 oldval = iolat->min_lat_nsec;
iolat->min_lat_nsec = val;
@@ -758,13 +798,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
BLKIOLATENCY_MAX_WIN_SIZE);
- if (!oldval && val)
- return 1;
+ if (!oldval && val) {
+ if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
+ schedule_work(&blkiolat->enable_work);
+ }
if (oldval && !val) {
blkcg_clear_delay(blkg);
- return -1;
+ if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
+ schedule_work(&blkiolat->enable_work);
}
- return 0;
}
static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -796,7 +838,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
u64 lat_val = 0;
u64 oldval;
int ret;
- int enable = 0;
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
if (ret)
@@ -831,41 +872,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
blkg = ctx.blkg;
oldval = iolat->min_lat_nsec;
- enable = iolatency_set_min_lat_nsec(blkg, lat_val);
- if (enable) {
- if (!blk_get_queue(blkg->q)) {
- ret = -ENODEV;
- goto out;
- }
-
- blkg_get(blkg);
- }
-
- if (oldval != iolat->min_lat_nsec) {
+ iolatency_set_min_lat_nsec(blkg, lat_val);
+ if (oldval != iolat->min_lat_nsec)
iolatency_clear_scaling(blkg);
- }
-
ret = 0;
out:
blkg_conf_finish(&ctx);
- if (ret == 0 && enable) {
- struct iolatency_grp *tmp = blkg_to_lat(blkg);
- struct blk_iolatency *blkiolat = tmp->blkiolat;
-
- blk_mq_freeze_queue(blkg->q);
-
- if (enable == 1)
- atomic_inc(&blkiolat->enabled);
- else if (enable == -1)
- atomic_dec(&blkiolat->enabled);
- else
- WARN_ON_ONCE(1);
-
- blk_mq_unfreeze_queue(blkg->q);
-
- blkg_put(blkg);
- blk_put_queue(blkg->q);
- }
return ret ?: nbytes;
}
@@ -1006,14 +1018,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
struct blkcg_gq *blkg = lat_to_blkg(iolat);
- struct blk_iolatency *blkiolat = iolat->blkiolat;
- int ret;
- ret = iolatency_set_min_lat_nsec(blkg, 0);
- if (ret == 1)
- atomic_inc(&blkiolat->enabled);
- if (ret == -1)
- atomic_dec(&blkiolat->enabled);
+ iolatency_set_min_lat_nsec(blkg, 0);
iolatency_clear_scaling(blkg);
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 82de39926a9f..95993c4efa49 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -476,6 +476,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
if (!blk_mq_hw_queue_mapped(data.hctx))
goto out_queue_exit;
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ goto out_queue_exit;
data.ctx = __blk_mq_get_ctx(q, cpu);
if (!q->elevator)
@@ -1642,8 +1644,7 @@ static bool blk_mq_has_sqsched(struct request_queue *q)
*/
static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
{
- struct blk_mq_hw_ctx *hctx;
-
+ struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
/*
* If the IO scheduler does not respect hardware queues when
* dispatching, we just don't bother with multiple HW queues and
@@ -1651,8 +1652,8 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
* just causes lock contention inside the scheduler and pointless cache
* bouncing.
*/
- hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
- raw_smp_processor_id());
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx);
+
if (!blk_mq_hctx_stopped(hctx))
return hctx;
return NULL;