From 506a2001b71964d3a75b62d354cd66ce8ce9781b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 9 Jun 2021 09:58:21 +0800 Subject: block: fix race between adding/removing rq qos and normal IO [ Upstream commit 2cafe29a8d03f02a3d16193bdaae2f3e82a423f9 ] Yi reported several kernel panics on: [16687.001777] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 ... [16687.163549] pc : __rq_qos_track+0x38/0x60 or [ 997.690455] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 ... [ 997.850347] pc : __rq_qos_done+0x2c/0x50 It turns out this is caused by a race between adding rq qos (wbt) and normal IO, because rq_qos_add() can run while IO is being submitted. Fix this issue by freezing the queue before adding/deleting rq qos to/from the queue. rq_qos_exit() needn't freeze the queue because it is called after the queue has been frozen. iolatency calls rq_qos_add() while allocating the queue, so freezing won't add delay because the queue usage refcount works in atomic mode at that time. iocost calls rq_qos_add() when writing the cgroup attribute file; it is fine to freeze the queue at that time since we usually freeze the queue when storing to a queue sysfs attribute, and iocost only exists on the root cgroup. wbt_init() calls it in blk_register_queue() and in the queue sysfs attribute store (queue_wb_lat_store(), when it is written for the first time in case of !BLK_WBT_MQ); the following patch will speed up the queue freezing in wbt_init().
Reported-by: Yi Zhang Cc: Bart Van Assche Signed-off-by: Ming Lei Reviewed-by: Bart Van Assche Tested-by: Yi Zhang Link: https://lore.kernel.org/r/20210609015822.103433-2-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-rq-qos.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'block') diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 2bc43e94f4c4..2bcb3495e376 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "blk-mq-debugfs.h" @@ -99,8 +100,21 @@ static inline void rq_wait_init(struct rq_wait *rq_wait) static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) { + /* + * No IO can be in-flight when adding rqos, so freeze queue, which + * is fine since we only support rq_qos for blk-mq queue. + * + * Reuse ->queue_lock for protecting against other concurrent + * rq_qos adding/deleting + */ + blk_mq_freeze_queue(q); + + spin_lock_irq(&q->queue_lock); rqos->next = q->rq_qos; q->rq_qos = rqos; + spin_unlock_irq(&q->queue_lock); + + blk_mq_unfreeze_queue(q); if (rqos->ops->debugfs_attrs) blk_mq_debugfs_register_rqos(rqos); @@ -110,12 +124,22 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) { struct rq_qos **cur; + /* + * See comment in rq_qos_add() about freezing queue & using + * ->queue_lock. 
+ */ + blk_mq_freeze_queue(q); + + spin_lock_irq(&q->queue_lock); for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; break; } } + spin_unlock_irq(&q->queue_lock); + + blk_mq_unfreeze_queue(q); blk_mq_debugfs_unregister_rqos(rqos); } -- cgit v1.2.3 From 05bc31902755f9a916f060257eb760d39cfd6ba7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 28 Jun 2021 10:33:12 +0800 Subject: block: fix discard request merge [ Upstream commit 2705dfb2094777e405e065105e307074af8965c1 ] ll_new_hw_segment() is reached only in case of single range discard merge, and we don't have max discard segment size limit actually, so it is wrong to run the following check: if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req)) it may be always false since req->nr_phys_segments is initialized as one, and bio's segment count is still 1, blk_rq_get_max_segments(req) is 1 too. Fix the issue by not doing the check and bypassing the calculation of discard request's nr_phys_segments. Based on analysis from Wang Shanker.
Cc: Christoph Hellwig Reported-by: Wang Shanker Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20210628023312.1903255-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-merge.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index 03959bfe961c..4b022f0c49d2 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -571,10 +571,14 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq) static inline int ll_new_hw_segment(struct request *req, struct bio *bio, unsigned int nr_phys_segs) { - if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req)) + if (blk_integrity_merge_bio(req->q, req, bio) == false) goto no_merge; - if (blk_integrity_merge_bio(req->q, req, bio) == false) + /* discard request merge won't add new segment */ + if (req_op(req) == REQ_OP_DISCARD) + return 1; + + if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req)) goto no_merge; /* -- cgit v1.2.3 From bcf3f42bda3a49021080e6c848e7ffc7b3364aa2 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 19 Jun 2021 17:36:59 +0800 Subject: blk-wbt: introduce a new disable state to prevent false positive by rwb_enabled() [ Upstream commit 1d0903d61e9645c6330b94247b96dd873dfc11c8 ] Currently we disable wbt by simply zeroing out rwb->wb_normal in wbt_disable_default() when switching the elevator to bfq, but this is not safe because rwb_enabled() will give a false positive if we change the queue depth. If it becomes a false positive between wbt_wait() and wbt_track() when submitting a write request, it will lead to rqw->inflight dropping to -1 in wbt_done(), which will end up triggering an IO hang. Fix this issue by introducing a new state which means that wbt was disabled.
Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt") Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20210619093700.920393-2-yi.zhang@huawei.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-wbt.c | 5 +++-- block/blk-wbt.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 8641ba9793c5..7c7dd5c14391 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -77,7 +77,8 @@ enum { static inline bool rwb_enabled(struct rq_wb *rwb) { - return rwb && rwb->wb_normal != 0; + return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT && + rwb->wb_normal != 0; } static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) @@ -710,7 +711,7 @@ void wbt_disable_default(struct request_queue *q) rwb = RQWB(rqos); if (rwb->enable_state == WBT_STATE_ON_DEFAULT) { blk_stat_deactivate(rwb->cb); - rwb->wb_normal = 0; + rwb->enable_state = WBT_STATE_OFF_DEFAULT; } } EXPORT_SYMBOL_GPL(wbt_disable_default); diff --git a/block/blk-wbt.h b/block/blk-wbt.h index 8e4e37660971..d8d9f41b42f9 100644 --- a/block/blk-wbt.h +++ b/block/blk-wbt.h @@ -34,6 +34,7 @@ enum { enum { WBT_STATE_ON_DEFAULT = 1, WBT_STATE_ON_MANUAL = 2, + WBT_STATE_OFF_DEFAULT }; struct rq_wb { -- cgit v1.2.3 From 7b33b04f85dfb868328609efc71c55a21a5a3cab Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 19 Jun 2021 17:37:00 +0800 Subject: blk-wbt: make sure throttle is enabled properly [ Upstream commit 76a8040817b4b9c69b53f9b326987fa891b4082a ] After commit a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt"), if throttling was disabled by wbt_disable_default(), it could not be enabled again. Fix this by setting enable_state back to WBT_STATE_ON_DEFAULT.
Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt") Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20210619093700.920393-3-yi.zhang@huawei.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-wbt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 7c7dd5c14391..ee708c1bc352 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -645,9 +645,13 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on) void wbt_enable_default(struct request_queue *q) { struct rq_qos *rqos = wbt_rq_qos(q); + /* Throttling already enabled? */ - if (rqos) + if (rqos) { + if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) + RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; return; + } /* Queue not registered? Maybe shutting down... */ if (!blk_queue_registered(q)) -- cgit v1.2.3