From 3af3d772f7216cf23081bb4176e86f1219d32ebc Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sat, 13 Mar 2021 11:01:45 +0800 Subject: block_dump: remove block_dump feature We have already delete block_dump feature in mark_inode_dirty() because it can be replaced by tracepoints, now we also remove the part in submit_bio() for the same reason. The part of block dump feature in submit_bio() dump the write process, write region and sectors on the target disk into kernel message. it can be replaced by block_bio_queue tracepoint in submit_bio_checks(), so we do not need block_dump anymore, remove the whole block_dump feature. Signed-off-by: zhangyi (F) Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210313030146.2882027-3-yi.zhang@huawei.com Signed-off-by: Jens Axboe --- include/linux/writeback.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 8e5c5bb16e2d..9ef50176f3a1 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -360,7 +360,6 @@ extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; extern unsigned int dirtytime_expire_interval; extern int vm_highmem_is_dirtyable; -extern int block_dump; extern int laptop_mode; int dirty_background_ratio_handler(struct ctl_table *table, int write, -- cgit v1.2.3 From d97e594c51660bea510a387731637b894651e4b5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 13 May 2021 20:00:58 +0800 Subject: blk-mq: Use request queue-wide tags for tagset-wide sbitmap The tags used for an IO scheduler are currently per hctx. As such, when q->nr_hw_queues grows, so does the request queue total IO scheduler tag depth. This may cause problems for SCSI MQ HBAs whose total driver depth is fixed. Ming and Yanhui report higher CPU usage and lower throughput in scenarios where the fixed total driver tag depth is appreciably lower than the total scheduler tag depth: https://lore.kernel.org/linux-block/440dfcfc-1a2c-bd98-1161-cec4d78c6dfc@huawei.com/T/#mc0d6d4f95275a2743d1c8c3e4dc9ff6c9aa3a76b In that scenario, since the scheduler tag is got first, much contention is introduced since a driver tag may not be available after we have got the sched tag. Improve this scenario by introducing request queue-wide tags for when a tagset-wide sbitmap is used. The static sched requests are still allocated per hctx, as requests are initialised per hctx, as in blk_mq_init_request(..., hctx_idx, ...) -> set->ops->init_request(.., hctx_idx, ...). For simplicity of resizing the request queue sbitmap when updating the request queue depth, just init at the max possible size, so we don't need to deal with the possibly with swapping out a new sbitmap for old if we need to grow. Signed-off-by: John Garry Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/1620907258-30910-3-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f69c75bd6d27..2c28577b50f4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -25,6 +25,7 @@ #include #include #include +#include struct module; struct scsi_ioctl_command; @@ -493,6 +494,9 @@ struct request_queue { atomic_t nr_active_requests_shared_sbitmap; + struct sbitmap_queue sched_bitmap_tags; + struct sbitmap_queue sched_breserved_tags; + struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); -- cgit v1.2.3 From 958229a7c55f219b1cff99f939dabbc1b6ba7161 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 May 2021 07:50:54 +0200 Subject: block: add a flag to make put_disk on partially initalized disks safer Add a flag to indicate that __device_add_disk did grab a queue reference so that disk_release only drops it if we actually had it. This sort out one of the major pitfals with partially initialized gendisk that a lot of drivers did get wrong or still do. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Luis Chamberlain Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20210521055116.1053587-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fc26f7bdf71..4d3ee8b6b297 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -153,6 +153,7 @@ struct gendisk { unsigned long state; #define GD_NEED_PART_SCAN 0 #define GD_READ_ONLY 1 +#define GD_QUEUE_REF 2 struct kobject *slave_dir; struct timer_rand_state *random; -- cgit v1.2.3 From f525464a8000f092c20b00eead3eaa9d849c599e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 May 2021 07:50:55 +0200 Subject: block: add blk_alloc_disk and blk_cleanup_disk APIs Add two new APIs to allocate and free a gendisk including the request_queue for use with BIO based drivers. This is to avoid boilerplate code in drivers. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20210521055116.1053587-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 4d3ee8b6b297..782f0171d104 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -278,6 +278,28 @@ extern void put_disk(struct gendisk *disk); #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) +/** + * blk_alloc_disk - allocate a gendisk structure + * @node_id: numa node to allocate on + * + * Allocate and pre-initialize a gendisk structure for use with BIO based + * drivers. + * + * Context: can sleep + */ +#define blk_alloc_disk(node_id) \ +({ \ + struct gendisk *__disk = __blk_alloc_disk(node_id); \ + static struct lock_class_key __key; \ + \ + if (__disk) \ + lockdep_init_map(&__disk->lockdep_map, \ + "(bio completion)", &__key, 0); \ + __disk; \ +}) +struct gendisk *__blk_alloc_disk(int node); +void blk_cleanup_disk(struct gendisk *disk); + int __register_blkdev(unsigned int major, const char *name, void (*probe)(dev_t devt)); #define register_blkdev(major, name) \ -- cgit v1.2.3 From da7ba72960ca2a9b968e47fcf414d16f3d4c0c42 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 May 2021 07:51:16 +0200 Subject: block: unexport blk_alloc_queue blk_alloc_queue is just an internal helper now, unexport it and remove it from the public header. Signed-off-by: Christoph Hellwig Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20210521055116.1053587-27-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c28577b50f4..d66d0da72529 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1213,7 +1213,6 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, extern void blk_dump_rq_flags(struct request *, char *); bool __must_check blk_get_queue(struct request_queue *); -struct request_queue *blk_alloc_queue(int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); -- cgit v1.2.3 From a8698707a1835be3abd12a3b28079a80999f8dee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 May 2021 08:12:56 +0200 Subject: block: move bd_mutex to struct gendisk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the per-block device bd_mutex with a per-gendisk open_mutex, thus simplifying locking wherever we deal with partitions. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Acked-by: Roger Pau Monné Link: https://lore.kernel.org/r/20210525061301.2242282-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - include/linux/genhd.h | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index db026b6ec15a..a09660671fa4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -29,7 +29,6 @@ struct block_device { int bd_openers; struct inode * bd_inode; /* will die */ struct super_block * bd_super; - struct mutex bd_mutex; /* open/close mutex */ void * bd_claiming; struct device bd_device; void * bd_holder; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 782f0171d104..1fabb1559110 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -154,6 +154,9 @@ struct gendisk { #define GD_NEED_PART_SCAN 0 #define GD_READ_ONLY 1 #define GD_QUEUE_REF 2 + + struct mutex open_mutex; /* open/close mutex */ + struct kobject *slave_dir; struct timer_rand_state *random; -- cgit v1.2.3 From ab4b57057d744861f670b47b163209727b26418b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 May 2021 08:12:59 +0200 Subject: block: move bd_part_count to struct gendisk The bd_part_count value only makes sense for whole devices, so move it to struct gendisk and give it a more descriptive name. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210525061301.2242282-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 --- include/linux/genhd.h | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a09660671fa4..fd3860d18d7e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -39,9 +39,6 @@ struct block_device { #endif struct kobject *bd_holder_dir; u8 bd_partno; - /* number of times partitions within this device have been opened. */ - unsigned bd_part_count; - spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1fabb1559110..47d4605c0e7e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -156,6 +156,7 @@ struct gendisk { #define GD_QUEUE_REF 2 struct mutex open_mutex; /* open/close mutex */ + unsigned open_partitions; /* number of open partitions */ struct kobject *slave_dir; -- cgit v1.2.3 From c97d93c31e5734a16bfe663085ec91b8c9fb20f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 May 2021 08:13:00 +0200 Subject: block: factor out a part_devt helper Add a helper to find the dev_t for a disk + partno tuple. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210525061301.2242282-8-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 47d4605c0e7e..64a8431202b7 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -333,6 +333,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ +dev_t part_devt(struct gendisk *disk, u8 partno); dev_t blk_lookup_devt(const char *name, int partno); void blk_request_module(dev_t devt); #ifdef CONFIG_BLOCK -- cgit v1.2.3 From 0e0ccdecb3cff95a350b4364e7ebbaa754d0e47d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 May 2021 08:13:01 +0200 Subject: block: remove bdget_disk Just opencode the xa_load in the callers, as none of them actually needs a reference to the bdev. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210525061301.2242282-9-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 64a8431202b7..03d684f0498f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -223,7 +223,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk) } extern void del_gendisk(struct gendisk *gp); -extern struct block_device *bdget_disk(struct gendisk *disk, int partno); void set_disk_ro(struct gendisk *disk, bool read_only); -- cgit v1.2.3 From c9c9762d4d44dcb1b2ba90cfb4122dc11ceebf31 Mon Sep 17 00:00:00 2001 From: Long Li Date: Mon, 7 Jun 2021 12:34:05 -0700 Subject: block: return the correct bvec when checking for gaps After commit 07173c3ec276 ("block: enable multipage bvecs"), a bvec can have multiple pages. But bio_will_gap() still assumes one page bvec while checking for merging. If the pages in the bvec go across the seg_boundary_mask, this check for merging can potentially succeed if only the 1st page is tested, and can fail if all the pages are tested. Later, when SCSI builds the SG list the same check for merging is done in __blk_segment_map_sg_merge() with all the pages in the bvec tested. This time the check may fail if the pages in bvec go across the seg_boundary_mask (but tested okay in bio_will_gap() earlier, so those BIOs were merged). If this check fails, we end up with a broken SG list for drivers assuming the SG list not having offsets in intermediate pages. This results in incorrect pages written to the disk. Fix this by returning the multi-page bvec when testing gaps for merging. Cc: Jens Axboe Cc: Johannes Thumshirn Cc: Pavel Begunkov Cc: Ming Lei Cc: Tejun Heo Cc: "Matthew Wilcox (Oracle)" Cc: Jeffle Xu Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 07173c3ec276 ("block: enable multipage bvecs") Signed-off-by: Long Li Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/1623094445-22332-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index a0b4cfdf62a4..d2b98efb5cc5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -44,9 +44,6 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs) #define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter) #define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) -#define bio_multiple_segments(bio) \ - ((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len) - #define bvec_iter_sectors(iter) ((iter).bi_size >> 9) #define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter))) @@ -271,7 +268,7 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit) static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) { - *bv = bio_iovec(bio); + *bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); } static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) @@ -279,10 +276,9 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) struct bvec_iter iter = bio->bi_iter; int idx; - if (unlikely(!bio_multiple_segments(bio))) { - *bv = bio_iovec(bio); - return; - } + bio_get_first_bvec(bio, bv); + if (bv->bv_len == bio->bi_iter.bi_size) + return; /* this bio only has a single bvec */ bio_advance_iter(bio, &iter, iter.bi_size); -- cgit v1.2.3 From 11c7aa0ddea8611007768d3e6b58d45dc60a19e1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 7 Jun 2021 13:26:13 +0200 Subject: rq-qos: fix missed wake-ups in rq_qos_throttle try two Commit 545fbd0775ba ("rq-qos: fix missed wake-ups in rq_qos_throttle") tried to fix a problem that a process could be sleeping in rq_qos_wait() without anyone to wake it up. However the fix is not complete and the following can still happen: CPU1 (waiter1) CPU2 (waiter2) CPU3 (waker) rq_qos_wait() rq_qos_wait() acquire_inflight_cb() -> fails acquire_inflight_cb() -> fails completes IOs, inflight decreased prepare_to_wait_exclusive() prepare_to_wait_exclusive() has_sleeper = !wq_has_single_sleeper() -> true as there are two sleepers has_sleeper = !wq_has_single_sleeper() -> true io_schedule() io_schedule() Deadlock as now there's nobody to wakeup the two waiters. The logic automatically blocking when there are already sleepers is really subtle and the only way to make it work reliably is that we check whether there are some waiters in the queue when adding ourselves there. That way, we are guaranteed that at least the first process to enter the wait queue will recheck the waiting condition before going to sleep and thus guarantee forward progress. Fixes: 545fbd0775ba ("rq-qos: fix missed wake-ups in rq_qos_throttle") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20210607112613.25344-1-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/wait.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index fe10e8570a52..6598ae35e1b5 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -1136,7 +1136,7 @@ do { \ * Waitqueues which are removed from the waitqueue_head at wakeup time */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); -void prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); -- cgit v1.2.3 From cdb14e0f7775e767484843e8ecd736bb21754c58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Jun 2021 09:53:16 +0300 Subject: blk-mq: factor out a blk_mq_alloc_sq_tag_set helper Factour out a helper to initialize a simple single hw queue tag_set from blk_mq_init_sq_queue. This will allow to phase out blk_mq_init_sq_queue in favor of a more symmetric and general API. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210602065345.355274-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 359486940fa0..bb950fc669ef 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -439,6 +439,9 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, void blk_mq_unregister_dev(struct device *, struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); +int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int queue_depth, + unsigned int set_flags); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -- cgit v1.2.3 From 26a9750aa875126e4b7fc5ee6de652a529c5b7ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Jun 2021 09:53:17 +0300 Subject: blk-mq: improve the blk_mq_init_allocated_queue interface Don't return the passed in request_queue but a normal error code, and drop the elevator_init argument in favor of just calling elevator_init_mq directly from dm-rq. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210602065345.355274-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 ++--- include/linux/elevator.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index bb950fc669ef..73750b2838d2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -429,9 +429,8 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, void *queuedata); -struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, - struct request_queue *q, - bool elevator_init); +int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, + struct request_queue *q); struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int queue_depth, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index dcb2f9022c1d..783ecb3cb77a 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -120,6 +120,7 @@ extern void elv_merged_request(struct request_queue *, struct request *, extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); +void elevator_init_mq(struct request_queue *q); /* * io scheduler registration -- cgit v1.2.3 From b461dfc49eb6fbabc60b9dad476e787ada56b7b4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Jun 2021 09:53:18 +0300 Subject: blk-mq: add the blk_mq_alloc_disk APIs Add a new API to allocate a gendisk including the request_queue for use with blk-mq based drivers. This is to avoid boilerplate code in drivers. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210602065345.355274-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 73750b2838d2..f496c6c5b5d2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -426,6 +426,18 @@ enum { ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ << BLK_MQ_F_ALLOC_POLICY_START_BIT) +#define blk_mq_alloc_disk(set, queuedata) \ +({ \ + static struct lock_class_key __key; \ + struct gendisk *__disk = __blk_mq_alloc_disk(set, queuedata); \ + \ + if (__disk) \ + lockdep_init_map(&__disk->lockdep_map, \ + "(bio completion)", &__key, 0); \ + __disk; \ +}) +struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, + void *queuedata); struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, void *queuedata); -- cgit v1.2.3 From 08c1d480ed38995690a7d83f2c6a505f6cbbed9f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Jun 2021 09:53:30 +0300 Subject: blk-mq: remove blk_mq_init_sq_queue All users are gone now. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210602065345.355274-16-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f496c6c5b5d2..02a4aab0aeac 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -443,10 +443,6 @@ struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); -struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, - const struct blk_mq_ops *ops, - unsigned int queue_depth, - unsigned int set_flags); void blk_mq_unregister_dev(struct device *, struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); -- cgit v1.2.3 From 52d7e288444906aa5c99888e80a9cc1a1423ed92 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Jun 2021 16:45:22 +0300 Subject: blk-mq: fix an IS_ERR() vs NULL bug The __blk_mq_alloc_disk() function doesn't return NULLs it returns error pointers. Fixes: b461dfc49eb6 ("blk-mq: add the blk_mq_alloc_disk APIs") Signed-off-by: Dan Carpenter Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/YMyjci35WBqrtqG+@mwanda Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 02a4aab0aeac..fd2de2b422ed 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -431,7 +431,7 @@ enum { static struct lock_class_key __key; \ struct gendisk *__disk = __blk_mq_alloc_disk(set, queuedata); \ \ - if (__disk) \ + if (!IS_ERR(__disk)) \ lockdep_init_map(&__disk->lockdep_map, \ "(bio completion)", &__key, 0); \ __disk; \ -- cgit v1.2.3 From 630161cfdf5cdc696a82b59410d1ff00b23d946e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Jun 2021 14:32:39 +0200 Subject: block: move bdev_disk_changed Move bdev_disk_changed to block/partitions/core.c, together with the rest of the partition scanning code. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210624123240.441814-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 03d684f0498f..f5f0c9bdf1d2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -257,7 +257,6 @@ static inline sector_t get_capacity(struct gendisk *disk) } int bdev_disk_changed(struct block_device *bdev, bool invalidate); -int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); void blk_drop_partitions(struct gendisk *disk); extern struct gendisk *__alloc_disk_node(int minors, int node_id); -- cgit v1.2.3 From 0384264ea8a39bd98c9a3158060565f650c056a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Jun 2021 14:32:40 +0200 Subject: block: pass a gendisk to bdev_disk_changed bdev_disk_changed can only operate on whole devices. Make that clear by passing a gendisk instead of the struct block_device. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210624123240.441814-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index f5f0c9bdf1d2..13b34177cc85 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -256,7 +256,7 @@ static inline sector_t get_capacity(struct gendisk *disk) return bdev_nr_sectors(disk->part0); } -int bdev_disk_changed(struct block_device *bdev, bool invalidate); +int bdev_disk_changed(struct gendisk *disk, bool invalidate); void blk_drop_partitions(struct gendisk *disk); extern struct gendisk *__alloc_disk_node(int minors, int node_id); -- cgit v1.2.3 From fd2ef39cc9a6b9c4c41864ac506906c52f94b06a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Jun 2021 11:36:34 +0200 Subject: blk: Fix lock inversion between ioc lock and bfqd lock Lockdep complains about lock inversion between ioc->lock and bfqd->lock: bfqd -> ioc: put_io_context+0x33/0x90 -> ioc->lock grabbed blk_mq_free_request+0x51/0x140 blk_put_request+0xe/0x10 blk_attempt_req_merge+0x1d/0x30 elv_attempt_insert_merge+0x56/0xa0 blk_mq_sched_try_insert_merge+0x4b/0x60 bfq_insert_requests+0x9e/0x18c0 -> bfqd->lock grabbed blk_mq_sched_insert_requests+0xd6/0x2b0 blk_mq_flush_plug_list+0x154/0x280 blk_finish_plug+0x40/0x60 ext4_writepages+0x696/0x1320 do_writepages+0x1c/0x80 __filemap_fdatawrite_range+0xd7/0x120 sync_file_range+0xac/0xf0 ioc->bfqd: bfq_exit_icq+0xa3/0xe0 -> bfqd->lock grabbed put_io_context_active+0x78/0xb0 -> ioc->lock grabbed exit_io_context+0x48/0x50 do_exit+0x7e9/0xdd0 do_group_exit+0x54/0xc0 To avoid this inversion we change blk_mq_sched_try_insert_merge() to not free the merged request but rather leave that upto the caller similarly to blk_mq_sched_try_merge(). And in bfq_insert_requests() we make sure to free all the merged requests after dropping bfqd->lock. Fixes: aee69d78dec0 ("block, bfq: introduce the BFQ-v0 I/O scheduler as an extra scheduler") Reviewed-by: Ming Lei Acked-by: Paolo Valente Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20210623093634.27879-3-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/elevator.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 783ecb3cb77a..ef9ceead3db1 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -117,7 +117,8 @@ extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, enum elv_merge); -extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); +extern bool elv_attempt_insert_merge(struct request_queue *, struct request *, + struct list_head *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); void elevator_init_mq(struct request_queue *q); -- cgit v1.2.3