From c6ea70604249bc357ce09e9f8e16c29df0fb2fa2 Mon Sep 17 00:00:00 2001 From: "dougmill@linux.vnet.ibm.com" Date: Tue, 16 Aug 2022 15:07:13 +0100 Subject: block: sed-opal: Add ioctl to return device status Provide a mechanism to retrieve basic status information about the device, including the "supported" flag indicating whether SED-OPAL is supported. The information returned is from the various feature descriptors received during the discovery0 step, and so this ioctl does nothing more than perform the discovery0 step and then save the information received. See "struct opal_status" and OPAL_FL_* bits for the status information currently returned. This is necessary to be able to check whether a device is OPAL enabled, set up, locked or unlocked from userspace programs like systemd-cryptsetup and libcryptsetup. Right now we just have to assume the user 'knows' or blindly attempt setup/lock/unlock operations. Signed-off-by: Douglas Miller Tested-by: Luca Boccassi Reviewed-by: Scott Bauer Acked-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20220816140713.84893-1-luca.boccassi@gmail.com Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 1ac0d712a9c3..6f837bb6c715 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -43,6 +43,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_MBR_DONE: case IOC_OPAL_WRITE_SHADOW_MBR: case IOC_OPAL_GENERIC_TABLE_RW: + case IOC_OPAL_GET_STATUS: return true; } return false; -- cgit v1.2.3 From a4e1d0b76e7b32c0839e72679c530445172a2564 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 15 Aug 2022 10:00:43 -0700 Subject: block: Change the return type of blk_mq_map_queues() into void Since blk_mq_map_queues() and the .map_queues() callbacks always return 0, change their return type into void. Most callers ignore the returned value anyway. Cc: Christoph Hellwig Cc: Jason Wang Cc: Keith Busch Cc: Martin K. Petersen Cc: Doug Gilbert Cc: Michael S. Tsirkin Signed-off-by: Bart Van Assche Reviewed-by: John Garry Acked-by: Md Haris Iqbal Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20220815170043.19489-3-bvanassche@acm.org [axboe: fold in fix from Bart] Signed-off-by: Jens Axboe --- include/linux/blk-mq-pci.h | 4 ++-- include/linux/blk-mq-rdma.h | 2 +- include/linux/blk-mq-virtio.h | 2 +- include/linux/blk-mq.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h index 0b1f45c62623..ca544e1d3508 100644 --- a/include/linux/blk-mq-pci.h +++ b/include/linux/blk-mq-pci.h @@ -5,7 +5,7 @@ struct blk_mq_queue_map; struct pci_dev; -int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, - int offset); +void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, + int offset); #endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h index 5cc5f0f36218..53b58c610e76 100644 --- a/include/linux/blk-mq-rdma.h +++ b/include/linux/blk-mq-rdma.h @@ -5,7 +5,7 @@ struct blk_mq_tag_set; struct ib_device; -int blk_mq_rdma_map_queues(struct blk_mq_queue_map *map, +void blk_mq_rdma_map_queues(struct blk_mq_queue_map *map, struct ib_device *dev, int first_vec); #endif /* _LINUX_BLK_MQ_RDMA_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h index 687ae287e1dc..13226e9b22dd 100644 --- a/include/linux/blk-mq-virtio.h +++ b/include/linux/blk-mq-virtio.h @@ -5,7 +5,7 @@ struct blk_mq_queue_map; struct virtio_device; -int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, +void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, struct virtio_device *vdev, int first_vec); #endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 92294a5fb083..c38575209d51 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -630,7 +630,7 @@ struct blk_mq_ops { * @map_queues: This allows drivers specify their own queue mapping by * overriding the setup-time function that builds the mq_map. */ - int (*map_queues)(struct blk_mq_tag_set *set); + void (*map_queues)(struct blk_mq_tag_set *set); #ifdef CONFIG_BLK_DEBUG_FS /** @@ -880,7 +880,7 @@ void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); -int blk_mq_map_queues(struct blk_mq_queue_map *qmap); +void blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); -- cgit v1.2.3 From f5d632d15e9e0a037339601680d82bb840f85d10 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 5 Aug 2022 16:39:04 -0600 Subject: block: shrink rq_map_data a bit We don't need full ints for several of these members. Change the page_order and nr_entries to unsigned shorts, and the true/false from_user and null_mapped to booleans. This shrinks the struct from 32 to 24 bytes on 64-bit archs. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c38575209d51..74b99d716b0b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -963,11 +963,11 @@ blk_status_t blk_insert_cloned_request(struct request *rq); struct rq_map_data { struct page **pages; - int page_order; - int nr_entries; unsigned long offset; - int null_mapped; - int from_user; + unsigned short page_order; + unsigned short nr_entries; + bool null_mapped; + bool from_user; }; int blk_rq_map_user(struct request_queue *, struct request *, -- cgit v1.2.3 From 16ede66973c84f890c03584f79158dd5b2d725f5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 25 Aug 2022 07:53:12 -0700 Subject: sbitmap: fix batched wait_cnt accounting Batched completions can clear multiple bits, but we're only decrementing the wait_cnt by one each time. This can cause waiters to never be woken, stalling IO. Use the batched count instead. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215679 Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20220825145312.1217900-1-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 8f5a86e210b9..4d2d5205ab58 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -575,8 +575,9 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. + * @nr: Number of bits cleared. */ -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct -- cgit v1.2.3 From bce1b56c73826fec8caf6187f0c922ede397a5a8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 4 Sep 2022 06:39:25 -0600 Subject: Revert "sbitmap: fix batched wait_cnt accounting" This reverts commit 16ede66973c84f890c03584f79158dd5b2d725f5. This is causing issues with CPU stalls on my test box, revert it for now until we understand what is going on. It looks like infinite looping off sbitmap_queue_wake_up(), but hard to tell with a lot of CPUs hitting this issue and the console scrolling infinitely. Link: https://lore.kernel.org/linux-block/e742813b-ce5c-0d58-205b-1626f639b1bd@kernel.dk/ Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 4d2d5205ab58..8f5a86e210b9 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -575,9 +575,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. - * @nr: Number of bits cleared. */ -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr); +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct -- cgit v1.2.3 From 4acb83417cadfdcbe64215f9d0ddcf3132af808e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 9 Sep 2022 11:40:22 -0700 Subject: sbitmap: fix batched wait_cnt accounting Batched completions can clear multiple bits, but we're only decrementing the wait_cnt by one each time. This can cause waiters to never be woken, stalling IO. Use the batched count instead. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215679 Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20220909184022.1709476-1-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 8f5a86e210b9..4d2d5205ab58 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -575,8 +575,9 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. + * @nr: Number of bits cleared. */ -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct -- cgit v1.2.3 From 320fb0f91e55ba248d4bad106b408e59099cfa89 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 29 Aug 2022 10:22:37 +0800 Subject: blk-throttle: fix that io throttle can only work for single bio Test scripts: cd /sys/fs/cgroup/blkio/ echo "8:0 1024" > blkio.throttle.write_bps_device echo $$ > cgroup.procs dd if=/dev/zero of=/dev/sda bs=10k count=1 oflag=direct & dd if=/dev/zero of=/dev/sda bs=10k count=1 oflag=direct & Test result: 10240 bytes (10 kB, 10 KiB) copied, 10.0134 s, 1.0 kB/s 10240 bytes (10 kB, 10 KiB) copied, 10.0135 s, 1.0 kB/s The problem is that the second bio is finished after 10s instead of 20s. Root cause: 1) second bio will be flagged: __blk_throtl_bio while (true) { ... if (sq->nr_queued[rw]) -> some bio is throttled already break }; bio_set_flag(bio, BIO_THROTTLED); -> flag the bio 2) flagged bio will be dispatched without waiting: throtl_dispatch_tg tg_may_dispatch tg_with_in_bps_limit if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) *wait = 0; -> wait time is zero return true; commit 9f5ede3c01f9 ("block: throttle split bio in case of iops limit") support to count split bios for iops limit, thus it adds flagged bio checking in tg_with_in_bps_limit() so that split bios will only count once for bps limit, however, it introduce a new problem that io throttle won't work if multiple bios are throttled. In order to fix the problem, handle iops/bps limit in different ways: 1) for iops limit, there is no flag to record if the bio is throttled, and iops is always applied. 2) for bps limit, original bio will be flagged with BIO_BPS_THROTTLED, and io throttle will ignore bio with the flag. Noted this patch also remove the code to set flag in __bio_clone(), it's introduced in commit 111be8839817 ("block-throttle: avoid double charge"), and author thinks split bio can be resubmited and throttled again, which is wrong because split bio will continue to dispatch from caller. Fixes: 9f5ede3c01f9 ("block: throttle split bio in case of iops limit") Cc: Signed-off-by: Yu Kuai Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20220829022240.3348319-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/blk_types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index ca22b06700a9..2c5806997bbf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -509,7 +509,7 @@ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) { bio_clear_flag(bio, BIO_REMAPPED); if (bio->bi_bdev != bdev) - bio_clear_flag(bio, BIO_THROTTLED); + bio_clear_flag(bio, BIO_BPS_THROTTLED); bio->bi_bdev = bdev; bio_associate_blkg(bio); } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1ef99790f6ed..41afb4cfb9b0 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -325,7 +325,7 @@ enum { BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ - BIO_THROTTLED, /* This bio has already been subjected to + BIO_BPS_THROTTLED, /* This bio has already been subjected to * throttling rules. Don't do it again. */ BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ -- cgit v1.2.3 From 176042404ee6a96ba7e9054e1bda6220360a26ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Sep 2022 10:41:56 +0100 Subject: mm: add PSI accounting around ->read_folio and ->readahead calls PSI tries to account for the cost of bringing back in pages discarded by the MM LRU management. Currently the prime place for that is hooked into the bio submission path, which is a rather bad place: - it does not actually account I/O for non-block file systems, of which we have many - it adds overhead and a layering violation to the block layer Add the accounting into the two places in the core MM code that read pages into an address space by calling into ->read_folio and ->readahead so that the entire file system operations are covered, to broaden the coverage and allow removing the accounting in the block layer going forward. As psi_memstall_enter can deal with nested calls this will not lead to double accounting even while the bio annotations are still present. Signed-off-by: Christoph Hellwig Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20220915094200.139713-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0178b2040ea3..201dc7281640 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1173,6 +1173,8 @@ struct readahead_control { pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; + bool _workingset; + unsigned long _pflags; }; #define DEFINE_READAHEAD(ractl, f, r, m, i) \ -- cgit v1.2.3 From 118f3663fbc658e9ad6165e129076981c7b685c5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Sep 2022 10:42:00 +0100 Subject: block: remove PSI accounting from the bio layer PSI accounting is now done by the VM code, where it should have been since the beginning. Signed-off-by: Christoph Hellwig Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20220915094200.139713-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 41afb4cfb9b0..e0b098089ef2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -321,7 +321,6 @@ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ - BIO_WORKINGSET, /* contains userspace workingset pages */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ -- cgit v1.2.3 From b2bed51a5261f4266ecb857bba680a7f668d3ddf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Sep 2022 13:06:26 -0700 Subject: block: Fix the enum blk_eh_timer_return documentation The documentation of the blk_eh_timer_return enumeration values does not reflect correctly how e.g. the SCSI core uses these values. Fix the documentation. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Fixes: 88b0cfad2888 ("block: document the blk_eh_timer_return values") Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20220920200626.3422296-1-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 74b99d716b0b..e21ff85751cf 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -268,9 +268,16 @@ static inline void rq_list_move(struct request **src, struct request **dst, rq_list_add(dst, rq); } +/** + * enum blk_eh_timer_return - How the timeout handler should proceed + * @BLK_EH_DONE: The block driver completed the command or will complete it at + * a later time. + * @BLK_EH_RESET_TIMER: Reset the request timer and continue waiting for the + * request to complete. + */ enum blk_eh_timer_return { - BLK_EH_DONE, /* drivers has completed the command */ - BLK_EH_RESET_TIMER, /* reset timer and try again */ + BLK_EH_DONE, + BLK_EH_RESET_TIMER, }; #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ -- cgit v1.2.3 From de185b56e8a62822d4e1cdb3e068b38ca709aa47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Sep 2022 20:05:00 +0200 Subject: blk-cgroup: pass a gendisk to blkcg_schedule_throttle Pass the gendisk to blkcg_schedule_throttle as part of moving the blk-cgroup infrastructure to be gendisk based. Remove the unused !BLK_CGROUP stub while we're at it. Signed-off-by: Christoph Hellwig Reviewed-by: Andreas Herrmann Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20220921180501.1539876-17-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 9f40dbc65f82..dd5841a42c33 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -18,14 +18,14 @@ struct bio; struct cgroup_subsys_state; -struct request_queue; +struct gendisk; #define FC_APPID_LEN 129 #ifdef CONFIG_BLK_CGROUP extern struct cgroup_subsys_state * const blkcg_root_css; -void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); +void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay); void blkcg_maybe_throttle_current(void); bool blk_cgroup_congested(void); void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css); @@ -39,7 +39,6 @@ struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio); static inline void blkcg_maybe_throttle_current(void) { } static inline bool blk_cgroup_congested(void) { return false; } -static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } static inline struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio) { return NULL; -- cgit v1.2.3 From 1c32a8012b7fabe469b6af826edfd4ae2a6201d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 15:38:58 +0200 Subject: nvme: improve the NVME_CONNECT_AUTHREQ* definitions Mark them as unsigned so that we don't need extra casts, and define them relative to cdword0 instead of requiring extra shifts. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index ae53d74f3696..050d7d0cd81b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1482,8 +1482,8 @@ struct nvmf_connect_command { }; enum { - NVME_CONNECT_AUTHREQ_ASCR = (1 << 2), - NVME_CONNECT_AUTHREQ_ATR = (1 << 1), + NVME_CONNECT_AUTHREQ_ASCR = (1U << 18), + NVME_CONNECT_AUTHREQ_ATR = (1U << 17), }; struct nvmf_connect_data { -- cgit v1.2.3 From 568ec936bf1384fc15873908c96a9aeb62536edb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Sep 2022 09:58:15 +0200 Subject: block: replace blk_queue_nowait with bdev_nowait Replace blk_queue_nowait with a bdev_nowait helpers that takes the block_device given that the I/O submission path should not have to look into the request_queue. Signed-off-by: Christoph Hellwig Reviewed-by: Pankaj Raghav Link: https://lore.kernel.org/r/20220927075815.269694-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84b13fdd34a7..4750772ef228 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -618,7 +618,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) #define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) -#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags) #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); @@ -1280,6 +1279,11 @@ static inline bool bdev_fua(struct block_device *bdev) return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags); } +static inline bool bdev_nowait(struct block_device *bdev) +{ + return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags); +} + static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -- cgit v1.2.3 From 8cafdb5ab94cda3ebb0975be16e2d564a05132ea Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Thu, 29 Sep 2022 09:47:44 +0200 Subject: block: adapt blk_mq_plug() to not plug for writes that require a zone lock The current implementation of blk_mq_plug() disables plugging for all operations that involves a transfer to the device as we just check if the last bit in op_is_write() function. Modify blk_mq_plug() to disable plugging only for REQ_OP_WRITE and REQ_OP_WRITE_ZEROS as they might require a zone lock. Suggested-by: Christoph Hellwig Suggested-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Pankaj Raghav Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220929074745.103073-2-p.raghav@samsung.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4750772ef228..49373d002631 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1304,6 +1304,15 @@ static inline bool bdev_is_zoned(struct block_device *bdev) return false; } +static inline bool bdev_op_is_zoned_write(struct block_device *bdev, + blk_opf_t op) +{ + if (!bdev_is_zoned(bdev)) + return false; + + return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES; +} + static inline sector_t bdev_zone_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -- cgit v1.2.3