From e64a0e16928415648d53d721b3d6fc3635eddf92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Apr 2020 09:42:21 +0200 Subject: block: remove RQF_COPY_USER The RQF_COPY_USER is set for bio where the passthrough request mapping helpers decided that bounce buffering is required. It is then used to pad scatterlist for drivers that required it. But given that non-passthrough requests are per definition aligned, and directly mapped pass-through request must be aligned it is not actually required at all. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32868fbedc9e..76da162b6ae9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -82,8 +82,6 @@ typedef __u32 __bitwise req_flags_t; /* set for "ide_preempt" requests and also for requests for which the SCSI "quiesce" state must be ignored. */ #define RQF_PREEMPT ((__force req_flags_t)(1 << 8)) -/* contains copies of user pages */ -#define RQF_COPY_USER ((__force req_flags_t)(1 << 9)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ -- cgit v1.2.3 From 89de1504d53b59b12bfff227328ee3e63dd3a112 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Apr 2020 09:42:22 +0200 Subject: block: provide a blk_rq_map_sg variant that returns the last element To be able to move some of the special purpose hacks in blk_rq_map_sg into the callers we need a variant that returns the last mapped S/G list element to the caller. Add that variant as __blk_rq_map_sg and make blk_rq_map_sg a trivial inline wrapper around it. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76da162b6ae9..496dc9491026 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1136,7 +1136,15 @@ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) return max_t(unsigned short, rq->nr_phys_segments, 1); } -extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); +int __blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist, struct scatterlist **last_sg); +static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist) +{ + struct scatterlist *last_sg = NULL; + + return __blk_rq_map_sg(q, rq, sglist, &last_sg); +} extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); -- cgit v1.2.3 From cc97923a5bccc776851c242b61015faf288d5c22 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Apr 2020 09:42:24 +0200 Subject: block: move dma drain handling to scsi Don't burden the common block code with specifics of the libata DMA draining mechanism. Instead move most of the code to the scsi midlayer. That also means the nr_phys_segments adjustments in the blk-mq fast path can go away entirely, given that SCSI never looks at nr_phys_segments after mapping the request to a scatterlist. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 496dc9491026..8e4726bce498 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -288,7 +288,6 @@ struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); struct bio_vec; -typedef int (dma_drain_needed_fn)(struct request *); enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ @@ -397,7 +396,6 @@ struct request_queue { struct rq_qos *rq_qos; make_request_fn *make_request_fn; - dma_drain_needed_fn *dma_drain_needed; const struct blk_mq_ops *mq_ops; @@ -467,8 +465,6 @@ struct request_queue { */ unsigned long nr_requests; /* Max # of requests */ - unsigned int dma_drain_size; - void *dma_drain_buffer; unsigned int dma_pad_mask; unsigned int dma_alignment; @@ -1097,9 +1093,6 @@ extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); -extern int blk_queue_dma_drain(struct request_queue *q, - dma_drain_needed_fn *dma_drain_needed, - void *buf, unsigned int size); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); -- cgit v1.2.3 From bdf8710d69f82ee6fd41b0166300c3306898b3c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Apr 2020 09:42:25 +0200 Subject: block: move dma_pad handling from blk_rq_map_sg into the callers There are only two callers of blk_rq_map_sg/__blk_rq_map_sg that set the dma_pad value in the queue. 
Move the handling into those callers instead of burdening the common code, and move the ->extra_len field from struct request to struct scsi_cmnd. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8e4726bce498..f00bd4042295 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -224,8 +224,6 @@ struct request { unsigned short write_hint; unsigned short ioprio; - unsigned int extra_len; /* length of alignment and padding */ - enum mq_rq_state state; refcount_t ref; -- cgit v1.2.3 From e6249cdd46e43a7d3bdb8cce5fe24565d6c11e94 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 3 May 2020 09:54:22 +0800 Subject: block: add blk_io_schedule() for avoiding task hung in sync dio Sync dio could be big, or may take a long time in discard or in case of IO failure. We have prevented task hangs in submit_bio_wait() and blk_execute_rq(), so apply the same trick to prevent task hangs from happening in sync dio. Add the helper blk_io_schedule() and use io_schedule_timeout() to prevent the hung task warning. 
Signed-off-by: Ming Lei Reviewed-by: Bart Van Assche Cc: Salman Qazi Cc: Jesse Barnes Cc: Christoph Hellwig Cc: Bart Van Assche Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f00bd4042295..222eb5f32279 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -27,6 +27,7 @@ #include #include #include +#include struct module; struct scsi_ioctl_command; @@ -1827,4 +1828,15 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } +static inline void blk_io_schedule(void) +{ + /* Prevent hang_check timer from firing at us during very long I/O */ + unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; + + if (timeout) + io_schedule_timeout(timeout); + else + io_schedule(); +} + #endif -- cgit v1.2.3 From 02992df822e7e36685593aad10721a5a9f8d3402 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 12 May 2020 17:55:45 +0900 Subject: block: provide fallbacks for blk_queue_zone_is_seq and blk_queue_zone_no blk_queue_zone_is_seq() and blk_queue_zone_no() have not been called with CONFIG_BLK_DEV_ZONED disabled until now. The introduction of REQ_OP_ZONE_APPEND will change this, so we need to provide noop fallbacks for the !CONFIG_BLK_DEV_ZONED case. Signed-off-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 222eb5f32279..d736acf7f564 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -722,6 +722,16 @@ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { return 0; } +static inline bool blk_queue_zone_is_seq(struct request_queue *q, + sector_t sector) +{ + return false; +} +static inline unsigned int blk_queue_zone_no(struct request_queue *q, + sector_t sector) +{ + return 0; +} #endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) -- cgit v1.2.3 From 0512a75b98f847c2e9a4b664013424e603e202f7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 12 May 2020 17:55:47 +0900 Subject: block: Introduce REQ_OP_ZONE_APPEND Define REQ_OP_ZONE_APPEND to append-write sectors to a zone of a zoned block device. This is a no-merge write operation. A zone append write BIO must: * Target a zoned block device * Have a sector position indicating the start sector of the target zone * The target zone must be a sequential write zone * The BIO must not cross a zone boundary * The BIO size must not be split to ensure that a single range of LBAs is written with a single command. Implement these checks in generic_make_request_checks() using the helper function blk_check_zone_append(). To avoid write append BIO splitting, introduce the new max_zone_append_sectors queue limit attribute and ensure that a BIO size is always lower than this limit. Export this new limit through sysfs and check these limits in bio_full(). Also when a LLDD can't dispatch a request to a specific zone, it will return BLK_STS_ZONE_RESOURCE indicating this request needs to be delayed, e.g. because the zone it will be dispatched to is still write-locked. 
If this happens, set the request aside in a local list to continue trying to dispatch requests such as READ requests or WRITE/ZONE_APPEND requests targeting other zones. This way we can still keep a high queue depth without starving other requests even if one request can't be served due to zone write-locking. Finally, make sure that the bio sector position indicates the actual write position as indicated by the device on completion. Signed-off-by: Keith Busch [ jth: added zone-append specific add_page and merge_page helpers ] Signed-off-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d736acf7f564..5647c78bb876 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -332,6 +332,7 @@ struct queue_limits { unsigned int max_hw_discard_sectors; unsigned int max_write_same_sectors; unsigned int max_write_zeroes_sectors; + unsigned int max_zone_append_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -750,6 +751,9 @@ static inline bool rq_mergeable(struct request *rq) if (req_op(rq) == REQ_OP_WRITE_ZEROES) return false; + if (req_op(rq) == REQ_OP_ZONE_APPEND) + return false; + if (rq->cmd_flags & REQ_NOMERGE_FLAGS) return false; if (rq->rq_flags & RQF_NOMERGE_FLAGS) @@ -1084,6 +1088,8 @@ extern void blk_queue_max_write_same_sectors(struct request_queue *q, extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); +extern void blk_queue_max_zone_append_sectors(struct request_queue *q, + unsigned int max_zone_append_sectors); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void 
blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); @@ -1301,6 +1307,11 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } +static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q) +{ + return q->limits.max_zone_append_sectors; +} + static inline unsigned queue_logical_block_size(const struct request_queue *q) { int retval = 512; -- cgit v1.2.3 From 1392d37018d4f68c5bb2c98dae9a018b73926865 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 12 May 2020 17:55:48 +0900 Subject: block: introduce blk_req_zone_write_trylock Introduce blk_req_zone_write_trylock(), which either grabs the write-lock for a sequential zone or returns false, if the zone is already locked. Signed-off-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5647c78bb876..73f4f4f1df92 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1738,6 +1738,7 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *, #ifdef CONFIG_BLK_DEV_ZONED bool blk_req_needs_zone_write_lock(struct request *rq); +bool blk_req_zone_write_trylock(struct request *rq); void __blk_req_zone_write_lock(struct request *rq); void __blk_req_zone_write_unlock(struct request *rq); -- cgit v1.2.3 From e732671aa5f67232cf760666a15242dead003362 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 May 2020 17:55:49 +0900 Subject: block: Modify revalidate zones Modify the interface of blk_revalidate_disk_zones() to add an optional driver callback function that a driver can use to extend processing done during zone revalidation. 
The callback, if defined, is executed with the device request queue frozen, after all zones have been inspected. Signed-off-by: Damien Le Moal Signed-off-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 73f4f4f1df92..5360696d85ff 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -358,7 +358,8 @@ unsigned int blkdev_nr_zones(struct gendisk *disk); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); -extern int blk_revalidate_disk_zones(struct gendisk *disk); +int blk_revalidate_disk_zones(struct gendisk *disk, + void (*update_driver_data)(struct gendisk *disk)); extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); -- cgit v1.2.3 From 71ac860af824ce9ebbbe8de20044e777c0fc33b9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 14 May 2020 16:45:09 +0800 Subject: block: move blk_io_schedule() out of header file blk_io_schedule() isn't called from performance sensitive code path, and it is easier to maintain by exporting it as symbol. Also blk_io_schedule() is only called by CONFIG_BLOCK code, so it is safe to do this way. Meantime fixes build failure when CONFIG_BLOCK is off. 
Cc: Christoph Hellwig Fixes: e6249cdd46e4 ("block: add blk_io_schedule() for avoiding task hung in sync dio") Reported-by: Satya Tangirala Tested-by: Satya Tangirala Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5360696d85ff..f9e4b21b051b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -27,7 +27,6 @@ #include #include #include -#include struct module; struct scsi_ioctl_command; @@ -1221,6 +1220,8 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) !list_empty(&plug->cb_list)); } +extern void blk_io_schedule(void); + extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); @@ -1851,15 +1852,4 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } -static inline void blk_io_schedule(void) -{ - /* Prevent hang_check timer from firing at us during very long I/O */ - unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; - - if (timeout) - io_schedule_timeout(timeout); - else - io_schedule(); -} - #endif -- cgit v1.2.3 From 1b2628397058ebce7277480960b29c788138de90 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 14 May 2020 00:37:17 +0000 Subject: block: Keyslot Manager for Inline Encryption Inline Encryption hardware allows software to specify an encryption context (an encryption key, crypto algorithm, data unit num, data unit size) along with a data transfer request to a storage device, and the inline encryption hardware will use that context to en/decrypt the data. The inline encryption hardware is part of the storage device, and it conceptually sits on the data path between system memory and the storage device. 
Inline Encryption hardware implementations often function around the concept of "keyslots". These implementations often have a limited number of "keyslots", each of which can hold a key (we say that a key can be "programmed" into a keyslot). Requests made to the storage device may have a keyslot and a data unit number associated with them, and the inline encryption hardware will en/decrypt the data in the requests using the key programmed into that associated keyslot and the data unit number specified with the request. As keyslots are limited, and programming keys may be expensive in many implementations, and multiple requests may use exactly the same encryption contexts, we introduce a Keyslot Manager to efficiently manage keyslots. We also introduce a blk_crypto_key, which will represent the key that's programmed into keyslots managed by keyslot managers. The keyslot manager also functions as the interface that upper layers will use to program keys into inline encryption hardware. For more information on the Keyslot Manager, refer to documentation found in block/keyslot-manager.c and linux/keyslot-manager.h. 
Co-developed-by: Eric Biggers Signed-off-by: Eric Biggers Signed-off-by: Satya Tangirala Reviewed-by: Eric Biggers Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f9e4b21b051b..354e44eebef9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -43,6 +43,7 @@ struct pr_ops; struct rq_qos; struct blk_queue_stats; struct blk_stat_callback; +struct blk_keyslot_manager; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -468,6 +469,11 @@ struct request_queue { unsigned int dma_pad_mask; unsigned int dma_alignment; +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + /* Inline crypto capabilities */ + struct blk_keyslot_manager *ksm; +#endif + unsigned int rq_timeout; int poll_nsec; -- cgit v1.2.3 From a892c8d52c02284076fbbacae6692aa5c5807d11 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 14 May 2020 00:37:18 +0000 Subject: block: Inline encryption support for blk-mq We must have some way of letting a storage device driver know what encryption context it should use for en/decrypting a request. However, it's the upper layers (like the filesystem/fscrypt) that know about and manages encryption contexts. As such, when the upper layer submits a bio to the block layer, and this bio eventually reaches a device driver with support for inline encryption, the device driver will need to have been told the encryption context for that bio. We want to communicate the encryption context from the upper layer to the storage device along with the bio, when the bio is submitted to the block layer. To do this, we add a struct bio_crypt_ctx to struct bio, which can represent an encryption context (note that we can't use the bi_private field in struct bio to do this because that field does not function to pass information across layers in the storage stack). 
We also introduce various functions to manipulate the bio_crypt_ctx and make the bio/request merging logic aware of the bio_crypt_ctx. We also make changes to blk-mq to make it handle bios with encryption contexts. blk-mq can merge many bios into the same request. These bios need to have contiguous data unit numbers (the necessary changes to blk-merge are also made to ensure this) - as such, it suffices to keep the data unit number of just the first bio, since that's all a storage driver needs to infer the data unit number to use for each data block in each bio in a request. blk-mq keeps track of the encryption context to be used for all the bios in a request with the request's rq_crypt_ctx. When the first bio is added to an empty request, blk-mq will program the encryption context of that bio into the request_queue's keyslot manager, and store the returned keyslot in the request's rq_crypt_ctx. All the functions to operate on encryption contexts are in blk-crypto.c. Upper layers only need to call bio_crypt_set_ctx with the encryption key, algorithm and data_unit_num; they don't have to worry about getting a keyslot for each encryption context, as blk-mq/blk-crypto handles that. Blk-crypto also makes it possible for request-based layered devices like dm-rq to make use of inline encryption hardware by cloning the rq_crypt_ctx and programming a keyslot in the new request_queue when necessary. Note that any user of the block layer can submit bios with an encryption context, such as filesystems, device-mapper targets, etc. 
Signed-off-by: Satya Tangirala Reviewed-by: Eric Biggers Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 354e44eebef9..52a9f456cadf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -222,6 +222,11 @@ struct request { unsigned short nr_integrity_segments; #endif +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + struct bio_crypt_ctx *crypt_ctx; + struct blk_ksm_keyslot *crypt_keyslot; +#endif + unsigned short write_hint; unsigned short ioprio; -- cgit v1.2.3 From d145dc23030bbf2de3a8ca5e0c29c2e568f69737 Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 14 May 2020 00:37:19 +0000 Subject: block: Make blk-integrity preclude hardware inline encryption Whenever a device supports blk-integrity, make the kernel pretend that the device doesn't support inline encryption (essentially by setting the keyslot manager in the request queue to NULL). There's no hardware currently that supports both integrity and inline encryption. However, it seems possible that there will be such hardware in the near future (like the NVMe key per I/O support that might support both inline encryption and PI). But properly integrating both features is not trivial, and without real hardware that implements both, it is difficult to tell if it will be done correctly by the majority of hardware that support both. So it seems best not to support both features together right now, and to decide what to do at probe time. 
Signed-off-by: Satya Tangirala Reviewed-by: Eric Biggers Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 52a9f456cadf..2b33166b9daf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1583,6 +1583,12 @@ struct blk_integrity *bdev_get_integrity(struct block_device *bdev) return blk_get_integrity(bdev->bd_disk); } +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) +{ + return q->integrity.profile; +} + static inline bool blk_integrity_rq(struct request *rq) { return rq->cmd_flags & REQ_INTEGRITY; @@ -1663,6 +1669,11 @@ static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { return NULL; } +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) +{ + return false; +} static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) { return 0; @@ -1714,6 +1725,25 @@ static inline struct bio_vec *rq_integrity_vec(struct request *rq) #endif /* CONFIG_BLK_DEV_INTEGRITY */ +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + +bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q); + +void blk_ksm_unregister(struct request_queue *q); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm, + struct request_queue *q) +{ + return true; +} + +static inline void blk_ksm_unregister(struct request_queue *q) { } + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + + struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); -- cgit v1.2.3 From 9398554fb3979852512ff4f1405e759889b45c16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 May 2020 14:36:00 +0200 Subject: block: remove the error_sector argument to 
blkdev_issue_flush The argument isn't used by any caller, and drivers don't fill out bi_sector for flush requests either. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2b33166b9daf..7d10f4e63232 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1233,7 +1233,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) extern void blk_io_schedule(void); -extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); +int blkdev_issue_flush(struct block_device *, gfp_t); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); @@ -1872,8 +1872,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) return false; } -static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, - sector_t *error_sector) +static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask) { return 0; } -- cgit v1.2.3 From 956d510ee78caebc83c0eaeb892db5b239a36a06 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 27 May 2020 07:24:04 +0200 Subject: block: add disk/bio-based accounting helpers Add two new helpers to simplify I/O accounting for bio based drivers. Currently these drivers use the generic_start_io_acct and generic_end_io_acct helpers which have very cumbersome calling conventions, don't actually return the time they started accounting, and try to deal with accounting for partitions, which can't happen for bio based drivers. The new helpers will be used to subsequently replace uses of the old helpers. The main API is the bio based wrappers in blkdev.h, but for zram, which wants to account rw_page based I/O, lower level routines are provided as well. 
Signed-off-by: Christoph Hellwig Reviewed-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7d10f4e63232..6f7ff0fa8fcf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1892,4 +1892,32 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } +unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, + unsigned int op); +void disk_end_io_acct(struct gendisk *disk, unsigned int op, + unsigned long start_time); + +#ifdef CONFIG_BLOCK +/** + * bio_start_io_acct - start I/O accounting for bio based drivers + * @bio: bio to start account for + * + * Returns the start time that should be passed back to bio_end_io_acct(). + */ +static inline unsigned long bio_start_io_acct(struct bio *bio) +{ + return disk_start_io_acct(bio->bi_disk, bio_sectors(bio), bio_op(bio)); +} + +/** + * bio_end_io_acct - end I/O accounting for bio based drivers + * @bio: bio to end account for + * @start: start time returned by bio_start_io_acct() + */ +static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) +{ + return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time); +} +#endif /* CONFIG_BLOCK */ + #endif -- cgit v1.2.3 From dc35ada4251f183137ee3a524543c9329d7a4fa2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 15:41:23 +0200 Subject: block: fix a warning when blkdev.h is included for !CONFIG_BLOCK builds disk_start_io_acct and disk_end_io_acct need at least a struct gendisk forward declaration, but for weird historic reasons much of blkdev.h is stubbed out for CONFIG_BLOCK=n. Fix this by stubbing more out for now, but eventually this header will need a massive cleanup. 
Fixes: 956d510ee78 ("block: add disk/bio-based accounting helpers") Reported-by: Stephen Rothwell Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6f7ff0fa8fcf..8fd900998b4e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1892,12 +1892,12 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } +#ifdef CONFIG_BLOCK unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op); void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); -#ifdef CONFIG_BLOCK /** * bio_start_io_acct - start I/O accounting for bio based drivers * @bio: bio to start account for -- cgit v1.2.3 From 5a473e8311b582a40c10409a0f4bb39f42aa8123 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Jun 2020 11:23:39 -0600 Subject: block: provide plug based way of signaling forced no-wait semantics Provide a way for the caller to specify that IO should be marked with REQ_NOWAIT to avoid blocking on allocation. 
Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8fd900998b4e..6e067dca94cf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1189,6 +1189,7 @@ struct blk_plug { struct list_head cb_list; /* md requires an unplug callback */ unsigned short rq_count; bool multiple_queues; + bool nowait; }; #define BLK_MAX_REQUEST_COUNT 16 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) -- cgit v1.2.3 From c3077b5d97a39223a2d4b95a21ccff660836170f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jun 2020 08:44:41 +0200 Subject: blk-mq: merge blk-softirq.c into blk-mq.c __blk_complete_request is only called from the blk-mq code, and duplicates a lot of code from blk-mq.c. Move it there to prepare for better code sharing and simplifications. Signed-off-by: Christoph Hellwig Reviewed-by: Daniel Wagner Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8fd900998b4e..98712cfc7a34 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1078,7 +1078,6 @@ void blk_steal_bios(struct bio_list *list, struct request *rq); extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); /* -- cgit v1.2.3 From e8c7d14ac6c37c173ec606907d38802b00302988 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 19 Jun 2020 20:47:25 +0000 Subject: block: revert back to synchronous request_queue removal Commit dc9edc44de6c ("block: Fix a blk_exit_rl() regression") merged on v4.12 moved the work behind blk_release_queue() into a workqueue after a splat floated around which indicated some work on blk_release_queue() could sleep in blk_exit_rl(). 
This splat would be possible when a driver called blk_put_queue() or blk_cleanup_queue() (which calls blk_put_queue() as its final call) from an atomic context. blk_put_queue() decrements the refcount for the request_queue kobject, and upon reaching 0 blk_release_queue() is called. Although blk_exit_rl() is now removed through commit db6d99523560 ("block: remove request_list code") on v5.0, we reserve the right to be able to sleep within blk_release_queue() context. The last reference for the request_queue must not be called from atomic context. *When* the last reference to the request_queue reaches 0 varies, and so let's take the opportunity to document when that is expected to happen and also document the context of the related calls as best as possible so we can avoid future issues, and with the hopes that the synchronous request_queue removal sticks. We revert back to synchronous request_queue removal because asynchronous removal creates a regression with expected userspace interaction with several drivers. An example is when removing the loopback driver, one uses ioctls from userspace to do so, but upon return and if successful, one expects the device to be removed. Likewise if one races to add another device the new one may not be added as it is still being removed. This was expected behavior before and it now fails as the device is still present and busy still. Moving to asynchronous request_queue removal could have broken many scripts which relied on the removal to have been completed if there was no error. Document this expectation as well so that this doesn't regress userspace again. Using asynchronous request_queue removal however has helped us find other bugs. In the future we can test what could break with this arrangement by enabling CONFIG_DEBUG_KOBJECT_RELEASE. While at it, update the docs with the context expectations for the request_queue / gendisk refcount decrement, and make these expectations explicit by using might_sleep(). 
Fixes: dc9edc44de6c ("block: Fix a blk_exit_rl() regression") Suggested-by: Nicolai Stange Signed-off-by: Luis Chamberlain Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Cc: Bart Van Assche Cc: Omar Sandoval Cc: Hannes Reinecke Cc: Nicolai Stange Cc: Greg Kroah-Hartman Cc: Michal Hocko Cc: yu kuai Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 98712cfc7a34..e214e0e9f868 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -584,8 +584,6 @@ struct request_queue { size_t cmd_size; - struct work_struct release_work; - #define BLK_MAX_WRITE_HINTS 5 u64 write_hints[BLK_MAX_WRITE_HINTS]; }; -- cgit v1.2.3 From 85e0cbbb8a79537dbc465e9deb449a08b2b092a6 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 19 Jun 2020 20:47:30 +0000 Subject: block: create the request_queue debugfs_dir on registration We were only creating the request_queue debugfs_dir only for make_request block drivers (multiqueue), but never for request-based block drivers. We did this as we were only creating non-blktrace additional debugfs files on that directory for make_request drivers. However, since blktrace *always* creates that directory anyway, we special-case the use of that directory on blktrace. Other than this being an eye-sore, this exposes request-based block drivers to the same debugfs fragile race that used to exist with make_request block drivers where if we start adding files onto that directory we can later run a race with a double removal of dentries on the directory if we don't deal with this carefully on blktrace. Instead, just simplify things by always creating the request_queue debugfs_dir on request_queue registration. Rename the mutex also to reflect the fact that this is used outside of the blktrace context. 
Signed-off-by: Luis Chamberlain Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e214e0e9f868..c0701237116d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -528,9 +528,9 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; + struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; - struct mutex blk_trace_mutex; #endif /* * for flush operations @@ -574,8 +574,9 @@ struct request_queue { struct list_head tag_set_list; struct bio_set bio_split; -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; + +#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; #endif -- cgit v1.2.3 From 3f1266f1f82d7b8c72472a8921e80aa3e611fb62 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:41 +0200 Subject: block: move block-related definitions out of fs.h Move most of the block related definition out of fs.h into more suitable headers. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c0701237116d..cf8f692f62a9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1918,4 +1918,50 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) } #endif /* CONFIG_BLOCK */ +int bdev_read_only(struct block_device *bdev); +int set_blocksize(struct block_device *bdev, int size); + +const char *bdevname(struct block_device *bdev, char *buffer); +struct block_device *lookup_bdev(const char *); + +void blkdev_show(struct seq_file *seqf, off_t offset); + +#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ +#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ +#ifdef CONFIG_BLOCK +#define BLKDEV_MAJOR_MAX 512 +#else +#define BLKDEV_MAJOR_MAX 0 +#endif + +int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); +struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, + void *holder); +struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); +struct block_device *bd_start_claiming(struct block_device *bdev, void *holder); +void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, + void *holder); +void blkdev_put(struct block_device *bdev, fmode_t mode); + +struct block_device *bdget(dev_t); +struct block_device *bdgrab(struct block_device *bdev); +void bdput(struct block_device *); + +#ifdef CONFIG_BLOCK +void invalidate_bdev(struct block_device *bdev); +int sync_blockdev(struct block_device *bdev); +#else +static inline void invalidate_bdev(struct block_device *bdev) +{ +} +static inline int sync_blockdev(struct block_device *bdev) +{ + return 0; +} #endif +int fsync_bdev(struct block_device *bdev); + +struct super_block *freeze_bdev(struct 
block_device *bdev); +int thaw_bdev(struct block_device *bdev, struct super_block *sb); + +#endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From d2de7ea48d83195ef1310555f1fdd9e8e1bab0d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:42 +0200 Subject: fs: move the buffer_heads_over_limit stub to buffer_head.h Move the !CONFIG_BLOCK stub to the same place as the non-stub declaration. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cf8f692f62a9..c824c6fee35d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1838,7 +1838,6 @@ struct block_device; /* * stubs for when the block layer is configured out */ -#define buffer_heads_over_limit 0 static inline long nr_blockdev_pages(void) { -- cgit v1.2.3 From 1a4dcfa8bc10d6bf4f94ac20adc2b30a1da72cfd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:43 +0200 Subject: block: reduce ifdef CONFIG_BLOCK madness in headers Large parts of bio.h, blkdev.h and genhd.h are under ifdef CONFIG_BLOCK for no good reason. Only stub out functions that are called from code that is not dependent on CONFIG_BLOCK and leave the harmless other declarations around.
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 92 ++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 52 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c824c6fee35d..f788bddc9219 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -4,9 +4,6 @@ #include #include - -#ifdef CONFIG_BLOCK - #include #include #include @@ -1163,13 +1160,13 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, return __blk_rq_map_sg(q, rq, sglist, &last_sg); } extern void blk_dump_rq_flags(struct request *, char *); -extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); +#ifdef CONFIG_BLOCK /* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. 
This allows merging of sequential requests @@ -1229,9 +1226,47 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) !list_empty(&plug->cb_list)); } +int blkdev_issue_flush(struct block_device *, gfp_t); +long nr_blockdev_pages(void); +#else /* CONFIG_BLOCK */ +struct blk_plug { +}; + +static inline void blk_start_plug(struct blk_plug *plug) +{ +} + +static inline void blk_finish_plug(struct blk_plug *plug) +{ +} + +static inline void blk_flush_plug(struct task_struct *task) +{ +} + +static inline void blk_schedule_flush_plug(struct task_struct *task) +{ +} + + +static inline bool blk_needs_flush_plug(struct task_struct *tsk) +{ + return false; +} + +static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask) +{ + return 0; +} + +static inline long nr_blockdev_pages(void) +{ + return 0; +} +#endif /* CONFIG_BLOCK */ + extern void blk_io_schedule(void); -int blkdev_issue_flush(struct block_device *, gfp_t); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); @@ -1831,51 +1866,6 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq) } #endif /* CONFIG_BLK_DEV_ZONED */ -#else /* CONFIG_BLOCK */ - -struct block_device; - -/* - * stubs for when the block layer is configured out - */ - -static inline long nr_blockdev_pages(void) -{ - return 0; -} - -struct blk_plug { -}; - -static inline void blk_start_plug(struct blk_plug *plug) -{ -} - -static inline void blk_finish_plug(struct blk_plug *plug) -{ -} - -static inline void blk_flush_plug(struct task_struct *task) -{ -} - -static inline void blk_schedule_flush_plug(struct task_struct *task) -{ -} - - -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - return false; -} - -static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask) -{ - return 0; -} - -#endif /* CONFIG_BLOCK */ - static inline void blk_wake_io_task(struct task_struct *waiter) { /* @@ 
-1889,7 +1879,6 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } -#ifdef CONFIG_BLOCK unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op); void disk_end_io_acct(struct gendisk *disk, unsigned int op, @@ -1915,7 +1904,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) { return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time); } -#endif /* CONFIG_BLOCK */ int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); -- cgit v1.2.3 From 621c1f42945e76015c3a585e7a9fe6e71665eba0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:44 +0200 Subject: block: move struct block_device to blk_types.h Move the struct block_device definition together with most of the block layer definitions, as it has nothing to do with the rest of fs.h. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f788bddc9219..15497782c176 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1930,6 +1930,7 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); +struct block_device *I_BDEV(struct inode *inode); struct block_device *bdget(dev_t); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); -- cgit v1.2.3 From bfe373f608cf81b7626dfeb904001b0e867c5110 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 28 Apr 2020 09:54:56 +0800 Subject: blk-mq-debugfs: update blk_queue_flag_name[] accordingly for new flags Else there may be magic numbers in /sys/kernel/debug/block/*/state. 
Signed-off-by: Hou Tao Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8fd900998b4e..57241417ff2f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -590,6 +590,7 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; +/* Keep blk_queue_flag_name[] in sync with the definitions below */ #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ -- cgit v1.2.3 From f695ca3886ce72b027af7aa6040cd420cae2088c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Jul 2020 10:59:39 +0200 Subject: block: remove the request_queue argument from blk_queue_split The queue can be trivially derived from the bio, so pass one less argument. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 15497782c176..d002defc1789 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -871,7 +871,7 @@ extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); -extern void blk_queue_split(struct request_queue *, struct bio **); +extern void blk_queue_split(struct bio **); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, unsigned int, void __user *); -- cgit v1.2.3 From c62b37d96b6eb3ec5ae4cbe00db107bf15aebc93 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Jul 2020 10:59:43 +0200 Subject: block: move ->make_request_fn to struct 
block_device_operations The make_request_fn is a little weird in that it sits directly in struct request_queue instead of an operation vector. Replace it with a block_device_operations method called submit_bio (which describes much better what it does). Also remove the request_queue argument to it, as the queue can be derived pretty trivially from the bio. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d002defc1789..083ffc5bc51b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -286,8 +286,6 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; -typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); - struct bio_vec; enum blk_eh_timer_return { @@ -398,8 +396,6 @@ struct request_queue { struct blk_queue_stats *stats; struct rq_qos *rq_qos; - make_request_fn *make_request_fn; - const struct blk_mq_ops *mq_ops; /* sw queues */ @@ -1162,7 +1158,7 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, extern void blk_dump_rq_flags(struct request *, char *); bool __must_check blk_get_queue(struct request_queue *); -struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id); +struct request_queue *blk_alloc_queue(int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); @@ -1778,6 +1774,7 @@ static inline void blk_ksm_unregister(struct request_queue *q) { } struct block_device_operations { + blk_qc_t (*submit_bio) (struct bio *bio); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); -- cgit v1.2.3 From ed00aabd5eb9fb44d6aff1173234a2e911b9fead Mon Sep 17 00:00:00 2001 
From: Christoph Hellwig Date: Wed, 1 Jul 2020 10:59:44 +0200 Subject: block: rename generic_make_request to submit_bio_noacct generic_make_request has always been very confusingly misnamed, so rename it to submit_bio_noacct to make it clear that it is submit_bio minus accounting and a few checks. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 083ffc5bc51b..b73cfa6a5141 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -852,7 +852,7 @@ static inline void rq_flush_dcache_pages(struct request *rq) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -extern blk_qc_t generic_make_request(struct bio *bio); +blk_qc_t submit_bio_noacct(struct bio *bio); extern blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); -- cgit v1.2.3 From 5a6c35f9af416114588298aa7a90b15bbed15a41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Jul 2020 10:59:47 +0200 Subject: block: remove direct_make_request Now that submit_bio_noacct has a decent blk-mq fast path there is no more need for this bypass. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b73cfa6a5141..1cc913ffdbe2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -853,7 +853,6 @@ static inline void rq_flush_dcache_pages(struct request *rq) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); blk_qc_t submit_bio_noacct(struct bio *bio); -extern blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, -- cgit v1.2.3 From 6b7b181b67aa8177e57732723106a0411570a86d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Jun 2020 10:01:55 +0200 Subject: block: remove the bd_block_size field from struct block_device We can trivially calculate the block size from the inodes i_blkbits variable. Use that instead of keeping two redundant copies of the information in slightly different formats. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cc913ffdbe2..408eb66a82fd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1543,7 +1543,7 @@ static inline unsigned int blksize_bits(unsigned int size) static inline unsigned int block_size(struct block_device *bdev) { - return bdev->bd_block_size; + return 1 << bdev->bd_inode->i_blkbits; } int kblockd_schedule_work(struct work_struct *work); -- cgit v1.2.3 From a564e23f0f99759f453dbefcb9160dec6d99df96 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Jul 2020 14:25:41 +0200 Subject: md: switch to ->check_events for media change notifications md is the last driver using the legacy media_changed method. Switch it over to the (not so) new ->check_events approach, which also removes the need for the ->revalidate_disk method.
Signed-off-by: Christoph Hellwig [axboe: remove unused 'bdops' variable in disk_clear_events()] Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 408eb66a82fd..71173a1ffa8b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1781,8 +1781,6 @@ struct block_device_operations { int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); - /* ->media_changed() is DEPRECATED, use ->check_events() instead */ - int (*media_changed) (struct gendisk *); void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); -- cgit v1.2.3 From e15864f8ea05b24071b07300459ae7e511d0b938 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 14 Jul 2020 23:18:23 +0200 Subject: block: add max_open_zones to blk-sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new max_open_zones definition in the sysfs documentation. This definition will be common for all devices utilizing the zoned block device support in the kernel. Export max open zones according to this new definition for NVMe Zoned Namespace devices, ZAC ATA devices (which are treated as SCSI devices by the kernel), and ZBC SCSI devices. Add the new max_open_zones member to struct request_queue, rather than as a queue limit, since this property cannot be split across stacking drivers. Signed-off-by: Niklas Cassel Reviewed-by: Javier González Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index de7adc59b993..c8beb8bbdb08 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -513,6 +513,7 @@ struct request_queue { unsigned int nr_zones; unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; + unsigned int max_open_zones; #endif /* CONFIG_BLK_DEV_ZONED */ /* @@ -722,6 +723,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q, return true; return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); } + +static inline void blk_queue_max_open_zones(struct request_queue *q, + unsigned int max_open_zones) +{ + q->max_open_zones = max_open_zones; +} + +static inline unsigned int queue_max_open_zones(const struct request_queue *q) +{ + return q->max_open_zones; +} #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -737,6 +749,10 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q, { return 0; } +static inline unsigned int queue_max_open_zones(const struct request_queue *q) +{ + return 0; +} #endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) @@ -1519,6 +1535,15 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) return 0; } +static inline unsigned int bdev_max_open_zones(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return queue_max_open_zones(q); + return 0; +} + static inline int queue_dma_alignment(const struct request_queue *q) { return q ? 
q->dma_alignment : 511; -- cgit v1.2.3 From 659bf827ba8f1183b714341d8a1d4b1e446178d9 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 14 Jul 2020 23:18:24 +0200 Subject: block: add max_active_zones to blk-sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new max_active zones definition in the sysfs documentation. This definition will be common for all devices utilizing the zoned block device support in the kernel. Export max_active_zones according to this new definition for NVMe Zoned Namespace devices, ZAC ATA devices (which are treated as SCSI devices by the kernel), and ZBC SCSI devices. Add the new max_active_zones member to struct request_queue, rather than as a queue limit, since this property cannot be split across stacking drivers. For SCSI devices, even though max active zones is not part of the ZBC/ZAC spec, export max_active_zones as 0, signifying "no limit". Signed-off-by: Niklas Cassel Reviewed-by: Javier González Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c8beb8bbdb08..285b59cfc064 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -514,6 +514,7 @@ struct request_queue { unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; unsigned int max_open_zones; + unsigned int max_active_zones; #endif /* CONFIG_BLK_DEV_ZONED */ /* @@ -734,6 +735,17 @@ static inline unsigned int queue_max_open_zones(const struct request_queue *q) { return q->max_open_zones; } + +static inline void blk_queue_max_active_zones(struct request_queue *q, + unsigned int max_active_zones) +{ + q->max_active_zones = max_active_zones; +} + +static inline unsigned int queue_max_active_zones(const struct request_queue *q) +{ + return q->max_active_zones; +} #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -753,6 +765,10 @@ static inline unsigned int queue_max_open_zones(const struct request_queue *q) { return 0; } +static inline unsigned int queue_max_active_zones(const struct request_queue *q) +{ + return 0; +} #endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) @@ -1544,6 +1560,15 @@ static inline unsigned int bdev_max_open_zones(struct block_device *bdev) return 0; } +static inline unsigned int bdev_max_active_zones(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return queue_max_active_zones(q); + return 0; +} + static inline int queue_dma_alignment(const struct request_queue *q) { return q ? 
q->dma_alignment : 511; -- cgit v1.2.3 From ecbe6bc0003bfd5bf8581cb679cae0eb944432cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Jul 2020 16:33:09 +0200 Subject: block: use bd_prepare_to_claim directly in the loop driver The arcane magic in bd_start_claiming is only needed to be able to claim a block_device that hasn't been fully set up. Switch the loop driver that claims from the ioctl path with a fully set up struct block_device to just use the much simpler bd_prepare_to_claim directly. Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 71173a1ffa8b..06995b96e946 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1919,7 +1919,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -struct block_device *bd_start_claiming(struct block_device *bdev, void *holder); +int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, + void *holder); void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); -- cgit v1.2.3 From 3093a479727be194996dbc40f803711af5877be4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2020 08:12:49 +0200 Subject: block: inherit the zoned characteristics in blk_stack_limits Lift the code from device mapper into blk_stack_limits to inherit the stacking limitations. This ensures we do the right thing for all stacked zoned block devices.
Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 63078944909c..9e331a1eb35f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -306,11 +306,14 @@ enum blk_queue_state { /* * Zoned block device models (zoned limit). + * + * Note: This needs to be ordered from the least to the most severe + * restrictions for the inheritance in blk_stack_limits() to work. */ enum blk_zoned_model { - BLK_ZONED_NONE, /* Regular block device */ - BLK_ZONED_HA, /* Host-aware zoned block device */ - BLK_ZONED_HM, /* Host-managed zoned block device */ + BLK_ZONED_NONE = 0, /* Regular block device */ + BLK_ZONED_HA, /* Host-aware zoned block device */ + BLK_ZONED_HM, /* Host-managed zoned block device */ }; struct queue_limits { -- cgit v1.2.3 From 9efa82ef2b15d1757dd6cc518988a4506554e893 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2020 08:12:50 +0200 Subject: block: remove bdev_stack_limits This function is just a tiny wrapper around blk_stack_limits and has two callers. Simplify the stack a bit by open coding it in the two callers.
Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9e331a1eb35f..54b963109e64 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1139,8 +1139,6 @@ extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); -extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, - sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); -- cgit v1.2.3 From b9b1a5d71533f2ccd54b810dffdcf0789b30ba9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2020 08:12:51 +0200 Subject: block: remove blk_queue_stack_limits This function is just a tiny wrapper around blk_stack_limits. Open code it in the two callers.
Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 54b963109e64..bbdd3cf62038 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1141,7 +1141,6 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); -extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); -- cgit v1.2.3 From 27ba3e8ff3ab86449e63d38a8d623053591e65fa Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 15 Sep 2020 16:33:46 +0900 Subject: scsi: sd: sd_zbc: Fix handling of host-aware ZBC disks When CONFIG_BLK_DEV_ZONED is disabled, allow using host-aware ZBC disks as regular disks. In this case, ensure that command completion is correctly executed by changing sd_zbc_complete() to return good_bytes instead of 0 and causing a hang during device probe (endless retries). When CONFIG_BLK_DEV_ZONED is enabled and a host-aware disk is detected to have partitions, it will be used as a regular disk. In this case, make sure to not do anything in sd_zbc_revalidate_zones() as that triggers warnings. Since all these different cases result in subtle settings of the disk queue zoned model, introduce the block layer helper function blk_queue_set_zoned() to generically implement setting up the effective zoned model according to the disk type, the presence of partitions on the disk and CONFIG_BLK_DEV_ZONED configuration. 
Link: https://lore.kernel.org/r/20200915073347.832424-2-damien.lemoal@wdc.com Fixes: b72053072c0b ("block: allow partitions on host aware zone devices") Cc: Reported-by: Borislav Petkov Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb5636cc17b9..868e11face00 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -352,6 +352,8 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); +void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model); + #ifdef CONFIG_BLK_DEV_ZONED #define BLK_ALL_ZONES ((unsigned int)-1) -- cgit v1.2.3