From 1652b0bafeaa8281ca9a805d81e13d7647bd2f44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:08 +0200 Subject: block: remove unused queue limits API Remove all APIs that are unused now that sd and sr have been converted to the atomic queue limits API. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: John Garry Reviewed-by: Nitesh Shetty Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-14-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24c36929920b..ad995e7a7698 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -332,8 +332,6 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void disk_set_zoned(struct gendisk *disk); - #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); @@ -638,18 +636,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline void disk_set_max_open_zones(struct gendisk *disk, - unsigned int max_open_zones) -{ - disk->queue->limits.max_open_zones = max_open_zones; -} - -static inline void disk_set_max_active_zones(struct gendisk *disk, - unsigned int max_active_zones) -{ - disk->queue->limits.max_active_zones = max_active_zones; -} - static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; @@ -929,24 +915,14 @@ static inline void queue_limits_cancel_update(struct request_queue *q) /* * Access functions for manipulating queue properties */ -extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); void blk_queue_max_secure_erase_sectors(struct request_queue *q, unsigned int max_sectors); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); -extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); -extern void blk_queue_max_zone_append_sectors(struct request_queue *q, - unsigned int max_zone_append_sectors); -extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); -void blk_queue_zone_write_granularity(struct request_queue *q, - unsigned int size); -extern void blk_queue_alignment_offset(struct request_queue *q, - unsigned int alignment); void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); -extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); -- cgit v1.2.3 From 73e3715ed14844067c5c598e72777641004a7f60 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:09 +0200 Subject: block: add special APIs for run-time disabling of discard and friends A few drivers optimistically try to support discard, write zeroes and secure erase and disable the features from the I/O completion handler if the hardware can't support them. This disable can't be done using the atomic queue limits API because the I/O completion handlers can't take sleeping locks or freeze the queue. Keep the existing clearing of the relevant field to zero, but replace the old blk_queue_max_* APIs with new disable APIs that force the value to 0. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Nitesh Shetty Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-15-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ad995e7a7698..ac8e0cb2353a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -912,15 +912,31 @@ static inline void queue_limits_cancel_update(struct request_queue *q) mutex_unlock(&q->limits_lock); } +/* + * These helpers are for drivers that have sloppy feature negotiation and might + * have to disable DISCARD, WRITE_ZEROES or SECURE_DISCARD from the I/O + * completion handler when the device returned an indicator that the respective + * feature is not actually supported. They are racy and the driver needs to + * cope with that. Try to avoid this scheme if you can. + */ +static inline void blk_queue_disable_discard(struct request_queue *q) +{ + q->limits.max_discard_sectors = 0; +} + +static inline void blk_queue_disable_secure_erase(struct request_queue *q) +{ + q->limits.max_secure_erase_sectors = 0; +} + +static inline void blk_queue_disable_write_zeroes(struct request_queue *q) +{ + q->limits.max_write_zeroes_sectors = 0; +} + /* * Access functions for manipulating queue properties */ -void blk_queue_max_secure_erase_sectors(struct request_queue *q, - unsigned int max_sectors); -extern void blk_queue_max_discard_sectors(struct request_queue *q, - unsigned int max_discard_sectors); -extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, - unsigned int max_write_same_sectors); void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); -- cgit v1.2.3 From e9f5f44ad3725335d9c559c3c22cd3726152a7b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:15 +0200 Subject: block: remove the blk_integrity_profile structure Block layer integrity configuration is a bit complex right now, as it indirects through operation vectors for a simple two-dimensional configuration: a) the checksum type of none, ip checksum, crc, crc64 b) the presence or absence of a reference tag Remove the integrity profile, and instead add a separate csum_type flag which replaces the existing ip-checksum field and a new flag that indicates the presence of the reference tag. This removes up to two layers of indirect calls, remove the need to offload the no-op verification of non-PI metadata to a workqueue and generally simplifies the code. The downside is that block/t10-pi.c now has to be built into the kernel when CONFIG_BLK_DEV_INTEGRITY is supported. Given that both nvme and SCSI require t10-pi.ko, it is loaded for all usual configurations that enabled CONFIG_BLK_DEV_INTEGRITY already, though. Signed-off-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ac8e0cb2353a..bdd33388e1ce 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -105,9 +105,16 @@ enum { struct disk_events; struct badblocks; +enum blk_integrity_checksum { + BLK_INTEGRITY_CSUM_NONE = 0, + BLK_INTEGRITY_CSUM_IP = 1, + BLK_INTEGRITY_CSUM_CRC = 2, + BLK_INTEGRITY_CSUM_CRC64 = 3, +} __packed ; + struct blk_integrity { - const struct blk_integrity_profile *profile; unsigned char flags; + enum blk_integrity_checksum csum_type; unsigned char tuple_size; unsigned char pi_offset; unsigned char interval_exp; -- cgit v1.2.3 From 3c3e85ddffae93eba1a257eb6939bf5dc1e93b9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:20 +0200 Subject: block: bypass the STABLE_WRITES flag for protection information Currently registering a checksum-enabled (aka PI) integrity profile sets the QUEUE_FLAG_STABLE_WRITE flag, and unregistering it clears the flag. This can incorrectly clear the flag when the driver requires stable writes even without PI, e.g. in case of iSCSI or NVMe/TCP with data digest enabled. Fix this by looking at the csum_type directly in bdev_stable_writes and not setting the queue flag. Also remove the blk_queue_stable_writes helper as the only user in nvme wants to only look at the actual QUEUE_FLAG_STABLE_WRITE flag as it inherits the integrity configuration by other means. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240613084839.1044015-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdd33388e1ce..f9089750919c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -571,8 +571,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) -#define blk_queue_stable_writes(q) \ - test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ @@ -1300,8 +1298,14 @@ static inline bool bdev_synchronous(struct block_device *bdev) static inline bool bdev_stable_writes(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_STABLE_WRITES, - &bdev_get_queue(bdev)->queue_flags); + struct request_queue *q = bdev_get_queue(bdev); + +#ifdef CONFIG_BLK_DEV_INTEGRITY + /* BLK_INTEGRITY_CSUM_NONE is not available in blkdev.h */ + if (q->integrity.csum_type != 0) + return true; +#endif + return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } static inline bool bdev_write_cache(struct block_device *bdev) -- cgit v1.2.3 From c6e56cf6b2e79a463af21286ba951714ed20828c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:22 +0200 Subject: block: move integrity information into queue_limits Move the integrity information into the queue limits so that it can be set atomically with other queue limits, and that the sysfs changes to the read_verify and write_generate flags are properly synchronized. This also allows to provide a more useful helper to stack the integrity fields, although it still is separate from the main stacking function as not all stackable devices want to inherit the integrity settings. Even with that it greatly simplifies the code in md and dm. Note that the integrity field is moved as-is into the queue limits. While there are good arguments for removing the separate blk_integrity structure, this would cause a lot of churn and might better be done at a later time if desired. However the integrity field in the queue_limits structure is now unconditional so that various ifdefs can be avoided or replaced with IS_ENABLED(). Given that tiny size of it that seems like a worthwhile trade off. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-13-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f9089750919c..0c247a716885 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -334,6 +334,8 @@ struct queue_limits { * due to possible offsets. */ unsigned int dma_alignment; + + struct blk_integrity integrity; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, @@ -419,10 +421,6 @@ struct request_queue { struct queue_limits limits; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct blk_integrity integrity; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - #ifdef CONFIG_PM struct device *dev; enum rpm_status rpm_status; @@ -1300,11 +1298,9 @@ static inline bool bdev_stable_writes(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -#ifdef CONFIG_BLK_DEV_INTEGRITY - /* BLK_INTEGRITY_CSUM_NONE is not available in blkdev.h */ - if (q->integrity.csum_type != 0) + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE) return true; -#endif return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } -- cgit v1.2.3 From c3042a5403ef2be622023fcc3b11fc1aa08ba7fa Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 14 Jun 2024 09:03:44 +0000 Subject: block: Drop locking annotation for limits_lock Currently compiling block/blk-settings.c with C=1 gives the following warning: block/blk-settings.c:262:9: warning: context imbalance in 'queue_limits_commit_update' - wrong count at exit request_queue.limits_lock is a mutex. Sparse locking annotation for mutexes are currently not supported - see [0] - so drop that locking annotation. [0] https://lore.kernel.org/lkml/cover.1579893447.git.jbi.octave@gmail.com/T/#mbb8bda6c0a7ca7ce19f46df976a8e3b489745488 Fixes: d690cb8ae14bd ("block: add an API to atomically update queue limits") Signed-off-by: John Garry Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240614090345.655716-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c247a716885..83af6ac5aa48 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -893,7 +893,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, */ static inline struct queue_limits queue_limits_start_update(struct request_queue *q) - __acquires(q->limits_lock) { mutex_lock(&q->limits_lock); return q->limits; -- cgit v1.2.3 From 1122c0c1cc71f740fa4d5f14f239194e06a1d5e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:40 +0200 Subject: block: move cache control settings out of queue->flags Move the cache control settings into the queue_limits so that the flags can be set atomically with the device queue frozen. Add new features and flags field for the driver set flags, and internal (usually sysfs-controlled) flags in the block layer. Note that we'll eventually remove enough field from queue_limits to bring it back to the previous size. The disable flag is inverted compared to the previous meaning, which means it now survives a rescan, similar to the max_sectors and max_discard_sectors user limits. The FLUSH and FUA flags are now inherited by blk_stack_limits, which simplified the code in dm a lot, but also causes a slight behavior change in that dm-switch and dm-unstripe now advertise a write cache despite setting num_flush_bios to 0. The I/O path will handle this gracefully, but as far as I can tell the lack of num_flush_bios and thus flush support is a pre-existing data integrity bug in those targets that really needs fixing, after which a non-zero num_flush_bios should be required in dm for targets that map to underlying devices. Signed-off-by: Christoph Hellwig Acked-by: Ulf Hansson Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-14-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c247a716885..acdfe5122faa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -282,6 +282,28 @@ static inline bool blk_op_is_passthrough(blk_opf_t op) return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; } +/* flags set by the driver in queue_limits.features */ +enum { + /* supports a volatile write cache */ + BLK_FEAT_WRITE_CACHE = (1u << 0), + + /* supports passing on the FUA bit */ + BLK_FEAT_FUA = (1u << 1), +}; + +/* + * Flags automatically inherited when stacking limits. + */ +#define BLK_FEAT_INHERIT_MASK \ + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA) + + +/* internal flags in queue_limits.flags */ +enum { + /* do not send FLUSH or FUA command despite advertised write cache */ + BLK_FLAGS_WRITE_CACHE_DISABLED = (1u << 31), +}; + /* * BLK_BOUNCE_NONE: never bounce (default) * BLK_BOUNCE_HIGH: bounce all highmem pages @@ -292,6 +314,8 @@ enum blk_bounce { }; struct queue_limits { + unsigned int features; + unsigned int flags; enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; @@ -536,12 +560,9 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ -#define QUEUE_FLAG_WC 17 /* Write back caching */ -#define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ @@ -951,7 +972,6 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, sector_t offset, const char *pfx); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); -extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); struct blk_independent_access_ranges * disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); @@ -1304,14 +1324,20 @@ static inline bool bdev_stable_writes(struct block_device *bdev) return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } +static inline bool blk_queue_write_cache(struct request_queue *q) +{ + return (q->limits.features & BLK_FEAT_WRITE_CACHE) && + !(q->limits.flags & BLK_FLAGS_WRITE_CACHE_DISABLED); +} + static inline bool bdev_write_cache(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags); + return blk_queue_write_cache(bdev_get_queue(bdev)); } static inline bool bdev_fua(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags); + return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA; } static inline bool bdev_nowait(struct block_device *bdev) -- cgit v1.2.3 From bd4a633b6f7c3c6b6ebc1a07317643270e751a94 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:41 +0200 Subject: block: move the nonrot flag to queue_limits Move the nonrot flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Use the chance to switch to defaulting to non-rotational and require the driver to opt into rotational, which matches the polarity of the sysfs interface. For the z2ram, ps3vram, 2x memstick, ubiblock and dcssblk the new rotational flag is not set as they clearly are not rotational despite this being a behavior change. There are some other drivers that unconditionally set the rotational flag to keep the existing behavior as they arguably can be used on rotational devices even if that is probably not their main use today (e.g. virtio_blk and drbd). The flag is automatically inherited in blk_stack_limits matching the existing behavior in dm and md. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-15-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index acdfe5122faa..988e3248cffe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -289,14 +289,16 @@ enum { /* supports passing on the FUA bit */ BLK_FEAT_FUA = (1u << 1), + + /* rotational device (hard drive or floppy) */ + BLK_FEAT_ROTATIONAL = (1u << 2), }; /* * Flags automatically inherited when stacking limits. */ #define BLK_FEAT_INHERIT_MASK \ - (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA) - + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL) /* internal flags in queue_limits.flags */ enum { @@ -553,8 +555,6 @@ struct request_queue { #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ -#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */ -#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ @@ -589,7 +589,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) -#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ -- cgit v1.2.3 From 39a9f1c334f9f27b3b3e6d0005c10ed667268346 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:42 +0200 Subject: block: move the add_random flag to queue_limits Move the add_random flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Note that this also removes code from dm to clear the flag based on the underlying devices, which can't be reached as dm devices will always start out without the flag set. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-16-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 988e3248cffe..cf1bbf566b2b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -292,6 +292,9 @@ enum { /* rotational device (hard drive or floppy) */ BLK_FEAT_ROTATIONAL = (1u << 2), + + /* contributes to the random number pool */ + BLK_FEAT_ADD_RANDOM = (1u << 3), }; /* @@ -557,7 +560,6 @@ struct request_queue { #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ @@ -591,7 +593,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) -#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) -- cgit v1.2.3 From cdb2497918cc2929691408bac87b58433b45b6d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:43 +0200 Subject: block: move the io_stat flag setting to queue_limits Move the io_stat flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Simplify md and dm to set the flag unconditionally instead of avoiding setting a simple flag for cases where it already is set by other means, which is a bit pointless. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-17-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cf1bbf566b2b..5fafb2f95fd1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -295,6 +295,9 @@ enum { /* contributes to the random number pool */ BLK_FEAT_ADD_RANDOM = (1u << 3), + + /* do disk/partitions IO accounting */ + BLK_FEAT_IO_STAT = (1u << 4), }; /* @@ -558,7 +561,6 @@ struct request_queue { #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ -#define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ @@ -577,8 +579,7 @@ struct request_queue { #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ -#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \ - (1UL << QUEUE_FLAG_SAME_COMP) | \ +#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_SAME_COMP) | \ (1UL << QUEUE_FLAG_NOWAIT)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); @@ -592,7 +593,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) -#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) +#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) -- cgit v1.2.3 From 1a02f3a73f8c670eddeb44bf52a75ae7f67cfc11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:44 +0200 Subject: block: move the stable_writes flag to queue_limits Move the stable_writes flag into the queue_limits feature field so that it can be set atomically with the queue frozen. The flag is now inherited by blk_stack_limits, which greatly simplifies the code in dm, and fixed md which previously did not pass on the flag set on lower devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-18-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5fafb2f95fd1..8936eb6ba609 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -298,13 +298,17 @@ enum { /* do disk/partitions IO accounting */ BLK_FEAT_IO_STAT = (1u << 4), + + /* don't modify data until writeback is done */ + BLK_FEAT_STABLE_WRITES = (1u << 5), }; /* * Flags automatically inherited when stacking limits. */ #define BLK_FEAT_INHERIT_MASK \ - (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL) + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ + BLK_FEAT_STABLE_WRITES) /* internal flags in queue_limits.flags */ enum { @@ -565,7 +569,6 @@ struct request_queue { #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ @@ -1323,7 +1326,7 @@ static inline bool bdev_stable_writes(struct block_device *bdev) if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE) return true; - return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); + return q->limits.features & BLK_FEAT_STABLE_WRITES; } static inline bool blk_queue_write_cache(struct request_queue *q) -- cgit v1.2.3 From aadd5c59c910427c0464c217d5ed588ff14e2502 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:45 +0200 Subject: block: move the synchronous flag to queue_limits Move the synchronous flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-19-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8936eb6ba609..cee7b44a1425 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -301,6 +301,9 @@ enum { /* don't modify data until writeback is done */ BLK_FEAT_STABLE_WRITES = (1u << 5), + + /* always completes in submit context */ + BLK_FEAT_SYNCHRONOUS = (1u << 6), }; /* @@ -566,7 +569,6 @@ struct request_queue { #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ -#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ @@ -1315,8 +1317,7 @@ static inline bool bdev_nonrot(struct block_device *bdev) static inline bool bdev_synchronous(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_SYNCHRONOUS, - &bdev_get_queue(bdev)->queue_flags); + return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS; } static inline bool bdev_stable_writes(struct block_device *bdev) -- cgit v1.2.3 From f76af42f8bf13d2620084f305f01691de9238fc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:46 +0200 Subject: block: move the nowait flag to queue_limits Move the nowait flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Stacking drivers are simplified in that they now can simply set the flag, and blk_stack_limits will clear it when the features is not supported by any of the underlying devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-20-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cee7b44a1425..f3d4519d609d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -304,6 +304,9 @@ enum { /* always completes in submit context */ BLK_FEAT_SYNCHRONOUS = (1u << 6), + + /* supports REQ_NOWAIT */ + BLK_FEAT_NOWAIT = (1u << 7), }; /* @@ -580,12 +583,10 @@ struct request_queue { #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ -#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ -#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_SAME_COMP) | \ - (1UL << QUEUE_FLAG_NOWAIT)) +#define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); @@ -1348,7 +1349,7 @@ static inline bool bdev_fua(struct block_device *bdev) static inline bool bdev_nowait(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags); + return bdev->bd_disk->queue->limits.features & BLK_FEAT_NOWAIT; } static inline bool bdev_is_zoned(struct block_device *bdev) -- cgit v1.2.3 From f467fee48da4500786e145489787b37adae317c3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:47 +0200 Subject: block: move the dax flag to queue_limits Move the dax flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-21-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3d4519d609d..7022e06a3dd9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -307,6 +307,9 @@ enum { /* supports REQ_NOWAIT */ BLK_FEAT_NOWAIT = (1u << 7), + + /* supports DAX */ + BLK_FEAT_DAX = (1u << 8), }; /* @@ -575,7 +578,6 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ -#define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ @@ -602,7 +604,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) -#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) +#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) #ifdef CONFIG_BLK_RQ_ALLOC_TIME -- cgit v1.2.3 From 8023e144f9d6e35f8786937e2f0c2fea0aba6dbc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:48 +0200 Subject: block: move the poll flag to queue_limits Move the poll flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Stacking drivers are simplified in that they now can simply set the flag, and blk_stack_limits will clear it when the features is not supported by any of the underlying devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-22-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7022e06a3dd9..cd27b66cbacc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -310,6 +310,9 @@ enum { /* supports DAX */ BLK_FEAT_DAX = (1u << 8), + + /* supports I/O polling */ + BLK_FEAT_POLL = (1u << 9), }; /* @@ -577,7 +580,6 @@ struct request_queue { #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ -- cgit v1.2.3 From b1fc937a55f5735b98d9dceae5bb6ba262501f56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:49 +0200 Subject: block: move the zoned flag into the features field Move the zoned flags into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-23-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cd27b66cbacc..bdc30c1fb1b5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -313,6 +313,9 @@ enum { /* supports I/O polling */ BLK_FEAT_POLL = (1u << 9), + + /* is a zoned device */ + BLK_FEAT_ZONED = (1u << 10), }; /* @@ -320,7 +323,7 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED) /* internal flags in queue_limits.flags */ enum { @@ -372,7 +375,6 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; - bool zoned; unsigned int max_open_zones; unsigned int max_active_zones; @@ -654,7 +656,8 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q) static inline bool blk_queue_is_zoned(struct request_queue *q) { - return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned; + return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + (q->limits.features & BLK_FEAT_ZONED); } #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From a52758a39768f441e468a41da6c15a59d6d6011a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:50 +0200 Subject: block: move the zone_resetall flag to queue_limits Move the zone_resetall flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-24-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdc30c1fb1b5..1077cb8d8fd8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -316,6 +316,9 @@ enum { /* is a zoned device */ BLK_FEAT_ZONED = (1u << 10), + + /* supports Zone Reset All */ + BLK_FEAT_ZONE_RESETALL = (1u << 11), }; /* @@ -586,7 +589,6 @@ struct request_queue { #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ -#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ @@ -607,7 +609,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ - test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) + ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) -- cgit v1.2.3 From 9c1e42e3c876c66796eda23e79836a4d92613a61 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:51 +0200 Subject: block: move the pci_p2pdma flag to queue_limits Move the pci_p2pdma flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-25-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1077cb8d8fd8..ab0f7dfba556 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -319,6 +319,9 @@ enum { /* supports Zone Reset All */ BLK_FEAT_ZONE_RESETALL = (1u << 11), + + /* supports PCI(e) p2p requests */ + BLK_FEAT_PCI_P2PDMA = (1u << 12), }; /* @@ -588,7 +591,6 @@ struct request_queue { #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ -#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ @@ -611,8 +613,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_zone_resetall(q) \ ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) -#define blk_queue_pci_p2pdma(q) \ - test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) +#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) #ifdef CONFIG_BLK_RQ_ALLOC_TIME #define blk_queue_rq_alloc_time(q) \ test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags) -- cgit v1.2.3 From 8c8f5c85b20d0a7dc0ab9b2a17318130d69ceb5a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:52 +0200 Subject: block: move the skip_tagset_quiesce flag to queue_limits Move the skip_tagset_quiesce flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-26-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ab0f7dfba556..2c433ebf6f20 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -322,6 +322,9 @@ enum { /* supports PCI(e) p2p requests */ BLK_FEAT_PCI_P2PDMA = (1u << 12), + + /* skip this queue in blk_mq_(un)quiesce_tagset */ + BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13), }; /* @@ -594,7 +597,6 @@ struct request_queue { #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ -#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ #define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP) @@ -629,7 +631,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) #define blk_queue_skip_tagset_quiesce(q) \ - test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags) + ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -- cgit v1.2.3 From 339d3948c07b4aa2940aeb874294a7d6782cec16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:53 +0200 Subject: block: move the bounce flag into the features field Move the bounce flag into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-27-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c433ebf6f20..e96ba7b97288 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -325,6 +325,9 @@ enum { /* skip this queue in blk_mq_(un)quiesce_tagset */ BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13), + + /* bounce all highmem pages */ + BLK_FEAT_BOUNCE_HIGH = (1u << 14), }; /* @@ -332,7 +335,7 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH) /* internal flags in queue_limits.flags */ enum { @@ -352,7 +355,6 @@ enum blk_bounce { struct queue_limits { unsigned int features; unsigned int flags; - enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3 From f6860b6069b92559f5cdb65f48e2d82051eaebca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:33 +0200 Subject: block: remove the unused blk_bounce enum The enum has been replaced with the BLK_FEAT_BOUNCE_HIGH flag. Reported-by: Keith Busch Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e96ba7b97288..f7d275e3fb2c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -343,15 +343,6 @@ enum { BLK_FLAGS_WRITE_CACHE_DISABLED = (1u << 31), }; -/* - * BLK_BOUNCE_NONE: never bounce (default) - * BLK_BOUNCE_HIGH: bounce all highmem pages - */ -enum blk_bounce { - BLK_BOUNCE_NONE, - BLK_BOUNCE_HIGH, -}; - struct queue_limits { unsigned int features; unsigned int flags; -- cgit v1.2.3 From bae1c74316b86c67c95658c3a0cd312cec9aad77 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:35 +0200 Subject: block: renumber and rename the cache disabled flag Start with the first bit, and drop the plural-S from the name. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f7d275e3fb2c..713a98b6dbba 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -339,8 +339,8 @@ enum { /* internal flags in queue_limits.flags */ enum { - /* do not send FLUSH or FUA command despite advertised write cache */ - BLK_FLAGS_WRITE_CACHE_DISABLED = (1u << 31), + /* do not send FLUSH/FUA commands despite advertising a write cache */ + BLK_FLAG_WRITE_CACHE_DISABLED = (1u << 0), }; struct queue_limits { @@ -1339,7 +1339,7 @@ static inline bool bdev_stable_writes(struct block_device *bdev) static inline bool blk_queue_write_cache(struct request_queue *q) { return (q->limits.features & BLK_FEAT_WRITE_CACHE) && - !(q->limits.flags & BLK_FLAGS_WRITE_CACHE_DISABLED); + !(q->limits.flags & BLK_FLAG_WRITE_CACHE_DISABLED); } static inline bool bdev_write_cache(struct block_device *bdev) -- cgit v1.2.3 From 5543217be468268dfedf504f4969771b9a377353 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:36 +0200 Subject: block: move the misaligned flag into the features field Move the misaligned flags into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 713a98b6dbba..7ad2b1240fc0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -341,6 +341,9 @@ enum { enum { /* do not send FLUSH/FUA commands despite advertising a write cache */ BLK_FLAG_WRITE_CACHE_DISABLED = (1u << 0), + + /* I/O topology is misaligned */ + BLK_FEAT_MISALIGNED = (1u << 1), }; struct queue_limits { @@ -374,7 +377,6 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; - unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; unsigned int max_open_zones; -- cgit v1.2.3 From 4cac3d3a712b5c76d462b29b73b9e58c0b6d9946 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:37 +0200 Subject: block: remove the discard_alignment flag queue_limits.discard_alignment is never read except in the places where it is stacked into another limit. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7ad2b1240fc0..86410ce41bf6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -377,7 +377,6 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; - unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; unsigned int max_open_zones; unsigned int max_active_zones; -- cgit v1.2.3 From 7d4dec525f5fd555037486af4d02dd3682655ba1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:38 +0200 Subject: block: move the raid_partial_stripes_expensive flag into the features field Move the raid_partial_stripes_expensive flags into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 86410ce41bf6..1fa2b148c206 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -328,6 +328,9 @@ enum { /* bounce all highmem pages */ BLK_FEAT_BOUNCE_HIGH = (1u << 14), + + /* undocumented magic for bcache */ + BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE = (1u << 15), }; /* @@ -335,7 +338,8 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \ + BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) /* internal flags in queue_limits.flags */ enum { @@ -377,7 +381,6 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; - unsigned char raid_partial_stripes_expensive; unsigned int max_open_zones; unsigned int max_active_zones; -- cgit v1.2.3 From f70167a7a6e7e8a6911f3a216dc044cbfe7c1983 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 20 Jun 2024 12:53:51 +0000 Subject: block: Generalize chunk_sectors support as boundary support The purpose of the chunk_sectors limit is to ensure that a mergeble request fits within the boundary of the chunck_sector value. Such a feature will be useful for other request_queue boundary limits, so generalize the chunk_sectors merge code. This idea was proposed by Hannes Reinecke. Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Reviewed-by: Hannes Reinecke Acked-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20240620125359.2684798-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e8253c1507a..fb7d4c21bba8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -907,14 +907,15 @@ static inline bool bio_straddles_zones(struct bio *bio) } /* - * Return how much of the chunk is left to be used for I/O at a given offset. + * Return how much within the boundary is left to be used for I/O at a given + * offset. */ -static inline unsigned int blk_chunk_sectors_left(sector_t offset, - unsigned int chunk_sectors) +static inline unsigned int blk_boundary_sectors_left(sector_t offset, + unsigned int boundary_sectors) { - if (unlikely(!is_power_of_2(chunk_sectors))) - return chunk_sectors - sector_div(offset, chunk_sectors); - return chunk_sectors - (offset & (chunk_sectors - 1)); + if (unlikely(!is_power_of_2(boundary_sectors))) + return boundary_sectors - sector_div(offset, boundary_sectors); + return boundary_sectors - (offset & (boundary_sectors - 1)); } /** -- cgit v1.2.3 From 9da3d1e912f3953196e66991d75208cde3e845e1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 20 Jun 2024 12:53:54 +0000 Subject: block: Add core atomic write support Add atomic write support, as follows: - add helper functions to get request_queue atomic write limits - report request_queue atomic write support limits to sysfs and update Doc - support to safely merge atomic writes - deal with splitting atomic writes - misc helper functions - add a per-request atomic write flag New request_queue limits are added, as follows: - atomic_write_hw_max is set by the block driver and is the maximum length of an atomic write which the device may support. It is not necessarily a power-of-2. - atomic_write_max_sectors is derived from atomic_write_hw_max_sectors and max_hw_sectors. It is always a power-of-2. Atomic writes may be merged, and atomic_write_max_sectors would be the limit on a merged atomic write request size. This value is not capped at max_sectors, as the value in max_sectors can be controlled from userspace, and it would only cause trouble if userspace could limit atomic_write_unit_max_bytes and the other atomic write limits. - atomic_write_hw_unit_{min,max} are set by the block driver and are the min/max length of an atomic write unit which the device may support. They both must be a power-of-2. Typically atomic_write_hw_unit_max will hold the same value as atomic_write_hw_max. - atomic_write_unit_{min,max} are derived from atomic_write_hw_unit_{min,max}, max_hw_sectors, and block core limits. Both min and max values must be a power-of-2. - atomic_write_hw_boundary is set by the block driver. If non-zero, it indicates an LBA space boundary at which an atomic write straddles no longer is atomically executed by the disk. The value must be a power-of-2. Note that it would be acceptable to enforce a rule that atomic_write_hw_boundary_sectors is a multiple of atomic_write_hw_unit_max, but the resultant code would be more complicated. All atomic writes limits are by default set 0 to indicate no atomic write support. Even though it is assumed by Linux that a logical block can always be atomically written, we ignore this as it is not of particular interest. Stacked devices are just not supported either for now. An atomic write must always be submitted to the block driver as part of a single request. As such, only a single BIO must be submitted to the block layer for an atomic write. When a single atomic write BIO is submitted, it cannot be split. As such, atomic_write_unit_{max, min}_bytes are limited by the maximum guaranteed BIO size which will not be required to be split. This max size is calculated by request_queue max segments and the number of bvecs a BIO can fit, BIO_MAX_VECS. Currently we rely on userspace issuing a write with iovcnt=1 for pwritev2() - as such, we can rely on each segment containing PAGE_SIZE of data, apart from the first+last, which each can fit logical block size of data. The first+last will be LBS length/aligned as we rely on direct IO alignment rules also. New sysfs files are added to report the following atomic write limits: - atomic_write_unit_max_bytes - same as atomic_write_unit_max_sectors in bytes - atomic_write_unit_min_bytes - same as atomic_write_unit_min_sectors in bytes - atomic_write_boundary_bytes - same as atomic_write_hw_boundary_sectors in bytes - atomic_write_max_bytes - same as atomic_write_max_sectors in bytes Atomic writes may only be merged with other atomic writes and only under the following conditions: - total resultant request length <= atomic_write_max_bytes - the merged write does not straddle a boundary Helper function bdev_can_atomic_write() is added to indicate whether atomic writes may be issued to a bdev. If a bdev is a partition, the partition start must be aligned with both atomic_write_unit_min_sectors and atomic_write_hw_boundary_sectors. FSes will rely on the block layer to validate that an atomic write BIO submitted will be of valid size, so add blk_validate_atomic_write_op_size() for this purpose. Userspace expects an atomic write which is of invalid size to be rejected with -EINVAL, so add BLK_STS_INVAL for this. Also use BLK_STS_INVAL for when a BIO needs to be split, as this should mean an invalid size BIO. Flag REQ_ATOMIC is used for indicating an atomic write. Co-developed-by: Himanshu Madhani Signed-off-by: Himanshu Madhani Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Reviewed-by: Keith Busch Link: https://lore.kernel.org/r/20240620125359.2684798-6-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fb7d4c21bba8..4816f3b1d528 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -377,6 +377,16 @@ struct queue_limits { unsigned int discard_alignment; unsigned int zone_write_granularity; + /* atomic write limits */ + unsigned int atomic_write_hw_max; + unsigned int atomic_write_max_sectors; + unsigned int atomic_write_hw_boundary; + unsigned int atomic_write_boundary_sectors; + unsigned int atomic_write_hw_unit_min; + unsigned int atomic_write_unit_min; + unsigned int atomic_write_hw_unit_max; + unsigned int atomic_write_unit_max; + unsigned short max_segments; unsigned short max_integrity_segments; unsigned short max_discard_segments; @@ -1403,6 +1413,30 @@ static inline int queue_dma_alignment(const struct request_queue *q) return q ? q->limits.dma_alignment : 511; } +static inline unsigned int +queue_atomic_write_unit_max_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_unit_max; +} + +static inline unsigned int +queue_atomic_write_unit_min_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_unit_min; +} + +static inline unsigned int +queue_atomic_write_boundary_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_boundary_sectors << SECTOR_SHIFT; +} + +static inline unsigned int +queue_atomic_write_max_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_max_sectors << SECTOR_SHIFT; +} + static inline unsigned int bdev_dma_alignment(struct block_device *bdev) { return queue_dma_alignment(bdev_get_queue(bdev)); @@ -1644,6 +1678,27 @@ struct io_comp_batch { void (*complete)(struct io_comp_batch *); }; +static inline bool bdev_can_atomic_write(struct block_device *bdev) +{ + struct request_queue *bd_queue = bdev->bd_queue; + struct queue_limits *limits = &bd_queue->limits; + + if (!limits->atomic_write_unit_min) + return false; + + if (bdev_is_partition(bdev)) { + sector_t bd_start_sect = bdev->bd_start_sect; + unsigned int alignment = + max(limits->atomic_write_unit_min, + limits->atomic_write_hw_boundary); + + if (!IS_ALIGNED(bd_start_sect, alignment >> SECTOR_SHIFT)) + return false; + } + + return true; +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 9abcfbd235f59fb5b6379e5bc0231dad831ebace Mon Sep 17 00:00:00 2001 From: Prasad Singamsetty Date: Thu, 20 Jun 2024 12:53:55 +0000 Subject: block: Add atomic write support for statx Extend statx system call to return additional info for atomic write support support if the specified file is a block device. Reviewed-by: Martin K. Petersen Signed-off-by: Prasad Singamsetty Signed-off-by: John Garry Reviewed-by: Keith Busch Acked-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240620125359.2684798-7-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4816f3b1d528..2800231046a9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1638,7 +1638,8 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); -void bdev_statx_dioalign(struct inode *inode, struct kstat *stat); +void bdev_statx(struct inode *backing_inode, struct kstat *stat, + u32 request_mask); void printk_all_partitions(void); int __init early_lookup_bdev(const char *pathname, dev_t *dev); #else @@ -1656,7 +1657,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) +static inline void bdev_statx(struct inode *backing_inode, struct kstat *stat, + u32 request_mask) { } static inline void printk_all_partitions(void) -- cgit v1.2.3 From b6cfe2287df6e26d685af8a8a96ed1bf87bdde28 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 21 Jun 2024 12:15:05 +0900 Subject: block: Define bdev_nr_zones() as an inline function There is no need for bdev_nr_zones() to be an exported function calculating the number of zones of a block device. Instead, given that all callers use this helper with a fully initialized block device that has a gendisk, we can redefine this function as an inline helper in blkdev.h. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240621031506.759397-3-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2800231046a9..1078a7d51295 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -673,7 +673,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } #ifdef CONFIG_BLK_DEV_ZONED -unsigned int bdev_nr_zones(struct block_device *bdev); static inline unsigned int disk_nr_zones(struct gendisk *disk) { @@ -687,6 +686,11 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return disk_nr_zones(bdev->bd_disk); +} + static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; -- cgit v1.2.3 From caaf7101c01a91a882d3da2f566579dda692367d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 21 Jun 2024 12:15:06 +0900 Subject: block: Cleanup block device zone helpers There is no need to conditionally define on CONFIG_BLK_DEV_ZONED the inline helper functions bdev_nr_zones(), bdev_max_open_zones(), bdev_max_active_zones() and disk_zone_no() as these function will return the correct valu in all cases (zoned device or not, including when CONFIG_BLK_DEV_ZONED is not set). Furthermore, disk_nr_zones() definition can be simplified as disk->nr_zones is always 0 for regular block devices. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240621031506.759397-4-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1078a7d51295..e89003360c17 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -673,11 +673,21 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } #ifdef CONFIG_BLK_DEV_ZONED - static inline unsigned int disk_nr_zones(struct gendisk *disk) { - return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0; + return disk->nr_zones; +} +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); +#else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return 0; +} +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +{ + return false; } +#endif /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { @@ -701,36 +711,6 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev) return bdev->bd_disk->queue->limits.max_active_zones; } -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return 0; -} - -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return 0; -} -static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) -{ - return 0; -} -static inline unsigned int bdev_max_open_zones(struct block_device *bdev) -{ - return 0; -} - -static inline unsigned int bdev_max_active_zones(struct block_device *bdev) -{ - return 0; -} -static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) -{ - return false; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline unsigned int blk_queue_depth(struct request_queue *q) { if (q->queue_depth) -- cgit v1.2.3 From 44348870de4b8f292f97b84583a298d66fbaf738 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jun 2024 19:38:35 +0200 Subject: block: fix the blk_queue_nonrot polarity Take care of the inverse polarity of the BLK_FEAT_ROTATIONAL flag vs the old nonrot helper. Fixes: bd4a633b6f7c ("block: move the nonrot flag to queue_limits") Reported-by: kernel test robot Reported-by: Keith Busch Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20240624173835.76753-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e89003360c17..b2f1362c4681 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -617,7 +617,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) -#define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) +#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) -- cgit v1.2.3 From ec9b1cf0b0ebfb52274971a8a0e74e0a133f64fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:24 +0200 Subject: block: rename BLK_FEAT_MISALIGNED This is a flag for ->flags and not a feature for ->features. And fix the one place that actually incorrectly cleared it from ->features. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b2f1362c4681..1a7e9d9c16d7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -347,7 +347,7 @@ enum { BLK_FLAG_WRITE_CACHE_DISABLED = (1u << 0), /* I/O topology is misaligned */ - BLK_FEAT_MISALIGNED = (1u << 1), + BLK_FLAG_MISALIGNED = (1u << 1), }; struct queue_limits { -- cgit v1.2.3 From fcf865e357f80285af12c0c9a49f89d71acb7f4b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:25 +0200 Subject: block: convert features and flags to __bitwise types ... and let sparse help us catch mismatches or abuses. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 85 +++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 42 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1a7e9d9c16d7..b37826b350a2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -283,55 +283,56 @@ static inline bool blk_op_is_passthrough(blk_opf_t op) } /* flags set by the driver in queue_limits.features */ -enum { - /* supports a volatile write cache */ - BLK_FEAT_WRITE_CACHE = (1u << 0), +typedef unsigned int __bitwise blk_features_t; - /* supports passing on the FUA bit */ - BLK_FEAT_FUA = (1u << 1), +/* supports a volatile write cache */ +#define BLK_FEAT_WRITE_CACHE ((__force blk_features_t)(1u << 0)) - /* rotational device (hard drive or floppy) */ - BLK_FEAT_ROTATIONAL = (1u << 2), +/* supports passing on the FUA bit */ +#define BLK_FEAT_FUA ((__force blk_features_t)(1u << 1)) - /* contributes to the random number pool */ - BLK_FEAT_ADD_RANDOM = (1u << 3), +/* rotational device (hard drive or floppy) */ +#define BLK_FEAT_ROTATIONAL ((__force blk_features_t)(1u << 2)) - /* do disk/partitions IO accounting */ - BLK_FEAT_IO_STAT = (1u << 4), +/* contributes to the random number pool */ +#define BLK_FEAT_ADD_RANDOM ((__force blk_features_t)(1u << 3)) - /* don't modify data until writeback is done */ - BLK_FEAT_STABLE_WRITES = (1u << 5), +/* do disk/partitions IO accounting */ +#define BLK_FEAT_IO_STAT ((__force blk_features_t)(1u << 4)) - /* always completes in submit context */ - BLK_FEAT_SYNCHRONOUS = (1u << 6), +/* don't modify data until writeback is done */ +#define BLK_FEAT_STABLE_WRITES ((__force blk_features_t)(1u << 5)) - /* supports REQ_NOWAIT */ - BLK_FEAT_NOWAIT = (1u << 7), +/* always completes in submit context */ +#define BLK_FEAT_SYNCHRONOUS ((__force blk_features_t)(1u << 6)) - /* supports DAX */ - BLK_FEAT_DAX = (1u << 8), +/* supports REQ_NOWAIT */ +#define BLK_FEAT_NOWAIT ((__force blk_features_t)(1u << 7)) - /* supports I/O polling */ - BLK_FEAT_POLL = (1u << 9), +/* supports DAX */ +#define BLK_FEAT_DAX ((__force blk_features_t)(1u << 8)) - /* is a zoned device */ - BLK_FEAT_ZONED = (1u << 10), +/* supports I/O polling */ +#define BLK_FEAT_POLL ((__force blk_features_t)(1u << 9)) - /* supports Zone Reset All */ - BLK_FEAT_ZONE_RESETALL = (1u << 11), +/* is a zoned device */ +#define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10)) - /* supports PCI(e) p2p requests */ - BLK_FEAT_PCI_P2PDMA = (1u << 12), +/* supports Zone Reset All */ +#define BLK_FEAT_ZONE_RESETALL ((__force blk_features_t)(1u << 11)) - /* skip this queue in blk_mq_(un)quiesce_tagset */ - BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13), +/* supports PCI(e) p2p requests */ +#define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12)) - /* bounce all highmem pages */ - BLK_FEAT_BOUNCE_HIGH = (1u << 14), +/* skip this queue in blk_mq_(un)quiesce_tagset */ +#define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13)) - /* undocumented magic for bcache */ - BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE = (1u << 15), -}; +/* bounce all highmem pages */ +#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14)) + +/* undocumented magic for bcache */ +#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ + ((__force blk_features_t)(1u << 15)) /* * Flags automatically inherited when stacking limits. @@ -342,17 +343,17 @@ enum { BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) /* internal flags in queue_limits.flags */ -enum { - /* do not send FLUSH/FUA commands despite advertising a write cache */ - BLK_FLAG_WRITE_CACHE_DISABLED = (1u << 0), +typedef unsigned int __bitwise blk_flags_t; - /* I/O topology is misaligned */ - BLK_FLAG_MISALIGNED = (1u << 1), -}; +/* do not send FLUSH/FUA commands despite advertising a write cache */ +#define BLK_FLAG_WRITE_CACHE_DISABLED ((__force blk_flags_t)(1u << 0)) + +/* I/O topology is misaligned */ +#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1)) struct queue_limits { - unsigned int features; - unsigned int flags; + blk_features_t features; + blk_flags_t flags; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3 From 73781b3b81e76583708a652c853d54d03dce031d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:27 +0200 Subject: block: remove disk_update_readahead Mark blk_apply_bdi_limits non-static and open code disk_update_readahead in the only caller. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b37826b350a2..6b88382012e9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -973,7 +973,6 @@ static inline void blk_queue_disable_write_zeroes(struct request_queue *q) /* * Access functions for manipulating queue properties */ -void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); -- cgit v1.2.3 From abfc9d810926dfbf5645c7755c8d5ab96273f27d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:28 +0200 Subject: block: remove the fallback case in queue_dma_alignment Now that all updates go through blk_validate_limits the default of 511 is set at initialization time. Also remove the unused NULL check as calling this helper on a NULL queue can't happen (and doesn't make much sense to start with). Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-8-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6b88382012e9..94fcbc912312 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1394,7 +1394,7 @@ static inline bool bdev_is_zone_start(struct block_device *bdev, static inline int queue_dma_alignment(const struct request_queue *q) { - return q ? q->limits.dma_alignment : 511; + return q->limits.dma_alignment; } static inline unsigned int -- cgit v1.2.3 From e94b45d08b5d1c230c0f59c3eed758d28658851e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:29 +0200 Subject: block: move dma_pad_mask into queue_limits dma_pad_mask is a queue_limits by all ways of looking at it, so move it there and set it through the atomic queue limits APIs. Add a little helper that takes the alignment and pad into account to simplify the code that is touched a bit. Note that there never was any need for the > check in blk_queue_update_dma_pad, this probably was just copy and paste from dma_update_dma_alignment. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-9-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 94fcbc912312..a53e3434e1a2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -401,6 +401,7 @@ struct queue_limits { * due to possible offsets. */ unsigned int dma_alignment; + unsigned int dma_pad_mask; struct blk_integrity integrity; }; @@ -509,8 +510,6 @@ struct request_queue { */ int id; - unsigned int dma_pad_mask; - /* * queue settings */ @@ -981,7 +980,6 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, sector_t offset, const char *pfx); -extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); struct blk_independent_access_ranges * @@ -1433,10 +1431,16 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev, bdev_logical_block_size(bdev) - 1); } +static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim) +{ + return lim->dma_alignment | lim->dma_pad_mask; +} + static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { - unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; + unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits); + return !(addr & alignment) && !(len & alignment); } -- cgit v1.2.3 From 63db4a1f795a19e4e12f036a12a5f61c48b03e5c Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 27 Jun 2024 16:07:35 +0000 Subject: block: Delete blk_queue_flag_test_and_set() Since commit 70200574cc22 ("block: remove QUEUE_FLAG_DISCARD"), blk_queue_flag_test_and_set() has not been used, so delete it. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240627160735.842189-1-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a53e3434e1a2..53c41ef4222c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -609,7 +609,6 @@ struct request_queue { void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) -- cgit v1.2.3 From 5476394aa9f27d670dd2bac426fdb6ac12b12cb3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jun 2024 13:14:01 +0200 Subject: block: simplify queue_logical_block_size queue_logical_block_size is never called with a 0 queue, and the logical_block_size field in queue_limits is always initialized for a live queue. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Link: https://lore.kernel.org/r/20240627111407.476276-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53c41ef4222c..4d0d4b83bc74 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1227,12 +1227,7 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev) static inline unsigned queue_logical_block_size(const struct request_queue *q) { - int retval = 512; - - if (q && q->limits.logical_block_size) - retval = q->limits.logical_block_size; - - return retval; + return q->limits.logical_block_size; } static inline unsigned int bdev_logical_block_size(struct block_device *bdev) -- cgit v1.2.3 From f2a7bea23710fceb99dac6da4ef82c3cc8932f7f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 4 Jul 2024 14:28:15 +0900 Subject: block: Remove REQ_OP_ZONE_RESET_ALL emulation Now that device mapper can handle resetting all zones of a mapped zoned device using REQ_OP_ZONE_RESET_ALL, all zoned block device drivers support this operation. With this, the request queue feature BLK_FEAT_ZONE_RESETALL is not necessary and the emulation code in blk-zone.c can be removed. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240704052816.623865-5-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4d0d4b83bc74..dc250d8070d2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -318,9 +318,6 @@ typedef unsigned int __bitwise blk_features_t; /* is a zoned device */ #define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10)) -/* supports Zone Reset All */ -#define BLK_FEAT_ZONE_RESETALL ((__force blk_features_t)(1u << 11)) - /* supports PCI(e) p2p requests */ #define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12)) @@ -618,8 +615,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) -#define blk_queue_zone_resetall(q) \ - ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) #ifdef CONFIG_BLK_RQ_ALLOC_TIME -- cgit v1.2.3 From bf86bcdb40123ee99669ee91b67e023669433a1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 18:51:20 +0200 Subject: blk-lib: check for kill signal in ioctl BLKZEROOUT Zeroout can access a significant capacity and take longer than the user expected. A user may change their mind about wanting to run that command and attempt to kill the process and do something else with their device. But since the task is uninterruptable, they have to wait for it to finish, which could be many hours. Add a new BLKDEV_ZERO_KILLABLE flag for blkdev_issue_zeroout that checks for a fatal signal at each iteration so the user doesn't have to wait for their regretted operation to complete naturally. Heavily based on an earlier patch from Keith Busch. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240701165219.1571322-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dc250d8070d2..02e04df27282 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1095,6 +1095,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ +#define BLKDEV_ZERO_KILLABLE (1 << 2) /* interruptible by fatal signals */ extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, -- cgit v1.2.3 From fe3d508ba95bc63adba661355115be340275c0d1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 8 Jul 2024 09:16:48 +0000 Subject: block: Validate logical block size in blk_validate_limits() Some drivers validate that their own logical block size. It is no harm to always do this, so validate in blk_validate_limits(). This allows us to remove the validation in most of those drivers. Add a comment to blk_validate_block_size() to inform users that self- validation of LBS is usually unnecessary. Signed-off-by: John Garry Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240708091651.177447-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 02e04df27282..dce4a6bf7307 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -268,6 +268,7 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +/* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) { if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) -- cgit v1.2.3 From c8f51feee135f37f0d77b4616083c25524daa7b0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 11:29:01 +0000 Subject: block: remove QUEUE_FLAG_STOPPED QUEUE_FLAG_STOPPED is entirely unused. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-5-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dce4a6bf7307..942ad4e0f231 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -588,7 +588,6 @@ struct request_queue { }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ -#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ @@ -608,7 +607,6 @@ struct request_queue { void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) -- cgit v1.2.3 From 55177adf1837bc56f878f7f6f7123947a2088148 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:04 +0000 Subject: block: Make QUEUE_FLAG_x as an enum This will allow us better keep in sync with blk_queue_flag_name[]. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-8-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 942ad4e0f231..ded46fad67a0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -588,19 +588,22 @@ struct request_queue { }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ -#define QUEUE_FLAG_DYING 1 /* queue being torn down */ -#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ -#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ -#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ -#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ -#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ -#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ -#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ -#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ -#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ +enum { + QUEUE_FLAG_DYING, /* queue being torn down */ + QUEUE_FLAG_NOMERGES, /* disable merge attempts */ + QUEUE_FLAG_SAME_COMP, /* complete on same CPU-group */ + QUEUE_FLAG_FAIL_IO, /* fake timeout */ + QUEUE_FLAG_NOXMERGES, /* No extended merges */ + QUEUE_FLAG_SAME_FORCE, /* force complete on same CPU */ + QUEUE_FLAG_INIT_DONE, /* queue is initialized */ + QUEUE_FLAG_STATS, /* track IO start and completion times */ + QUEUE_FLAG_REGISTERED, /* queue has been registered to a disk */ + QUEUE_FLAG_QUIESCED, /* queue has been quiesced */ + QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */ + QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */ + QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ + QUEUE_FLAG_MAX +}; #define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP) -- cgit v1.2.3