From e45c47d1f94e0cc7b6b079fdb4bcce2995e2adc4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 28 Jan 2022 10:58:39 -0500 Subject: block: add bio_start_io_acct_time() to control start_time bio_start_io_acct_time() interface is like bio_start_io_acct() that allows start_time to be passed in. This gives drivers the ability to defer starting accounting until after IO is issued (but possibily not entirely due to bio splitting). Reviewed-by: Christoph Hellwig Signed-off-by: Mike Snitzer Link: https://lore.kernel.org/r/20220128155841.39644-2-snitzer@redhat.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c95df26fc26..f35aea98bc35 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1258,6 +1258,7 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); +void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev); -- cgit v1.2.3 From 322cbb50de711814c42fb088f6d31901502c711a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jan 2022 10:39:13 +0100 Subject: block: remove genhd.h There is no good reason to keep genhd.h separate from the main blkdev.h header that includes it. So fold the contents of genhd.h into blkdev.h and remove genhd.h entirely. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220124093913.742411-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 273 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 270 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f35aea98bc35..99a4384bb8a5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1,9 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions Copyright (C) 1992 Drew Eckhardt + */ #ifndef _LINUX_BLKDEV_H #define _LINUX_BLKDEV_H -#include -#include +#include +#include +#include #include #include #include @@ -12,11 +16,15 @@ #include #include #include +#include #include #include #include +#include #include #include +#include +#include struct module; struct request_queue; @@ -33,6 +41,10 @@ struct blk_queue_stats; struct blk_stat_callback; struct blk_crypto_profile; +extern const struct device_type disk_type; +extern struct device_type part_type; +extern struct class block_class; + /* Must be consistent with blk_mq_poll_stats_bkt() */ #define BLK_MQ_POLL_STATS_BKTS 16 @@ -45,6 +57,144 @@ struct blk_crypto_profile; */ #define BLKCG_MAX_POLS 6 +#define DISK_MAX_PARTS 256 +#define DISK_NAME_LEN 32 + +#define PARTITION_META_INFO_VOLNAMELTH 64 +/* + * Enough for the string representation of any kind of UUID plus NULL. + * EFI UUID is 36 characters. MSDOS UUID is 11 characters. + */ +#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) + +struct partition_meta_info { + char uuid[PARTITION_META_INFO_UUIDLTH]; + u8 volname[PARTITION_META_INFO_VOLNAMELTH]; +}; + +/** + * DOC: genhd capability flags + * + * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to + * removable media. When set, the device remains present even when media is not + * inserted. Shall not be set for devices which are removed entirely when the + * media is removed. + * + * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events, + * doesn't appear in sysfs, and can't be opened from userspace or using + * blkdev_get*. Used for the underlying components of multipath devices. + * + * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not + * scan for partitions from add_disk, and users can't add partitions manually. + * + */ +enum { + GENHD_FL_REMOVABLE = 1 << 0, + GENHD_FL_HIDDEN = 1 << 1, + GENHD_FL_NO_PART = 1 << 2, +}; + +enum { + DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ + DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ +}; + +enum { + /* Poll even if events_poll_msecs is unset */ + DISK_EVENT_FLAG_POLL = 1 << 0, + /* Forward events to udev */ + DISK_EVENT_FLAG_UEVENT = 1 << 1, + /* Block event polling when open for exclusive write */ + DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE = 1 << 2, +}; + +struct disk_events; +struct badblocks; + +struct blk_integrity { + const struct blk_integrity_profile *profile; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; +}; + +struct gendisk { + /* + * major/first_minor/minors should not be set by any new driver, the + * block core will take care of allocating them automatically. + */ + int major; + int first_minor; + int minors; + + char disk_name[DISK_NAME_LEN]; /* name of major driver */ + + unsigned short events; /* supported events */ + unsigned short event_flags; /* flags related to event processing */ + + struct xarray part_tbl; + struct block_device *part0; + + const struct block_device_operations *fops; + struct request_queue *queue; + void *private_data; + + int flags; + unsigned long state; +#define GD_NEED_PART_SCAN 0 +#define GD_READ_ONLY 1 +#define GD_DEAD 2 +#define GD_NATIVE_CAPACITY 3 + + struct mutex open_mutex; /* open/close mutex */ + unsigned open_partitions; /* number of open partitions */ + + struct backing_dev_info *bdi; + struct kobject *slave_dir; +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED + struct list_head slave_bdevs; +#endif + struct timer_rand_state *random; + atomic_t sync_io; /* RAID */ + struct disk_events *ev; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct kobject integrity_kobj; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ +#if IS_ENABLED(CONFIG_CDROM) + struct cdrom_device_info *cdi; +#endif + int node_id; + struct badblocks *bb; + struct lockdep_map lockdep_map; + u64 diskseq; +}; + +static inline bool disk_live(struct gendisk *disk) +{ + return !inode_unhashed(disk->part0->bd_inode); +} + +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. + */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + +#if IS_REACHABLE(CONFIG_CDROM) +#define disk_to_cdi(disk) ((disk)->cdi) +#else +#define disk_to_cdi(disk) NULL +#endif + +static inline dev_t disk_devt(struct gendisk *disk) +{ + return MKDEV(disk->major, disk->first_minor); +} + static inline int blk_validate_block_size(unsigned long bsize) { if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) @@ -596,6 +746,118 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) +int __must_check device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); +static inline int __must_check add_disk(struct gendisk *disk) +{ + return device_add_disk(NULL, disk, NULL); +} +void del_gendisk(struct gendisk *gp); +void invalidate_disk(struct gendisk *disk); +void set_disk_ro(struct gendisk *disk, bool read_only); +void disk_uevent(struct gendisk *disk, enum kobject_action action); + +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0->bd_read_only || + test_bit(GD_READ_ONLY, &disk->state); +} + +static inline int bdev_read_only(struct block_device *bdev) +{ + return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); +} + +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); +bool disk_force_media_change(struct gendisk *disk, unsigned int events); + +void add_disk_randomness(struct gendisk *disk) __latent_entropy; +void rand_initialize_disk(struct gendisk *disk); + +static inline sector_t get_start_sect(struct block_device *bdev) +{ + return bdev->bd_start_sect; +} + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) +{ + return bdev->bd_nr_sectors; +} + +static inline loff_t bdev_nr_bytes(struct block_device *bdev) +{ + return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; +} + +static inline sector_t get_capacity(struct gendisk *disk) +{ + return bdev_nr_sectors(disk->part0); +} + +static inline u64 sb_bdev_nr_blocks(struct super_block *sb) +{ + return bdev_nr_sectors(sb->s_bdev) >> + (sb->s_blocksize_bits - SECTOR_SHIFT); +} + +int bdev_disk_changed(struct gendisk *disk, bool invalidate); + +struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + struct lock_class_key *lkclass); +void put_disk(struct gendisk *disk); +struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); + +/** + * blk_alloc_disk - allocate a gendisk structure + * @node_id: numa node to allocate on + * + * Allocate and pre-initialize a gendisk structure for use with BIO based + * drivers. + * + * Context: can sleep + */ +#define blk_alloc_disk(node_id) \ +({ \ + static struct lock_class_key __key; \ + \ + __blk_alloc_disk(node_id, &__key); \ +}) +void blk_cleanup_disk(struct gendisk *disk); + +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) +void unregister_blkdev(unsigned int major, const char *name); + +bool bdev_check_media_change(struct block_device *bdev); +int __invalidate_device(struct block_device *bdev, bool kill_dirty); +void set_capacity(struct gendisk *disk, sector_t size); + +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED +int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); +int bd_register_pending_holders(struct gendisk *disk); +#else +static inline int bd_link_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ + return 0; +} +static inline void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ +} +static inline int bd_register_pending_holders(struct gendisk *disk) +{ + return 0; +} +#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ + +dev_t part_devt(struct gendisk *disk, u8 partno); +void inc_diskseq(struct gendisk *disk); +dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t devt); extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); @@ -1311,6 +1573,7 @@ void invalidate_bdev(struct block_device *bdev); int sync_blockdev(struct block_device *bdev); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); +void printk_all_partitions(void); #else static inline void invalidate_bdev(struct block_device *bdev) { @@ -1326,7 +1589,11 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -#endif +static inline void printk_all_partitions(void) +{ +} +#endif /* CONFIG_BLOCK */ + int fsync_bdev(struct block_device *bdev); int freeze_bdev(struct block_device *bdev); -- cgit v1.2.3 From b1f866b013e6e5583f2f0bf4a61d13eddb9a1799 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 08:05:48 +0100 Subject: block: remove blk_needs_flush_plug blk_needs_flush_plug fails to account for the cb_list, which needs flushing as well. Remove it and just check if there is a plug instead of poking into the internals of the plug structure. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220127070549.1377856-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99a4384bb8a5..f902a1c2fac0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1055,14 +1055,6 @@ extern void blk_finish_plug(struct blk_plug *); void blk_flush_plug(struct blk_plug *plug, bool from_schedule); -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - - return plug && - (plug->mq_list || !list_empty(&plug->cb_list)); -} - int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ @@ -1086,11 +1078,6 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } -static inline bool blk_needs_flush_plug(struct task_struct *tsk) -{ - return false; -} - static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; -- cgit v1.2.3 From aa8dcccaf32bfdc09f2aff089d5d60c37da5b7b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 08:05:49 +0100 Subject: block: check that there is a plug in blk_flush_plug Rename blk_flush_plug to __blk_flush_plug and add a wrapper that includes the NULL check instead of open coding that check everywhere. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220127070549.1377856-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f902a1c2fac0..654163d3b903 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1053,7 +1053,12 @@ extern void blk_start_plug(struct blk_plug *); extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); -void blk_flush_plug(struct blk_plug *plug, bool from_schedule); +void __blk_flush_plug(struct blk_plug *plug, bool from_schedule); +static inline void blk_flush_plug(struct blk_plug *plug, bool async) +{ + if (plug) + __blk_flush_plug(plug, async); +} int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); -- cgit v1.2.3 From b42c1fc3d55e077d36718ad9800d89100b2aff81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jan 2022 07:41:25 +0100 Subject: block: fix the kerneldoc for bio_end_io_acct Document the actually existing parameter name. Reported-by: Stephen Rothwell Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220127064125.1314347-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 654163d3b903..3bfc75a2a450 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1520,7 +1520,7 @@ void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, /** * bio_end_io_acct - end I/O accounting for bio based drivers * @bio: bio to end account for - * @start: start time returned by bio_start_io_acct() + * @start_time: start time returned by bio_start_io_acct() */ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) { -- cgit v1.2.3 From 76792055c4c8b2472ca1ae48e0ddaf8497529f08 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Feb 2022 10:45:10 +0100 Subject: block: add a ->free_disk method Add a method to notify the driver that the gendisk is about to be freed. This allows drivers to tie the lifetime of their private data to that of the gendisk and thus deal with device removal races without expensive synchronization and boilerplate code. A new flag is added so that ->free_disk is only called after a successful call to add_disk, which significantly simplifies the error handling path during probing. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220215094514.3828912-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3bfc75a2a450..f757f9c2871f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -146,6 +146,7 @@ struct gendisk { #define GD_READ_ONLY 1 #define GD_DEAD 2 #define GD_NATIVE_CAPACITY 3 +#define GD_ADDED 4 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ @@ -1464,6 +1465,7 @@ struct block_device_operations { void (*unlock_native_capacity) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); int (*set_read_only)(struct block_device *bdev, bool ro); + void (*free_disk)(struct gendisk *disk); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, -- cgit v1.2.3 From 7a5428dcb7902700b830e912feee4e845df7c019 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Feb 2022 08:52:31 +0100 Subject: block: fix surprise removal for drivers calling blk_set_queue_dying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various block drivers call blk_set_queue_dying to mark a disk as dead due to surprise removal events, but since commit 8e141f9eb803 that doesn't work given that the GD_DEAD flag needs to be set to stop I/O. Replace the driver calls to blk_set_queue_dying with a new (and properly documented) blk_mark_disk_dead API, and fold blk_set_queue_dying into the only remaining caller. Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk") Reported-by: Markus Blöchl Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20220217075231.1140-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f35aea98bc35..16b47035e4b0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -748,7 +748,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, bool __must_check blk_get_queue(struct request_queue *); extern void blk_put_queue(struct request_queue *); -extern void blk_set_queue_dying(struct request_queue *); + +void blk_mark_disk_dead(struct gendisk *disk); #ifdef CONFIG_BLOCK /* -- cgit v1.2.3 From 73bd66d9c834220579c881a3eb020fd8917075d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 Feb 2022 09:28:28 +0100 Subject: scsi: block: Remove REQ_OP_WRITE_SAME support No more users of REQ_OP_WRITE_SAME or drivers implementing it are left, so remove the infrastructure. [mkp: fold in and tweak sysfs reporting fix] Link: https://lore.kernel.org/r/20220209082828.2629273-8-hch@lst.de Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/linux/blkdev.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c95df26fc26..b10470d5e986 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -97,7 +97,6 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; - unsigned int max_write_same_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; @@ -651,9 +650,6 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, return min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT); - if (unlikely(op == REQ_OP_WRITE_SAME)) - return q->limits.max_write_same_sectors; - if (unlikely(op == REQ_OP_WRITE_ZEROES)) return q->limits.max_write_zeroes_sectors; @@ -696,8 +692,6 @@ extern void blk_queue_max_discard_segments(struct request_queue *, extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); -extern void blk_queue_max_write_same_sectors(struct request_queue *q, - unsigned int max_write_same_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); @@ -842,9 +836,6 @@ static inline long nr_blockdev_pages(void) extern void blk_io_schedule(void); -extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, struct page *page); - #define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, @@ -1071,16 +1062,6 @@ static inline int bdev_discard_alignment(struct block_device *bdev) return q->limits.discard_alignment; } -static inline unsigned int bdev_write_same(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return q->limits.max_write_same_sectors; - - return 0; -} - static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -- cgit v1.2.3 From 20f01f163203666010ee1560852590a0c0572726 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Jan 2022 13:59:38 -0800 Subject: blk-crypto: show crypto capabilities in sysfs Add sysfs files that expose the inline encryption capabilities of request queues: /sys/block/$disk/queue/crypto/max_dun_bits /sys/block/$disk/queue/crypto/modes/$mode /sys/block/$disk/queue/crypto/num_keyslots Userspace can use these new files to decide what encryption settings to use, or whether to use inline encryption at all. This also brings the crypto capabilities in line with the other queue properties, which are already discoverable via the queue directory in sysfs. Design notes: - Place the new files in a new subdirectory "crypto" to group them together and to avoid complicating the main "queue" directory. This also makes it possible to replace "crypto" with a symlink later if we ever make the blk_crypto_profiles into real kobjects (see below). - It was necessary to define a new kobject that corresponds to the crypto subdirectory. For now, this kobject just contains a pointer to the blk_crypto_profile. Note that multiple queues (and hence multiple such kobjects) may refer to the same blk_crypto_profile. An alternative design would more closely match the current kernel data structures: the blk_crypto_profile could be a kobject itself, located directly under the host controller device's kobject, while /sys/block/$disk/queue/crypto would be a symlink to it. I decided not to do that for now because it would require a lot more changes, such as no longer embedding blk_crypto_profile in other structures, and also because I'm not sure we can rule out moving the crypto capabilities into 'struct queue_limits' in the future. (Even if multiple queues share the same crypto engine, maybe the supported data unit sizes could differ due to other queue properties.) It would also still be possible to switch to that design later without breaking userspace, by replacing the directory with a symlink. - Use "max_dun_bits" instead of "max_dun_bytes". Currently, the kernel internally stores this value in bytes, but that's an implementation detail. It probably makes more sense to talk about this value in bits, and choosing bits is more future-proof. - "modes" is a sub-subdirectory, since there may be multiple supported crypto modes, sysfs is supposed to have one value per file, and it makes sense to group all the mode files together. - Each mode had to be named. The crypto API names like "xts(aes)" are not appropriate because they don't specify the key size. Therefore, I assigned new names. The exact names chosen are arbitrary, but they happen to match the names used in log messages in fs/crypto/. - The "num_keyslots" file is a bit different from the others in that it is only useful to know for performance reasons. However, it's included as it can still be useful. For example, a user might not want to use inline encryption if there aren't very many keyslots. Reviewed-by: Hannes Reinecke Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20220124215938.2769-4-ebiggers@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f757f9c2871f..e19947d84f12 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -413,6 +413,7 @@ struct request_queue { #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; + struct kobject *crypto_kobject; #endif unsigned int rq_timeout; -- cgit v1.2.3 From c75e707fe1aab32f1dc8e09845533b6542d9aaa9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Mar 2022 18:55:56 +0100 Subject: block: remove the per-bio/request write hint With the NVMe support for this gone, there are no consumers of these hints left, so remove them. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220304175556.407719-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a12c031af887..226836ccd060 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -518,9 +518,6 @@ struct request_queue { bool mq_sysfs_init_done; -#define BLK_MAX_WRITE_HINTS 5 - u64 write_hints[BLK_MAX_WRITE_HINTS]; - /* * Independent sector access ranges. This is always NULL for * devices that do not have multiple independent access ranges. -- cgit v1.2.3 From 4e5cc99e1e485954a9c09872e0eeea570fb2b5a5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 8 Mar 2022 15:32:19 +0800 Subject: blk-mq: manage hctx map via xarray First code becomes more clean by switching to xarray from plain array. Second use-after-free on q->queue_hw_ctx can be fixed because queue_for_each_hw_ctx() may be run when updating nr_hw_queues is in-progress. With this patch, q->hctx_table is defined as xarray, and this structure will share same lifetime with request queue, so queue_for_each_hw_ctx() can use q->hctx_table to lookup hctx reliably. Reported-by: Yu Kuai Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220308073219.91173-7-ming.lei@redhat.com [axboe: fix blk_mq_hw_ctx forward declaration] Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e19947d84f12..d982171469bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -355,7 +355,7 @@ struct request_queue { unsigned int queue_depth; /* hw dispatch queues */ - struct blk_mq_hw_ctx **queue_hw_ctx; + struct xarray hctx_table; unsigned int nr_hw_queues; /* -- cgit v1.2.3 From 69fe0f29892077f14b56e2a479b6bcf533209d53 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 4 Mar 2022 21:08:03 -0500 Subject: block: add ->poll_bio to block_device_operations Prepare for supporting IO polling for bio-based driver. Add ->poll_bio callback so that bio-based driver can provide their own logic for polling bio. Also fix ->submit_bio_bio typo in comment block above __submit_bio_noacct. Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f757f9c2871f..51f1b1ddbed2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1455,6 +1455,8 @@ enum blk_unique_id { struct block_device_operations { void (*submit_bio)(struct bio *bio); + int (*poll_bio)(struct bio *bio, struct io_comp_batch *iob, + unsigned int flags); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); -- cgit v1.2.3