From 4a2dcc35911324d6fcde09b1760cf4f2962699ef Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:23 -0700 Subject: block: introduce bdev_dma_alignment helper Preparing for upcoming dma_alignment users that have a block_device, but don't need the request_queue. Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-5-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2f7b43444c5f..2556fcdb645b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1365,6 +1365,11 @@ static inline int queue_dma_alignment(const struct request_queue *q) return q ? q->dma_alignment : 511; } +static inline unsigned int bdev_dma_alignment(struct block_device *bdev) +{ + return queue_dma_alignment(bdev_get_queue(bdev)); +} + static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { -- cgit v1.2.3 From cfa320f72882f0e944e2237287db84b0f7df877d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:27 -0700 Subject: iov: introduce iov_iter_aligned The existing iov_iter_alignment() function returns the logical OR of address and length. For cases where address and length need to be considered separately, introduce a helper function that a caller can specificy length and address masks that indicate if the iov is unaligned. Cc: Alexander Viro Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-9-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/uio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 739285fe5a2f..34ba4a731179 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -219,6 +219,8 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); +bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, + unsigned len_mask); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, -- cgit v1.2.3 From 5debd9691c3ac64c3acd6867c264ad38bbe48cdc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:28 -0700 Subject: block: introduce bdev_iter_is_aligned helper Provide a convenient function for this repeatable coding pattern. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-10-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2556fcdb645b..0b8bc1fe0b2c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1370,6 +1370,13 @@ static inline unsigned int bdev_dma_alignment(struct block_device *bdev) return queue_dma_alignment(bdev_get_queue(bdev)); } +static inline bool bdev_iter_is_aligned(struct block_device *bdev, + struct iov_iter *iter) +{ + return iov_iter_is_aligned(iter, bdev_dma_alignment(bdev), + bdev_logical_block_size(bdev) - 1); +} + static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { -- cgit v1.2.3 From b1a000d3b8ec582da64bb644be633e5a0beffcbf Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:29 -0700 Subject: block: relax direct io memory alignment Use the address alignment requirements from the block_device for direct io instead of requiring addresses be aligned to the block size. User space can discover the alignment requirements from the dma_alignment queue attribute. User space can specify any hardware compatible DMA offset for each segment, but every segment length is still required to be a multiple of the block size. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-11-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0b8bc1fe0b2c..886c44e97434 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -424,6 +424,11 @@ struct request_queue { unsigned long nr_requests; /* Max # of requests */ unsigned int dma_pad_mask; + /* + * Drivers that set dma_alignment to less than 511 must be prepared to + * handle individual bvec's that are not a multiple of a SECTOR_SIZE + * due to possible offsets. + */ unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION -- cgit v1.2.3 From 8689461be3f1ce6686bc26f1f379790bb0fc7a8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 11:09:29 +0200 Subject: block: factor out a chunk_size_left helper Factor out a helper from blk_max_size_offset so that it can be reused independently. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Pankaj Raghav Link: https://lore.kernel.org/r/20220614090934.570632-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 886c44e97434..283961257cc9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -934,6 +934,17 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, return q->limits.max_sectors; } +/* + * Return how much of the chunk is left to be used for I/O at a given offset. + */ +static inline unsigned int blk_chunk_sectors_left(sector_t offset, + unsigned int chunk_sectors) +{ + if (unlikely(!is_power_of_2(chunk_sectors))) + return chunk_sectors - sector_div(offset, chunk_sectors); + return chunk_sectors - (offset & (chunk_sectors - 1)); +} + /* * Return maximum size of a request at given offset. Only valid for * file system requests. @@ -949,12 +960,8 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, return q->limits.max_sectors; } - if (likely(is_power_of_2(chunk_sectors))) - chunk_sectors -= offset & (chunk_sectors - 1); - else - chunk_sectors -= sector_div(offset, chunk_sectors); - - return min(q->limits.max_sectors, chunk_sectors); + return min(q->limits.max_sectors, + blk_chunk_sectors_left(offset, chunk_sectors)); } /* -- cgit v1.2.3 From efef739d5f37dc998b113fb965aea68d42a9eddc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 11:09:33 +0200 Subject: block: fold blk_max_size_offset into get_max_io_size Now that blk_max_size_offset has a single caller left, fold it into that and clean up the naming convention for the local variables there. Signed-off-by: Christoph Hellwig Reviewed-by: Pankaj Raghav Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220614090934.570632-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 283961257cc9..652c357dafb9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -945,25 +945,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, return chunk_sectors - (offset & (chunk_sectors - 1)); } -/* - * Return maximum size of a request at given offset. Only valid for - * file system requests. - */ -static inline unsigned int blk_max_size_offset(struct request_queue *q, - sector_t offset, - unsigned int chunk_sectors) -{ - if (!chunk_sectors) { - if (q->limits.chunk_sectors) - chunk_sectors = q->limits.chunk_sectors; - else - return q->limits.max_sectors; - } - - return min(q->limits.max_sectors, - blk_chunk_sectors_left(offset, chunk_sectors)); -} - /* * Access functions for manipulating queue properties */ -- cgit v1.2.3 From 2a9336c42a6abdcef564d522648a684a474a3483 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 11:09:34 +0200 Subject: block: move blk_queue_get_max_sectors to blk.h blk_queue_get_max_sectors is private to the block layer, so move it out of blkdev.h. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220614090934.570632-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 652c357dafb9..b2d42201bd5d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -921,19 +921,6 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio) } #endif /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, - int op) -{ - if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) - return min(q->limits.max_discard_sectors, - UINT_MAX >> SECTOR_SHIFT); - - if (unlikely(op == REQ_OP_WRITE_ZEROES)) - return q->limits.max_write_zeroes_sectors; - - return q->limits.max_sectors; -} - /* * Return how much of the chunk is left to be used for I/O at a given offset. */ -- cgit v1.2.3 From e589f46445960c274cc813a1cc8e2fc73b2a1849 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:26 +0200 Subject: block: fix default IO priority handling again Commit e70344c05995 ("block: fix default IO priority handling") introduced an inconsistency in get_current_ioprio() that tasks without IO context return IOPRIO_DEFAULT priority while tasks with freshly allocated IO context will return 0 (IOPRIO_CLASS_NONE/0) IO priority. Tasks without IO context used to be rare before 5a9d041ba2f6 ("block: move io_context creation into where it's needed") but after this commit they became common because now only BFQ IO scheduler setups task's IO context. Similar inconsistency is there for get_task_ioprio() so this inconsistency is now exposed to userspace and userspace will see different IO priority for tasks operating on devices with BFQ compared to devices without BFQ. Furthemore the changes done by commit e70344c05995 change the behavior when no IO priority is set for BFQ IO scheduler which is also documented in ioprio_set(2) manpage: "If no I/O scheduler has been set for a thread, then by default the I/O priority will follow the CPU nice value (setpriority(2)). In Linux kernels before version 2.6.24, once an I/O priority had been set using ioprio_set(), there was no way to reset the I/O scheduling behavior to the default. Since Linux 2.6.24, specifying ioprio as 0 can be used to reset to the default I/O scheduling behavior." So make sure we default to IOPRIO_CLASS_NONE as used to be the case before commit e70344c05995. Also cleanup alloc_io_context() to explicitely set this IO priority for the allocated IO context to avoid future surprises. Note that we tweak ioprio_best() to maintain ioprio_get(2) behavior and make this commit easily backportable. CC: stable@vger.kernel.org Fixes: e70344c05995 ("block: fix default IO priority handling") Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-1-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 3f53bc27a19b..3d088a88f832 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -11,7 +11,7 @@ /* * Default IO priority. */ -#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) +#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0) /* * Check that a priority value has a valid class. -- cgit v1.2.3 From f7eda402878b12bc0884c5bc1192a9e76ad121fb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:27 +0200 Subject: block: Return effective IO priority from get_current_ioprio() get_current_ioprio() is used to initialize IO priority of various requests. As such it should be returning the effective IO priority of the task (i.e., reflecting the fact that unset IO priority should get set based on task's CPU priority) so that the conversion is concentrated in one place. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-2-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 3d088a88f832..61ed6bb4998e 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -53,10 +53,17 @@ static inline int task_nice_ioclass(struct task_struct *task) static inline int get_current_ioprio(void) { struct io_context *ioc = current->io_context; + int prio; if (ioc) - return ioc->ioprio; - return IOPRIO_DEFAULT; + prio = ioc->ioprio; + else + prio = IOPRIO_DEFAULT; + + if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) + prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), + task_nice_ioprio(current)); + return prio; } /* -- cgit v1.2.3 From 893e5d32d5832674bcf6465f27958e883b72b346 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:28 +0200 Subject: block: Generalize get_current_ioprio() for any task get_current_ioprio() operates only on current task. We will need the same functionality for other tasks as well. Generalize get_current_ioprio() for that and also move the bulk out of the header file because it is large enough. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-3-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 61ed6bb4998e..9752cf4a9c7c 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -46,24 +46,18 @@ static inline int task_nice_ioclass(struct task_struct *task) return IOPRIO_CLASS_BE; } -/* - * If the calling process has set an I/O priority, use that. Otherwise, return - * the default I/O priority. - */ -static inline int get_current_ioprio(void) +#ifdef CONFIG_BLOCK +int __get_task_ioprio(struct task_struct *p); +#else +static inline int __get_task_ioprio(struct task_struct *p) { - struct io_context *ioc = current->io_context; - int prio; - - if (ioc) - prio = ioc->ioprio; - else - prio = IOPRIO_DEFAULT; + return IOPRIO_DEFAULT; +} +#endif /* CONFIG_BLOCK */ - if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) - prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), - task_nice_ioprio(current)); - return prio; +static inline int get_current_ioprio(void) +{ + return __get_task_ioprio(current); } /* -- cgit v1.2.3 From fc25545e17bd74befe0b8ab2c65ac84936be5066 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:29 +0200 Subject: block: Make ioprio_best() static Nobody outside of block/ioprio.c uses it. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-4-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 9752cf4a9c7c..7578d4f6a969 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -60,11 +60,6 @@ static inline int get_current_ioprio(void) return __get_task_ioprio(current); } -/* - * For inheritance, return the highest of the two given priorities - */ -extern int ioprio_best(unsigned short aprio, unsigned short bprio); - extern int set_task_ioprio(struct task_struct *task, int ioprio); #ifdef CONFIG_BLOCK -- cgit v1.2.3 From 1f90307e5f0d7bc9a336ead528f616a5df8e5944 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:49 +0200 Subject: block: remove QUEUE_FLAG_DEAD Disallow setting the blk-mq state on any queue that is already dying as setting the state even then is a bad idea, and remove the now unused QUEUE_FLAG_DEAD flag. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b2d42201bd5d..f4632f4fe884 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -564,7 +564,6 @@ struct request_queue { #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ @@ -592,7 +591,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags) -#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ -- cgit v1.2.3 From 6f8191fdf41d3a53cc1d63fe2234e812c55a0092 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:51 +0200 Subject: block: simplify disk shutdown Set the queue dying flag and call blk_mq_exit_queue from del_gendisk for all disks that do not have separately allocated queues, and thus remove the need to call blk_cleanup_queue for them. Rename blk_cleanup_disk to blk_mq_destroy_queue to make it clear that this function is intended only for separately allocated blk-mq queues. This saves an extra queue freeze for devices without a separately allocated queue. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ include/linux/blkdev.h | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e2d9daf7e8dd..0fd96e92c6c6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -686,10 +686,13 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, \ __blk_mq_alloc_disk(set, queuedata, &__key); \ }) +struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, + struct lock_class_key *lkclass); struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_unregister_dev(struct device *, struct request_queue *); +void blk_mq_destroy_queue(struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f4632f4fe884..530eeccffda3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -148,6 +148,7 @@ struct gendisk { #define GD_NATIVE_CAPACITY 3 #define GD_ADDED 4 #define GD_SUPPRESS_PART_SCAN 5 +#define GD_OWNS_QUEUE 6 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ @@ -815,8 +816,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) int bdev_disk_changed(struct gendisk *disk, bool invalidate); -struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, - struct lock_class_key *lkclass); void put_disk(struct gendisk *disk); struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); @@ -933,7 +932,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, /* * Access functions for manipulating queue properties */ -extern void blk_cleanup_queue(struct request_queue *); void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); -- cgit v1.2.3 From 8b9ab62662048a3274361c7e5f64037c2c133e2c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:52 +0200 Subject: block: remove blk_cleanup_disk blk_cleanup_disk is nothing but a trivial wrapper for put_disk now, so remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 530eeccffda3..22b12531aeb7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -834,7 +834,6 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); \ __blk_alloc_disk(node_id, &__key); \ }) -void blk_cleanup_disk(struct gendisk *disk); int __register_blkdev(unsigned int major, const char *name, void (*probe)(dev_t devt)); -- cgit v1.2.3 From cc5c516df028b221d94c65c47c5ae8d20f61b6f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:45 +0200 Subject: block: simplify blktrace sysfs attribute creation Add the trace attributes to the default gendisk attributes, just like we already do for partitions. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 623e22492afa..f6f9b544365a 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -77,10 +77,6 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); -extern void blk_trace_remove_sysfs(struct device *dev); -extern int blk_trace_init_sysfs(struct device *dev); - -extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) @@ -91,13 +87,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) # define blk_add_cgroup_trace_msg(q, cg, fmt, ...) do { } while (0) -# define blk_trace_remove_sysfs(dev) do { } while (0) # define blk_trace_note_message_enabled(q) (false) -static inline int blk_trace_init_sysfs(struct device *dev) -{ - return 0; -} - #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 8682b92e5ab852b93739a0f2b261fff4c733be57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:50 +0200 Subject: blk-mq: cleanup disk sysfs registration Pass a gendisk to the sysfs register/unregister functions and give them descriptive names. Also move the unregistration helper next to the one doing the registration. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0fd96e92c6c6..43aad0da3305 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -691,7 +691,6 @@ struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); -void blk_mq_unregister_dev(struct device *, struct request_queue *); void blk_mq_destroy_queue(struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); -- cgit v1.2.3 From 6a27d28c81bc5843de2490688a04ee5baa6615e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Jun 2022 08:20:12 +0200 Subject: block: move ->ia_ranges from the request_queue to the gendisk Independent access ranges only matter for file system I/O and are only valid with a registered gendisk, so move them there. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20220629062013.1331068-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22b12531aeb7..b9a94c53c6cd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -171,6 +171,12 @@ struct gendisk { struct badblocks *bb; struct lockdep_map lockdep_map; u64 diskseq; + + /* + * Independent sector access ranges. This is always NULL for + * devices that do not have multiple independent access ranges. + */ + struct blk_independent_access_ranges *ia_ranges; }; static inline bool disk_live(struct gendisk *disk) @@ -539,12 +545,6 @@ struct request_queue { bool mq_sysfs_init_done; - /* - * Independent sector access ranges. This is always NULL for - * devices that do not have multiple independent access ranges. - */ - struct blk_independent_access_ranges *ia_ranges; - /** * @srcu: Sleepable RCU. Use as lock when type of the request queue * is blocking (BLK_MQ_F_BLOCKING). Must be the last member -- cgit v1.2.3 From 99e48cd6855e9535488e3c90d65edd46c6e6fc1b Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:50 +0800 Subject: blk-mq: Add a flag for reserved requests Add a flag for reserved requests so that drivers may know this for any special handling. Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/1657109034-206040-3-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 43aad0da3305..7c62b7fabec7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -57,6 +57,7 @@ typedef __u32 __bitwise req_flags_t; #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) /* queue has elevator attached */ #define RQF_ELV ((__force req_flags_t)(1 << 22)) +#define RQF_RESV ((__force req_flags_t)(1 << 23)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ @@ -825,6 +826,11 @@ static inline bool blk_mq_need_time_stamp(struct request *rq) return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); } +static inline bool blk_mq_is_reserved_rq(struct request *rq) +{ + return rq->rq_flags & RQF_RESV; +} + /* * Batched completions only work when there is no I/O error and no special * ->end_io handler. -- cgit v1.2.3 From 9bdb4833dd399cbff82cc20893f52bdec66a9eca Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:51 +0800 Subject: blk-mq: Drop blk_mq_ops.timeout 'reserved' arg With new API blk_mq_is_reserved_rq() we can tell if a request is from the reserved pool, so stop passing 'reserved' arg. There is actually only a single user of that arg for all the callback implementations, which can use blk_mq_is_reserved_rq() instead. This will also allow us to stop passing the same 'reserved' around the blk-mq iter functions next. Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Acked-by: Ulf Hansson # For MMC Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/1657109034-206040-4-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7c62b7fabec7..c84c56d296fe 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -575,7 +575,7 @@ struct blk_mq_ops { /** * @timeout: Called on request timeout. */ - enum blk_eh_timer_return (*timeout)(struct request *, bool); + enum blk_eh_timer_return (*timeout)(struct request *); /** * @poll: Called to poll for completion of a specific tag. -- cgit v1.2.3 From 2dd6532e9591f201e7571b30915db807603ab924 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:53 +0800 Subject: blk-mq: Drop 'reserved' arg of busy_tag_iter_fn We no longer use the 'reserved' arg in busy_tag_iter_fn for any iter function so it may be dropped. Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Reviewed-by: Sagi Grimberg #nvme Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/1657109034-206040-6-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- include/scsi/scsi_host.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c84c56d296fe..810a24884f7e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -520,7 +520,7 @@ struct blk_mq_queue_data { bool last; }; -typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); +typedef bool (busy_tag_iter_fn)(struct request *, void *); /** * struct blk_mq_ops - Callback functions that implements block driver diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 667d889b92b5..65082ecdd557 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -786,7 +786,7 @@ extern int scsi_host_block(struct Scsi_Host *shost); extern int scsi_host_unblock(struct Scsi_Host *shost, int new_state); void scsi_host_busy_iter(struct Scsi_Host *, - bool (*fn)(struct scsi_cmnd *, void *, bool), void *priv); + bool (*fn)(struct scsi_cmnd *, void *), void *priv); struct class_container; -- cgit v1.2.3 From f1a8bbd1100d9cd117bc8b7fc0903982bbaf474f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:35 +0200 Subject: block: remove a superflous ifdef in blkdev.h It doesn't hurt to always have the blk_zone_cond_str prototype, and the two inlines can also be defined unconditionally. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b9a94c53c6cd..270cd0c55292 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -899,8 +899,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) return bdev->bd_queue; /* this is never NULL */ } -#ifdef CONFIG_BLK_DEV_ZONED - /* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); @@ -915,7 +913,6 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio) return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev), bio->bi_iter.bi_sector); } -#endif /* CONFIG_BLK_DEV_ZONED */ /* * Return how much of the chunk is left to be used for I/O at a given offset. -- cgit v1.2.3 From 6b2bd274744e6454ba7bbbe6a09b44866f2f414a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:40 +0200 Subject: block: pass a gendisk to blk_queue_set_zoned Prepare for storing the zone related field in struct gendisk instead of struct request_queue. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 270cd0c55292..416faa013782 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -291,7 +291,7 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model); +void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From 1dc0172027b0aa09823b430e395b1116d2745f36 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:43 +0200 Subject: block: remove queue_max_open_zones and queue_max_active_zones Always use the bdev based helpers instead. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-10-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 416faa013782..7d4105d23b0a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -702,21 +702,22 @@ static inline void blk_queue_max_open_zones(struct request_queue *q, q->max_open_zones = max_open_zones; } -static inline unsigned int queue_max_open_zones(const struct request_queue *q) -{ - return q->max_open_zones; -} - static inline void blk_queue_max_active_zones(struct request_queue *q, unsigned int max_active_zones) { q->max_active_zones = max_active_zones; } -static inline unsigned int queue_max_active_zones(const struct request_queue *q) +static inline unsigned int bdev_max_open_zones(struct block_device *bdev) +{ + return bdev->bd_disk->queue->max_open_zones; +} + +static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return q->max_active_zones; + return bdev->bd_disk->queue->max_active_zones; } + #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -732,11 +733,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q, { return 0; } -static inline unsigned int queue_max_open_zones(const struct request_queue *q) +static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return 0; } -static inline unsigned int queue_max_active_zones(const struct request_queue *q) +static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { return 0; } @@ -1314,24 +1315,6 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) return 0; } -static inline unsigned int bdev_max_open_zones(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return queue_max_open_zones(q); - return 0; -} - -static inline unsigned int bdev_max_active_zones(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return queue_max_active_zones(q); - return 0; -} - static inline int queue_dma_alignment(const struct request_queue *q) { return q ? q->dma_alignment : 511; -- cgit v1.2.3 From 982977df48179c8c690868f398051074e68eef0f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:44 +0200 Subject: block: pass a gendisk to blk_queue_max_open_zones and blk_queue_max_active_zones Switch to a gendisk based API in preparation for moving all zone related fields from the request_queue to the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7d4105d23b0a..c05e1cc05c26 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -696,16 +696,16 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q, return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); } -static inline void blk_queue_max_open_zones(struct request_queue *q, +static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - q->max_open_zones = max_open_zones; + disk->queue->max_open_zones = max_open_zones; } -static inline void blk_queue_max_active_zones(struct request_queue *q, +static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - q->max_active_zones = max_active_zones; + disk->queue->max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) -- cgit v1.2.3 From b623e347323f6464b20fb0d899a0a73522ed8f6c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:45 +0200 Subject: block: replace blkdev_nr_zones with bdev_nr_zones Pass a block_device instead of a request_queue as that is what most callers have at hand. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Acked-by: Damien Le Moal Link: https://lore.kernel.org/r/20220706070350.1703384-12-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c05e1cc05c26..fa2757ef4a84 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -298,7 +298,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); -unsigned int blkdev_nr_zones(struct gendisk *disk); +unsigned int bdev_nr_zones(struct block_device *bdev); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); @@ -312,7 +312,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, #else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blkdev_nr_zones(struct gendisk *disk) +static inline unsigned int bdev_nr_zones(struct block_device *bdev) { return 0; } -- cgit v1.2.3 From de71973c2951cb2ce4b46560f021f03b15906408 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:49 +0200 Subject: block: remove blk_queue_zone_sectors Always use bdev_zone_sectors instead. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-16-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fa2757ef4a84..21b97f7115dc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -667,11 +667,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } } -static inline sector_t blk_queue_zone_sectors(struct request_queue *q) -{ - return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; -} - #ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -1310,9 +1305,9 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - if (q) - return blk_queue_zone_sectors(q); - return 0; + if (!blk_queue_is_zoned(q)) + return 0; + return q->limits.chunk_sectors; } static inline int queue_dma_alignment(const struct request_queue *q) -- cgit v1.2.3 From d86e716aa40643e3eb8c69fab3a198146bf76dd6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:50 +0200 Subject: block: move zone related fields to struct gendisk Move the zone related fields that are currently stored in struct request_queue to struct gendisk as these are part of the highlevel block layer API and are only used for non-passthrough I/O that requires the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-17-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 ++--- include/linux/blkdev.h | 91 +++++++++++++++++++++++--------------------------- 2 files changed, 46 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 810a24884f7e..d74f6a6b7e69 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -1129,12 +1129,12 @@ void blk_dump_rq_flags(struct request *, char *); #ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_rq_zone_no(struct request *rq) { - return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); + return disk_zone_no(rq->q->disk, blk_rq_pos(rq)); } static inline unsigned int blk_rq_zone_is_seq(struct request *rq) { - return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); + return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq)); } bool blk_req_needs_zone_write_lock(struct request *rq); @@ -1156,8 +1156,8 @@ static inline void blk_req_zone_write_unlock(struct request *rq) static inline bool blk_req_zone_is_write_locked(struct request *rq) { - return rq->q->seq_zones_wlock && - test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); + return rq->q->disk->seq_zones_wlock && + test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock); } static inline bool blk_req_can_dispatch_to_zone(struct request *rq) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 21b97f7115dc..22c477fadc0f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -164,6 +164,29 @@ struct gendisk { #ifdef CONFIG_BLK_DEV_INTEGRITY struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ + +#ifdef CONFIG_BLK_DEV_ZONED + /* + * Zoned block device information for request dispatch control. + * nr_zones is the total number of zones of the device. This is always + * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones + * bits which indicates if a zone is conventional (bit set) or + * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones + * bits which indicates if a zone is write locked, that is, if a write + * request targeting the zone was dispatched. + * + * Reads of this information must be protected with blk_queue_enter() / + * blk_queue_exit(). Modifying this information is only allowed while + * no requests are being processed. See also blk_mq_freeze_queue() and + * blk_mq_unfreeze_queue(). + */ + unsigned int nr_zones; + unsigned int max_open_zones; + unsigned int max_active_zones; + unsigned long *conv_zones_bitmap; + unsigned long *seq_zones_wlock; +#endif /* CONFIG_BLK_DEV_ZONED */ + #if IS_ENABLED(CONFIG_CDROM) struct cdrom_device_info *cdi; #endif @@ -467,31 +490,6 @@ struct request_queue { unsigned int required_elevator_features; -#ifdef CONFIG_BLK_DEV_ZONED - /* - * Zoned block device information for request dispatch control. - * nr_zones is the total number of zones of the device. This is always - * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones - * bits which indicates if a zone is conventional (bit set) or - * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones - * bits which indicates if a zone is write locked, that is, if a write - * request targeting the zone was dispatched. All three fields are - * initialized by the low level device driver (e.g. scsi/sd.c). - * Stacking drivers (device mappers) may or may not initialize - * these fields. - * - * Reads of this information must be protected with blk_queue_enter() / - * blk_queue_exit(). Modifying this information is only allowed while - * no requests are being processed. See also blk_mq_freeze_queue() and - * blk_mq_unfreeze_queue(). - */ - unsigned int nr_zones; - unsigned long *conv_zones_bitmap; - unsigned long *seq_zones_wlock; - unsigned int max_open_zones; - unsigned int max_active_zones; -#endif /* CONFIG_BLK_DEV_ZONED */ - int node; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; @@ -668,63 +666,59 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } #ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int blk_queue_nr_zones(struct request_queue *q) +static inline unsigned int disk_nr_zones(struct gendisk *disk) { - return blk_queue_is_zoned(q) ? q->nr_zones : 0; + return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0; } -static inline unsigned int blk_queue_zone_no(struct request_queue *q, - sector_t sector) +static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { - if (!blk_queue_is_zoned(q)) + if (!blk_queue_is_zoned(disk->queue)) return 0; - return sector >> ilog2(q->limits.chunk_sectors); + return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline bool blk_queue_zone_is_seq(struct request_queue *q, - sector_t sector) +static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) { - if (!blk_queue_is_zoned(q)) + if (!blk_queue_is_zoned(disk->queue)) return false; - if (!q->conv_zones_bitmap) + if (!disk->conv_zones_bitmap) return true; - return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); + return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap); } static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - disk->queue->max_open_zones = max_open_zones; + disk->max_open_zones = max_open_zones; } static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - disk->queue->max_active_zones = max_active_zones; + disk->max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { - return bdev->bd_disk->queue->max_open_zones; + return bdev->bd_disk->max_open_zones; } static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return bdev->bd_disk->queue->max_active_zones; + return bdev->bd_disk->max_active_zones; } #else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blk_queue_nr_zones(struct request_queue *q) +static inline unsigned int disk_nr_zones(struct gendisk *disk) { return 0; } -static inline bool blk_queue_zone_is_seq(struct request_queue *q, - sector_t sector) +static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) { return false; } -static inline unsigned int blk_queue_zone_no(struct request_queue *q, - sector_t sector) +static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { return 0; } @@ -732,6 +726,7 @@ static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return 0; } + static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { return 0; @@ -900,14 +895,12 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); static inline unsigned int bio_zone_no(struct bio *bio) { - return blk_queue_zone_no(bdev_get_queue(bio->bi_bdev), - bio->bi_iter.bi_sector); + return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); } static inline unsigned int bio_zone_is_seq(struct bio *bio) { - return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev), - bio->bi_iter.bi_sector); + return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); } /* -- cgit v1.2.3 From 71f28f3136aff5890cd56de78abc673f8393cad9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 13 Jul 2022 22:07:10 +0800 Subject: ublk_drv: add io_uring based userspace block driver This is the driver part of userspace block driver(ublk driver), the other part is userspace daemon part(ublksrv)[1]. The two parts communicate by io_uring's IORING_OP_URING_CMD with one shared cmd buffer for storing io command, and the buffer is read only for ublksrv, each io command is indexed by io request tag directly, and is written by ublk driver. For example, when one READ io request is submitted to ublk block driver, ublk driver stores the io command into cmd buffer first, then completes one IORING_OP_URING_CMD for notifying ublksrv, and the URING_CMD is issued to ublk driver beforehand by ublksrv for getting notification of any new io request, and each URING_CMD is associated with one io request by tag. After ublksrv gets the io command, it translates and handles the ublk io request, such as, for the ublk-loop target, ublksrv translates the request into same request on another file or disk, like the kernel loop block driver. In ublksrv's implementation, the io is still handled by io_uring, and share same ring with IORING_OP_URING_CMD command. When the target io request is done, the same IORING_OP_URING_CMD is issued to ublk driver for both committing io request result and getting future notification of new io request. Another thing done by ublk driver is to copy data between kernel io request and ublksrv's io buffer: 1) before ubsrv handles WRITE request, copy the request's data into ublksrv's userspace io buffer, so that ublksrv can handle the write request 2) after ubsrv handles READ request, copy ublksrv's userspace io buffer into this READ request, then ublk driver can complete the READ request Zero copy may be switched if mm is ready to support it. ublk driver doesn't handle any logic of the specific user space driver, so it is small/simple enough. [1] ublksrv https://github.com/ming1/ubdsrv Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220713140711.97356-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 156 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 include/uapi/linux/ublk_cmd.h (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h new file mode 100644 index 000000000000..4f0c16ec875e --- /dev/null +++ b/include/uapi/linux/ublk_cmd.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef USER_BLK_DRV_CMD_INC_H +#define USER_BLK_DRV_CMD_INC_H + +#include + +/* ublk server command definition */ + +/* + * Admin commands, issued by ublk server, and handled by ublk driver. + */ +#define UBLK_CMD_GET_QUEUE_AFFINITY 0x01 +#define UBLK_CMD_GET_DEV_INFO 0x02 +#define UBLK_CMD_ADD_DEV 0x04 +#define UBLK_CMD_DEL_DEV 0x05 +#define UBLK_CMD_START_DEV 0x06 +#define UBLK_CMD_STOP_DEV 0x07 + +/* + * IO commands, issued by ublk server, and handled by ublk driver. + * + * FETCH_REQ: issued via sqe(URING_CMD) beforehand for fetching IO request + * from ublk driver, should be issued only when starting device. After + * the associated cqe is returned, request's tag can be retrieved via + * cqe->userdata. + * + * COMMIT_AND_FETCH_REQ: issued via sqe(URING_CMD) after ublkserver handled + * this IO request, request's handling result is committed to ublk + * driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be + * handled before completing io request. + */ +#define UBLK_IO_FETCH_REQ 0x20 +#define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21 + +/* only ABORT means that no re-fetch */ +#define UBLK_IO_RES_OK 0 +#define UBLK_IO_RES_ABORT (-ENODEV) + +#define UBLKSRV_CMD_BUF_OFFSET 0 +#define UBLKSRV_IO_BUF_OFFSET 0x80000000 + +/* tag bit is 12bit, so at most 4096 IOs for each queue */ +#define UBLK_MAX_QUEUE_DEPTH 4096 + +/* + * zero copy requires 4k block size, and can remap ublk driver's io + * request into ublksrv's vm space + */ +#define UBLK_F_SUPPORT_ZERO_COPY (1UL << 0) + +/* device state */ +#define UBLK_S_DEV_DEAD 0 +#define UBLK_S_DEV_LIVE 1 + +/* shipped via sqe->cmd of io_uring command */ +struct ublksrv_ctrl_cmd { + /* sent to which device, must be valid */ + __u32 dev_id; + + /* sent to which queue, must be -1 if the cmd isn't for queue */ + __u16 queue_id; + /* + * cmd specific buffer, can be IN or OUT. + */ + __u16 len; + __u64 addr; + + /* inline data */ + __u64 data[2]; +}; + +struct ublksrv_ctrl_dev_info { + __u16 nr_hw_queues; + __u16 queue_depth; + __u16 block_size; + __u16 state; + + __u32 rq_max_blocks; + __u32 dev_id; + + __u64 dev_blocks; + + __s32 ublksrv_pid; + __s32 reserved0; + __u64 flags[2]; + + /* For ublksrv internal use, invisible to ublk driver */ + __u64 ublksrv_flags; + __u64 reserved1[9]; +}; + +#define UBLK_IO_OP_READ 0 +#define UBLK_IO_OP_WRITE 1 +#define UBLK_IO_OP_FLUSH 2 +#define UBLK_IO_OP_DISCARD 3 +#define UBLK_IO_OP_WRITE_SAME 4 +#define UBLK_IO_OP_WRITE_ZEROES 5 + +#define UBLK_IO_F_FAILFAST_DEV (1U << 8) +#define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9) +#define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) +#define UBLK_IO_F_META (1U << 11) +#define UBLK_IO_F_INTEGRITY (1U << 12) +#define UBLK_IO_F_FUA (1U << 13) +#define UBLK_IO_F_PREFLUSH (1U << 14) +#define UBLK_IO_F_NOUNMAP (1U << 15) +#define UBLK_IO_F_SWAP (1U << 16) + +/* + * io cmd is described by this structure, and stored in share memory, indexed + * by request tag. + * + * The data is stored by ublk driver, and read by ublksrv after one fetch command + * returns. + */ +struct ublksrv_io_desc { + /* op: bit 0-7, flags: bit 8-31 */ + __u32 op_flags; + + __u32 nr_sectors; + + /* start sector for this io */ + __u64 start_sector; + + /* buffer address in ublksrv daemon vm space, from ublk driver */ + __u64 addr; +}; + +static inline __u8 ublksrv_get_op(const struct ublksrv_io_desc *iod) +{ + return iod->op_flags & 0xff; +} + +static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) +{ + return iod->op_flags >> 8; +} + +/* issued to ublk driver via /dev/ublkcN */ +struct ublksrv_io_cmd { + __u16 q_id; + + /* for fetch/commit which result */ + __u16 tag; + + /* io result, it is valid for COMMIT* command only */ + __s32 result; + + /* + * userspace buffer address in ublksrv daemon process, valid for + * FETCH* command only + */ + __u64 addr; +}; + +#endif -- cgit v1.2.3 From 0edb3696c1713c42f52acbd8355b545e58f782b1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 13 Jul 2022 22:07:11 +0800 Subject: ublk_drv: support to complete io command via task_work_add Use task_work_add if it is available, since task_work_add can bring up better performance, especially batching signaling ->ubq_daemon can be done. It is observed that task_work_add() can boost iops by +4% on random 4k io test. Also except for completing io command, all other code paths are same with completing io command via io_uring_cmd_complete_in_task. Meantime add one flag of UBLK_F_URING_CMD_COMP_IN_TASK for comparing the mode easily. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220713140711.97356-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 4f0c16ec875e..a3f5e7c21807 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -48,6 +48,12 @@ */ #define UBLK_F_SUPPORT_ZERO_COPY (1UL << 0) +/* + * Force to complete io cmd via io_uring_cmd_complete_in_task so that + * performance comparison is done easily with using task_work_add + */ +#define UBLK_F_URING_CMD_COMP_IN_TASK (1UL << 1) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 900d156bac2bc474cf7c7bee4efbc6c83ec5ae58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:17 +0200 Subject: block: remove bdevname Replace the remaining calls of bdevname with snprintf using the %pg format specifier. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-10-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22c477fadc0f..2775763c51b9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1457,7 +1457,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); -const char *bdevname(struct block_device *bdev, char *buffer); int lookup_bdev(const char *pathname, dev_t *dev); void blkdev_show(struct seq_file *seqf, off_t offset); -- cgit v1.2.3 From ff07a02e9e8e6489db841e0c48a5c78e7e78d572 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:27 -0700 Subject: treewide: Rename enum req_opf into enum req_op The type name enum req_opf is misleading since it suggests that values of this type include both an operation type and flags. Since values of this type represent an operation only, change the type name into enum req_op. Convert the enum req_op documentation into kernel-doc format. Move a few definitions such that the enum req_op documentation occurs just above the enum req_op definition. The name "req_opf" was introduced by commit ef295ecf090d ("block: better op and flags encoding"). Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-2-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 16 ++++++++-------- include/linux/blkdev.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a24d4078fb21..0e6a2af7ed3d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -337,8 +337,12 @@ enum { typedef __u32 __bitwise blk_mq_req_flags_t; -/* - * Operations and flags common to the bio and request structures. +#define REQ_OP_BITS 8 +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define REQ_FLAG_BITS 24 + +/** + * enum req_op - Operations common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. * * The least significant bit of the operation number indicates the data @@ -350,11 +354,7 @@ typedef __u32 __bitwise blk_mq_req_flags_t; * If a operation does not transfer data the least significant bit has no * meaning. */ -#define REQ_OP_BITS 8 -#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) -#define REQ_FLAG_BITS 24 - -enum req_opf { +enum req_op { /* read sectors from the device */ REQ_OP_READ = 0, /* write sectors to the device */ @@ -509,7 +509,7 @@ static inline bool op_is_discard(unsigned int op) * due to its different handling in the block layer and device response in * case of command failure. */ -static inline bool op_is_zone_mgmt(enum req_opf op) +static inline bool op_is_zone_mgmt(enum req_op op) { switch (op & REQ_OP_MASK) { case REQ_OP_ZONE_RESET: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2775763c51b9..ec072a5129bf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -322,7 +322,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); unsigned int bdev_nr_zones(struct block_device *bdev); -extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, +extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); int blk_revalidate_disk_zones(struct gendisk *disk, -- cgit v1.2.3 From 77e7ffd7ad3952909be6a9c599b7d164c8866fec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:28 -0700 Subject: block: Use enum req_op where appropriate Change the type of the arguments that are used to pass a REQ_OP_* value from int or unsigned int into enum req_op to improve static type checking. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-3-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- include/linux/blkdev.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 0e6a2af7ed3d..cce8768bc00b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -522,7 +522,7 @@ static inline bool op_is_zone_mgmt(enum req_op op) } } -static inline int op_stat_group(unsigned int op) +static inline int op_stat_group(enum req_op op) { if (op_is_discard(op)) return STAT_DISCARD; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ec072a5129bf..2f13f0062192 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -872,7 +872,7 @@ extern void blk_queue_exit(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); /* Helper to convert REQ_OP_XXX to its string format XXX */ -extern const char *blk_op_str(unsigned int op); +extern const char *blk_op_str(enum req_op op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); @@ -1434,9 +1434,9 @@ static inline void blk_wake_io_task(struct task_struct *waiter) } unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, unsigned int op, + unsigned int sectors, enum req_op op, unsigned long start_time); -void bdev_end_io_acct(struct block_device *bdev, unsigned int op, +void bdev_end_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time); void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); -- cgit v1.2.3 From 86947df3a9236481276e8baadde50a403b02b4d4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:29 -0700 Subject: block: Change the type of the last .rw_page() argument All .rw_page() callers pass an enum req_op value as last argument. Make this explicit by changing the type of the last argument into enum req_op. See also commit 3f289dcb4b26 ("block: make bdev_ops->rw_page() take a REQ_OP instead of bool"). Cc: Tejun Heo Cc: Minchan Kim Cc: Dan Williams Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-4-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2f13f0062192..ca2ff113ea00 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1381,7 +1381,7 @@ struct block_device_operations { unsigned int flags); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); - int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); + int (*rw_page)(struct block_device *, sector_t, struct page *, enum req_op); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, -- cgit v1.2.3 From 2d9b02be73ba8efba406b399a722b4e33614dd0e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:30 -0700 Subject: block: Change the type of req_op() and bio_op() into enum req_op Improve static type checking by changing the type of the value returned by req_op() and bio_op() from unsigned int into enum req_op. Insert 'default: break;' in switch statements on the enum req_op type to prevent that the compiler warns about these switch statements. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Cc: Tim Waugh Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-5-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++-- include/linux/blk_types.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d74f6a6b7e69..677195de0663 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -198,8 +198,10 @@ struct request { void *end_io_data; }; -#define req_op(req) \ - ((req)->cmd_flags & REQ_OP_MASK) +static inline enum req_op req_op(const struct request *req) +{ + return req->cmd_flags & REQ_OP_MASK; +} static inline bool blk_rq_is_passthrough(struct request *rq) { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cce8768bc00b..e66cbe377ae8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -463,8 +463,10 @@ enum stat_group { NR_STAT_GROUPS }; -#define bio_op(bio) \ - ((bio)->bi_opf & REQ_OP_MASK) +static inline enum req_op bio_op(const struct bio *bio) +{ + return bio->bi_opf & REQ_OP_MASK; +} /* obsolete, don't use in new code */ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, -- cgit v1.2.3 From 342a72a334073f163da924b69c3d3fb4685eb33a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:31 -0700 Subject: block: Introduce the type blk_opf_t Introduce the type blk_opf_t for the request operation and flags (REQ_OP_* and REQ_*). This type will be used to improve documentation of the block layer code and also to allow sparse to verify whether request flags are used correctly. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-6-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 97 +++++++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e66cbe377ae8..1ef99790f6ed 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -240,6 +240,8 @@ static inline void bio_issue_init(struct bio_issue *issue, ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } +typedef __u32 __bitwise blk_opf_t; + typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U @@ -250,7 +252,7 @@ typedef unsigned int blk_qc_t; struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; - unsigned int bi_opf; /* bottom bits REQ_OP, top bits + blk_opf_t bi_opf; /* bottom bits REQ_OP, top bits * req_flags. */ unsigned short bi_flags; /* BIO_* below */ @@ -338,7 +340,7 @@ enum { typedef __u32 __bitwise blk_mq_req_flags_t; #define REQ_OP_BITS 8 -#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define REQ_OP_MASK (__force blk_opf_t)((1 << REQ_OP_BITS) - 1) #define REQ_FLAG_BITS 24 /** @@ -356,35 +358,35 @@ typedef __u32 __bitwise blk_mq_req_flags_t; */ enum req_op { /* read sectors from the device */ - REQ_OP_READ = 0, + REQ_OP_READ = (__force blk_opf_t)0, /* write sectors to the device */ - REQ_OP_WRITE = 1, + REQ_OP_WRITE = (__force blk_opf_t)1, /* flush the volatile write cache */ - REQ_OP_FLUSH = 2, + REQ_OP_FLUSH = (__force blk_opf_t)2, /* discard sectors */ - REQ_OP_DISCARD = 3, + REQ_OP_DISCARD = (__force blk_opf_t)3, /* securely erase sectors */ - REQ_OP_SECURE_ERASE = 5, + REQ_OP_SECURE_ERASE = (__force blk_opf_t)5, /* write the zero filled sector many times */ - REQ_OP_WRITE_ZEROES = 9, + REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, /* Open a zone */ - REQ_OP_ZONE_OPEN = 10, + REQ_OP_ZONE_OPEN = (__force blk_opf_t)10, /* Close a zone */ - REQ_OP_ZONE_CLOSE = 11, + REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, /* Transition a zone to full */ - REQ_OP_ZONE_FINISH = 12, + REQ_OP_ZONE_FINISH = (__force blk_opf_t)12, /* write data at the current zone write pointer */ - REQ_OP_ZONE_APPEND = 13, + REQ_OP_ZONE_APPEND = (__force blk_opf_t)13, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = 15, + REQ_OP_ZONE_RESET = (__force blk_opf_t)15, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = 17, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, /* Driver private requests */ - REQ_OP_DRV_IN = 34, - REQ_OP_DRV_OUT = 35, + REQ_OP_DRV_IN = (__force blk_opf_t)34, + REQ_OP_DRV_OUT = (__force blk_opf_t)35, - REQ_OP_LAST, + REQ_OP_LAST = (__force blk_opf_t)36, }; enum req_flag_bits { @@ -425,28 +427,31 @@ enum req_flag_bits { __REQ_NR_BITS, /* stops here */ }; -#define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV) -#define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT) -#define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER) -#define REQ_SYNC (1ULL << __REQ_SYNC) -#define REQ_META (1ULL << __REQ_META) -#define REQ_PRIO (1ULL << __REQ_PRIO) -#define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_IDLE (1ULL << __REQ_IDLE) -#define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) -#define REQ_FUA (1ULL << __REQ_FUA) -#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) -#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) -#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) -#define REQ_NOWAIT (1ULL << __REQ_NOWAIT) -#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) - -#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) -#define REQ_POLLED (1ULL << __REQ_POLLED) -#define REQ_ALLOC_CACHE (1ULL << __REQ_ALLOC_CACHE) - -#define REQ_DRV (1ULL << __REQ_DRV) -#define REQ_SWAP (1ULL << __REQ_SWAP) +#define REQ_FAILFAST_DEV \ + (__force blk_opf_t)(1ULL << __REQ_FAILFAST_DEV) +#define REQ_FAILFAST_TRANSPORT \ + (__force blk_opf_t)(1ULL << __REQ_FAILFAST_TRANSPORT) +#define REQ_FAILFAST_DRIVER \ + (__force blk_opf_t)(1ULL << __REQ_FAILFAST_DRIVER) +#define REQ_SYNC (__force blk_opf_t)(1ULL << __REQ_SYNC) +#define REQ_META (__force blk_opf_t)(1ULL << __REQ_META) +#define REQ_PRIO (__force blk_opf_t)(1ULL << __REQ_PRIO) +#define REQ_NOMERGE (__force blk_opf_t)(1ULL << __REQ_NOMERGE) +#define REQ_IDLE (__force blk_opf_t)(1ULL << __REQ_IDLE) +#define REQ_INTEGRITY (__force blk_opf_t)(1ULL << __REQ_INTEGRITY) +#define REQ_FUA (__force blk_opf_t)(1ULL << __REQ_FUA) +#define REQ_PREFLUSH (__force blk_opf_t)(1ULL << __REQ_PREFLUSH) +#define REQ_RAHEAD (__force blk_opf_t)(1ULL << __REQ_RAHEAD) +#define REQ_BACKGROUND (__force blk_opf_t)(1ULL << __REQ_BACKGROUND) +#define REQ_NOWAIT (__force blk_opf_t)(1ULL << __REQ_NOWAIT) +#define REQ_CGROUP_PUNT (__force blk_opf_t)(1ULL << __REQ_CGROUP_PUNT) + +#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) +#define REQ_POLLED (__force blk_opf_t)(1ULL << __REQ_POLLED) +#define REQ_ALLOC_CACHE (__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE) + +#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) +#define REQ_SWAP (__force blk_opf_t)(1ULL << __REQ_SWAP) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) @@ -469,22 +474,22 @@ static inline enum req_op bio_op(const struct bio *bio) } /* obsolete, don't use in new code */ -static inline void bio_set_op_attrs(struct bio *bio, unsigned op, - unsigned op_flags) +static inline void bio_set_op_attrs(struct bio *bio, enum req_op op, + blk_opf_t op_flags) { bio->bi_opf = op | op_flags; } -static inline bool op_is_write(unsigned int op) +static inline bool op_is_write(blk_opf_t op) { - return (op & 1); + return !!(op & (__force blk_opf_t)1); } /* * Check if the bio or request is one that needs special treatment in the * flush state machine. */ -static inline bool op_is_flush(unsigned int op) +static inline bool op_is_flush(blk_opf_t op) { return op & (REQ_FUA | REQ_PREFLUSH); } @@ -494,13 +499,13 @@ static inline bool op_is_flush(unsigned int op) * PREFLUSH flag. Other operations may be marked as synchronous using the * REQ_SYNC flag. */ -static inline bool op_is_sync(unsigned int op) +static inline bool op_is_sync(blk_opf_t op) { return (op & REQ_OP_MASK) == REQ_OP_READ || (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } -static inline bool op_is_discard(unsigned int op) +static inline bool op_is_discard(blk_opf_t op) { return (op & REQ_OP_MASK) == REQ_OP_DISCARD; } -- cgit v1.2.3 From 16458cf3bd15e5624205df6e8a76b9a5363555f3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:32 -0700 Subject: block: Use the new blk_opf_t type Use the new blk_opf_t type for arguments and variables that represent request flags or a bitwise combination of a request operation and request flags. Rename the function arguments and also a structure member that hold a request operation and flags from 'rw' into 'opf'. This patch does not change any functionality. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-7-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/bio.h | 10 +++++----- include/linux/blk-mq.h | 6 +++--- include/linux/blkdev.h | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 992ee987f273..ca22b06700a9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -405,7 +405,7 @@ extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, - unsigned int opf, gfp_t gfp_mask, + blk_opf_t opf, gfp_t gfp_mask, struct bio_set *bs); struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask); extern void bio_put(struct bio *); @@ -418,7 +418,7 @@ int bio_init_clone(struct block_device *bdev, struct bio *bio, extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(struct block_device *bdev, - unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask) + unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask) { return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set); } @@ -456,9 +456,9 @@ struct request_queue; extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, - unsigned short max_vecs, unsigned int opf); + unsigned short max_vecs, blk_opf_t opf); extern void bio_uninit(struct bio *); -void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf); +void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); @@ -789,6 +789,6 @@ static inline void bio_clear_polled(struct bio *bio) } struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, - unsigned int nr_pages, unsigned int opf, gfp_t gfp); + unsigned int nr_pages, blk_opf_t opf, gfp_t gfp); #endif /* __LINUX_BIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 677195de0663..effee1dc715a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -80,7 +80,7 @@ struct request { struct blk_mq_ctx *mq_ctx; struct blk_mq_hw_ctx *mq_hctx; - unsigned int cmd_flags; /* op and common flags */ + blk_opf_t cmd_flags; /* op and common flags */ req_flags_t rq_flags; int tag; @@ -715,10 +715,10 @@ enum { BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), }; -struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, +struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, - unsigned int op, blk_mq_req_flags_t flags, + blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx); /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ca2ff113ea00..d04bdf549efa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -250,7 +250,7 @@ static inline int blk_validate_block_size(unsigned long bsize) return 0; } -static inline bool blk_op_is_passthrough(unsigned int op) +static inline bool blk_op_is_passthrough(blk_opf_t op) { op &= REQ_OP_MASK; return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; -- cgit v1.2.3 From 919dbca8670d0f7828dfbb2f9b434ac22dca8d2e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:37 -0700 Subject: blktrace: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for a function argument that represents a combination of a request operation and request flags. Rename that argument from 'op' into 'opf' to make its role more clear. Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Li Zefan Cc: Chaitanya Kulkarni Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-12-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index f6f9b544365a..cfbda114348c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -7,6 +7,7 @@ #include #include #include +#include #if defined(CONFIG_BLK_DEV_IO_TRACE) @@ -105,7 +106,7 @@ struct compat_blk_user_trace_setup { #endif -void blk_fill_rwbs(char *rwbs, unsigned int op); +void blk_fill_rwbs(char *rwbs, blk_opf_t opf); static inline sector_t blk_rq_trace_sector(struct request *rq) { -- cgit v1.2.3 From 581075e4f6475bb97c73ecccf68636a9453a31fd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:47 -0700 Subject: dm/core: Reduce the size of struct dm_io_request Combine the bi_op and bi_op_flags into the bi_opf member. Use the new blk_opf_t type to improve static type checking. This patch does not change any functionality. Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-22-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/dm-io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index a52c6580cc9a..8e1c4ab5df04 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -13,6 +13,7 @@ #ifdef __KERNEL__ #include +#include struct dm_io_region { struct block_device *bdev; @@ -57,8 +58,7 @@ struct dm_io_notify { */ struct dm_io_client; struct dm_io_request { - int bi_op; /* REQ_OP */ - int bi_op_flags; /* req_flag_bits */ + blk_opf_t bi_opf; /* Request type and flags */ struct dm_io_memory mem; /* Memory to use for io */ struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ struct dm_io_client *client; /* Client memory handler */ -- cgit v1.2.3 From ea957547e819a21bd49895c6162f78d542867d39 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:05 -0700 Subject: scsi/core: Improve static type checking Improve static type checking by using the new blk_opf_t type for the combination of a request operation and its flags. Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: John Garry Cc: Mike Christie Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-40-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/scsi/scsi_cmnd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 1e80e70dfa92..bac55decf900 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -386,7 +386,7 @@ static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd) extern void scsi_build_sense(struct scsi_cmnd *scmd, int desc, u8 key, u8 asc, u8 ascq); -struct request *scsi_alloc_request(struct request_queue *q, - unsigned int op, blk_mq_req_flags_t flags); +struct request *scsi_alloc_request(struct request_queue *q, blk_opf_t opf, + blk_mq_req_flags_t flags); #endif /* _SCSI_SCSI_CMND_H */ -- cgit v1.2.3 From 2599cac57a9af4e7ce628e2ef41e92797cba4ae2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:07 -0700 Subject: scsi/core: Use the new blk_opf_t type Use the new blk_opf_t type for arguments and variables that represent request flags. Use the !! operator in scsi_noretry_cmd() to convert the blk_opf_t type into a boolean. This patch does not change any functionality. Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: John Garry Cc: Mike Christie Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-42-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/scsi/scsi_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 7cf5f3b7589f..2493bd65351a 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -457,7 +457,7 @@ extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, u64 flags, + int timeout, int retries, blk_opf_t flags, req_flags_t rq_flags, int *resid); /* Make sure any sense buffer is the correct size. */ #define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \ -- cgit v1.2.3 From f8e6e4bd9fd8c452565f3eaeb358e3cc08d880f4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:11 -0700 Subject: mm: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for block layer request flags. Cc: Andrew Morton Cc: Christoph Hellwig Cc: Jan Kara Cc: Stefan Roesch Cc: NeilBrown Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-46-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/writeback.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index da21d63f70e2..e91bea371b18 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -101,9 +101,9 @@ struct writeback_control { #endif }; -static inline int wbc_to_write_flags(struct writeback_control *wbc) +static inline blk_opf_t wbc_to_write_flags(struct writeback_control *wbc) { - int flags = 0; + blk_opf_t flags = 0; if (wbc->punt_to_cgroup) flags = REQ_CGROUP_PUNT; -- cgit v1.2.3 From 3ae7286943ae6f6bfecfe0a3da9d1a4c64f5531f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:12 -0700 Subject: fs/buffer: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for block layer request flags. Change WRITE into REQ_OP_WRITE. This patch does not change any functionality since REQ_OP_WRITE == WRITE == 1. Reviewed-by: Jan Kara Cc: Al Viro Cc: Christoph Hellwig Cc: Matthew Wilcox Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-47-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c9d1463bb20f..9795df9400bd 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -9,6 +9,7 @@ #define _LINUX_BUFFER_HEAD_H #include +#include #include #include #include @@ -201,11 +202,11 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(int, int, int, struct buffer_head * bh[]); +void ll_rw_block(enum req_op, blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); -int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); -void write_dirty_buffer(struct buffer_head *bh, int op_flags); -int submit_bh(int, int, struct buffer_head *); +int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); +void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); +int submit_bh(enum req_op, blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -- cgit v1.2.3 From 1420c4a549bf28ffddbed827d61fb3d4d2132ddb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:13 -0700 Subject: fs/buffer: Combine two submit_bh() and ll_rw_block() arguments Both submit_bh() and ll_rw_block() accept a request operation type and request flags as their first two arguments. Micro-optimize these two functions by combining these first two arguments into a single argument. This patch does not change the behavior of any of the modified code. Cc: Alexander Viro Cc: Jan Kara Acked-by: Song Liu (for the md changes) Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-48-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9795df9400bd..bb68eb6407da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -202,11 +202,11 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(enum req_op, blk_opf_t, int, struct buffer_head * bh[]); +void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); -int submit_bh(enum req_op, blk_opf_t, struct buffer_head *); +int submit_bh(blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -- cgit v1.2.3 From 7649c873c16a384d447f7dbf9b153e333159f914 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:18 -0700 Subject: fs/f2fs: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Cc: Jaegeuk Kim Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-53-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/trace/events/f2fs.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 513e889ef8aa..f1e922237736 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -66,7 +66,7 @@ TRACE_DEFINE_ENUM(CP_RESIZE); #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_PREFLUSH | REQ_FUA) -#define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS) +#define F2FS_BIO_FLAG_MASK(t) (__force u32)((t) & F2FS_OP_FLAGS) #define show_bio_type(op,op_flags) show_bio_op(op), \ show_bio_op_flags(op_flags) @@ -75,12 +75,12 @@ TRACE_DEFINE_ENUM(CP_RESIZE); #define show_bio_op_flags(flags) \ __print_flags(F2FS_BIO_FLAG_MASK(flags), "|", \ - { REQ_RAHEAD, "R" }, \ - { REQ_SYNC, "S" }, \ - { REQ_META, "M" }, \ - { REQ_PRIO, "P" }, \ - { REQ_PREFLUSH, "PF" }, \ - { REQ_FUA, "FUA" }) + { (__force u32)REQ_RAHEAD, "R" }, \ + { (__force u32)REQ_SYNC, "S" }, \ + { (__force u32)REQ_META, "M" }, \ + { (__force u32)REQ_PRIO, "P" }, \ + { (__force u32)REQ_PREFLUSH, "PF" }, \ + { (__force u32)REQ_FUA, "FUA" }) #define show_data_type(type) \ __print_symbolic(type, \ @@ -1036,8 +1036,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __field(pgoff_t, index) __field(block_t, old_blkaddr) __field(block_t, new_blkaddr) - __field(int, op) - __field(int, op_flags) + __field(enum req_op, op) + __field(blk_opf_t, op_flags) __field(int, temp) __field(int, type) ), @@ -1092,8 +1092,8 @@ DECLARE_EVENT_CLASS(f2fs__bio, TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, target) - __field(int, op) - __field(int, op_flags) + __field(enum req_op, op) + __field(blk_opf_t, op_flags) __field(int, type) __field(sector_t, sector) __field(unsigned int, size) -- cgit v1.2.3 From 6669797b0dd41ced457760b6e1014fdda8ce19ce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:22 -0700 Subject: fs/jbd2: Fix the documentation of the jbd2_write_superblock() callers Commit 2a222ca992c3 ("fs: have submit_bh users pass in op and flags separately") renamed the jbd2_write_superblock() 'write_op' argument into 'write_flags'. Propagate this change to the jbd2_write_superblock() callers. Additionally, change the type of 'write_flags' into blk_opf_t. Cc: Mike Christie Cc: Theodore Ts'o Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-57-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/jbd2.h | 2 +- include/trace/events/jbd2.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e79d6e0b14e8..dc1724131300 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1557,7 +1557,7 @@ extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *); extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, - unsigned long, int); + unsigned long, blk_opf_t); extern void jbd2_journal_abort (journal_t *, int); extern int jbd2_journal_errno (journal_t *); extern void jbd2_journal_ack_err (journal_t *); diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index a4dfe005983d..99f783c384bb 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -355,22 +355,22 @@ TRACE_EVENT(jbd2_update_log_tail, TRACE_EVENT(jbd2_write_superblock, - TP_PROTO(journal_t *journal, int write_op), + TP_PROTO(journal_t *journal, blk_opf_t write_flags), - TP_ARGS(journal, write_op), + TP_ARGS(journal, write_flags), TP_STRUCT__entry( __field( dev_t, dev ) - __field( int, write_op ) + __field( blk_opf_t, write_flags ) ), TP_fast_assign( __entry->dev = journal->j_fs_dev->bd_dev; - __entry->write_op = write_op; + __entry->write_flags = write_flags; ), - TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->write_op) + TP_printk("dev %d,%d write_flags %x", MAJOR(__entry->dev), + MINOR(__entry->dev), (__force u32)__entry->write_flags) ); TRACE_EVENT(jbd2_lock_buffer_stall, -- cgit v1.2.3 From ed4512590bd5839f8ea9eef1626b0f4db626b1d1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:24 -0700 Subject: fs/nilfs2: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Combine the 'mode' and 'mode_flags' arguments of nilfs_btnode_submit_block into a single argument 'opf'. Reviewed-by: Ryusuke Konishi Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-59-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/trace/events/nilfs2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h index 84ee31fc04cc..8efc6236f57c 100644 --- a/include/trace/events/nilfs2.h +++ b/include/trace/events/nilfs2.h @@ -192,7 +192,7 @@ TRACE_EVENT(nilfs2_mdt_submit_block, TP_PROTO(struct inode *inode, unsigned long ino, unsigned long blkoff, - int mode), + enum req_op mode), TP_ARGS(inode, ino, blkoff, mode), @@ -200,7 +200,7 @@ TRACE_EVENT(nilfs2_mdt_submit_block, __field(struct inode *, inode) __field(unsigned long, ino) __field(unsigned long, blkoff) - __field(int, mode) + __field(enum req_op, mode) ), TP_fast_assign( -- cgit v1.2.3 From d276a22314c2bad9136c5e0b09eb3c8a560e1161 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Jul 2022 08:30:13 +0200 Subject: ublk: remove UBLK_IO_F_INTEGRITY The ublk protocol has no mechanism to actually transfer the integrity metadata, so don't define this flag, which requires that an integrity payload is attached to a bio. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220718063013.335531-1-hch@lst.de Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index a3f5e7c21807..d6879eea2fde 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -106,7 +106,6 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9) #define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) #define UBLK_IO_F_META (1U << 11) -#define UBLK_IO_F_INTEGRITY (1U << 12) #define UBLK_IO_F_FUA (1U << 13) #define UBLK_IO_F_PREFLUSH (1U << 14) #define UBLK_IO_F_NOUNMAP (1U << 15) -- cgit v1.2.3 From 5f8bcc837a9640ba4bf5e7b1d7f9b254ea029f47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:10 +0200 Subject: ublk: remove UBLK_IO_F_PREFLUSH REQ_PREFLUSH is turned into REQ_OP_FLUSH by the flush state machine and thus never seen by a blk-mq based driver. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220721130916.1869719-3-hch@lst.de Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index d6879eea2fde..917580b34198 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -107,7 +107,6 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) #define UBLK_IO_F_META (1U << 11) #define UBLK_IO_F_FUA (1U << 13) -#define UBLK_IO_F_PREFLUSH (1U << 14) #define UBLK_IO_F_NOUNMAP (1U << 15) #define UBLK_IO_F_SWAP (1U << 16) -- cgit v1.2.3 From 6d8c5afc9ab14595707ff25d971dde45728eba3e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jul 2022 18:38:17 +0800 Subject: ublk_drv: make sure that correct flags(features) returned to userspace Userspace may support more features or new added flags, but the driver side can be old, so make sure correct flags(features) returned to userpsace, then userspace can work as expected. Also mark the 2nd flags as reversed, just use the 1st one. When we run out of flags, the reserved one can be handled at that time. Reviewed-by: Christoph Hellwig Reviewed-by: ZiyangZhang Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220722103817.631258-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 917580b34198..ca33092354ab 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -46,13 +46,13 @@ * zero copy requires 4k block size, and can remap ublk driver's io * request into ublksrv's vm space */ -#define UBLK_F_SUPPORT_ZERO_COPY (1UL << 0) +#define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) /* * Force to complete io cmd via io_uring_cmd_complete_in_task so that * performance comparison is done easily with using task_work_add */ -#define UBLK_F_URING_CMD_COMP_IN_TASK (1UL << 1) +#define UBLK_F_URING_CMD_COMP_IN_TASK (1ULL << 1) /* device state */ #define UBLK_S_DEV_DEAD 0 @@ -88,7 +88,8 @@ struct ublksrv_ctrl_dev_info { __s32 ublksrv_pid; __s32 reserved0; - __u64 flags[2]; + __u64 flags; + __u64 flags_reserved; /* For ublksrv internal use, invisible to ublk driver */ __u64 ublksrv_flags; -- cgit v1.2.3