From 432acd550e3607d5fea23e27f6ab4e4567deccfd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Dec 2023 08:26:59 +0100 Subject: iomap: move the io_folios field out of struct iomap_ioend The io_folios member in struct iomap_ioend counts the number of folios added to an ioend. It is only used at submission time and can thus be moved to iomap_writepage_ctx instead. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231207072710.176093-4-hch@lst.de Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Christian Brauner --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 96dd0acbba44..b2a05dff914d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -293,7 +293,6 @@ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_type; u16 io_flags; /* IOMAP_F_* */ - u32 io_folios; /* folios added to ioend */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ @@ -329,6 +328,7 @@ struct iomap_writepage_ctx { struct iomap iomap; struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; + u32 nr_folios; /* folios added to the ioend */ }; void iomap_finish_ioends(struct iomap_ioend *ioend, int error); -- cgit v1.2.3 From ae5535efd8c445ad6033ac0d5da0197897b148ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Dec 2023 08:27:05 +0100 Subject: iomap: don't chain bios Back in the days when a single bio could only be filled to the hardware limits, and we scheduled a work item for each bio completion, chaining multiple bios for a single ioend made a lot of sense to reduce the number of completions. 
But these days bios can be filled until we reach the number of vectors or total size limit, which means we can always fit at least 1 megabyte worth of data in the worst case, but usually a lot more due to large folios. The only thing bio chaining is buying us now is to reduce the size of the allocation from an ioend with an embedded bio into a plain bio, which is a 52-byte difference on 64-bit systems. This is not worth the added complexity, so remove the bio chaining and only use the bio embedded into the ioend. This will help to simplify further changes to the iomap writeback code. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231207072710.176093-10-hch@lst.de Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/iomap.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b2a05dff914d..b8d3b658ad2b 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -297,10 +297,14 @@ struct iomap_ioend { size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ - struct bio *io_bio; /* bio being built */ - struct bio io_inline_bio; /* MUST BE LAST! */ + struct bio io_bio; /* MUST BE LAST! */ }; +static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) +{ + return container_of(bio, struct iomap_ioend, io_bio); +} + struct iomap_writeback_ops { /* * Required, maps the blocks so that writeback can be performed on -- cgit v1.2.3 From 30deff8531f469453ccc0981f14eceb0a2ea68d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Dec 2023 08:27:09 +0100 Subject: iomap: map multiple blocks at a time The ->map_blocks interface returns a valid range for writeback, but we still call back into it for every block, which is a bit inefficient.
Change iomap_writepage_map to use the valid range in the map until the end of the folio or the dirty range inside the folio instead of calling back into every block. Note that the range is not used over folio boundaries as we need to be able to check the mapping sequence count under the folio lock. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231207072710.176093-14-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/iomap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b8d3b658ad2b..49d93f538785 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -309,6 +309,13 @@ struct iomap_writeback_ops { /* * Required, maps the blocks so that writeback can be performed on * the range starting at offset. + * + * Can return arbitrarily large regions, but we need to call into it at + * least once per folio to allow the file systems to synchronize with + * the write path that could be invalidating mappings. + * + * An existing mapping from a previous call to this method can be reused + * by the file system if it is still valid. */ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, loff_t offset); -- cgit v1.2.3 From 19871b5c7a003946d3cd4209a348ab7c0df5dbad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Dec 2023 08:27:10 +0100 Subject: iomap: pass the length of the dirty region to ->map_blocks Let the file system know how much dirty data exists at the passed in offset. This allows file systems to allocate the right amount of space that actually is written back if they can't eagerly convert (e.g. because they don't support unwritten extents). 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231207072710.176093-15-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 49d93f538785..6fc1c858013d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -318,7 +318,7 @@ struct iomap_writeback_ops { * by the file system if it is still valid. */ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset); + loff_t offset, unsigned len); /* * Optional, allows the file systems to perform actions just before -- cgit v1.2.3 From fe3944fb245ab99570552a3bf970b00058a9ca6d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 2 Feb 2024 12:39:23 -0800 Subject: fs: Move enum rw_hint into a new header file Move enum rw_hint into a new header file to prepare for using this data type in the block layer. Add the attribute __packed to reduce the space occupied by instances of this data type from four bytes to one byte. Change the data type of i_write_hint from u8 into enum rw_hint. 
Reviewed-by: Christoph Hellwig Acked-by: Chao Yu # for the F2FS part Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240202203926.2478590-5-bvanassche@acm.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 16 ++-------------- include/linux/rw_hint.h | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 14 deletions(-) create mode 100644 include/linux/rw_hint.h (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a70495..bdabda5dc364 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,6 +43,7 @@ #include #include #include +#include <linux/rw_hint.h> #include #include @@ -309,19 +310,6 @@ struct address_space; struct writeback_control; struct readahead_control; -/* - * Write life time hint values. - * Stored in struct inode as u8. - */ -enum rw_hint { - WRITE_LIFE_NOT_SET = 0, - WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, - WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, - WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, - WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, - WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, -}; - /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC @@ -677,7 +665,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; - u8 i_write_hint; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h new file mode 100644 index 000000000000..309ca72f2dfb --- /dev/null +++ b/include/linux/rw_hint.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_RW_HINT_H +#define _LINUX_RW_HINT_H + +#include <linux/build_bug.h> +#include <linux/compiler_attributes.h> +#include <uapi/linux/fcntl.h> + +/* Block storage write lifetime hint values.
*/ +enum rw_hint { + WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +} __packed; + +/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */ +#ifndef __CHECKER__ +static_assert(sizeof(enum rw_hint) == 1); +#endif + +#endif /* _LINUX_RW_HINT_H */ -- cgit v1.2.3 From 449813515d3e5efec85206bb91588a6249a421a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 2 Feb 2024 12:39:25 -0800 Subject: block, fs: Restore the per-bio/request data lifetime fields Restore support for passing data lifetime information from filesystems to block drivers. This patch reverts commit b179c98f7697 ("block: Remove request.write_hint") and commit c75e707fe1aa ("block: remove the per-bio/request write hint"). This patch does not modify the size of struct bio because the new bi_write_hint member fills a hole in struct bio. 
pahole reports the following for struct bio on an x86_64 system with this patch applied: /* size: 112, cachelines: 2, members: 20 */ /* sum members: 110, holes: 1, sum holes: 2 */ /* last cacheline: 48 bytes */ Reviewed-by: Kanchan Joshi Cc: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240202203926.2478590-7-bvanassche@acm.org Signed-off-by: Christian Brauner --- include/linux/blk-mq.h | 2 ++ include/linux/blk_types.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7a8150a5f051..492b0128b5d9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -8,6 +8,7 @@ #include #include #include +#include <linux/rw_hint.h> struct blk_mq_tags; struct blk_flush_queue; @@ -135,6 +136,7 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif + enum rw_hint write_hint; unsigned short ioprio; enum mq_rq_state state; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index f288c94374b3..12d87cef2c03 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -10,6 +10,7 @@ #include #include #include +#include <linux/rw_hint.h> struct bio_set; struct bio; @@ -269,6 +270,7 @@ struct bio { */ unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; + enum rw_hint bi_write_hint; blk_status_t bi_status; atomic_t __bi_remaining; -- cgit v1.2.3