From 43c9835b144c7ce29efe142d662529662a9eb376 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Jul 2023 11:42:39 +0200 Subject: block: don't allow enabling a cache on devices that don't support it Currently the write_cache attribute allows enabling the QUEUE_FLAG_WC flag on devices that never claimed the capability. Fix that by adding a QUEUE_FLAG_HW_WC flag that is set by blk_queue_write_cache and guards re-enabling the cache through sysfs. Note that any rescan that calls blk_queue_write_cache will still re-enable the write cache as in the current code. Fixes: 93e9d8e836cb ("block: add ability to flag write back caching on a device") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230707094239.107968-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ed44a997f629..2f5371b8482c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -538,6 +538,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ +#define QUEUE_FLAG_HW_WC 18 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ -- cgit v1.2.3 From 660e802c76c89e871c29cd3174c07c8d23e39c35 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Mon, 17 Jul 2023 12:00:55 +0800 Subject: blk-mq: use percpu csd to remote complete instead of per-rq csd If request need to be completed remotely, we insert it into percpu llist, and smp_call_function_single_async() if llist is empty previously. We don't need to use per-rq csd, percpu csd is enough. And the size of struct request is decreased by 24 bytes. This way is cleaner, and looks correct, given block softirq is guaranteed to be scheduled to consume the list if one new request is added to this percpu list, either smp_call_function_single_async() returns -EBUSY or 0. Signed-off-by: Chengming Zhou Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230717040058.3993930-2-chengming.zhou@linux.dev Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b96e00499f9e..67f810857634 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -182,10 +182,7 @@ struct request { rq_end_io_fn *saved_end_io; } flush; - union { - struct __call_single_data csd; - u64 fifo_time; - }; + u64 fifo_time; /* * completion callback. -- cgit v1.2.3 From 81ada09cc25e4bf2de7d2951925fb409338a545d Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Mon, 17 Jul 2023 12:00:58 +0800 Subject: blk-flush: reuse rq queuelist in flush state machine Since we don't need to maintain inflight flush_data requests list anymore, we can reuse rq->queuelist for flush pending list. Note in mq_flush_data_end_io(), we need to re-initialize rq->queuelist before reusing it in the state machine when end, since the rq->rq_next also reuse it, may have corrupted rq->queuelist by the driver. This patch decrease the size of struct request by 16 bytes. Signed-off-by: Chengming Zhou Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230717040058.3993930-5-chengming.zhou@linux.dev Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 67f810857634..01e8c31db665 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -178,7 +178,6 @@ struct request { struct { unsigned int seq; - struct list_head list; rq_end_io_fn *saved_end_io; } flush; -- cgit v1.2.3 From 2ba39cc46bfe463cb9673bf62a04c4c21942f1f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Aug 2023 19:21:57 +0200 Subject: fs: rename and move block_page_mkwrite_return block_page_mkwrite_return is neither block nor mkwrite specific, and should not be under CONFIG_BLOCK. Move it to mm.h and rename it to vmf_fs_error. Signed-off-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20230801172201.1923299-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 12 ------------ include/linux/mm.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6cb3e9af78c9..7002a9ff63a3 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -291,18 +291,6 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); -/* Convert errno to return value from ->page_mkwrite() call */ -static inline vm_fault_t block_page_mkwrite_return(int err) -{ - if (err == 0) - return VM_FAULT_LOCKED; - if (err == -EFAULT || err == -EAGAIN) - return VM_FAULT_NOPAGE; - if (err == -ENOMEM) - return VM_FAULT_OOM; - /* -ENOSPC, -EDQUOT, -EIO ... */ - return VM_FAULT_SIGBUS; -} sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); diff --git a/include/linux/mm.h b/include/linux/mm.h index 2dd73e4f3d8e..75777eae1c9c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3386,6 +3386,24 @@ static inline vm_fault_t vmf_error(int err) return VM_FAULT_SIGBUS; } +/* + * Convert errno to return value for ->page_mkwrite() calls. + * + * This should eventually be merged with vmf_error() above, but will need a + * careful audit of all vmf_error() callers. + */ +static inline vm_fault_t vmf_fs_error(int err) +{ + if (err == 0) + return VM_FAULT_LOCKED; + if (err == -EFAULT || err == -EAGAIN) + return VM_FAULT_NOPAGE; + if (err == -ENOMEM) + return VM_FAULT_OOM; + /* -ENOSPC, -EDQUOT, -EIO ... */ + return VM_FAULT_SIGBUS; +} + struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags); -- cgit v1.2.3 From 925c86a19bacf8ce10eb666328fb3fa5aff7b951 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Aug 2023 19:22:01 +0200 Subject: fs: add CONFIG_BUFFER_HEAD Add a new config option that controls building the buffer_head code, and select it from all file systems and stacking drivers that need it. For the block device nodes and alternative iomap based buffered I/O path is provided when buffer_head support is not enabled, and iomap needs a a small tweak to define the IOMAP_F_BUFFER_HEAD flag to 0 to not call into the buffer_head code when it doesn't exist. Otherwise this is just Kconfig and ifdef changes. Signed-off-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20230801172201.1923299-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 32 ++++++++++++++++---------------- include/linux/iomap.h | 4 ++++ 2 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 7002a9ff63a3..c89ef50d5112 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -16,8 +16,6 @@ #include #include -#ifdef CONFIG_BLOCK - enum bh_state_bits { BH_Uptodate, /* Contains valid data */ BH_Dirty, /* Is dirty */ @@ -198,7 +196,6 @@ void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset); -bool try_to_free_buffers(struct folio *); struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, bool retry); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, @@ -213,10 +210,6 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate); /* Things to do with buffers at mapping->private_list */ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); -int inode_has_buffers(struct inode *); -void invalidate_inode_buffers(struct inode *); -int remove_inode_buffers(struct inode *inode); -int sync_mapping_buffers(struct address_space *mapping); int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, bool datasync); int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, @@ -240,9 +233,6 @@ void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); -void invalidate_bh_lrus(void); -void invalidate_bh_lrus_cpu(void); -bool has_bh_in_lru(int cpu, void *dummy); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); @@ -258,8 +248,6 @@ int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait); void __bh_read_batch(int nr, struct buffer_head *bhs[], blk_opf_t op_flags, bool force_lock); -extern int buffer_heads_over_limit; - /* * Generic address_space_operations implementations for buffer_head-backed * address_spaces. @@ -304,8 +292,6 @@ extern int buffer_migrate_folio_norefs(struct address_space *, #define buffer_migrate_folio_norefs NULL #endif -void buffer_init(void); - /* * inline definitions */ @@ -465,7 +451,20 @@ __bread(struct block_device *bdev, sector_t block, unsigned size) bool block_dirty_folio(struct address_space *mapping, struct folio *folio); -#else /* CONFIG_BLOCK */ +#ifdef CONFIG_BUFFER_HEAD + +void buffer_init(void); +bool try_to_free_buffers(struct folio *folio); +int inode_has_buffers(struct inode *inode); +void invalidate_inode_buffers(struct inode *inode); +int remove_inode_buffers(struct inode *inode); +int sync_mapping_buffers(struct address_space *mapping); +void invalidate_bh_lrus(void); +void invalidate_bh_lrus_cpu(void); +bool has_bh_in_lru(int cpu, void *dummy); +extern int buffer_heads_over_limit; + +#else /* CONFIG_BUFFER_HEAD */ static inline void buffer_init(void) {} static inline bool try_to_free_buffers(struct folio *folio) { return true; } @@ -473,9 +472,10 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +static inline void invalidate_bh_lrus(void) {} static inline void invalidate_bh_lrus_cpu(void) {} static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } #define buffer_heads_over_limit 0 -#endif /* CONFIG_BLOCK */ +#endif /* CONFIG_BUFFER_HEAD */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e2b836c2e119..54f50d34fd9d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -58,7 +58,11 @@ struct vm_fault; #define IOMAP_F_DIRTY (1U << 1) #define IOMAP_F_SHARED (1U << 2) #define IOMAP_F_MERGED (1U << 3) +#ifdef CONFIG_BUFFER_HEAD #define IOMAP_F_BUFFER_HEAD (1U << 4) +#else +#define IOMAP_F_BUFFER_HEAD 0 +#endif /* CONFIG_BUFFER_HEAD */ #define IOMAP_F_XATTR (1U << 5) /* -- cgit v1.2.3 From 7ba3792718709d410be5d971732b9251cbda67b6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Aug 2023 14:26:34 -0400 Subject: block: Add some exports for bcachefs - bio_set_pages_dirty(), bio_check_pages_dirty() - dio path - blk_status_to_str() - error messages - bio_add_folio() - this should definitely be exported for everyone, it's the modern version of bio_add_page() Signed-off-by: Kent Overstreet Cc: linux-block@vger.kernel.org Cc: Jens Axboe Signed-off-by: Kent Overstreet Link: https://lore.kernel.org/r/20230813182636.2966159-2-kent.overstreet@linux.dev Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2f5371b8482c..4feed1fc141f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -847,6 +847,7 @@ extern const char *blk_op_str(enum req_op op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); +const char *blk_status_to_str(blk_status_t status); /* only poll the hardware once, don't continue until a completion was found */ #define BLK_POLL_ONESHOT (1 << 0) -- cgit v1.2.3 From 649f070e69739d22c57c22dbce0788b72cd93fac Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Aug 2023 14:26:36 -0400 Subject: block: Bring back zero_fill_bio_iter This reverts 6f822e1b5d9dda3d20e87365de138046e3baa03a - this helper is used by bcachefs. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: linux-block@vger.kernel.org Link: https://lore.kernel.org/r/20230813182636.2966159-4-kent.overstreet@linux.dev Signed-off-by: Jens Axboe --- include/linux/bio.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index c4f5b5228105..8b99210eb7fb 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -488,7 +488,12 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void guard_bio_eod(struct bio *bio); -void zero_fill_bio(struct bio *bio); +void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); + +static inline void zero_fill_bio(struct bio *bio) +{ + zero_fill_bio_iter(bio, bio->bi_iter); +} static inline void bio_release_pages(struct bio *bio, bool mark_dirty) { -- cgit v1.2.3 From 9fb10726ecc5145550180aec4fd0adf0a7b1d634 Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Fri, 21 Jul 2023 16:15:32 -0500 Subject: block: sed-opal: Implement IOC_OPAL_DISCOVERY Add IOC_OPAL_DISCOVERY ioctl to return raw discovery data to a SED Opal application. This allows the application to display drive capabilities and state. Signed-off-by: Greg Joyce Reviewed-by: Christoph Hellwig Reviewed-by: Jonathan Derrick Acked-by: Jarkko Sakkinen Link: https://lore.kernel.org/r/20230721211534.3437070-2-gjoyce@linux.vnet.ibm.com Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index bbae1e52ab4f..ef65f589fbeb 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -47,6 +47,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_GET_STATUS: case IOC_OPAL_GET_LR_STATUS: case IOC_OPAL_GET_GEOMETRY: + case IOC_OPAL_DISCOVERY: return true; } return false; -- cgit v1.2.3 From 5c82efc1aee8eb0919aa67a0d2559de5a326bd7c Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Fri, 21 Jul 2023 16:15:33 -0500 Subject: block: sed-opal: Implement IOC_OPAL_REVERT_LSP This is used in conjunction with IOC_OPAL_REVERT_TPR to return a drive to Original Factory State without erasing the data. If IOC_OPAL_REVERT_LSP is called with opal_revert_lsp.options bit OPAL_PRESERVE set prior to calling IOC_OPAL_REVERT_TPR, the drive global locking range will not be erased. Signed-off-by: Greg Joyce Reviewed-by: Christoph Hellwig Reviewed-by: Jonathan Derrick Acked-by: Jarkko Sakkinen Link: https://lore.kernel.org/r/20230721211534.3437070-3-gjoyce@linux.vnet.ibm.com Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index ef65f589fbeb..2f189546e133 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -48,6 +48,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_GET_LR_STATUS: case IOC_OPAL_GET_GEOMETRY: case IOC_OPAL_DISCOVERY: + case IOC_OPAL_REVERT_LSP: return true; } return false; -- cgit v1.2.3 From 3bfeb61256643281ac4be5b8a57e9d9da3db4335 Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Fri, 21 Jul 2023 16:15:34 -0500 Subject: block: sed-opal: keyring support for SED keys Extend the SED block driver so it can alternatively obtain a key from a sed-opal kernel keyring. The SED ioctls will indicate the source of the key, either directly in the ioctl data or from the keyring. This allows the use of SED commands in scripts such as udev scripts so that drives may be automatically unlocked as they become available. Signed-off-by: Greg Joyce Reviewed-by: Jonathan Derrick Acked-by: Jarkko Sakkinen Link: https://lore.kernel.org/r/20230721211534.3437070-4-gjoyce@linux.vnet.ibm.com Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 2f189546e133..2ac50822554e 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -25,6 +25,9 @@ bool opal_unlock_from_suspend(struct opal_dev *dev); struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv); int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); +#define OPAL_AUTH_KEY "opal-boot-pin" +#define OPAL_AUTH_KEY_PREV "opal-boot-pin-prev" + static inline bool is_sed_ioctl(unsigned int cmd) { switch (cmd) { -- cgit v1.2.3