diff options
| -rw-r--r-- | Documentation/filesystems/locking.rst | 14 | ||||
| -rw-r--r-- | Documentation/trace/ftrace.rst | 4 | ||||
| -rw-r--r-- | drivers/md/md-bitmap.c | 27 | ||||
| -rw-r--r-- | drivers/md/raid5.h | 6 | ||||
| -rw-r--r-- | fs/buffer.c | 385 | ||||
| -rw-r--r-- | fs/ext4/ext4.h | 10 | ||||
| -rw-r--r-- | fs/ext4/fast_commit.c | 8 | ||||
| -rw-r--r-- | fs/ext4/ialloc.c | 6 | ||||
| -rw-r--r-- | fs/ext4/mmp.c | 5 | ||||
| -rw-r--r-- | fs/ext4/super.c | 18 | ||||
| -rw-r--r-- | fs/gfs2/bmap.c | 13 | ||||
| -rw-r--r-- | fs/gfs2/dir.c | 12 | ||||
| -rw-r--r-- | fs/gfs2/meta_io.c | 13 | ||||
| -rw-r--r-- | fs/jbd2/commit.c | 13 | ||||
| -rw-r--r-- | fs/jbd2/journal.c | 4 | ||||
| -rw-r--r-- | fs/nilfs2/btnode.c | 4 | ||||
| -rw-r--r-- | fs/nilfs2/gcinode.c | 4 | ||||
| -rw-r--r-- | fs/nilfs2/mdt.c | 4 | ||||
| -rw-r--r-- | fs/ocfs2/buffer_head_io.c | 16 | ||||
| -rw-r--r-- | include/linux/buffer_head.h | 16 | ||||
| -rw-r--r-- | mm/vmscan.c | 2 |
21 files changed, 288 insertions, 296 deletions
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 8421ea21bd35..a27aca42fd85 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -416,20 +416,6 @@ lm_open_conflict yes no no lm_breaker_timedout yes no no ====================== ============= ================= ========= -buffer_head -=========== - -prototypes:: - - void (*b_end_io)(struct buffer_head *bh, int uptodate); - -locking rules: - -called from interrupts. In other words, extreme care is needed here. -bh is locked, but that's all warranties we have here. Currently only RAID1, -highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices -call this method upon the IO completion. - block_device_operations ======================= prototypes:: diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index b9efb148a5c2..2ed1b96e440b 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -1624,7 +1624,7 @@ function-trace, we get a much larger output:: => blk_queue_bio => submit_bio_noacct => submit_bio - => submit_bh + => bh_submit => __ext3_get_inode_loc => ext3_iget => ext3_lookup @@ -1909,7 +1909,7 @@ tracers. => blk_queue_bio => submit_bio_noacct => submit_bio - => submit_bh + => bh_submit => ext3_bread => ext3_dir_bread => htree_dirblock_to_tree diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 028b9ca8ce52..7d778fe1c47c 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -502,6 +502,18 @@ static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index, static void md_bitmap_file_kick(struct bitmap *bitmap); #ifdef CONFIG_MD_BITMAP_FILE +static void end_bitmap_write(struct bio *bio) +{ + struct buffer_head *bh; + bool uptodate = bio_endio_bh(bio, &bh); + struct bitmap *bitmap = bh->b_private; + + if (!uptodate) + set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); + if (atomic_dec_and_test(&bitmap->pending_writes)) + wake_up(&bitmap->write_wait); +} + static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) { struct buffer_head *bh = page_buffers(page); @@ -510,7 +522,7 @@ static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); - submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); + bh_submit(bh, REQ_OP_WRITE | REQ_SYNC, end_bitmap_write); bh = bh->b_this_page; } @@ -519,16 +531,6 @@ static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) atomic_read(&bitmap->pending_writes) == 0); } -static void end_bitmap_write(struct buffer_head *bh, int uptodate) -{ - struct bitmap *bitmap = bh->b_private; - - if (!uptodate) - set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); - if (atomic_dec_and_test(&bitmap->pending_writes)) - wake_up(&bitmap->write_wait); -} - static void free_buffers(struct page *page) { struct buffer_head *bh; @@ -592,12 +594,11 @@ static int read_file_page(struct file *file, unsigned long index, else count -= blocksize; - bh->b_end_io = end_bitmap_write; bh->b_private = bitmap; atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); - submit_bh(REQ_OP_READ, bh); + bh_submit(bh, REQ_OP_READ, end_bitmap_write); } blk_cur++; bh = bh->b_this_page; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 1c7b710fc9c1..1dfa60a41d91 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -38,7 +38,7 @@ * Clean -> Dirty - on compute_parity to satisfy write/sync (RECONSTRUCT or RMW) * * The Want->Empty, Want->Clean, Dirty->Clean, transitions - * all happen in b_end_io at interrupt time. + * all happen in end_io at interrupt time. * Each sets the Uptodate bit before releasing the Lock bit. * This leaves one multi-stage transition: * Want->Dirty->Clean @@ -64,7 +64,7 @@ * together, but we are not guaranteed of that so we allow for more. * * If a buffer is on the read list when the associated cache buffer is - * Uptodate, the data is copied into the read buffer and it's b_end_io + * Uptodate, the data is copied into the read buffer and it's end_io * routine is called. This may happen in the end_request routine only * if the buffer has just successfully been read. end_request should * remove the buffers from the list and then set the Uptodate bit on @@ -76,7 +76,7 @@ * into the cache buffer, which is then marked dirty, and moved onto a * third list, the written list (bh_written). Once both the parity * block and the cached buffer are successfully written, any buffer on - * a written list can be returned with b_end_io. + * a written list can be returned with end_io. * * The write list and read list both act as fifos. The read list, * write list and written list are protected by the device_lock. diff --git a/fs/buffer.c b/fs/buffer.c index b0b3792b1496..7ed8dd77d221 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -54,9 +54,6 @@ #include "internal.h" -static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, - enum rw_hint hint, struct writeback_control *wbc); - #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) inline void touch_buffer(struct buffer_head *bh) @@ -132,15 +129,43 @@ static void buffer_io_error(struct buffer_head *bh, char *msg) bh->b_bdev, (unsigned long long)bh->b_blocknr, msg); } -/* - * End-of-IO handler helper function which does not touch the bh after - * unlocking it. - * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but - * a race there is benign: unlock_buffer() only use the bh's address for - * hashing after unlocking the buffer, so it doesn't actually touch the bh - * itself. +/** + * bio_endio_bh - Discard the bio used to submit a buffer. + * @bio: The bio. + * @bhp: Where to return the buffer_head. + * + * Call this in your bio_end_io handler to retrieve the buffer_head + * submitted in bh_submit(). If you did not call bh_submit(), do not + * call this function; it will return garbage. + * + * This function consumes the bio refcount which will probably free the + * bio. + * + * Return: True if the I/O succeeded. */ -static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) +bool bio_endio_bh(struct bio *bio, struct buffer_head **bhp) +{ + bool success = bio->bi_status == BLK_STS_OK; + struct buffer_head *bh = bio->bi_private; + + if (unlikely(bio_flagged(bio, BIO_QUIET))) + set_bit(BH_Quiet, &bh->b_state); + bio_put(bio); + + *bhp = bh; + return success; +} +EXPORT_SYMBOL(bio_endio_bh); + +/** + * end_buffer_read_sync - Handle buffer reads finishing + * @bh: The buffer. + * @uptodate: True if the read was successful. + * + * If a buffer is read through a mechanism that isn't bh_submit(), you + * can call this function to finish the read. + */ +void end_buffer_read_sync(struct buffer_head *bh, int uptodate) { if (uptodate) { set_buffer_uptodate(bh); @@ -150,21 +175,36 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) } unlock_buffer(bh); } +EXPORT_SYMBOL(end_buffer_read_sync); -/* - * Default synchronous end-of-IO handler.. Just mark it up-to-date and - * unlock the buffer. +/** + * bh_end_read - I/O end handler for reads + * @bio: The bio being completed. + * + * Pass this function to bh_submit() if you're reading into the buffer, + * unless you need your own special I/O end handler. */ -void end_buffer_read_sync(struct buffer_head *bh, int uptodate) +void bh_end_read(struct bio *bio) { - put_bh(bh); - __end_buffer_read_notouch(bh, uptodate); + struct buffer_head *bh; + bool uptodate = bio_endio_bh(bio, &bh); + end_buffer_read_sync(bh, uptodate); } -EXPORT_SYMBOL(end_buffer_read_sync); +EXPORT_SYMBOL(bh_end_read); -void end_buffer_write_sync(struct buffer_head *bh, int uptodate) +/** + * bh_end_write - I/O end handler for writes + * @bio: The bio being completed. + * + * Pass this function to bh_submit() if you're writing from the buffer, + * unless you need your own special I/O end handler. + */ +void bh_end_write(struct bio *bio) { - if (uptodate) { + struct buffer_head *bh; + bool success = bio_endio_bh(bio, &bh); + + if (success) { set_buffer_uptodate(bh); } else { buffer_io_error(bh, ", lost sync page write"); @@ -172,9 +212,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) clear_buffer_uptodate(bh); } unlock_buffer(bh); - put_bh(bh); } -EXPORT_SYMBOL(end_buffer_write_sync); +EXPORT_SYMBOL(bh_end_write); static struct buffer_head * __find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic) @@ -342,11 +381,13 @@ static void decrypt_bh(struct work_struct *work) } /* - * I/O completion handler for block_read_full_folio() - pages + * I/O completion handler for block_read_full_folio() - folios * which come unlocked at the end of I/O. */ -static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate) +static void bh_end_async_read(struct bio *bio) { + struct buffer_head *bh; + bool uptodate = bio_endio_bh(bio, &bh); struct inode *inode = bh->b_folio->mapping->host; bool decrypt = fscrypt_inode_uses_fs_layer_crypto(inode); struct fsverity_info *vi = NULL; @@ -371,17 +412,24 @@ static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate) } return; } - uptodate = 0; + uptodate = false; } end_buffer_async_read(bh, uptodate); } -/* - * Completion handler for block_write_full_folio() - folios which are unlocked - * during I/O, and which have the writeback flag cleared upon I/O completion. +/** + * bh_end_async_write - I/O end handler for async folio writes + * @bio: The bio being completed. + * + * Pass this function to bh_submit() if you're doing the equivalent of + * block_write_full_folio(). That is, the folio is unlocked, and will + * have its writeback flag cleared once all async write buffers have + * completed. */ -static void end_buffer_async_write(struct buffer_head *bh, int uptodate) +void bh_end_async_write(struct bio *bio) { + struct buffer_head *bh; + bool success = bio_endio_bh(bio, &bh); unsigned long flags; struct buffer_head *first; struct buffer_head *tmp; @@ -390,7 +438,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) BUG_ON(!buffer_async_write(bh)); folio = bh->b_folio; - if (uptodate) { + if (success) { set_buffer_uptodate(bh); } else { buffer_io_error(bh, ", lost async page write"); @@ -418,46 +466,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); } - -/* - * If a page's buffers are under async readin (end_buffer_async_read - * completion) then there is a possibility that another thread of - * control could lock one of the buffers after it has completed - * but while some of the other buffers have not completed. This - * locked buffer would confuse end_buffer_async_read() into not unlocking - * the page. So the absence of BH_Async_Read tells end_buffer_async_read() - * that this buffer is not under async I/O. - * - * The page comes unlocked when it has no locked buffer_async buffers - * left. - * - * PageLocked prevents anyone starting new async I/O reads any of - * the buffers. - * - * PageWriteback is used to prevent simultaneous writeout of the same - * page. - * - * PageLocked prevents anyone from starting writeback of a page which is - * under read I/O (PageWriteback is only ever set against a locked page). - */ -static void mark_buffer_async_read(struct buffer_head *bh) -{ - bh->b_end_io = end_buffer_async_read_io; - set_buffer_async_read(bh); -} - -static void mark_buffer_async_write_endio(struct buffer_head *bh, - bh_end_io_t *handler) -{ - bh->b_end_io = handler; - set_buffer_async_write(bh); -} - -void mark_buffer_async_write(struct buffer_head *bh) -{ - mark_buffer_async_write_endio(bh, end_buffer_async_write); -} -EXPORT_SYMBOL(mark_buffer_async_write); +EXPORT_SYMBOL(bh_end_async_write); /* @@ -916,7 +925,6 @@ static sector_t folio_init_buffers(struct folio *folio, do { if (!buffer_mapped(bh)) { - bh->b_end_io = NULL; bh->b_private = NULL; bh->b_bdev = bdev; bh->b_blocknr = block; @@ -1157,6 +1165,83 @@ void __bforget(struct buffer_head *bh) } EXPORT_SYMBOL(__bforget); +static void buffer_set_crypto_ctx(struct bio *bio, const struct buffer_head *bh, + gfp_t gfp_mask) +{ + const struct address_space *mapping = folio_mapping(bh->b_folio); + + /* + * The ext4 journal (jbd2) can submit a buffer_head it directly created + * for a non-pagecache page. fscrypt doesn't care about these. + */ + if (!mapping) + return; + fscrypt_set_bio_crypt_ctx(bio, mapping->host, + folio_pos(bh->b_folio) + bh_offset(bh), gfp_mask); +} + +static void __bh_submit(struct buffer_head *bh, blk_opf_t opf, + enum rw_hint write_hint, struct writeback_control *wbc, + bio_end_io_t end_bio) +{ + const enum req_op op = opf & REQ_OP_MASK; + struct bio *bio; + + BUG_ON(!buffer_locked(bh)); + BUG_ON(!buffer_mapped(bh)); + BUG_ON(buffer_delay(bh)); + BUG_ON(buffer_unwritten(bh)); + + /* + * Only clear out a write error when rewriting + */ + if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE)) + clear_buffer_write_io_error(bh); + + if (buffer_meta(bh)) + opf |= REQ_META; + if (buffer_prio(bh)) + opf |= REQ_PRIO; + + bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO); + + if (IS_ENABLED(CONFIG_FS_ENCRYPTION)) + buffer_set_crypto_ctx(bio, bh, GFP_NOIO); + + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_write_hint = write_hint; + + bio_add_folio_nofail(bio, bh->b_folio, bh->b_size, bh_offset(bh)); + + bio->bi_end_io = end_bio; + bio->bi_private = bh; + + /* Take care of bh's that straddle the end of the device */ + guard_bio_eod(bio); + + if (wbc) { + wbc_init_bio(wbc, bio); + wbc_account_cgroup_owner(wbc, bh->b_folio, bh->b_size); + } + + blk_crypto_submit_bio(bio); +} + +/** + * bh_submit - Start I/O against a buffer head + * @bh: The buffer head to perform I/O on. + * @opf: Operation and flags for bio. + * @end_io: The routine to call when I/O has completed. + * + * If you need to do I/O on an individual bh (instead of allowing the + * page cache to do I/O on the folio that it is in), call this function. + */ +void bh_submit(struct buffer_head *bh, blk_opf_t opf, bio_end_io_t end_io) +{ + __bh_submit(bh, opf, WRITE_LIFE_NOT_SET, NULL, end_io); +} +EXPORT_SYMBOL(bh_submit); + static struct buffer_head *__bread_slow(struct buffer_head *bh) { lock_buffer(bh); @@ -1164,9 +1249,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh) unlock_buffer(bh); return bh; } else { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, bh); + bh_submit(bh, REQ_OP_READ, bh_end_read); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -1716,15 +1799,15 @@ static struct buffer_head *folio_create_buffers(struct folio *folio, /* * While block_write_full_folio is writing back the dirty buffers under - * the page lock, whoever dirtied the buffers may decide to clean them + * the folio lock, whoever dirtied the buffers may decide to clean them * again at any time. We handle that by only looking at the buffer * state inside lock_buffer(). * * If block_write_full_folio() is called for regular writeback - * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a - * locked buffer. This only can happen if someone has written the buffer - * directly, with submit_bh(). At the address_space level PageWriteback - * prevents this contention from occurring. + * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a folio which + * has a locked buffer. This only can happen if someone has written + * the buffer directly, with bh_submit(). At the address_space level + * the folio writeback flag prevents this contention from occurring. * * If block_write_full_folio() is called with wbc->sync_mode == * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this @@ -1810,8 +1893,7 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, continue; } if (test_clear_buffer_dirty(bh)) { - mark_buffer_async_write_endio(bh, - end_buffer_async_write); + set_buffer_async_write(bh); } else { unlock_buffer(bh); } @@ -1827,8 +1909,9 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, - inode->i_write_hint, wbc); + __bh_submit(bh, REQ_OP_WRITE | write_flags, + inode->i_write_hint, wbc, + bh_end_async_write); nr_underway++; } bh = next; @@ -1841,7 +1924,7 @@ done: /* * The folio was marked dirty, but the buffers were * clean. Someone wrote them back by hand with - * write_dirty_buffer/submit_bh. A rare case. + * write_dirty_buffer/bh_submit. A rare case. */ folio_end_writeback(folio); @@ -1865,8 +1948,7 @@ recover: if (buffer_mapped(bh) && buffer_dirty(bh) && !buffer_delay(bh)) { lock_buffer(bh); - mark_buffer_async_write_endio(bh, - end_buffer_async_write); + set_buffer_async_write(bh); } else { /* * The buffer may have been set dirty during @@ -1882,8 +1964,9 @@ recover: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, - inode->i_write_hint, wbc); + __bh_submit(bh, REQ_OP_WRITE | write_flags, + inode->i_write_hint, wbc, + bh_end_async_write); nr_underway++; } bh = next; @@ -2339,9 +2422,33 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) continue; } - mark_buffer_async_read(bh); + /* + * If a folio's buffers are under async readin + * (end_buffer_async_read completion) then there is a + * possibility that another thread of control could lock + * one of the buffers after it has completed but while + * some of the other buffers have not completed. This + * locked buffer would confuse end_buffer_async_read() + * into not unlocking the folio. So the absence of + * BH_Async_Read tells end_buffer_async_read() that this + * buffer is not under async I/O. + * + * The folio comes unlocked when it has no locked + * buffer_async buffers left. + * + * The folio lock prevents anyone starting new async + * I/O reads into any of the buffers. + * + * The writeback flag is used to prevent simultaneous + * writeout of the same folio. + * + * The folio lock prevents anyone from starting writeback + * of a folio which is under read I/O (the writeback + * flag is only ever set on a locked folio). + */ + set_buffer_async_read(bh); if (prev) - submit_bh(REQ_OP_READ, prev); + bh_submit(prev, REQ_OP_READ, bh_end_async_read); prev = bh; } while (iblock++, (bh = bh->b_this_page) != head); @@ -2355,7 +2462,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) * in this folio. */ if (prev) - submit_bh(REQ_OP_READ, prev); + bh_submit(prev, REQ_OP_READ, bh_end_async_read); else folio_end_read(folio, !page_error); @@ -2663,86 +2770,6 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, } EXPORT_SYMBOL(generic_block_bmap); -static void end_bio_bh_io_sync(struct bio *bio) -{ - struct buffer_head *bh = bio->bi_private; - - if (unlikely(bio_flagged(bio, BIO_QUIET))) - set_bit(BH_Quiet, &bh->b_state); - - bh->b_end_io(bh, !bio->bi_status); - bio_put(bio); -} - -static void buffer_set_crypto_ctx(struct bio *bio, const struct buffer_head *bh, - gfp_t gfp_mask) -{ - const struct address_space *mapping = folio_mapping(bh->b_folio); - - /* - * The ext4 journal (jbd2) can submit a buffer_head it directly created - * for a non-pagecache page. fscrypt doesn't care about these. - */ - if (!mapping) - return; - fscrypt_set_bio_crypt_ctx(bio, mapping->host, - folio_pos(bh->b_folio) + bh_offset(bh), gfp_mask); -} - -static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, - enum rw_hint write_hint, - struct writeback_control *wbc) -{ - const enum req_op op = opf & REQ_OP_MASK; - struct bio *bio; - - BUG_ON(!buffer_locked(bh)); - BUG_ON(!buffer_mapped(bh)); - BUG_ON(!bh->b_end_io); - BUG_ON(buffer_delay(bh)); - BUG_ON(buffer_unwritten(bh)); - - /* - * Only clear out a write error when rewriting - */ - if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE)) - clear_buffer_write_io_error(bh); - - if (buffer_meta(bh)) - opf |= REQ_META; - if (buffer_prio(bh)) - opf |= REQ_PRIO; - - bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO); - - if (IS_ENABLED(CONFIG_FS_ENCRYPTION)) - buffer_set_crypto_ctx(bio, bh, GFP_NOIO); - - bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio->bi_write_hint = write_hint; - - bio_add_folio_nofail(bio, bh->b_folio, bh->b_size, bh_offset(bh)); - - bio->bi_end_io = end_bio_bh_io_sync; - bio->bi_private = bh; - - /* Take care of bh's that straddle the end of the device */ - guard_bio_eod(bio); - - if (wbc) { - wbc_init_bio(wbc, bio); - wbc_account_cgroup_owner(wbc, bh->b_folio, bh->b_size); - } - - blk_crypto_submit_bio(bio); -} - -void submit_bh(blk_opf_t opf, struct buffer_head *bh) -{ - submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL); -} -EXPORT_SYMBOL(submit_bh); - void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) { lock_buffer(bh); @@ -2750,9 +2777,7 @@ void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) unlock_buffer(bh); return; } - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); - submit_bh(REQ_OP_WRITE | op_flags, bh); + bh_submit(bh, REQ_OP_WRITE | op_flags, bh_end_write); } EXPORT_SYMBOL(write_dirty_buffer); @@ -2775,9 +2800,7 @@ int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) return -EIO; } - get_bh(bh); - bh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE | op_flags, bh); + bh_submit(bh, REQ_OP_WRITE | op_flags, bh_end_write); wait_on_buffer(bh); if (!buffer_uptodate(bh)) return -EIO; @@ -3009,9 +3032,7 @@ int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait) BUG_ON(!buffer_locked(bh)); - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ | op_flags, bh); + bh_submit(bh, REQ_OP_READ | op_flags, bh_end_read); if (wait) { wait_on_buffer(bh); if (!buffer_uptodate(bh)) @@ -3053,9 +3074,7 @@ void __bh_read_batch(int nr, struct buffer_head *bhs[], continue; } - bh->b_end_io = end_buffer_read_sync; - get_bh(bh); - submit_bh(REQ_OP_READ | op_flags, bh); + bh_submit(bh, REQ_OP_READ | op_flags, bh_end_read); } } EXPORT_SYMBOL(__bh_read_batch); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 94283a991e5c..6af11f0ff1c5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2959,7 +2959,7 @@ extern unsigned long ext4_count_dirs(struct super_block *); extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int barrier); -extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); +void ext4_end_bitmap_read(struct bio *bio); /* fast_commit.c */ int ext4_fc_info_show(struct seq_file *seq, void *v); @@ -3184,10 +3184,10 @@ extern struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, sector_t block); extern struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb, sector_t block); -extern void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, - bh_end_io_t *end_io, bool simu_fail); -extern int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, - bh_end_io_t *end_io, bool simu_fail); +void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, + bio_end_io_t end_io, bool simu_fail); +int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, + bio_end_io_t end_io, bool simu_fail); extern int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait); extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block); extern int ext4_seq_options_show(struct seq_file *seq, void *offset); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index b3c22636251d..5773b85e43cb 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -184,8 +184,11 @@ #include <trace/events/ext4.h> static struct kmem_cache *ext4_fc_dentry_cachep; -static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate) +static void ext4_end_buffer_io_sync(struct bio *bio) { + struct buffer_head *bh; + bool uptodate = bio_endio_bh(bio, &bh); + BUFFER_TRACE(bh, ""); if (uptodate) { ext4_debug("%s: Block %lld up-to-date", @@ -659,8 +662,7 @@ static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) lock_buffer(bh); set_buffer_dirty(bh); set_buffer_uptodate(bh); - bh->b_end_io = ext4_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE | write_flags, bh); + bh_submit(bh, REQ_OP_WRITE | write_flags, ext4_end_buffer_io_sync); EXT4_SB(sb)->s_fc_bh = NULL; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8c80d5087516..a40cb27f8116 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -66,14 +66,16 @@ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); } -void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) +void ext4_end_bitmap_read(struct bio *bio) { + struct buffer_head *bh; + bool uptodate = bio_endio_bh(bio, &bh); + if (uptodate) { set_buffer_uptodate(bh); set_bitmap_uptodate(bh); } unlock_buffer(bh); - put_bh(bh); } static int ext4_validate_inode_bitmap(struct super_block *sb, diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 6f57c181ff77..7ce361484b38 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -46,9 +46,8 @@ static int write_mmp_block_thawed(struct super_block *sb, ext4_mmp_csum_set(sb, mmp); lock_buffer(bh); - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); - submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh); + bh_submit(bh, REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, + bh_end_write); wait_on_buffer(bh); if (unlikely(!buffer_uptodate(bh))) return -EIO; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6a77db4d3124..7283108d7609 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -161,7 +161,7 @@ MODULE_ALIAS("ext3"); static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, - bh_end_io_t *end_io, bool simu_fail) + bio_end_io_t end_io, bool simu_fail) { if (simu_fail) { clear_buffer_uptodate(bh); @@ -176,13 +176,13 @@ static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, */ clear_buffer_verified(bh); - bh->b_end_io = end_io ? end_io : end_buffer_read_sync; - get_bh(bh); - submit_bh(REQ_OP_READ | op_flags, bh); + if (!end_io) + end_io = bh_end_read; + bh_submit(bh, REQ_OP_READ | op_flags, end_io); } void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, - bh_end_io_t *end_io, bool simu_fail) + bio_end_io_t end_io, bool simu_fail) { BUG_ON(!buffer_locked(bh)); @@ -194,7 +194,7 @@ void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, } int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, - bh_end_io_t *end_io, bool simu_fail) + bio_end_io_t end_io, bool simu_fail) { BUG_ON(!buffer_locked(bh)); @@ -6316,12 +6316,10 @@ static int ext4_commit_super(struct super_block *sb) clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } - get_bh(sbh); /* Clear potential dirty bit if it was journalled update */ clear_buffer_dirty(sbh); - sbh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE | REQ_SYNC | - (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh); + bh_submit(sbh, REQ_OP_WRITE | REQ_SYNC | + (test_opt(sb, BARRIER) ? REQ_FUA : 0), bh_end_write); wait_on_buffer(sbh); if (buffer_write_io_error(sbh)) { ext4_msg(sb, KERN_ERR, "I/O error while writing " diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index b3d7fcd95f03..d158c4b7413d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -304,14 +304,15 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE); if (trylock_buffer(rabh)) { if (!buffer_uptodate(rabh)) { - rabh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | - REQ_PRIO, rabh); - continue; + bh_submit(rabh, + REQ_OP_READ | REQ_RAHEAD | REQ_META | + REQ_PRIO, + bh_end_read); + } else { + unlock_buffer(rabh); } - unlock_buffer(rabh); } - brelse(rabh); + put_bh(rabh); } } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 022dbb31e0d9..0237b36b9eb1 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1505,15 +1505,13 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, if (trylock_buffer(bh)) { if (buffer_uptodate(bh)) { unlock_buffer(bh); - brelse(bh); - continue; + } else { + bh_submit(bh, REQ_OP_READ | REQ_RAHEAD | + REQ_META | REQ_PRIO, + bh_end_read); } - bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | - REQ_PRIO, bh); - continue; } - brelse(bh); + put_bh(bh); } } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index d407dd476e72..a87cfbf0df38 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -59,7 +59,7 @@ static void gfs2_aspace_write_folio(struct folio *folio, continue; } if (test_clear_buffer_dirty(bh)) { - mark_buffer_async_write(bh); + set_buffer_async_write(bh); } else { unlock_buffer(bh); } @@ -75,7 +75,8 @@ static void gfs2_aspace_write_folio(struct folio *folio, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(REQ_OP_WRITE | write_flags, bh); + bh_submit(bh, REQ_OP_WRITE | write_flags, + bh_end_async_write); nr_underway++; } bh = next; @@ -212,7 +213,7 @@ static void gfs2_meta_read_endio(struct bio *bio) do { struct buffer_head *next = bh->b_this_page; len -= bh->b_size; - bh->b_end_io(bh, !bio->bi_status); + end_buffer_read_sync(bh, bio->bi_status == BLK_STS_OK); bh = next; } while (bh && len); } @@ -221,7 +222,7 @@ static void gfs2_meta_read_endio(struct bio *bio) /* * Submit several consecutive buffer head I/O requests as a single bio I/O - * request. (See submit_bh_wbc.) + * request. (See bh_submit.) */ static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num) { @@ -275,7 +276,6 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, unlock_buffer(bh); flags &= ~DIO_WAIT; } else { - bh->b_end_io = end_buffer_read_sync; get_bh(bh); bhs[num++] = bh; } @@ -286,11 +286,10 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, lock_buffer(bh); if (buffer_uptodate(bh)) { unlock_buffer(bh); - brelse(bh); } else { - bh->b_end_io = end_buffer_read_sync; bhs[num++] = bh; } + brelse(bh); } gfs2_submit_bhs(REQ_OP_READ | REQ_META | REQ_PRIO, bhs, num); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8cf61e7185c4..4e91593d27e5 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -29,8 +29,10 @@ /* * IO end handler for temporary buffer_heads handling writes to the journal. */ -static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) +static void journal_end_buffer_io_sync(struct bio *bio) { + struct buffer_head *bh; + bool uptodate = bio_endio_bh(bio, &bh); struct buffer_head *orig_bh = bh->b_private; BUFFER_TRACE(bh, ""); @@ -147,13 +149,12 @@ static int journal_submit_commit_record(journal_t *journal, lock_buffer(bh); clear_buffer_dirty(bh); set_buffer_uptodate(bh); - bh->b_end_io = journal_end_buffer_io_sync; if (journal->j_flags & JBD2_BARRIER && !jbd2_has_feature_async_commit(journal)) write_flags |= REQ_PREFLUSH | REQ_FUA; - submit_bh(write_flags, bh); + bh_submit(bh, write_flags, journal_end_buffer_io_sync); *cbh = bh; return 0; } @@ -751,9 +752,9 @@ start_journal_io: lock_buffer(bh); clear_buffer_dirty(bh); set_buffer_uptodate(bh); - bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS, - bh); + bh_submit(bh, + REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS, + journal_end_buffer_io_sync); } cond_resched(); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 4f397fcdb13c..2040af8c84cb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1820,9 +1820,7 @@ static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags) } if (jbd2_journal_has_csum_v2or3(journal)) sb->s_checksum = jbd2_superblock_csum(sb); - get_bh(bh); - bh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE | write_flags, bh); + bh_submit(bh, REQ_OP_WRITE | write_flags, bh_end_write); wait_on_buffer(bh); if (buffer_write_io_error(bh)) { clear_buffer_write_io_error(bh); diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 2e553d698d0f..680e4009a30d 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -134,9 +134,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, } set_buffer_mapped(bh); bh->b_blocknr = pblocknr; /* set block address for read */ - bh->b_end_io = end_buffer_read_sync; - get_bh(bh); - submit_bh(opf, bh); + bh_submit(bh, opf, bh_end_read); bh->b_blocknr = blocknr; /* set back to the given block address */ *submit_ptr = pblocknr; err = 0; diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 62d4c1b787e9..85379ac23ae2 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -83,9 +83,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, if (!buffer_mapped(bh)) set_buffer_mapped(bh); bh->b_blocknr = pbn; - bh->b_end_io = end_buffer_read_sync; - get_bh(bh); - submit_bh(REQ_OP_READ, bh); + bh_submit(bh, REQ_OP_READ, bh_end_read); if (vbn) bh->b_blocknr = vbn; out: diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 09adb40c65e5..2a435349fd21 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -148,9 +148,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf, } map_bh(bh, inode->i_sb, (sector_t)blknum); - bh->b_end_io = end_buffer_read_sync; - get_bh(bh); - submit_bh(opf, bh); + bh_submit(bh, opf, bh_end_read); ret = 0; trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 701d27d908d4..8e67eba70674 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -62,9 +62,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, /* remove from dirty list before I/O. */ clear_buffer_dirty(bh); - get_bh(bh); /* for end_buffer_write_sync() */ - bh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, bh); + bh_submit(bh, REQ_OP_WRITE, bh_end_write); wait_on_buffer(bh); @@ -145,9 +143,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, #endif } - get_bh(bh); /* for end_buffer_read_sync() */ - bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, bh); + bh_submit(bh, REQ_OP_READ, bh_end_read); } read_failure: @@ -323,11 +319,9 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, continue; } - get_bh(bh); /* for end_buffer_read_sync() */ if (validate) set_buffer_needs_validate(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, bh); + bh_submit(bh, REQ_OP_READ, bh_end_read); continue; } } @@ -446,10 +440,8 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, /* remove from dirty list before I/O. */ clear_buffer_dirty(bh); - get_bh(bh); /* for end_buffer_write_sync() */ - bh->b_end_io = end_buffer_write_sync; ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check); - submit_bh(REQ_OP_WRITE, bh); + bh_submit(bh, REQ_OP_WRITE, bh_end_write); wait_on_buffer(bh); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e4939e33b4b5..8b23bc9a244c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -46,7 +46,6 @@ enum bh_state_bits { struct page; struct buffer_head; struct address_space; -typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); /* * Historically, a buffer_head was used to map a single block @@ -55,7 +54,7 @@ typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); * is the bio, and buffer_heads are used for extracting block * mappings (via a get_block_t call), for tracking state within * a folio (via a folio_mapping) and for wrapping bio submission - * for backward compatibility reasons (e.g. submit_bh). + * for backward compatibility reasons (e.g. bh_submit). */ struct buffer_head { unsigned long b_state; /* buffer state bitmap (see above) */ @@ -70,8 +69,7 @@ struct buffer_head { char *b_data; /* pointer to data within the page */ struct block_device *b_bdev; - bh_end_io_t *b_end_io; /* I/O completion */ - void *b_private; /* reserved for b_end_io */ + void *b_private; /* reserved for bio_end_io */ struct list_head b_assoc_buffers; /* associated with another mapping */ struct mapping_metadata_bhs *b_mmb; /* head of the list of metadata bhs * this buffer is associated with */ @@ -203,7 +201,12 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size); struct buffer_head *create_empty_buffers(struct folio *folio, unsigned long blocksize, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); -void end_buffer_write_sync(struct buffer_head *bh, int uptodate); +bool bio_endio_bh(struct bio *bio, struct buffer_head **bhp); + +/* Completion routines suitable for passing to bh_submit() */ +void bh_end_read(struct bio *bio); +void bh_end_write(struct bio *bio); +void bh_end_async_write(struct bio *bio); /* Things to do with metadata buffers list */ void mmb_mark_buffer_dirty(struct buffer_head *bh, struct mapping_metadata_bhs *mmb); @@ -218,7 +221,6 @@ static inline void clean_bdev_bh_alias(struct buffer_head *bh) clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1); } -void mark_buffer_async_write(struct buffer_head *bh); void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, @@ -239,7 +241,7 @@ void __lock_buffer(struct buffer_head *bh); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); -void submit_bh(blk_opf_t, struct buffer_head *); +void bh_submit(struct buffer_head *, blk_opf_t, bio_end_io_t); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); diff --git a/mm/vmscan.c b/mm/vmscan.c index bd1b1aa12581..67231d3189ef 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1449,7 +1449,7 @@ retry: * is possible for a folio to have the dirty flag set, * but it is actually clean (all its buffers are clean). * This happens if the buffers were written out directly, - * with submit_bh(). ext3 will do this, as well as + * with bh_submit(). ext3 will do this, as well as * the blockdev mapping. filemap_release_folio() will * discover that cleanness and will drop the buffers * and mark the folio clean - it can be freed. |
