From 9cf6b720f84d6999ff9a514d0a939dd183846aaf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 6 Apr 2009 14:48:03 +0200 Subject: block: fsync_buffers_list() should use SWRITE_SYNC_PLUG Then it can submit all the buffers without unplugging for each one. We will kick off the pending IO if we come across a new address space. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- fs/buffer.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 5d55a896ff78..43afaa5d6901 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -737,7 +737,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) { struct buffer_head *bh; struct list_head tmp; - struct address_space *mapping; + struct address_space *mapping, *prev_mapping = NULL; int err = 0, err2; INIT_LIST_HEAD(&tmp); @@ -762,7 +762,18 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * contents - it is a noop if I/O is still in * flight on potentially older contents. */ - ll_rw_block(SWRITE_SYNC, 1, &bh); + ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + + /* + * Kick off IO for the previous mapping. Note + * that we will not run the very last mapping, + * wait_on_buffer() will do that for us + * through sync_buffer(). 
+ */ + if (prev_mapping && prev_mapping != mapping) + blk_run_address_space(prev_mapping); + prev_mapping = mapping; + brelse(bh); spin_lock(lock); } @@ -2957,12 +2968,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC) + if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) lock_buffer(bh); else if (!trylock_buffer(bh)) continue; - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { + if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || + rw == SWRITE_SYNC_PLUG) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); -- cgit v1.2.3 From 1aa2a7cc6fd7b5c86681a6ae9dfd1072c261a435 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 6 Apr 2009 14:48:08 +0200 Subject: block: switch sync_dirty_buffer() over to WRITE_SYNC We should now have the logic in place to handle this properly without regressing on the write performance, so re-enable the sync writes. 
Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 43afaa5d6901..6e35762b6169 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3010,7 +3010,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); -- cgit v1.2.3 From 6e34eeddf7deec1444bbddab533f03f520d8458c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 7 Apr 2009 18:12:43 -0400 Subject: block_write_full_page: switch synchronous writes to use WRITE_SYNC_PLUG Now that we have a distinction between WRITE_SYNC and WRITE_SYNC_PLUG, use WRITE_SYNC_PLUG in __block_write_full_page() to avoid unplugging the block device I/O queue between each page that gets flushed out. Otherwise, when we run sync() or fsync() and we need to write out a large number of pages, the block device queue will get unplugged for every page that is flushed out, which will be a pretty serious performance regression caused by commit a64c8610. Signed-off-by: "Theodore Ts'o" --- fs/buffer.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 6e35762b6169..13edf7ad3ff1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1596,6 +1596,16 @@ EXPORT_SYMBOL(unmap_underlying_metadata); * locked buffer. This only can happen if someone has written the buffer * directly, with submit_bh(). At the address_space level PageWriteback * prevents this contention from occurring. 
+ * + * If block_write_full_page() is called with wbc->sync_mode == + * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this + * causes the writes to be flagged as synchronous writes, but the + * block device queue will NOT be unplugged, since usually many pages + * will be pushed out before the higher-level caller actually + * waits for the writes to be completed. The various wait functions, + * such as wait_on_writeback_range(), will ultimately call sync_page(), + * which will ultimately call blk_run_backing_dev(), which will end up + * unplugging the device queue. */ static int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block, struct writeback_control *wbc) @@ -1606,7 +1616,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; const unsigned blocksize = 1 << inode->i_blkbits; int nr_underway = 0; - int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int write_op = (wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC_PLUG : WRITE); BUG_ON(!PageLocked(page)); -- cgit v1.2.3