From 26821ed40b4230259e770c9911180f38fcaa6f59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:21 +0100 Subject: make sure data is on disk before calling ->write_inode Similar to the fsync issue fixed a while ago in commit 2daea67e966dc0c42067ebea015ddac6834cef88 we need to write for data to actually hit the disk before writing out the metadata to guarantee data integrity for filesystems that modify the inode in the data I/O completion path. Currently XFS and NFS handle this manually, and AFS has a write_inode method that does nothing but waiting for data, while others are possibly missing out on this. Fortunately this change has a lot less impact than the fsync change as none of the write_inode methods starts data writeout of any form by itself. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/fs-writeback.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1a7c42c64ff4..5f2721b1e4be 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -461,15 +461,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ret = do_writepages(mapping, wbc); - /* Don't write the inode if only I_DIRTY_PAGES was set */ - if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { - int err = write_inode(inode, wait); + /* + * Make sure to wait on the data before writing out the metadata. + * This is important for filesystems that modify metadata on data + * I/O completion. + */ + if (wait) { + int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; } - if (wait) { - int err = filemap_fdatawait(mapping); + /* Don't write the inode if only I_DIRTY_PAGES was set */ + if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { + int err = write_inode(inode, wait); if (ret == 0) ret = err; } -- cgit v1.2.3 From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:37 +0100 Subject: pass writeback_control to ->write_inode This gives the filesystem more information about the writeback that is happening. Trond requested this for the NFS unstable write handling, and other filesystems might benefit from this too by beeing able to distinguish between the different callers in more detail. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/fs-writeback.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5f2721b1e4be..76fc4d594acb 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -381,10 +381,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); } -static int write_inode(struct inode *inode, int sync) +static int write_inode(struct inode *inode, struct writeback_control *wbc) { if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) - return inode->i_sb->s_op->write_inode(inode, sync); + return inode->i_sb->s_op->write_inode(inode, wbc); return 0; } @@ -421,7 +421,6 @@ static int writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; - int wait = wbc->sync_mode == WB_SYNC_ALL; unsigned dirty; int ret; @@ -439,7 +438,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * We'll have another go at writing back this inode when we * completed a full scan of b_io. */ - if (!wait) { + if (wbc->sync_mode != WB_SYNC_ALL) { requeue_io(inode); return 0; } @@ -466,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * This is important for filesystems that modify metadata on data * I/O completion. */ - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; @@ -474,7 +473,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { - int err = write_inode(inode, wait); + int err = write_inode(inode, wbc); if (ret == 0) ret = err; } -- cgit v1.2.3