From f895c53f8ace3c3e49ebf9def90e63fc6d46d2bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Feb 2010 14:17:50 -0500 Subject: NFS: Make close(2) asynchronous when closing NFS O_DIRECT files For NFSv2 and v3: O_DIRECT writes are always synchronous, and aren't cached, so nothing should be flushed when closing an NFS O_DIRECT file descriptor. Thus there are no write errors to report on close(2). In addition, there's no cached data to verify on the next open(2), so we don't need clean GETATTR results at close time to compare with. Thus, there's no need for the nfs_revalidate_inode() call when closing an NFS O_DIRECT file. This reduces the number of synchronous on-the-wire requests for a simple open-write-close of an NFS O_DIRECT file by roughly 20%. For NFSv4: Call nfs4_do_close() with wait set to zero when closing an NFS O_DIRECT file. The CLOSE will go on the wire, but the application won't wait for it to complete. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f141bde7756a..87cca56846d6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -620,11 +620,6 @@ void put_nfs_open_context(struct nfs_open_context *ctx) __put_nfs_open_context(ctx, 0); } -static void put_nfs_open_context_sync(struct nfs_open_context *ctx) -{ - __put_nfs_open_context(ctx, 1); -} - /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages @@ -671,7 +666,7 @@ static void nfs_file_clear_open_context(struct file *filp) spin_lock(&inode->i_lock); list_move_tail(&ctx->list, &NFS_I(inode)->open_files); spin_unlock(&inode->i_lock); - put_nfs_open_context_sync(ctx); + __put_nfs_open_context(ctx, filp->f_flags & O_DIRECT ? 0 : 1); } } -- cgit v1.2.3 From 6eae7974d0490a9dbc3091f702ea1650871652a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jan 2010 13:44:07 -0500 Subject: Switch alloc_nfs_open_context() to struct path Signed-off-by: Al Viro --- fs/nfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f141bde7756a..7570573bdb30 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -574,14 +574,14 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) nfs_revalidate_inode(server, inode); } -static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) +static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) { struct nfs_open_context *ctx; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - ctx->path.dentry = dget(dentry); - ctx->path.mnt = mntget(mnt); + ctx->path = *path; + path_get(&ctx->path); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; @@ -686,7 +686,7 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred); + ctx = alloc_nfs_open_context(&filp->f_path, cred); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; -- cgit v1.2.3 From 26821ed40b4230259e770c9911180f38fcaa6f59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:21 +0100 Subject: make sure data is on disk before calling ->write_inode Similar to the fsync issue fixed a while ago in commit 2daea67e966dc0c42067ebea015ddac6834cef88 we need to write for data to actually hit the disk before writing out the metadata to guarantee data integrity for filesystems that modify the inode in the data I/O completion path. Currently XFS and NFS handle this manually, and AFS has a write_inode method that does nothing but waiting for data, while others are possibly missing out on this. Fortunately this change has a lot less impact than the fsync change as none of the write_inode methods starts data writeout of any form by itself. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/nfs/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7570573bdb30..5ecd952cae1d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -101,12 +101,7 @@ int nfs_write_inode(struct inode *inode, int sync) { int ret; - if (sync) { - ret = filemap_fdatawait(inode->i_mapping); - if (ret == 0) - ret = nfs_commit_inode(inode, FLUSH_SYNC); - } else - ret = nfs_commit_inode(inode, 0); + ret = nfs_commit_inode(inode, sync ? FLUSH_SYNC : 0); if (ret >= 0) return 0; __mark_inode_dirty(inode, I_DIRTY_DATASYNC); -- cgit v1.2.3 From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:37 +0100 Subject: pass writeback_control to ->write_inode This gives the filesystem more information about the writeback that is happening. Trond requested this for the NFS unstable write handling, and other filesystems might benefit from this too by beeing able to distinguish between the different callers in more detail. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/nfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ecd952cae1d..7f9ecc46f3fb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,11 +97,12 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, int sync) +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; - ret = nfs_commit_inode(inode, sync ? FLUSH_SYNC : 0); + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); if (ret >= 0) return 0; __mark_inode_dirty(inode, I_DIRTY_DATASYNC); -- cgit v1.2.3 From 8fc795f703c5138e1a8bfb88c69f52632031aa6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:46:56 -0800 Subject: NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c The sole purpose of nfs_write_inode is to commit unstable writes, so move it into fs/nfs/write.c, and make nfs_commit_inode static. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7f9ecc46f3fb..89e98312599d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - int ret; - - ret = nfs_commit_inode(inode, - wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); - if (ret >= 0) - return 0; - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return ret; -} - void nfs_clear_inode(struct inode *inode) { /* -- cgit v1.2.3 From ff778d02bf867e1733a09b34ad6dbb723b024814 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:53:39 -0800 Subject: NFS: Add a count of the number of unstable writes carried by an inode In order to know when we should do opportunistic commits of the unstable writes, when the VM is doing a background flush, we add a field to count the number of unstable writes. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 89e98312599d..aa5a831001ab 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1404,6 +1404,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); nfsi->npages = 0; + nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); init_waitqueue_head(&nfsi->waitqueue); -- cgit v1.2.3 From acdc53b2146c7ee67feb1f02f7bc3020126514b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:26 -0800 Subject: NFS: Replace __nfs_write_mapping with sync_inode() Now that we have correct COMMIT semantics in writeback_single_inode, we can reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a call to filemap_write_and_wait(), which doesn't need to hold the inode->i_mutex. With that done, we can eliminate nfs_write_mapping() altogether. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aa5a831001ab..443772df9b17 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; - /* - * Flush out writes to the server in order to update c/mtime. - * - * Hold the i_mutex to suspend application writes temporarily; - * this prevents long-running writing applications from blocking - * nfs_wb_nocommit. - */ + /* Flush out writes to the server in order to update c/mtime. */ if (S_ISREG(inode->i_mode)) { - mutex_lock(&inode->i_mutex); - nfs_wb_nocommit(inode); - mutex_unlock(&inode->i_mutex); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto out; } /* @@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); } +out: return err; } -- cgit v1.2.3 From 5cf95214ccb915591e2214f81de4659302d3e452 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:29 -0800 Subject: NFS: Clean up nfs_sync_mapping Remove the redundant call to filemap_write_and_wait(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 443772df9b17..e8b41170d295 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -114,16 +114,12 @@ void nfs_clear_inode(struct inode *inode) */ int nfs_sync_mapping(struct address_space *mapping) { - int ret; + int ret = 0; - if (mapping->nrpages == 0) - return 0; - unmap_mapping_range(mapping, 0, 0, 0); - ret = filemap_write_and_wait(mapping); - if (ret != 0) - goto out; - ret = nfs_wb_all(mapping->host); -out: + if (mapping->nrpages != 0) { + unmap_mapping_range(mapping, 0, 0, 0); + ret = nfs_wb_all(mapping->host); + } return ret; } -- cgit v1.2.3 From 1cda707d52e51a6cafac0aef12d2bd7052d572e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:30 -0800 Subject: NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8b41170d295..dbaaf7d2a188 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -754,7 +754,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return __nfs_revalidate_inode(server, inode); } -static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) +static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -775,49 +775,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa return 0; } -static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) -{ - int ret = 0; - - mutex_lock(&inode->i_mutex); - if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) { - ret = nfs_sync_mapping(mapping); - if (ret == 0) - ret = nfs_invalidate_mapping_nolock(inode, mapping); - } - mutex_unlock(&inode->i_mutex); - return ret; -} - -/** - * nfs_revalidate_mapping_nolock - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - */ -int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int ret = 0; - - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { - ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (ret < 0) - goto out; - } - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - ret = nfs_invalidate_mapping_nolock(inode, mapping); -out: - return ret; -} - /** * nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping - * - * This version of the function will take the inode->i_mutex and attempt to - * flush out all dirty data if it needs to invalidate the page cache. */ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { -- cgit v1.2.3 From b4d2314bb88b07e5a04e6c75b442a1dfcd60e340 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 10 Mar 2010 15:21:44 -0500 Subject: NFSv4: Don't ignore the NFS_INO_REVAL_FORCED flag in nfs_revalidate_inode() If the NFS_INO_REVAL_FORCED flag is set, that means that we don't yet have an up to date attribute cache. Even if we hold a delegation, we must put a GETATTR on the wire. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 657201acda84..e358df75a6ad 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -729,7 +729,7 @@ int nfs_attribute_timeout(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - if (nfs_have_delegation(inode, FMODE_READ)) + if (nfs_have_delegated_attributes(inode)) return 0; return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } -- cgit v1.2.3