From ca0daa277acac1029f74d9fea838c9e507398226 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Jun 2016 17:08:28 -0400 Subject: NFS: Cache aggressively when file is open for writing Unless the user is using file locking, we must assume close-to-open cache consistency when the file is open for writing. Adjust the caching algorithm so that it does not clear the cache on out-of-order writes and/or attribute revalidations. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 717a8d6af52d..2d39d9f9da7d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -779,11 +779,6 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) return status; } -static int -is_time_granular(struct timespec *ts) { - return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000)); -} - static int do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { @@ -817,12 +812,8 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { - if (is_time_granular(&NFS_SERVER(inode)->time_delta)) - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - else - nfs_zap_caches(inode); - } + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + nfs_zap_mapping(inode, filp->f_mapping); out: return status; } -- cgit v1.2.3 From 6b56a89833fa7903595c8d138bb4927187315cba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Jun 2016 18:23:01 -0400 Subject: NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer filemap_datawrite() and friends already deal just fine with livelock. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2d39d9f9da7d..29d7477a62e8 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -359,14 +359,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file, mapping->host->i_ino, len, (long long) pos); start: - /* - * Prevent starvation issues if someone is doing a consistency - * sync-to-disk - */ - ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, - nfs_wait_bit_killable, TASK_KILLABLE); - if (ret) - return ret; /* * Wait for O_DIRECT to complete */ -- cgit v1.2.3 From 93761d9863c332d1099d80629f89cf48eb745e48 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 2 Jun 2016 11:03:00 -0400 Subject: NFS: Don't hold the inode lock across fsync() Commits are no longer required to be serialised. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 29d7477a62e8..249262b6bcbe 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -277,11 +277,9 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret != 0) break; - inode_lock(inode); ret = nfs_file_fsync_commit(file, start, end, datasync); if (!ret) ret = pnfs_sync_inode(inode, !!datasync); - inode_unlock(inode); /* * If nfs_file_fsync_commit detected a server reboot, then * resend all dirty pages that might have been covered by -- cgit v1.2.3 From 4f52b6bb8c57b9accafad526a429d6c0851cc62f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 2 Jun 2016 18:10:33 -0400 Subject: NFS: Don't call COMMIT in ->releasepage() While COMMIT has the potential to free up a lot of memory that is being taken by unstable writes, it isn't guaranteed to free up this particular page. Also, calling fsync() on the server is expensive and so we want to do it in a more controlled fashion, rather than have it triggered at random by the VM. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 249262b6bcbe..df4dd8e7e62e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -460,31 +460,8 @@ static void nfs_invalidate_page(struct page *page, unsigned int offset, */ static int nfs_release_page(struct page *page, gfp_t gfp) { - struct address_space *mapping = page->mapping; - dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - /* Always try to initiate a 'commit' if relevant, but only - * wait for it if the caller allows blocking. Even then, - * only wait 1 second and only if the 'bdi' is not congested. - * Waiting indefinitely can cause deadlocks when the NFS - * server is on this machine, when a new TCP connection is - * needed and in other rare cases. There is no particular - * need to wait extensively here. A short wait has the - * benefit that someone else can worry about the freezer. - */ - if (mapping) { - struct nfs_server *nfss = NFS_SERVER(mapping->host); - nfs_commit_inode(mapping->host, 0); - if (gfpflags_allow_blocking(gfp) && - !bdi_write_congested(&nfss->backing_dev_info)) { - wait_on_page_bit_killable_timeout(page, PG_private, - HZ); - if (PagePrivate(page)) - set_bdi_congested(&nfss->backing_dev_info, - BLK_RW_ASYNC); - } - } /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) return 0; -- cgit v1.2.3 From 89698b24d24f9c8b470a73351b0b7199c17e0153 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 23 Jun 2016 10:35:48 -0400 Subject: NFS Cleanup: move call to generic_write_checks() into fs/nfs/direct.c Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index df4dd8e7e62e..c26847c84d00 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -629,12 +629,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result) return result; - if (iocb->ki_flags & IOCB_DIRECT) { - result = generic_write_checks(iocb, from); - if (result <= 0) - return result; + if (iocb->ki_flags & IOCB_DIRECT) return nfs_file_direct_write(iocb, from); - } dprintk("NFS: write(%pD2, %zu@%Ld)\n", file, count, (long long) iocb->ki_pos); -- cgit v1.2.3 From 18290650b1c8655cfe6e0d63dd34942a037a130b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 23 Jun 2016 15:00:42 -0400 Subject: NFS: Move buffered I/O locking into nfs_file_write() Preparation for the patch that de-serialises O_DIRECT reads and writes. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c26847c84d00..46cf0afe3c0f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -623,7 +623,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(file); unsigned long written = 0; ssize_t result; - size_t count = iov_iter_count(from); result = nfs_key_timeout_notify(file, inode); if (result) @@ -633,9 +632,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) return nfs_file_direct_write(iocb, from); dprintk("NFS: write(%pD2, %zu@%Ld)\n", - file, count, (long long) iocb->ki_pos); + file, iov_iter_count(from), (long long) iocb->ki_pos); - result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; /* @@ -647,28 +645,33 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) goto out; } - result = count; - if (!count) + inode_lock(inode); + result = generic_write_checks(iocb, from); + if (result > 0) { + current->backing_dev_info = inode_to_bdi(inode); + result = generic_perform_write(file, from, iocb->ki_pos); + current->backing_dev_info = NULL; + } + inode_unlock(inode); + if (result <= 0) goto out; - result = generic_file_write_iter(iocb, from); - if (result > 0) - written = result; + written = generic_write_sync(iocb, result); + iocb->ki_pos += written; /* Return error values */ - if (result >= 0 && nfs_need_check_write(file, inode)) { + if (nfs_need_check_write(file, inode)) { int err = vfs_fsync(file, 0); if (err < 0) result = err; } - if (result > 0) - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); out: return result; out_swapfile: printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); - goto out; + return -EBUSY; } EXPORT_SYMBOL_GPL(nfs_file_write); -- cgit v1.2.3 From a5864c999de6703f7ce908f72337568520c6cad3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 3 Jun 2016 17:07:19 -0400 Subject: NFS: Do not serialise O_DIRECT reads and writes Allow dio requests to be scheduled in parallel, but ensuring that they do not conflict with buffered I/O. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 46cf0afe3c0f..9f8da9e1b23f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -170,12 +170,14 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) iocb->ki_filp, iov_iter_count(to), (unsigned long) iocb->ki_pos); - result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping); + nfs_start_io_read(inode); + result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { result = generic_file_read_iter(iocb, to); if (result > 0) nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); } + nfs_end_io_read(inode); return result; } EXPORT_SYMBOL_GPL(nfs_file_read); @@ -191,12 +193,14 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", filp, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_mapping_protected(inode, filp->f_mapping); + nfs_start_io_read(inode); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (!res) { res = generic_file_splice_read(filp, ppos, pipe, count, flags); if (res > 0) nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res); } + nfs_end_io_read(inode); return res; } EXPORT_SYMBOL_GPL(nfs_file_splice_read); @@ -645,14 +649,14 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) goto out; } - inode_lock(inode); + nfs_start_io_write(inode); result = generic_write_checks(iocb, from); if (result > 0) { current->backing_dev_info = inode_to_bdi(inode); result = generic_perform_write(file, from, iocb->ki_pos); current->backing_dev_info = NULL; } - inode_unlock(inode); + nfs_end_io_write(inode); if (result <= 0) goto out; -- cgit v1.2.3 From f508d46ae41a796036aef566637685dbf83b554f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 23 Jun 2016 09:55:48 -0400 Subject: NFS: Remove redundant waits for O_DIRECT in fsync() and write_begin() We're now waiting immediately after taking the locks, so waiting in fsync() and write_begin() is either redundant or potentially subject to livelock (if not holding the lock). Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 9f8da9e1b23f..0e9b4a068f13 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -276,7 +276,6 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) trace_nfs_fsync_enter(inode); - inode_dio_wait(inode); do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret != 0) @@ -361,11 +360,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file, mapping->host->i_ino, len, (long long) pos); start: - /* - * Wait for O_DIRECT to complete - */ - inode_dio_wait(mapping->host); - page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; -- cgit v1.2.3 From 9a773e7c8de2a34ae682624624e95a96b121b6d1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 23 Jun 2016 11:09:04 -0400 Subject: NFS nfs_vm_page_mkwrite: Don't freeze me, Bro... Prevent filesystem freezes while handling the write page fault. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs/file.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0e9b4a068f13..039d58790629 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -569,6 +569,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) filp, filp->f_mapping->host->i_ino, (long long)page_offset(page)); + sb_start_pagefault(inode->i_sb); + /* make sure the cache has finished storing the page */ nfs_fscache_wait_on_page_write(NFS_I(inode), page); @@ -595,6 +597,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) out_unlock: unlock_page(page); out: + sb_end_pagefault(inode->i_sb); return ret; } -- cgit v1.2.3