From bde52788bdb755b9e4b75db6c434f30e32a0ca0b Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 13 Sep 2013 19:19:54 +0400 Subject: fuse: wait for writeback in fuse_file_fallocate() The patch fixes a race between mmap-ed write and fallocate(PUNCH_HOLE): 1) An user makes a page dirty via mmap-ed write. 2) The user performs fallocate(2) with mode == PUNCH_HOLE|KEEP_SIZE and covering the page. 3) Before truncate_pagecache_range call from fuse_file_fallocate, the page goes to write-back. The page is fully processed by fuse_writepage (including end_page_writeback on the page), but fuse_flush_writepages did nothing because fi->writectr < 0. 4) truncate_pagecache_range is called and fuse_file_fallocate is finishing by calling fuse_release_nowrite. The latter triggers processing queued write-back request which will write stale data to the hole soon. Changed in v2 (thanks to Brian for suggestion): - Do not truncate page cache until FUSE_FALLOCATE succeeded. Otherwise, we can end up in returning -ENOTSUPP while user data is already punched from page cache. Use filemap_write_and_wait_range() instead. Changed in v3 (thanks to Miklos for suggestion): - fuse_wait_on_writeback() is prone to livelocks; use fuse_set_nowrite() instead. So far as we need a dirty-page barrier only, fuse_sync_writes() should be enough. - rebased to for-linus branch of fuse.git Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi Cc: stable@vger.kernel.org --- fs/fuse/file.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d409deafc67b..f9f07c4fa517 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2484,8 +2484,15 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (lock_inode) { mutex_lock(&inode->i_mutex); - if (mode & FALLOC_FL_PUNCH_HOLE) - fuse_set_nowrite(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { + loff_t endbyte = offset + length - 1; + err = filemap_write_and_wait_range(inode->i_mapping, + offset, endbyte); + if (err) + goto out; + + fuse_sync_writes(inode); + } } req = fuse_get_req_nopages(fc); @@ -2520,11 +2527,8 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, fuse_invalidate_attr(inode); out: - if (lock_inode) { - if (mode & FALLOC_FL_PUNCH_HOLE) - fuse_release_nowrite(inode); + if (lock_inode) mutex_unlock(&inode->i_mutex); - } return err; } -- cgit v1.2.3 From 0ab08f576b9e6a6b689fc6b4e632079b978e619b Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Fri, 13 Sep 2013 19:20:16 +0400 Subject: fuse: fix fallocate vs. ftruncate race A former patch introducing FUSE_I_SIZE_UNSTABLE flag provided detailed description of races between ftruncate and anyone who can extend i_size: > 1. As in the previous scenario fuse_dentry_revalidate() discovered that i_size > changed (due to our own fuse_do_setattr()) and is going to call > truncate_pagecache() for some 'new_size' it believes valid right now. But by > the time that particular truncate_pagecache() is called ... > 2. fuse_do_setattr() returns (either having called truncate_pagecache() or > not -- it doesn't matter). > 3. The file is extended either by write(2) or ftruncate(2) or fallocate(2). > 4. mmap-ed write makes a page in the extended region dirty. This patch adds necessary bits to fuse_file_fallocate() to protect from that race. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi Cc: stable@vger.kernel.org --- fs/fuse/file.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f9f07c4fa517..4598345ab87d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2467,6 +2467,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, { struct fuse_file *ff = file->private_data; struct inode *inode = file->f_inode; + struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = ff->fc; struct fuse_req *req; struct fuse_fallocate_in inarg = { @@ -2495,6 +2496,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, } } + if (!(mode & FALLOC_FL_KEEP_SIZE)) + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -2527,6 +2531,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, fuse_invalidate_attr(inode); out: + if (!(mode & FALLOC_FL_KEEP_SIZE)) + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + if (lock_inode) mutex_unlock(&inode->i_mutex); -- cgit v1.2.3 From 3c70b8eeda596069258772afabf2ab0b1aa017f0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Oct 2013 16:41:22 +0200 Subject: fuse: don't check_submounts_and_drop() in RCU walk If revalidate finds an invalid dentry in RCU walk mode, let the VFS deal with it instead of calling check_submounts_and_drop() which is not prepared for being called from RCU walk. Signed-off-by: Miklos Szeredi Cc: stable@vger.kernel.org --- fs/fuse/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 62b43b577bfc..9b16806f11da 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -259,7 +259,8 @@ out: invalid: ret = 0; - if (check_submounts_and_drop(entry) != 0) + + if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0) ret = 1; goto out; } -- cgit v1.2.3 From 6314efee3cfeea2da12dbc05edfa20e5a42391bd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Oct 2013 16:41:22 +0200 Subject: fuse: readdirplus: fix RCU walk Doing dput(parent) is not valid in RCU walk mode. In RCU mode it would probably be okay to update the parent flags, but it's actually not necessary most of the time... So only set the FUSE_I_ADVISE_RDPLUS flag on the parent when the entry was recently initialized by READDIRPLUS. This is achieved by setting FUSE_I_INIT_RDPLUS on entries added by READDIRPLUS and only dropping out of RCU mode if this flag is set. FUSE_I_INIT_RDPLUS is cleared once the FUSE_I_ADVISE_RDPLUS flag is set in the parent. Reported-by: Al Viro Signed-off-by: Miklos Szeredi Cc: stable@vger.kernel.org --- fs/fuse/dir.c | 12 +++++++++--- fs/fuse/fuse_i.h | 2 ++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 9b16806f11da..a75159846a75 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -182,6 +182,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) struct inode *inode; struct dentry *parent; struct fuse_conn *fc; + struct fuse_inode *fi; int ret; inode = ACCESS_ONCE(entry->d_inode); @@ -228,7 +229,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) if (!err && !outarg.nodeid) err = -ENOENT; if (!err) { - struct fuse_inode *fi = get_fuse_inode(inode); + fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { fuse_queue_forget(fc, forget, outarg.nodeid, 1); goto invalid; @@ -246,8 +247,11 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) attr_version); fuse_change_entry_timeout(entry, &outarg); } else if (inode) { - fc = get_fuse_conn(inode); - if (fc->readdirplus_auto) { + fi = get_fuse_inode(inode); + if (flags & LOOKUP_RCU) { + if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) + return -ECHILD; + } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { parent = dget_parent(entry); fuse_advise_use_readdirplus(parent->d_inode); dput(parent); @@ -1292,6 +1296,8 @@ static int fuse_direntplus_link(struct file *file, } found: + if (fc->readdirplus_auto) + set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state); fuse_change_entry_timeout(dentry, o); err = 0; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5ced199b50bb..5b9e6f3b6aef 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -115,6 +115,8 @@ struct fuse_inode { enum { /** Advise readdirplus */ FUSE_I_ADVISE_RDPLUS, + /** Initialized with readdirplus */ + FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, }; -- cgit v1.2.3 From 698fa1d163c560343b8012a0a916440d076b6c8a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Oct 2013 16:41:23 +0200 Subject: fuse: no RCU mode in fuse_access() fuse_access() is never called in RCU walk, only on the final component of access(2) and chdir(2)... Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index a75159846a75..b7989f2ab4c4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1068,6 +1068,8 @@ static int fuse_access(struct inode *inode, int mask) struct fuse_access_in inarg; int err; + BUG_ON(mask & MAY_NOT_BLOCK); + if (fc->no_access) return 0; @@ -1155,9 +1157,6 @@ static int fuse_permission(struct inode *inode, int mask) noticed immediately, only after the attribute timeout has expired */ } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { -- cgit v1.2.3