From 76d2e3890fb169168c73f2e4f8375c7cc24a765e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Aug 2025 07:25:20 -0700 Subject: NFS: Fix a race when updating an existing write After nfs_lock_and_join_requests() tests for whether the request is still attached to the mapping, nothing prevents a call to nfs_inode_remove_request() from succeeding until we actually lock the page group. The reason is that whoever called nfs_inode_remove_request() doesn't necessarily have a lock on the page group head. So in order to avoid races, let's take the page group lock earlier in nfs_lock_and_join_requests(), and hold it across the removal of the request in nfs_inode_remove_request(). Reported-by: Jeff Layton Tested-by: Joe Quanaim Tested-by: Andrew Steffen Reviewed-by: Jeff Layton Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fa5c41d0989a..8b7c04737967 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -153,20 +153,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) } } -static int -nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) { - int ret; - - if (!test_bit(PG_REMOVE, &req->wb_flags)) - return 0; - ret = nfs_page_group_lock(req); - if (ret) - return ret; if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) nfs_page_set_inode_ref(req, inode); - nfs_page_group_unlock(req); - return 0; } /** @@ -585,19 +575,18 @@ retry: } } + ret = nfs_page_group_lock(head); + if (ret < 0) + goto out_unlock; + /* Ensure that nobody removed the request before we locked it */ if (head != folio->private) { + nfs_page_group_unlock(head); nfs_unlock_and_release_request(head); goto retry; } - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) - goto out_unlock; - - ret = nfs_page_group_lock(head); - if (ret < 0) - goto out_unlock; + nfs_cancel_remove_inode(head, inode); /* lock each request in the page group */ for (subreq = head->wb_this_page; @@ -786,7 +775,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); - if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + nfs_page_group_lock(req); + if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio->mapping; @@ -798,6 +788,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } spin_unlock(&mapping->i_private_lock); } + nfs_page_group_unlock(req); if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { atomic_long_dec(&nfsi->nrequests); -- cgit v1.2.3 From b7b8574225e9d2b5f1fb5483886ab797892f43b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Sep 2025 11:48:57 -0400 Subject: NFS: nfs_invalidate_folio() must observe the offset and size arguments If we're truncating part of the folio, then we need to write out the data on the part that is not covered by the cancellation. Fixes: d47992f86b30 ("mm: change invalidatepage prototype to accept length") Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8b7c04737967..e359fbcdc8a0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2045,6 +2045,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) * release it */ nfs_inode_remove_request(req); nfs_unlock_and_release_request(req); + folio_cancel_dirty(folio); } return ret; -- cgit v1.2.3 From c12b6a7b12a13ccd3aece6be09345c1944e18d3e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Sep 2025 20:11:03 -0400 Subject: NFS: Fix the marking of the folio as up to date Since all callers of nfs_page_group_covers_page() have already ensured that there is only one group member, all that is required is to check that the entire folio contains dirty data. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 52 +++++----------------------------------------------- 1 file changed, 5 insertions(+), 47 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e359fbcdc8a0..647c53d1418a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -237,59 +237,17 @@ static void nfs_mapping_set_error(struct folio *folio, int error) } /* - * nfs_page_group_search_locked - * @head - head request of page group - * @page_offset - offset into page + * nfs_page_covers_folio + * @req: struct nfs_page * - * Search page group with head @head to find a request that contains the - * page offset @page_offset. - * - * Returns a pointer to the first matching nfs request, or NULL if no - * match is found. - * - * Must be called with the page group lock held - */ -static struct nfs_page * -nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset) -{ - struct nfs_page *req; - - req = head; - do { - if (page_offset >= req->wb_pgbase && - page_offset < (req->wb_pgbase + req->wb_bytes)) - return req; - - req = req->wb_this_page; - } while (req != head); - - return NULL; -} - -/* - * nfs_page_group_covers_page - * @head - head request of page group - * - * Return true if the page group with head @head covers the whole page, - * returns false otherwise + * Return true if the request covers the whole folio. + * Note that the caller should ensure all subrequests have been joined */ static bool nfs_page_group_covers_page(struct nfs_page *req) { unsigned int len = nfs_folio_length(nfs_page_to_folio(req)); - struct nfs_page *tmp; - unsigned int pos = 0; - - nfs_page_group_lock(req); - for (;;) { - tmp = nfs_page_group_search_locked(req->wb_head, pos); - if (!tmp) - break; - pos = tmp->wb_pgbase + tmp->wb_bytes; - } - - nfs_page_group_unlock(req); - return pos >= len; + return req->wb_pgbase == 0 && req->wb_bytes == len; } /* We can set the PG_uptodate flag if we see that a write request -- cgit v1.2.3