summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_iomap.c
diff options
context:
space:
mode:
author: Brian Foster <bfoster@redhat.com> 2017-03-08 09:58:08 -0800
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-04-08 09:30:32 +0200
commit: da617af8f0c6fa9cd2694440529f5edf99c0c6d1 (patch)
tree: c786239646bcfb00f48fdfd20b318e82079ab7fa /fs/xfs/xfs_iomap.c
parent: 77aedb0cbe6aa45338a6e59afa995fde37133bf0 (diff)
xfs: use iomap new flag for newly allocated delalloc blocks
commit f65e6fad293b3a5793b7fa2044800506490e7a2e upstream.

Commit fa7f138 ("xfs: clear delalloc and cache on buffered write failure") fixed one regression in the iomap error handling code and exposed another. The fundamental problem is that if a buffered write is a rewrite of preexisting delalloc blocks and the write fails, the failure handling code can punch out preexisting blocks with valid file data.

This was reproduced directly by sub-block writes in the LTP kernel/syscalls/write/write03 test. A first 100 byte write allocates a single block in a file. A subsequent 100 byte write fails and punches out the block, including the data successfully written by the previous write.

To address this problem, update the ->iomap_begin() handler to distinguish newly allocated delalloc blocks from preexisting delalloc blocks via the IOMAP_F_NEW flag. Use this flag in the ->iomap_end() handler to decide when a failed or short write should punch out delalloc blocks.

This introduces the subtle requirement that ->iomap_begin() should never combine newly allocated delalloc blocks with existing blocks in the resulting iomap descriptor. This can occur when a new delalloc reservation merges with a neighboring extent that is part of the current write, for example. Therefore, drop the post-allocation extent lookup from xfs_bmapi_reserve_delalloc() and just return the record inserted into the fork. This ensures only new blocks are returned and thus that preexisting delalloc blocks are always handled as "found" blocks and not punched out on a failed rewrite.

Reported-by: Xiong Zhou <xzhou@redhat.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs/xfs/xfs_iomap.c')
-rw-r--r-- fs/xfs/xfs_iomap.c | 16
1 files changed, 11 insertions, 5 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5211887cbcd2..360562484e7b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -637,6 +637,11 @@ retry:
goto out_unlock;
}
+ /*
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+ * them out if the write happens to fail.
+ */
+ iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -1061,7 +1066,8 @@ xfs_file_iomap_end_delalloc(
struct xfs_inode *ip,
loff_t offset,
loff_t length,
- ssize_t written)
+ ssize_t written,
+ struct iomap *iomap)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_fsb;
@@ -1080,14 +1086,14 @@ xfs_file_iomap_end_delalloc(
end_fsb = XFS_B_TO_FSB(mp, offset + length);
/*
- * Trim back delalloc blocks if we didn't manage to write the whole
- * range reserved.
+ * Trim delalloc blocks if they were allocated by this write and we
+ * didn't manage to write the whole range.
*
* We don't need to care about racing delalloc as we hold i_mutex
* across the reserve/allocate/unreserve calls. If there are delalloc
* blocks in the range, they are ours.
*/
- if (start_fsb < end_fsb) {
+ if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
XFS_FSB_TO_B(mp, end_fsb) - 1);
@@ -1117,7 +1123,7 @@ xfs_file_iomap_end(
{
if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
- length, written);
+ length, written, iomap);
return 0;
}