From 8acd5e9b1217e58a57124d9e225afa12efeae20d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 00:09:19 -0400 Subject: ext4: fix error handling in ext4_ext_truncate() Previously ext4_ext_truncate() was ignoring potential error returns from ext4_es_remove_extent() and ext4_ext_remove_space(). This can lead to the on-diks extent tree and the extent status tree cache getting out of sync, which is particuarlly bad, and can lead to file system corruption and potential data loss. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7097b0f680e6..f57cc0e7f1bc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4405,9 +4405,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); +retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); + if (err == ENOMEM) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (err) { + ext4_std_error(inode->i_sb, err); + return; + } err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + ext4_std_error(inode->i_sb, err); } static void ext4_falloc_update_inode(struct inode *inode, -- cgit v1.2.3 From c8e15130e1636f68d5165aa2605b8e9cba0f644c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 00:09:37 -0400 Subject: ext4: simplify calculation of blocks to free on error In ext4_ext_map_blocks(), if we have successfully allocated the data blocks, but then run into trouble inserting the extent into the extent tree, most likely due to an ENOSPC condition, determine the arguments to ext4_free_blocks() in a simpler way which is easier to prove to be correct. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f57cc0e7f1bc..593091537e76 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4261,8 +4261,8 @@ got_allocated_blocks: /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), fb_flags); + ext4_free_blocks(handle, inode, NULL, newblock, + EXT4_C2B(sbi, allocated_clusters), fb_flags); goto out2; } -- cgit v1.2.3 From 76828c882630ced08b5ddce22cc0095b05de9bc5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 12:27:47 -0400 Subject: ext4: yield during large unlinks During large unlink operations on files with extents, we can use a lot of CPU time. This adds a cond_resched() call when starting to examine the next level of a multi-level extent tree. Multi-level extent trees are rare in the first place, and this should rarely be executed. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 593091537e76..cfdc51e30257 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2835,6 +2835,9 @@ again: err = -EIO; break; } + /* Yield here to deal with large extent trees. + * Should be a no-op if we did IO above. */ + cond_resched(); if (WARN_ON(i + 1 > depth)) { err = -EIO; break; -- cgit v1.2.3 From 63b999685cb372e24eb73f255cd73547026370fd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 16 Jul 2013 10:28:47 -0400 Subject: ext4: call ext4_es_lru_add() after handling cache miss If there are no items in the extent status tree, ext4_es_lru_add() is a no-op. So it is not sufficient to call ext4_es_lru_add() before we try to lookup an entry in the extent status tree. We also need to call it at the end of ext4_ext_map_blocks(), after items have been added to the extent status tree. This could lead to inodes with that have extent status trees but which are not in the LRU list, which means they won't get considered for eviction by the es_shrinker. Signed-off-by: "Theodore Ts'o" Cc: Zheng Liu Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index cfdc51e30257..a61873808f76 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4385,8 +4385,9 @@ out2: } out3: - trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); - + trace_ext4_ext_map_blocks_exit(inode, flags, map, + err ? err : allocated); + ext4_es_lru_add(inode); return err ? err : allocated; } -- cgit v1.2.3