From 034fb4c95fc0fed4ec4a50778127b92c6f2aec01 Mon Sep 17 00:00:00 2001 From: Surbhi Palande Date: Mon, 14 Dec 2009 09:53:52 -0500 Subject: ext4: replace BUG() with return -EIO in ext4_ext_get_blocks This patch fixes the Kernel BZ #14286. When the address of an extent corresponding to a valid block is corrupted, a -EIO should be reported instead of a BUG(). This situation should not normally not occur except in the case of a corrupted filesystem. If however it does, then the system should not panic directly but depending on the mount time options appropriate action should be taken. If the mount options so permit, the I/O should be gracefully aborted by returning a -EIO. http://bugzilla.kernel.org/show_bug.cgi?id=14286 Signed-off-by: Surbhi Palande Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3a7928f825e4..8fd6c567964a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3190,7 +3190,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * this situation is possible, though, _during_ tree modification; * this is why assert can't be put in ext4_ext_find_extent() */ - BUG_ON(path[depth].p_ext == NULL && depth != 0); + if (path[depth].p_ext == NULL && depth != 0) { + ext4_error(inode->i_sb, __func__, "bad extent address " + "inode: %lu, iblock: %d, depth: %d", + inode->i_ino, iblock, depth); + err = -EIO; + goto out2; + } eh = path[depth].p_hdr; ex = path[depth].p_ext; -- cgit v1.2.3 From 515f41c33a9d44a964264c9511ad2c869af1fac3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Dec 2009 23:39:06 -0500 Subject: ext4: Ensure zeroout blocks have no dirty metadata This fixes a bug (found by Curt Wohlgemuth) in which new blocks returned from an extent created with ext4_ext_zeroout() can have dirty metadata still associated with them. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8fd6c567964a..91ae46098ea4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3023,6 +3023,14 @@ out: return err; } +static void unmap_underlying_metadata_blocks(struct block_device *bdev, + sector_t block, int count) +{ + int i; + for (i = 0; i < count; i++) + unmap_underlying_metadata(bdev, block + i); +} + static int ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int max_blocks, @@ -3098,6 +3106,18 @@ out: } else allocated = ret; set_buffer_new(bh_result); + /* + * if we allocated more blocks than requested + * we need to make sure we unmap the extra block + * allocated. The actual needed block will get + * unmapped later when we find the buffer_head marked + * new. + */ + if (allocated > max_blocks) { + unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, + newblock + max_blocks, + allocated - max_blocks); + } map_out: set_buffer_mapped(bh_result); out1: -- cgit v1.2.3 From 9d0be50230b333005635967f7ecd4897dbfd181b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 Jan 2010 02:41:30 -0500 Subject: ext4: Calculate metadata requirements more accurately In the past, ext4_calc_metadata_amount(), and its sub-functions ext4_ext_calc_metadata_amount() and ext4_indirect_calc_metadata_amount() badly over-estimated the number of metadata blocks that might be required for delayed allocation blocks. This didn't matter as much when functions which managed the reserved metadata blocks were more aggressive about dropping reserved metadata blocks as delayed allocation blocks were written, but unfortunately they were too aggressive. This was fixed in commit 0637c6f, but as a result the over-estimation by ext4_calc_metadata_amount() would lead to reserving 2-3 times the number of pending delayed allocation blocks as potentially required metadata blocks. So if there are 1 megabytes of blocks which have been not yet been allocation, up to 3 megabytes of space would get reserved out of the user's quota and from the file system free space pool until all of the inode's data blocks have been allocated. This commit addresses this problem by much more accurately estimating the number of metadata blocks that will be required. It will still somewhat over-estimate the number of blocks needed, since it must make a worst case estimate not knowing which physical blocks will be needed, but it is much more accurate than before. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 91ae46098ea4..7d7b74e94687 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) * to allocate @blocks * Worse case is one block per extent */ -int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) +int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) { - int lcap, icap, rcap, leafs, idxs, num; - int newextents = blocks; - - rcap = ext4_ext_space_root_idx(inode, 0); - lcap = ext4_ext_space_block(inode, 0); - icap = ext4_ext_space_block_idx(inode, 0); + struct ext4_inode_info *ei = EXT4_I(inode); + int idxs, num = 0; - /* number of new leaf blocks needed */ - num = leafs = (newextents + lcap - 1) / lcap; + idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent_idx)); /* - * Worse case, we need separate index block(s) - * to link all new leaf blocks + * If the new delayed allocation block is contiguous with the + * previous da block, it can share index blocks with the + * previous block, so we only need to allocate a new index + * block every idxs leaf blocks. At ldxs**2 blocks, we need + * an additional index block, and at ldxs**3 blocks, yet + * another index blocks. */ - idxs = (leafs + icap - 1) / icap; - do { - num += idxs; - idxs = (idxs + icap - 1) / icap; - } while (idxs > rcap); + if (ei->i_da_metadata_calc_len && + ei->i_da_metadata_calc_last_lblock+1 == lblock) { + if ((ei->i_da_metadata_calc_len % idxs) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { + num++; + ei->i_da_metadata_calc_len = 0; + } else + ei->i_da_metadata_calc_len++; + ei->i_da_metadata_calc_last_lblock++; + return num; + } - return num; + /* + * In the worst case we need a new set of index blocks at + * every level of the inode's extent tree. + */ + ei->i_da_metadata_calc_len = 1; + ei->i_da_metadata_calc_last_lblock = lblock; + return ext_depth(inode) + 1; } static int -- cgit v1.2.3