From 197217a5af79c23609da03eda2a52ee8603eec52 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 May 2011 11:45:29 -0400 Subject: ext4: add a function merging extents right and left 1) Rename ext4_ext_try_to_merge() to ext4_ext_try_to_merge_right(). 2) Add a new function ext4_ext_try_to_merge() which tries to merge an extent both left and right. 3) Use the new function in ext4_ext_convert_unwritten_endio() and ext4_ext_insert_extent(). Signed-off-by: Yongqiang Yang Tested-by: Allison Henderson --- fs/ext4/extents.c | 65 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 29 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4890d6f3ad15..23a98b6f00a2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1563,7 +1563,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns * 1 if they got merged. */ -static int ext4_ext_try_to_merge(struct inode *inode, +static int ext4_ext_try_to_merge_right(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex) { @@ -1602,6 +1602,31 @@ static int ext4_ext_try_to_merge(struct inode *inode, return merge_done; } +/* + * This function tries to merge the @ex extent to neighbours in the tree. + * return 1 if merge left else 0. + */ +static int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex) { + struct ext4_extent_header *eh; + unsigned int depth; + int merge_done = 0; + int ret = 0; + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + eh = path[depth].p_hdr; + + if (ex > EXT_FIRST_EXTENT(eh)) + merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); + + if (!merge_done) + ret = ext4_ext_try_to_merge_right(inode, path, ex); + + return ret; +} + /* * check if a portion of the "newext" extent overlaps with an * existing extent. @@ -3039,6 +3064,7 @@ fix_extent_len: ext4_ext_dirty(handle, inode, path + depth); return err; } + static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) @@ -3047,46 +3073,27 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct ext4_extent_header *eh; int depth; int err = 0; - int ret = 0; depth = ext_depth(inode); eh = path[depth].p_hdr; ex = path[depth].p_ext; + ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" + "block %llu, max_blocks %u\n", inode->i_ino, + (unsigned long long)le32_to_cpu(ex->ee_block), + ext4_ext_get_actual_len(ex)); + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; /* first mark the extent as initialized */ ext4_ext_mark_initialized(ex); - /* - * We have to see if it can be merged with the extent - * on the left. + /* note: ext4_ext_correct_indexes() isn't needed here because + * borders are not changed */ - if (ex > EXT_FIRST_EXTENT(eh)) { - /* - * To merge left, pass "ex - 1" to try_to_merge(), - * since it merges towards right _only_. - */ - ret = ext4_ext_try_to_merge(inode, path, ex - 1); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); - if (err) - goto out; - depth = ext_depth(inode); - ex--; - } - } - /* - * Try to Merge towards right. - */ - ret = ext4_ext_try_to_merge(inode, path, ex); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); - if (err) - goto out; - depth = ext_depth(inode); - } + ext4_ext_try_to_merge(inode, path, ex); + /* Mark modified extent as dirty */ err = ext4_ext_dirty(handle, inode, path + depth); out: -- cgit v1.2.3 From 47ea3bb59cf47298b1cbb4d7bef056b3afea0879 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 May 2011 12:23:07 -0400 Subject: ext4: add ext4_split_extent_at() and ext4_split_extent() Add two functions: ext4_split_extent_at(), which splits an extent into two extents at given logical block, and ext4_split_extent() which splits an extent into three extents. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Tested-by: Allison Henderson --- fs/ext4/extents.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 23a98b6f00a2..10317eb1d2a9 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2554,6 +2554,193 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) return ret; } +/* + * used by extent splitting. + */ +#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ + due to ENOSPC */ +#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ +#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ + +/* + * ext4_split_extent_at() splits an extent at given block. + * + * @handle: the journal handle + * @inode: the file inode + * @path: the path to the extent + * @split: the logical block where the extent is splitted. + * @split_flags: indicates if the extent could be zeroout if split fails, and + * the states(init or uninit) of new extents. + * @flags: flags used to insert new extent to extent tree. + * + * + * Splits extent [a, b] into two extents [a, @split) and [@split, b], states + * of which are deterimined by split_flag. + * + * There are two cases: + * a> the extent are splitted into two extent. + * b> split is not needed, and just mark the extent. + * + * return 0 on success. + */ +static int ext4_split_extent_at(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t split, + int split_flag, + int flags) +{ + ext4_fsblk_t newblock; + ext4_lblk_t ee_block; + struct ext4_extent *ex, newex, orig_ex; + struct ext4_extent *ex2 = NULL; + unsigned int ee_len, depth; + int err = 0; + + ext_debug("ext4_split_extents_at: inode %lu, logical" + "block %llu\n", inode->i_ino, (unsigned long long)split); + + ext4_ext_show_leaf(inode, path); + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + newblock = split - ee_block + ext4_ext_pblock(ex); + + BUG_ON(split < ee_block || split >= (ee_block + ee_len)); + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + + if (split == ee_block) { + /* + * case b: block @split is the block that the extent begins with + * then we just change the state of the extent, and splitting + * is not needed. + */ + if (split_flag & EXT4_EXT_MARK_UNINIT2) + ext4_ext_mark_uninitialized(ex); + else + ext4_ext_mark_initialized(ex); + + if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) + ext4_ext_try_to_merge(inode, path, ex); + + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; + } + + /* case a */ + memcpy(&orig_ex, ex, sizeof(orig_ex)); + ex->ee_len = cpu_to_le16(split - ee_block); + if (split_flag & EXT4_EXT_MARK_UNINIT1) + ext4_ext_mark_uninitialized(ex); + + /* + * path may lead to new leaf, not to original leaf any more + * after ext4_ext_insert_extent() returns, + */ + err = ext4_ext_dirty(handle, inode, path + depth); + if (err) + goto fix_extent_len; + + ex2 = &newex; + ex2->ee_block = cpu_to_le32(split); + ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); + ext4_ext_store_pblock(ex2, newblock); + if (split_flag & EXT4_EXT_MARK_UNINIT2) + ext4_ext_mark_uninitialized(ex2); + + err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ + ex->ee_len = cpu_to_le32(ee_len); + ext4_ext_try_to_merge(inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; + } else if (err) + goto fix_extent_len; + +out: + ext4_ext_show_leaf(inode, path); + return err; + +fix_extent_len: + ex->ee_len = orig_ex.ee_len; + ext4_ext_dirty(handle, inode, path + depth); + return err; +} + +/* + * ext4_split_extents() splits an extent and mark extent which is covered + * by @map as split_flags indicates + * + * It may result in splitting the extent into multiple extents (upto three) + * There are three possibilities: + * a> There is no split required + * b> Splits in two extents: Split is happening at either end of the extent + * c> Splits in three extents: Somone is splitting in middle of the extent + * + */ +static int ext4_split_extent(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + struct ext4_map_blocks *map, + int split_flag, + int flags) +{ + ext4_lblk_t ee_block; + struct ext4_extent *ex; + unsigned int ee_len, depth; + int err = 0; + int uninitialized; + int split_flag1, flags1; + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + uninitialized = ext4_ext_is_uninitialized(ex); + + if (map->m_lblk + map->m_len < ee_block + ee_len) { + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? + EXT4_EXT_MAY_ZEROOUT : 0; + flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; + if (uninitialized) + split_flag1 |= EXT4_EXT_MARK_UNINIT1 | + EXT4_EXT_MARK_UNINIT2; + err = ext4_split_extent_at(handle, inode, path, + map->m_lblk + map->m_len, split_flag1, flags1); + } + + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, map->m_lblk, path); + if (IS_ERR(path)) + return PTR_ERR(path); + + if (map->m_lblk >= ee_block) { + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? + EXT4_EXT_MAY_ZEROOUT : 0; + if (uninitialized) + split_flag1 |= EXT4_EXT_MARK_UNINIT1; + if (split_flag & EXT4_EXT_MARK_UNINIT2) + split_flag1 |= EXT4_EXT_MARK_UNINIT2; + err = ext4_split_extent_at(handle, inode, path, + map->m_lblk, split_flag1, flags); + if (err) + goto out; + } + + ext4_ext_show_leaf(inode, path); +out: + return err ? err : map->m_len; +} + #define EXT4_EXT_ZERO_LEN 7 /* * This function is called by ext4_ext_map_blocks() if someone tries to write -- cgit v1.2.3 From 667eff35a1f56fa74ce98a0c7c29a40adc1ba4e3 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 3 May 2011 12:25:07 -0400 Subject: ext4: reimplement convert and split_unwritten Reimplement ext4_ext_convert_to_initialized() and ext4_split_unwritten_extents() using ext4_split_extent() Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Tested-by: Allison Henderson --- fs/ext4/extents.c | 480 ++++++++---------------------------------------------- 1 file changed, 72 insertions(+), 408 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 10317eb1d2a9..e363f21b091b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2757,17 +2757,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, struct ext4_map_blocks *map, struct ext4_ext_path *path) { - struct ext4_extent *ex, newex, orig_ex; - struct ext4_extent *ex1 = NULL; - struct ext4_extent *ex2 = NULL; - struct ext4_extent *ex3 = NULL; - struct ext4_extent_header *eh; + struct ext4_map_blocks split_map; + struct ext4_extent zero_ex; + struct ext4_extent *ex; ext4_lblk_t ee_block, eof_block; unsigned int allocated, ee_len, depth; - ext4_fsblk_t newblock; int err = 0; - int ret = 0; - int may_zeroout; + int split_flag = 0; ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, @@ -2779,280 +2775,87 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, eof_block = map->m_lblk + map->m_len; depth = ext_depth(inode); - eh = path[depth].p_hdr; ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); allocated = ee_len - (map->m_lblk - ee_block); - newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); - - ex2 = ex; - orig_ex.ee_block = ex->ee_block; - orig_ex.ee_len = cpu_to_le16(ee_len); - ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); + WARN_ON(map->m_lblk < ee_block); /* * It is safe to convert extent to initialized via explicit * zeroout only if extent is fully insde i_size or new_size. */ - may_zeroout = ee_block + ee_len <= eof_block; + split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ - if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); + if (ee_len <= 2*EXT4_EXT_ZERO_LEN && + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + err = ext4_ext_zeroout(inode, ex); if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ - return allocated; - } - - /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ - if (map->m_lblk > ee_block) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* - * for sanity, update the length of the ex2 extent before - * we insert ex3, if ex1 is NULL. This is to avoid temporary - * overlap of blocks. - */ - if (!ex1 && allocated > map->m_len) - ex2->ee_len = cpu_to_le16(map->m_len); - /* ex3: to ee_block + ee_len : uninitialised */ - if (allocated > map->m_len) { - unsigned int newdepth; - /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ - if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { - /* - * map->m_lblk == ee_block is handled by the zerouout - * at the beginning. - * Mark first half uninitialized. - * Mark second half initialized and zero out the - * initialized extent - */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = cpu_to_le16(ee_len - allocated); - ext4_ext_mark_uninitialized(ex); - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - - ex3 = &newex; - ex3->ee_block = cpu_to_le32(map->m_lblk); - ext4_ext_store_pblock(ex3, newblock); - ex3->ee_len = cpu_to_le16(allocated); - err = ext4_ext_insert_extent(handle, inode, path, - ex3, 0); - if (err == -ENOSPC) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, - ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* blocks available from map->m_lblk */ - return allocated; - - } else if (err) - goto fix_extent_len; - - /* - * We need to zero out the second half because - * an fallocate request can update file size and - * converting the second half to initialized extent - * implies that we can leak some junk data to user - * space. - */ - err = ext4_ext_zeroout(inode, ex3); - if (err) { - /* - * We should actually mark the - * second half as uninit and return error - * Insert would have changed the extent - */ - depth = ext_depth(inode); - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, - path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - return err; - } - /* get the second half extent details */ - ex = path[depth].p_ext; - err = ext4_ext_get_access(handle, inode, - path + depth); - if (err) - return err; - ext4_ext_mark_uninitialized(ex); - ext4_ext_dirty(handle, inode, path + depth); - return err; - } - - /* zeroed the second half */ - return allocated; - } - ex3 = &newex; - ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); - ext4_ext_store_pblock(ex3, newblock + map->m_len); - ex3->ee_len = cpu_to_le16(allocated - map->m_len); - ext4_ext_mark_uninitialized(ex3); - err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); - if (err == -ENOSPC && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ - /* blocks available from map->m_lblk */ - return allocated; - - } else if (err) - goto fix_extent_len; - /* - * The depth, and hence eh & ex might change - * as part of the insert above. - */ - newdepth = ext_depth(inode); - /* - * update the extent length after successful insert of the - * split extent - */ - ee_len -= ext4_ext_get_actual_len(ex3); - orig_ex.ee_len = cpu_to_le16(ee_len); - may_zeroout = ee_block + ee_len <= eof_block; - - depth = newdepth; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); goto out; - } - eh = path[depth].p_hdr; - ex = path[depth].p_ext; - if (ex2 != &newex) - ex2 = ex; err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - - allocated = map->m_len; - - /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying - * to insert a extent in the middle zerout directly - * otherwise give the extent a chance to merge to left - */ - if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && - map->m_lblk != ee_block && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zero out the first half */ - /* blocks available from map->m_lblk */ - return allocated; - } - } - /* - * If there was a change of depth as part of the - * insertion of ex3 above, we need to update the length - * of the ex1 extent again here - */ - if (ex1 && ex1 != ex) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ - ex2->ee_block = cpu_to_le32(map->m_lblk); - ext4_ext_store_pblock(ex2, newblock); - ex2->ee_len = cpu_to_le16(allocated); - if (ex2 != ex) - goto insert; - /* - * New (initialized) extent starts from the first block - * in the current extent. i.e., ex2 == ex - * We have to see if it can be merged with the extent - * on the left. - */ - if (ex2 > EXT_FIRST_EXTENT(eh)) { - /* - * To merge left, pass "ex2 - 1" to try_to_merge(), - * since it merges towards right _only_. - */ - ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); - if (err) - goto out; - depth = ext_depth(inode); - ex2--; - } + ext4_ext_mark_initialized(ex); + ext4_ext_try_to_merge(inode, path, ex); + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; } + /* - * Try to Merge towards right. This might be required - * only when the whole extent is being written to. - * i.e. ex2 == ex and ex3 == NULL. + * four cases: + * 1. split the extent into three extents. + * 2. split the extent into two extents, zeroout the first half. + * 3. split the extent into two extents, zeroout the second half. + * 4. split the extent into two extents with out zeroout. */ - if (!ex3) { - ret = ext4_ext_try_to_merge(inode, path, ex2); - if (ret) { - err = ext4_ext_correct_indexes(handle, inode, path); + split_map.m_lblk = map->m_lblk; + split_map.m_len = map->m_len; + + if (allocated > map->m_len) { + if (allocated <= EXT4_EXT_ZERO_LEN && + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + /* case 3 */ + zero_ex.ee_block = + cpu_to_le32(map->m_lblk + map->m_len); + zero_ex.ee_len = cpu_to_le16(allocated - map->m_len); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex) + map->m_lblk - ee_block); + err = ext4_ext_zeroout(inode, &zero_ex); if (err) goto out; + split_map.m_lblk = map->m_lblk; + split_map.m_len = allocated; + } else if ((map->m_lblk - ee_block + map->m_len < + EXT4_EXT_ZERO_LEN) && + (EXT4_EXT_MAY_ZEROOUT & split_flag)) { + /* case 2 */ + if (map->m_lblk != ee_block) { + zero_ex.ee_block = ex->ee_block; + zero_ex.ee_len = cpu_to_le16(map->m_lblk - + ee_block); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex)); + err = ext4_ext_zeroout(inode, &zero_ex); + if (err) + goto out; + } + + allocated = map->m_lblk - ee_block + map->m_len; + + split_map.m_lblk = ee_block; + split_map.m_len = allocated; } } - /* Mark modified extent as dirty */ - err = ext4_ext_dirty(handle, inode, path + depth); - goto out; -insert: - err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); - if (err == -ENOSPC && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zero out the first half */ - return allocated; - } else if (err) - goto fix_extent_len; + + allocated = ext4_split_extent(handle, inode, path, + &split_map, split_flag, 0); + if (allocated < 0) + err = allocated; + out: - ext4_ext_show_leaf(inode, path); return err ? err : allocated; - -fix_extent_len: - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_mark_uninitialized(ex); - ext4_ext_dirty(handle, inode, path + depth); - return err; } /* @@ -3083,15 +2886,11 @@ static int ext4_split_unwritten_extents(handle_t *handle, struct ext4_ext_path *path, int flags) { - struct ext4_extent *ex, newex, orig_ex; - struct ext4_extent *ex1 = NULL; - struct ext4_extent *ex2 = NULL; - struct ext4_extent *ex3 = NULL; - ext4_lblk_t ee_block, eof_block; - unsigned int allocated, ee_len, depth; - ext4_fsblk_t newblock; - int err = 0; - int may_zeroout; + ext4_lblk_t eof_block; + ext4_lblk_t ee_block; + struct ext4_extent *ex; + unsigned int ee_len; + int split_flag = 0, depth; ext_debug("ext4_split_unwritten_extents: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, @@ -3101,155 +2900,20 @@ static int ext4_split_unwritten_extents(handle_t *handle, inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) eof_block = map->m_lblk + map->m_len; - - depth = ext_depth(inode); - ex = path[depth].p_ext; - ee_block = le32_to_cpu(ex->ee_block); - ee_len = ext4_ext_get_actual_len(ex); - allocated = ee_len - (map->m_lblk - ee_block); - newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); - - ex2 = ex; - orig_ex.ee_block = ex->ee_block; - orig_ex.ee_len = cpu_to_le16(ee_len); - ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); - /* * It is safe to convert extent to initialized via explicit * zeroout only if extent is fully insde i_size or new_size. */ - may_zeroout = ee_block + ee_len <= eof_block; - - /* - * If the uninitialized extent begins at the same logical - * block where the write begins, and the write completely - * covers the extent, then we don't need to split it. - */ - if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) - return allocated; - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; - /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ - if (map->m_lblk > ee_block) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* - * for sanity, update the length of the ex2 extent before - * we insert ex3, if ex1 is NULL. This is to avoid temporary - * overlap of blocks. - */ - if (!ex1 && allocated > map->m_len) - ex2->ee_len = cpu_to_le16(map->m_len); - /* ex3: to ee_block + ee_len : uninitialised */ - if (allocated > map->m_len) { - unsigned int newdepth; - ex3 = &newex; - ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); - ext4_ext_store_pblock(ex3, newblock + map->m_len); - ex3->ee_len = cpu_to_le16(allocated - map->m_len); - ext4_ext_mark_uninitialized(ex3); - err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); - if (err == -ENOSPC && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ - /* blocks available from map->m_lblk */ - return allocated; - - } else if (err) - goto fix_extent_len; - /* - * The depth, and hence eh & ex might change - * as part of the insert above. - */ - newdepth = ext_depth(inode); - /* - * update the extent length after successful insert of the - * split extent - */ - ee_len -= ext4_ext_get_actual_len(ex3); - orig_ex.ee_len = cpu_to_le16(ee_len); - may_zeroout = ee_block + ee_len <= eof_block; - - depth = newdepth; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } - ex = path[depth].p_ext; - if (ex2 != &newex) - ex2 = ex; + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; + split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; + split_flag |= EXT4_EXT_MARK_UNINIT2; - allocated = map->m_len; - } - /* - * If there was a change of depth as part of the - * insertion of ex3 above, we need to update the length - * of the ex1 extent again here - */ - if (ex1 && ex1 != ex) { - ex1 = ex; - ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); - ext4_ext_mark_uninitialized(ex1); - ex2 = &newex; - } - /* - * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written - * using direct I/O, uninitialised still. - */ - ex2->ee_block = cpu_to_le32(map->m_lblk); - ext4_ext_store_pblock(ex2, newblock); - ex2->ee_len = cpu_to_le16(allocated); - ext4_ext_mark_uninitialized(ex2); - if (ex2 != ex) - goto insert; - /* Mark modified extent as dirty */ - err = ext4_ext_dirty(handle, inode, path + depth); - ext_debug("out here\n"); - goto out; -insert: - err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); - if (err == -ENOSPC && may_zeroout) { - err = ext4_ext_zeroout(inode, &orig_ex); - if (err) - goto fix_extent_len; - /* update the extent length and mark as initialized */ - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_dirty(handle, inode, path + depth); - /* zero out the first half */ - return allocated; - } else if (err) - goto fix_extent_len; -out: - ext4_ext_show_leaf(inode, path); - return err ? err : allocated; - -fix_extent_len: - ex->ee_block = orig_ex.ee_block; - ex->ee_len = orig_ex.ee_len; - ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); - ext4_ext_mark_uninitialized(ex); - ext4_ext_dirty(handle, inode, path + depth); - return err; + flags |= EXT4_GET_BLOCKS_PRE_IO; + return ext4_split_extent(handle, inode, path, map, split_flag, flags); } static int ext4_convert_unwritten_extents_endio(handle_t *handle, -- cgit v1.2.3 From 9b940f8e8c32456c8a6428fa4313a4bcca7b4fcb Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 16 May 2011 10:11:09 -0400 Subject: ext4: ext4_ext_convert_to_initialized bug found in extended FSX testing This patch addresses bugs found while testing punch hole with the fsx test. The patch corrects the number of blocks that are zeroed out while splitting an extent, and also corrects the return value to return the number of blocks split out, instead of the number of blocks zeroed out. This patch has been tested in addition to the following patches: [Ext4 punch hole v7] [XFS Tests Punch Hole 1/1 v2] Add Punch Hole Testing to FSX The test ran successfully for 24 hours. Signed-off-by: Allison Henderson Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e363f21b091b..5f243da8ff1d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2818,8 +2818,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, (EXT4_EXT_MAY_ZEROOUT & split_flag)) { /* case 3 */ zero_ex.ee_block = - cpu_to_le32(map->m_lblk + map->m_len); - zero_ex.ee_len = cpu_to_le16(allocated - map->m_len); + cpu_to_le32(map->m_lblk); + zero_ex.ee_len = cpu_to_le16(allocated); ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex) + map->m_lblk - ee_block); err = ext4_ext_zeroout(inode, &zero_ex); @@ -2842,10 +2842,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, goto out; } - allocated = map->m_lblk - ee_block + map->m_len; - split_map.m_lblk = ee_block; - split_map.m_len = allocated; + split_map.m_len = map->m_lblk - ee_block + map->m_len; + allocated = map->m_len; } } -- cgit v1.2.3 From 93917411be8db5d21abf15c781dfa43c5cc6edf2 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sun, 22 May 2011 20:49:12 -0400 Subject: ext4: make ext4_split_extent() handle error correctly Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" Reviewed-by: Mingming Cao --- fs/ext4/extents.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5f243da8ff1d..9a3844adb1cb 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2716,6 +2716,8 @@ static int ext4_split_extent(handle_t *handle, EXT4_EXT_MARK_UNINIT2; err = ext4_split_extent_at(handle, inode, path, map->m_lblk + map->m_len, split_flag1, flags1); + if (err) + goto out; } ext4_ext_drop_refs(path); -- cgit v1.2.3 From 77f4135f2a219a2127be6cc1208c42e6175b11dd Mon Sep 17 00:00:00 2001 From: Vivek Haldar Date: Sun, 22 May 2011 21:24:16 -0400 Subject: ext4: count hits/misses of extent cache and expose in sysfs The number of hits and misses for each filesystem is exposed in /sys/fs/ext4//extent_cache_{hits, misses}. Tested: fsstress, manual checks. Signed-off-by: Vivek Haldar Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9a3844adb1cb..4444317d141a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2035,6 +2035,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, struct ext4_extent *ex) { struct ext4_ext_cache *cex; + struct ext4_sb_info *sbi; int ret = 0; /* @@ -2042,6 +2043,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, */ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); cex = &EXT4_I(inode)->i_cached_extent; + sbi = EXT4_SB(inode->i_sb); /* has cache valid data? */ if (cex->ec_len == 0) @@ -2057,6 +2059,10 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, ret = 1; } errout: + if (!ret) + sbi->extent_cache_misses++; + else + sbi->extent_cache_hits++; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); return ret; } @@ -3901,4 +3907,3 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return error; } - -- cgit v1.2.3 From f6d2f6b327ceef5c689581529a852dc6ec3b74a6 Mon Sep 17 00:00:00 2001 From: Eric Gouriou Date: Sun, 22 May 2011 21:33:00 -0400 Subject: ext4: fix unbalanced up_write() in ext4_ext_truncate() error path ext4_ext_truncate() should not invoke up_write(&EXT4_I(inode)->i_data_sem) when ext4_orphan_add() returns an error, as it hasn't performed a down_write() yet. This trivial patch fixes this by moving the up_write() invocation above the out_stop label. Signed-off-by: Eric Gouriou Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4444317d141a..4e2bdc26b85c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3450,8 +3450,9 @@ void ext4_ext_truncate(struct inode *inode) if (IS_SYNC(inode)) ext4_handle_sync(handle); -out_stop: up_write(&EXT4_I(inode)->i_data_sem); + +out_stop: /* * If this was a simple ftruncate() and the file will remain alive, * then we need to clear up the orphan record which we created above. -- cgit v1.2.3 From b221349fa8b45d13c3650089f0514df7d1eb36c3 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 24 May 2011 11:36:58 -0400 Subject: ext4: fix ext4_ext_fiemap_cb() to handle blocks before request range correctly To get delayed-extent information, ext4_ext_fiemap_cb() looks up pagecache, it thus collects information starting from a page's head block. If blocksize < pagesize, the beginning blocks of a page may lies before the request range. So ext4_ext_fiemap_cb() should proceed ignoring them, because they has been handled before. If no mapped buffer in the range is found in the 1st page, we need to look up the 2nd page, otherwise delayed-extents after a hole will be ignored. Without this patch, xfstests 225 will hung on ext4 with 1K block. Reported-by: Amir Goldstein Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 49 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4e2bdc26b85c..ca62d748cc65 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3688,6 +3688,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, pgoff_t last_offset; pgoff_t offset; pgoff_t index; + pgoff_t start_index = 0; struct page **pages = NULL; struct buffer_head *bh = NULL; struct buffer_head *head = NULL; @@ -3714,39 +3715,57 @@ out: kfree(pages); return EXT_CONTINUE; } + index = 0; +next_page: /* Try to find the 1st mapped buffer. */ - end = ((__u64)pages[0]->index << PAGE_SHIFT) >> + end = ((__u64)pages[index]->index << PAGE_SHIFT) >> blksize_bits; - if (!page_has_buffers(pages[0])) + if (!page_has_buffers(pages[index])) goto out; - head = page_buffers(pages[0]); + head = page_buffers(pages[index]); if (!head) goto out; + index++; bh = head; do { - if (buffer_mapped(bh)) { + if (end >= newex->ec_block + + newex->ec_len) + /* The buffer is out of + * the request range. + */ + goto out; + + if (buffer_mapped(bh) && + end >= newex->ec_block) { + start_index = index - 1; /* get the 1st mapped buffer. */ - if (end > newex->ec_block + - newex->ec_len) - /* The buffer is out of - * the request range. - */ - goto out; goto found_mapped_buffer; } + bh = bh->b_this_page; end++; } while (bh != head); - /* No mapped buffer found. */ - goto out; + /* No mapped buffer in the range found in this page, + * We need to look up next page. + */ + if (index >= ret) { + /* There is no page left, but we need to limit + * newex->ec_len. + */ + newex->ec_len = end - newex->ec_block; + goto out; + } + goto next_page; } else { /*Find contiguous delayed buffers. */ if (ret > 0 && pages[0]->index == last_offset) head = page_buffers(pages[0]); bh = head; + index = 1; + start_index = 0; } found_mapped_buffer: @@ -3769,7 +3788,7 @@ found_mapped_buffer: end++; } while (bh != head); - for (index = 1; index < ret; index++) { + for (; index < ret; index++) { if (!page_has_buffers(pages[index])) { bh = NULL; break; @@ -3779,8 +3798,10 @@ found_mapped_buffer: bh = NULL; break; } + if (pages[index]->index != - pages[0]->index + index) { + pages[start_index]->index + index + - start_index) { /* Blocks are not contiguous. */ bh = NULL; break; -- cgit v1.2.3 From 55f020db66ce187fb8c8e4002a94b0eb714da450 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Wed, 25 May 2011 07:41:26 -0400 Subject: ext4: add flag to ext4_has_free_blocks This patch adds an allocation request flag to the ext4_has_free_blocks function which enables the use of reserved blocks. This will allow a punch hole to proceed even if the disk is full. Punching a hole may require additional blocks to first split the extents. Because ext4_has_free_blocks is a low level function, the flag needs to be passed down through several functions listed below: ext4_ext_insert_extent ext4_ext_create_new_leaf ext4_ext_grow_indepth ext4_ext_split ext4_ext_new_meta_block ext4_mb_new_blocks ext4_claim_free_blocks ext4_has_free_blocks [ext4 punch hole patch series 1/5 v7] Signed-off-by: Allison Henderson Signed-off-by: "Theodore Ts'o" Reviewed-by: Mingming Cao --- fs/ext4/extents.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ca62d748cc65..aa3a2601bada 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -192,12 +192,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, static ext4_fsblk_t ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent *ex, int *err) + struct ext4_extent *ex, int *err, unsigned int flags) { ext4_fsblk_t goal, newblock; goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); - newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err); + newblock = ext4_new_meta_blocks(handle, inode, goal, flags, + NULL, err); return newblock; } @@ -792,8 +793,9 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, * - initializes subtree */ static int ext4_ext_split(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext, int at) + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext, int at) { struct buffer_head *bh = NULL; int depth = ext_depth(inode); @@ -847,7 +849,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); for (a = 0; a < depth - at; a++) { newblock = ext4_ext_new_meta_block(handle, inode, path, - newext, &err); + newext, &err, flags); if (newblock == 0) goto cleanup; ablocks[a] = newblock; @@ -1056,8 +1058,9 @@ cleanup: * just created block */ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext) + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext) { struct ext4_ext_path *curp = path; struct ext4_extent_header *neh; @@ -1065,7 +1068,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock; int err = 0; - newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); + newblock = ext4_ext_new_meta_block(handle, inode, path, + newext, &err, flags); if (newblock == 0) return err; @@ -1140,8 +1144,9 @@ out: * if no free index is found, then it requests in-depth growing. */ static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *newext) + unsigned int flags, + struct ext4_ext_path *path, + struct ext4_extent *newext) { struct ext4_ext_path *curp; int depth, i, err = 0; @@ -1161,7 +1166,7 @@ repeat: if (EXT_HAS_FREE_INDEX(curp)) { /* if we found index with free entry, then use that * entry: create all needed subtree and add new leaf */ - err = ext4_ext_split(handle, inode, path, newext, i); + err = ext4_ext_split(handle, inode, flags, path, newext, i); if (err) goto out; @@ -1174,7 +1179,8 @@ repeat: err = PTR_ERR(path); } else { /* tree is full, time to grow in depth */ - err = ext4_ext_grow_indepth(handle, inode, path, newext); + err = ext4_ext_grow_indepth(handle, inode, flags, + path, newext); if (err) goto out; @@ -1693,6 +1699,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, int depth, len, err; ext4_lblk_t next; unsigned uninitialized = 0; + int flags = 0; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); @@ -1767,7 +1774,9 @@ repeat: * There is no free space in the found leaf. * We're gonna add a new leaf in the tree. */ - err = ext4_ext_create_new_leaf(handle, inode, path, newext); + if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) + flags = EXT4_MB_USE_ROOT_BLOCKS; + err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); if (err) goto cleanup; depth = ext_depth(inode); -- cgit v1.2.3 From d583fb87a3ff0ca50befd2f73f7a67fade1c8c56 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Wed, 25 May 2011 07:41:43 -0400 Subject: ext4: punch out extents This patch modifies the truncate routines to support hole punching Below is a brief summary of the patches changes: - Added end param to ext_ext4_rm_leaf This function has been modified to accept an end parameter which enables it to punch holes in leafs instead of just truncating them. - Implemented the "remove head" case in the ext_remove_blocks routine This routine is used by ext_ext4_rm_leaf to remove the tail of an extent during a truncate. The new ext_ext4_rm_leaf routine will now also use it to remove the head of an extent in the case that the hole covers a region of blocks at the beginning of an extent. - Added "end" param to ext4_ext_remove_space routine This function has been modified to accept a stop parameter, which is passed through to ext4_ext_rm_leaf. [ext4 punch hole patch series 3/5 v6] Signed-off-by: Allison Henderson Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 160 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 141 insertions(+), 19 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index aa3a2601bada..c8c687b5d9a8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -46,6 +46,13 @@ #include +static int ext4_split_extent(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + struct ext4_map_blocks *map, + int split_flag, + int flags); + static int ext4_ext_truncate_extend_restart(handle_t *handle, struct inode *inode, int needed) @@ -2203,8 +2210,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, NULL, start, num, flags); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { - printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), ee_len); + /* head removal */ + ext4_lblk_t num; + ext4_fsblk_t start; + + num = to - from; + start = ext4_ext_pblock(ex); + + ext_debug("free first %u blocks starting %llu\n", num, start); + ext4_free_blocks(handle, inode, 0, start, num, flags); + } else { printk(KERN_INFO "strange request: removal(2) " "%u-%u from %u:%u\n", @@ -2213,9 +2228,22 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, return 0; } + +/* + * ext4_ext_rm_leaf() Removes the extents associated with the + * blocks appearing between "start" and "end", and splits the extents + * if "start" and "end" appear in the same extent + * + * @handle: The journal handle + * @inode: The files inode + * @path: The path to the leaf + * @start: The first block to remove + * @end: The last block to remove + */ static int ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, ext4_lblk_t start) + struct ext4_ext_path *path, ext4_lblk_t start, + ext4_lblk_t end) { int err = 0, correct_index = 0; int depth = ext_depth(inode), credits; @@ -2226,6 +2254,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned short ex_ee_len; unsigned uninitialized = 0; struct ext4_extent *ex; + struct ext4_map_blocks map; /* the header must be checked already in ext4_ext_remove_space() */ ext_debug("truncate since %u in leaf\n", start); @@ -2255,31 +2284,95 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, path[depth].p_ext = ex; a = ex_ee_block > start ? ex_ee_block : start; - b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? - ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; + b = ex_ee_block+ex_ee_len - 1 < end ? + ex_ee_block+ex_ee_len - 1 : end; ext_debug(" border %u:%u\n", a, b); - if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { - block = 0; - num = 0; - BUG(); + /* If this extent is beyond the end of the hole, skip it */ + if (end <= ex_ee_block) { + ex--; + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = ext4_ext_get_actual_len(ex); + continue; + } else if (a != ex_ee_block && + b != ex_ee_block + ex_ee_len - 1) { + /* + * If this is a truncate, then this condition should + * never happen because at least one of the end points + * needs to be on the edge of the extent. + */ + if (end == EXT_MAX_BLOCK) { + ext_debug(" bad truncate %u:%u\n", + start, end); + block = 0; + num = 0; + err = -EIO; + goto out; + } + /* + * else this is a hole punch, so the extent needs to + * be split since neither edge of the hole is on the + * extent edge + */ + else{ + map.m_pblk = ext4_ext_pblock(ex); + map.m_lblk = ex_ee_block; + map.m_len = b - ex_ee_block; + + err = ext4_split_extent(handle, + inode, path, &map, 0, + EXT4_GET_BLOCKS_PUNCH_OUT_EXT | + EXT4_GET_BLOCKS_PRE_IO); + + if (err < 0) + goto out; + + ex_ee_len = ext4_ext_get_actual_len(ex); + + b = ex_ee_block+ex_ee_len - 1 < end ? + ex_ee_block+ex_ee_len - 1 : end; + + /* Then remove tail of this extent */ + block = ex_ee_block; + num = a - block; + } } else if (a != ex_ee_block) { /* remove tail of the extent */ block = ex_ee_block; num = a - block; } else if (b != ex_ee_block + ex_ee_len - 1) { /* remove head of the extent */ - block = a; - num = b - a; - /* there is no "make a hole" API yet */ - BUG(); + block = b; + num = ex_ee_block + ex_ee_len - b; + + /* + * If this is a truncate, this condition + * should never happen + */ + if (end == EXT_MAX_BLOCK) { + ext_debug(" bad truncate %u:%u\n", + start, end); + err = -EIO; + goto out; + } } else { /* remove whole extent: excellent! */ block = ex_ee_block; num = 0; - BUG_ON(a != ex_ee_block); - BUG_ON(b != ex_ee_block + ex_ee_len - 1); + if (a != ex_ee_block) { + ext_debug(" bad truncate %u:%u\n", + start, end); + err = -EIO; + goto out; + } + + if (b != ex_ee_block + ex_ee_len - 1) { + ext_debug(" bad truncate %u:%u\n", + start, end); + err = -EIO; + goto out; + } } /* @@ -2310,7 +2403,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (num == 0) { /* this extent is removed; mark slot entirely unused */ ext4_ext_store_pblock(ex, 0); - le16_add_cpu(&eh->eh_entries, -1); + } else if (block != ex_ee_block) { + /* + * If this was a head removal, then we need to update + * the physical block since it is now at a different + * location + */ + ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a)); } ex->ee_block = cpu_to_le32(block); @@ -2326,6 +2425,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (err) goto out; + /* + * If the extent was completely released, + * we need to remove it from the leaf + */ + if (num == 0) { + if (end != EXT_MAX_BLOCK) { + /* + * For hole punching, we need to scoot all the + * extents up when an extent is removed so that + * we dont have blank extents in the middle + */ + memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) * + sizeof(struct ext4_extent)); + + /* Now get rid of the one at the end */ + memset(EXT_LAST_EXTENT(eh), 0, + sizeof(struct ext4_extent)); + } + le16_add_cpu(&eh->eh_entries, -1); + } + ext_debug("new extent: %u:%u:%llu\n", block, num, ext4_ext_pblock(ex)); ex--; @@ -2366,7 +2486,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) +static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, + ext4_lblk_t end) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); @@ -2405,7 +2526,8 @@ again: while (i >= 0 && err == 0) { if (i == depth) { /* this is leaf block */ - err = ext4_ext_rm_leaf(handle, inode, path, start); + err = ext4_ext_rm_leaf(handle, inode, path, + start, end); /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); path[i].p_bh = NULL; @@ -3451,7 +3573,7 @@ void ext4_ext_truncate(struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); - err = ext4_ext_remove_space(inode, last_block); + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK); /* In a multi-transaction truncate, we only make the final * transaction synchronous. -- cgit v1.2.3 From e861304b8ed83fe43e36d46794d72641c82d4636 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Wed, 25 May 2011 07:41:46 -0400 Subject: ext4: add "punch hole" flag to ext4_map_blocks() This patch adds a new flag to ext4_map_blocks() that specifies the given range of blocks should be punched out. Extents are first converted to uninitialized extents before they are punched out. Because punching a hole may require that the extent be split, it is possible that the splitting may need more blocks than are available. To deal with this, use of reserved blocks are enabled to allow the split to proceed. The routine then returns the number of blocks successfully punched out. [ext4 punch hole patch series 4/5 v7] Signed-off-by: Allison Henderson Signed-off-by: "Theodore Ts'o" Reviewed-by: Mingming Cao --- fs/ext4/extents.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 87 insertions(+), 11 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c8c687b5d9a8..1d456b2ac377 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3298,15 +3298,19 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock = 0; int err = 0, depth, ret; unsigned int allocated = 0; + unsigned int punched_out = 0; + unsigned int result = 0; struct ext4_allocation_request ar; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; + struct ext4_map_blocks punch_map; ext_debug("blocks %u/%u requested for inode %lu\n", map->m_lblk, map->m_len, inode->i_ino); trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* check in cache */ - if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { + if (ext4_ext_in_cache(inode, map->m_lblk, &newex) && + ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) { if (!newex.ee_start_lo && !newex.ee_start_hi) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* @@ -3371,16 +3375,84 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, ee_block, ee_len, newblock); - /* Do not put uninitialized extent in the cache */ - if (!ext4_ext_is_uninitialized(ex)) { - ext4_ext_put_in_cache(inode, ee_block, - ee_len, ee_start); - goto out; + if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { + /* + * Do not put uninitialized extent + * in the cache + */ + if (!ext4_ext_is_uninitialized(ex)) { + ext4_ext_put_in_cache(inode, ee_block, + ee_len, ee_start); + goto out; + } + ret = ext4_ext_handle_uninitialized_extents( + handle, inode, map, path, flags, + allocated, newblock); + return ret; } - ret = ext4_ext_handle_uninitialized_extents(handle, - inode, map, path, flags, allocated, - newblock); - return ret; + + /* + * Punch out the map length, but only to the + * end of the extent + */ + punched_out = allocated < map->m_len ? + allocated : map->m_len; + + /* + * Sense extents need to be converted to + * uninitialized, they must fit in an + * uninitialized extent + */ + if (punched_out > EXT_UNINIT_MAX_LEN) + punched_out = EXT_UNINIT_MAX_LEN; + + punch_map.m_lblk = map->m_lblk; + punch_map.m_pblk = newblock; + punch_map.m_len = punched_out; + punch_map.m_flags = 0; + + /* Check to see if the extent needs to be split */ + if (punch_map.m_len != ee_len || + punch_map.m_lblk != ee_block) { + + ret = ext4_split_extent(handle, inode, + path, &punch_map, 0, + EXT4_GET_BLOCKS_PUNCH_OUT_EXT | + EXT4_GET_BLOCKS_PRE_IO); + + if (ret < 0) { + err = ret; + goto out2; + } + /* + * find extent for the block at + * the start of the hole + */ + ext4_ext_drop_refs(path); + kfree(path); + + path = ext4_ext_find_extent(inode, + map->m_lblk, NULL); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + goto out2; + } + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_len = ext4_ext_get_actual_len(ex); + ee_block = le32_to_cpu(ex->ee_block); + ee_start = ext4_ext_pblock(ex); + + } + + ext4_ext_mark_uninitialized(ex); + + err = ext4_ext_remove_space(inode, map->m_lblk, + map->m_lblk + punched_out); + + goto out2; } } @@ -3525,7 +3597,11 @@ out2: } trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, newblock, map->m_len, err ? err : allocated); - return err ? err : allocated; + + result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? + punched_out : allocated; + + return err ? err : result; } void ext4_ext_truncate(struct inode *inode) -- cgit v1.2.3 From a4bb6b64e39abc0e41ca077725f2a72c868e7622 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Wed, 25 May 2011 07:41:50 -0400 Subject: ext4: enable "punch hole" functionality This patch adds new routines: "ext4_punch_hole" "ext4_ext_punch_hole" and "ext4_ext_check_cache" fallocate has been modified to call ext4_punch_hole when the punch hole flag is passed. At the moment, we only support punching holes in extents, so this routine is pretty much a wrapper for the ext4_ext_punch_hole routine. The ext4_ext_punch_hole routine first completes all outstanding writes with the associated pages, and then releases them. The unblock aligned data is zeroed, and all blocks in between are punched out. The ext4_ext_check_cache routine is very similar to ext4_ext_in_cache except it accepts a ext4_ext_cache parameter instead of a ext4_extent parameter. This routine is used by ext4_ext_punch_hole to check and see if a block in a hole that has been cached. The ext4_ext_cache parameter is necessary because the members ext4_extent structure are not large enough to hold a 32 bit value. The existing ext4_ext_in_cache routine has become a wrapper to this new function. [ext4 punch hole patch series 5/5 v7] Signed-off-by: Allison Henderson Signed-off-by: "Theodore Ts'o" Reviewed-by: Mingming Cao --- fs/ext4/extents.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 225 insertions(+), 11 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1d456b2ac377..88ff3a74787b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2044,12 +2044,23 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, } /* + * ext4_ext_in_cache() + * Checks to see if the given block is in the cache. + * If it is, the cached extent is stored in the given + * cache extent pointer. If the cached extent is a hole, + * this routine should be used instead of + * ext4_ext_in_cache if the calling function needs to + * know the size of the hole. + * + * @inode: The files inode + * @block: The block to look for in the cache + * @ex: Pointer where the cached extent will be stored + * if it contains block + * * Return 0 if cache is invalid; 1 if the cache is valid */ -static int -ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, - struct ext4_extent *ex) -{ +static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, + struct ext4_ext_cache *ex){ struct ext4_ext_cache *cex; struct ext4_sb_info *sbi; int ret = 0; @@ -2066,9 +2077,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, goto errout; if (in_range(block, cex->ec_block, cex->ec_len)) { - ex->ee_block = cpu_to_le32(cex->ec_block); - ext4_ext_store_pblock(ex, cex->ec_start); - ex->ee_len = cpu_to_le16(cex->ec_len); + memcpy(ex, cex, sizeof(struct ext4_ext_cache)); ext_debug("%u cached by %u:%u:%llu\n", block, cex->ec_block, cex->ec_len, cex->ec_start); @@ -2083,6 +2092,37 @@ errout: return ret; } +/* + * ext4_ext_in_cache() + * Checks to see if the given block is in the cache. + * If it is, the cached extent is stored in the given + * extent pointer. + * + * @inode: The files inode + * @block: The block to look for in the cache + * @ex: Pointer where the cached extent will be stored + * if it contains block + * + * Return 0 if cache is invalid; 1 if the cache is valid + */ +static int +ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, + struct ext4_extent *ex) +{ + struct ext4_ext_cache cex; + int ret = 0; + + if (ext4_ext_check_cache(inode, block, &cex)) { + ex->ee_block = cpu_to_le32(cex.ec_block); + ext4_ext_store_pblock(ex, cex.ec_start); + ex->ee_len = cpu_to_le16(cex.ec_len); + ret = 1; + } + + return ret; +} + + /* * ext4_ext_rm_idx: * removes index from the index block. @@ -3724,10 +3764,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) struct ext4_map_blocks map; unsigned int credits, blkbits = inode->i_blkbits; - /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode & ~FALLOC_FL_KEEP_SIZE) - return -EOPNOTSUPP; - /* * currently supporting (pre)allocate mode for extent-based * files _only_ @@ -3735,6 +3771,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EOPNOTSUPP; + /* Return error if mode is not supported */ + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + if (mode & FALLOC_FL_PUNCH_HOLE) + return ext4_punch_hole(file, offset, len); + trace_ext4_fallocate_enter(inode, offset, len, mode); map.m_lblk = offset >> blkbits; /* @@ -4100,6 +4143,177 @@ static int ext4_xattr_fiemap(struct inode *inode, return (error < 0 ? error : 0); } +/* + * ext4_ext_punch_hole + * + * Punches a hole of "length" bytes in a file starting + * at byte "offset" + * + * @inode: The inode of the file to punch a hole in + * @offset: The starting byte offset of the hole + * @length: The length of the hole + * + * Returns the number of blocks removed or negative on err + */ +int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct ext4_ext_cache cache_ex; + ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; + struct address_space *mapping = inode->i_mapping; + struct ext4_map_blocks map; + handle_t *handle; + loff_t first_block_offset, last_block_offset, block_len; + loff_t first_page, last_page, first_page_offset, last_page_offset; + int ret, credits, blocks_released, err = 0; + + first_block = (offset + sb->s_blocksize - 1) >> + EXT4_BLOCK_SIZE_BITS(sb); + last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); + + first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb); + last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb); + + first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + last_page = (offset + length) >> PAGE_CACHE_SHIFT; + + first_page_offset = first_page << PAGE_CACHE_SHIFT; + last_page_offset = last_page << PAGE_CACHE_SHIFT; + + /* + * Write out all dirty pages to avoid race conditions + * Then release them. + */ + if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { + err = filemap_write_and_wait_range(mapping, + first_page_offset == 0 ? 0 : first_page_offset-1, + last_page_offset); + + if (err) + return err; + } + + /* Now release the pages */ + if (last_page_offset > first_page_offset) { + truncate_inode_pages_range(mapping, first_page_offset, + last_page_offset-1); + } + + /* finish any pending end_io work */ + ext4_flush_completed_IO(inode); + + credits = ext4_writepage_trans_blocks(inode); + handle = ext4_journal_start(inode, credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext4_orphan_add(handle, inode); + if (err) + goto out; + + /* + * Now we need to zero out the un block aligned data. + * If the file is smaller than a block, just + * zero out the middle + */ + if (first_block > last_block) + ext4_block_zero_page_range(handle, mapping, offset, length); + else { + /* zero out the head of the hole before the first block */ + block_len = first_block_offset - offset; + if (block_len > 0) + ext4_block_zero_page_range(handle, mapping, + offset, block_len); + + /* zero out the tail of the hole after the last block */ + block_len = offset + length - last_block_offset; + if (block_len > 0) { + ext4_block_zero_page_range(handle, mapping, + last_block_offset, block_len); + } + } + + /* If there are no blocks to remove, return now */ + if (first_block >= last_block) + goto out; + + down_write(&EXT4_I(inode)->i_data_sem); + ext4_ext_invalidate_cache(inode); + ext4_discard_preallocations(inode); + + /* + * Loop over all the blocks and identify blocks + * that need to be punched out + */ + iblock = first_block; + blocks_released = 0; + while (iblock < last_block) { + max_blocks = last_block - iblock; + num_blocks = 1; + memset(&map, 0, sizeof(map)); + map.m_lblk = iblock; + map.m_len = max_blocks; + ret = ext4_ext_map_blocks(handle, inode, &map, + EXT4_GET_BLOCKS_PUNCH_OUT_EXT); + + if (ret > 0) { + blocks_released += ret; + num_blocks = ret; + } else if (ret == 0) { + /* + * If map blocks could not find the block, + * then it is in a hole. If the hole was + * not already cached, then map blocks should + * put it in the cache. So we can get the hole + * out of the cache + */ + memset(&cache_ex, 0, sizeof(cache_ex)); + if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && + !cache_ex.ec_start) { + + /* The hole is cached */ + num_blocks = cache_ex.ec_block + + cache_ex.ec_len - iblock; + + } else { + /* The block could not be identified */ + err = -EIO; + break; + } + } else { + /* Map blocks error */ + err = ret; + break; + } + + if (num_blocks == 0) { + /* This condition should never happen */ + ext_debug("Block lookup failed"); + err = -EIO; + break; + } + + iblock += num_blocks; + } + + if (blocks_released > 0) { + ext4_ext_invalidate_cache(inode); + ext4_discard_preallocations(inode); + } + + if (IS_SYNC(inode)) + ext4_handle_sync(handle); + + up_write(&EXT4_I(inode)->i_data_sem); + +out: + ext4_orphan_del(handle, inode); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + return err; +} int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { -- cgit v1.2.3 From 556b27abf73833923d5cd4be80006292e1b31662 Mon Sep 17 00:00:00 2001 From: Vivek Haldar Date: Wed, 25 May 2011 07:41:54 -0400 Subject: ext4: do not normalize block requests from fallocate() Currently, an fallocate request of size slightly larger than a power of 2 is turned into two block requests, each a power of 2, with the extra blocks pre-allocated for future use. When an application calls fallocate, it already has an idea about how large the file may grow so there is usually little benefit to reserve extra blocks on the preallocation list. This reduces disk fragmentation. Tested: fsstress. Also verified manually that fallocat'ed files are contiguously laid out with this change (whereas without it they begin at power-of-2 boundaries, leaving blocks in between). CPU usage of fallocate is not appreciably higher. In a tight fallocate loop, CPU usage hovers between 5%-8% with this change, and 5%-7% without it. Using a simulated file system aging program which the file system to 70%, the percentage of free extents larger than 8MB (as measured by e2freefrag) increased from 38.8% without this change, to 69.4% with this change. Signed-off-by: Vivek Haldar Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 88ff3a74787b..ae65f247ceda 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3554,6 +3554,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, else /* disable in-core preallocation for non-regular files */ ar.flags = 0; + if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) + ar.flags |= EXT4_MB_HINT_NOPREALLOC; newblock = ext4_mb_new_blocks(handle, &ar, &err); if (!newblock) goto out2; @@ -3807,7 +3809,8 @@ retry: break; } ret = ext4_map_blocks(handle, inode, &map, - EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); + EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | + EXT4_GET_BLOCKS_NO_NORMALIZE); if (ret <= 0) { #ifdef EXT4FS_DEBUG WARN_ON(ret <= 0); -- cgit v1.2.3 From 1b16da77f90328661fc7e556ad591f9ee6b7ef6a Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 25 May 2011 17:41:48 -0400 Subject: ext4: teach ext4_ext_split to calculate extents efficiently Make ext4_ext_split() get extents to be moved by calculating in a statement instead of counting in a loop. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 84 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 38 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ae65f247ceda..5199bac7fc62 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -482,9 +482,43 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) } ext_debug("\n"); } + +static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, + ext4_fsblk_t newblock, int level) +{ + int depth = ext_depth(inode); + struct ext4_extent *ex; + + if (depth != level) { + struct ext4_extent_idx *idx; + idx = path[level].p_idx; + while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { + ext_debug("%d: move %d:%llu in new index %llu\n", level, + le32_to_cpu(idx->ei_block), + ext4_idx_pblock(idx), + newblock); + idx++; + } + + return; + } + + ex = path[depth].p_ext; + while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { + ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", + le32_to_cpu(ex->ee_block), + ext4_ext_pblock(ex), + ext4_ext_is_uninitialized(ex), + ext4_ext_get_actual_len(ex), + newblock); + ex++; + } +} + #else #define ext4_ext_show_path(inode, path) #define ext4_ext_show_leaf(inode, path) +#define ext4_ext_show_move(inode, path, newblock, level) #endif void ext4_ext_drop_refs(struct ext4_ext_path *path) @@ -808,7 +842,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, int depth = ext_depth(inode); struct ext4_extent_header *neh; struct ext4_extent_idx *fidx; - struct ext4_extent *ex; int i = at, k, m, a; ext4_fsblk_t newblock, oldblock; __le32 border; @@ -885,7 +918,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_depth = 0; - ex = EXT_FIRST_EXTENT(neh); /* move remainder of path[depth] to the new leaf */ if (unlikely(path[depth].p_hdr->eh_entries != @@ -897,25 +929,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, goto cleanup; } /* start copy from next extent */ - /* TODO: we could do it by single memmove */ - m = 0; - path[depth].p_ext++; - while (path[depth].p_ext <= - EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", - le32_to_cpu(path[depth].p_ext->ee_block), - ext4_ext_pblock(path[depth].p_ext), - ext4_ext_is_uninitialized(path[depth].p_ext), - ext4_ext_get_actual_len(path[depth].p_ext), - newblock); - /*memmove(ex++, path[depth].p_ext++, - sizeof(struct ext4_extent)); - neh->eh_entries++;*/ - path[depth].p_ext++; - m++; - } + m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; + ext4_ext_show_move(inode, path, newblock, depth); if (m) { - memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); + struct ext4_extent *ex; + ex = EXT_FIRST_EXTENT(neh); + memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m); le16_add_cpu(&neh->eh_entries, m); } @@ -977,12 +996,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("int.index at %d (block %llu): %u -> %llu\n", i, newblock, le32_to_cpu(border), oldblock); - /* copy indexes */ - m = 0; - path[i].p_idx++; - ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, - EXT_MAX_INDEX(path[i].p_hdr)); + /* move remainder of path[i] to the new index block */ if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != EXT_LAST_INDEX(path[i].p_hdr))) { EXT4_ERROR_INODE(inode, @@ -991,20 +1006,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { - ext_debug("%d: move %d:%llu in new index %llu\n", i, - le32_to_cpu(path[i].p_idx->ei_block), - ext4_idx_pblock(path[i].p_idx), - newblock); - /*memmove(++fidx, path[i].p_idx++, - sizeof(struct ext4_extent_idx)); - neh->eh_entries++; - BUG_ON(neh->eh_entries > neh->eh_max);*/ - path[i].p_idx++; - m++; - } + /* start copy indexes */ + m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; + ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, + EXT_MAX_INDEX(path[i].p_hdr)); + ext4_ext_show_move(inode, path, newblock, i); if (m) { - memmove(++fidx, path[i].p_idx - m, + memmove(++fidx, path[i].p_idx, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } -- cgit v1.2.3