summaryrefslogtreecommitdiff
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--fs/ext4/extents.c626
1 files changed, 406 insertions, 220 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74292a71b384..0b16fb4c06d3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -73,8 +73,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode,
{
struct ext4_extent_tail *et;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return 1;
et = find_ext4_extent_tail(eh);
@@ -88,8 +87,7 @@ static void ext4_extent_block_csum_set(struct inode *inode,
{
struct ext4_extent_tail *et;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return;
et = find_ext4_extent_tail(eh);
@@ -98,14 +96,14 @@ static void ext4_extent_block_csum_set(struct inode *inode,
static int ext4_split_extent(handle_t *handle,
struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
struct ext4_map_blocks *map,
int split_flag,
int flags);
static int ext4_split_extent_at(handle_t *handle,
struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
ext4_lblk_t split,
int split_flag,
int flags);
@@ -291,6 +289,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
return size;
}
+static inline int
+ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
+		struct ext4_ext_path **ppath, ext4_lblk_t lblk, int nofail)
+{
+	struct ext4_extent *ex = (*ppath)[(*ppath)->p_depth].p_ext;
+	int sflag = 0, flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;
+
+	if (ext4_ext_is_unwritten(ex)) /* both halves stay unwritten */
+		sflag = EXT4_EXT_MARK_UNWRIT1 | EXT4_EXT_MARK_UNWRIT2;
+	if (nofail)
+		flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
+	return ext4_split_extent_at(handle, inode, ppath, lblk, sflag, flags);
+}
+
/*
* Calculate the number of metadata blocks needed
* to allocate @blocks
@@ -695,9 +707,11 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
- int depth = path->p_depth;
- int i;
+ int depth, i;
+ if (!path)
+ return;
+ depth = path->p_depth;
for (i = 0; i <= depth; i++, path++)
if (path->p_bh) {
brelse(path->p_bh);
@@ -841,24 +855,32 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
}
struct ext4_ext_path *
-ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
- struct ext4_ext_path *path, int flags)
+ext4_find_extent(struct inode *inode, ext4_lblk_t block,
+ struct ext4_ext_path **orig_path, int flags)
{
struct ext4_extent_header *eh;
struct buffer_head *bh;
- short int depth, i, ppos = 0, alloc = 0;
+ struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
+ short int depth, i, ppos = 0;
int ret;
eh = ext_inode_hdr(inode);
depth = ext_depth(inode);
- /* account possible depth increase */
+ if (path) {
+ ext4_ext_drop_refs(path);
+ if (depth > path[0].p_maxdepth) {
+ kfree(path);
+ *orig_path = path = NULL;
+ }
+ }
if (!path) {
+ /* account possible depth increase */
path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
GFP_NOFS);
- if (!path)
+ if (unlikely(!path))
return ERR_PTR(-ENOMEM);
- alloc = 1;
+ path[0].p_maxdepth = depth + 1;
}
path[0].p_hdr = eh;
path[0].p_bh = NULL;
@@ -876,7 +898,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
flags);
- if (IS_ERR(bh)) {
+ if (unlikely(IS_ERR(bh))) {
ret = PTR_ERR(bh);
goto err;
}
@@ -910,8 +932,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
err:
ext4_ext_drop_refs(path);
- if (alloc)
- kfree(path);
+ kfree(path);
+ if (orig_path)
+ *orig_path = NULL;
return ERR_PTR(ret);
}
@@ -1238,16 +1261,24 @@ cleanup:
* just created block
*/
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
- unsigned int flags,
- struct ext4_extent *newext)
+ unsigned int flags)
{
struct ext4_extent_header *neh;
struct buffer_head *bh;
- ext4_fsblk_t newblock;
+ ext4_fsblk_t newblock, goal = 0;
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
int err = 0;
- newblock = ext4_ext_new_meta_block(handle, inode, NULL,
- newext, &err, flags);
+ /* Try to prepend new index to old one */
+ if (ext_depth(inode))
+ goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
+ if (goal > le32_to_cpu(es->s_first_data_block)) {
+ flags |= EXT4_MB_HINT_TRY_GOAL;
+ goal--;
+ } else
+ goal = ext4_inode_to_goal_block(inode);
+ newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
+ NULL, &err);
if (newblock == 0)
return err;
@@ -1314,9 +1345,10 @@ out:
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
unsigned int mb_flags,
unsigned int gb_flags,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
struct ext4_extent *newext)
{
+ struct ext4_ext_path *path = *ppath;
struct ext4_ext_path *curp;
int depth, i, err = 0;
@@ -1340,23 +1372,21 @@ repeat:
goto out;
/* refill path */
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode,
+ path = ext4_find_extent(inode,
(ext4_lblk_t)le32_to_cpu(newext->ee_block),
- path, gb_flags);
+ ppath, gb_flags);
if (IS_ERR(path))
err = PTR_ERR(path);
} else {
/* tree is full, time to grow in depth */
- err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
+ err = ext4_ext_grow_indepth(handle, inode, mb_flags);
if (err)
goto out;
/* refill path */
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode,
+ path = ext4_find_extent(inode,
(ext4_lblk_t)le32_to_cpu(newext->ee_block),
- path, gb_flags);
+ ppath, gb_flags);
if (IS_ERR(path)) {
err = PTR_ERR(path);
goto out;
@@ -1559,7 +1589,7 @@ found_extent:
* allocated block. Thus, index entries have to be consistent
* with leaves.
*/
-static ext4_lblk_t
+ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
int depth;
@@ -1802,6 +1832,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
sizeof(struct ext4_extent_idx);
s += sizeof(struct ext4_extent_header);
+ path[1].p_maxdepth = path[0].p_maxdepth;
memcpy(path[0].p_hdr, path[1].p_hdr, s);
path[0].p_depth = 0;
path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
@@ -1896,9 +1927,10 @@ out:
* creating new leaf in the no-space case.
*/
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
struct ext4_extent *newext, int gb_flags)
{
+ struct ext4_ext_path *path = *ppath;
struct ext4_extent_header *eh;
struct ext4_extent *ex, *fex;
struct ext4_extent *nearex; /* nearest extent */
@@ -1907,6 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
ext4_lblk_t next;
int mb_flags = 0, unwritten;
+ if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ mb_flags |= EXT4_MB_DELALLOC_RESERVED;
if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
return -EIO;
@@ -1925,7 +1959,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
/*
* Try to see whether we should rather test the extent on
* right from ex, or from the left of ex. This is because
- * ext4_ext_find_extent() can return either extent on the
+ * ext4_find_extent() can return either extent on the
* left, or on the right from the searched position. This
* will make merging more effective.
*/
@@ -2008,7 +2042,7 @@ prepend:
if (next != EXT_MAX_BLOCKS) {
ext_debug("next leaf block - %u\n", next);
BUG_ON(npath != NULL);
- npath = ext4_ext_find_extent(inode, next, NULL, 0);
+ npath = ext4_find_extent(inode, next, NULL, 0);
if (IS_ERR(npath))
return PTR_ERR(npath);
BUG_ON(npath->p_depth != path->p_depth);
@@ -2028,9 +2062,9 @@ prepend:
* We're gonna add a new leaf in the tree.
*/
if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
- mb_flags = EXT4_MB_USE_RESERVED;
+ mb_flags |= EXT4_MB_USE_RESERVED;
err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
- path, newext);
+ ppath, newext);
if (err)
goto cleanup;
depth = ext_depth(inode);
@@ -2108,10 +2142,8 @@ merge:
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
cleanup:
- if (npath) {
- ext4_ext_drop_refs(npath);
- kfree(npath);
- }
+ ext4_ext_drop_refs(npath);
+ kfree(npath);
return err;
}
@@ -2133,13 +2165,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
/* find extent for this block */
down_read(&EXT4_I(inode)->i_data_sem);
- if (path && ext_depth(inode) != depth) {
- /* depth was changed. we have to realloc path */
- kfree(path);
- path = NULL;
- }
-
- path = ext4_ext_find_extent(inode, block, path, 0);
+ path = ext4_find_extent(inode, block, &path, 0);
if (IS_ERR(path)) {
up_read(&EXT4_I(inode)->i_data_sem);
err = PTR_ERR(path);
@@ -2156,7 +2182,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
}
ex = path[depth].p_ext;
next = ext4_ext_next_allocated_block(path);
- ext4_ext_drop_refs(path);
flags = 0;
exists = 0;
@@ -2266,11 +2291,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
block = es.es_lblk + es.es_len;
}
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
-
+ ext4_ext_drop_refs(path);
+ kfree(path);
return err;
}
@@ -2826,7 +2848,7 @@ again:
ext4_lblk_t ee_block;
/* find extent for this block */
- path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
+ path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path)) {
ext4_journal_stop(handle);
return PTR_ERR(path);
@@ -2854,24 +2876,14 @@ again:
*/
if (end >= ee_block &&
end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
- int split_flag = 0;
-
- if (ext4_ext_is_unwritten(ex))
- split_flag = EXT4_EXT_MARK_UNWRIT1 |
- EXT4_EXT_MARK_UNWRIT2;
-
/*
* Split the extent in two so that 'end' is the last
* block in the first new extent. Also we should not
* fail removing space due to ENOSPC so try to use
* reserved block if that happens.
*/
- err = ext4_split_extent_at(handle, inode, path,
- end + 1, split_flag,
- EXT4_EX_NOCACHE |
- EXT4_GET_BLOCKS_PRE_IO |
- EXT4_GET_BLOCKS_METADATA_NOFAIL);
-
+ err = ext4_force_split_extent_at(handle, inode, &path,
+ end + 1, 1);
if (err < 0)
goto out;
}
@@ -2893,7 +2905,7 @@ again:
ext4_journal_stop(handle);
return -ENOMEM;
}
- path[0].p_depth = depth;
+ path[0].p_maxdepth = path[0].p_depth = depth;
path[0].p_hdr = ext_inode_hdr(inode);
i = 0;
@@ -3013,10 +3025,9 @@ again:
out:
ext4_ext_drop_refs(path);
kfree(path);
- if (err == -EAGAIN) {
- path = NULL;
+ path = NULL;
+ if (err == -EAGAIN)
goto again;
- }
ext4_journal_stop(handle);
return err;
@@ -3130,11 +3141,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
*/
static int ext4_split_extent_at(handle_t *handle,
struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
ext4_lblk_t split,
int split_flag,
int flags)
{
+ struct ext4_ext_path *path = *ppath;
ext4_fsblk_t newblock;
ext4_lblk_t ee_block;
struct ext4_extent *ex, newex, orig_ex, zero_ex;
@@ -3205,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle,
if (split_flag & EXT4_EXT_MARK_UNWRIT2)
ext4_ext_mark_unwritten(ex2);
- err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+ err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
if (split_flag & EXT4_EXT_DATA_VALID1) {
@@ -3271,11 +3283,12 @@ fix_extent_len:
*/
static int ext4_split_extent(handle_t *handle,
struct inode *inode,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
struct ext4_map_blocks *map,
int split_flag,
int flags)
{
+ struct ext4_ext_path *path = *ppath;
ext4_lblk_t ee_block;
struct ext4_extent *ex;
unsigned int ee_len, depth;
@@ -3298,7 +3311,7 @@ static int ext4_split_extent(handle_t *handle,
EXT4_EXT_MARK_UNWRIT2;
if (split_flag & EXT4_EXT_DATA_VALID2)
split_flag1 |= EXT4_EXT_DATA_VALID1;
- err = ext4_split_extent_at(handle, inode, path,
+ err = ext4_split_extent_at(handle, inode, ppath,
map->m_lblk + map->m_len, split_flag1, flags1);
if (err)
goto out;
@@ -3309,8 +3322,7 @@ static int ext4_split_extent(handle_t *handle,
* Update path is required because previous ext4_split_extent_at() may
* result in split of original leaf or extent zeroout.
*/
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
+ path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = ext_depth(inode);
@@ -3330,7 +3342,7 @@ static int ext4_split_extent(handle_t *handle,
split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
EXT4_EXT_MARK_UNWRIT2);
}
- err = ext4_split_extent_at(handle, inode, path,
+ err = ext4_split_extent_at(handle, inode, ppath,
map->m_lblk, split_flag1, flags);
if (err)
goto out;
@@ -3364,9 +3376,10 @@ out:
static int ext4_ext_convert_to_initialized(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
int flags)
{
+ struct ext4_ext_path *path = *ppath;
struct ext4_sb_info *sbi;
struct ext4_extent_header *eh;
struct ext4_map_blocks split_map;
@@ -3590,11 +3603,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
}
}
- allocated = ext4_split_extent(handle, inode, path,
- &split_map, split_flag, flags);
- if (allocated < 0)
- err = allocated;
-
+ err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
+ flags);
+ if (err > 0)
+ err = 0;
out:
/* If we have gotten a failure, don't zero out status tree */
if (!err)
@@ -3629,9 +3641,10 @@ out:
static int ext4_split_convert_extents(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path *path,
+ struct ext4_ext_path **ppath,
int flags)
{
+ struct ext4_ext_path *path = *ppath;
ext4_lblk_t eof_block;
ext4_lblk_t ee_block;
struct ext4_extent *ex;
@@ -3665,74 +3678,15 @@ static int ext4_split_convert_extents(handle_t *handle,
split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
}
flags |= EXT4_GET_BLOCKS_PRE_IO;
- return ext4_split_extent(handle, inode, path, map, split_flag, flags);
+ return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
}
-static int ext4_convert_initialized_extents(handle_t *handle,
- struct inode *inode,
- struct ext4_map_blocks *map,
- struct ext4_ext_path *path)
-{
- struct ext4_extent *ex;
- ext4_lblk_t ee_block;
- unsigned int ee_len;
- int depth;
- int err = 0;
-
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
-
- ext_debug("%s: inode %lu, logical"
- "block %llu, max_blocks %u\n", __func__, inode->i_ino,
- (unsigned long long)ee_block, ee_len);
-
- if (ee_block != map->m_lblk || ee_len > map->m_len) {
- err = ext4_split_convert_extents(handle, inode, map, path,
- EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
- if (err < 0)
- goto out;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- if (!ex) {
- EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
- (unsigned long) map->m_lblk);
- err = -EIO;
- goto out;
- }
- }
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
- /* first mark the extent as unwritten */
- ext4_ext_mark_unwritten(ex);
-
- /* note: ext4_ext_correct_indexes() isn't needed here because
- * borders are not changed
- */
- ext4_ext_try_to_merge(handle, inode, path, ex);
-
- /* Mark modified extent as dirty */
- err = ext4_ext_dirty(handle, inode, path + path->p_depth);
-out:
- ext4_ext_show_leaf(inode, path);
- return err;
-}
-
-
static int ext4_convert_unwritten_extents_endio(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path *path)
+ struct ext4_ext_path **ppath)
{
+ struct ext4_ext_path *path = *ppath;
struct ext4_extent *ex;
ext4_lblk_t ee_block;
unsigned int ee_len;
@@ -3761,16 +3715,13 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
inode->i_ino, (unsigned long long)ee_block, ee_len,
(unsigned long long)map->m_lblk, map->m_len);
#endif
- err = ext4_split_convert_extents(handle, inode, map, path,
+ err = ext4_split_convert_extents(handle, inode, map, ppath,
EXT4_GET_BLOCKS_CONVERT);
if (err < 0)
- goto out;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
+ return err;
+ path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
depth = ext_depth(inode);
ex = path[depth].p_ext;
}
@@ -3963,12 +3914,16 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
}
static int
-ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
- struct ext4_map_blocks *map,
- struct ext4_ext_path *path, int flags,
- unsigned int allocated, ext4_fsblk_t newblock)
+convert_initialized_extent(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path **ppath, int flags,
+ unsigned int allocated, ext4_fsblk_t newblock)
{
- int ret = 0;
+ struct ext4_ext_path *path = *ppath;
+ struct ext4_extent *ex;
+ ext4_lblk_t ee_block;
+ unsigned int ee_len;
+ int depth;
int err = 0;
/*
@@ -3978,28 +3933,67 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
- ret = ext4_convert_initialized_extents(handle, inode, map,
- path);
- if (ret >= 0) {
- ext4_update_inode_fsync_trans(handle, inode, 1);
- err = check_eofblocks_fl(handle, inode, map->m_lblk,
- path, map->m_len);
- } else
- err = ret;
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+
+ ext_debug("%s: inode %lu, logical"
+ "block %llu, max_blocks %u\n", __func__, inode->i_ino,
+ (unsigned long long)ee_block, ee_len);
+
+ if (ee_block != map->m_lblk || ee_len > map->m_len) {
+ err = ext4_split_convert_extents(handle, inode, map, ppath,
+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
+ if (err < 0)
+ return err;
+ path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ if (!ex) {
+ EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+ (unsigned long) map->m_lblk);
+ return -EIO;
+ }
+ }
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ return err;
+ /* first mark the extent as unwritten */
+ ext4_ext_mark_unwritten(ex);
+
+ /* note: ext4_ext_correct_indexes() isn't needed here because
+ * borders are not changed
+ */
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+
+ /* Mark modified extent as dirty */
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+ if (err)
+ return err;
+ ext4_ext_show_leaf(inode, path);
+
+ ext4_update_inode_fsync_trans(handle, inode, 1);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len);
+ if (err)
+ return err;
map->m_flags |= EXT4_MAP_UNWRITTEN;
if (allocated > map->m_len)
allocated = map->m_len;
map->m_len = allocated;
-
- return err ? err : allocated;
+ return allocated;
}
static int
ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path *path, int flags,
+ struct ext4_ext_path **ppath, int flags,
unsigned int allocated, ext4_fsblk_t newblock)
{
+ struct ext4_ext_path *path = *ppath;
int ret = 0;
int err = 0;
ext4_io_end_t *io = ext4_inode_aio(inode);
@@ -4021,8 +4015,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
/* get_block() before submit the IO, split the extent */
if (flags & EXT4_GET_BLOCKS_PRE_IO) {
- ret = ext4_split_convert_extents(handle, inode, map,
- path, flags | EXT4_GET_BLOCKS_CONVERT);
+ ret = ext4_split_convert_extents(handle, inode, map, ppath,
+ flags | EXT4_GET_BLOCKS_CONVERT);
if (ret <= 0)
goto out;
/*
@@ -4040,7 +4034,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
/* IO end_io complete, convert the filled extent to written */
if (flags & EXT4_GET_BLOCKS_CONVERT) {
ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
- path);
+ ppath);
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
err = check_eofblocks_fl(handle, inode, map->m_lblk,
@@ -4078,7 +4072,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
}
/* buffered write, writepage time, convert*/
- ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags);
+ ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
if (ret >= 0)
ext4_update_inode_fsync_trans(handle, inode, 1);
out:
@@ -4279,7 +4273,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
/* find extent for this block */
- path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
+ path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
if (IS_ERR(path)) {
err = PTR_ERR(path);
path = NULL;
@@ -4291,7 +4285,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/*
* consistent leaf must not be empty;
* this situation is possible, though, _during_ tree modification;
- * this is why assert can't be put in ext4_ext_find_extent()
+ * this is why assert can't be put in ext4_find_extent()
*/
if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
EXT4_ERROR_INODE(inode, "bad extent address "
@@ -4331,15 +4325,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
if ((!ext4_ext_is_unwritten(ex)) &&
(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
- allocated = ext4_ext_convert_initialized_extent(
- handle, inode, map, path, flags,
- allocated, newblock);
+ allocated = convert_initialized_extent(
+ handle, inode, map, &path,
+ flags, allocated, newblock);
goto out2;
} else if (!ext4_ext_is_unwritten(ex))
goto out;
ret = ext4_ext_handle_unwritten_extents(
- handle, inode, map, path, flags,
+ handle, inode, map, &path, flags,
allocated, newblock);
if (ret < 0)
err = ret;
@@ -4376,7 +4370,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/*
* If we are doing bigalloc, check to see if the extent returned
- * by ext4_ext_find_extent() implies a cluster we can use.
+ * by ext4_find_extent() implies a cluster we can use.
*/
if (cluster_offset && ex &&
get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
@@ -4451,6 +4445,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ar.flags = 0;
if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ar.flags |= EXT4_MB_DELALLOC_RESERVED;
newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock)
goto out2;
@@ -4486,7 +4482,7 @@ got_allocated_blocks:
err = check_eofblocks_fl(handle, inode, map->m_lblk,
path, ar.len);
if (!err)
- err = ext4_ext_insert_extent(handle, inode, path,
+ err = ext4_ext_insert_extent(handle, inode, &path,
&newex, flags);
if (!err && set_unwritten) {
@@ -4619,10 +4615,8 @@ out:
map->m_pblk = newblock;
map->m_len = allocated;
out2:
- if (path) {
- ext4_ext_drop_refs(path);
- kfree(path);
- }
+ ext4_ext_drop_refs(path);
+ kfree(path);
trace_ext4_ext_map_blocks_exit(inode, flags, map,
err ? err : allocated);
@@ -4799,7 +4793,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
max_blocks -= lblk;
flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
- EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+ EXT4_EX_NOCACHE;
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
@@ -4837,15 +4832,21 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+ flags, mode);
+ if (ret)
+ goto out_dio;
/*
* Remove entire range from the extent status tree.
+ *
+ * ext4_es_remove_extent(inode, lblk, max_blocks) is
+ * NOT sufficient. I'm not sure why this is the case,
+ * but let's be conservative and remove the extent
+ * status tree for the entire inode. There should be
+ * no outstanding delalloc extents thanks to the
+ * filemap_write_and_wait_range() call above.
*/
- ret = ext4_es_remove_extent(inode, lblk, max_blocks);
- if (ret)
- goto out_dio;
-
- ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
- flags, mode);
+ ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
if (ret)
goto out_dio;
}
@@ -5304,36 +5305,31 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
struct ext4_ext_path *path;
int ret = 0, depth;
struct ext4_extent *extent;
- ext4_lblk_t stop_block, current_block;
+ ext4_lblk_t stop_block;
ext4_lblk_t ex_start, ex_end;
/* Let path point to the last extent */
- path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
extent = path[depth].p_ext;
- if (!extent) {
- ext4_ext_drop_refs(path);
- kfree(path);
- return ret;
- }
+ if (!extent)
+ goto out;
stop_block = le32_to_cpu(extent->ee_block) +
ext4_ext_get_actual_len(extent);
- ext4_ext_drop_refs(path);
- kfree(path);
/* Nothing to shift, if hole is at the end of file */
if (start >= stop_block)
- return ret;
+ goto out;
/*
* Don't start shifting extents until we make sure the hole is big
* enough to accomodate the shift.
*/
- path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+ path = ext4_find_extent(inode, start - 1, &path, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5346,8 +5342,6 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ex_start = 0;
ex_end = 0;
}
- ext4_ext_drop_refs(path);
- kfree(path);
if ((start == ex_start && shift > ex_start) ||
(shift > start - ex_end))
@@ -5355,7 +5349,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
/* Its safe to start updating extents */
while (start < stop_block) {
- path = ext4_ext_find_extent(inode, start, NULL, 0);
+ path = ext4_find_extent(inode, start, &path, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5365,27 +5359,23 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
(unsigned long) start);
return -EIO;
}
-
- current_block = le32_to_cpu(extent->ee_block);
- if (start > current_block) {
+ if (start > le32_to_cpu(extent->ee_block)) {
/* Hole, move to the next extent */
- ret = mext_next_extent(inode, path, &extent);
- if (ret != 0) {
- ext4_ext_drop_refs(path);
- kfree(path);
- if (ret == 1)
- ret = 0;
- break;
+ if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
+ path[depth].p_ext++;
+ } else {
+ start = ext4_ext_next_allocated_block(path);
+ continue;
}
}
ret = ext4_ext_shift_path_extents(path, shift, inode,
handle, &start);
- ext4_ext_drop_refs(path);
- kfree(path);
if (ret)
break;
}
-
+out:
+ ext4_ext_drop_refs(path);
+ kfree(path);
return ret;
}
@@ -5508,3 +5498,199 @@ out_mutex:
mutex_unlock(&inode->i_mutex);
return ret;
}
+
+/**
+ * ext4_swap_extents - Swap extents between two inodes
+ * @handle: Handle passed down to the extent tree modification helpers
+ * @inode1:	First inode
+ * @inode2:	Second inode
+ * @lblk1:	Start block for first inode
+ * @lblk2:	Start block for second inode
+ * @count:	Number of blocks to swap
+ * @unwritten: Mark second inode's extents as unwritten after swap
+ * @erp:	Pointer to save error value
+ *
+ * Does exactly what the name promises, "swap extents". Page-cache locking,
+ * bh mapping consistency and extent data copying are left to the caller.
+ * Returns the number of blocks swapped; *@erp is set to 0 or an error code.
+ * Locking:
+ * 		i_mutex is held for both inodes
+ * 		i_data_sem is locked for write for both inodes
+ * Assumptions:
+ *		All pages from requested range are locked for both inodes
+ */
+int
+ext4_swap_extents(handle_t *handle, struct inode *inode1,
+		     struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
+		  ext4_lblk_t count, int unwritten, int *erp)
+{
+	struct ext4_ext_path *path1 = NULL;
+	struct ext4_ext_path *path2 = NULL;
+	int replaced_count = 0;
+
+	BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
+	BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
+	BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+	BUG_ON(!mutex_is_locked(&inode2->i_mutex));
+
+	*erp = ext4_es_remove_extent(inode1, lblk1, count);
+	if (unlikely(*erp))
+		return 0;
+	*erp = ext4_es_remove_extent(inode2, lblk2, count);
+	if (unlikely(*erp))
+		return 0;
+
+	while (count) {
+		struct ext4_extent *ex1, *ex2, tmp_ex;
+		ext4_lblk_t e1_blk, e2_blk;
+		int e1_len, e2_len, len;
+		int split = 0;
+
+		path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
+		if (unlikely(IS_ERR(path1))) {
+			*erp = PTR_ERR(path1);
+			path1 = NULL;
+		finish:
+			count = 0;
+			goto repeat;
+		}
+		path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
+		if (unlikely(IS_ERR(path2))) {
+			*erp = PTR_ERR(path2);
+			path2 = NULL;
+			goto finish;
+		}
+		ex1 = path1[path1->p_depth].p_ext;
+		ex2 = path2[path2->p_depth].p_ext;
+		/* Do we have something to swap ? */
+		if (unlikely(!ex2 || !ex1))
+			goto finish;
+
+		e1_blk = le32_to_cpu(ex1->ee_block);
+		e2_blk = le32_to_cpu(ex2->ee_block);
+		e1_len = ext4_ext_get_actual_len(ex1);
+		e2_len = ext4_ext_get_actual_len(ex2);
+
+		/* Hole handling */
+		if (!in_range(lblk1, e1_blk, e1_len)  ||
+		    !in_range(lblk2, e2_blk, e2_len)) {
+			ext4_lblk_t next1, next2;
+
+			/* if hole after extent, then go to next extent */
+			next1 = ext4_ext_next_allocated_block(path1);
+			next2 = ext4_ext_next_allocated_block(path2);
+			/* If hole before extent, then shift to that extent */
+			if (e1_blk > lblk1)
+				next1 = e1_blk;
+			if (e2_blk > lblk2)
+				next2 = e2_blk;
+			/* Do we have something to swap */
+			if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
+				goto finish;
+			/* Move to the rightmost boundary */
+			len = next1 - lblk1;
+			if (len < next2 - lblk2)
+				len = next2 - lblk2;
+			if (len > count)
+				len = count;
+			lblk1 += len;
+			lblk2 += len;
+			count -= len;
+			goto repeat;
+		}
+
+		/* Prepare left boundary */
+		if (e1_blk < lblk1) {
+			split = 1;
+			*erp = ext4_force_split_extent_at(handle, inode1,
+						&path1, lblk1, 0);
+			if (unlikely(*erp))
+				goto finish;
+		}
+		if (e2_blk < lblk2) {
+			split = 1;
+			*erp = ext4_force_split_extent_at(handle, inode2,
+						&path2,  lblk2, 0);
+			if (unlikely(*erp))
+				goto finish;
+		}
+		/* ext4_split_extent_at() may result in leaf extent split,
+		 * path must be revalidated. */
+		if (split)
+			goto repeat;
+
+		/* Prepare right boundary */
+		len = count;
+		if (len > e1_blk + e1_len - lblk1)
+			len = e1_blk + e1_len - lblk1;
+		if (len > e2_blk + e2_len - lblk2)
+			len = e2_blk + e2_len - lblk2;
+
+		if (len != e1_len) {
+			split = 1;
+			*erp = ext4_force_split_extent_at(handle, inode1,
+						&path1, lblk1 + len, 0);
+			if (unlikely(*erp))
+				goto finish;
+		}
+		if (len != e2_len) {
+			split = 1;
+			*erp = ext4_force_split_extent_at(handle, inode2,
+						&path2, lblk2 + len, 0);
+			if (unlikely(*erp))
+				goto finish;
+		}
+		/* ext4_split_extent_at() may result in leaf extent split,
+		 * path must be revalidated. */
+		if (split)
+			goto repeat;
+
+		BUG_ON(e2_len != e1_len);
+		*erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
+		if (unlikely(*erp))
+			goto finish;
+		*erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
+		if (unlikely(*erp))
+			goto finish;
+
+		/* Both extents are fully inside boundaries. Swap it now */
+		tmp_ex = *ex1;
+		ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
+		ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
+		ex1->ee_len = cpu_to_le16(e2_len);
+		ex2->ee_len = cpu_to_le16(e1_len);
+		if (unwritten)
+			ext4_ext_mark_unwritten(ex2);
+		if (ext4_ext_is_unwritten(&tmp_ex))
+			ext4_ext_mark_unwritten(ex1);
+
+		ext4_ext_try_to_merge(handle, inode2, path2, ex2);
+		ext4_ext_try_to_merge(handle, inode1, path1, ex1);
+		*erp = ext4_ext_dirty(handle, inode2, path2 +
+				      path2->p_depth);
+		if (unlikely(*erp))
+			goto finish;
+		*erp = ext4_ext_dirty(handle, inode1, path1 +
+				      path1->p_depth);
+		/*
+		 * Looks scary, eh? The second inode already points to the new
+		 * blocks and was successfully dirtied. Luckily an error can
+		 * only happen here due to a journal error, so the whole
+		 * transaction will be aborted anyway.
+		 */
+		if (unlikely(*erp))
+			goto finish;
+		lblk1 += len;
+		lblk2 += len;
+		replaced_count += len;
+		count -= len;
+
+	repeat:
+		ext4_ext_drop_refs(path1);
+		kfree(path1);
+		ext4_ext_drop_refs(path2);
+		kfree(path2);
+		path1 = path2 = NULL;
+	}
+	return replaced_count;
+}