diff options
| -rw-r--r-- | fs/btrfs/bio.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/bio.h | 3 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 6 | ||||
| -rw-r--r-- | fs/btrfs/relocation.c | 461 |
4 files changed, 467 insertions, 5 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index d46f39996469..d3475d179362 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -821,7 +821,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) */ if (!(inode->flags & BTRFS_INODE_NODATASUM) && !test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) && - !btrfs_is_data_reloc_root(inode->root)) { + !btrfs_is_data_reloc_root(inode->root) && !bbio->is_remap) { if (should_async_write(bbio) && btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num)) goto done; diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h index 157cdfa2f78a..303ed6c7103d 100644 --- a/fs/btrfs/bio.h +++ b/fs/btrfs/bio.h @@ -90,6 +90,9 @@ struct btrfs_bio { */ bool is_scrub:1; + /* Whether the bio is coming from copy_remapped_data_io(). */ + bool is_remap:1; + /* Whether the csum generation for data write is async. */ bool async_csum:1; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5e3e9f18b263..ebff087b4e89 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4557,7 +4557,8 @@ static noinline int find_free_extent(struct btrfs_root *root, block_group->cached != BTRFS_CACHE_NO) { down_read(&space_info->groups_sem); if (list_empty(&block_group->list) || - block_group->ro) { + block_group->ro || + (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)) { /* * someone is removing this block group, * we can't jump into the have_block_group @@ -4591,7 +4592,8 @@ search: ffe_ctl->hinted = false; /* If the block group is read-only, we can skip it entirely. */ - if (unlikely(block_group->ro)) { + if (unlikely(block_group->ro || + (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED))) { if (ffe_ctl->for_treelog) btrfs_clear_treelog_bg(block_group); if (ffe_ctl->for_data_reloc) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4d3b3854ff7f..a1558ee92d29 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3966,6 +3966,457 @@ static void adjust_block_group_remap_bytes(struct btrfs_trans_handle *trans, btrfs_inc_delayed_refs_rsv_bg_updates(fs_info); } +/* Private structure for I/O from copy_remapped_data(). */ +struct reloc_io_private { + struct completion done; + refcount_t pending_refs; + blk_status_t status; +}; + +static void reloc_endio(struct btrfs_bio *bbio) +{ + struct reloc_io_private *priv = bbio->private; + + if (bbio->bio.bi_status) + WRITE_ONCE(priv->status, bbio->bio.bi_status); + + if (refcount_dec_and_test(&priv->pending_refs)) + complete(&priv->done); + + bio_put(&bbio->bio); +} + +static int copy_remapped_data_io(struct btrfs_fs_info *fs_info, + struct reloc_io_private *priv, + struct page **pages, u64 addr, u64 length, + blk_opf_t op) +{ + struct btrfs_bio *bbio; + int i; + + init_completion(&priv->done); + refcount_set(&priv->pending_refs, 1); + priv->status = 0; + + bbio = btrfs_bio_alloc(BIO_MAX_VECS, op, BTRFS_I(fs_info->btree_inode), + addr, reloc_endio, priv); + bbio->bio.bi_iter.bi_sector = (addr >> SECTOR_SHIFT); + bbio->is_remap = true; + + i = 0; + do { + size_t bytes = min_t(u64, length, PAGE_SIZE); + + if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) { + refcount_inc(&priv->pending_refs); + btrfs_submit_bbio(bbio, 0); + + bbio = btrfs_bio_alloc(BIO_MAX_VECS, op, + BTRFS_I(fs_info->btree_inode), + addr, reloc_endio, priv); + bbio->bio.bi_iter.bi_sector = (addr >> SECTOR_SHIFT); + bbio->is_remap = true; + continue; + } + + i++; + addr += bytes; + length -= bytes; + } while (length); + + refcount_inc(&priv->pending_refs); + btrfs_submit_bbio(bbio, 0); + + if (!refcount_dec_and_test(&priv->pending_refs)) + wait_for_completion_io(&priv->done); + + return blk_status_to_errno(READ_ONCE(priv->status)); +} + +static int copy_remapped_data(struct btrfs_fs_info *fs_info, u64 old_addr, + u64 new_addr, u64 length) +{ + int ret; + u64 copy_len = min_t(u64, length, SZ_1M); + struct page **pages; + struct reloc_io_private priv; + unsigned int nr_pages = DIV_ROUND_UP(length, PAGE_SIZE); + + pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); + if (!pages) + return -ENOMEM; + + ret = btrfs_alloc_page_array(nr_pages, pages, 0); + if (ret) { + ret = -ENOMEM; + goto end; + } + + /* Copy 1MB at a time, to avoid using too much memory. */ + do { + u64 to_copy = min_t(u64, length, copy_len); + + /* Limit to one bio. */ + to_copy = min_t(u64, to_copy, BIO_MAX_VECS << PAGE_SHIFT); + + ret = copy_remapped_data_io(fs_info, &priv, pages, old_addr, + to_copy, REQ_OP_READ); + if (ret) + goto end; + + ret = copy_remapped_data_io(fs_info, &priv, pages, new_addr, + to_copy, REQ_OP_WRITE); + if (ret) + goto end; + + if (to_copy == length) + break; + + old_addr += to_copy; + new_addr += to_copy; + length -= to_copy; + } while (true); + + ret = 0; +end: + for (int i = 0; i < nr_pages; i++) { + if (pages[i]) + __free_page(pages[i]); + } + kfree(pages); + + return ret; +} + +static int add_remap_item(struct btrfs_trans_handle *trans, + struct btrfs_path *path, u64 new_addr, u64 length, + u64 old_addr) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_remap_item remap = { 0 }; + struct btrfs_key key; + struct extent_buffer *leaf; + int ret; + + key.objectid = old_addr; + key.type = BTRFS_REMAP_KEY; + key.offset = length; + + ret = btrfs_insert_empty_item(trans, fs_info->remap_root, path, + &key, sizeof(struct btrfs_remap_item)); + if (ret) + return ret; + + leaf = path->nodes[0]; + btrfs_set_stack_remap_address(&remap, new_addr); + write_extent_buffer(leaf, &remap, btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(struct btrfs_remap_item)); + + btrfs_release_path(path); + + return 0; +} + +static int add_remap_backref_item(struct btrfs_trans_handle *trans, + struct btrfs_path *path, u64 new_addr, + u64 length, u64 old_addr) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_remap_item remap = { 0 }; + struct btrfs_key key; + struct extent_buffer *leaf; + int ret; + + key.objectid = new_addr; + key.type = BTRFS_REMAP_BACKREF_KEY; + key.offset = length; + + ret = btrfs_insert_empty_item(trans, fs_info->remap_root, path, &key, + sizeof(struct btrfs_remap_item)); + if (ret) + return ret; + + leaf = path->nodes[0]; + btrfs_set_stack_remap_address(&remap, old_addr); + write_extent_buffer(leaf, &remap, btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(struct btrfs_remap_item)); + + btrfs_release_path(path); + + return 0; +} + +static int move_existing_remap(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, + struct btrfs_block_group *bg, u64 new_addr, + u64 length, u64 old_addr) +{ + struct btrfs_trans_handle *trans; + struct extent_buffer *leaf; + struct btrfs_remap_item *remap_ptr; + struct btrfs_remap_item remap = { 0 }; + struct btrfs_key key, ins; + u64 dest_addr, dest_length, min_size; + struct btrfs_block_group *dest_bg; + int ret; + const bool is_data = (bg->flags & BTRFS_BLOCK_GROUP_DATA); + struct btrfs_space_info *sinfo = bg->space_info; + bool mutex_taken = false; + bool bg_needs_free_space; + + spin_lock(&sinfo->lock); + btrfs_space_info_update_bytes_may_use(sinfo, length); + spin_unlock(&sinfo->lock); + + if (is_data) + min_size = fs_info->sectorsize; + else + min_size = fs_info->nodesize; + + ret = btrfs_reserve_extent(fs_info->fs_root, length, length, min_size, + 0, 0, &ins, is_data, false); + if (unlikely(ret)) { + spin_lock(&sinfo->lock); + btrfs_space_info_update_bytes_may_use(sinfo, -length); + spin_unlock(&sinfo->lock); + return ret; + } + + dest_addr = ins.objectid; + dest_length = ins.offset; + + if (!is_data && !IS_ALIGNED(dest_length, fs_info->nodesize)) { + u64 new_length = ALIGN_DOWN(dest_length, fs_info->nodesize); + + btrfs_free_reserved_extent(fs_info, dest_addr + new_length, + dest_length - new_length, 0); + + dest_length = new_length; + } + + trans = btrfs_join_transaction(fs_info->remap_root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto end; + } + + mutex_lock(&fs_info->remap_mutex); + mutex_taken = true; + + /* Find old remap entry. */ + key.objectid = old_addr; + key.type = BTRFS_REMAP_KEY; + key.offset = length; + + ret = btrfs_search_slot(trans, fs_info->remap_root, &key, path, 0, 1); + if (ret == 1) { + /* + * Not a problem if the remap entry wasn't found: that means + * that another transaction has deallocated the data. + * move_existing_remaps() loops until the BG contains no + * remaps, so we can just return 0 in this case. + */ + btrfs_release_path(path); + ret = 0; + goto end; + } else if (unlikely(ret)) { + goto end; + } + + ret = copy_remapped_data(fs_info, new_addr, dest_addr, dest_length); + if (unlikely(ret)) + goto end; + + /* Change data of old remap entry. */ + leaf = path->nodes[0]; + remap_ptr = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_remap_item); + btrfs_set_remap_address(leaf, remap_ptr, dest_addr); + btrfs_mark_buffer_dirty(trans, leaf); + + if (dest_length != length) { + key.offset = dest_length; + btrfs_set_item_key_safe(trans, path, &key); + } + + btrfs_release_path(path); + + if (dest_length != length) { + /* Add remap item for remainder. */ + ret = add_remap_item(trans, path, new_addr + dest_length, + length - dest_length, old_addr + dest_length); + if (unlikely(ret)) + goto end; + } + + /* Change or remove old backref. */ + key.objectid = new_addr; + key.type = BTRFS_REMAP_BACKREF_KEY; + key.offset = length; + + ret = btrfs_search_slot(trans, fs_info->remap_root, &key, path, -1, 1); + if (unlikely(ret)) { + if (ret == 1) { + btrfs_release_path(path); + ret = -ENOENT; + } + goto end; + } + + leaf = path->nodes[0]; + + if (dest_length == length) { + ret = btrfs_del_item(trans, fs_info->remap_root, path); + if (unlikely(ret)) { + btrfs_release_path(path); + goto end; + } + } else { + key.objectid += dest_length; + key.offset -= dest_length; + btrfs_set_item_key_safe(trans, path, &key); + btrfs_set_stack_remap_address(&remap, old_addr + dest_length); + + write_extent_buffer(leaf, &remap, + btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(struct btrfs_remap_item)); + } + + btrfs_release_path(path); + + /* Add new backref. */ + ret = add_remap_backref_item(trans, path, dest_addr, dest_length, old_addr); + if (unlikely(ret)) + goto end; + + adjust_block_group_remap_bytes(trans, bg, -dest_length); + + ret = btrfs_add_to_free_space_tree(trans, new_addr, dest_length); + if (unlikely(ret)) + goto end; + + dest_bg = btrfs_lookup_block_group(fs_info, dest_addr); + + adjust_block_group_remap_bytes(trans, dest_bg, dest_length); + + mutex_lock(&dest_bg->free_space_lock); + bg_needs_free_space = test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, + &dest_bg->runtime_flags); + mutex_unlock(&dest_bg->free_space_lock); + btrfs_put_block_group(dest_bg); + + if (bg_needs_free_space) { + ret = btrfs_add_block_group_free_space(trans, dest_bg); + if (unlikely(ret)) + goto end; + } + + ret = btrfs_remove_from_free_space_tree(trans, dest_addr, dest_length); + if (unlikely(ret)) { + btrfs_remove_from_free_space_tree(trans, new_addr, dest_length); + goto end; + } + + ret = 0; + +end: + if (mutex_taken) + mutex_unlock(&fs_info->remap_mutex); + + btrfs_dec_block_group_reservations(fs_info, dest_addr); + + if (unlikely(ret)) { + btrfs_free_reserved_extent(fs_info, dest_addr, dest_length, 0); + + if (trans) { + btrfs_abort_transaction(trans, ret); + btrfs_end_transaction(trans); + } + } else { + dest_bg = btrfs_lookup_block_group(fs_info, dest_addr); + btrfs_free_reserved_bytes(dest_bg, dest_length, 0); + btrfs_put_block_group(dest_bg); + + ret = btrfs_commit_transaction(trans); + } + + return ret; +} + +static int move_existing_remaps(struct btrfs_fs_info *fs_info, + struct btrfs_block_group *bg, + struct btrfs_path *path) +{ + int ret; + struct btrfs_key key; + struct extent_buffer *leaf; + struct btrfs_remap_item *remap; + u64 old_addr; + + /* Look for backrefs in remap tree. */ + while (bg->remap_bytes > 0) { + key.objectid = bg->start; + key.type = BTRFS_REMAP_BACKREF_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, fs_info->remap_root, &key, path, 0, 0); + if (ret < 0) + return ret; + + leaf = path->nodes[0]; + + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(fs_info->remap_root, path); + if (ret < 0) { + btrfs_release_path(path); + return ret; + } + + if (ret) { + btrfs_release_path(path); + break; + } + + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.type != BTRFS_REMAP_BACKREF_KEY) { + path->slots[0]++; + + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(fs_info->remap_root, path); + if (ret < 0) { + btrfs_release_path(path); + return ret; + } + + if (ret) { + btrfs_release_path(path); + break; + } + + leaf = path->nodes[0]; + } + } + + remap = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_remap_item); + old_addr = btrfs_remap_address(leaf, remap); + + btrfs_release_path(path); + + ret = move_existing_remap(fs_info, path, bg, key.objectid, + key.offset, old_addr); + if (ret) + return ret; + } + + ASSERT(bg->remap_bytes == 0); + + return 0; +} + static int create_remap_tree_entries(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_block_group *bg) @@ -4615,10 +5066,16 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start, ret = btrfs_zone_finish(rc->block_group); WARN_ON(ret && ret != -EAGAIN); - if (should_relocate_using_remap_tree(bg)) + if (should_relocate_using_remap_tree(bg)) { + if (bg->remap_bytes != 0) { + ret = move_existing_remaps(fs_info, bg, path); + if (ret) + goto out; + } ret = start_block_group_remapping(fs_info, path, bg); - else + } else { ret = do_nonremap_reloc(fs_info, verbose, rc); + } out: if (ret && bg_is_ro) |
