summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--  fs/btrfs/bio.c           2
-rw-r--r--  fs/btrfs/bio.h           3
-rw-r--r--  fs/btrfs/extent-tree.c   6
-rw-r--r--  fs/btrfs/relocation.c    461
4 files changed, 467 insertions, 5 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index d46f39996469..d3475d179362 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -821,7 +821,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
*/
if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
!test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
- !btrfs_is_data_reloc_root(inode->root)) {
+ !btrfs_is_data_reloc_root(inode->root) && !bbio->is_remap) {
if (should_async_write(bbio) &&
btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
goto done;
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index 157cdfa2f78a..303ed6c7103d 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -90,6 +90,9 @@ struct btrfs_bio {
*/
bool is_scrub:1;
+ /* Whether the bio is coming from copy_remapped_data_io(). */
+ bool is_remap:1;
+
/* Whether the csum generation for data write is async. */
bool async_csum:1;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5e3e9f18b263..ebff087b4e89 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4557,7 +4557,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
block_group->cached != BTRFS_CACHE_NO) {
down_read(&space_info->groups_sem);
if (list_empty(&block_group->list) ||
- block_group->ro) {
+ block_group->ro ||
+ (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)) {
/*
* someone is removing this block group,
* we can't jump into the have_block_group
@@ -4591,7 +4592,8 @@ search:
ffe_ctl->hinted = false;
/* If the block group is read-only, we can skip it entirely. */
- if (unlikely(block_group->ro)) {
+ if (unlikely(block_group->ro ||
+ (block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED))) {
if (ffe_ctl->for_treelog)
btrfs_clear_treelog_bg(block_group);
if (ffe_ctl->for_data_reloc)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 4d3b3854ff7f..a1558ee92d29 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3966,6 +3966,457 @@ static void adjust_block_group_remap_bytes(struct btrfs_trans_handle *trans,
btrfs_inc_delayed_refs_rsv_bg_updates(fs_info);
}
/*
 * Private structure for I/O from copy_remapped_data(): shared completion
 * context for all bios submitted by one copy_remapped_data_io() call.
 */
struct reloc_io_private {
	/* Signalled once pending_refs drops to zero. */
	struct completion done;
	/* One ref per in-flight bio, plus one held by the submitter. */
	refcount_t pending_refs;
	/* Last I/O error seen by any completed bio, 0 if none. */
	blk_status_t status;
};
+
+static void reloc_endio(struct btrfs_bio *bbio)
+{
+ struct reloc_io_private *priv = bbio->private;
+
+ if (bbio->bio.bi_status)
+ WRITE_ONCE(priv->status, bbio->bio.bi_status);
+
+ if (refcount_dec_and_test(&priv->pending_refs))
+ complete(&priv->done);
+
+ bio_put(&bbio->bio);
+}
+
/*
 * Read or write @length bytes at logical address @addr using the given
 * page array as the data buffer, submitting as many bios as needed and
 * waiting for all of them to complete.
 *
 * @fs_info: the filesystem.
 * @priv:    caller-provided completion context; (re)initialized here.
 * @pages:   pages backing the transfer, consumed in order from index 0.
 * @addr:    logical address of the first byte to transfer.
 * @length:  number of bytes to transfer.
 * @op:      REQ_OP_READ or REQ_OP_WRITE.
 *
 * Returns 0 on success or a negative errno if any bio failed.
 */
static int copy_remapped_data_io(struct btrfs_fs_info *fs_info,
				 struct reloc_io_private *priv,
				 struct page **pages, u64 addr, u64 length,
				 blk_opf_t op)
{
	struct btrfs_bio *bbio;
	int i;

	init_completion(&priv->done);
	/* Submitter's own ref; dropped after the last bio is submitted. */
	refcount_set(&priv->pending_refs, 1);
	priv->status = 0;

	bbio = btrfs_bio_alloc(BIO_MAX_VECS, op, BTRFS_I(fs_info->btree_inode),
			       addr, reloc_endio, priv);
	bbio->bio.bi_iter.bi_sector = (addr >> SECTOR_SHIFT);
	/* Tell the bio layer to bypass csum generation for this remap I/O. */
	bbio->is_remap = true;

	i = 0;
	do {
		/* The final page may be partially used. */
		size_t bytes = min_t(u64, length, PAGE_SIZE);

		if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
			/*
			 * Current bio is full: take a ref for it, submit it,
			 * start a new bio at the current address and retry
			 * adding the same page (i/addr/length unchanged).
			 */
			refcount_inc(&priv->pending_refs);
			btrfs_submit_bbio(bbio, 0);

			bbio = btrfs_bio_alloc(BIO_MAX_VECS, op,
					       BTRFS_I(fs_info->btree_inode),
					       addr, reloc_endio, priv);
			bbio->bio.bi_iter.bi_sector = (addr >> SECTOR_SHIFT);
			bbio->is_remap = true;
			continue;
		}

		i++;
		addr += bytes;
		length -= bytes;
	} while (length);

	/* Submit the last (possibly only) bio. */
	refcount_inc(&priv->pending_refs);
	btrfs_submit_bbio(bbio, 0);

	/* Drop the submitter ref; wait only if bios are still in flight. */
	if (!refcount_dec_and_test(&priv->pending_refs))
		wait_for_completion_io(&priv->done);

	return blk_status_to_errno(READ_ONCE(priv->status));
}
+
+static int copy_remapped_data(struct btrfs_fs_info *fs_info, u64 old_addr,
+ u64 new_addr, u64 length)
+{
+ int ret;
+ u64 copy_len = min_t(u64, length, SZ_1M);
+ struct page **pages;
+ struct reloc_io_private priv;
+ unsigned int nr_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+
+ pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
+ if (!pages)
+ return -ENOMEM;
+
+ ret = btrfs_alloc_page_array(nr_pages, pages, 0);
+ if (ret) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ /* Copy 1MB at a time, to avoid using too much memory. */
+ do {
+ u64 to_copy = min_t(u64, length, copy_len);
+
+ /* Limit to one bio. */
+ to_copy = min_t(u64, to_copy, BIO_MAX_VECS << PAGE_SHIFT);
+
+ ret = copy_remapped_data_io(fs_info, &priv, pages, old_addr,
+ to_copy, REQ_OP_READ);
+ if (ret)
+ goto end;
+
+ ret = copy_remapped_data_io(fs_info, &priv, pages, new_addr,
+ to_copy, REQ_OP_WRITE);
+ if (ret)
+ goto end;
+
+ if (to_copy == length)
+ break;
+
+ old_addr += to_copy;
+ new_addr += to_copy;
+ length -= to_copy;
+ } while (true);
+
+ ret = 0;
+end:
+ for (int i = 0; i < nr_pages; i++) {
+ if (pages[i])
+ __free_page(pages[i]);
+ }
+ kfree(pages);
+
+ return ret;
+}
+
+static int add_remap_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path, u64 new_addr, u64 length,
+ u64 old_addr)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_remap_item remap = { 0 };
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ int ret;
+
+ key.objectid = old_addr;
+ key.type = BTRFS_REMAP_KEY;
+ key.offset = length;
+
+ ret = btrfs_insert_empty_item(trans, fs_info->remap_root, path,
+ &key, sizeof(struct btrfs_remap_item));
+ if (ret)
+ return ret;
+
+ leaf = path->nodes[0];
+ btrfs_set_stack_remap_address(&remap, new_addr);
+ write_extent_buffer(leaf, &remap, btrfs_item_ptr_offset(leaf, path->slots[0]),
+ sizeof(struct btrfs_remap_item));
+
+ btrfs_release_path(path);
+
+ return 0;
+}
+
+static int add_remap_backref_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path, u64 new_addr,
+ u64 length, u64 old_addr)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_remap_item remap = { 0 };
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ int ret;
+
+ key.objectid = new_addr;
+ key.type = BTRFS_REMAP_BACKREF_KEY;
+ key.offset = length;
+
+ ret = btrfs_insert_empty_item(trans, fs_info->remap_root, path, &key,
+ sizeof(struct btrfs_remap_item));
+ if (ret)
+ return ret;
+
+ leaf = path->nodes[0];
+ btrfs_set_stack_remap_address(&remap, old_addr);
+ write_extent_buffer(leaf, &remap, btrfs_item_ptr_offset(leaf, path->slots[0]),
+ sizeof(struct btrfs_remap_item));
+
+ btrfs_release_path(path);
+
+ return 0;
+}
+
+static int move_existing_remap(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
+ struct btrfs_block_group *bg, u64 new_addr,
+ u64 length, u64 old_addr)
+{
+ struct btrfs_trans_handle *trans;
+ struct extent_buffer *leaf;
+ struct btrfs_remap_item *remap_ptr;
+ struct btrfs_remap_item remap = { 0 };
+ struct btrfs_key key, ins;
+ u64 dest_addr, dest_length, min_size;
+ struct btrfs_block_group *dest_bg;
+ int ret;
+ const bool is_data = (bg->flags & BTRFS_BLOCK_GROUP_DATA);
+ struct btrfs_space_info *sinfo = bg->space_info;
+ bool mutex_taken = false;
+ bool bg_needs_free_space;
+
+ spin_lock(&sinfo->lock);
+ btrfs_space_info_update_bytes_may_use(sinfo, length);
+ spin_unlock(&sinfo->lock);
+
+ if (is_data)
+ min_size = fs_info->sectorsize;
+ else
+ min_size = fs_info->nodesize;
+
+ ret = btrfs_reserve_extent(fs_info->fs_root, length, length, min_size,
+ 0, 0, &ins, is_data, false);
+ if (unlikely(ret)) {
+ spin_lock(&sinfo->lock);
+ btrfs_space_info_update_bytes_may_use(sinfo, -length);
+ spin_unlock(&sinfo->lock);
+ return ret;
+ }
+
+ dest_addr = ins.objectid;
+ dest_length = ins.offset;
+
+ if (!is_data && !IS_ALIGNED(dest_length, fs_info->nodesize)) {
+ u64 new_length = ALIGN_DOWN(dest_length, fs_info->nodesize);
+
+ btrfs_free_reserved_extent(fs_info, dest_addr + new_length,
+ dest_length - new_length, 0);
+
+ dest_length = new_length;
+ }
+
+ trans = btrfs_join_transaction(fs_info->remap_root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto end;
+ }
+
+ mutex_lock(&fs_info->remap_mutex);
+ mutex_taken = true;
+
+ /* Find old remap entry. */
+ key.objectid = old_addr;
+ key.type = BTRFS_REMAP_KEY;
+ key.offset = length;
+
+ ret = btrfs_search_slot(trans, fs_info->remap_root, &key, path, 0, 1);
+ if (ret == 1) {
+ /*
+ * Not a problem if the remap entry wasn't found: that means
+ * that another transaction has deallocated the data.
+ * move_existing_remaps() loops until the BG contains no
+ * remaps, so we can just return 0 in this case.
+ */
+ btrfs_release_path(path);
+ ret = 0;
+ goto end;
+ } else if (unlikely(ret)) {
+ goto end;
+ }
+
+ ret = copy_remapped_data(fs_info, new_addr, dest_addr, dest_length);
+ if (unlikely(ret))
+ goto end;
+
+ /* Change data of old remap entry. */
+ leaf = path->nodes[0];
+ remap_ptr = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_remap_item);
+ btrfs_set_remap_address(leaf, remap_ptr, dest_addr);
+ btrfs_mark_buffer_dirty(trans, leaf);
+
+ if (dest_length != length) {
+ key.offset = dest_length;
+ btrfs_set_item_key_safe(trans, path, &key);
+ }
+
+ btrfs_release_path(path);
+
+ if (dest_length != length) {
+ /* Add remap item for remainder. */
+ ret = add_remap_item(trans, path, new_addr + dest_length,
+ length - dest_length, old_addr + dest_length);
+ if (unlikely(ret))
+ goto end;
+ }
+
+ /* Change or remove old backref. */
+ key.objectid = new_addr;
+ key.type = BTRFS_REMAP_BACKREF_KEY;
+ key.offset = length;
+
+ ret = btrfs_search_slot(trans, fs_info->remap_root, &key, path, -1, 1);
+ if (unlikely(ret)) {
+ if (ret == 1) {
+ btrfs_release_path(path);
+ ret = -ENOENT;
+ }
+ goto end;
+ }
+
+ leaf = path->nodes[0];
+
+ if (dest_length == length) {
+ ret = btrfs_del_item(trans, fs_info->remap_root, path);
+ if (unlikely(ret)) {
+ btrfs_release_path(path);
+ goto end;
+ }
+ } else {
+ key.objectid += dest_length;
+ key.offset -= dest_length;
+ btrfs_set_item_key_safe(trans, path, &key);
+ btrfs_set_stack_remap_address(&remap, old_addr + dest_length);
+
+ write_extent_buffer(leaf, &remap,
+ btrfs_item_ptr_offset(leaf, path->slots[0]),
+ sizeof(struct btrfs_remap_item));
+ }
+
+ btrfs_release_path(path);
+
+ /* Add new backref. */
+ ret = add_remap_backref_item(trans, path, dest_addr, dest_length, old_addr);
+ if (unlikely(ret))
+ goto end;
+
+ adjust_block_group_remap_bytes(trans, bg, -dest_length);
+
+ ret = btrfs_add_to_free_space_tree(trans, new_addr, dest_length);
+ if (unlikely(ret))
+ goto end;
+
+ dest_bg = btrfs_lookup_block_group(fs_info, dest_addr);
+
+ adjust_block_group_remap_bytes(trans, dest_bg, dest_length);
+
+ mutex_lock(&dest_bg->free_space_lock);
+ bg_needs_free_space = test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
+ &dest_bg->runtime_flags);
+ mutex_unlock(&dest_bg->free_space_lock);
+ btrfs_put_block_group(dest_bg);
+
+ if (bg_needs_free_space) {
+ ret = btrfs_add_block_group_free_space(trans, dest_bg);
+ if (unlikely(ret))
+ goto end;
+ }
+
+ ret = btrfs_remove_from_free_space_tree(trans, dest_addr, dest_length);
+ if (unlikely(ret)) {
+ btrfs_remove_from_free_space_tree(trans, new_addr, dest_length);
+ goto end;
+ }
+
+ ret = 0;
+
+end:
+ if (mutex_taken)
+ mutex_unlock(&fs_info->remap_mutex);
+
+ btrfs_dec_block_group_reservations(fs_info, dest_addr);
+
+ if (unlikely(ret)) {
+ btrfs_free_reserved_extent(fs_info, dest_addr, dest_length, 0);
+
+ if (trans) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ }
+ } else {
+ dest_bg = btrfs_lookup_block_group(fs_info, dest_addr);
+ btrfs_free_reserved_bytes(dest_bg, dest_length, 0);
+ btrfs_put_block_group(dest_bg);
+
+ ret = btrfs_commit_transaction(trans);
+ }
+
+ return ret;
+}
+
+static int move_existing_remaps(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group *bg,
+ struct btrfs_path *path)
+{
+ int ret;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct btrfs_remap_item *remap;
+ u64 old_addr;
+
+ /* Look for backrefs in remap tree. */
+ while (bg->remap_bytes > 0) {
+ key.objectid = bg->start;
+ key.type = BTRFS_REMAP_BACKREF_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, fs_info->remap_root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ leaf = path->nodes[0];
+
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(fs_info->remap_root, path);
+ if (ret < 0) {
+ btrfs_release_path(path);
+ return ret;
+ }
+
+ if (ret) {
+ btrfs_release_path(path);
+ break;
+ }
+
+ leaf = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ if (key.type != BTRFS_REMAP_BACKREF_KEY) {
+ path->slots[0]++;
+
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(fs_info->remap_root, path);
+ if (ret < 0) {
+ btrfs_release_path(path);
+ return ret;
+ }
+
+ if (ret) {
+ btrfs_release_path(path);
+ break;
+ }
+
+ leaf = path->nodes[0];
+ }
+ }
+
+ remap = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_remap_item);
+ old_addr = btrfs_remap_address(leaf, remap);
+
+ btrfs_release_path(path);
+
+ ret = move_existing_remap(fs_info, path, bg, key.objectid,
+ key.offset, old_addr);
+ if (ret)
+ return ret;
+ }
+
+ ASSERT(bg->remap_bytes == 0);
+
+ return 0;
+}
+
static int create_remap_tree_entries(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_block_group *bg)
@@ -4615,10 +5066,16 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start,
ret = btrfs_zone_finish(rc->block_group);
WARN_ON(ret && ret != -EAGAIN);
- if (should_relocate_using_remap_tree(bg))
+ if (should_relocate_using_remap_tree(bg)) {
+ if (bg->remap_bytes != 0) {
+ ret = move_existing_remaps(fs_info, bg, path);
+ if (ret)
+ goto out;
+ }
ret = start_block_group_remapping(fs_info, path, bg);
- else
+ } else {
ret = do_nonremap_reloc(fs_info, verbose, rc);
+ }
out:
if (ret && bg_is_ro)