From 9e622d6bea0202e9fe267955362c01918562c09b Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 26 Jan 2012 15:01:12 -0500 Subject: Btrfs: fix enospc error caused by wrong checks of the chunk When we did sysbench test for inline files, enospc error happened easily though there was lots of free disk space which could be allocated for new chunks. Reproduce steps: # mkfs.btrfs -b $((2 * 1024 * 1024 * 1024)) # mount /mnt # ulimit -n 102400 # cd /mnt # sysbench --num-threads=1 --test=fileio --file-num=81920 \ > --file-total-size=80M --file-block-size=1K --file-io-mode=sync \ > --file-test-mode=seqwr prepare # sysbench --num-threads=1 --test=fileio --file-num=81920 \ > --file-total-size=80M --file-block-size=1K --file-io-mode=sync \ > --file-test-mode=seqwr run The reason of this bug is: Now, we can reserve space which is larger than the free space in the chunks if we have enough free disk space which can be used for new chunks. By this way, the space allocator should allocate a new chunk by force if there is no free space in the free space cache. But there are two wrong checks which break this operation. One is if (ret == -ENOSPC && num_bytes > min_alloc_size) in btrfs_reserve_extent(), it is wrong, we should try to allocate a new chunk even we fail to allocate free space by minimum allocable size. The other is if (space_info->force_alloc) force = space_info->force_alloc; in do_chunk_alloc(). It makes the allocator ignore CHUNK_ALLOC_FORCE If someone sets ->force_alloc to CHUNK_ALLOC_LIMITED, and makes the enospc error happen. Fix these two wrong checks. Especially the second one, we fix it by changing the value of CHUNK_ALLOC_LIMITED and CHUNK_ALLOC_FORCE, and make CHUNK_ALLOC_FORCE greater than CHUNK_ALLOC_LIMITED since CHUNK_ALLOC_FORCE has higher priority. And if the value which is passed in by the caller is greater than ->force_alloc, use the passed value. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 700879ed64cf..283af7a676a3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -34,23 +34,24 @@ #include "locking.h" #include "free-space-cache.h" -/* control flags for do_chunk_alloc's force field +/* + * control flags for do_chunk_alloc's force field * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk * if we really need one. * - * CHUNK_ALLOC_FORCE means it must try to allocate one - * * CHUNK_ALLOC_LIMITED means to only try and allocate one * if we have very few chunks already allocated. This is * used as part of the clustering code to help make sure * we have a good pool of storage to cluster in, without * filling the FS with empty chunks * + * CHUNK_ALLOC_FORCE means it must try to allocate one + * */ enum { CHUNK_ALLOC_NO_FORCE = 0, - CHUNK_ALLOC_FORCE = 1, - CHUNK_ALLOC_LIMITED = 2, + CHUNK_ALLOC_LIMITED = 1, + CHUNK_ALLOC_FORCE = 2, }; /* @@ -3414,7 +3415,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, again: spin_lock(&space_info->lock); - if (space_info->force_alloc) + if (force < space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { spin_unlock(&space_info->lock); @@ -5794,6 +5795,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, u64 data) { + bool final_tried = false; int ret; u64 search_start = 0; @@ -5813,22 +5815,25 @@ again: search_start, search_end, hint_byte, ins, data); - if (ret == -ENOSPC && num_bytes > min_alloc_size) { - num_bytes = num_bytes >> 1; - num_bytes = num_bytes & ~(root->sectorsize - 1); - num_bytes = max(num_bytes, min_alloc_size); - do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, data, CHUNK_ALLOC_FORCE); - goto again; - } - if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { - struct btrfs_space_info *sinfo; - - sinfo = __find_space_info(root->fs_info, data); - printk(KERN_ERR "btrfs allocation failed flags %llu, " - "wanted %llu\n", (unsigned long long)data, - (unsigned long long)num_bytes); - dump_space_info(sinfo, num_bytes, 1); + if (ret == -ENOSPC) { + if (!final_tried) { + num_bytes = num_bytes >> 1; + num_bytes = num_bytes & ~(root->sectorsize - 1); + num_bytes = max(num_bytes, min_alloc_size); + do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, data, CHUNK_ALLOC_FORCE); + if (num_bytes == min_alloc_size) + final_tried = true; + goto again; + } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) { + struct btrfs_space_info *sinfo; + + sinfo = __find_space_info(root->fs_info, data); + printk(KERN_ERR "btrfs allocation failed flags %llu, " + "wanted %llu\n", (unsigned long long)data, + (unsigned long long)num_bytes); + dump_space_info(sinfo, num_bytes, 1); + } } trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); -- cgit v1.2.3 From 2cac13e41bf5b99ffc426bd28dfd2248df1dfa67 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 9 Feb 2012 18:17:41 +0800 Subject: Btrfs: fix trim 0 bytes after a device delete A user reported a bug of btrfs's trim, that is we will trim 0 bytes after a device delete. The reproducer: $ mkfs.btrfs disk1 $ mkfs.btrfs disk2 $ mount disk1 /mnt $ fstrim -v /mnt $ btrfs device add disk2 /mnt $ btrfs device del disk1 /mnt $ fstrim -v /mnt This is because after we delete the device, the block group may start from a non-zero place, which will confuse trim to discard nothing. Reported-by: Lutz Euler Signed-off-by: Liu Bo --- fs/btrfs/extent-tree.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 283af7a676a3..60bfe2d68547 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7886,9 +7886,16 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; + u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret = 0; - cache = btrfs_lookup_block_group(fs_info, range->start); + /* + * try to trim all FS space, our block group may start from non-zero. + */ + if (range->len == total_bytes) + cache = btrfs_lookup_first_block_group(fs_info, range->start); + else + cache = btrfs_lookup_block_group(fs_info, range->start); while (cache) { if (cache->key.objectid >= (range->start + range->len)) { -- cgit v1.2.3 From d9b0218f6cb682aa6a4ada2bfc5a25fdf3018563 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 16 Feb 2012 18:34:39 +0800 Subject: Btrfs: fix a bug on overcommit stuff When overcommitting, we should check the sum of pinned space and bytes for delayed item. Signed-off-by: Liu Bo --- fs/btrfs/extent-tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 60bfe2d68547..dc083f55bcfd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3611,12 +3611,15 @@ static int may_commit_transaction(struct btrfs_root *root, if (space_info != delayed_rsv->space_info) return -ENOSPC; + spin_lock(&space_info->lock); spin_lock(&delayed_rsv->lock); - if (delayed_rsv->size < bytes) { + if (space_info->bytes_pinned + delayed_rsv->size < bytes) { spin_unlock(&delayed_rsv->lock); + spin_unlock(&space_info->lock); return -ENOSPC; } spin_unlock(&delayed_rsv->lock); + spin_unlock(&space_info->lock); commit: trans = btrfs_join_transaction(root); -- cgit v1.2.3 From 5500cdbe14d7435e04f66ff3cfb8ecd8b8e44ebf Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 23 Feb 2012 10:49:04 -0500 Subject: Btrfs: increase the global block reserve estimates When doing IO with large amounts of data fragmentation, the global block reserve calulations are too low. This increases them to avoid ENOSPC crashes. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dc083f55bcfd..079e5a1c343c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4108,7 +4108,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) num_bytes += div64_u64(data_used + meta_used, 50); if (num_bytes * 3 > meta_used) - num_bytes = div64_u64(meta_used, 3); + num_bytes = div64_u64(meta_used, 3) * 2; return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); } -- cgit v1.2.3 From e77266e4c4be6f9dc91bf688bce015a8babd5fe0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 24 Feb 2012 10:39:05 -0500 Subject: Btrfs: fix compiler warnings on 32 bit systems The enospc tracing code added some interesting uses of u64 pointer casts. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 079e5a1c343c..37e0a800d34e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3312,7 +3312,8 @@ commit_trans: } data_sinfo->bytes_may_use += bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - (u64)data_sinfo, bytes, 1); + (u64)(unsigned long)data_sinfo, + bytes, 1); spin_unlock(&data_sinfo->lock); return 0; @@ -3333,7 +3334,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - (u64)data_sinfo, bytes, 0); + (u64)(unsigned long)data_sinfo, + bytes, 0); spin_unlock(&data_sinfo->lock); } @@ -3698,9 +3700,9 @@ again: if (used + orig_bytes <= space_info->total_bytes) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", - (u64)space_info, - orig_bytes, 1); + "space_info", + (u64)(unsigned long)space_info, + orig_bytes, 1); ret = 0; } else { /* @@ -3769,9 +3771,9 @@ again: if (used + num_bytes < space_info->total_bytes + avail) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", - (u64)space_info, - orig_bytes, 1); + "space_info", + (u64)(unsigned long)space_info, + orig_bytes, 1); ret = 0; } else { wait_ordered = true; @@ -3916,8 +3918,8 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, spin_lock(&space_info->lock); space_info->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)space_info, - num_bytes, 0); + (u64)(unsigned long)space_info, + num_bytes, 0); space_info->reservation_progress++; spin_unlock(&space_info->lock); } @@ -4135,14 +4137,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->reserved += num_bytes; sinfo->bytes_may_use += num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)sinfo, num_bytes, 1); + (u64)(unsigned long)sinfo, num_bytes, 1); } if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; sinfo->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)sinfo, num_bytes, 0); + (u64)(unsigned long)sinfo, num_bytes, 0); sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; @@ -4195,7 +4197,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, if (!trans->bytes_reserved) return; - trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans, + trace_btrfs_space_reservation(root->fs_info, "transaction", + (u64)(unsigned long)trans, trans->bytes_reserved, 0); btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); trans->bytes_reserved = 0; @@ -4713,9 +4716,9 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, space_info->bytes_reserved += num_bytes; if (reserve == RESERVE_ALLOC) { trace_btrfs_space_reservation(cache->fs_info, - "space_info", - (u64)space_info, - num_bytes, 0); + "space_info", + (u64)(unsigned long)space_info, + num_bytes, 0); space_info->bytes_may_use -= num_bytes; } } -- cgit v1.2.3