summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/disk-io.c13
-rw-r--r--fs/btrfs/extent-tree.c60
-rw-r--r--fs/btrfs/free-space-cache.c38
-rw-r--r--fs/btrfs/volumes.c26
-rw-r--r--fs/btrfs/volumes.h12
6 files changed, 140 insertions, 21 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 302f37c56546..d71915e04e92 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1279,6 +1279,7 @@ struct btrfs_block_group_cache {
unsigned int dirty:1;
unsigned int iref:1;
unsigned int has_caching_ctl:1;
+ unsigned int removed:1;
int disk_cache_state;
@@ -1311,6 +1312,8 @@ struct btrfs_block_group_cache {
/* For read-only block groups */
struct list_head ro_list;
+
+ atomic_t trimming;
};
/* delayed seq elem */
@@ -1740,6 +1743,12 @@ struct btrfs_fs_info {
/* For btrfs to record security options */
struct security_mnt_opts security_opts;
+
+ /*
+ * Chunks that can't be freed yet (under a trim/discard operation)
+ * and will be latter freed. Protected by fs_info->chunk_mutex.
+ */
+ struct list_head pinned_chunks;
};
struct btrfs_subvolume_writers {
@@ -3405,7 +3414,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 type, u64 chunk_objectid, u64 chunk_offset,
u64 size);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 group_start);
+ struct btrfs_root *root, u64 group_start,
+ struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1e3e414c8501..30965120772b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2384,6 +2384,8 @@ int open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
+ INIT_LIST_HEAD(&fs_info->pinned_chunks);
+
ret = btrfs_alloc_stripe_hash_table(fs_info);
if (ret) {
err = ret;
@@ -3715,6 +3717,17 @@ void close_ctree(struct btrfs_root *root)
btrfs_free_block_rsv(root, root->orphan_block_rsv);
root->orphan_block_rsv = NULL;
+
+ lock_chunks(root);
+ while (!list_empty(&fs_info->pinned_chunks)) {
+ struct extent_map *em;
+
+ em = list_first_entry(&fs_info->pinned_chunks,
+ struct extent_map, list);
+ list_del_init(&em->list);
+ free_extent_map(em);
+ }
+ unlock_chunks(root);
}
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a5e64dda2db9..dbc115a25798 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9005,6 +9005,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
INIT_LIST_HEAD(&cache->bg_list);
INIT_LIST_HEAD(&cache->ro_list);
btrfs_init_free_space_ctl(cache);
+ atomic_set(&cache->trimming, 0);
return cache;
}
@@ -9306,7 +9307,8 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 group_start)
+ struct btrfs_root *root, u64 group_start,
+ struct extent_map *em)
{
struct btrfs_path *path;
struct btrfs_block_group_cache *block_group;
@@ -9319,6 +9321,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
int index;
int factor;
struct btrfs_caching_control *caching_ctl = NULL;
+ bool remove_em;
root = root->fs_info->extent_root;
@@ -9464,6 +9467,61 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
memcpy(&key, &block_group->key, sizeof(key));
+ lock_chunks(root);
+ spin_lock(&block_group->lock);
+ block_group->removed = 1;
+ /*
+ * At this point trimming can't start on this block group, because we
+ * removed the block group from the tree fs_info->block_group_cache_tree
+ * so no one can't find it anymore and even if someone already got this
+ * block group before we removed it from the rbtree, they have already
+ * incremented block_group->trimming - if they didn't, they won't find
+ * any free space entries because we already removed them all when we
+ * called btrfs_remove_free_space_cache().
+ *
+ * And we must not remove the extent map from the fs_info->mapping_tree
+ * to prevent the same logical address range and physical device space
+ * ranges from being reused for a new block group. This is because our
+ * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
+ * completely transactionless, so while it is trimming a range the
+ * currently running transaction might finish and a new one start,
+ * allowing for new block groups to be created that can reuse the same
+ * physical device locations unless we take this special care.
+ */
+ remove_em = (atomic_read(&block_group->trimming) == 0);
+ /*
+ * Make sure a trimmer task always sees the em in the pinned_chunks list
+ * if it sees block_group->removed == 1 (needs to lock block_group->lock
+ * before checking block_group->removed).
+ */
+ if (!remove_em) {
+ /*
+ * Our em might be in trans->transaction->pending_chunks which
+ * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
+ * and so is the fs_info->pinned_chunks list.
+ *
+ * So at this point we must be holding the chunk_mutex to avoid
+ * any races with chunk allocation (more specifically at
+ * volumes.c:contains_pending_extent()), to ensure it always
+ * sees the em, either in the pending_chunks list or in the
+ * pinned_chunks list.
+ */
+ list_move_tail(&em->list, &root->fs_info->pinned_chunks);
+ }
+ spin_unlock(&block_group->lock);
+ unlock_chunks(root);
+
+ if (remove_em) {
+ struct extent_map_tree *em_tree;
+
+ em_tree = &root->fs_info->mapping_tree.map_tree;
+ write_lock(&em_tree->lock);
+ remove_extent_mapping(em_tree, em);
+ write_unlock(&em_tree->lock);
+ /* once for the tree */
+ free_extent_map(em);
+ }
+
btrfs_put_block_group(block_group);
btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 33848196550e..0ddc114e2aed 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -27,6 +27,7 @@
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"
+#include "volumes.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
@@ -3101,11 +3102,46 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
*trimmed = 0;
+ spin_lock(&block_group->lock);
+ if (block_group->removed) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ atomic_inc(&block_group->trimming);
+ spin_unlock(&block_group->lock);
+
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
if (ret)
- return ret;
+ goto out;
ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
+out:
+ spin_lock(&block_group->lock);
+ if (atomic_dec_and_test(&block_group->trimming) &&
+ block_group->removed) {
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+
+ spin_unlock(&block_group->lock);
+
+ em_tree = &block_group->fs_info->mapping_tree.map_tree;
+ write_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, block_group->key.objectid,
+ 1);
+ BUG_ON(!em); /* logic error, can't happen */
+ remove_extent_mapping(em_tree, em);
+ write_unlock(&em_tree->lock);
+
+ lock_chunks(block_group->fs_info->chunk_root);
+ list_del_init(&em->list);
+ unlock_chunks(block_group->fs_info->chunk_root);
+
+ /* once for us and once for the tree */
+ free_extent_map(em);
+ free_extent_map(em);
+ } else {
+ spin_unlock(&block_group->lock);
+ }
return ret;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 01920515f90d..588f37e0a564 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -53,16 +53,6 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
-static void lock_chunks(struct btrfs_root *root)
-{
- mutex_lock(&root->fs_info->chunk_mutex);
-}
-
-static void unlock_chunks(struct btrfs_root *root)
-{
- mutex_unlock(&root->fs_info->chunk_mutex);
-}
-
static struct btrfs_fs_devices *__alloc_fs_devices(void)
{
struct btrfs_fs_devices *fs_devs;
@@ -1068,9 +1058,11 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
u64 *start, u64 len)
{
struct extent_map *em;
+ struct list_head *search_list = &trans->transaction->pending_chunks;
int ret = 0;
- list_for_each_entry(em, &trans->transaction->pending_chunks, list) {
+again:
+ list_for_each_entry(em, search_list, list) {
struct map_lookup *map;
int i;
@@ -1087,6 +1079,10 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
ret = 1;
}
}
+ if (search_list == &trans->transaction->pending_chunks) {
+ search_list = &trans->root->fs_info->pinned_chunks;
+ goto again;
+ }
return ret;
}
@@ -2653,18 +2649,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
}
}
- ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
+ ret = btrfs_remove_block_group(trans, extent_root, chunk_offset, em);
if (ret) {
btrfs_abort_transaction(trans, extent_root, ret);
goto out;
}
- write_lock(&em_tree->lock);
- remove_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
-
- /* once for the tree */
- free_extent_map(em);
out:
/* once for us */
free_extent_map(em);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 4cc00e64427e..637bcfadadb2 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -515,4 +515,16 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
struct btrfs_transaction *transaction);
+
+static inline void lock_chunks(struct btrfs_root *root)
+{
+ mutex_lock(&root->fs_info->chunk_mutex);
+}
+
+static inline void unlock_chunks(struct btrfs_root *root)
+{
+ mutex_unlock(&root->fs_info->chunk_mutex);
+}
+
+
#endif