diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/acl.c | 39 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 5 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 229 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 40 | ||||
-rw-r--r-- | fs/btrfs/dir-item.c | 19 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 40 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 112 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 3 | ||||
-rw-r--r-- | fs/btrfs/file.c | 729 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 624 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 43 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 117 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 5 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 43 | ||||
-rw-r--r-- | fs/btrfs/super.c | 20 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 44 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 6 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 86 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 19 | ||||
-rw-r--r-- | fs/btrfs/xattr.c | 80 | ||||
-rw-r--r-- | fs/btrfs/xattr.h | 9 |
21 files changed, 1320 insertions, 992 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 361604244271..12d7be8df561 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -94,7 +94,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type, /* * Needs to be called with fs_mutex held */ -static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +static int btrfs_set_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct posix_acl *acl, int type) { int ret, size = 0; const char *name; @@ -111,12 +112,14 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) switch (type) { case ACL_TYPE_ACCESS: mode = inode->i_mode; - ret = posix_acl_equiv_mode(acl, &mode); - if (ret < 0) - return ret; - ret = 0; - inode->i_mode = mode; name = POSIX_ACL_XATTR_ACCESS; + if (acl) { + ret = posix_acl_equiv_mode(acl, &mode); + if (ret < 0) + return ret; + inode->i_mode = mode; + } + ret = 0; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) @@ -140,8 +143,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) goto out; } - ret = __btrfs_setxattr(inode, name, value, size, 0); - + ret = __btrfs_setxattr(trans, inode, name, value, size, 0); out: kfree(value); @@ -154,9 +156,12 @@ out: static int btrfs_xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) { - int ret = 0; + int ret; struct posix_acl *acl = NULL; + if (!is_owner_or_cap(inode)) + return -EPERM; + if (value) { acl = posix_acl_from_xattr(value, size); if (acl == NULL) { @@ -167,7 +172,7 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type, } } - ret = btrfs_set_acl(inode, acl, type); + ret = btrfs_set_acl(NULL, inode, acl, type); posix_acl_release(acl); @@ -221,7 +226,8 @@ int btrfs_check_acl(struct inode *inode, int mask) * stuff has been fixed to work with that. If the locking stuff changes, we * need to re-evaluate the acl locking stuff. */ -int btrfs_init_acl(struct inode *inode, struct inode *dir) +int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; int ret = 0; @@ -246,7 +252,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) mode_t mode; if (S_ISDIR(inode->i_mode)) { - ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); + ret = btrfs_set_acl(trans, inode, acl, + ACL_TYPE_DEFAULT); if (ret) goto failed; } @@ -261,10 +268,11 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) inode->i_mode = mode; if (ret > 0) { /* we need an acl */ - ret = btrfs_set_acl(inode, clone, + ret = btrfs_set_acl(trans, inode, clone, ACL_TYPE_ACCESS); } } + posix_acl_release(clone); } failed: posix_acl_release(acl); @@ -294,7 +302,7 @@ int btrfs_acl_chmod(struct inode *inode) ret = posix_acl_chmod_masq(clone, inode->i_mode); if (!ret) - ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); + ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS); posix_acl_release(clone); @@ -320,7 +328,8 @@ int btrfs_acl_chmod(struct inode *inode) return 0; } -int btrfs_init_acl(struct inode *inode, struct inode *dir) +int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { return 0; } diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index f6783a42f010..3f1f50d9d916 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,9 +44,6 @@ struct btrfs_inode { */ struct extent_io_tree io_failure_tree; - /* held while inesrting or deleting extents from files */ - struct mutex extent_mutex; - /* held while logging the inode in tree-log.c */ struct mutex log_mutex; @@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode) static inline void btrfs_i_size_write(struct inode *inode, u64 size) { - inode->i_size = size; + i_size_write(inode, size); BTRFS_I(inode)->disk_i_size = size; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ec96f3a6d536..c4bc570a396e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct extent_buffer *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); +static int setup_items_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, + u32 total_data, u32 total_size, int nr); + struct btrfs_path *btrfs_alloc_path(void) { @@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, extent_buffer_get(cow); spin_unlock(&root->node_lock); - btrfs_free_extent(trans, root, buf->start, buf->len, - parent_start, root->root_key.objectid, - level, 0); + btrfs_free_tree_block(trans, root, buf->start, buf->len, + parent_start, root->root_key.objectid, level); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { @@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); btrfs_mark_buffer_dirty(parent); - btrfs_free_extent(trans, root, buf->start, buf->len, - parent_start, root->root_key.objectid, - level, 0); + btrfs_free_tree_block(trans, root, buf->start, buf->len, + parent_start, root->root_key.objectid, level); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(mid); /* once for the path */ free_extent_buffer(mid); - ret = btrfs_free_extent(trans, root, mid->start, mid->len, - 0, root->root_key.objectid, level, 1); + ret = btrfs_free_tree_block(trans, root, mid->start, mid->len, + 0, root->root_key.objectid, level); /* once for the root ptr */ free_extent_buffer(mid); return ret; @@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, 1); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, bytenr, - blocksize, 0, - root->root_key.objectid, - level, 0); + wret = btrfs_free_tree_block(trans, root, + bytenr, blocksize, 0, + root->root_key.objectid, + level); if (wret) ret = wret; } else { @@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, bytenr, blocksize, - 0, root->root_key.objectid, - level, 0); + wret = btrfs_free_tree_block(trans, root, bytenr, blocksize, + 0, root->root_key.objectid, level); if (wret) ret = wret; } else { @@ -2997,75 +2999,85 @@ again: return ret; } -/* - * This function splits a single item into two items, - * giving 'new_key' to the new item and splitting the - * old one at split_offset (from the start of the item). - * - * The path may be released by this operation. After - * the split, the path is pointing to the old item. The - * new item is going to be in the same node as the old one. - * - * Note, the item being split must be smaller enough to live alone on - * a tree block with room for one extra struct btrfs_item - * - * This allows us to split the item in place, keeping a lock on the - * leaf the entire time. - */ -int btrfs_split_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *new_key, - unsigned long split_offset) +static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int ins_len) { - u32 item_size; + struct btrfs_key key; struct extent_buffer *leaf; - struct btrfs_key orig_key; - struct btrfs_item *item; - struct btrfs_item *new_item; - int ret = 0; - int slot; - u32 nritems; - u32 orig_offset; - struct btrfs_disk_key disk_key; - char *buf; + struct btrfs_file_extent_item *fi; + u64 extent_len = 0; + u32 item_size; + int ret; leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); - if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) - goto split; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && + key.type != BTRFS_EXTENT_CSUM_KEY); + + if (btrfs_leaf_free_space(root, leaf) >= ins_len) + return 0; item_size = btrfs_item_size_nr(leaf, path->slots[0]); + if (key.type == BTRFS_EXTENT_DATA_KEY) { + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + extent_len = btrfs_file_extent_num_bytes(leaf, fi); + } btrfs_release_path(root, path); - path->search_for_split = 1; path->keep_locks = 1; - - ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); + path->search_for_split = 1; + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); path->search_for_split = 0; + if (ret < 0) + goto err; + ret = -EAGAIN; + leaf = path->nodes[0]; /* if our item isn't there or got smaller, return now */ - if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], - path->slots[0])) { - path->keep_locks = 0; - return -EAGAIN; + if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) + goto err; + + if (key.type == BTRFS_EXTENT_DATA_KEY) { + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if (extent_len != btrfs_file_extent_num_bytes(leaf, fi)) + goto err; } btrfs_set_path_blocking(path); - ret = split_leaf(trans, root, &orig_key, path, - sizeof(struct btrfs_item), 1); - path->keep_locks = 0; + ret = split_leaf(trans, root, &key, path, ins_len, 1); BUG_ON(ret); + path->keep_locks = 0; btrfs_unlock_up_safe(path, 1); + return 0; +err: + path->keep_locks = 0; + return ret; +} + +static noinline int split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset) +{ + struct extent_buffer *leaf; + struct btrfs_item *item; + struct btrfs_item *new_item; + int slot; + char *buf; + u32 nritems; + u32 item_size; + u32 orig_offset; + struct btrfs_disk_key disk_key; + leaf = path->nodes[0]; BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); -split: - /* - * make sure any changes to the path from split_leaf leave it - * in a blocking state - */ btrfs_set_path_blocking(path); item = btrfs_item_nr(leaf, path->slots[0]); @@ -3073,19 +3085,19 @@ split: item_size = btrfs_item_size(leaf, item); buf = kmalloc(item_size, GFP_NOFS); + if (!buf) + return -ENOMEM; + read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, path->slots[0]), item_size); - slot = path->slots[0] + 1; - leaf = path->nodes[0]; + slot = path->slots[0] + 1; nritems = btrfs_header_nritems(leaf); - if (slot != nritems) { /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), - btrfs_item_nr_offset(slot), - (nritems - slot) * sizeof(struct btrfs_item)); - + btrfs_item_nr_offset(slot), + (nritems - slot) * sizeof(struct btrfs_item)); } btrfs_cpu_key_to_disk(&disk_key, new_key); @@ -3113,16 +3125,81 @@ split: item_size - split_offset); btrfs_mark_buffer_dirty(leaf); - ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) { - btrfs_print_leaf(root, leaf); - BUG(); - } + BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); kfree(buf); + return 0; +} + +/* + * This function splits a single item into two items, + * giving 'new_key' to the new item and splitting the + * old one at split_offset (from the start of the item). + * + * The path may be released by this operation. After + * the split, the path is pointing to the old item. The + * new item is going to be in the same node as the old one. + * + * Note, the item being split must be smaller enough to live alone on + * a tree block with room for one extra struct btrfs_item + * + * This allows us to split the item in place, keeping a lock on the + * leaf the entire time. + */ +int btrfs_split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset) +{ + int ret; + ret = setup_leaf_for_split(trans, root, path, + sizeof(struct btrfs_item)); + if (ret) + return ret; + + ret = split_item(trans, root, path, new_key, split_offset); return ret; } /* + * This function duplicate a item, giving 'new_key' to the new item. + * It guarantees both items live in the same tree leaf and the new item + * is contiguous with the original item. + * + * This allows us to split file extent in place, keeping a lock on the + * leaf the entire time. + */ +int btrfs_duplicate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key) +{ + struct extent_buffer *leaf; + int ret; + u32 item_size; + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + ret = setup_leaf_for_split(trans, root, path, + item_size + sizeof(struct btrfs_item)); + if (ret) + return ret; + + path->slots[0]++; + ret = setup_items_for_insert(trans, root, path, new_key, &item_size, + item_size, item_size + + sizeof(struct btrfs_item), 1); + BUG_ON(ret); + + leaf = path->nodes[0]; + memcpy_extent_buffer(leaf, + btrfs_item_ptr_offset(leaf, path->slots[0]), + btrfs_item_ptr_offset(leaf, path->slots[0] - 1), + item_size); + return 0; +} + +/* * make the item pointed to by the path smaller. new_size indicates * how small to make it, and from_end tells us if we just chop bytes * off the end of the item or if we shift the item to chop bytes off @@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, */ btrfs_unlock_up_safe(path, 0); - ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, - 0, root->root_key.objectid, 0, 0); + ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len, + 0, root->root_key.objectid, 0); return ret; } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 444b3e9b92a4..9f806dd04c27 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -310,6 +310,9 @@ struct btrfs_header { #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) +#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item) -\ + sizeof(struct btrfs_dir_item)) /* @@ -859,8 +862,9 @@ struct btrfs_fs_info { struct mutex ordered_operations_mutex; struct rw_semaphore extent_commit_sem; - struct rw_semaphore subvol_sem; + struct rw_semaphore cleanup_work_sem; + struct rw_semaphore subvol_sem; struct srcu_struct subvol_srcu; struct list_head trans_list; @@ -868,6 +872,9 @@ struct btrfs_fs_info { struct list_head dead_roots; struct list_head caching_block_groups; + spinlock_t delayed_iput_lock; + struct list_head delayed_iputs; + atomic_t nr_async_submits; atomic_t async_submit_draining; atomic_t nr_async_bios; @@ -1034,12 +1041,12 @@ struct btrfs_root { int ref_cows; int track_dirty; int in_radix; + int clean_orphans; u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; int defrag_running; - int defrag_level; char *name; int in_sysfs; @@ -1975,6 +1982,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, u64 parent, u64 root_objectid, struct btrfs_disk_key *key, int level, u64 hint, u64 empty_size); +int btrfs_free_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u32 blocksize, + u64 parent, u64 root_objectid, int level); struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u32 blocksize, @@ -2089,6 +2100,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_key *new_key, unsigned long split_offset); +int btrfs_duplicate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -2196,9 +2211,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_dir_item *di); int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, const char *name, - u16 name_len, const void *data, u16 data_len, - u64 dir); + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + const char *name, u16 name_len, + const void *data, u16 data_len); struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, @@ -2292,7 +2308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct inode *inode, u64 new_size, u32 min_type); -int btrfs_start_delalloc_inodes(struct btrfs_root *root); +int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); @@ -2332,6 +2348,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); void btrfs_orphan_cleanup(struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t size); int btrfs_invalidate_inodes(struct btrfs_root *root); +void btrfs_add_delayed_iput(struct inode *inode); +void btrfs_run_delayed_iputs(struct btrfs_root *root); extern const struct dentry_operations btrfs_dentry_operations; /* ioctl.c */ @@ -2345,12 +2363,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); int btrfs_check_file(struct btrfs_root *root, struct inode *inode); extern const struct file_operations btrfs_file_operations; -int btrfs_drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 locked_end, - u64 inline_limit, u64 *hint_block, int drop_cache); +int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, + u64 start, u64 end, u64 *hint_byte, int drop_cache); int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, u64 start, u64 end); int btrfs_release_file(struct inode *inode, struct file *file); @@ -2380,7 +2395,8 @@ int btrfs_check_acl(struct inode *inode, int mask); #else #define btrfs_check_acl NULL #endif -int btrfs_init_acl(struct inode *inode, struct inode *dir); +int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir); int btrfs_acl_chmod(struct inode *inode); /* relocation.c */ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f3a6075519cc..e9103b3baa49 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle * into the tree */ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, const char *name, - u16 name_len, const void *data, u16 data_len, - u64 dir) + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + const char *name, u16 name_len, + const void *data, u16 data_len) { int ret = 0; - struct btrfs_path *path; struct btrfs_dir_item *dir_item; unsigned long name_ptr, data_ptr; struct btrfs_key key, location; @@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; u32 data_size; - key.objectid = dir; + BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)); + + key.objectid = objectid; btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); key.offset = btrfs_name_hash(name, name_len); - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - if (name_len + data_len + sizeof(struct btrfs_dir_item) > - BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item)) - return -ENOSPC; data_size = sizeof(*dir_item) + name_len + data_len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, @@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, write_extent_buffer(leaf, data, data_ptr, data_len); btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 02b6afbd7450..2b59201b955c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->stripesize = stripesize; root->ref_cows = 0; root->track_dirty = 0; + root->in_radix = 0; + root->clean_orphans = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->defrag_trans_start = fs_info->generation; init_completion(&root->kobj_unregister); root->defrag_running = 0; - root->defrag_level = 0; root->root_key.objectid = objectid; root->anon_super.s_root = NULL; root->anon_super.s_dev = 0; @@ -980,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, while (1) { ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, - 0, &start, &end, EXTENT_DIRTY); + 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); if (ret) break; - clear_extent_dirty(&log_root_tree->dirty_log_pages, - start, end, GFP_NOFS); + clear_extent_bits(&log_root_tree->dirty_log_pages, start, end, + EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); } eb = fs_info->log_root_tree->node; @@ -1210,8 +1211,10 @@ again: ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); - if (ret == 0) + if (ret == 0) { root->in_radix = 1; + root->clean_orphans = 1; + } spin_unlock(&fs_info->fs_roots_radix_lock); radix_tree_preload_end(); if (ret) { @@ -1225,10 +1228,6 @@ again: ret = btrfs_find_dead_roots(fs_info->tree_root, root->root_key.objectid); WARN_ON(ret); - - if (!(fs_info->sb->s_flags & MS_RDONLY)) - btrfs_orphan_cleanup(root); - return root; fail: free_fs_root(root); @@ -1477,6 +1476,7 @@ static int cleaner_kthread(void *arg) if (!(root->fs_info->sb->s_flags & MS_RDONLY) && mutex_trylock(&root->fs_info->cleaner_mutex)) { + btrfs_run_delayed_iputs(root); btrfs_clean_old_snapshots(root); mutex_unlock(&root->fs_info->cleaner_mutex); } @@ -1606,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + INIT_LIST_HEAD(&fs_info->delayed_iputs); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); INIT_LIST_HEAD(&fs_info->ordered_operations); @@ -1614,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); + spin_lock_init(&fs_info->delayed_iput_lock); init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; @@ -1689,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); init_rwsem(&fs_info->extent_commit_sem); + init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); @@ -1979,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_recover_relocation(tree_root); - BUG_ON(ret); + if (ret < 0) { + printk(KERN_WARNING + "btrfs: failed to recover relocation\n"); + err = -EINVAL; + goto fail_trans_kthread; + } } location.objectid = BTRFS_FS_TREE_OBJECTID; @@ -1990,6 +1998,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->fs_root) goto fail_trans_kthread; + if (!(sb->s_flags & MS_RDONLY)) { + down_read(&fs_info->cleanup_work_sem); + btrfs_orphan_cleanup(fs_info->fs_root); + up_read(&fs_info->cleanup_work_sem); + } + return tree_root; fail_trans_kthread: @@ -2386,8 +2400,14 @@ int btrfs_commit_super(struct btrfs_root *root) int ret; mutex_lock(&root->fs_info->cleaner_mutex); + btrfs_run_delayed_iputs(root); btrfs_clean_old_snapshots(root); mutex_unlock(&root->fs_info->cleaner_mutex); + + /* wait until ongoing cleanup work done */ + down_write(&root->fs_info->cleanup_work_sem); + up_write(&root->fs_info->cleanup_work_sem); + trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 94627c4cc193..559f72489b3b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) return (cache->flags & bits) == bits; } +void btrfs_get_block_group(struct btrfs_block_group_cache *cache) +{ + atomic_inc(&cache->count); +} + +void btrfs_put_block_group(struct btrfs_block_group_cache *cache) +{ + if (atomic_dec_and_test(&cache->count)) + kfree(cache); +} + /* * this adds the block group to the fs_info rb tree for the block group * cache @@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, } } if (ret) - atomic_inc(&ret->count); + btrfs_get_block_group(ret); spin_unlock(&info->block_group_cache_lock); return ret; @@ -195,6 +206,14 @@ static int exclude_super_stripes(struct btrfs_root *root, int stripe_len; int i, nr, ret; + if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { + stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; + cache->bytes_super += stripe_len; + ret = add_excluded_extent(root, cache->key.objectid, + stripe_len); + BUG_ON(ret); + } + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); ret = btrfs_rmap_block(&root->fs_info->mapping_tree, @@ -255,7 +274,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, if (ret) break; - if (extent_start == start) { + if (extent_start <= start) { start = extent_end + 1; } else if (extent_start > start && extent_start < end) { size = extent_start - start; @@ -399,6 +418,8 @@ err: put_caching_control(caching_ctl); atomic_dec(&block_group->space_info->caching_threads); + btrfs_put_block_group(block_group); + return 0; } @@ -439,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache) up_write(&fs_info->extent_commit_sem); atomic_inc(&cache->space_info->caching_threads); + btrfs_get_block_group(cache); tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", cache->key.objectid); @@ -478,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( return cache; } -void btrfs_put_block_group(struct btrfs_block_group_cache *cache) -{ - if (atomic_dec_and_test(&cache->count)) - kfree(cache); -} - static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { @@ -2574,7 +2590,7 @@ next_block_group(struct btrfs_root *root, if (node) { cache = rb_entry(node, struct btrfs_block_group_cache, cache_node); - atomic_inc(&cache->count); + btrfs_get_block_group(cache); } else cache = NULL; spin_unlock(&root->fs_info->block_group_cache_lock); @@ -2880,9 +2896,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work) root = async->root; info = async->info; - btrfs_start_delalloc_inodes(root); + btrfs_start_delalloc_inodes(root, 0); wake_up(&info->flush_wait); - btrfs_wait_ordered_extents(root, 0); + btrfs_wait_ordered_extents(root, 0, 0); spin_lock(&info->lock); info->flushing = 0; @@ -2956,8 +2972,8 @@ static void flush_delalloc(struct btrfs_root *root, return; flush: - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(root, 0); + btrfs_start_delalloc_inodes(root, 0); + btrfs_wait_ordered_extents(root, 0, 0); spin_lock(&info->lock); info->flushing = 0; @@ -3454,14 +3470,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, else old_val -= num_bytes; btrfs_set_super_bytes_used(&info->super_copy, old_val); - - /* block accounting for root item */ - old_val = btrfs_root_used(&root->root_item); - if (alloc) - old_val += num_bytes; - else - old_val -= num_bytes; - btrfs_set_root_used(&root->root_item, old_val); spin_unlock(&info->delalloc_lock); while (total) { @@ -4049,6 +4057,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, return ret; } +int btrfs_free_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u32 blocksize, + u64 parent, u64 root_objectid, int level) +{ + u64 used; + spin_lock(&root->node_lock); + used = btrfs_root_used(&root->root_item) - blocksize; + btrfs_set_root_used(&root->root_item, used); + spin_unlock(&root->node_lock); + + return btrfs_free_extent(trans, root, bytenr, blocksize, + parent, root_objectid, level, 0); +} + static u64 stripe_align(struct btrfs_root *root, u64 val) { u64 mask = ((u64)root->stripesize - 1); @@ -4212,7 +4235,7 @@ search: u64 offset; int cached; - atomic_inc(&block_group->count); + btrfs_get_block_group(block_group); search_start = block_group->key.objectid; have_block_group: @@ -4300,7 +4323,7 @@ have_block_group: btrfs_put_block_group(block_group); block_group = last_ptr->block_group; - atomic_inc(&block_group->count); + btrfs_get_block_group(block_group); spin_unlock(&last_ptr->lock); spin_unlock(&last_ptr->refill_lock); @@ -4578,7 +4601,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, { int ret; u64 search_start = 0; - struct btrfs_fs_info *info = root->fs_info; data = btrfs_get_alloc_profile(root, data); again: @@ -4586,17 +4608,9 @@ again: * the only place that sets empty_size is btrfs_realloc_node, which * is not called recursively on allocations */ - if (empty_size || root->ref_cows) { - if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { - ret = do_chunk_alloc(trans, root->fs_info->extent_root, - 2 * 1024 * 1024, - BTRFS_BLOCK_GROUP_METADATA | - (info->metadata_alloc_profile & - info->avail_metadata_alloc_bits), 0); - } + if (empty_size || root->ref_cows) ret = do_chunk_alloc(trans, root->fs_info->extent_root, num_bytes + 2 * 1024 * 1024, data, 0); - } WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, @@ -4897,6 +4911,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, extent_op); BUG_ON(ret); } + + if (root_objectid == root->root_key.objectid) { + u64 used; + spin_lock(&root->node_lock); + used = btrfs_root_used(&root->root_item) + num_bytes; + btrfs_set_root_used(&root->root_item, used); + spin_unlock(&root->node_lock); + } return ret; } @@ -4919,8 +4941,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, btrfs_set_buffer_uptodate(buf); if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { - set_extent_dirty(&root->dirty_log_pages, buf->start, - buf->start + buf->len - 1, GFP_NOFS); + /* + * we allow two log transactions at a time, use different + * EXENT bit to differentiate dirty pages. + */ + if (root->log_transid % 2 == 0) + set_extent_dirty(&root->dirty_log_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + else + set_extent_new(&root->dirty_log_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); } else { set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); @@ -5372,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, int ret; while (level >= 0) { - if (path->slots[level] >= - btrfs_header_nritems(path->nodes[level])) - break; - ret = walk_down_proc(trans, root, path, wc, lookup_info); if (ret > 0) break; @@ -5383,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (level == 0) break; + if (path->slots[level] >= + btrfs_header_nritems(path->nodes[level])) + break; + ret = do_walk_down(trans, root, path, wc, &lookup_info); if (ret > 0) { path->slots[level]++; @@ -7373,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) wait_block_group_cache_done(block_group); btrfs_remove_free_space_cache(block_group); - - WARN_ON(atomic_read(&block_group->count) != 1); - kfree(block_group); + btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 96577e8bf9fd..b177ed319612 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, spin_unlock(&tree->buffer_lock); goto free_eb; } - spin_unlock(&tree->buffer_lock); - /* add one reference for the tree */ atomic_inc(&eb->refs); + spin_unlock(&tree->buffer_lock); return eb; free_eb: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 06550affbd27..a7fd9f3a750a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, } flags = em->flags; if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { - if (em->start <= start && - (!testend || em->start + em->len >= start + len)) { + if (testend && em->start + em->len >= start + len) { free_extent_map(em); write_unlock(&em_tree->lock); break; } - if (start < em->start) { - len = em->start - start; - } else { + start = em->start + em->len; + if (testend) len = start + len - (em->start + em->len); - start = em->start + em->len; - } free_extent_map(em); write_unlock(&em_tree->lock); continue; @@ -265,324 +261,253 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, * If an extent intersects the range but is not entirely inside the range * it is either truncated or split. Anything entirely inside the range * is deleted from the tree. - * - * inline_limit is used to tell this code which offsets in the file to keep - * if they contain inline extents. */ -noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 locked_end, - u64 inline_limit, u64 *hint_byte, int drop_cache) +int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, + u64 start, u64 end, u64 *hint_byte, int drop_cache) { - u64 extent_end = 0; - u64 search_start = start; - u64 ram_bytes = 0; - u64 disk_bytenr = 0; - u64 orig_locked_end = locked_end; - u8 compression; - u8 encryption; - u16 other_encoding = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_buffer *leaf; - struct btrfs_file_extent_item *extent; + struct btrfs_file_extent_item *fi; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_file_extent_item old; - int keep; - int slot; - int bookend; - int found_type = 0; - int found_extent; - int found_inline; + struct btrfs_key new_key; + u64 search_start = start; + u64 disk_bytenr = 0; + u64 num_bytes = 0; + u64 extent_offset = 0; + u64 extent_end = 0; + int del_nr = 0; + int del_slot = 0; + int extent_type; int recow; int ret; - inline_limit = 0; if (drop_cache) btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); if (!path) return -ENOMEM; + while (1) { recow = 0; - btrfs_release_path(root, path); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, search_start, -1); if (ret < 0) - goto out; - if (ret > 0) { - if (path->slots[0] == 0) { - ret = 0; - goto out; - } - path->slots[0]--; + break; + if (ret > 0 && path->slots[0] > 0 && search_start == start) { + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); + if (key.objectid == inode->i_ino && + key.type == BTRFS_EXTENT_DATA_KEY) + path->slots[0]--; } + ret = 0; next_slot: - keep = 0; - bookend = 0; - found_extent = 0; - found_inline = 0; - compression = 0; - encryption = 0; - extent = NULL; leaf = path->nodes[0]; - slot = path->slots[0]; - ret = 0; - btrfs_item_key_to_cpu(leaf, &key, slot); - if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && - key.offset >= end) { - goto out; - } - if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || - key.objectid != inode->i_ino) { - goto out; - } - if (recow) { - search_start = max(key.offset, start); - continue; - } - if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(leaf, extent); - compression = btrfs_file_extent_compression(leaf, - extent); - encryption = btrfs_file_extent_encryption(leaf, - extent); - other_encoding = btrfs_file_extent_other_encoding(leaf, - extent); - if (found_type == BTRFS_FILE_EXTENT_REG || - found_type == BTRFS_FILE_EXTENT_PREALLOC) { - extent_end = - btrfs_file_extent_disk_bytenr(leaf, - extent); - if (extent_end) - *hint_byte = extent_end; - - extent_end = key.offset + - btrfs_file_extent_num_bytes(leaf, extent); - ram_bytes = btrfs_file_extent_ram_bytes(leaf, - extent); - found_extent = 1; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - found_inline = 1; - extent_end = key.offset + - btrfs_file_extent_inline_len(leaf, extent); + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + BUG_ON(del_nr > 0); + ret = btrfs_next_leaf(root, path); + if (ret < 0) + break; + if (ret > 0) { + ret = 0; + break; } + leaf = path->nodes[0]; + recow = 1; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid > inode->i_ino || + key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) + break; + + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(leaf, fi); + + if (extent_type == BTRFS_FILE_EXTENT_REG || + extent_type == BTRFS_FILE_EXTENT_PREALLOC) { + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + extent_offset = btrfs_file_extent_offset(leaf, fi); + extent_end = key.offset + + btrfs_file_extent_num_bytes(leaf, fi); + } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf, fi); } else { + WARN_ON(1); extent_end = search_start; } - /* we found nothing we can drop */ - if ((!found_extent && !found_inline) || - search_start >= extent_end) { - int nextret; - u32 nritems; - nritems = btrfs_header_nritems(leaf); - if (slot >= nritems - 1) { - nextret = btrfs_next_leaf(root, path); - if (nextret) - goto out; - recow = 1; - } else { - path->slots[0]++; - } + if (extent_end <= search_start) { + path->slots[0]++; goto next_slot; } - if (end <= extent_end && start >= key.offset && found_inline) - *hint_byte = EXTENT_MAP_INLINE; - - if (found_extent) { - read_extent_buffer(leaf, &old, (unsigned long)extent, - sizeof(old)); - } - - if (end < extent_end && end >= key.offset) { - bookend = 1; - if (found_inline && start <= key.offset) - keep = 1; + search_start = max(key.offset, start); + if (recow) { + btrfs_release_path(root, path); + continue; } - if (bookend && found_extent) { - if (locked_end < extent_end) { - ret = try_lock_extent(&BTRFS_I(inode)->io_tree, - locked_end, extent_end - 1, - GFP_NOFS); - if (!ret) { - btrfs_release_path(root, path); - lock_extent(&BTRFS_I(inode)->io_tree, - locked_end, extent_end - 1, - GFP_NOFS); - locked_end = extent_end; - continue; - } - locked_end = extent_end; + /* + * | - range to drop - | + * | -------- extent -------- | + */ + if (start > key.offset && end < extent_end) { + BUG_ON(del_nr > 0); + BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); + + memcpy(&new_key, &key, sizeof(new_key)); + new_key.offset = start; + ret = btrfs_duplicate_item(trans, root, path, + &new_key); + if (ret == -EAGAIN) { + btrfs_release_path(root, path); + continue; } - disk_bytenr = le64_to_cpu(old.disk_bytenr); - if (disk_bytenr != 0) { + if (ret < 0) + break; + + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0] - 1, + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, + start - key.offset); + + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + extent_offset += start - key.offset; + btrfs_set_file_extent_offset(leaf, fi, extent_offset); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_end - start); + btrfs_mark_buffer_dirty(leaf); + + if (disk_bytenr > 0) { ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, - le64_to_cpu(old.disk_num_bytes), 0, - root->root_key.objectid, - key.objectid, key.offset - - le64_to_cpu(old.offset)); + disk_bytenr, num_bytes, 0, + root->root_key.objectid, + new_key.objectid, + start - extent_offset); BUG_ON(ret); + *hint_byte = disk_bytenr; } + key.offset = start; } + /* + * | ---- range to drop ----- | + * | -------- extent -------- | + */ + if (start <= key.offset && end < extent_end) { + BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); - if (found_inline) { - u64 mask = root->sectorsize - 1; - search_start = (extent_end + mask) & ~mask; - } else - search_start = extent_end; - - /* truncate existing extent */ - if (start > key.offset) { - u64 new_num; - u64 old_num; - keep = 1; - WARN_ON(start & (root->sectorsize - 1)); - if (found_extent) { - new_num = start - key.offset; - old_num = btrfs_file_extent_num_bytes(leaf, - extent); - *hint_byte = - btrfs_file_extent_disk_bytenr(leaf, - extent); - if (btrfs_file_extent_disk_bytenr(leaf, - extent)) { - inode_sub_bytes(inode, old_num - - new_num); - } - btrfs_set_file_extent_num_bytes(leaf, - extent, new_num); - btrfs_mark_buffer_dirty(leaf); - } else if (key.offset < inline_limit && - (end > extent_end) && - (inline_limit < extent_end)) { - u32 new_size; - new_size = btrfs_file_extent_calc_inline_size( - inline_limit - key.offset); - inode_sub_bytes(inode, extent_end - - inline_limit); - btrfs_set_file_extent_ram_bytes(leaf, extent, - new_size); - if (!compression && !encryption) { - btrfs_truncate_item(trans, root, path, - new_size, 1); - } + memcpy(&new_key, &key, sizeof(new_key)); + new_key.offset = end; + btrfs_set_item_key_safe(trans, root, path, &new_key); + + extent_offset += end - key.offset; + btrfs_set_file_extent_offset(leaf, fi, extent_offset); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_end - end); + btrfs_mark_buffer_dirty(leaf); + if (disk_bytenr > 0) { + inode_sub_bytes(inode, end - key.offset); + *hint_byte = disk_bytenr; } + break; } - /* delete the entire extent */ - if (!keep) { - if (found_inline) - inode_sub_bytes(inode, extent_end - - key.offset); - ret = btrfs_del_item(trans, root, path); - /* TODO update progress marker and return */ - BUG_ON(ret); - extent = NULL; - btrfs_release_path(root, path); - /* the extent will be freed later */ - } - if (bookend && found_inline && start <= key.offset) { - u32 new_size; - new_size = btrfs_file_extent_calc_inline_size( - extent_end - end); - inode_sub_bytes(inode, end - key.offset); - btrfs_set_file_extent_ram_bytes(leaf, extent, - new_size); - if (!compression && !encryption) - ret = btrfs_truncate_item(trans, root, path, - new_size, 0); - BUG_ON(ret); - } - /* create bookend, splitting the extent in two */ - if (bookend && found_extent) { - struct btrfs_key ins; - ins.objectid = inode->i_ino; - ins.offset = end; - btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); - btrfs_release_path(root, path); - path->leave_spinning = 1; - ret = btrfs_insert_empty_item(trans, root, path, &ins, - sizeof(*extent)); - BUG_ON(ret); + search_start = extent_end; + /* + * | ---- range to drop ----- | + * | -------- extent -------- | + */ + if (start > key.offset && end >= extent_end) { + BUG_ON(del_nr > 0); + BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); - leaf = path->nodes[0]; - extent = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - write_extent_buffer(leaf, &old, - (unsigned long)extent, sizeof(old)); - - btrfs_set_file_extent_compression(leaf, extent, - compression); - btrfs_set_file_extent_encryption(leaf, extent, - encryption); - btrfs_set_file_extent_other_encoding(leaf, extent, - other_encoding); - btrfs_set_file_extent_offset(leaf, extent, - le64_to_cpu(old.offset) + end - key.offset); - WARN_ON(le64_to_cpu(old.num_bytes) < - (extent_end - end)); - btrfs_set_file_extent_num_bytes(leaf, extent, - extent_end - end); + btrfs_set_file_extent_num_bytes(leaf, fi, + start - key.offset); + btrfs_mark_buffer_dirty(leaf); + if (disk_bytenr > 0) { + inode_sub_bytes(inode, extent_end - start); + *hint_byte = disk_bytenr; + } + if (end == extent_end) + break; - /* - * set the ram bytes to the size of the full extent - * before splitting. This is a worst case flag, - * but its the best we can do because we don't know - * how splitting affects compression - */ - btrfs_set_file_extent_ram_bytes(leaf, extent, - ram_bytes); - btrfs_set_file_extent_type(leaf, extent, found_type); - - btrfs_unlock_up_safe(path, 1); - btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_set_lock_blocking(path->nodes[0]); - - path->leave_spinning = 0; - btrfs_release_path(root, path); - if (disk_bytenr != 0) - inode_add_bytes(inode, extent_end - end); + path->slots[0]++; + goto next_slot; } - if (found_extent && !keep) { - u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr); + /* + * | ---- range to drop ----- | + * | ------ extent ------ | + */ + if (start <= key.offset && end >= extent_end) { + if (del_nr == 0) { + del_slot = path->slots[0]; + del_nr = 1; + } else { + BUG_ON(del_slot + del_nr != path->slots[0]); + del_nr++; + } - if (old_disk_bytenr != 0) { + if (extent_type == BTRFS_FILE_EXTENT_INLINE) { inode_sub_bytes(inode, - le64_to_cpu(old.num_bytes)); + extent_end - key.offset); + extent_end = ALIGN(extent_end, + root->sectorsize); + } else if (disk_bytenr > 0) { ret = btrfs_free_extent(trans, root, - old_disk_bytenr, - le64_to_cpu(old.disk_num_bytes), - 0, root->root_key.objectid, + disk_bytenr, num_bytes, 0, + root->root_key.objectid, key.objectid, key.offset - - le64_to_cpu(old.offset)); + extent_offset); BUG_ON(ret); - *hint_byte = old_disk_bytenr; + inode_sub_bytes(inode, + extent_end - key.offset); + *hint_byte = disk_bytenr; } - } - if (search_start >= end) { - ret = 0; - goto out; + if (end == extent_end) + break; + + if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) { + path->slots[0]++; + goto next_slot; + } + + ret = btrfs_del_items(trans, root, path, del_slot, + del_nr); + BUG_ON(ret); + + del_nr = 0; + del_slot = 0; + + btrfs_release_path(root, path); + continue; } + + BUG_ON(1); } -out: - btrfs_free_path(path); - if (locked_end > orig_locked_end) { - unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, - locked_end - 1, GFP_NOFS); + + if (del_nr > 0) { + ret = btrfs_del_items(trans, root, path, del_slot, del_nr); + BUG_ON(ret); } + + btrfs_free_path(path); return ret; } static int extent_mergeable(struct extent_buffer *leaf, int slot, - u64 objectid, u64 bytenr, u64 *start, u64 *end) + u64 objectid, u64 bytenr, u64 orig_offset, + u64 *start, u64 *end) { struct btrfs_file_extent_item *fi; struct btrfs_key key; @@ -598,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot, fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || + btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset || btrfs_file_extent_compression(leaf, fi) || btrfs_file_extent_encryption(leaf, fi) || btrfs_file_extent_other_encoding(leaf, fi)) @@ -620,23 +546,24 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot, * two or three. */ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, u64 start, u64 end) { + struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_buffer *leaf; struct btrfs_path *path; struct btrfs_file_extent_item *fi; struct btrfs_key key; + struct btrfs_key new_key; u64 bytenr; u64 num_bytes; u64 extent_end; u64 orig_offset; u64 other_start; u64 other_end; - u64 split = start; - u64 locked_end = end; - int extent_type; - int split_end = 1; + u64 split; + int del_nr = 0; + int del_slot = 0; + int recow; int ret; btrfs_drop_extent_cache(inode, start, end - 1, 0); @@ -644,12 +571,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); again: + recow = 0; + split = start; key.objectid = inode->i_ino; key.type = BTRFS_EXTENT_DATA_KEY; - if (split == start) - key.offset = split; - else - key.offset = split - 1; + key.offset = split; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0 && path->slots[0] > 0) @@ -661,159 +587,158 @@ again: key.type != BTRFS_EXTENT_DATA_KEY); fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - extent_type = btrfs_file_extent_type(leaf, fi); - BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); + BUG_ON(btrfs_file_extent_type(leaf, fi) != + BTRFS_FILE_EXTENT_PREALLOC); extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); BUG_ON(key.offset > start || extent_end < end); bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); + memcpy(&new_key, &key, sizeof(new_key)); - if (key.offset == start) - split = end; - - if (key.offset == start && extent_end == end) { - int del_nr = 0; - int del_slot = 0; - other_start = end; - other_end = 0; - if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, - bytenr, &other_start, &other_end)) { - extent_end = other_end; - del_slot = path->slots[0] + 1; - del_nr++; - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, - 0, root->root_key.objectid, - inode->i_ino, orig_offset); - BUG_ON(ret); - } + if (start == key.offset && end < extent_end) { other_start = 0; other_end = start; - if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, - bytenr, &other_start, &other_end)) { - key.offset = other_start; - del_slot = path->slots[0]; - del_nr++; - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, - 0, root->root_key.objectid, - inode->i_ino, orig_offset); - BUG_ON(ret); - } - split_end = 0; - if (del_nr == 0) { - btrfs_set_file_extent_type(leaf, fi, - BTRFS_FILE_EXTENT_REG); - goto done; - } - - fi = btrfs_item_ptr(leaf, del_slot - 1, - struct btrfs_file_extent_item); - btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_num_bytes(leaf, fi, - extent_end - key.offset); - btrfs_mark_buffer_dirty(leaf); - - ret = btrfs_del_items(trans, root, path, del_slot, del_nr); - BUG_ON(ret); - goto release; - } else if (split == start) { - if (locked_end < extent_end) { - ret = try_lock_extent(&BTRFS_I(inode)->io_tree, - locked_end, extent_end - 1, GFP_NOFS); - if (!ret) { - btrfs_release_path(root, path); - lock_extent(&BTRFS_I(inode)->io_tree, - locked_end, extent_end - 1, GFP_NOFS); - locked_end = extent_end; - goto again; - } - locked_end = extent_end; + if (extent_mergeable(leaf, path->slots[0] - 1, + inode->i_ino, bytenr, orig_offset, + &other_start, &other_end)) { + new_key.offset = end; + btrfs_set_item_key_safe(trans, root, path, &new_key); + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_end - end); + btrfs_set_file_extent_offset(leaf, fi, + end - orig_offset); + fi = btrfs_item_ptr(leaf, path->slots[0] - 1, + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, + end - other_start); + btrfs_mark_buffer_dirty(leaf); + goto out; } - btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); - } else { - BUG_ON(key.offset != start); - key.offset = split; - btrfs_set_file_extent_offset(leaf, fi, key.offset - - orig_offset); - btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split); - btrfs_set_item_key_safe(trans, root, path, &key); - extent_end = split; } - if (extent_end == end) { - split_end = 0; - extent_type = BTRFS_FILE_EXTENT_REG; - } - if (extent_end == end && split == start) { + if (start > key.offset && end == extent_end) { other_start = end; other_end = 0; - if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, - bytenr, &other_start, &other_end)) { - path->slots[0]++; + if (extent_mergeable(leaf, path->slots[0] + 1, + inode->i_ino, bytenr, orig_offset, + &other_start, &other_end)) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - key.offset = split; - btrfs_set_item_key_safe(trans, root, path, &key); - btrfs_set_file_extent_offset(leaf, fi, key.offset - - orig_offset); btrfs_set_file_extent_num_bytes(leaf, fi, - other_end - split); - goto done; - } - } - if (extent_end == end && split == end) { - other_start = 0; - other_end = start; - if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino, - bytenr, &other_start, &other_end)) { - path->slots[0]--; + start - key.offset); + path->slots[0]++; + new_key.offset = start; + btrfs_set_item_key_safe(trans, root, path, &new_key); + fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - - other_start); - goto done; + btrfs_set_file_extent_num_bytes(leaf, fi, + other_end - start); + btrfs_set_file_extent_offset(leaf, fi, + start - orig_offset); + btrfs_mark_buffer_dirty(leaf); + goto out; } } - btrfs_mark_buffer_dirty(leaf); + while (start > key.offset || end < extent_end) { + if (key.offset == start) + split = end; - ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, - root->root_key.objectid, - inode->i_ino, orig_offset); - BUG_ON(ret); - btrfs_release_path(root, path); + new_key.offset = split; + ret = btrfs_duplicate_item(trans, root, path, &new_key); + if (ret == -EAGAIN) { + btrfs_release_path(root, path); + goto again; + } + BUG_ON(ret < 0); - key.offset = start; - ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi)); - BUG_ON(ret); + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0] - 1, + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, + split - key.offset); - leaf = path->nodes[0]; - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(leaf, fi, trans->transid); - btrfs_set_file_extent_type(leaf, fi, extent_type); - btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); - btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); - btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset); - btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); - btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); - btrfs_set_file_extent_compression(leaf, fi, 0); - btrfs_set_file_extent_encryption(leaf, fi, 0); - btrfs_set_file_extent_other_encoding(leaf, fi, 0); -done: - btrfs_mark_buffer_dirty(leaf); - -release: - btrfs_release_path(root, path); - if (split_end && split == start) { - split = end; - goto again; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_end - split); + btrfs_mark_buffer_dirty(leaf); + + ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, + root->root_key.objectid, + inode->i_ino, orig_offset); + BUG_ON(ret); + + if (split == start) { + key.offset = start; + } else { + BUG_ON(start != key.offset); + path->slots[0]--; + extent_end = end; + } + recow = 1; + } + + other_start = end; + other_end = 0; + if (extent_mergeable(leaf, path->slots[0] + 1, + inode->i_ino, bytenr, orig_offset, + &other_start, &other_end)) { + if (recow) { + btrfs_release_path(root, path); + goto again; + } + extent_end = other_end; + del_slot = path->slots[0] + 1; + del_nr++; + ret = btrfs_free_extent(trans, root, bytenr, num_bytes, + 0, root->root_key.objectid, + inode->i_ino, orig_offset); + BUG_ON(ret); + } + other_start = 0; + other_end = start; + if (extent_mergeable(leaf, path->slots[0] - 1, + inode->i_ino, bytenr, orig_offset, + &other_start, &other_end)) { + if (recow) { + btrfs_release_path(root, path); + goto again; + } + key.offset = other_start; + del_slot = path->slots[0]; + del_nr++; + ret = btrfs_free_extent(trans, root, bytenr, num_bytes, + 0, root->root_key.objectid, + inode->i_ino, orig_offset); + BUG_ON(ret); } - if (locked_end > end) { - unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, - GFP_NOFS); + if (del_nr == 0) { + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_type(leaf, fi, + BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(leaf); + } else { + fi = btrfs_item_ptr(leaf, del_slot - 1, + struct btrfs_file_extent_item); + btrfs_set_file_extent_type(leaf, fi, + BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_end - key.offset); + btrfs_mark_buffer_dirty(leaf); + + ret = btrfs_del_items(trans, root, path, del_slot, del_nr); + BUG_ON(ret); } +out: btrfs_free_path(path); return 0; } @@ -1210,7 +1135,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_lock(&dentry->d_inode->i_mutex); out: - return ret > 0 ? EIO : ret; + return ret > 0 ? -EIO : ret; } static const struct vm_operations_struct btrfs_file_vm_ops = { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b3ad168a0bfc..e03a836d50d0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); -static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) +static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { int err; - err = btrfs_init_acl(inode, dir); + err = btrfs_init_acl(trans, inode, dir); if (!err) - err = btrfs_xattr_security_init(inode, dir); + err = btrfs_xattr_security_init(trans, inode, dir); return err; } @@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + /* + * we're an inline extent, so nobody can + * extend the file past i_size without locking + * a page we already have locked. + * + * We must do any isize and inode updates + * before we unlock the pages. Otherwise we + * could end up racing with unlink. + */ BTRFS_I(inode)->disk_i_size = inode->i_size; btrfs_update_inode(trans, root, inode); + return 0; fail: btrfs_free_path(path); @@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, return 1; } - ret = btrfs_drop_extents(trans, root, inode, start, - aligned_end, aligned_end, start, + ret = btrfs_drop_extents(trans, inode, start, aligned_end, &hint_byte, 1); BUG_ON(ret); @@ -416,7 +426,6 @@ again: start, end, total_compressed, pages); } - btrfs_end_transaction(trans, root); if (ret == 0) { /* * inline extent creation worked, we don't need @@ -430,9 +439,11 @@ again: EXTENT_CLEAR_DELALLOC | EXTENT_CLEAR_ACCOUNTING | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); - ret = 0; + + btrfs_end_transaction(trans, root); goto free_pages_out; } + btrfs_end_transaction(trans, root); } if (will_compress) { @@ -543,7 +554,6 @@ static noinline int submit_compressed_extents(struct inode *inode, if (list_empty(&async_cow->extents)) return 0; - trans = btrfs_join_transaction(root, 1); while (!list_empty(&async_cow->extents)) { async_extent = list_entry(async_cow->extents.next, @@ -590,19 +600,15 @@ retry: lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1, GFP_NOFS); - /* - * here we're doing allocation and writeback of the - * compressed pages - */ - btrfs_drop_extent_cache(inode, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, 0); + trans = btrfs_join_transaction(root, 1); ret = btrfs_reserve_extent(trans, root, async_extent->compressed_size, async_extent->compressed_size, 0, alloc_hint, (u64)-1, &ins, 1); + btrfs_end_transaction(trans, root); + if (ret) { int i; for (i = 0; i < async_extent->nr_pages; i++) { @@ -618,6 +624,14 @@ retry: goto retry; } + /* + * here we're doing allocation and writeback of the + * compressed pages + */ + btrfs_drop_extent_cache(inode, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, 0); + em = alloc_extent_map(GFP_NOFS); em->start = async_extent->start; em->len = async_extent->ram_size; @@ -649,8 +663,6 @@ retry: BTRFS_ORDERED_COMPRESSED); BUG_ON(ret); - btrfs_end_transaction(trans, root); - /* * clear dirty, set writeback and unlock the pages. */ @@ -672,13 +684,11 @@ retry: async_extent->nr_pages); BUG_ON(ret); - trans = btrfs_join_transaction(root, 1); alloc_hint = ins.objectid + ins.offset; kfree(async_extent); cond_resched(); } - btrfs_end_transaction(trans, root); return 0; } @@ -742,6 +752,7 @@ static noinline int cow_file_range(struct inode *inode, EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); + *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; @@ -1596,7 +1607,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_pos, u64 disk_bytenr, u64 disk_num_bytes, u64 num_bytes, u64 ram_bytes, - u64 locked_end, u8 compression, u8 encryption, u16 other_encoding, int extent_type) { @@ -1622,9 +1632,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * the caller is expected to unpin it and allow it to be merged * with the others. */ - ret = btrfs_drop_extents(trans, root, inode, file_pos, - file_pos + num_bytes, locked_end, - file_pos, &hint, 0); + ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, + &hint, 0); BUG_ON(ret); ins.objectid = inode->i_ino; @@ -1671,24 +1680,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * before we start the transaction. It limits the amount of btree * reads required while inside the transaction. */ -static noinline void reada_csum(struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_ordered_extent *ordered_extent) -{ - struct btrfs_ordered_sum *sum; - u64 bytenr; - - sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, - list); - bytenr = sum->sums[0].bytenr; - - /* - * we don't care about the results, the point of this search is - * just to get the btree leaves into ram - */ - btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); -} - /* as ordered data IO finishes, this gets called so we can finish * an ordered extent if the range of bytes in the file it covers are * fully written. @@ -1699,7 +1690,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_path *path; int compressed = 0; int ret; @@ -1707,46 +1697,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) if (!ret) return 0; - /* - * before we join the transaction, try to do some of our IO. - * This will limit the amount of IO that we have to do with - * the transaction running. We're unlikely to need to do any - * IO if the file extents are new, the disk_i_size checks - * covers the most common case. - */ - if (start < BTRFS_I(inode)->disk_i_size) { - path = btrfs_alloc_path(); - if (path) { - ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, - start, 0); - ordered_extent = btrfs_lookup_ordered_extent(inode, - start); - if (!list_empty(&ordered_extent->list)) { - btrfs_release_path(root, path); - reada_csum(root, path, ordered_extent); - } - btrfs_free_path(path); + ordered_extent = btrfs_lookup_ordered_extent(inode, start); + BUG_ON(!ordered_extent); + + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { + BUG_ON(!list_empty(&ordered_extent->list)); + ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); + if (!ret) { + trans = btrfs_join_transaction(root, 1); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + btrfs_end_transaction(trans, root); } + goto out; } - trans = btrfs_join_transaction(root, 1); - - if (!ordered_extent) - ordered_extent = btrfs_lookup_ordered_extent(inode, start); - BUG_ON(!ordered_extent); - if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) - goto nocow; - lock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); + trans = btrfs_join_transaction(root, 1); + if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) compressed = 1; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { BUG_ON(compressed); - ret = btrfs_mark_extent_written(trans, root, inode, + ret = btrfs_mark_extent_written(trans, inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len); @@ -1758,8 +1734,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->disk_len, ordered_extent->len, ordered_extent->len, - ordered_extent->file_offset + - ordered_extent->len, compressed, 0, 0, BTRFS_FILE_EXTENT_REG); unpin_extent_cache(&BTRFS_I(inode)->extent_tree, @@ -1770,22 +1744,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); -nocow: add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); - mutex_lock(&BTRFS_I(inode)->extent_mutex); - btrfs_ordered_update_i_size(inode, ordered_extent); - btrfs_update_inode(trans, root, inode); - btrfs_remove_ordered_extent(inode, ordered_extent); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); - + /* this also removes the ordered extent from the tree */ + btrfs_ordered_update_i_size(inode, 0, ordered_extent); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + btrfs_end_transaction(trans, root); +out: /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - btrfs_end_transaction(trans, root); return 0; } @@ -2008,6 +1980,54 @@ zeroit: return -EIO; } +struct delayed_iput { + struct list_head list; + struct inode *inode; +}; + +void btrfs_add_delayed_iput(struct inode *inode) +{ + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct delayed_iput *delayed; + + if (atomic_add_unless(&inode->i_count, -1, 1)) + return; + + delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL); + delayed->inode = inode; + + spin_lock(&fs_info->delayed_iput_lock); + list_add_tail(&delayed->list, &fs_info->delayed_iputs); + spin_unlock(&fs_info->delayed_iput_lock); +} + +void btrfs_run_delayed_iputs(struct btrfs_root *root) +{ + LIST_HEAD(list); + struct btrfs_fs_info *fs_info = root->fs_info; + struct delayed_iput *delayed; + int empty; + + spin_lock(&fs_info->delayed_iput_lock); + empty = list_empty(&fs_info->delayed_iputs); + spin_unlock(&fs_info->delayed_iput_lock); + if (empty) + return; + + down_read(&root->fs_info->cleanup_work_sem); + spin_lock(&fs_info->delayed_iput_lock); + list_splice_init(&fs_info->delayed_iputs, &list); + spin_unlock(&fs_info->delayed_iput_lock); + + while (!list_empty(&list)) { + delayed = list_entry(list.next, struct delayed_iput, list); + list_del(&delayed->list); + iput(delayed->inode); + kfree(delayed); + } + up_read(&root->fs_info->cleanup_work_sem); +} + /* * This creates an orphan entry for the given inode in case something goes * wrong in the middle of an unlink/truncate. @@ -2080,16 +2100,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) struct inode *inode; int ret = 0, nr_unlink = 0, nr_truncate = 0; - path = btrfs_alloc_path(); - if (!path) + if (!xchg(&root->clean_orphans, 0)) return; + + path = btrfs_alloc_path(); + BUG_ON(!path); path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); key.offset = (u64)-1; - while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { @@ -2834,37 +2855,40 @@ out: * min_type is the minimum key type to truncate down to. If set to 0, this * will kill all the items on this inode, including the INODE_ITEM_KEY. */ -noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 new_size, u32 min_type) +int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 new_size, u32 min_type) { - int ret; struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_key found_key; - u32 found_type = (u8)-1; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; u64 extent_start = 0; u64 extent_num_bytes = 0; u64 extent_offset = 0; u64 item_end = 0; + u64 mask = root->sectorsize - 1; + u32 found_type = (u8)-1; int found_extent; int del_item; int pending_del_nr = 0; int pending_del_slot = 0; int extent_type = -1; int encoding; - u64 mask = root->sectorsize - 1; + int ret; + int err = 0; + + BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); if (root->ref_cows) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); + path = btrfs_alloc_path(); BUG_ON(!path); path->reada = -1; - /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; key.type = (u8)-1; @@ -2872,17 +2896,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, search_again: path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto error; + if (ret < 0) { + err = ret; + goto out; + } if (ret > 0) { /* there are no items in the tree for us to truncate, we're * done */ - if (path->slots[0] == 0) { - ret = 0; - goto error; - } + if (path->slots[0] == 0) + goto out; path->slots[0]--; } @@ -2917,28 +2941,17 @@ search_again: } item_end--; } - if (item_end < new_size) { - if (found_type == BTRFS_DIR_ITEM_KEY) - found_type = BTRFS_INODE_ITEM_KEY; - else if (found_type == BTRFS_EXTENT_ITEM_KEY) - found_type = BTRFS_EXTENT_DATA_KEY; - else if (found_type == BTRFS_EXTENT_DATA_KEY) - found_type = BTRFS_XATTR_ITEM_KEY; - else if (found_type == BTRFS_XATTR_ITEM_KEY) - found_type = BTRFS_INODE_REF_KEY; - else if (found_type) - found_type--; - else + if (found_type > min_type) { + del_item = 1; + } else { + if (item_end < new_size) break; - btrfs_set_key_type(&key, found_type); - goto next; + if (found_key.offset >= new_size) + del_item = 1; + else + del_item = 0; } - if (found_key.offset >= new_size) - del_item = 1; - else - del_item = 0; found_extent = 0; - /* FIXME, shrink the extent if the ref count is only 1 */ if (found_type != BTRFS_EXTENT_DATA_KEY) goto delete; @@ -3025,42 +3038,36 @@ delete: inode->i_ino, extent_offset); BUG_ON(ret); } -next: - if (path->slots[0] == 0) { - if (pending_del_nr) - goto del_pending; - btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; - goto search_again; - } - path->slots[0]--; - if (pending_del_nr && - path->slots[0] + 1 != pending_del_slot) { - struct btrfs_key debug; -del_pending: - btrfs_item_key_to_cpu(path->nodes[0], &debug, - pending_del_slot); - ret = btrfs_del_items(trans, root, path, - pending_del_slot, - pending_del_nr); - BUG_ON(ret); - pending_del_nr = 0; + if (found_type == BTRFS_INODE_ITEM_KEY) + break; + + if (path->slots[0] == 0 || + path->slots[0] != pending_del_slot) { + if (root->ref_cows) { + err = -EAGAIN; + goto out; + } + if (pending_del_nr) { + ret = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + BUG_ON(ret); + pending_del_nr = 0; + } btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; goto search_again; + } else { + path->slots[0]--; } } - ret = 0; -error: +out: if (pending_del_nr) { ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); } btrfs_free_path(path); - return ret; + return err; } /* @@ -3180,10 +3187,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) if (size <= hole_start) return 0; - err = btrfs_truncate_page(inode->i_mapping, inode->i_size); - if (err) - return err; - while (1) { struct btrfs_ordered_extent *ordered; btrfs_wait_ordered_range(inode, hole_start, @@ -3196,9 +3199,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) btrfs_put_ordered_extent(ordered); } - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - cur_offset = hole_start; while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, @@ -3206,40 +3206,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) BUG_ON(IS_ERR(em) || !em); last_byte = min(extent_map_end(em), block_end); last_byte = (last_byte + mask) & ~mask; - if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { u64 hint_byte = 0; hole_size = last_byte - cur_offset; - err = btrfs_drop_extents(trans, root, inode, - cur_offset, - cur_offset + hole_size, - block_end, - cur_offset, &hint_byte, 1); - if (err) - break; - err = btrfs_reserve_metadata_space(root, 1); + err = btrfs_reserve_metadata_space(root, 2); if (err) break; + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + err = btrfs_drop_extents(trans, inode, cur_offset, + cur_offset + hole_size, + &hint_byte, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); + BUG_ON(err); + btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); - btrfs_unreserve_metadata_space(root, 1); + + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 2); } free_extent_map(em); cur_offset = last_byte; - if (err || cur_offset >= block_end) + if (cur_offset >= block_end) break; } - btrfs_end_transaction(trans, root); unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); return err; } +static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + unsigned long nr; + int ret; + + if (attr->ia_size == inode->i_size) + return 0; + + if (attr->ia_size > inode->i_size) { + unsigned long limit; + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (attr->ia_size > inode->i_sb->s_maxbytes) + return -EFBIG; + if (limit != RLIM_INFINITY && attr->ia_size > limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; + } + } + + ret = btrfs_reserve_metadata_space(root, 1); + if (ret) + return ret; + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_orphan_add(trans, inode); + BUG_ON(ret); + + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 1); + btrfs_btree_balance_dirty(root, nr); + + if (attr->ia_size > inode->i_size) { + ret = btrfs_cont_expand(inode, attr->ia_size); + if (ret) { + btrfs_truncate(inode); + return ret; + } + + i_size_write(inode, attr->ia_size); + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + if (inode->i_nlink > 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); + return 0; + } + + /* + * We're truncating a file that used to have good data down to + * zero. Make sure it gets into the ordered flush list so that + * any new writes get down to disk quickly. + */ + if (attr->ia_size == 0) + BTRFS_I(inode)->ordered_data_close = 1; + + /* we don't support swapfiles, so vmtruncate shouldn't fail */ + ret = vmtruncate(inode, attr->ia_size); + BUG_ON(ret); + + return 0; +} + static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -3250,23 +3330,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - if (attr->ia_size > inode->i_size) { - err = btrfs_cont_expand(inode, attr->ia_size); - if (err) - return err; - } else if (inode->i_size > 0 && - attr->ia_size == 0) { - - /* we're truncating a file that used to have good - * data down to zero. Make sure it gets into - * the ordered flush list so that any new writes - * get down to disk quickly. - */ - BTRFS_I(inode)->ordered_data_close = 1; - } + err = btrfs_setattr_size(inode, attr); + if (err) + return err; } + attr->ia_valid &= ~ATTR_SIZE; - err = inode_setattr(inode, attr); + if (attr->ia_valid) + err = inode_setattr(inode, attr); if (!err && ((attr->ia_valid & ATTR_MODE))) err = btrfs_acl_chmod(inode); @@ -3287,36 +3358,43 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (root->fs_info->log_root_recovering) { + BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); + goto no_delete; + } + if (inode->i_nlink > 0) { BUG_ON(btrfs_root_refs(&root->root_item) != 0); goto no_delete; } btrfs_i_size_write(inode, 0); - trans = btrfs_join_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); - if (ret) { - btrfs_orphan_del(NULL, inode); - goto no_delete_lock; - } + while (1) { + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); - btrfs_orphan_del(trans, inode); + if (ret != -EAGAIN) + break; - nr = trans->blocks_used; - clear_inode(inode); + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + trans = NULL; + btrfs_btree_balance_dirty(root, nr); + } - btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); - return; + if (ret == 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } -no_delete_lock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); no_delete: clear_inode(inode); + return; } /* @@ -3569,7 +3647,6 @@ static noinline void init_btrfs_i(struct inode *inode) INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); - mutex_init(&BTRFS_I(inode)->extent_mutex); mutex_init(&BTRFS_I(inode)->log_mutex); } @@ -3695,6 +3772,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) } srcu_read_unlock(&root->fs_info->subvol_srcu, index); + if (root != sub_root) { + down_read(&root->fs_info->cleanup_work_sem); + if (!(inode->i_sb->s_flags & MS_RDONLY)) + btrfs_orphan_cleanup(sub_root); + up_read(&root->fs_info->cleanup_work_sem); + } + return inode; } @@ -3869,7 +3953,11 @@ skip: /* Reached end of directory/root. Bump pos past the last item. */ if (key_type == BTRFS_DIR_INDEX_KEY) - filp->f_pos = INT_LIMIT(off_t); + /* + * 32-bit glibc will use getdents64, but then strtol - + * so the last number we can serve is this. + */ + filp->f_pos = 0x7fffffff; else filp->f_pos++; nopos: @@ -4219,7 +4307,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -4290,7 +4378,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -4336,6 +4424,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; + /* do not allow sys_link's with other subvols of the same device */ + if (root->objectid != BTRFS_I(inode)->root->objectid) + return -EPERM; + /* * 1 item for inode ref * 2 items for dir items @@ -4423,7 +4515,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) goto out_fail; @@ -5074,17 +5166,20 @@ static void btrfs_truncate(struct inode *inode) unsigned long nr; u64 mask = root->sectorsize - 1; - if (!S_ISREG(inode->i_mode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (!S_ISREG(inode->i_mode)) { + WARN_ON(1); return; + } ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); if (ret) return; + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); /* * setattr is responsible for setting the ordered_data_close flag, @@ -5106,21 +5201,32 @@ static void btrfs_truncate(struct inode *inode) if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) btrfs_add_ordered_operation(trans, root, inode); - btrfs_set_trans_block_group(trans, inode); - btrfs_i_size_write(inode, inode->i_size); + while (1) { + ret = btrfs_truncate_inode_items(trans, root, inode, + inode->i_size, + BTRFS_EXTENT_DATA_KEY); + if (ret != -EAGAIN) + break; - ret = btrfs_orphan_add(trans, inode); - if (ret) - goto out; - /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, - BTRFS_EXTENT_DATA_KEY); - btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); - ret = btrfs_orphan_del(trans, inode); + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + } + + if (ret == 0 && inode->i_nlink > 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } + + ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); -out: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); BUG_ON(ret); @@ -5217,9 +5323,9 @@ void btrfs_destroy_inode(struct inode *inode) spin_lock(&root->list_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" - " list\n", inode->i_ino); - dump_stack(); + printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", + inode->i_ino); + list_del_init(&BTRFS_I(inode)->i_orphan); } spin_unlock(&root->list_lock); @@ -5476,7 +5582,7 @@ out_fail: * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -int btrfs_start_delalloc_inodes(struct btrfs_root *root) +int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) { struct list_head *head = &root->fs_info->delalloc_inodes; struct btrfs_inode *binode; @@ -5495,7 +5601,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) spin_unlock(&root->fs_info->delalloc_lock); if (inode) { filemap_flush(inode->i_mapping); - iput(inode); + if (delay_iput) + btrfs_add_delayed_iput(inode); + else + iput(inode); } cond_resched(); spin_lock(&root->fs_info->delalloc_lock); @@ -5569,7 +5678,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -5641,57 +5750,77 @@ out_fail: return err; } -static int prealloc_file_range(struct btrfs_trans_handle *trans, - struct inode *inode, u64 start, u64 end, - u64 locked_end, u64 alloc_hint, int mode) +static int prealloc_file_range(struct inode *inode, u64 start, u64 end, + u64 alloc_hint, int mode, loff_t actual_len) { + struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 alloc_size; u64 cur_offset = start; u64 num_bytes = end - start; int ret = 0; + u64 i_size; while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); - ret = btrfs_reserve_metadata_space(root, 1); - if (ret) - goto out; + trans = btrfs_start_transaction(root, 1); ret = btrfs_reserve_extent(trans, root, alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); - goto out; + goto stop_trans; } + + ret = btrfs_reserve_metadata_space(root, 3); + if (ret) { + btrfs_free_reserved_extent(root, ins.objectid, + ins.offset); + goto stop_trans; + } + ret = insert_reserved_file_extent(trans, inode, cur_offset, ins.objectid, ins.offset, ins.offset, - ins.offset, locked_end, - 0, 0, 0, + ins.offset, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); BUG_ON(ret); btrfs_drop_extent_cache(inode, cur_offset, cur_offset + ins.offset -1, 0); + num_bytes -= ins.offset; cur_offset += ins.offset; alloc_hint = ins.objectid + ins.offset; - btrfs_unreserve_metadata_space(root, 1); - } -out: - if (cur_offset > start) { + inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && - cur_offset > i_size_read(inode)) - btrfs_i_size_write(inode, cur_offset); + (actual_len > inode->i_size) && + (cur_offset > inode->i_size)) { + + if (cur_offset > actual_len) + i_size = actual_len; + else + i_size = cur_offset; + i_size_write(inode, i_size); + btrfs_ordered_update_i_size(inode, i_size, NULL); + } + ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); + + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 3); } + return ret; +stop_trans: + btrfs_end_transaction(trans, root); return ret; + } static long btrfs_fallocate(struct inode *inode, int mode, @@ -5705,8 +5834,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 locked_end; u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; - struct btrfs_trans_handle *trans; - struct btrfs_root *root; int ret; alloc_start = offset & ~mask; @@ -5725,9 +5852,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, goto out; } - root = BTRFS_I(inode)->root; - - ret = btrfs_check_data_free_space(root, inode, + ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, alloc_end - alloc_start); if (ret) goto out; @@ -5736,12 +5861,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, while (1) { struct btrfs_ordered_extent *ordered; - trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); - if (!trans) { - ret = -EIO; - goto out_free; - } - /* the extent lock is ordered inside the running * transaction */ @@ -5755,8 +5874,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, btrfs_put_ordered_extent(ordered); unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, GFP_NOFS); - btrfs_end_transaction(trans, BTRFS_I(inode)->root); - /* * we can't wait on the range with the transaction * running or with the extent lock held @@ -5777,10 +5894,12 @@ static long btrfs_fallocate(struct inode *inode, int mode, BUG_ON(IS_ERR(em) || !em); last_byte = min(extent_map_end(em), alloc_end); last_byte = (last_byte + mask) & ~mask; - if (em->block_start == EXTENT_MAP_HOLE) { - ret = prealloc_file_range(trans, inode, cur_offset, - last_byte, locked_end + 1, - alloc_hint, mode); + if (em->block_start == EXTENT_MAP_HOLE || + (cur_offset >= inode->i_size && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { + ret = prealloc_file_range(inode, + cur_offset, last_byte, + alloc_hint, mode, offset+len); if (ret < 0) { free_extent_map(em); break; @@ -5799,9 +5918,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, GFP_NOFS); - btrfs_end_transaction(trans, BTRFS_I(inode)->root); -out_free: - btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start); + btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, + alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index cdbb054102b9..0bc577695061 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root, u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; u64 index = 0; - unsigned long nr = 1; /* * 1 - inode item @@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root, btrfs_set_root_generation(&root_item, trans->transid); btrfs_set_root_level(&root_item, 0); btrfs_set_root_refs(&root_item, 1); - btrfs_set_root_used(&root_item, 0); + btrfs_set_root_used(&root_item, leaf->len); btrfs_set_root_last_snapshot(&root_item, 0); memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); @@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root, d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); fail: - nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; btrfs_unreserve_metadata_space(root, 6); - btrfs_btree_balance_dirty(root, nr); return ret; } static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen) { + struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; - int ret = 0; - int err; - unsigned long nr = 0; + int ret; if (!root->ref_cows) return -EINVAL; @@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, */ ret = btrfs_reserve_metadata_space(root, 6); if (ret) - goto fail_unlock; + goto fail; pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); if (!pending_snapshot) { ret = -ENOMEM; btrfs_unreserve_metadata_space(root, 6); - goto fail_unlock; + goto fail; } pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); if (!pending_snapshot->name) { ret = -ENOMEM; kfree(pending_snapshot); btrfs_unreserve_metadata_space(root, 6); - goto fail_unlock; + goto fail; } memcpy(pending_snapshot->name, name, namelen); pending_snapshot->name[namelen] = '\0'; @@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, pending_snapshot->root = root; list_add(&pending_snapshot->list, &trans->transaction->pending_snapshots); - err = btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + btrfs_unreserve_metadata_space(root, 6); -fail_unlock: - btrfs_btree_balance_dirty(root, nr); + inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto fail; + } + BUG_ON(!inode); + d_instantiate(dentry, inode); + ret = 0; +fail: return ret; } @@ -947,7 +952,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, */ /* the destination must be opened for writing */ - if (!(file->f_mode & FMODE_WRITE)) + if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) return -EINVAL; ret = mnt_want_write(file->f_path.mnt); @@ -959,12 +964,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, ret = -EBADF; goto out_drop_write; } + src = src_file->f_dentry->d_inode; ret = -EINVAL; if (src == inode) goto out_fput; + /* the src must be open for reading */ + if (!(src_file->f_mode & FMODE_READ)) + goto out_fput; + ret = -EISDIR; if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) goto out_fput; @@ -995,7 +1005,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, /* determine range to clone */ ret = -EINVAL; - if (off >= src->i_size || off + len > src->i_size) + if (off + len > src->i_size || off + len < off) goto out_unlock; if (len == 0) olen = len = src->i_size - off; @@ -1027,8 +1037,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, BUG_ON(!trans); /* punch hole in destination first */ - btrfs_drop_extents(trans, root, inode, off, off + len, - off + len, 0, &hint_byte, 1); + btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1); /* clone data */ key.objectid = src->i_ino; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 5799bc46a309..5c2a9e78a949 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) /* * remove an ordered extent from the tree. No references are dropped - * but, anyone waiting on this extent is woken up. + * and you must wake_up entry->wait. You must hold the tree mutex + * while you call this function. */ -int btrfs_remove_ordered_extent(struct inode *inode, +static int __btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; tree = &BTRFS_I(inode)->ordered_tree; - mutex_lock(&tree->mutex); node = &entry->rb_node; rb_erase(node, &tree->tree); tree->last = NULL; @@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode, } spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + return 0; +} + +/* + * remove an ordered extent from the tree. No references are dropped + * but any waiters are woken. + */ +int btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) +{ + struct btrfs_ordered_inode_tree *tree; + int ret; + + tree = &BTRFS_I(inode)->ordered_tree; + mutex_lock(&tree->mutex); + ret = __btrfs_remove_ordered_extent(inode, entry); mutex_unlock(&tree->mutex); wake_up(&entry->wait); - return 0; + + return ret; } /* * wait for all the ordered extents in a root. This is done when balancing * space between drives. */ -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) +int btrfs_wait_ordered_extents(struct btrfs_root *root, + int nocow_only, int delay_iput) { struct list_head splice; struct list_head *cur; @@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) if (inode) { btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); - iput(inode); + if (delay_iput) + btrfs_add_delayed_iput(inode); + else + iput(inode); } else { btrfs_put_ordered_extent(ordered); } @@ -430,7 +451,7 @@ again: btrfs_wait_ordered_range(inode, 0, (u64)-1); else filemap_flush(inode->i_mapping); - iput(inode); + btrfs_add_delayed_iput(inode); } cond_resched(); @@ -589,7 +610,7 @@ out: * After an extent is done, call this to conditionally update the on disk * i_size. i_size is updated to cover any fully written part of the file. */ -int btrfs_ordered_update_i_size(struct inode *inode, +int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered) { struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; @@ -597,18 +618,32 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 disk_i_size; u64 new_i_size; u64 i_size_test; + u64 i_size = i_size_read(inode); struct rb_node *node; + struct rb_node *prev = NULL; struct btrfs_ordered_extent *test; + int ret = 1; + + if (ordered) + offset = entry_end(ordered); + else + offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); mutex_lock(&tree->mutex); disk_i_size = BTRFS_I(inode)->disk_i_size; + /* truncate file */ + if (disk_i_size > i_size) { + BTRFS_I(inode)->disk_i_size = i_size; + ret = 0; + goto out; + } + /* * if the disk i_size is already at the inode->i_size, or * this ordered extent is inside the disk i_size, we're done */ - if (disk_i_size >= inode->i_size || - ordered->file_offset + ordered->len <= disk_i_size) { + if (disk_i_size == i_size || offset <= disk_i_size) { goto out; } @@ -616,8 +651,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, * we can't update the disk_isize if there are delalloc bytes * between disk_i_size and this ordered extent */ - if (test_range_bit(io_tree, disk_i_size, - ordered->file_offset + ordered->len - 1, + if (test_range_bit(io_tree, disk_i_size, offset - 1, EXTENT_DELALLOC, 0, NULL)) { goto out; } @@ -626,20 +660,32 @@ int btrfs_ordered_update_i_size(struct inode *inode, * if we find an ordered extent then we can't update disk i_size * yet */ - node = &ordered->rb_node; - while (1) { - node = rb_prev(node); - if (!node) - break; + if (ordered) { + node = rb_prev(&ordered->rb_node); + } else { + prev = tree_search(tree, offset); + /* + * we insert file extents without involving ordered struct, + * so there should be no ordered struct cover this offset + */ + if (prev) { + test = rb_entry(prev, struct btrfs_ordered_extent, + rb_node); + BUG_ON(offset_in_entry(test, offset)); + } + node = prev; + } + while (node) { test = rb_entry(node, struct btrfs_ordered_extent, rb_node); if (test->file_offset + test->len <= disk_i_size) break; - if (test->file_offset >= inode->i_size) + if (test->file_offset >= i_size) break; if (test->file_offset >= disk_i_size) goto out; + node = rb_prev(node); } - new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); + new_i_size = min_t(u64, offset, i_size); /* * at this point, we know we can safely update i_size to at least @@ -647,7 +693,14 @@ int btrfs_ordered_update_i_size(struct inode *inode, * walk forward and see if ios from higher up in the file have * finished. */ - node = rb_next(&ordered->rb_node); + if (ordered) { + node = rb_next(&ordered->rb_node); + } else { + if (prev) + node = rb_next(prev); + else + node = rb_first(&tree->tree); + } i_size_test = 0; if (node) { /* @@ -655,10 +708,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, * between our ordered extent and the next one. */ test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - if (test->file_offset > entry_end(ordered)) + if (test->file_offset > offset) i_size_test = test->file_offset; } else { - i_size_test = i_size_read(inode); + i_size_test = i_size; } /* @@ -667,15 +720,25 @@ int btrfs_ordered_update_i_size(struct inode *inode, * are no delalloc bytes in this area, it is safe to update * disk_i_size to the end of the region. */ - if (i_size_test > entry_end(ordered) && - !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, - EXTENT_DELALLOC, 0, NULL)) { - new_i_size = min_t(u64, i_size_test, i_size_read(inode)); + if (i_size_test > offset && + !test_range_bit(io_tree, offset, i_size_test - 1, + EXTENT_DELALLOC, 0, NULL)) { + new_i_size = min_t(u64, i_size_test, i_size); } BTRFS_I(inode)->disk_i_size = new_i_size; + ret = 0; out: + /* + * we need to remove the ordered extent with the tree lock held + * so that other people calling this function don't find our fully + * processed ordered entry and skip updating the i_size + */ + if (ordered) + __btrfs_remove_ordered_extent(inode, ordered); mutex_unlock(&tree->mutex); - return 0; + if (ordered) + wake_up(&ordered->wait); + return ret; } /* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index f82e87488ca8..1fe1282ef47c 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode, int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); -int btrfs_ordered_update_i_size(struct inode *inode, +int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +int btrfs_wait_ordered_extents(struct btrfs_root *root, + int nocow_only, int delay_iput); #endif diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cfcc93c93a7b..ab7ab5318745 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root, return 0; } +static void put_inodes(struct list_head *list) +{ + struct inodevec *ivec; + while (!list_empty(list)) { + ivec = list_entry(list->next, struct inodevec, list); + list_del(&ivec->list); + while (ivec->nr > 0) { + ivec->nr--; + iput(ivec->inode[ivec->nr]); + } + kfree(ivec); + } +} + static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key) @@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, btrfs_btree_balance_dirty(root, nr); + /* + * put inodes outside transaction, otherwise we may deadlock. + */ + put_inodes(&inode_list); + if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); } @@ -1752,19 +1771,7 @@ out: btrfs_btree_balance_dirty(root, nr); - /* - * put inodes while we aren't holding the tree locks - */ - while (!list_empty(&inode_list)) { - struct inodevec *ivec; - ivec = list_entry(inode_list.next, struct inodevec, list); - list_del(&ivec->list); - while (ivec->nr > 0) { - ivec->nr--; - iput(ivec->inode[ivec->nr]); - } - kfree(ivec); - } + put_inodes(&inode_list); if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); @@ -3274,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) return -ENOMEM; path = btrfs_alloc_path(); - if (!path) + if (!path) { + kfree(cluster); return -ENOMEM; + } rc->extents_found = 0; rc->extents_skipped = 0; @@ -3534,8 +3543,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) (unsigned long long)rc->block_group->key.objectid, (unsigned long long)rc->block_group->flags); - btrfs_start_delalloc_inodes(fs_info->tree_root); - btrfs_wait_ordered_extents(fs_info->tree_root, 0); + btrfs_start_delalloc_inodes(fs_info->tree_root, 0); + btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); while (1) { rc->extents_found = 0; @@ -3755,6 +3764,8 @@ out: BTRFS_DATA_RELOC_TREE_OBJECTID); if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); + else + btrfs_orphan_cleanup(fs_root); } return err; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 752a5463bf53..a649305b2059 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -126,8 +126,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) { struct btrfs_fs_info *info = root->fs_info; substring_t args[MAX_OPT_ARGS]; - char *p, *num; + char *p, *num, *orig; int intarg; + int ret = 0; if (!options) return 0; @@ -140,6 +141,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) if (!options) return -ENOMEM; + orig = options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -262,12 +264,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_discard: btrfs_set_opt(info->mount_opt, DISCARD); break; + case Opt_err: + printk(KERN_INFO "btrfs: unrecognized mount option " + "'%s'\n", p); + ret = -EINVAL; + goto out; default: break; } } - kfree(options); - return 0; +out: + kfree(orig); + return ret; } /* @@ -405,8 +413,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(root, 0); + btrfs_start_delalloc_inodes(root, 0); + btrfs_wait_ordered_extents(root, 0, 0); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -450,6 +458,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",notreelog"); if (btrfs_test_opt(root, FLUSHONCOMMIT)) seq_puts(seq, ",flushoncommit"); + if (btrfs_test_opt(root, DISCARD)) + seq_puts(seq, ",discard"); if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) seq_puts(seq, ",noacl"); return 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c207e8c32c9b..b2acc79f1b34 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, memset(trans, 0, sizeof(*trans)); kmem_cache_free(btrfs_trans_handle_cachep, trans); + if (throttle) + btrfs_run_delayed_iputs(root); + return 0; } @@ -354,7 +357,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, * those extents are sent to disk but does not wait on them */ int btrfs_write_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages) + struct extent_io_tree *dirty_pages, int mark) { int ret; int err = 0; @@ -367,7 +370,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root, while (1) { ret = find_first_extent_bit(dirty_pages, start, &start, &end, - EXTENT_DIRTY); + mark); if (ret) break; while (start <= end) { @@ -413,7 +416,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root, * on all the pages and clear them from the dirty pages state tree */ int btrfs_wait_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages) + struct extent_io_tree *dirty_pages, int mark) { int ret; int err = 0; @@ -425,12 +428,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, unsigned long index; while (1) { - ret = find_first_extent_bit(dirty_pages, 0, &start, &end, - EXTENT_DIRTY); + ret = find_first_extent_bit(dirty_pages, start, &start, &end, + mark); if (ret) break; - clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); + clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); while (start <= end) { index = start >> PAGE_CACHE_SHIFT; start = (u64)(index + 1) << PAGE_CACHE_SHIFT; @@ -460,13 +463,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, * those extents are on disk for transaction or log commit */ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages) + struct extent_io_tree *dirty_pages, int mark) { int ret; int ret2; - ret = btrfs_write_marked_extents(root, dirty_pages); - ret2 = btrfs_wait_marked_extents(root, dirty_pages); + ret = btrfs_write_marked_extents(root, dirty_pages, mark); + ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); return ret || ret2; } @@ -479,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, return filemap_write_and_wait(btree_inode->i_mapping); } return btrfs_write_and_wait_marked_extents(root, - &trans->transaction->dirty_pages); + &trans->transaction->dirty_pages, + EXTENT_DIRTY); } /* @@ -497,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, { int ret; u64 old_root_bytenr; + u64 old_root_used; struct btrfs_root *tree_root = root->fs_info->tree_root; + old_root_used = btrfs_root_used(&root->root_item); btrfs_write_dirty_block_groups(trans, root); while (1) { old_root_bytenr = btrfs_root_bytenr(&root->root_item); - if (old_root_bytenr == root->node->start) + if (old_root_bytenr == root->node->start && + old_root_used == btrfs_root_used(&root->root_item)) break; btrfs_set_root_node(&root->root_item, root->node); @@ -512,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, &root->root_item); BUG_ON(ret); + old_root_used = btrfs_root_used(&root->root_item); ret = btrfs_write_dirty_block_groups(trans, root); BUG_ON(ret); } @@ -795,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, memcpy(&pending->root_key, &key, sizeof(key)); fail: kfree(new_root_item); - btrfs_unreserve_metadata_space(root, 6); return ret; } @@ -807,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, u64 index = 0; struct btrfs_trans_handle *trans; struct inode *parent_inode; - struct inode *inode; struct btrfs_root *parent_root; parent_inode = pending->dentry->d_parent->d_inode; @@ -839,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, BUG_ON(ret); - inode = btrfs_lookup_dentry(parent_inode, pending->dentry); - d_instantiate(pending->dentry, inode); fail: btrfs_end_transaction(trans, fs_info->fs_root); return ret; @@ -994,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); if (flush_on_commit) { - btrfs_start_delalloc_inodes(root); - ret = btrfs_wait_ordered_extents(root, 0); + btrfs_start_delalloc_inodes(root, 1); + ret = btrfs_wait_ordered_extents(root, 0, 1); BUG_ON(ret); } else if (snap_pending) { - ret = btrfs_wait_ordered_extents(root, 1); + ret = btrfs_wait_ordered_extents(root, 0, 1); BUG_ON(ret); } @@ -1116,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, current->journal_info = NULL; kmem_cache_free(btrfs_trans_handle_cachep, trans); + + if (current != root->fs_info->transaction_kthread) + btrfs_run_delayed_iputs(root); + return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index d4e3e7a6938c..93c7ccb33118 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root); int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages); + struct extent_io_tree *dirty_pages, int mark); int btrfs_write_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages); + struct extent_io_tree *dirty_pages, int mark); int btrfs_wait_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages); + struct extent_io_tree *dirty_pages, int mark); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); #endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 741666a7676a..4a9434b622ec 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ - ret = btrfs_drop_extents(trans, root, inode, - start, extent_end, extent_end, start, &alloc_hint, 1); + ret = btrfs_drop_extents(trans, inode, start, extent_end, + &alloc_hint, 1); BUG_ON(ret); if (found_type == BTRFS_FILE_EXTENT_REG || @@ -930,6 +930,17 @@ out_nowrite: return 0; } +static int insert_orphan_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 offset) +{ + int ret; + ret = btrfs_find_orphan_item(root, offset); + if (ret > 0) + ret = btrfs_insert_orphan_item(trans, root, offset); + return ret; +} + + /* * There are a few corners where the link count of the file can't * be properly maintained during replay. So, instead of adding @@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, } BTRFS_I(inode)->index_cnt = (u64)-1; - if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { - ret = replay_dir_deletes(trans, root, NULL, path, - inode->i_ino, 1); + if (inode->i_nlink == 0) { + if (S_ISDIR(inode->i_mode)) { + ret = replay_dir_deletes(trans, root, NULL, path, + inode->i_ino, 1); + BUG_ON(ret); + } + ret = insert_orphan_item(trans, root, inode->i_ino); BUG_ON(ret); } btrfs_free_path(path); @@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, /* inode keys are done during the first stage */ if (key.type == BTRFS_INODE_ITEM_KEY && wc->stage == LOG_WALK_REPLAY_INODES) { - struct inode *inode; struct btrfs_inode_item *inode_item; u32 mode; @@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, eb, i, &key); BUG_ON(ret); - /* for regular files, truncate away - * extents past the new EOF + /* for regular files, make sure corresponding + * orhpan item exist. extents past the new EOF + * will be truncated later by orphan cleanup. */ if (S_ISREG(mode)) { - inode = read_one_inode(root, - key.objectid); - BUG_ON(!inode); - - ret = btrfs_truncate_inode_items(wc->trans, - root, inode, inode->i_size, - BTRFS_EXTENT_DATA_KEY); + ret = insert_orphan_item(wc->trans, root, + key.objectid); BUG_ON(ret); - - /* if the nlink count is zero here, the iput - * will free the inode. We bump it to make - * sure it doesn't get freed until the link - * count fixup is done - */ - if (inode->i_nlink == 0) { - btrfs_inc_nlink(inode); - btrfs_update_inode(wc->trans, - root, inode); - } - iput(inode); } + ret = link_to_fixup_dir(wc->trans, root, path, key.objectid); BUG_ON(ret); @@ -1977,10 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, { int index1; int index2; + int mark; int ret; struct btrfs_root *log = root->log_root; struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; - u64 log_transid = 0; + unsigned long log_transid = 0; mutex_lock(&root->log_mutex); index1 = root->log_transid % 2; @@ -2014,24 +2014,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out; } + log_transid = root->log_transid; + if (log_transid % 2 == 0) + mark = EXTENT_DIRTY; + else + mark = EXTENT_NEW; + /* we start IO on all the marked extents here, but we don't actually * wait for them until later. */ - ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); + ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); BUG_ON(ret); btrfs_set_root_node(&log->root_item, log->node); root->log_batch = 0; - log_transid = root->log_transid; root->log_transid++; log->log_transid = root->log_transid; root->log_start_pid = 0; smp_mb(); /* - * log tree has been flushed to disk, new modifications of - * the log will be written to new positions. so it's safe to - * allow log writers to go in. + * IO has been started, blocks of the log tree have WRITTEN flag set + * in their headers. new modifications of the log will be written to + * new positions. so it's safe to allow log writers to go in. */ mutex_unlock(&root->log_mutex); @@ -2052,7 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, index2 = log_root_tree->log_transid % 2; if (atomic_read(&log_root_tree->log_commit[index2])) { - btrfs_wait_marked_extents(log, &log->dirty_log_pages); + btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); wait_log_commit(trans, log_root_tree, log_root_tree->log_transid); mutex_unlock(&log_root_tree->log_mutex); @@ -2072,16 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * check the full commit flag again */ if (root->fs_info->last_trans_log_full_commit == trans->transid) { - btrfs_wait_marked_extents(log, &log->dirty_log_pages); + btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); mutex_unlock(&log_root_tree->log_mutex); ret = -EAGAIN; goto out_wake_log_root; } ret = btrfs_write_and_wait_marked_extents(log_root_tree, - &log_root_tree->dirty_log_pages); + &log_root_tree->dirty_log_pages, + EXTENT_DIRTY | EXTENT_NEW); BUG_ON(ret); - btrfs_wait_marked_extents(log, &log->dirty_log_pages); + btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); btrfs_set_super_log_root(&root->fs_info->super_for_commit, log_root_tree->node->start); @@ -2147,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, - 0, &start, &end, EXTENT_DIRTY); + 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); if (ret) break; - clear_extent_dirty(&log->dirty_log_pages, - start, end, GFP_NOFS); + clear_extent_bits(&log->dirty_log_pages, start, end, + EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); } if (log->log_transid > 0) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7eda483d7b5a..41ecbb2347f2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->avail_metadata_alloc_bits; if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && - root->fs_info->fs_devices->rw_devices <= 4) { + root->fs_info->fs_devices->num_devices <= 4) { printk(KERN_ERR "btrfs: unable to go below four devices " "on raid10\n"); ret = -EINVAL; @@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && - root->fs_info->fs_devices->rw_devices <= 2) { + root->fs_info->fs_devices->num_devices <= 2) { printk(KERN_ERR "btrfs: unable to go below two " "devices on raid1\n"); ret = -EINVAL; @@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) return -EINVAL; bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); - if (!bdev) - return -EIO; + if (IS_ERR(bdev)) + return PTR_ERR(bdev); if (root->fs_info->fs_devices->seeding) { seeding_dev = 1; @@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, max_chunk_size = 10 * calc_size; min_stripe_size = 64 * 1024 * 1024; } else if (type & BTRFS_BLOCK_GROUP_METADATA) { - max_chunk_size = 4 * calc_size; + max_chunk_size = 256 * 1024 * 1024; min_stripe_size = 32 * 1024 * 1024; } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { calc_size = 8 * 1024 * 1024; @@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) if (!em) return 1; + if (btrfs_test_opt(root, DEGRADED)) { + free_extent_map(em); + return 0; + } + map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { if (!map->stripes[i].dev->writeable) { @@ -2649,8 +2654,10 @@ again: em = lookup_extent_mapping(em_tree, logical, *length); read_unlock(&em_tree->lock); - if (!em && unplug_page) + if (!em && unplug_page) { + kfree(multi); return 0; + } if (!em) { printk(KERN_CRIT "unable to find logical %llu len %llu\n", diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index b6dd5967c48a..193b58f7d3f3 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -85,22 +85,23 @@ out: return ret; } -int __btrfs_setxattr(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +static int do_setxattr(struct btrfs_trans_handle *trans, + struct inode *inode, const char *name, + const void *value, size_t size, int flags) { struct btrfs_dir_item *di; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; struct btrfs_path *path; - int ret = 0, mod = 0; + size_t name_len = strlen(name); + int ret = 0; + + if (name_len + size > BTRFS_MAX_XATTR_SIZE(root)) + return -ENOSPC; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - trans = btrfs_join_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - /* first lets see if we already have this xattr */ di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, strlen(name), -1); @@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name, } ret = btrfs_delete_one_dir_name(trans, root, path, di); - if (ret) - goto out; + BUG_ON(ret); btrfs_release_path(root, path); /* if we don't have a value then we are removing the xattr */ - if (!value) { - mod = 1; + if (!value) goto out; - } } else { btrfs_release_path(root, path); @@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name, } /* ok we have to create a completely new xattr */ - ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), - value, size, inode->i_ino); + ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino, + name, name_len, value, size); + BUG_ON(ret); +out: + btrfs_free_path(path); + return ret; +} + +int __btrfs_setxattr(struct btrfs_trans_handle *trans, + struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + if (trans) + return do_setxattr(trans, inode, name, value, size, flags); + + ret = btrfs_reserve_metadata_space(root, 2); if (ret) - goto out; - mod = 1; + return ret; -out: - if (mod) { - inode->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, root, inode); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; } + btrfs_set_trans_block_group(trans, inode); - btrfs_end_transaction(trans, root); - btrfs_free_path(path); + ret = do_setxattr(trans, inode, name, value, size, flags); + if (ret) + goto out; + + inode->i_ctime = CURRENT_TIME; + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); +out: + btrfs_end_transaction_throttle(trans, root); + btrfs_unreserve_metadata_space(root, 2); return ret; } @@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (size == 0) value = ""; /* empty EA, do not remove */ - return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); + + return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size, + flags); } int btrfs_removexattr(struct dentry *dentry, const char *name) @@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) if (!btrfs_is_valid_xattr(name)) return -EOPNOTSUPP; - return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); + + return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0, + XATTR_REPLACE); } -int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) +int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { int err; size_t len; @@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) } else { strcpy(name, XATTR_SECURITY_PREFIX); strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); - err = __btrfs_setxattr(inode, name, value, len, 0); + err = __btrfs_setxattr(trans, inode, name, value, len, 0); kfree(name); } diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index c71e9c3cf3f7..721efa0346e0 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h @@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[]; extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, void *buffer, size_t size); -extern int __btrfs_setxattr(struct inode *inode, const char *name, - const void *value, size_t size, int flags); - +extern int __btrfs_setxattr(struct btrfs_trans_handle *trans, + struct inode *inode, const char *name, + const void *value, size_t size, int flags); extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); extern int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); extern int btrfs_removexattr(struct dentry *dentry, const char *name); -extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); +extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir); #endif /* __XATTR__ */ |