summaryrefslogtreecommitdiff
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c222
1 files changed, 198 insertions, 24 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9c45431e69ab..9314adeba946 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1613,6 +1613,9 @@ static bool name_in_log_ref(struct btrfs_root *log_root,
* not exist in the FS, it is skipped. fsyncs on directories
* do not force down inodes inside that directory, just changes to the
* names or unlinks in a directory.
+ *
+ * Returns < 0 on error, 0 if the name wasn't replayed (dentry points to a
+ * non-existing inode) and 1 if the name was replayed.
*/
static noinline int replay_one_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -1631,6 +1634,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
int exists;
int ret = 0;
bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
+ bool name_added = false;
dir = read_one_inode(root, key->objectid);
if (!dir)
@@ -1708,6 +1712,8 @@ out:
}
kfree(name);
iput(dir);
+ if (!ret && name_added)
+ ret = 1;
return ret;
insert:
@@ -1723,6 +1729,8 @@ insert:
name, name_len, log_type, &log_key);
if (ret && ret != -ENOENT && ret != -EEXIST)
goto out;
+ if (!ret)
+ name_added = true;
update_size = false;
ret = 0;
goto out;
@@ -1740,12 +1748,13 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
- int ret;
+ int ret = 0;
u32 item_size = btrfs_item_size_nr(eb, slot);
struct btrfs_dir_item *di;
int name_len;
unsigned long ptr;
unsigned long ptr_end;
+ struct btrfs_path *fixup_path = NULL;
ptr = btrfs_item_ptr_offset(eb, slot);
ptr_end = ptr + item_size;
@@ -1755,12 +1764,59 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
return -EIO;
name_len = btrfs_dir_name_len(eb, di);
ret = replay_one_name(trans, root, path, eb, di, key);
- if (ret)
- return ret;
+ if (ret < 0)
+ break;
ptr = (unsigned long)(di + 1);
ptr += name_len;
+
+ /*
+ * If this entry refers to a non-directory (directories can not
+ * have a link count > 1) and it was added in the transaction
+ * that was not committed, make sure we fixup the link count of
+ * the inode it the entry points to. Otherwise something like
+ * the following would result in a directory pointing to an
+ * inode with a wrong link that does not account for this dir
+ * entry:
+ *
+ * mkdir testdir
+ * touch testdir/foo
+ * touch testdir/bar
+ * sync
+ *
+ * ln testdir/bar testdir/bar_link
+ * ln testdir/foo testdir/foo_link
+ * xfs_io -c "fsync" testdir/bar
+ *
+ * <power failure>
+ *
+ * mount fs, log replay happens
+ *
+ * File foo would remain with a link count of 1 when it has two
+ * entries pointing to it in the directory testdir. This would
+ * make it impossible to ever delete the parent directory has
+ * it would result in stale dentries that can never be deleted.
+ */
+ if (ret == 1 && btrfs_dir_type(eb, di) != BTRFS_FT_DIR) {
+ struct btrfs_key di_key;
+
+ if (!fixup_path) {
+ fixup_path = btrfs_alloc_path();
+ if (!fixup_path) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ btrfs_dir_item_key_to_cpu(eb, di, &di_key);
+ ret = link_to_fixup_dir(trans, root, fixup_path,
+ di_key.objectid);
+ if (ret)
+ break;
+ }
+ ret = 0;
}
- return 0;
+ btrfs_free_path(fixup_path);
+ return ret;
}
/*
@@ -4904,6 +4960,94 @@ next_dir_inode:
return ret;
}
+static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
+ struct inode *inode,
+ struct btrfs_log_ctx *ctx)
+{
+ int ret;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ const u64 ino = btrfs_ino(inode);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->skip_locking = 1;
+ path->search_commit_root = 1;
+
+ key.objectid = ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ while (true) {
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ u32 cur_offset = 0;
+ u32 item_size;
+ unsigned long ptr;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ /* BTRFS_INODE_EXTREF_KEY is BTRFS_INODE_REF_KEY + 1 */
+ if (key.objectid != ino || key.type > BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ item_size = btrfs_item_size_nr(leaf, slot);
+ ptr = btrfs_item_ptr_offset(leaf, slot);
+ while (cur_offset < item_size) {
+ struct btrfs_key inode_key;
+ struct inode *dir_inode;
+
+ inode_key.type = BTRFS_INODE_ITEM_KEY;
+ inode_key.offset = 0;
+
+ if (key.type == BTRFS_INODE_EXTREF_KEY) {
+ struct btrfs_inode_extref *extref;
+
+ extref = (struct btrfs_inode_extref *)
+ (ptr + cur_offset);
+ inode_key.objectid = btrfs_inode_extref_parent(
+ leaf, extref);
+ cur_offset += sizeof(*extref);
+ cur_offset += btrfs_inode_extref_name_len(leaf,
+ extref);
+ } else {
+ inode_key.objectid = key.offset;
+ cur_offset = item_size;
+ }
+
+ dir_inode = btrfs_iget(root->fs_info->sb, &inode_key,
+ root, NULL);
+ /* If parent inode was deleted, skip it. */
+ if (IS_ERR(dir_inode))
+ continue;
+
+ ret = btrfs_log_inode(trans, root, dir_inode,
+ LOG_INODE_ALL, 0, LLONG_MAX, ctx);
+ iput(dir_inode);
+ if (ret)
+ goto out;
+ }
+ path->slots[0]++;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
@@ -4923,9 +5067,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct dentry *old_parent = NULL;
int ret = 0;
u64 last_committed = root->fs_info->last_trans_committed;
- const struct dentry * const first_parent = parent;
- const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
- last_committed);
bool log_dentries = false;
struct inode *orig_inode = inode;
@@ -4986,6 +5127,53 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
log_dentries = true;
+ /*
+ * On unlink we must make sure all our current and old parent directores
+ * inodes are fully logged. This is to prevent leaving dangling
+ * directory index entries in directories that were our parents but are
+ * not anymore. Not doing this results in old parent directory being
+ * impossible to delete after log replay (rmdir will always fail with
+ * error -ENOTEMPTY).
+ *
+ * Example 1:
+ *
+ * mkdir testdir
+ * touch testdir/foo
+ * ln testdir/foo testdir/bar
+ * sync
+ * unlink testdir/bar
+ * xfs_io -c fsync testdir/foo
+ * <power failure>
+ * mount fs, triggers log replay
+ *
+ * If we don't log the parent directory (testdir), after log replay the
+ * directory still has an entry pointing to the file inode using the bar
+ * name, but a matching BTRFS_INODE_[REF|EXTREF]_KEY does not exist and
+ * the file inode has a link count of 1.
+ *
+ * Example 2:
+ *
+ * mkdir testdir
+ * touch foo
+ * ln foo testdir/foo2
+ * ln foo testdir/foo3
+ * sync
+ * unlink testdir/foo3
+ * xfs_io -c fsync foo
+ * <power failure>
+ * mount fs, triggers log replay
+ *
+ * Similar as the first example, after log replay the parent directory
+ * testdir still has an entry pointing to the inode file with name foo3
+ * but the file inode does not have a matching BTRFS_INODE_REF_KEY item
+ * and has a link count of 2.
+ */
+ if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
+ ret = btrfs_log_all_parents(trans, orig_inode, ctx);
+ if (ret)
+ goto end_trans;
+ }
+
while (1) {
if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
break;
@@ -4994,23 +5182,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (root != BTRFS_I(inode)->root)
break;
- /*
- * On unlink we must make sure our immediate parent directory
- * inode is fully logged. This is to prevent leaving dangling
- * directory index entries and a wrong directory inode's i_size.
- * Not doing so can result in a directory being impossible to
- * delete after log replay (rmdir will always fail with error
- * -ENOTEMPTY).
- */
- if (did_unlink && parent == first_parent)
- inode_only = LOG_INODE_ALL;
- else
- inode_only = LOG_INODE_EXISTS;
-
- if (BTRFS_I(inode)->generation >
- root->fs_info->last_trans_committed ||
- inode_only == LOG_INODE_ALL) {
- ret = btrfs_log_inode(trans, root, inode, inode_only,
+ if (BTRFS_I(inode)->generation > last_committed) {
+ ret = btrfs_log_inode(trans, root, inode,
+ LOG_INODE_EXISTS,
0, LLONG_MAX, ctx);
if (ret)
goto end_trans;