summaryrefslogtreecommitdiff
path: root/fs/bcachefs/fs.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-05-19 13:45:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-05-19 13:45:48 -0700
commit16dbfae867cdeb32f3d24cea81193793d5decc61 (patch)
treefee64885a8eb977b56ef913bb7a4f74a773c78b8 /fs/bcachefs/fs.c
parenta90f1cd105c6c5c246f07ca371d873d35b78c7d9 (diff)
parent07f9a27f1969764d11374942961d51fee0ab628f (diff)
Merge tag 'bcachefs-2024-05-19' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs updates from Kent Overstreet: - More safety fixes, primarily found by syzbot - Run the upgrade/downgrade paths in nochnages mode. Nochanges mode is primarily for testing fsck/recovery in dry run mode, so it shouldn't change anything besides disabling writes and holding dirty metadata in memory. The idea here was to reduce the amount of activity if we can't write anything out, so that bringing up a filesystem in "super ro" mode would be more lilkely to work for data recovery - but norecovery is the correct option for this. - btree_trans->locked; we now track whether a btree_trans has any btree nodes locked, and this is used for improved assertions related to trans_unlock() and trans_relock(). We'll also be using it for improving how we work with lockdep in the future: we don't want lockdep to be tracking individual btree node locks because we take too many for lockdep to track, and it's not necessary since we have a cycle detector. - Trigger improvements that are prep work for online fsck - BTREE_TRIGGER_check_repair; this regularizes how we do some repair work for extents that goes with running triggers in fsck, and fixes some subtle issues with transaction restarts there. - bch2_snapshot_equiv() has now been ripped out of fsck.c; snapshot equivalence classes are for when snapshot deletion leaves behind redundant snapshot nodes, but snapshot deletion now cleans this up right away, so the abstraction doesn't need to leak. - Improvements to how we resume writing to the journal in recovery. The code for picking the new place to write when reading the journal is greatly simplified and we also store the position in the superblock for when we don't read the journal; this means that we preserve more of the journal for list_journal debugging. - Improvements to sysfs btree_cache and btree_node_cache, for debugging memory reclaim. - We now detect when we've blocked for 10 seconds on the allocator in the write path and dump some useful info. - Safety fixes for devices references: this is a big series that changes almost all device lookups to properly check if the device exists and take a reference to it. Previously we assumed that if a bkey exists that references a device then the device must exist, and this was enforced in .invalid methods, but this was incorrect because it meant device removal relied on accounting being correct to not leave keys pointing to invalid devices, and that's not something we can assume. Getting the "pointer to invalid device" checks out of our .invalid() methods fixes some long standing device removal bugs; the only outstanding bug with device removal now is a race between the discard path and deleting alloc info, which should be easily fixed. - The allocator now prefers not to expand the new member_info.btree_allocated bitmap, meaning if repair ever requires scanning for btree nodes (because of a corrupt interior nodes) we won't have to scan the whole device(s). - New coding style document, which among other things talks about the correct usage of assertions * tag 'bcachefs-2024-05-19' of https://evilpiepirate.org/git/bcachefs: (155 commits) bcachefs: add no_invalid_checks flag bcachefs: add counters for failed shrinker reclaim bcachefs: Fix sb_field_downgrade validation bcachefs: Plumb bch_validate_flags to sb_field_ops.validate() bcachefs: s/bkey_invalid_flags/bch_validate_flags bcachefs: fsync() should not return -EROFS bcachefs: Invalid devices are now checked for by fsck, not .invalid methods bcachefs: kill bch2_dev_bkey_exists() in bch2_check_fix_ptrs() bcachefs: kill bch2_dev_bkey_exists() in bch2_read_endio() bcachefs: bch2_dev_get_ioref() checks for device not present bcachefs: bch2_dev_get_ioref2(); io_read.c bcachefs: bch2_dev_get_ioref2(); debug.c bcachefs: bch2_dev_get_ioref2(); journal_io.c bcachefs: bch2_dev_get_ioref2(); io_write.c bcachefs: bch2_dev_get_ioref2(); btree_io.c bcachefs: bch2_dev_get_ioref2(); backpointers.c bcachefs: bch2_dev_get_ioref2(); alloc_background.c bcachefs: for_each_bset() declares loop iter bcachefs: Move BCACHEFS_STATFS_MAGIC value to UAPI magic.h bcachefs: Improve sysfs internal/btree_cache ...
Diffstat (limited to 'fs/bcachefs/fs.c')
-rw-r--r--fs/bcachefs/fs.c109
1 files changed, 60 insertions, 49 deletions
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 65b04b3c2679..fd851f10d11c 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -90,7 +90,7 @@ retry:
bch2_trans_begin(trans);
ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode),
- BTREE_ITER_INTENT) ?:
+ BTREE_ITER_intent) ?:
(set ? set(trans, inode, &inode_u, p) : 0) ?:
bch2_inode_write(trans, &iter, &inode_u) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
@@ -213,19 +213,43 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
_ret; \
})
+static struct inode *bch2_alloc_inode(struct super_block *sb)
+{
+ BUG();
+}
+
+static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c)
+{
+ struct bch_inode_info *inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
+ if (!inode)
+ return NULL;
+
+ inode_init_once(&inode->v);
+ mutex_init(&inode->ei_update_lock);
+ two_state_lock_init(&inode->ei_pagecache_lock);
+ INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
+ mutex_init(&inode->ei_quota_lock);
+ inode->v.i_state = 0;
+
+ if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) {
+ kmem_cache_free(bch2_inode_cache, inode);
+ return NULL;
+ }
+
+ return inode;
+}
+
/*
* Allocate a new inode, dropping/retaking btree locks if necessary:
*/
static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
{
- struct bch_fs *c = trans->c;
-
struct bch_inode_info *inode =
memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN,
- to_bch_ei(new_inode(c->vfs_sb)));
+ __bch2_new_inode(trans->c));
if (unlikely(!inode)) {
- int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM);
+ int ret = drop_locks_do(trans, (inode = __bch2_new_inode(trans->c)) ? 0 : -ENOMEM);
if (ret && inode) {
__destroy_inode(&inode->v);
kmem_cache_free(bch2_inode_cache, inode);
@@ -290,7 +314,7 @@ __bch2_create(struct mnt_idmap *idmap,
if (ret)
return ERR_PTR(ret);
#endif
- inode = to_bch_ei(new_inode(c->vfs_sb));
+ inode = __bch2_new_inode(c);
if (unlikely(!inode)) {
inode = ERR_PTR(-ENOMEM);
goto err;
@@ -323,7 +347,7 @@ retry:
inum.inum = inode_u.bi_inum;
ret = bch2_subvolume_get(trans, inum.subvol, true,
- BTREE_ITER_WITH_UPDATES, &subvol) ?:
+ BTREE_ITER_with_updates, &subvol) ?:
bch2_trans_commit(trans, NULL, &journal_seq, 0);
if (unlikely(ret)) {
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
@@ -376,17 +400,14 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_iter dirent_iter = {};
subvol_inum inum = {};
+ struct printbuf buf = PRINTBUF;
- int ret = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
- dir_hash_info, dir, name, 0);
+ struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
+ dir_hash_info, dir, name, 0);
+ int ret = bkey_err(k);
if (ret)
return ERR_PTR(ret);
- struct bkey_s_c k = bch2_btree_iter_peek_slot(&dirent_iter);
- ret = bkey_err(k);
- if (ret)
- goto err;
-
ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), &inum);
if (ret > 0)
ret = -ENOENT;
@@ -406,20 +427,31 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
- if (bch2_err_matches(ret, ENOENT)) {
- struct printbuf buf = PRINTBUF;
- bch2_bkey_val_to_text(&buf, c, k);
- bch_err(c, "%s points to missing inode", buf.buf);
- printbuf_exit(&buf);
- }
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
+ c, "dirent to missing inode:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
if (ret)
goto err;
+ /* regular files may have hardlinks: */
+ if (bch2_fs_inconsistent_on(bch2_inode_should_have_bp(&inode_u) &&
+ !bkey_eq(k.k->p, POS(inode_u.bi_dir, inode_u.bi_dir_offset)),
+ c,
+ "dirent points to inode that does not point back:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k),
+ prt_printf(&buf, "\n "),
+ bch2_inode_unpacked_to_text(&buf, &inode_u),
+ buf.buf))) {
+ ret = -ENOENT;
+ goto err;
+ }
+
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
inode = bch2_inode_insert(c, inode);
out:
bch2_trans_iter_exit(trans, &dirent_iter);
+ printbuf_exit(&buf);
return inode;
err:
inode = ERR_PTR(ret);
@@ -787,7 +819,7 @@ retry:
acl = NULL;
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
- BTREE_ITER_INTENT);
+ BTREE_ITER_intent);
if (ret)
goto btree_err;
@@ -1043,6 +1075,10 @@ retry:
bch2_btree_iter_set_pos(&iter,
POS(iter.pos.inode, iter.pos.offset + sectors));
+
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ break;
}
start = iter.pos.offset;
bch2_trans_iter_exit(trans, &iter);
@@ -1490,34 +1526,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
mapping_set_large_folios(inode->v.i_mapping);
}
-static struct inode *bch2_alloc_inode(struct super_block *sb)
-{
- struct bch_inode_info *inode;
-
- inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
- if (!inode)
- return NULL;
-
- inode_init_once(&inode->v);
- mutex_init(&inode->ei_update_lock);
- two_state_lock_init(&inode->ei_pagecache_lock);
- INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
- mutex_init(&inode->ei_quota_lock);
-
- return &inode->v;
-}
-
-static void bch2_i_callback(struct rcu_head *head)
-{
- struct inode *vinode = container_of(head, struct inode, i_rcu);
- struct bch_inode_info *inode = to_bch_ei(vinode);
-
- kmem_cache_free(bch2_inode_cache, inode);
-}
-
-static void bch2_destroy_inode(struct inode *vinode)
+static void bch2_free_inode(struct inode *vinode)
{
- call_rcu(&vinode->i_rcu, bch2_i_callback);
+ kmem_cache_free(bch2_inode_cache, to_bch_ei(vinode));
}
static int inode_update_times_fn(struct btree_trans *trans,
@@ -1825,7 +1836,7 @@ static int bch2_unfreeze(struct super_block *sb)
static const struct super_operations bch_super_operations = {
.alloc_inode = bch2_alloc_inode,
- .destroy_inode = bch2_destroy_inode,
+ .free_inode = bch2_free_inode,
.write_inode = bch2_vfs_write_inode,
.evict_inode = bch2_evict_inode,
.sync_fs = bch2_sync_fs,