summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-08-12 18:46:07 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-08-12 18:46:07 +0200
commit0c80bdfc9aa624b4a6c33c30ad11e81bf6407c36 (patch)
treecaea5044f1bd46b56c9894a23c629ef4bf93a5ce /fs
parentbfa54a793ba77ef696755b66f3ac4ed00c7d1248 (diff)
parent7c626ce4bae1ac14f60076d00eafe71af30450ba (diff)
Merge 6.11-rc3 into driver-core-next
We need the driver core fixes in here as well to build on top of. Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/bcachefs/acl.c11
-rw-r--r--fs/bcachefs/acl.h2
-rw-r--r--fs/bcachefs/alloc_background.h12
-rw-r--r--fs/bcachefs/alloc_foreground.c34
-rw-r--r--fs/bcachefs/alloc_foreground.h9
-rw-r--r--fs/bcachefs/bcachefs.h2
-rw-r--r--fs/bcachefs/bcachefs_format.h5
-rw-r--r--fs/bcachefs/btree_iter.c5
-rw-r--r--fs/bcachefs/btree_update_interior.c2
-rw-r--r--fs/bcachefs/buckets.c12
-rw-r--r--fs/bcachefs/buckets.h2
-rw-r--r--fs/bcachefs/disk_accounting.c65
-rw-r--r--fs/bcachefs/disk_accounting_format.h15
-rw-r--r--fs/bcachefs/ec.c34
-rw-r--r--fs/bcachefs/fs.c8
-rw-r--r--fs/bcachefs/io_misc.c6
-rw-r--r--fs/bcachefs/io_read.c1
-rw-r--r--fs/bcachefs/io_write.c5
-rw-r--r--fs/bcachefs/opts.h5
-rw-r--r--fs/bcachefs/replicas.c1
-rw-r--r--fs/bcachefs/sb-downgrade.c27
-rw-r--r--fs/bcachefs/sb-errors_format.h6
-rw-r--r--fs/bcachefs/super-io.c4
-rw-r--r--fs/bcachefs/super.c1
-rw-r--r--fs/bcachefs/sysfs.c6
-rw-r--r--fs/btrfs/block-group.c13
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/direct-io.c38
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/file.c22
-rw-r--r--fs/btrfs/free-space-cache.c4
-rw-r--r--fs/btrfs/inode.c28
-rw-r--r--fs/btrfs/print-tree.c2
-rw-r--r--fs/btrfs/scrub.c25
-rw-r--r--fs/btrfs/space-info.c5
-rw-r--r--fs/btrfs/space-info.h1
-rw-r--r--fs/btrfs/super.c5
-rw-r--r--fs/btrfs/tests/extent-map-tests.c99
-rw-r--r--fs/btrfs/tree-checker.c49
-rw-r--r--fs/ceph/caps.c35
-rw-r--r--fs/ceph/super.h7
-rw-r--r--fs/file.c1
-rw-r--r--fs/nfsd/nfsctl.c3
-rw-r--r--fs/smb/client/cifs_debug.c2
-rw-r--r--fs/smb/client/cifsfs.h4
-rw-r--r--fs/smb/client/cifsglob.h36
-rw-r--r--fs/smb/client/cifsproto.h2
-rw-r--r--fs/smb/client/inode.c17
-rw-r--r--fs/smb/client/ioctl.c32
-rw-r--r--fs/smb/client/misc.c65
-rw-r--r--fs/smb/client/reparse.c4
-rw-r--r--fs/smb/client/reparse.h19
-rw-r--r--fs/smb/client/smb2inode.c8
-rw-r--r--fs/smb/client/smb2pdu.c3
-rw-r--r--fs/smb/client/smbdirect.c8
-rw-r--r--fs/smb/client/trace.h51
-rw-r--r--fs/smb/client/transport.c2
-rw-r--r--fs/tracefs/event_inode.c4
-rw-r--r--fs/tracefs/inode.c12
-rw-r--r--fs/tracefs/internal.h5
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c28
-rw-r--r--fs/xfs/scrub/agheader_repair.c2
-rw-r--r--fs/xfs/scrub/parent.c2
-rw-r--r--fs/xfs/scrub/trace.h10
-rw-r--r--fs/xfs/xfs_attr_list.c2
-rw-r--r--fs/xfs/xfs_trace.h10
-rw-r--r--fs/xfs/xfs_xattr.c19
69 files changed, 699 insertions, 273 deletions
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
index a7b425d3c8a0..331a17f3f113 100644
--- a/fs/bcachefs/acl.c
+++ b/fs/bcachefs/acl.c
@@ -272,16 +272,19 @@ bch2_acl_to_xattr(struct btree_trans *trans,
return xattr;
}
-struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, int type)
+struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
{
- struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+ struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
- struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct posix_acl *acl = NULL;
+
+ if (rcu)
+ return ERR_PTR(-ECHILD);
+
+ struct btree_trans *trans = bch2_trans_get(c);
retry:
bch2_trans_begin(trans);
diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h
index 27e7eec0f278..fe730a6bf0c1 100644
--- a/fs/bcachefs/acl.h
+++ b/fs/bcachefs/acl.h
@@ -28,7 +28,7 @@ void bch2_acl_to_text(struct printbuf *, const void *, size_t);
#ifdef CONFIG_BCACHEFS_POSIX_ACL
-struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int);
+struct posix_acl *bch2_get_acl(struct inode *, int, bool);
int bch2_set_acl_trans(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *,
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 8d2b62c9588e..96a0444ea78f 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket,
bucket_data_type(bucket) != bucket_data_type(ptr);
}
+/*
+ * It is my general preference to use unsigned types for unsigned quantities -
+ * however, these helpers are used in disk accounting calculations run by
+ * triggers where the output will be negated and added to an s64. unsigned is
+ * right out even though all these quantities will fit in 32 bits, since it
+ * won't be sign extended correctly; u64 will negate "correctly", but s64 is the
+ * simpler option here.
+ */
static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a)
{
return a.stripe_sectors + a.dirty_sectors + a.cached_sectors;
@@ -166,8 +174,8 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
* avoid overflowing LRU_TIME_BITS on a corrupted fs, when
* bucket_sectors_dirty is (much) bigger than bucket_size
*/
- u64 d = min(bch2_bucket_sectors_dirty(a),
- ca->mi.bucket_size);
+ u64 d = min_t(s64, bch2_bucket_sectors_dirty(a),
+ ca->mi.bucket_size);
return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
}
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 618d2ff0292e..8563c2d26847 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1603,7 +1603,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope
prt_newline(out);
}
-void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
+void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bch_dev *ca)
{
struct open_bucket *ob;
@@ -1613,7 +1614,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++) {
spin_lock(&ob->lock);
- if (ob->valid && !ob->on_partial_list)
+ if (ob->valid && !ob->on_partial_list &&
+ (!ca || ob->dev == ca->dev_idx))
bch2_open_bucket_to_text(out, c, ob);
spin_unlock(&ob->lock);
}
@@ -1738,7 +1740,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
- bch2_dev_usage_to_text(out, &stats);
+ bch2_dev_usage_to_text(out, ca, &stats);
prt_newline(out);
@@ -1756,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats));
}
-void bch2_print_allocator_stuck(struct bch_fs *c)
+static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
{
struct printbuf buf = PRINTBUF;
- prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n");
+ prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n",
+ c->opts.allocator_stuck_timeout);
prt_printf(&buf, "Allocator debug:\n");
printbuf_indent_add(&buf, 2);
@@ -1790,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c)
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
+
+static inline unsigned allocator_wait_timeout(struct bch_fs *c)
+{
+ if (c->allocator_last_stuck &&
+ time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies))
+ return 0;
+
+ return c->opts.allocator_stuck_timeout * HZ;
+}
+
+void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
+{
+ unsigned t = allocator_wait_timeout(c);
+
+ if (t && closure_sync_timeout(cl, t)) {
+ c->allocator_last_stuck = jiffies;
+ bch2_print_allocator_stuck(c);
+ }
+
+ closure_sync(cl);
+}
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 6da9e7e29026..386d231ceca3 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp
void bch2_fs_allocator_foreground_init(struct bch_fs *);
void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *);
-void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
+void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *);
void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
@@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *);
void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *);
-void bch2_print_allocator_stuck(struct bch_fs *);
+void __bch2_wait_on_allocator(struct bch_fs *, struct closure *);
+static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
+{
+ if (cl->closure_get_happened)
+ __bch2_wait_on_allocator(c, cl);
+}
#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 91361a167dcd..eedf2d6045e7 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -893,6 +893,8 @@ struct bch_fs {
struct bch_fs_usage_base __percpu *usage;
u64 __percpu *online_reserved;
+ unsigned long allocator_last_stuck;
+
struct io_clock io_clock[2];
/* JOURNAL SEQ BLACKLIST */
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 74a60b1a4ddf..b25f86356728 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -675,7 +675,8 @@ struct bch_sb_field_ext {
x(btree_subvolume_children, BCH_VERSION(1, 6)) \
x(mi_btree_bitmap, BCH_VERSION(1, 7)) \
x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \
- x(disk_accounting_v2, BCH_VERSION(1, 9))
+ x(disk_accounting_v2, BCH_VERSION(1, 9)) \
+ x(disk_accounting_v3, BCH_VERSION(1, 10))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -836,6 +837,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
struct bch_sb, flags[5], 0, 16);
+LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
+ struct bch_sb, flags[5], 16, 32);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 36872207f09b..aa8a049071f4 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1921,6 +1921,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
bch2_trans_verify_not_in_restart(trans);
bch2_btree_iter_verify(iter);
+ ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+ if (ret)
+ goto err;
+
+
struct btree_path *path = btree_iter_path(trans, iter);
/* already at end? */
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 31ee50184be2..e61f9695771e 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1264,7 +1264,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
bch2_trans_unlock(trans);
- closure_sync(&cl);
+ bch2_wait_on_allocator(c, &cl);
} while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
}
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 2650a0d24663..9f7004e941ce 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -71,17 +71,21 @@ bch2_fs_usage_read_short(struct bch_fs *c)
return ret;
}
-void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
+void bch2_dev_usage_to_text(struct printbuf *out,
+ struct bch_dev *ca,
+ struct bch_dev_usage *usage)
{
prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n");
for (unsigned i = 0; i < BCH_DATA_NR; i++) {
bch2_prt_data_type(out, i);
prt_printf(out, "\t%llu\r%llu\r%llu\r\n",
- usage->d[i].buckets,
- usage->d[i].sectors,
- usage->d[i].fragmented);
+ usage->d[i].buckets,
+ usage->d[i].sectors,
+ usage->d[i].fragmented);
}
+
+ prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets);
}
static int bch2_check_fix_ptr(struct btree_trans *trans,
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 2d35eeb24a2d..edbdffd508fc 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -212,7 +212,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
return ret;
}
-void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *);
+void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *);
static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
{
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index dcdd59249c23..046ac92b6639 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -114,11 +114,74 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans,
return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
}
+static inline bool is_zero(char *start, char *end)
+{
+ BUG_ON(start > end);
+
+ for (; start < end; start++)
+ if (*start)
+ return false;
+ return true;
+}
+
+#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member))
+
int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags,
struct printbuf *err)
{
- return 0;
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, k.k->p);
+ void *end = &acc_k + 1;
+ int ret = 0;
+
+ switch (acc_k.type) {
+ case BCH_DISK_ACCOUNTING_nr_inodes:
+ end = field_end(acc_k, nr_inodes);
+ break;
+ case BCH_DISK_ACCOUNTING_persistent_reserved:
+ end = field_end(acc_k, persistent_reserved);
+ break;
+ case BCH_DISK_ACCOUNTING_replicas:
+ bkey_fsck_err_on(!acc_k.replicas.nr_devs,
+ c, err, accounting_key_replicas_nr_devs_0,
+ "accounting key replicas entry with nr_devs=0");
+
+ bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs ||
+ (acc_k.replicas.nr_required > 1 &&
+ acc_k.replicas.nr_required == acc_k.replicas.nr_devs),
+ c, err, accounting_key_replicas_nr_required_bad,
+ "accounting key replicas entry with bad nr_required");
+
+ for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++)
+ bkey_fsck_err_on(acc_k.replicas.devs[i] > acc_k.replicas.devs[i + 1],
+ c, err, accounting_key_replicas_devs_unsorted,
+ "accounting key replicas entry with unsorted devs");
+
+ end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas);
+ break;
+ case BCH_DISK_ACCOUNTING_dev_data_type:
+ end = field_end(acc_k, dev_data_type);
+ break;
+ case BCH_DISK_ACCOUNTING_compression:
+ end = field_end(acc_k, compression);
+ break;
+ case BCH_DISK_ACCOUNTING_snapshot:
+ end = field_end(acc_k, snapshot);
+ break;
+ case BCH_DISK_ACCOUNTING_btree:
+ end = field_end(acc_k, btree);
+ break;
+ case BCH_DISK_ACCOUNTING_rebalance_work:
+ end = field_end(acc_k, rebalance_work);
+ break;
+ }
+
+ bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)),
+ c, err, accounting_key_junk_at_end,
+ "junk at end of accounting key");
+fsck_err:
+ return ret;
}
void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k)
diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h
index cba417060b33..a93cf26ff4a9 100644
--- a/fs/bcachefs/disk_accounting_format.h
+++ b/fs/bcachefs/disk_accounting_format.h
@@ -124,20 +124,19 @@ struct bch_dev_data_type {
__u8 data_type;
};
-struct bch_dev_stripe_buckets {
- __u8 dev;
-};
-
struct bch_acct_compression {
__u8 type;
};
struct bch_acct_snapshot {
__u32 id;
-};
+} __packed;
struct bch_acct_btree {
__u32 id;
+} __packed;
+
+struct bch_acct_rebalance_work {
};
struct disk_accounting_pos {
@@ -149,12 +148,12 @@ struct disk_accounting_pos {
struct bch_persistent_reserved persistent_reserved;
struct bch_replicas_entry_v1 replicas;
struct bch_dev_data_type dev_data_type;
- struct bch_dev_stripe_buckets dev_stripe_buckets;
struct bch_acct_compression compression;
struct bch_acct_snapshot snapshot;
struct bch_acct_btree btree;
- };
- };
+ struct bch_acct_rebalance_work rebalance_work;
+ } __packed;
+ } __packed;
struct bpos _pad;
};
};
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 9b5b5c9a6c63..84f1cbf6497f 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1809,6 +1809,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity);
BUG_ON(v->nr_redundant != h->s->nr_parity);
+ /* * We bypass the sector allocator which normally does this: */
+ bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
+
for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
__clear_bit(v->ptrs[i].dev, devs.d);
if (i < h->s->nr_data)
@@ -2235,6 +2238,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
mutex_unlock(&c->ec_stripes_heap_lock);
}
+static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c,
+ struct ec_stripe_new *s)
+{
+ prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs",
+ s->idx, s->nr_data, s->nr_parity,
+ bitmap_weight(s->blocks_allocated, s->nr_data),
+ atomic_read(&s->ref[STRIPE_REF_io]),
+ atomic_read(&s->ref[STRIPE_REF_stripe]),
+ bch2_watermarks[s->h->watermark]);
+
+ struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
+ unsigned i;
+ for_each_set_bit(i, s->blocks_gotten, v->nr_blocks)
+ prt_printf(out, " %u", s->blocks[i]);
+ prt_newline(out);
+}
+
void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
{
struct ec_stripe_head *h;
@@ -2247,23 +2267,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
bch2_watermarks[h->watermark]);
if (h->s)
- prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n",
- h->s->idx, h->s->nr_data, h->s->nr_parity,
- bitmap_weight(h->s->blocks_allocated,
- h->s->nr_data));
+ bch2_new_stripe_to_text(out, c, h->s);
}
mutex_unlock(&c->ec_stripe_head_lock);
prt_printf(out, "in flight:\n");
mutex_lock(&c->ec_stripe_new_lock);
- list_for_each_entry(s, &c->ec_stripe_new_list, list) {
- prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n",
- s->idx, s->nr_data, s->nr_parity,
- atomic_read(&s->ref[STRIPE_REF_io]),
- atomic_read(&s->ref[STRIPE_REF_stripe]),
- bch2_watermarks[s->h->watermark]);
- }
+ list_for_each_entry(s, &c->ec_stripe_new_list, list)
+ bch2_new_stripe_to_text(out, c, s);
mutex_unlock(&c->ec_stripe_new_lock);
}
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 3a5f49affa0a..15fc41e63b6c 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1199,7 +1199,7 @@ static const struct inode_operations bch_file_inode_operations = {
.fiemap = bch2_fiemap,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
- .get_acl = bch2_get_acl,
+ .get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@@ -1219,7 +1219,7 @@ static const struct inode_operations bch_dir_inode_operations = {
.tmpfile = bch2_tmpfile,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
- .get_acl = bch2_get_acl,
+ .get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@@ -1241,7 +1241,7 @@ static const struct inode_operations bch_symlink_inode_operations = {
.setattr = bch2_setattr,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
- .get_acl = bch2_get_acl,
+ .get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@@ -1251,7 +1251,7 @@ static const struct inode_operations bch_special_inode_operations = {
.setattr = bch2_setattr,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
- .get_acl = bch2_get_acl,
+ .get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index 2cf6297756f8..177ed331c00b 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -126,11 +126,7 @@ err_noprint:
if (closure_nr_remaining(&cl) != 1) {
bch2_trans_unlock_long(trans);
-
- if (closure_sync_timeout(&cl, HZ * 10)) {
- bch2_print_allocator_stuck(c);
- closure_sync(&cl);
- }
+ bch2_wait_on_allocator(c, &cl);
}
return ret;
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 4531c9ab3e12..7ee3b75480df 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
bch2_trans_iter_init(trans, &iter, rbio->data_btree,
rbio->read_pos, BTREE_ITER_slots);
retry:
+ bch2_trans_begin(trans);
rbio->bio.bi_status = 0;
k = bch2_btree_iter_peek_slot(&iter);
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index d31c8d006d97..1d4761d15002 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -1503,10 +1503,7 @@ err:
if ((op->flags & BCH_WRITE_SYNC) ||
(!(op->flags & BCH_WRITE_SUBMITTED) &&
!(op->flags & BCH_WRITE_IN_WORKER))) {
- if (closure_sync_timeout(&op->cl, HZ * 10)) {
- bch2_print_allocator_stuck(c);
- closure_sync(&op->cl);
- }
+ bch2_wait_on_allocator(c, &op->cl);
__bch2_write_index(op);
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 60b93018501f..cda1725702ea 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -391,6 +391,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \
NULL, "Log transaction function names in journal") \
+ x(allocator_stuck_timeout, u16, \
+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_UINT(0, U16_MAX), \
+ BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \
+ NULL, "Default timeout in seconds for stuck allocator messages")\
x(noexcl, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 10c96cb2047a..1223b710755d 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -24,7 +24,6 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv)
static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
- BUG_ON(e->data_type >= BCH_DATA_NR);
BUG_ON(!e->nr_devs);
BUG_ON(e->nr_required > 1 &&
e->nr_required >= e->nr_devs);
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index dfbbd33c8731..6c4469f53313 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -61,7 +61,18 @@
BCH_FSCK_ERR_dev_usage_buckets_wrong, \
BCH_FSCK_ERR_dev_usage_sectors_wrong, \
BCH_FSCK_ERR_dev_usage_fragmented_wrong, \
- BCH_FSCK_ERR_accounting_mismatch)
+ BCH_FSCK_ERR_accounting_mismatch) \
+ x(disk_accounting_v3, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_bkey_version_in_future, \
+ BCH_FSCK_ERR_dev_usage_buckets_wrong, \
+ BCH_FSCK_ERR_dev_usage_sectors_wrong, \
+ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \
+ BCH_FSCK_ERR_accounting_mismatch, \
+ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \
+ BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \
+ BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \
+ BCH_FSCK_ERR_accounting_key_junk_at_end)
#define DOWNGRADE_TABLE() \
x(bucket_stripe_sectors, \
@@ -79,6 +90,20 @@
BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \
BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \
BCH_FSCK_ERR_fs_usage_replicas_wrong, \
+ BCH_FSCK_ERR_bkey_version_in_future) \
+ x(disk_accounting_v3, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_dev_usage_buckets_wrong, \
+ BCH_FSCK_ERR_dev_usage_sectors_wrong, \
+ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \
+ BCH_FSCK_ERR_fs_usage_hidden_wrong, \
+ BCH_FSCK_ERR_fs_usage_btree_wrong, \
+ BCH_FSCK_ERR_fs_usage_data_wrong, \
+ BCH_FSCK_ERR_fs_usage_cached_wrong, \
+ BCH_FSCK_ERR_fs_usage_reserved_wrong, \
+ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \
+ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \
+ BCH_FSCK_ERR_fs_usage_replicas_wrong, \
BCH_FSCK_ERR_bkey_version_in_future)
struct upgrade_downgrade_entry {
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index d1b2f2aa397a..d3a498617303 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -287,7 +287,11 @@ enum bch_fsck_flags {
x(accounting_replicas_not_marked, 273, 0) \
x(invalid_btree_id, 274, 0) \
x(alloc_key_io_time_bad, 275, 0) \
- x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX)
+ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
+ x(accounting_key_junk_at_end, 277, 0) \
+ x(accounting_key_replicas_nr_devs_0, 278, 0) \
+ x(accounting_key_replicas_nr_required_bad, 279, 0) \
+ x(accounting_key_replicas_devs_unsorted, 280, 0) \
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 8bc819832790..c8c2ccbdfbb5 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));
+
+ if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 &&
+ !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb))
+ SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30);
}
for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 0455a1001fec..e7fa2de35014 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1193,7 +1193,6 @@ static void bch2_dev_free(struct bch_dev *ca)
if (ca->kobj.state_in_sysfs)
kobject_del(&ca->kobj);
- kfree(ca->buckets_nouse);
bch2_free_super(&ca->disk_sb);
bch2_dev_allocator_background_exit(ca);
bch2_dev_journal_exit(ca);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 1c0d1fb20276..f393023a3ae2 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -367,7 +367,7 @@ SHOW(bch2_fs)
bch2_stripes_heap_to_text(out, c);
if (attr == &sysfs_open_buckets)
- bch2_open_buckets_to_text(out, c);
+ bch2_open_buckets_to_text(out, c, NULL);
if (attr == &sysfs_open_buckets_partial)
bch2_open_buckets_partial_to_text(out, c);
@@ -811,6 +811,9 @@ SHOW(bch2_dev)
if (attr == &sysfs_alloc_debug)
bch2_dev_alloc_debug_to_text(out, ca);
+ if (attr == &sysfs_open_buckets)
+ bch2_open_buckets_to_text(out, c, ca);
+
return 0;
}
@@ -892,6 +895,7 @@ struct attribute *bch2_dev_files[] = {
/* debug: */
&sysfs_alloc_debug,
+ &sysfs_open_buckets,
NULL
};
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 498442d0c216..2e49d978f504 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1223,8 +1223,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
block_group->space_info->total_bytes -= block_group->length;
block_group->space_info->bytes_readonly -=
(block_group->length - block_group->zone_unusable);
- block_group->space_info->bytes_zone_unusable -=
- block_group->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info,
+ -block_group->zone_unusable);
block_group->space_info->disk_total -= block_group->length * factor;
spin_unlock(&block_group->space_info->lock);
@@ -1396,7 +1396,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
if (btrfs_is_zoned(cache->fs_info)) {
/* Migrate zone_unusable bytes to readonly */
sinfo->bytes_readonly += cache->zone_unusable;
- sinfo->bytes_zone_unusable -= cache->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
+ -cache->zone_unusable);
cache->zone_unusable = 0;
}
cache->ro++;
@@ -3056,9 +3057,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
if (btrfs_is_zoned(cache->fs_info)) {
/* Migrate zone_unusable bytes back */
cache->zone_unusable =
- (cache->alloc_offset - cache->used) +
+ (cache->alloc_offset - cache->used - cache->pinned -
+ cache->reserved) +
(cache->length - cache->zone_capacity);
- sinfo->bytes_zone_unusable += cache->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
+ cache->zone_unusable);
sinfo->bytes_readonly -= cache->zone_unusable;
}
num_bytes = cache->length - cache->reserved -
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c8568b1a61c4..75fa563e4cac 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -459,6 +459,7 @@ struct btrfs_file_private {
void *filldir_buf;
u64 last_index;
struct extent_state *llseek_cached_state;
+ bool fsync_skip_inode_lock;
};
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index f9fb2db6a1e4..67adbe9d294a 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -856,21 +856,37 @@ relock:
* So here we disable page faults in the iov_iter and then retry if we
* got -EFAULT, faulting in the pages before the retry.
*/
+again:
from->nofault = true;
dio = btrfs_dio_write(iocb, from, written);
from->nofault = false;
- /*
- * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync
- * iocb, and that needs to lock the inode. So unlock it before calling
- * iomap_dio_complete() to avoid a deadlock.
- */
- btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
-
- if (IS_ERR_OR_NULL(dio))
+ if (IS_ERR_OR_NULL(dio)) {
ret = PTR_ERR_OR_ZERO(dio);
- else
+ } else {
+ struct btrfs_file_private stack_private = { 0 };
+ struct btrfs_file_private *private;
+ const bool have_private = (file->private_data != NULL);
+
+ if (!have_private)
+ file->private_data = &stack_private;
+
+ /*
+ * If we have a synchronous write, we must make sure the fsync
+ * triggered by the iomap_dio_complete() call below doesn't
+ * deadlock on the inode lock - we are already holding it and we
+ * can't call it after unlocking because we may need to complete
+ * partial writes due to the input buffer (or parts of it) not
+ * being already faulted in.
+ */
+ private = file->private_data;
+ private->fsync_skip_inode_lock = true;
ret = iomap_dio_complete(dio);
+ private->fsync_skip_inode_lock = false;
+
+ if (!have_private)
+ file->private_data = NULL;
+ }
/* No increment (+=) because iomap returns a cumulative value. */
if (ret > 0)
@@ -897,10 +913,12 @@ relock:
} else {
fault_in_iov_iter_readable(from, left);
prev_left = left;
- goto relock;
+ goto again;
}
}
+ btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
+
/*
* If 'ret' is -ENOTBLK or we have not written all data, then it means
* we must fallback to buffered IO.
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d77498e7671c..ff9f0d41987e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2793,7 +2793,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
readonly = true;
} else if (btrfs_is_zoned(fs_info)) {
/* Need reset before reusing in a zoned block group */
- space_info->bytes_zone_unusable += len;
+ btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info,
+ len);
readonly = true;
}
spin_unlock(&cache->lock);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 81558f90ee80..23b65dc73c00 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -664,7 +664,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
start_diff = start - em->start;
em->start = start;
em->len = end - start;
- if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE && !extent_map_is_compressed(em))
+ if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
em->offset += start_diff;
return add_extent_mapping(inode, em, 0);
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 21381de906f6..9914419f3b7d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1603,6 +1603,7 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
*/
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
+ struct btrfs_file_private *private = file->private_data;
struct dentry *dentry = file_dentry(file);
struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
struct btrfs_root *root = inode->root;
@@ -1612,6 +1613,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
int ret = 0, err;
u64 len;
bool full_sync;
+ const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false);
trace_btrfs_sync_file(file, datasync);
@@ -1639,7 +1641,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (ret)
goto out;
- btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
+ if (skip_ilock)
+ down_write(&inode->i_mmap_lock);
+ else
+ btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
atomic_inc(&root->log_batch);
@@ -1663,7 +1668,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
ret = start_ordered_ops(inode, start, end);
if (ret) {
- btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
+ if (skip_ilock)
+ up_write(&inode->i_mmap_lock);
+ else
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
goto out;
}
@@ -1788,7 +1796,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* file again, but that will end up using the synchronization
* inside btrfs_sync_log to keep things safe.
*/
- btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
+ if (skip_ilock)
+ up_write(&inode->i_mmap_lock);
+ else
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
if (ret == BTRFS_NO_LOG_SYNC) {
ret = btrfs_end_transaction(trans);
@@ -1857,7 +1868,10 @@ out:
out_release_extents:
btrfs_release_log_ctx_extents(&ctx);
- btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
+ if (skip_ilock)
+ up_write(&inode->i_mmap_lock);
+ else
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
goto out;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3f9b7507543a..f5996a43db24 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2723,8 +2723,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
* If the block group is read-only, we should account freed space into
* bytes_readonly.
*/
- if (!block_group->ro)
+ if (!block_group->ro) {
block_group->zone_unusable += to_unusable;
+ WARN_ON(block_group->zone_unusable > block_group->length);
+ }
spin_unlock(&ctl->tree_lock);
if (!used) {
spin_lock(&block_group->lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 01eab6955647..333b0e8587a2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -714,8 +714,9 @@ out:
return ret;
}
-static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset,
- u64 end,
+static noinline int cow_file_range_inline(struct btrfs_inode *inode,
+ struct page *locked_page,
+ u64 offset, u64 end,
size_t compressed_size,
int compress_type,
struct folio *compressed_folio,
@@ -739,7 +740,10 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset,
return ret;
}
- extent_clear_unlock_delalloc(inode, offset, end, NULL, &cached,
+ if (ret == 0)
+ locked_page = NULL;
+
+ extent_clear_unlock_delalloc(inode, offset, end, locked_page, &cached,
clear_flags,
PAGE_UNLOCK | PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
@@ -1043,10 +1047,10 @@ again:
* extent for the subpage case.
*/
if (total_in < actual_end)
- ret = cow_file_range_inline(inode, start, end, 0,
+ ret = cow_file_range_inline(inode, NULL, start, end, 0,
BTRFS_COMPRESS_NONE, NULL, false);
else
- ret = cow_file_range_inline(inode, start, end, total_compressed,
+ ret = cow_file_range_inline(inode, NULL, start, end, total_compressed,
compress_type, folios[0], false);
if (ret <= 0) {
if (ret < 0)
@@ -1359,7 +1363,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
if (!no_inline) {
/* lets try to make an inline extent */
- ret = cow_file_range_inline(inode, start, end, 0,
+ ret = cow_file_range_inline(inode, locked_page, start, end, 0,
BTRFS_COMPRESS_NONE, NULL, false);
if (ret <= 0) {
/*
@@ -1581,6 +1585,7 @@ out_unlock:
locked_page, &cached,
clear_bits,
page_ops);
+ btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
start += cur_alloc_size;
}
@@ -1594,6 +1599,7 @@ out_unlock:
clear_bits |= EXTENT_CLEAR_DATA_RESV;
extent_clear_unlock_delalloc(inode, start, end, locked_page,
&cached, clear_bits, page_ops);
+ btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
}
return ret;
}
@@ -2255,6 +2261,7 @@ error:
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
+ btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
}
btrfs_free_path(path);
return ret;
@@ -5660,7 +5667,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
- struct btrfs_key location;
+ struct btrfs_key location = { 0 };
u8 di_type = 0;
int ret = 0;
@@ -7198,6 +7205,12 @@ static void wait_subpage_spinlock(struct page *page)
spin_unlock_irq(&subpage->lock);
}
+static int btrfs_launder_folio(struct folio *folio)
+{
+ return btrfs_qgroup_free_data(folio_to_inode(folio), NULL, folio_pos(folio),
+ PAGE_SIZE, NULL);
+}
+
static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags)
{
if (try_release_extent_mapping(&folio->page, gfp_flags)) {
@@ -10133,6 +10146,7 @@ static const struct address_space_operations btrfs_aops = {
.writepages = btrfs_writepages,
.readahead = btrfs_readahead,
.invalidate_folio = btrfs_invalidate_folio,
+ .launder_folio = btrfs_launder_folio,
.release_folio = btrfs_release_folio,
.migrate_folio = btrfs_migrate_folio,
.dirty_folio = filemap_dirty_folio,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 32dcea662da3..fc821aa446f0 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -14,7 +14,7 @@
struct root_name_map {
u64 id;
- char name[16];
+ const char *name;
};
static const struct root_name_map root_map[] = {
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 14a8d7100018..0de9162ff481 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1648,14 +1648,20 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe)
}
}
+static u32 stripe_length(const struct scrub_stripe *stripe)
+{
+ ASSERT(stripe->bg);
+
+ return min(BTRFS_STRIPE_LEN,
+ stripe->bg->start + stripe->bg->length - stripe->logical);
+}
+
static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
struct scrub_stripe *stripe)
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
- unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start +
- stripe->bg->length - stripe->logical) >>
- fs_info->sectorsize_bits;
+ unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits;
u64 stripe_len = BTRFS_STRIPE_LEN;
int mirror = stripe->mirror_num;
int i;
@@ -1729,9 +1735,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_bio *bbio;
- unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start +
- stripe->bg->length - stripe->logical) >>
- fs_info->sectorsize_bits;
+ unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits;
int mirror = stripe->mirror_num;
ASSERT(stripe->bg);
@@ -1871,6 +1875,9 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
stripe = &sctx->stripes[i];
wait_scrub_stripe_io(stripe);
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.last_physical = stripe->physical + stripe_length(stripe);
+ spin_unlock(&sctx->stat_lock);
scrub_reset_stripe(stripe);
}
out:
@@ -2139,7 +2146,9 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
cur_physical, &found_logical);
if (ret > 0) {
/* No more extent, just update the accounting */
+ spin_lock(&sctx->stat_lock);
sctx->stat.last_physical = physical + logical_length;
+ spin_unlock(&sctx->stat_lock);
ret = 0;
break;
}
@@ -2336,6 +2345,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
stripe_logical += chunk_logical;
ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg,
map, stripe_logical);
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.last_physical = min(physical + BTRFS_STRIPE_LEN,
+ physical_end);
+ spin_unlock(&sctx->stat_lock);
if (ret)
goto out;
goto next;
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 9ac94d3119e8..68e14fd48638 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -316,7 +316,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
found->bytes_used += block_group->used;
found->disk_used += block_group->used * factor;
found->bytes_readonly += block_group->bytes_super;
- found->bytes_zone_unusable += block_group->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable);
if (block_group->length > 0)
found->full = 0;
btrfs_try_granting_tickets(info, found);
@@ -583,8 +583,7 @@ again:
spin_lock(&cache->lock);
avail = cache->length - cache->used - cache->pinned -
- cache->reserved - cache->delalloc_bytes -
- cache->bytes_super - cache->zone_unusable;
+ cache->reserved - cache->bytes_super - cache->zone_unusable;
btrfs_info(fs_info,
"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s",
cache->start, cache->length, cache->used, cache->pinned,
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 4db8a0267c16..88b44221ce97 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -249,6 +249,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \
DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info");
DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
+DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable");
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 08d33cb372fb..83478deada3b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -683,8 +683,11 @@ bool btrfs_check_options(const struct btrfs_fs_info *info,
ret = false;
if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) {
- if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE))
+ if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) {
btrfs_info(info, "disk space caching is enabled");
+ btrfs_warn(info,
+"space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2");
+ }
if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE))
btrfs_info(info, "using free-space-tree");
}
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index ebec4ab361b8..56e61ac1cc64 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -900,6 +900,102 @@ out:
return ret;
}
+/*
+ * Test a regression for compressed extent map adjustment when we attempt to
+ * add an extent map that is partially overlapped by another existing extent
+ * map. The resulting extent map offset was left unchanged despite having
+ * incremented its start offset.
+ */
+static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
+{
+ struct extent_map_tree *em_tree = &inode->extent_tree;
+ struct extent_map *em;
+ int ret;
+ int ret2;
+
+ em = alloc_extent_map();
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ return -ENOMEM;
+ }
+
+ /* Compressed extent for the file range [120K, 128K). */
+ em->start = SZ_1K * 120;
+ em->len = SZ_8K;
+ em->disk_num_bytes = SZ_4K;
+ em->ram_bytes = SZ_8K;
+ em->flags |= EXTENT_FLAG_COMPRESS_ZLIB;
+ write_lock(&em_tree->lock);
+ ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
+ write_unlock(&em_tree->lock);
+ free_extent_map(em);
+ if (ret < 0) {
+ test_err("couldn't add extent map for range [120K, 128K)");
+ goto out;
+ }
+
+ em = alloc_extent_map();
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Compressed extent for the file range [108K, 144K), which overlaps
+ * with the [120K, 128K) we previously inserted.
+ */
+ em->start = SZ_1K * 108;
+ em->len = SZ_1K * 36;
+ em->disk_num_bytes = SZ_4K;
+ em->ram_bytes = SZ_1K * 36;
+ em->flags |= EXTENT_FLAG_COMPRESS_ZLIB;
+
+ /*
+ * Try to add the extent map but with a search range of [140K, 144K),
+ * this should succeed and adjust the extent map to the range
+ * [128K, 144K), with a length of 16K and an offset of 20K.
+ *
+ * This simulates a scenario where in the subvolume tree of an inode we
+ * have a compressed file extent item for the range [108K, 144K) and we
+ * have an overlapping compressed extent map for the range [120K, 128K),
+ * which was created by an encoded write, but its ordered extent was not
+ * yet completed, so the subvolume tree doesn't have yet the file extent
+ * item for that range - we only have the extent map in the inode's
+ * extent map tree.
+ */
+ write_lock(&em_tree->lock);
+ ret = btrfs_add_extent_mapping(inode, &em, SZ_1K * 140, SZ_4K);
+ write_unlock(&em_tree->lock);
+ free_extent_map(em);
+ if (ret < 0) {
+ test_err("couldn't add extent map for range [108K, 144K)");
+ goto out;
+ }
+
+ if (em->start != SZ_128K) {
+ test_err("unexpected extent map start %llu (should be 128K)", em->start);
+ ret = -EINVAL;
+ goto out;
+ }
+ if (em->len != SZ_16K) {
+ test_err("unexpected extent map length %llu (should be 16K)", em->len);
+ ret = -EINVAL;
+ goto out;
+ }
+ if (em->offset != SZ_1K * 20) {
+ test_err("unexpected extent map offset %llu (should be 20K)", em->offset);
+ ret = -EINVAL;
+ goto out;
+ }
+out:
+ ret2 = free_extent_map_tree(inode);
+ if (ret == 0)
+ ret = ret2;
+
+ return ret;
+}
+
struct rmap_test_vector {
u64 raid_type;
u64 physical_start;
@@ -1078,6 +1174,9 @@ int btrfs_test_extent_map(void)
ret = test_case_7(fs_info, BTRFS_I(inode));
if (ret)
goto out;
+ ret = test_case_8(fs_info, BTRFS_I(inode));
+ if (ret)
+ goto out;
test_msg("running rmap tests");
for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 6388786fd8b5..a825fa598e3c 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -634,7 +634,7 @@ static int check_dir_item(struct extent_buffer *leaf,
*/
if (key->type == BTRFS_DIR_ITEM_KEY ||
key->type == BTRFS_XATTR_ITEM_KEY) {
- char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
+ char namebuf[MAX(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
read_extent_buffer(leaf, namebuf,
(unsigned long)(di + 1), name_len);
@@ -1289,6 +1289,19 @@ static void extent_err(const struct extent_buffer *eb, int slot,
va_end(args);
}
+static bool is_valid_dref_root(u64 rootid)
+{
+ /*
+ * The following tree root objectids are allowed to have a data backref:
+ * - subvolume trees
+ * - data reloc tree
+ * - tree root
+ * For v1 space cache
+ */
+ return is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID ||
+ rootid == BTRFS_ROOT_TREE_OBJECTID;
+}
+
static int check_extent_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot,
struct btrfs_key *prev_key)
@@ -1441,6 +1454,8 @@ static int check_extent_item(struct extent_buffer *leaf,
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
u64 seq;
+ u64 dref_root;
+ u64 dref_objectid;
u64 dref_offset;
u64 inline_offset;
u8 inline_type;
@@ -1484,11 +1499,26 @@ static int check_extent_item(struct extent_buffer *leaf,
*/
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ dref_root = btrfs_extent_data_ref_root(leaf, dref);
+ dref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
seq = hash_extent_data_ref(
btrfs_extent_data_ref_root(leaf, dref),
btrfs_extent_data_ref_objectid(leaf, dref),
btrfs_extent_data_ref_offset(leaf, dref));
+ if (unlikely(!is_valid_dref_root(dref_root))) {
+ extent_err(leaf, slot,
+ "invalid data ref root value %llu",
+ dref_root);
+ return -EUCLEAN;
+ }
+ if (unlikely(dref_objectid < BTRFS_FIRST_FREE_OBJECTID ||
+ dref_objectid > BTRFS_LAST_FREE_OBJECTID)) {
+ extent_err(leaf, slot,
+ "invalid data ref objectid value %llu",
+ dref_root);
+ return -EUCLEAN;
+ }
if (unlikely(!IS_ALIGNED(dref_offset,
fs_info->sectorsize))) {
extent_err(leaf, slot,
@@ -1627,6 +1657,8 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
return -EUCLEAN;
}
for (; ptr < end; ptr += sizeof(*dref)) {
+ u64 root;
+ u64 objectid;
u64 offset;
/*
@@ -1634,7 +1666,22 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
* overflow from the leaf due to hash collisions.
*/
dref = (struct btrfs_extent_data_ref *)ptr;
+ root = btrfs_extent_data_ref_root(leaf, dref);
+ objectid = btrfs_extent_data_ref_objectid(leaf, dref);
offset = btrfs_extent_data_ref_offset(leaf, dref);
+ if (unlikely(!is_valid_dref_root(root))) {
+ extent_err(leaf, slot,
+ "invalid extent data backref root value %llu",
+ root);
+ return -EUCLEAN;
+ }
+ if (unlikely(objectid < BTRFS_FIRST_FREE_OBJECTID ||
+ objectid > BTRFS_LAST_FREE_OBJECTID)) {
+ extent_err(leaf, slot,
+ "invalid extent data backref objectid value %llu",
+ root);
+ return -EUCLEAN;
+ }
if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
extent_err(leaf, slot,
"invalid extent data backref offset, have %llu expect aligned to %u",
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index e98aa8219303..808c9c048276 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2016,6 +2016,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
* CHECK_CAPS_AUTHONLY - we should only check the auth cap
* CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
* further delay.
+ * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
+ * further delay.
*/
void ceph_check_caps(struct ceph_inode_info *ci, int flags)
{
@@ -2097,7 +2099,7 @@ retry:
}
doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
- "flushing %s issued %s revoking %s retain %s %s%s%s\n",
+ "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
ceph_cap_string(ci->i_flushing_caps),
@@ -2105,7 +2107,8 @@ retry:
ceph_cap_string(retain),
(flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
(flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
- (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
+ (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
+ (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
/*
* If we no longer need to hold onto old our caps, and we may
@@ -2180,6 +2183,11 @@ retry:
queue_writeback = true;
}
+ if (flags & CHECK_CAPS_FLUSH_FORCE) {
+ doutc(cl, "force to flush caps\n");
+ goto ack;
+ }
+
if (cap == ci->i_auth_cap &&
(cap->issued & CEPH_CAP_FILE_WR)) {
/* request larger max_size from MDS? */
@@ -3510,6 +3518,8 @@ static void handle_cap_grant(struct inode *inode,
bool queue_invalidate = false;
bool deleted_inode = false;
bool fill_inline = false;
+ bool revoke_wait = false;
+ int flags = 0;
/*
* If there is at least one crypto block then we'll trust
@@ -3705,16 +3715,18 @@ static void handle_cap_grant(struct inode *inode,
ceph_cap_string(cap->issued), ceph_cap_string(newcaps),
ceph_cap_string(revoking));
if (S_ISREG(inode->i_mode) &&
- (revoking & used & CEPH_CAP_FILE_BUFFER))
+ (revoking & used & CEPH_CAP_FILE_BUFFER)) {
writeback = true; /* initiate writeback; will delay ack */
- else if (queue_invalidate &&
+ revoke_wait = true;
+ } else if (queue_invalidate &&
revoking == CEPH_CAP_FILE_CACHE &&
- (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
- ; /* do nothing yet, invalidation will be queued */
- else if (cap == ci->i_auth_cap)
+ (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) {
+ revoke_wait = true; /* do nothing yet, invalidation will be queued */
+ } else if (cap == ci->i_auth_cap) {
check_caps = 1; /* check auth cap only */
- else
+ } else {
check_caps = 2; /* check all caps */
+ }
/* If there is new caps, try to wake up the waiters */
if (~cap->issued & newcaps)
wake = true;
@@ -3741,8 +3753,9 @@ static void handle_cap_grant(struct inode *inode,
BUG_ON(cap->issued & ~cap->implemented);
/* don't let check_caps skip sending a response to MDS for revoke msgs */
- if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
+ if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
cap->mds_wanted = 0;
+ flags |= CHECK_CAPS_FLUSH_FORCE;
if (cap == ci->i_auth_cap)
check_caps = 1; /* check auth cap only */
else
@@ -3798,9 +3811,9 @@ static void handle_cap_grant(struct inode *inode,
mutex_unlock(&session->s_mutex);
if (check_caps == 1)
- ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
+ ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
else if (check_caps == 2)
- ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
+ ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
}
/*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b63b4cd9b5b6..6e817bf1337c 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -200,9 +200,10 @@ struct ceph_cap {
struct list_head caps_item;
};
-#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */
-#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */
-#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */
+#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */
+#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */
+#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */
+#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */
struct ceph_cap_flush {
u64 tid;
diff --git a/fs/file.c b/fs/file.c
index a3b72aa64f11..a11e59b5d602 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1248,6 +1248,7 @@ __releases(&files->file_lock)
* tables and this condition does not arise without those.
*/
fdt = files_fdtable(files);
+ fd = array_index_nospec(fd, fdt->max_fds);
tofree = fdt->fd[fd];
if (!tofree && fd_is_open(fd, fdt))
goto Ebusy;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 9e0ea6fc2aa3..34eb2c2cbcde 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -2069,8 +2069,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info)
continue;
}
- ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa,
- SVC_SOCK_ANONYMOUS,
+ ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0,
get_current_cred());
/* always save the latest error */
if (ret < 0)
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index c71ae5c04306..4a20e92474b2 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -1072,7 +1072,7 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file)
static void
cifs_security_flags_handle_must_flags(unsigned int *flags)
{
- unsigned int signflags = *flags & CIFSSEC_MUST_SIGN;
+ unsigned int signflags = *flags & (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL);
if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
*flags = CIFSSEC_MUST_KRB5;
diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index 62d5fee3e5eb..ca2bd204bcc5 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -147,6 +147,6 @@ extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
/* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 49
-#define CIFS_VERSION "2.49"
+#define SMB3_PRODUCT_BUILD 50
+#define CIFS_VERSION "2.50"
#endif /* _CIFSFS_H */
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 8e86fec7dcd2..5c9b3e6cd95f 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -345,7 +345,7 @@ struct smb_version_operations {
/* connect to a server share */
int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *,
struct cifs_tcon *, const struct nls_table *);
- /* close tree connecion */
+ /* close tree connection */
int (*tree_disconnect)(const unsigned int, struct cifs_tcon *);
/* get DFS referrals */
int (*get_dfs_refer)(const unsigned int, struct cifs_ses *,
@@ -816,7 +816,7 @@ struct TCP_Server_Info {
* Protected by @refpath_lock and @srv_lock. The @refpath_lock is
* mostly used for not requiring a copy of @leaf_fullpath when getting
* cached or new DFS referrals (which might also sleep during I/O).
- * While @srv_lock is held for making string and NULL comparions against
+ * While @srv_lock is held for making string and NULL comparisons against
* both fields as in mount(2) and cache refresh.
*
* format: \\HOST\SHARE[\OPTIONAL PATH]
@@ -1471,29 +1471,6 @@ struct cifs_io_parms {
struct TCP_Server_Info *server;
};
-struct cifs_aio_ctx {
- struct kref refcount;
- struct list_head list;
- struct mutex aio_mutex;
- struct completion done;
- struct iov_iter iter;
- struct kiocb *iocb;
- struct cifsFileInfo *cfile;
- struct bio_vec *bv;
- loff_t pos;
- unsigned int nr_pinned_pages;
- ssize_t rc;
- unsigned int len;
- unsigned int total_len;
- unsigned int bv_need_unpin; /* If ->bv[] needs unpinning */
- bool should_dirty;
- /*
- * Indicates if this aio_ctx is for direct_io,
- * If yes, iter is a copy of the user passed iov_iter
- */
- bool direct_io;
-};
-
struct cifs_io_request {
struct netfs_io_request rreq;
struct cifsFileInfo *cfile;
@@ -1904,7 +1881,7 @@ static inline bool is_replayable_error(int error)
#define CIFSSEC_MAY_SIGN 0x00001
#define CIFSSEC_MAY_NTLMV2 0x00004
#define CIFSSEC_MAY_KRB5 0x00008
-#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */
+#define CIFSSEC_MAY_SEAL 0x00040
#define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */
#define CIFSSEC_MUST_SIGN 0x01001
@@ -1914,11 +1891,11 @@ require use of the stronger protocol */
#define CIFSSEC_MUST_NTLMV2 0x04004
#define CIFSSEC_MUST_KRB5 0x08008
#ifdef CONFIG_CIFS_UPCALL
-#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */
+#define CIFSSEC_MASK 0xCF0CF /* flags supported if no weak allowed */
#else
-#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */
+#define CIFSSEC_MASK 0xC70C7 /* flags supported if no weak allowed */
#endif /* UPCALL */
-#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
+#define CIFSSEC_MUST_SEAL 0x40040
#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL)
@@ -2010,7 +1987,6 @@ require use of the stronger protocol */
* cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo
* ->invalidHandle initiate_cifs_search
* ->oplock_break_cancelled
- * cifs_aio_ctx->aio_mutex cifs_aio_ctx cifs_aio_ctx_alloc
****************************************************************************/
#ifdef DECLARE_GLOBALS_HERE
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index c15bb5ee7eb7..497bf3c447bc 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -619,8 +619,6 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
struct shash_desc *shash);
enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
-struct cifs_aio_ctx *cifs_aio_ctx_alloc(void);
-void cifs_aio_ctx_release(struct kref *refcount);
int cifs_alloc_hash(const char *name, struct shash_desc **sdesc);
void cifs_free_hash(struct shash_desc **sdesc);
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 4a8aa1de9522..dd0afa23734c 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1042,13 +1042,26 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
}
rc = -EOPNOTSUPP;
- switch ((data->reparse.tag = tag)) {
- case 0: /* SMB1 symlink */
+ data->reparse.tag = tag;
+ if (!data->reparse.tag) {
if (server->ops->query_symlink) {
rc = server->ops->query_symlink(xid, tcon,
cifs_sb, full_path,
&data->symlink_target);
}
+ if (rc == -EOPNOTSUPP)
+ data->reparse.tag = IO_REPARSE_TAG_INTERNAL;
+ }
+
+ switch (data->reparse.tag) {
+ case 0: /* SMB1 symlink */
+ break;
+ case IO_REPARSE_TAG_INTERNAL:
+ rc = 0;
+ if (le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY) {
+ cifs_create_junction_fattr(fattr, sb);
+ goto out;
+ }
break;
case IO_REPARSE_TAG_MOUNT_POINT:
cifs_create_junction_fattr(fattr, sb);
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index 855ac5a62edf..44dbaf9929a4 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -170,7 +170,10 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon,
static int cifs_shutdown(struct super_block *sb, unsigned long arg)
{
struct cifs_sb_info *sbi = CIFS_SB(sb);
+ struct tcon_link *tlink;
+ struct cifs_tcon *tcon;
__u32 flags;
+ int rc;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -178,14 +181,21 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg)
if (get_user(flags, (__u32 __user *)arg))
return -EFAULT;
- if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH)
- return -EINVAL;
+ tlink = cifs_sb_tlink(sbi);
+ if (IS_ERR(tlink))
+ return PTR_ERR(tlink);
+ tcon = tlink_tcon(tlink);
+
+ trace_smb3_shutdown_enter(flags, tcon->tid);
+ if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) {
+ rc = -EINVAL;
+ goto shutdown_out_err;
+ }
if (cifs_forced_shutdown(sbi))
- return 0;
+ goto shutdown_good;
cifs_dbg(VFS, "shut down requested (%d)", flags);
-/* trace_cifs_shutdown(sb, flags);*/
/*
* see:
@@ -201,7 +211,8 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg)
*/
case CIFS_GOING_FLAGS_DEFAULT:
cifs_dbg(FYI, "shutdown with default flag not supported\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto shutdown_out_err;
/*
* FLAGS_LOGFLUSH is easy since it asks to write out metadata (not
* data) but metadata writes are not cached on the client, so can treat
@@ -210,11 +221,18 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg)
case CIFS_GOING_FLAGS_LOGFLUSH:
case CIFS_GOING_FLAGS_NOLOGFLUSH:
sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN;
- return 0;
+ goto shutdown_good;
default:
- return -EINVAL;
+ rc = -EINVAL;
+ goto shutdown_out_err;
}
+
+shutdown_good:
+ trace_smb3_shutdown_done(flags, tcon->tid);
return 0;
+shutdown_out_err:
+ trace_smb3_shutdown_err(rc, flags, tcon->tid);
+ return rc;
}
static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in)
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index 07c468ddb88a..c6f11e6f9eb9 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -352,7 +352,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
* on simple responses (wct, bcc both zero)
* in particular have seen this on
* ulogoffX and FindClose. This leaves
- * one byte of bcc potentially unitialized
+ * one byte of bcc potentially uninitialized
*/
/* zero rest of bcc */
tmp[sizeof(struct smb_hdr)+1] = 0;
@@ -995,60 +995,6 @@ parse_DFS_referrals_exit:
return rc;
}
-struct cifs_aio_ctx *
-cifs_aio_ctx_alloc(void)
-{
- struct cifs_aio_ctx *ctx;
-
- /*
- * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io
- * to false so that we know when we have to unreference pages within
- * cifs_aio_ctx_release()
- */
- ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL);
- if (!ctx)
- return NULL;
-
- INIT_LIST_HEAD(&ctx->list);
- mutex_init(&ctx->aio_mutex);
- init_completion(&ctx->done);
- kref_init(&ctx->refcount);
- return ctx;
-}
-
-void
-cifs_aio_ctx_release(struct kref *refcount)
-{
- struct cifs_aio_ctx *ctx = container_of(refcount,
- struct cifs_aio_ctx, refcount);
-
- cifsFileInfo_put(ctx->cfile);
-
- /*
- * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly
- * which means that iov_iter_extract_pages() was a success and thus
- * that we may have references or pins on pages that we need to
- * release.
- */
- if (ctx->bv) {
- if (ctx->should_dirty || ctx->bv_need_unpin) {
- unsigned int i;
-
- for (i = 0; i < ctx->nr_pinned_pages; i++) {
- struct page *page = ctx->bv[i].bv_page;
-
- if (ctx->should_dirty)
- set_page_dirty(page);
- if (ctx->bv_need_unpin)
- unpin_user_page(page);
- }
- }
- kvfree(ctx->bv);
- }
-
- kfree(ctx);
-}
-
/**
* cifs_alloc_hash - allocate hash and hash context together
* @name: The name of the crypto hash algo
@@ -1288,6 +1234,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid,
const char *full_path,
bool *islink)
{
+ struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_ses *ses = tcon->ses;
size_t len;
char *path;
@@ -1304,12 +1251,12 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid,
!is_tcon_dfs(tcon))
return 0;
- spin_lock(&tcon->tc_lock);
- if (!tcon->origin_fullpath) {
- spin_unlock(&tcon->tc_lock);
+ spin_lock(&server->srv_lock);
+ if (!server->leaf_fullpath) {
+ spin_unlock(&server->srv_lock);
return 0;
}
- spin_unlock(&tcon->tc_lock);
+ spin_unlock(&server->srv_lock);
/*
* Slow path - tcon is DFS and @full_path has prefix path, so attempt
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index a0ffbda90733..689d8a506d45 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -505,6 +505,10 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
}
switch (tag) {
+ case IO_REPARSE_TAG_INTERNAL:
+ if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY))
+ return false;
+ fallthrough;
case IO_REPARSE_TAG_DFS:
case IO_REPARSE_TAG_DFSR:
case IO_REPARSE_TAG_MOUNT_POINT:
diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h
index 6b55d1df9e2f..2c0644bc4e65 100644
--- a/fs/smb/client/reparse.h
+++ b/fs/smb/client/reparse.h
@@ -12,6 +12,12 @@
#include "fs_context.h"
#include "cifsglob.h"
+/*
+ * Used only by cifs.ko to ignore reparse points from files when client or
+ * server doesn't support FSCTL_GET_REPARSE_POINT.
+ */
+#define IO_REPARSE_TAG_INTERNAL ((__u32)~0U)
+
static inline dev_t reparse_nfs_mkdev(struct reparse_posix_data *buf)
{
u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer);
@@ -78,10 +84,19 @@ static inline u32 reparse_mode_wsl_tag(mode_t mode)
static inline bool reparse_inode_match(struct inode *inode,
struct cifs_fattr *fattr)
{
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
struct timespec64 ctime = inode_get_ctime(inode);
- return (CIFS_I(inode)->cifsAttrs & ATTR_REPARSE) &&
- CIFS_I(inode)->reparse_tag == fattr->cf_cifstag &&
+ /*
+ * Do not match reparse tags when client or server doesn't support
+ * FSCTL_GET_REPARSE_POINT. @fattr->cf_cifstag should contain correct
+ * reparse tag from query dir response but the client won't be able to
+ * read the reparse point data anyway. This spares us a revalidation.
+ */
+ if (cinode->reparse_tag != IO_REPARSE_TAG_INTERNAL &&
+ cinode->reparse_tag != fattr->cf_cifstag)
+ return false;
+ return (cinode->cifsAttrs & ATTR_REPARSE) &&
timespec64_equal(&ctime, &fattr->cf_ctime);
}
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 5c02a12251c8..9f5bc41433c1 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -930,6 +930,8 @@ int smb2_query_path_info(const unsigned int xid,
switch (rc) {
case 0:
+ rc = parse_create_response(data, cifs_sb, &out_iov[0]);
+ break;
case -EOPNOTSUPP:
/*
* BB TODO: When support for special files added to Samba
@@ -948,7 +950,8 @@ int smb2_query_path_info(const unsigned int xid,
cmds[num_cmds++] = SMB2_OP_GET_REPARSE;
oparms = CIFS_OPARMS(cifs_sb, tcon, full_path,
- FILE_READ_ATTRIBUTES | FILE_READ_EA,
+ FILE_READ_ATTRIBUTES |
+ FILE_READ_EA | SYNCHRONIZE,
FILE_OPEN, create_options |
OPEN_REPARSE_POINT, ACL_NO_MODE);
cifs_get_readable_path(tcon, full_path, &cfile);
@@ -1256,7 +1259,8 @@ int smb2_query_reparse_point(const unsigned int xid,
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
cifs_get_readable_path(tcon, full_path, &cfile);
- oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_READ_ATTRIBUTES,
+ oparms = CIFS_OPARMS(cifs_sb, tcon, full_path,
+ FILE_READ_ATTRIBUTES | FILE_READ_EA | SYNCHRONIZE,
FILE_OPEN, OPEN_REPARSE_POINT, ACL_NO_MODE);
rc = smb2_compound_op(xid, tcon, cifs_sb,
full_path, &oparms, &in_iov,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 9a06b5594669..83facb54276a 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -82,6 +82,9 @@ int smb3_encryption_required(const struct cifs_tcon *tcon)
if (tcon->seal &&
(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
return 1;
+ if (((global_secflags & CIFSSEC_MUST_SEAL) == CIFSSEC_MUST_SEAL) &&
+ (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
+ return 1;
return 0;
}
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index d74e829de51c..7bcc379014ca 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -406,7 +406,7 @@ static void smbd_post_send_credits(struct work_struct *work)
else
response = get_empty_queue_buffer(info);
if (!response) {
- /* now switch to emtpy packet queue */
+ /* now switch to empty packet queue */
if (use_receive_queue) {
use_receive_queue = 0;
continue;
@@ -618,7 +618,7 @@ out:
/*
* Test if FRWR (Fast Registration Work Requests) is supported on the device
- * This implementation requries FRWR on RDMA read/write
+ * This implementation requires FRWR on RDMA read/write
* return value: true if it is supported
*/
static bool frwr_is_supported(struct ib_device_attr *attrs)
@@ -2177,7 +2177,7 @@ cleanup_entries:
* MR available in the list. It may access the list while the
* smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock
* as they never modify the same places. However, there may be several CPUs
- * issueing I/O trying to get MR at the same time, mr_list_lock is used to
+ * issuing I/O trying to get MR at the same time, mr_list_lock is used to
* protect this situation.
*/
static struct smbd_mr *get_mr(struct smbd_connection *info)
@@ -2311,7 +2311,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info,
/*
* There is no need for waiting for complemtion on ib_post_send
* on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
- * on the next ib_post_send when we actaully send I/O to remote peer
+ * on the next ib_post_send when we actually send I/O to remote peer
*/
rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL);
if (!rc)
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index 6b3bdfb97211..0f0c10c7ada7 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -1388,7 +1388,7 @@ DECLARE_EVENT_CLASS(smb3_ioctl_class,
__entry->command = command;
),
TP_printk("xid=%u fid=0x%llx ioctl cmd=0x%x",
- __entry->xid, __entry->fid, __entry->command)
+ __entry->xid, __entry->fid, __entry->command)
)
#define DEFINE_SMB3_IOCTL_EVENT(name) \
@@ -1400,9 +1400,58 @@ DEFINE_EVENT(smb3_ioctl_class, smb3_##name, \
DEFINE_SMB3_IOCTL_EVENT(ioctl);
+DECLARE_EVENT_CLASS(smb3_shutdown_class,
+ TP_PROTO(__u32 flags,
+ __u32 tid),
+ TP_ARGS(flags, tid),
+ TP_STRUCT__entry(
+ __field(__u32, flags)
+ __field(__u32, tid)
+ ),
+ TP_fast_assign(
+ __entry->flags = flags;
+ __entry->tid = tid;
+ ),
+ TP_printk("flags=0x%x tid=0x%x",
+ __entry->flags, __entry->tid)
+)
+
+#define DEFINE_SMB3_SHUTDOWN_EVENT(name) \
+DEFINE_EVENT(smb3_shutdown_class, smb3_##name, \
+ TP_PROTO(__u32 flags, \
+ __u32 tid), \
+ TP_ARGS(flags, tid))
+
+DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_enter);
+DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_done);
+DECLARE_EVENT_CLASS(smb3_shutdown_err_class,
+ TP_PROTO(int rc,
+ __u32 flags,
+ __u32 tid),
+ TP_ARGS(rc, flags, tid),
+ TP_STRUCT__entry(
+ __field(int, rc)
+ __field(__u32, flags)
+ __field(__u32, tid)
+ ),
+ TP_fast_assign(
+ __entry->rc = rc;
+ __entry->flags = flags;
+ __entry->tid = tid;
+ ),
+ TP_printk("rc=%d flags=0x%x tid=0x%x",
+ __entry->rc, __entry->flags, __entry->tid)
+)
+#define DEFINE_SMB3_SHUTDOWN_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_shutdown_err_class, smb3_##name, \
+ TP_PROTO(int rc, \
+ __u32 flags, \
+ __u32 tid), \
+ TP_ARGS(rc, flags, tid))
+DEFINE_SMB3_SHUTDOWN_ERR_EVENT(shutdown_err);
DECLARE_EVENT_CLASS(smb3_credit_class,
TP_PROTO(__u64 currmid,
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index adfe0d058701..6e68aaf5bd20 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -1289,7 +1289,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
out:
/*
* This will dequeue all mids. After this it is important that the
- * demultiplex_thread will not process any of these mids any futher.
+ * demultiplex_thread will not process any of these mids any further.
* This is prevented above by using a noop callback that will not
* wake this thread except for the very last PDU.
*/
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 5d88c184f0fc..01e99e98457d 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -112,7 +112,7 @@ static void release_ei(struct kref *ref)
entry->release(entry->name, ei->data);
}
- call_rcu(&ei->rcu, free_ei_rcu);
+ call_srcu(&eventfs_srcu, &ei->rcu, free_ei_rcu);
}
static inline void put_ei(struct eventfs_inode *ei)
@@ -736,7 +736,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode
/* Was the parent freed? */
if (list_empty(&ei->list)) {
cleanup_ei(ei);
- ei = NULL;
+ ei = ERR_PTR(-EBUSY);
}
return ei;
}
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 1028ab6d9a74..1748dff58c3b 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -42,7 +42,7 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb)
struct tracefs_inode *ti;
unsigned long flags;
- ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL);
+ ti = alloc_inode_sb(sb, tracefs_inode_cachep, GFP_KERNEL);
if (!ti)
return NULL;
@@ -53,15 +53,14 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb)
return &ti->vfs_inode;
}
-static void tracefs_free_inode_rcu(struct rcu_head *rcu)
+static void tracefs_free_inode(struct inode *inode)
{
- struct tracefs_inode *ti;
+ struct tracefs_inode *ti = get_tracefs(inode);
- ti = container_of(rcu, struct tracefs_inode, rcu);
kmem_cache_free(tracefs_inode_cachep, ti);
}
-static void tracefs_free_inode(struct inode *inode)
+static void tracefs_destroy_inode(struct inode *inode)
{
struct tracefs_inode *ti = get_tracefs(inode);
unsigned long flags;
@@ -69,8 +68,6 @@ static void tracefs_free_inode(struct inode *inode)
spin_lock_irqsave(&tracefs_inode_lock, flags);
list_del_rcu(&ti->list);
spin_unlock_irqrestore(&tracefs_inode_lock, flags);
-
- call_rcu(&ti->rcu, tracefs_free_inode_rcu);
}
static ssize_t default_read_file(struct file *file, char __user *buf,
@@ -437,6 +434,7 @@ static int tracefs_drop_inode(struct inode *inode)
static const struct super_operations tracefs_super_operations = {
.alloc_inode = tracefs_alloc_inode,
.free_inode = tracefs_free_inode,
+ .destroy_inode = tracefs_destroy_inode,
.drop_inode = tracefs_drop_inode,
.statfs = simple_statfs,
.show_options = tracefs_show_options,
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
index f704d8348357..d83c2a25f288 100644
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -10,10 +10,7 @@ enum {
};
struct tracefs_inode {
- union {
- struct inode vfs_inode;
- struct rcu_head rcu;
- };
+ struct inode vfs_inode;
/* The below gets initialized with memset_after(ti, 0, vfs_inode) */
struct list_head list;
unsigned long flags;
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index cb035da3f990..fb05f44f6c75 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -56,7 +56,7 @@ typedef uint8_t xfs_dqtype_t;
* And, of course, we also need to take into account the dquot log format item
* used to describe each dquot.
*/
-#define XFS_DQUOT_LOGRES(mp) \
+#define XFS_DQUOT_LOGRES \
((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)
#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 3dc8f785bf29..45aaf169806a 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -338,11 +338,11 @@ xfs_calc_write_reservation(
blksz);
t1 += adj;
t3 += adj;
- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+ return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 1);
- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
unsigned int
@@ -410,11 +410,11 @@ xfs_calc_itruncate_reservation(
xfs_refcountbt_block_count(mp, 4),
blksz);
- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+ return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 2);
- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
unsigned int
@@ -466,7 +466,7 @@ STATIC uint
xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
- unsigned int overhead = XFS_DQUOT_LOGRES(mp);
+ unsigned int overhead = XFS_DQUOT_LOGRES;
struct xfs_trans_resv *resp = M_RES(mp);
unsigned int t1, t2, t3 = 0;
@@ -577,7 +577,7 @@ STATIC uint
xfs_calc_link_reservation(
struct xfs_mount *mp)
{
- unsigned int overhead = XFS_DQUOT_LOGRES(mp);
+ unsigned int overhead = XFS_DQUOT_LOGRES;
struct xfs_trans_resv *resp = M_RES(mp);
unsigned int t1, t2, t3 = 0;
@@ -641,7 +641,7 @@ STATIC uint
xfs_calc_remove_reservation(
struct xfs_mount *mp)
{
- unsigned int overhead = XFS_DQUOT_LOGRES(mp);
+ unsigned int overhead = XFS_DQUOT_LOGRES;
struct xfs_trans_resv *resp = M_RES(mp);
unsigned int t1, t2, t3 = 0;
@@ -729,7 +729,7 @@ xfs_calc_icreate_reservation(
struct xfs_mount *mp)
{
struct xfs_trans_resv *resp = M_RES(mp);
- unsigned int overhead = XFS_DQUOT_LOGRES(mp);
+ unsigned int overhead = XFS_DQUOT_LOGRES;
unsigned int t1, t2, t3 = 0;
t1 = xfs_calc_icreate_resv_alloc(mp);
@@ -747,7 +747,7 @@ STATIC uint
xfs_calc_create_tmpfile_reservation(
struct xfs_mount *mp)
{
- uint res = XFS_DQUOT_LOGRES(mp);
+ uint res = XFS_DQUOT_LOGRES;
res += xfs_calc_icreate_resv_alloc(mp);
return res + xfs_calc_iunlink_add_reservation(mp);
@@ -829,7 +829,7 @@ STATIC uint
xfs_calc_ifree_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_iunlink_remove_reservation(mp) +
@@ -846,7 +846,7 @@ STATIC uint
xfs_calc_ichange_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
@@ -955,7 +955,7 @@ STATIC uint
xfs_calc_addafork_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
@@ -1003,7 +1003,7 @@ STATIC uint
xfs_calc_attrsetm_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
@@ -1043,7 +1043,7 @@ STATIC uint
xfs_calc_attrrm_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
max((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
XFS_FSB_TO_B(mp, 1)) +
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 0dbc484b182f..2f98d90d7fd6 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -696,7 +696,7 @@ xrep_agfl_init_header(
* step.
*/
xagb_bitmap_init(&af.used_extents);
- af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp),
+ af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp);
xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af);
error = xagb_bitmap_disunion(agfl_extents, &af.used_extents);
if (error)
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 733c410a2279..91e7b51ce068 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -799,7 +799,7 @@ xchk_parent_pptr(
}
if (pp->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- goto out_pp;
+ goto out_names;
/*
* Complain if the number of parent pointers doesn't match the link
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 92ef4cdc486e..c886d5d0eb02 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -959,18 +959,16 @@ TRACE_EVENT(xfile_create,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, ino)
- __array(char, pathname, 256)
+ __array(char, pathname, MAXNAMELEN)
),
TP_fast_assign(
- char pathname[257];
char *path;
__entry->ino = file_inode(xf->file)->i_ino;
- memset(pathname, 0, sizeof(pathname));
- path = file_path(xf->file, pathname, sizeof(pathname) - 1);
+ path = file_path(xf->file, __entry->pathname, MAXNAMELEN);
if (IS_ERR(path))
- path = "(unknown)";
- strncpy(__entry->pathname, path, sizeof(__entry->pathname));
+ strncpy(__entry->pathname, "(unknown)",
+ sizeof(__entry->pathname));
),
TP_printk("xfino 0x%lx path '%s'",
__entry->ino,
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 5c947e5ce8b8..7db386304875 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -139,7 +139,7 @@ xfs_attr_shortform_list(
sbp->name = sfe->nameval;
sbp->namelen = sfe->namelen;
/* These are bytes, and both on-disk, don't endian-flip */
- sbp->value = &sfe->nameval[sfe->namelen],
+ sbp->value = &sfe->nameval[sfe->namelen];
sbp->valuelen = sfe->valuelen;
sbp->flags = sfe->flags;
sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 5646d300b286..180ce697305a 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -4715,20 +4715,18 @@ TRACE_EVENT(xmbuf_create,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, ino)
- __array(char, pathname, 256)
+ __array(char, pathname, MAXNAMELEN)
),
TP_fast_assign(
- char pathname[257];
char *path;
struct file *file = btp->bt_file;
__entry->dev = btp->bt_mount->m_super->s_dev;
__entry->ino = file_inode(file)->i_ino;
- memset(pathname, 0, sizeof(pathname));
- path = file_path(file, pathname, sizeof(pathname) - 1);
+ path = file_path(file, __entry->pathname, MAXNAMELEN);
if (IS_ERR(path))
- path = "(unknown)";
- strncpy(__entry->pathname, path, sizeof(__entry->pathname));
+ strncpy(__entry->pathname, "(unknown)",
+ sizeof(__entry->pathname));
),
TP_printk("dev %d:%d xmino 0x%lx path '%s'",
MAJOR(__entry->dev), MINOR(__entry->dev),
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index ab3d22f662f2..eaf849260bd6 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -110,7 +110,24 @@ xfs_attr_change(
args->whichfork = XFS_ATTR_FORK;
xfs_attr_sethash(args);
- return xfs_attr_set(args, op, args->attr_filter & XFS_ATTR_ROOT);
+ /*
+ * Some xattrs must be resistant to allocation failure at ENOSPC, e.g.
+ * creating an inode with ACLs or security attributes requires the
+ * allocation of the xattr holding that information to succeed. Hence
+ * we allow xattrs in the VFS TRUSTED, SYSTEM, POSIX_ACL and SECURITY
+ * (LSM xattr) namespaces to dip into the reserve block pool to allow
+ * manipulation of these xattrs when at ENOSPC. These VFS xattr
+ * namespaces translate to the XFS_ATTR_ROOT and XFS_ATTR_SECURE on-disk
+ * namespaces.
+ *
+ * For most of these cases, these special xattrs will fit in the inode
+ * itself and so consume no extra space or only require temporary extra
+ * space while an overwrite is being made. Hence the use of the reserved
+ * pool is largely to avoid the worst case reservation from preventing
+ * the xattr from being created at ENOSPC.
+ */
+ return xfs_attr_set(args, op,
+ args->attr_filter & (XFS_ATTR_ROOT | XFS_ATTR_SECURE));
}