diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-08-12 18:46:07 +0200 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-08-12 18:46:07 +0200 |
| commit | 0c80bdfc9aa624b4a6c33c30ad11e81bf6407c36 (patch) | |
| tree | caea5044f1bd46b56c9894a23c629ef4bf93a5ce /fs | |
| parent | bfa54a793ba77ef696755b66f3ac4ed00c7d1248 (diff) | |
| parent | 7c626ce4bae1ac14f60076d00eafe71af30450ba (diff) | |
Merge 6.11-rc3 into driver-core-next
We need the driver core fixes in here as well to build on top of.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
69 files changed, 699 insertions, 273 deletions
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index a7b425d3c8a0..331a17f3f113 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -272,16 +272,19 @@ bch2_acl_to_xattr(struct btree_trans *trans, return xattr; } -struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, - struct dentry *dentry, int type) +struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) { - struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct posix_acl *acl = NULL; + + if (rcu) + return ERR_PTR(-ECHILD); + + struct btree_trans *trans = bch2_trans_get(c); retry: bch2_trans_begin(trans); diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h index 27e7eec0f278..fe730a6bf0c1 100644 --- a/fs/bcachefs/acl.h +++ b/fs/bcachefs/acl.h @@ -28,7 +28,7 @@ void bch2_acl_to_text(struct printbuf *, const void *, size_t); #ifdef CONFIG_BCACHEFS_POSIX_ACL -struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int); +struct posix_acl *bch2_get_acl(struct inode *, int, bool); int bch2_set_acl_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 8d2b62c9588e..96a0444ea78f 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket, bucket_data_type(bucket) != bucket_data_type(ptr); } +/* + * It is my general preference to use unsigned types for unsigned quantities - + * however, these helpers are used in disk accounting calculations run by + * triggers where the output will be negated and added to an s64. unsigned is + * right out even though all these quantities will fit in 32 bits, since it + * won't be sign extended correctly; u64 will negate "correctly", but s64 is the + * simpler option here. + */ static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a) { return a.stripe_sectors + a.dirty_sectors + a.cached_sectors; @@ -166,8 +174,8 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, * avoid overflowing LRU_TIME_BITS on a corrupted fs, when * bucket_sectors_dirty is (much) bigger than bucket_size */ - u64 d = min(bch2_bucket_sectors_dirty(a), - ca->mi.bucket_size); + u64 d = min_t(s64, bch2_bucket_sectors_dirty(a), + ca->mi.bucket_size); return div_u64(d * (1ULL << 31), ca->mi.bucket_size); } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 618d2ff0292e..8563c2d26847 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1603,7 +1603,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope prt_newline(out); } -void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) +void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_dev *ca) { struct open_bucket *ob; @@ -1613,7 +1614,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { spin_lock(&ob->lock); - if (ob->valid && !ob->on_partial_list) + if (ob->valid && !ob->on_partial_list && + (!ca || ob->dev == ca->dev_idx)) bch2_open_bucket_to_text(out, c, ob); spin_unlock(&ob->lock); } @@ -1738,7 +1740,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); - bch2_dev_usage_to_text(out, &stats); + bch2_dev_usage_to_text(out, ca, &stats); prt_newline(out); @@ -1756,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats)); } -void bch2_print_allocator_stuck(struct bch_fs *c) +static noinline void bch2_print_allocator_stuck(struct bch_fs *c) { struct printbuf buf = PRINTBUF; - prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n"); + prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n", + c->opts.allocator_stuck_timeout); prt_printf(&buf, "Allocator debug:\n"); printbuf_indent_add(&buf, 2); @@ -1790,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c) bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); } + +static inline unsigned allocator_wait_timeout(struct bch_fs *c) +{ + if (c->allocator_last_stuck && + time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies)) + return 0; + + return c->opts.allocator_stuck_timeout * HZ; +} + +void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + unsigned t = allocator_wait_timeout(c); + + if (t && closure_sync_timeout(cl, t)) { + c->allocator_last_stuck = jiffies; + bch2_print_allocator_stuck(c); + } + + closure_sync(cl); +} diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 6da9e7e29026..386d231ceca3 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp void bch2_fs_allocator_foreground_init(struct bch_fs *); void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *); -void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *); +void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *); void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *); void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); @@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *); void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *); -void bch2_print_allocator_stuck(struct bch_fs *); +void __bch2_wait_on_allocator(struct bch_fs *, struct closure *); +static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + if (cl->closure_get_happened) + __bch2_wait_on_allocator(c, cl); +} #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 91361a167dcd..eedf2d6045e7 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -893,6 +893,8 @@ struct bch_fs { struct bch_fs_usage_base __percpu *usage; u64 __percpu *online_reserved; + unsigned long allocator_last_stuck; + struct io_clock io_clock[2]; /* JOURNAL SEQ BLACKLIST */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 74a60b1a4ddf..b25f86356728 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -675,7 +675,8 @@ struct bch_sb_field_ext { x(btree_subvolume_children, BCH_VERSION(1, 6)) \ x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ - x(disk_accounting_v2, BCH_VERSION(1, 9)) + x(disk_accounting_v2, BCH_VERSION(1, 9)) \ + x(disk_accounting_v3, BCH_VERSION(1, 10)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -836,6 +837,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); +LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, + struct bch_sb, flags[5], 16, 32); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 36872207f09b..aa8a049071f4 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1921,6 +1921,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) bch2_trans_verify_not_in_restart(trans); bch2_btree_iter_verify(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + if (ret) + goto err; + + struct btree_path *path = btree_iter_path(trans, iter); /* already at end? */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 31ee50184be2..e61f9695771e 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1264,7 +1264,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); bch2_trans_unlock(trans); - closure_sync(&cl); + bch2_wait_on_allocator(c, &cl); } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 2650a0d24663..9f7004e941ce 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -71,17 +71,21 @@ bch2_fs_usage_read_short(struct bch_fs *c) return ret; } -void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) +void bch2_dev_usage_to_text(struct printbuf *out, + struct bch_dev *ca, + struct bch_dev_usage *usage) { prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n"); for (unsigned i = 0; i < BCH_DATA_NR; i++) { bch2_prt_data_type(out, i); prt_printf(out, "\t%llu\r%llu\r%llu\r\n", - usage->d[i].buckets, - usage->d[i].sectors, - usage->d[i].fragmented); + usage->d[i].buckets, + usage->d[i].sectors, + usage->d[i].fragmented); } + + prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets); } static int bch2_check_fix_ptr(struct btree_trans *trans, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 2d35eeb24a2d..edbdffd508fc 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -212,7 +212,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) return ret; } -void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *); +void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *); static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) { diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index dcdd59249c23..046ac92b6639 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -114,11 +114,74 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans, return bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); } +static inline bool is_zero(char *start, char *end) +{ + BUG_ON(start > end); + + for (; start < end; start++) + if (*start) + return false; + return true; +} + +#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) + int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags, struct printbuf *err) { - return 0; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + void *end = &acc_k + 1; + int ret = 0; + + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_nr_inodes: + end = field_end(acc_k, nr_inodes); + break; + case BCH_DISK_ACCOUNTING_persistent_reserved: + end = field_end(acc_k, persistent_reserved); + break; + case BCH_DISK_ACCOUNTING_replicas: + bkey_fsck_err_on(!acc_k.replicas.nr_devs, + c, err, accounting_key_replicas_nr_devs_0, + "accounting key replicas entry with nr_devs=0"); + + bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || + (acc_k.replicas.nr_required > 1 && + acc_k.replicas.nr_required == acc_k.replicas.nr_devs), + c, err, accounting_key_replicas_nr_required_bad, + "accounting key replicas entry with bad nr_required"); + + for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) + bkey_fsck_err_on(acc_k.replicas.devs[i] > acc_k.replicas.devs[i + 1], + c, err, accounting_key_replicas_devs_unsorted, + "accounting key replicas entry with unsorted devs"); + + end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + end = field_end(acc_k, dev_data_type); + break; + case BCH_DISK_ACCOUNTING_compression: + end = field_end(acc_k, compression); + break; + case BCH_DISK_ACCOUNTING_snapshot: + end = field_end(acc_k, snapshot); + break; + case BCH_DISK_ACCOUNTING_btree: + end = field_end(acc_k, btree); + break; + case BCH_DISK_ACCOUNTING_rebalance_work: + end = field_end(acc_k, rebalance_work); + break; + } + + bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), + c, err, accounting_key_junk_at_end, + "junk at end of accounting key"); +fsck_err: + return ret; } void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k) diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index cba417060b33..a93cf26ff4a9 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -124,20 +124,19 @@ struct bch_dev_data_type { __u8 data_type; }; -struct bch_dev_stripe_buckets { - __u8 dev; -}; - struct bch_acct_compression { __u8 type; }; struct bch_acct_snapshot { __u32 id; -}; +} __packed; struct bch_acct_btree { __u32 id; +} __packed; + +struct bch_acct_rebalance_work { }; struct disk_accounting_pos { @@ -149,12 +148,12 @@ struct disk_accounting_pos { struct bch_persistent_reserved persistent_reserved; struct bch_replicas_entry_v1 replicas; struct bch_dev_data_type dev_data_type; - struct bch_dev_stripe_buckets dev_stripe_buckets; struct bch_acct_compression compression; struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; - }; - }; + struct bch_acct_rebalance_work rebalance_work; + } __packed; + } __packed; struct bpos _pad; }; }; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 9b5b5c9a6c63..84f1cbf6497f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1809,6 +1809,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); BUG_ON(v->nr_redundant != h->s->nr_parity); + /* * We bypass the sector allocator which normally does this: */ + bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); + for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { __clear_bit(v->ptrs[i].dev, devs.d); if (i < h->s->nr_data) @@ -2235,6 +2238,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) mutex_unlock(&c->ec_stripes_heap_lock); } +static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c, + struct ec_stripe_new *s) +{ + prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs", + s->idx, s->nr_data, s->nr_parity, + bitmap_weight(s->blocks_allocated, s->nr_data), + atomic_read(&s->ref[STRIPE_REF_io]), + atomic_read(&s->ref[STRIPE_REF_stripe]), + bch2_watermarks[s->h->watermark]); + + struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + unsigned i; + for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) + prt_printf(out, " %u", s->blocks[i]); + prt_newline(out); +} + void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) { struct ec_stripe_head *h; @@ -2247,23 +2267,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) bch2_watermarks[h->watermark]); if (h->s) - prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n", - h->s->idx, h->s->nr_data, h->s->nr_parity, - bitmap_weight(h->s->blocks_allocated, - h->s->nr_data)); + bch2_new_stripe_to_text(out, c, h->s); } mutex_unlock(&c->ec_stripe_head_lock); prt_printf(out, "in flight:\n"); mutex_lock(&c->ec_stripe_new_lock); - list_for_each_entry(s, &c->ec_stripe_new_list, list) { - prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n", - s->idx, s->nr_data, s->nr_parity, - atomic_read(&s->ref[STRIPE_REF_io]), - atomic_read(&s->ref[STRIPE_REF_stripe]), - bch2_watermarks[s->h->watermark]); - } + list_for_each_entry(s, &c->ec_stripe_new_list, list) + bch2_new_stripe_to_text(out, c, s); mutex_unlock(&c->ec_stripe_new_lock); } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3a5f49affa0a..15fc41e63b6c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1199,7 +1199,7 @@ static const struct inode_operations bch_file_inode_operations = { .fiemap = bch2_fiemap, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1219,7 +1219,7 @@ static const struct inode_operations bch_dir_inode_operations = { .tmpfile = bch2_tmpfile, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1241,7 +1241,7 @@ static const struct inode_operations bch_symlink_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1251,7 +1251,7 @@ static const struct inode_operations bch_special_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 2cf6297756f8..177ed331c00b 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -126,11 +126,7 @@ err_noprint: if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); - - if (closure_sync_timeout(&cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&cl); - } + bch2_wait_on_allocator(c, &cl); } return ret; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 4531c9ab3e12..7ee3b75480df 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->read_pos, BTREE_ITER_slots); retry: + bch2_trans_begin(trans); rbio->bio.bi_status = 0; k = bch2_btree_iter_peek_slot(&iter); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d31c8d006d97..1d4761d15002 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1503,10 +1503,7 @@ err: if ((op->flags & BCH_WRITE_SYNC) || (!(op->flags & BCH_WRITE_SUBMITTED) && !(op->flags & BCH_WRITE_IN_WORKER))) { - if (closure_sync_timeout(&op->cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&op->cl); - } + bch2_wait_on_allocator(c, &op->cl); __bch2_write_index(op); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 60b93018501f..cda1725702ea 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -391,6 +391,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \ NULL, "Log transaction function names in journal") \ + x(allocator_stuck_timeout, u16, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(0, U16_MAX), \ + BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \ + NULL, "Default timeout in seconds for stuck allocator messages")\ x(noexcl, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 10c96cb2047a..1223b710755d 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -24,7 +24,6 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv) static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) { #ifdef CONFIG_BCACHEFS_DEBUG - BUG_ON(e->data_type >= BCH_DATA_NR); BUG_ON(!e->nr_devs); BUG_ON(e->nr_required > 1 && e->nr_required >= e->nr_devs); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index dfbbd33c8731..6c4469f53313 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -61,7 +61,18 @@ BCH_FSCK_ERR_dev_usage_buckets_wrong, \ BCH_FSCK_ERR_dev_usage_sectors_wrong, \ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ - BCH_FSCK_ERR_accounting_mismatch) + BCH_FSCK_ERR_accounting_mismatch) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_bkey_version_in_future, \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_accounting_mismatch, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ + BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ + BCH_FSCK_ERR_accounting_key_junk_at_end) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ @@ -79,6 +90,20 @@ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_bkey_version_in_future) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_fs_usage_hidden_wrong, \ + BCH_FSCK_ERR_fs_usage_btree_wrong, \ + BCH_FSCK_ERR_fs_usage_data_wrong, \ + BCH_FSCK_ERR_fs_usage_cached_wrong, \ + BCH_FSCK_ERR_fs_usage_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ + BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_replicas_wrong, \ BCH_FSCK_ERR_bkey_version_in_future) struct upgrade_downgrade_entry { diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d1b2f2aa397a..d3a498617303 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -287,7 +287,11 @@ enum bch_fsck_flags { x(accounting_replicas_not_marked, 273, 0) \ x(invalid_btree_id, 274, 0) \ x(alloc_key_io_time_bad, 275, 0) \ - x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ + x(accounting_key_junk_at_end, 277, 0) \ + x(accounting_key_replicas_nr_devs_0, 278, 0) \ + x(accounting_key_replicas_nr_required_bad, 279, 0) \ + x(accounting_key_replicas_devs_unsorted, 280, 0) \ enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8bc819832790..c8c2ccbdfbb5 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb)) SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version)); + + if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 && + !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb)) + SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30); } for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0455a1001fec..e7fa2de35014 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1193,7 +1193,6 @@ static void bch2_dev_free(struct bch_dev *ca) if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); - kfree(ca->buckets_nouse); bch2_free_super(&ca->disk_sb); bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1c0d1fb20276..f393023a3ae2 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -367,7 +367,7 @@ SHOW(bch2_fs) bch2_stripes_heap_to_text(out, c); if (attr == &sysfs_open_buckets) - bch2_open_buckets_to_text(out, c); + bch2_open_buckets_to_text(out, c, NULL); if (attr == &sysfs_open_buckets_partial) bch2_open_buckets_partial_to_text(out, c); @@ -811,6 +811,9 @@ SHOW(bch2_dev) if (attr == &sysfs_alloc_debug) bch2_dev_alloc_debug_to_text(out, ca); + if (attr == &sysfs_open_buckets) + bch2_open_buckets_to_text(out, c, ca); + return 0; } @@ -892,6 +895,7 @@ struct attribute *bch2_dev_files[] = { /* debug: */ &sysfs_alloc_debug, + &sysfs_open_buckets, NULL }; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 498442d0c216..2e49d978f504 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1223,8 +1223,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); - block_group->space_info->bytes_zone_unusable -= - block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info, + -block_group->zone_unusable); block_group->space_info->disk_total -= block_group->length * factor; spin_unlock(&block_group->space_info->lock); @@ -1396,7 +1396,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes to readonly */ sinfo->bytes_readonly += cache->zone_unusable; - sinfo->bytes_zone_unusable -= cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + -cache->zone_unusable); cache->zone_unusable = 0; } cache->ro++; @@ -3056,9 +3057,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ cache->zone_unusable = - (cache->alloc_offset - cache->used) + + (cache->alloc_offset - cache->used - cache->pinned - + cache->reserved) + (cache->length - cache->zone_capacity); - sinfo->bytes_zone_unusable += cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + cache->zone_unusable); sinfo->bytes_readonly -= cache->zone_unusable; } num_bytes = cache->length - cache->reserved - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c8568b1a61c4..75fa563e4cac 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,7 @@ struct btrfs_file_private { void *filldir_buf; u64 last_index; struct extent_state *llseek_cached_state; + bool fsync_skip_inode_lock; }; static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c index f9fb2db6a1e4..67adbe9d294a 100644 --- a/fs/btrfs/direct-io.c +++ b/fs/btrfs/direct-io.c @@ -856,21 +856,37 @@ relock: * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ +again: from->nofault = true; dio = btrfs_dio_write(iocb, from, written); from->nofault = false; - /* - * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync - * iocb, and that needs to lock the inode. So unlock it before calling - * iomap_dio_complete() to avoid a deadlock. - */ - btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); - - if (IS_ERR_OR_NULL(dio)) + if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); - else + } else { + struct btrfs_file_private stack_private = { 0 }; + struct btrfs_file_private *private; + const bool have_private = (file->private_data != NULL); + + if (!have_private) + file->private_data = &stack_private; + + /* + * If we have a synchronous write, we must make sure the fsync + * triggered by the iomap_dio_complete() call below doesn't + * deadlock on the inode lock - we are already holding it and we + * can't call it after unlocking because we may need to complete + * partial writes due to the input buffer (or parts of it) not + * being already faulted in. + */ + private = file->private_data; + private->fsync_skip_inode_lock = true; ret = iomap_dio_complete(dio); + private->fsync_skip_inode_lock = false; + + if (!have_private) + file->private_data = NULL; + } /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) @@ -897,10 +913,12 @@ relock: } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto relock; + goto again; } } + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + /* * If 'ret' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d77498e7671c..ff9f0d41987e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2793,7 +2793,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, readonly = true; } else if (btrfs_is_zoned(fs_info)) { /* Need reset before reusing in a zoned block group */ - space_info->bytes_zone_unusable += len; + btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info, + len); readonly = true; } spin_unlock(&cache->lock); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 81558f90ee80..23b65dc73c00 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -664,7 +664,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode, start_diff = start - em->start; em->start = start; em->len = end - start; - if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE && !extent_map_is_compressed(em)) + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) em->offset += start_diff; return add_extent_mapping(inode, em, 0); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 21381de906f6..9914419f3b7d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1603,6 +1603,7 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + struct btrfs_file_private *private = file->private_data; struct dentry *dentry = file_dentry(file); struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_root *root = inode->root; @@ -1612,6 +1613,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; bool full_sync; + const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); trace_btrfs_sync_file(file, datasync); @@ -1639,7 +1641,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret) goto out; - btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + down_write(&inode->i_mmap_lock); + else + btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); atomic_inc(&root->log_batch); @@ -1663,7 +1668,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ ret = start_ordered_ops(inode, start, end); if (ret) { - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } @@ -1788,7 +1796,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); if (ret == BTRFS_NO_LOG_SYNC) { ret = btrfs_end_transaction(trans); @@ -1857,7 +1868,10 @@ out: out_release_extents: btrfs_release_log_ctx_extents(&ctx); - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 3f9b7507543a..f5996a43db24 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2723,8 +2723,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, * If the block group is read-only, we should account freed space into * bytes_readonly. */ - if (!block_group->ro) + if (!block_group->ro) { block_group->zone_unusable += to_unusable; + WARN_ON(block_group->zone_unusable > block_group->length); + } spin_unlock(&ctl->tree_lock); if (!used) { spin_lock(&block_group->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 01eab6955647..333b0e8587a2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -714,8 +714,9 @@ out: return ret; } -static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, - u64 end, +static noinline int cow_file_range_inline(struct btrfs_inode *inode, + struct page *locked_page, + u64 offset, u64 end, size_t compressed_size, int compress_type, struct folio *compressed_folio, @@ -739,7 +740,10 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, return ret; } - extent_clear_unlock_delalloc(inode, offset, end, NULL, &cached, + if (ret == 0) + locked_page = NULL; + + extent_clear_unlock_delalloc(inode, offset, end, locked_page, &cached, clear_flags, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); @@ -1043,10 +1047,10 @@ again: * extent for the subpage case. */ if (total_in < actual_end) - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, NULL, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); else - ret = cow_file_range_inline(inode, start, end, total_compressed, + ret = cow_file_range_inline(inode, NULL, start, end, total_compressed, compress_type, folios[0], false); if (ret <= 0) { if (ret < 0) @@ -1359,7 +1363,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (!no_inline) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, locked_page, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); if (ret <= 0) { /* @@ -1581,6 +1585,7 @@ out_unlock: locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); start += cur_alloc_size; } @@ -1594,6 +1599,7 @@ out_unlock: clear_bits |= EXTENT_CLEAR_DATA_RESV; extent_clear_unlock_delalloc(inode, start, end, locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); } return ret; } @@ -2255,6 +2261,7 @@ error: EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); + btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); } btrfs_free_path(path); return ret; @@ -5660,7 +5667,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct inode *inode; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; - struct btrfs_key location; + struct btrfs_key location = { 0 }; u8 di_type = 0; int ret = 0; @@ -7198,6 +7205,12 @@ static void wait_subpage_spinlock(struct page *page) spin_unlock_irq(&subpage->lock); } +static int btrfs_launder_folio(struct folio *folio) +{ + return btrfs_qgroup_free_data(folio_to_inode(folio), NULL, folio_pos(folio), + PAGE_SIZE, NULL); +} + static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags) { if (try_release_extent_mapping(&folio->page, gfp_flags)) { @@ -10133,6 +10146,7 @@ static const struct address_space_operations btrfs_aops = { .writepages = btrfs_writepages, .readahead = btrfs_readahead, .invalidate_folio = btrfs_invalidate_folio, + .launder_folio = btrfs_launder_folio, .release_folio = btrfs_release_folio, .migrate_folio = btrfs_migrate_folio, .dirty_folio = filemap_dirty_folio, diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 32dcea662da3..fc821aa446f0 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -14,7 +14,7 @@ struct root_name_map { u64 id; - char name[16]; + const char *name; }; static const struct root_name_map root_map[] = { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 14a8d7100018..0de9162ff481 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1648,14 +1648,20 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe) } } +static u32 stripe_length(const struct scrub_stripe *stripe) +{ + ASSERT(stripe->bg); + + return min(BTRFS_STRIPE_LEN, + stripe->bg->start + stripe->bg->length - stripe->logical); +} + static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; u64 stripe_len = BTRFS_STRIPE_LEN; int mirror = stripe->mirror_num; int i; @@ -1729,9 +1735,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); @@ -1871,6 +1875,9 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx) stripe = &sctx->stripes[i]; wait_scrub_stripe_io(stripe); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = stripe->physical + stripe_length(stripe); + spin_unlock(&sctx->stat_lock); scrub_reset_stripe(stripe); } out: @@ -2139,7 +2146,9 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, cur_physical, &found_logical); if (ret > 0) { /* No more extent, just update the accounting */ + spin_lock(&sctx->stat_lock); sctx->stat.last_physical = physical + logical_length; + spin_unlock(&sctx->stat_lock); ret = 0; break; } @@ -2336,6 +2345,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, stripe_logical += chunk_logical; ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg, map, stripe_logical); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = min(physical + BTRFS_STRIPE_LEN, + physical_end); + spin_unlock(&sctx->stat_lock); if (ret) goto out; goto next; diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 9ac94d3119e8..68e14fd48638 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -316,7 +316,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; found->bytes_readonly += block_group->bytes_super; - found->bytes_zone_unusable += block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable); if (block_group->length > 0) found->full = 0; btrfs_try_granting_tickets(info, found); @@ -583,8 +583,7 @@ again: spin_lock(&cache->lock); avail = cache->length - cache->used - cache->pinned - - cache->reserved - cache->delalloc_bytes - - cache->bytes_super - cache->zone_unusable; + cache->reserved - cache->bytes_super - cache->zone_unusable; btrfs_info(fs_info, "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s", cache->start, cache->length, cache->used, cache->pinned, diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 4db8a0267c16..88b44221ce97 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -249,6 +249,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info"); DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned"); +DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable"); int btrfs_init_space_info(struct btrfs_fs_info *fs_info); void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 08d33cb372fb..83478deada3b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -683,8 +683,11 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, ret = false; if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { - if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) + if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { btrfs_info(info, "disk space caching is enabled"); + btrfs_warn(info, +"space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); + } if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) btrfs_info(info, "using free-space-tree"); } diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index ebec4ab361b8..56e61ac1cc64 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -900,6 +900,102 @@ out: return ret; } +/* + * Test a regression for compressed extent map adjustment when we attempt to + * add an extent map that is partially overlapped by another existing extent + * map. The resulting extent map offset was left unchanged despite having + * incremented its start offset. + */ +static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) +{ + struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_map *em; + int ret; + int ret2; + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + return -ENOMEM; + } + + /* Compressed extent for the file range [120K, 128K). */ + em->start = SZ_1K * 120; + em->len = SZ_8K; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_8K; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [120K, 128K)"); + goto out; + } + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + ret = -ENOMEM; + goto out; + } + + /* + * Compressed extent for the file range [108K, 144K), which overlaps + * with the [120K, 128K) we previously inserted. + */ + em->start = SZ_1K * 108; + em->len = SZ_1K * 36; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_1K * 36; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + + /* + * Try to add the extent map but with a search range of [140K, 144K), + * this should succeed and adjust the extent map to the range + * [128K, 144K), with a length of 16K and an offset of 20K. + * + * This simulates a scenario where in the subvolume tree of an inode we + * have a compressed file extent item for the range [108K, 144K) and we + * have an overlapping compressed extent map for the range [120K, 128K), + * which was created by an encoded write, but its ordered extent was not + * yet completed, so the subvolume tree doesn't have yet the file extent + * item for that range - we only have the extent map in the inode's + * extent map tree. + */ + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, SZ_1K * 140, SZ_4K); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [108K, 144K)"); + goto out; + } + + if (em->start != SZ_128K) { + test_err("unexpected extent map start %llu (should be 128K)", em->start); + ret = -EINVAL; + goto out; + } + if (em->len != SZ_16K) { + test_err("unexpected extent map length %llu (should be 16K)", em->len); + ret = -EINVAL; + goto out; + } + if (em->offset != SZ_1K * 20) { + test_err("unexpected extent map offset %llu (should be 20K)", em->offset); + ret = -EINVAL; + goto out; + } +out: + ret2 = free_extent_map_tree(inode); + if (ret == 0) + ret = ret2; + + return ret; +} + struct rmap_test_vector { u64 raid_type; u64 physical_start; @@ -1078,6 +1174,9 @@ int btrfs_test_extent_map(void) ret = test_case_7(fs_info, BTRFS_I(inode)); if (ret) goto out; + ret = test_case_8(fs_info, BTRFS_I(inode)); + if (ret) + goto out; test_msg("running rmap tests"); for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) { diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 6388786fd8b5..a825fa598e3c 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -634,7 +634,7 @@ static int check_dir_item(struct extent_buffer *leaf, */ if (key->type == BTRFS_DIR_ITEM_KEY || key->type == BTRFS_XATTR_ITEM_KEY) { - char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + char namebuf[MAX(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; read_extent_buffer(leaf, namebuf, (unsigned long)(di + 1), name_len); @@ -1289,6 +1289,19 @@ static void extent_err(const struct extent_buffer *eb, int slot, va_end(args); } +static bool is_valid_dref_root(u64 rootid) +{ + /* + * The following tree root objectids are allowed to have a data backref: + * - subvolume trees + * - data reloc tree + * - tree root + * For v1 space cache + */ + return is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID || + rootid == BTRFS_ROOT_TREE_OBJECTID; +} + static int check_extent_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) @@ -1441,6 +1454,8 @@ static int check_extent_item(struct extent_buffer *leaf, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; u64 seq; + u64 dref_root; + u64 dref_objectid; u64 dref_offset; u64 inline_offset; u8 inline_type; @@ -1484,11 +1499,26 @@ static int check_extent_item(struct extent_buffer *leaf, */ case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); + dref_root = btrfs_extent_data_ref_root(leaf, dref); + dref_objectid = btrfs_extent_data_ref_objectid(leaf, dref); dref_offset = btrfs_extent_data_ref_offset(leaf, dref); seq = hash_extent_data_ref( btrfs_extent_data_ref_root(leaf, dref), btrfs_extent_data_ref_objectid(leaf, dref), btrfs_extent_data_ref_offset(leaf, dref)); + if (unlikely(!is_valid_dref_root(dref_root))) { + extent_err(leaf, slot, + "invalid data ref root value %llu", + dref_root); + return -EUCLEAN; + } + if (unlikely(dref_objectid < BTRFS_FIRST_FREE_OBJECTID || + dref_objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid data ref objectid value %llu", + dref_root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(dref_offset, fs_info->sectorsize))) { extent_err(leaf, slot, @@ -1627,6 +1657,8 @@ static int check_extent_data_ref(struct extent_buffer *leaf, return -EUCLEAN; } for (; ptr < end; ptr += sizeof(*dref)) { + u64 root; + u64 objectid; u64 offset; /* @@ -1634,7 +1666,22 @@ static int check_extent_data_ref(struct extent_buffer *leaf, * overflow from the leaf due to hash collisions. */ dref = (struct btrfs_extent_data_ref *)ptr; + root = btrfs_extent_data_ref_root(leaf, dref); + objectid = btrfs_extent_data_ref_objectid(leaf, dref); offset = btrfs_extent_data_ref_offset(leaf, dref); + if (unlikely(!is_valid_dref_root(root))) { + extent_err(leaf, slot, + "invalid extent data backref root value %llu", + root); + return -EUCLEAN; + } + if (unlikely(objectid < BTRFS_FIRST_FREE_OBJECTID || + objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid extent data backref objectid value %llu", + root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e98aa8219303..808c9c048276 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2016,6 +2016,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) * CHECK_CAPS_AUTHONLY - we should only check the auth cap * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without * further delay. + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without + * further delay. */ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { @@ -2097,7 +2099,7 @@ retry: } doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " - "flushing %s issued %s revoking %s retain %s %s%s%s\n", + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), @@ -2105,7 +2107,8 @@ retry: ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); /* * If we no longer need to hold onto old our caps, and we may @@ -2180,6 +2183,11 @@ retry: queue_writeback = true; } + if (flags & CHECK_CAPS_FLUSH_FORCE) { + doutc(cl, "force to flush caps\n"); + goto ack; + } + if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ @@ -3510,6 +3518,8 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false; + bool revoke_wait = false; + int flags = 0; /* * If there is at least one crypto block then we'll trust @@ -3705,16 +3715,18 @@ static void handle_cap_grant(struct inode *inode, ceph_cap_string(cap->issued), ceph_cap_string(newcaps), ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && - (revoking & used & CEPH_CAP_FILE_BUFFER)) + (revoking & used & CEPH_CAP_FILE_BUFFER)) { writeback = true; /* initiate writeback; will delay ack */ - else if (queue_invalidate && + revoke_wait = true; + } else if (queue_invalidate && revoking == CEPH_CAP_FILE_CACHE && - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) - ; /* do nothing yet, invalidation will be queued */ - else if (cap == ci->i_auth_cap) + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { + revoke_wait = true; /* do nothing yet, invalidation will be queued */ + } else if (cap == ci->i_auth_cap) { check_caps = 1; /* check auth cap only */ - else + } else { check_caps = 2; /* check all caps */ + } /* If there is new caps, try to wake up the waiters */ if (~cap->issued & newcaps) wake = true; @@ -3741,8 +3753,9 @@ static void handle_cap_grant(struct inode *inode, BUG_ON(cap->issued & ~cap->implemented); /* don't let check_caps skip sending a response to MDS for revoke msgs */ - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0; + flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else @@ -3798,9 +3811,9 @@ static void handle_cap_grant(struct inode *inode, mutex_unlock(&session->s_mutex); if (check_caps == 1) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2) - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); } /* diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b63b4cd9b5b6..6e817bf1337c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; }; -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ struct ceph_cap_flush { u64 tid; diff --git a/fs/file.c b/fs/file.c index a3b72aa64f11..a11e59b5d602 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1248,6 +1248,7 @@ __releases(&files->file_lock) * tables and this condition does not arise without those. */ fdt = files_fdtable(files); + fd = array_index_nospec(fd, fdt->max_fds); tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9e0ea6fc2aa3..34eb2c2cbcde 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2069,8 +2069,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) continue; } - ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, - SVC_SOCK_ANONYMOUS, + ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, get_current_cred()); /* always save the latest error */ if (ret < 0) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c71ae5c04306..4a20e92474b2 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -1072,7 +1072,7 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file) static void cifs_security_flags_handle_must_flags(unsigned int *flags) { - unsigned int signflags = *flags & CIFSSEC_MUST_SIGN; + unsigned int signflags = *flags & (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL); if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) *flags = CIFSSEC_MUST_KRB5; diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 62d5fee3e5eb..ca2bd204bcc5 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -147,6 +147,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 49 -#define CIFS_VERSION "2.49" +#define SMB3_PRODUCT_BUILD 50 +#define CIFS_VERSION "2.50" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 8e86fec7dcd2..5c9b3e6cd95f 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -345,7 +345,7 @@ struct smb_version_operations { /* connect to a server share */ int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *, struct cifs_tcon *, const struct nls_table *); - /* close tree connecion */ + /* close tree connection */ int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); /* get DFS referrals */ int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, @@ -816,7 +816,7 @@ struct TCP_Server_Info { * Protected by @refpath_lock and @srv_lock. The @refpath_lock is * mostly used for not requiring a copy of @leaf_fullpath when getting * cached or new DFS referrals (which might also sleep during I/O). - * While @srv_lock is held for making string and NULL comparions against + * While @srv_lock is held for making string and NULL comparisons against * both fields as in mount(2) and cache refresh. * * format: \\HOST\SHARE[\OPTIONAL PATH] @@ -1471,29 +1471,6 @@ struct cifs_io_parms { struct TCP_Server_Info *server; }; -struct cifs_aio_ctx { - struct kref refcount; - struct list_head list; - struct mutex aio_mutex; - struct completion done; - struct iov_iter iter; - struct kiocb *iocb; - struct cifsFileInfo *cfile; - struct bio_vec *bv; - loff_t pos; - unsigned int nr_pinned_pages; - ssize_t rc; - unsigned int len; - unsigned int total_len; - unsigned int bv_need_unpin; /* If ->bv[] needs unpinning */ - bool should_dirty; - /* - * Indicates if this aio_ctx is for direct_io, - * If yes, iter is a copy of the user passed iov_iter - */ - bool direct_io; -}; - struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; @@ -1904,7 +1881,7 @@ static inline bool is_replayable_error(int error) #define CIFSSEC_MAY_SIGN 0x00001 #define CIFSSEC_MAY_NTLMV2 0x00004 #define CIFSSEC_MAY_KRB5 0x00008 -#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */ +#define CIFSSEC_MAY_SEAL 0x00040 #define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_MUST_SIGN 0x01001 @@ -1914,11 +1891,11 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_NTLMV2 0x04004 #define CIFSSEC_MUST_KRB5 0x08008 #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xCF0CF /* flags supported if no weak allowed */ #else -#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xC70C7 /* flags supported if no weak allowed */ #endif /* UPCALL */ -#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ +#define CIFSSEC_MUST_SEAL 0x40040 #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL) @@ -2010,7 +1987,6 @@ require use of the stronger protocol */ * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search * ->oplock_break_cancelled - * cifs_aio_ctx->aio_mutex cifs_aio_ctx cifs_aio_ctx_alloc ****************************************************************************/ #ifdef DECLARE_GLOBALS_HERE diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c15bb5ee7eb7..497bf3c447bc 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -619,8 +619,6 @@ int __cifs_calc_signature(struct smb_rqst *rqst, struct shash_desc *shash); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); -struct cifs_aio_ctx *cifs_aio_ctx_alloc(void); -void cifs_aio_ctx_release(struct kref *refcount); int cifs_alloc_hash(const char *name, struct shash_desc **sdesc); void cifs_free_hash(struct shash_desc **sdesc); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 4a8aa1de9522..dd0afa23734c 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1042,13 +1042,26 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, } rc = -EOPNOTSUPP; - switch ((data->reparse.tag = tag)) { - case 0: /* SMB1 symlink */ + data->reparse.tag = tag; + if (!data->reparse.tag) { if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, &data->symlink_target); } + if (rc == -EOPNOTSUPP) + data->reparse.tag = IO_REPARSE_TAG_INTERNAL; + } + + switch (data->reparse.tag) { + case 0: /* SMB1 symlink */ + break; + case IO_REPARSE_TAG_INTERNAL: + rc = 0; + if (le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY) { + cifs_create_junction_fattr(fattr, sb); + goto out; + } break; case IO_REPARSE_TAG_MOUNT_POINT: cifs_create_junction_fattr(fattr, sb); diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 855ac5a62edf..44dbaf9929a4 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -170,7 +170,10 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, static int cifs_shutdown(struct super_block *sb, unsigned long arg) { struct cifs_sb_info *sbi = CIFS_SB(sb); + struct tcon_link *tlink; + struct cifs_tcon *tcon; __u32 flags; + int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -178,14 +181,21 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) if (get_user(flags, (__u32 __user *)arg)) return -EFAULT; - if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) - return -EINVAL; + tlink = cifs_sb_tlink(sbi); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); + + trace_smb3_shutdown_enter(flags, tcon->tid); + if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) { + rc = -EINVAL; + goto shutdown_out_err; + } if (cifs_forced_shutdown(sbi)) - return 0; + goto shutdown_good; cifs_dbg(VFS, "shut down requested (%d)", flags); -/* trace_cifs_shutdown(sb, flags);*/ /* * see: @@ -201,7 +211,8 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) */ case CIFS_GOING_FLAGS_DEFAULT: cifs_dbg(FYI, "shutdown with default flag not supported\n"); - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; /* * FLAGS_LOGFLUSH is easy since it asks to write out metadata (not * data) but metadata writes are not cached on the client, so can treat @@ -210,11 +221,18 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) case CIFS_GOING_FLAGS_LOGFLUSH: case CIFS_GOING_FLAGS_NOLOGFLUSH: sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN; - return 0; + goto shutdown_good; default: - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; } + +shutdown_good: + trace_smb3_shutdown_done(flags, tcon->tid); return 0; +shutdown_out_err: + trace_smb3_shutdown_err(rc, flags, tcon->tid); + return rc; } static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 07c468ddb88a..c6f11e6f9eb9 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -352,7 +352,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) * on simple responses (wct, bcc both zero) * in particular have seen this on * ulogoffX and FindClose. This leaves - * one byte of bcc potentially unitialized + * one byte of bcc potentially uninitialized */ /* zero rest of bcc */ tmp[sizeof(struct smb_hdr)+1] = 0; @@ -995,60 +995,6 @@ parse_DFS_referrals_exit: return rc; } -struct cifs_aio_ctx * -cifs_aio_ctx_alloc(void) -{ - struct cifs_aio_ctx *ctx; - - /* - * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io - * to false so that we know when we have to unreference pages within - * cifs_aio_ctx_release() - */ - ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL); - if (!ctx) - return NULL; - - INIT_LIST_HEAD(&ctx->list); - mutex_init(&ctx->aio_mutex); - init_completion(&ctx->done); - kref_init(&ctx->refcount); - return ctx; -} - -void -cifs_aio_ctx_release(struct kref *refcount) -{ - struct cifs_aio_ctx *ctx = container_of(refcount, - struct cifs_aio_ctx, refcount); - - cifsFileInfo_put(ctx->cfile); - - /* - * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly - * which means that iov_iter_extract_pages() was a success and thus - * that we may have references or pins on pages that we need to - * release. - */ - if (ctx->bv) { - if (ctx->should_dirty || ctx->bv_need_unpin) { - unsigned int i; - - for (i = 0; i < ctx->nr_pinned_pages; i++) { - struct page *page = ctx->bv[i].bv_page; - - if (ctx->should_dirty) - set_page_dirty(page); - if (ctx->bv_need_unpin) - unpin_user_page(page); - } - } - kvfree(ctx->bv); - } - - kfree(ctx); -} - /** * cifs_alloc_hash - allocate hash and hash context together * @name: The name of the crypto hash algo @@ -1288,6 +1234,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, const char *full_path, bool *islink) { + struct TCP_Server_Info *server = tcon->ses->server; struct cifs_ses *ses = tcon->ses; size_t len; char *path; @@ -1304,12 +1251,12 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, !is_tcon_dfs(tcon)) return 0; - spin_lock(&tcon->tc_lock); - if (!tcon->origin_fullpath) { - spin_unlock(&tcon->tc_lock); + spin_lock(&server->srv_lock); + if (!server->leaf_fullpath) { + spin_unlock(&server->srv_lock); return 0; } - spin_unlock(&tcon->tc_lock); + spin_unlock(&server->srv_lock); /* * Slow path - tcon is DFS and @full_path has prefix path, so attempt diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index a0ffbda90733..689d8a506d45 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -505,6 +505,10 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, } switch (tag) { + case IO_REPARSE_TAG_INTERNAL: + if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY)) + return false; + fallthrough; case IO_REPARSE_TAG_DFS: case IO_REPARSE_TAG_DFSR: case IO_REPARSE_TAG_MOUNT_POINT: diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 6b55d1df9e2f..2c0644bc4e65 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -12,6 +12,12 @@ #include "fs_context.h" #include "cifsglob.h" +/* + * Used only by cifs.ko to ignore reparse points from files when client or + * server doesn't support FSCTL_GET_REPARSE_POINT. + */ +#define IO_REPARSE_TAG_INTERNAL ((__u32)~0U) + static inline dev_t reparse_nfs_mkdev(struct reparse_posix_data *buf) { u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer); @@ -78,10 +84,19 @@ static inline u32 reparse_mode_wsl_tag(mode_t mode) static inline bool reparse_inode_match(struct inode *inode, struct cifs_fattr *fattr) { + struct cifsInodeInfo *cinode = CIFS_I(inode); struct timespec64 ctime = inode_get_ctime(inode); - return (CIFS_I(inode)->cifsAttrs & ATTR_REPARSE) && - CIFS_I(inode)->reparse_tag == fattr->cf_cifstag && + /* + * Do not match reparse tags when client or server doesn't support + * FSCTL_GET_REPARSE_POINT. @fattr->cf_cifstag should contain correct + * reparse tag from query dir response but the client won't be able to + * read the reparse point data anyway. This spares us a revalidation. + */ + if (cinode->reparse_tag != IO_REPARSE_TAG_INTERNAL && + cinode->reparse_tag != fattr->cf_cifstag) + return false; + return (cinode->cifsAttrs & ATTR_REPARSE) && timespec64_equal(&ctime, &fattr->cf_ctime); } diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 5c02a12251c8..9f5bc41433c1 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -930,6 +930,8 @@ int smb2_query_path_info(const unsigned int xid, switch (rc) { case 0: + rc = parse_create_response(data, cifs_sb, &out_iov[0]); + break; case -EOPNOTSUPP: /* * BB TODO: When support for special files added to Samba @@ -948,7 +950,8 @@ int smb2_query_path_info(const unsigned int xid, cmds[num_cmds++] = SMB2_OP_GET_REPARSE; oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, - FILE_READ_ATTRIBUTES | FILE_READ_EA, + FILE_READ_ATTRIBUTES | + FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, create_options | OPEN_REPARSE_POINT, ACL_NO_MODE); cifs_get_readable_path(tcon, full_path, &cfile); @@ -1256,7 +1259,8 @@ int smb2_query_reparse_point(const unsigned int xid, cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); cifs_get_readable_path(tcon, full_path, &cfile); - oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_READ_ATTRIBUTES, + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, + FILE_READ_ATTRIBUTES | FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, OPEN_REPARSE_POINT, ACL_NO_MODE); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, &in_iov, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 9a06b5594669..83facb54276a 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -82,6 +82,9 @@ int smb3_encryption_required(const struct cifs_tcon *tcon) if (tcon->seal && (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) return 1; + if (((global_secflags & CIFSSEC_MUST_SEAL) == CIFSSEC_MUST_SEAL) && + (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) + return 1; return 0; } diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index d74e829de51c..7bcc379014ca 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -406,7 +406,7 @@ static void smbd_post_send_credits(struct work_struct *work) else response = get_empty_queue_buffer(info); if (!response) { - /* now switch to emtpy packet queue */ + /* now switch to empty packet queue */ if (use_receive_queue) { use_receive_queue = 0; continue; @@ -618,7 +618,7 @@ out: /* * Test if FRWR (Fast Registration Work Requests) is supported on the device - * This implementation requries FRWR on RDMA read/write + * This implementation requires FRWR on RDMA read/write * return value: true if it is supported */ static bool frwr_is_supported(struct ib_device_attr *attrs) @@ -2177,7 +2177,7 @@ cleanup_entries: * MR available in the list. It may access the list while the * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock * as they never modify the same places. However, there may be several CPUs - * issueing I/O trying to get MR at the same time, mr_list_lock is used to + * issuing I/O trying to get MR at the same time, mr_list_lock is used to * protect this situation. */ static struct smbd_mr *get_mr(struct smbd_connection *info) @@ -2311,7 +2311,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, /* * There is no need for waiting for complemtion on ib_post_send * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution - * on the next ib_post_send when we actaully send I/O to remote peer + * on the next ib_post_send when we actually send I/O to remote peer */ rc = ib_post_send(info->id->qp, ®_wr->wr, NULL); if (!rc) diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 6b3bdfb97211..0f0c10c7ada7 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -1388,7 +1388,7 @@ DECLARE_EVENT_CLASS(smb3_ioctl_class, __entry->command = command; ), TP_printk("xid=%u fid=0x%llx ioctl cmd=0x%x", - __entry->xid, __entry->fid, __entry->command) + __entry->xid, __entry->fid, __entry->command) ) #define DEFINE_SMB3_IOCTL_EVENT(name) \ @@ -1400,9 +1400,58 @@ DEFINE_EVENT(smb3_ioctl_class, smb3_##name, \ DEFINE_SMB3_IOCTL_EVENT(ioctl); +DECLARE_EVENT_CLASS(smb3_shutdown_class, + TP_PROTO(__u32 flags, + __u32 tid), + TP_ARGS(flags, tid), + TP_STRUCT__entry( + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("flags=0x%x tid=0x%x", + __entry->flags, __entry->tid) +) + +#define DEFINE_SMB3_SHUTDOWN_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_class, smb3_##name, \ + TP_PROTO(__u32 flags, \ + __u32 tid), \ + TP_ARGS(flags, tid)) + +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_enter); +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_done); +DECLARE_EVENT_CLASS(smb3_shutdown_err_class, + TP_PROTO(int rc, + __u32 flags, + __u32 tid), + TP_ARGS(rc, flags, tid), + TP_STRUCT__entry( + __field(int, rc) + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->rc = rc; + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("rc=%d flags=0x%x tid=0x%x", + __entry->rc, __entry->flags, __entry->tid) +) +#define DEFINE_SMB3_SHUTDOWN_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_err_class, smb3_##name, \ + TP_PROTO(int rc, \ + __u32 flags, \ + __u32 tid), \ + TP_ARGS(rc, flags, tid)) +DEFINE_SMB3_SHUTDOWN_ERR_EVENT(shutdown_err); DECLARE_EVENT_CLASS(smb3_credit_class, TP_PROTO(__u64 currmid, diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index adfe0d058701..6e68aaf5bd20 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1289,7 +1289,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, out: /* * This will dequeue all mids. After this it is important that the - * demultiplex_thread will not process any of these mids any futher. + * demultiplex_thread will not process any of these mids any further. * This is prevented above by using a noop callback that will not * wake this thread except for the very last PDU. */ diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 5d88c184f0fc..01e99e98457d 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -112,7 +112,7 @@ static void release_ei(struct kref *ref) entry->release(entry->name, ei->data); } - call_rcu(&ei->rcu, free_ei_rcu); + call_srcu(&eventfs_srcu, &ei->rcu, free_ei_rcu); } static inline void put_ei(struct eventfs_inode *ei) @@ -736,7 +736,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { cleanup_ei(ei); - ei = NULL; + ei = ERR_PTR(-EBUSY); } return ei; } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 1028ab6d9a74..1748dff58c3b 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -42,7 +42,7 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) struct tracefs_inode *ti; unsigned long flags; - ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); + ti = alloc_inode_sb(sb, tracefs_inode_cachep, GFP_KERNEL); if (!ti) return NULL; @@ -53,15 +53,14 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) return &ti->vfs_inode; } -static void tracefs_free_inode_rcu(struct rcu_head *rcu) +static void tracefs_free_inode(struct inode *inode) { - struct tracefs_inode *ti; + struct tracefs_inode *ti = get_tracefs(inode); - ti = container_of(rcu, struct tracefs_inode, rcu); kmem_cache_free(tracefs_inode_cachep, ti); } -static void tracefs_free_inode(struct inode *inode) +static void tracefs_destroy_inode(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); unsigned long flags; @@ -69,8 +68,6 @@ static void tracefs_free_inode(struct inode *inode) spin_lock_irqsave(&tracefs_inode_lock, flags); list_del_rcu(&ti->list); spin_unlock_irqrestore(&tracefs_inode_lock, flags); - - call_rcu(&ti->rcu, tracefs_free_inode_rcu); } static ssize_t default_read_file(struct file *file, char __user *buf, @@ -437,6 +434,7 @@ static int tracefs_drop_inode(struct inode *inode) static const struct super_operations tracefs_super_operations = { .alloc_inode = tracefs_alloc_inode, .free_inode = tracefs_free_inode, + .destroy_inode = tracefs_destroy_inode, .drop_inode = tracefs_drop_inode, .statfs = simple_statfs, .show_options = tracefs_show_options, diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index f704d8348357..d83c2a25f288 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -10,10 +10,7 @@ enum { }; struct tracefs_inode { - union { - struct inode vfs_inode; - struct rcu_head rcu; - }; + struct inode vfs_inode; /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ struct list_head list; unsigned long flags; diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index cb035da3f990..fb05f44f6c75 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -56,7 +56,7 @@ typedef uint8_t xfs_dqtype_t; * And, of course, we also need to take into account the dquot log format item * used to describe each dquot. */ -#define XFS_DQUOT_LOGRES(mp) \ +#define XFS_DQUOT_LOGRES \ ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 3dc8f785bf29..45aaf169806a 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -338,11 +338,11 @@ xfs_calc_write_reservation( blksz); t1 += adj; t3 += adj; - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 1); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -410,11 +410,11 @@ xfs_calc_itruncate_reservation( xfs_refcountbt_block_count(mp, 4), blksz); - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 2); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -466,7 +466,7 @@ STATIC uint xfs_calc_rename_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -577,7 +577,7 @@ STATIC uint xfs_calc_link_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -641,7 +641,7 @@ STATIC uint xfs_calc_remove_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -729,7 +729,7 @@ xfs_calc_icreate_reservation( struct xfs_mount *mp) { struct xfs_trans_resv *resp = M_RES(mp); - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; unsigned int t1, t2, t3 = 0; t1 = xfs_calc_icreate_resv_alloc(mp); @@ -747,7 +747,7 @@ STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { - uint res = XFS_DQUOT_LOGRES(mp); + uint res = XFS_DQUOT_LOGRES; res += xfs_calc_icreate_resv_alloc(mp); return res + xfs_calc_iunlink_add_reservation(mp); @@ -829,7 +829,7 @@ STATIC uint xfs_calc_ifree_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + @@ -846,7 +846,7 @@ STATIC uint xfs_calc_ichange_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); @@ -955,7 +955,7 @@ STATIC uint xfs_calc_addafork_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + @@ -1003,7 +1003,7 @@ STATIC uint xfs_calc_attrsetm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); @@ -1043,7 +1043,7 @@ STATIC uint xfs_calc_attrrm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + max((xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)) + diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 0dbc484b182f..2f98d90d7fd6 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -696,7 +696,7 @@ xrep_agfl_init_header( * step. */ xagb_bitmap_init(&af.used_extents); - af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp), + af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp); xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af); error = xagb_bitmap_disunion(agfl_extents, &af.used_extents); if (error) diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 733c410a2279..91e7b51ce068 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -799,7 +799,7 @@ xchk_parent_pptr( } if (pp->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - goto out_pp; + goto out_names; /* * Complain if the number of parent pointers doesn't match the link diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 92ef4cdc486e..c886d5d0eb02 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -959,18 +959,16 @@ TRACE_EVENT(xfile_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; __entry->ino = file_inode(xf->file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(xf->file, pathname, sizeof(pathname) - 1); + path = file_path(xf->file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("xfino 0x%lx path '%s'", __entry->ino, diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 5c947e5ce8b8..7db386304875 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -139,7 +139,7 @@ xfs_attr_shortform_list( sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ - sbp->value = &sfe->nameval[sfe->namelen], + sbp->value = &sfe->nameval[sfe->namelen]; sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5646d300b286..180ce697305a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -4715,20 +4715,18 @@ TRACE_EVENT(xmbuf_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(file, pathname, sizeof(pathname) - 1); + path = file_path(file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("dev %d:%d xmino 0x%lx path '%s'", MAJOR(__entry->dev), MINOR(__entry->dev), diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index ab3d22f662f2..eaf849260bd6 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -110,7 +110,24 @@ xfs_attr_change( args->whichfork = XFS_ATTR_FORK; xfs_attr_sethash(args); - return xfs_attr_set(args, op, args->attr_filter & XFS_ATTR_ROOT); + /* + * Some xattrs must be resistant to allocation failure at ENOSPC, e.g. + * creating an inode with ACLs or security attributes requires the + * allocation of the xattr holding that information to succeed. Hence + * we allow xattrs in the VFS TRUSTED, SYSTEM, POSIX_ACL and SECURITY + * (LSM xattr) namespaces to dip into the reserve block pool to allow + * manipulation of these xattrs when at ENOSPC. These VFS xattr + * namespaces translate to the XFS_ATTR_ROOT and XFS_ATTR_SECURE on-disk + * namespaces. + * + * For most of these cases, these special xattrs will fit in the inode + * itself and so consume no extra space or only require temporary extra + * space while an overwrite is being made. Hence the use of the reserved + * pool is largely to avoid the worst case reservation from preventing + * the xattr from being created at ENOSPC. + */ + return xfs_attr_set(args, op, + args->attr_filter & (XFS_ATTR_ROOT | XFS_ATTR_SECURE)); } |
