author | Jakub Kicinski <kuba@kernel.org> | 2024-02-08 15:20:37 -0800
---|---|---
committer | Jakub Kicinski <kuba@kernel.org> | 2024-02-08 15:30:33 -0800
commit | 3be042cf46feeedf664152d063376b5c17026d1d (patch) |
tree | 9733bd92f1e7ff996c9f92320ca999bed5553283 /fs |
parent | b6b614558ed5b2ca50edacc0f2fbf5f52158c86c (diff) |
parent | 1f719a2f3fa67665578c759ac34fd3d3690c1a20 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
No conflicts.
Adjacent changes:
drivers/net/ethernet/stmicro/stmmac/common.h
38cc3c6dcc09 ("net: stmmac: protect updates of 64-bit statistics counters")
fd5a6a71313e ("net: stmmac: est: Per Tx-queue error count for HLBF")
c5c3e1bfc9e0 ("net: stmmac: Offload queueMaxSDU from tc-taprio")
drivers/net/wireless/microchip/wilc1000/netdev.c
c9013880284d ("wifi: fill in MODULE_DESCRIPTION()s for wilc1000")
328efda22af8 ("wifi: wilc1000: do not realloc workqueue everytime an interface is added")
net/unix/garbage.c
11498715f266 ("af_unix: Remove io_uring code for GC.")
1279f9d9dec2 ("af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC.")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'fs')
36 files changed, 510 insertions, 778 deletions
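The fs/bcachefs/fs-ioctl.c hunk directly below reorders the subvolume-destroy ioctl so that `dir` is resolved before any check can fail and `inode_unlock(dir)` sits under the common `err:` label, closing a lock leak on early error returns. The stand-alone sketch here (userspace, a pthread mutex standing in for the VFS inode lock, and made-up helpers `check_same_fs()`/`check_exists()`) only illustrates that single-exit pattern; it is not the bcachefs code.

```c
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dir_lock = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical stand-ins for the checks that may fail in the ioctl. */
static int check_same_fs(int arg) { return arg == 0 ? 0 : -EXDEV; }
static int check_exists(int arg)  { return arg >= 0 ? 0 : -ENOENT; }

static int destroy_subvolume(int arg)
{
	int ret;

	pthread_mutex_lock(&dir_lock);

	ret = check_same_fs(arg);	/* may fail with -EXDEV */
	if (ret)
		goto err;

	ret = check_exists(arg);	/* may fail with -ENOENT */
	if (ret)
		goto err;

	/* ... the actual unlink work would go here ... */
err:
	pthread_mutex_unlock(&dir_lock);	/* runs on success and on every error path */
	return ret;
}

int main(void)
{
	printf("ok=%d exdev=%d\n", destroy_subvolume(0), destroy_subvolume(1));
	return 0;
}
```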
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 3a4c24c28e7f..3dc8630ff9fe 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -455,6 +455,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, if (IS_ERR(victim)) return PTR_ERR(victim); + dir = d_inode(path.dentry); if (victim->d_sb->s_fs_info != c) { ret = -EXDEV; goto err; @@ -463,14 +464,13 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, ret = -ENOENT; goto err; } - dir = d_inode(path.dentry); ret = __bch2_unlink(dir, victim, true); if (!ret) { fsnotify_rmdir(dir, victim); d_delete(victim); } - inode_unlock(dir); err: + inode_unlock(dir); dput(victim); path_put(&path); return ret; diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h index b2be565bb8f2..64df11ab422b 100644 --- a/fs/bcachefs/mean_and_variance.h +++ b/fs/bcachefs/mean_and_variance.h @@ -17,7 +17,7 @@ * Rust and rustc has issues with u128. */ -#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) +#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) && !defined(CONFIG_PARISC) typedef struct { unsigned __int128 v; diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index b1c867aa2b58..9220d7de10db 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -53,9 +53,9 @@ int bch2_run_thread_with_file(struct thread_with_file *thr, if (ret) goto err; - fd_install(fd, file); get_task_struct(thr->task); wake_up_process(thr->task); + fd_install(fd, file); return fd; err: if (fd >= 0) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 56b815fd9fc6..231003b405ef 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -418,14 +418,15 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats, bch2_quantiles_update(&stats->quantiles, duration); } - if (time_after64(end, stats->last_event)) { + if (stats->last_event && time_after64(end, stats->last_event)) { freq = end - stats->last_event; mean_and_variance_update(&stats->freq_stats, freq); mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq); stats->max_freq = max(stats->max_freq, freq); stats->min_freq = min(stats->min_freq, freq); - stats->last_event = end; } + + stats->last_event = end; } static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c6907d533fe8..e71ef97d0a7c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1336,8 +1336,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { - /* Shouldn't get preallocated anon_dev for cached roots */ - ASSERT(!anon_dev); + /* + * Some other caller may have read out the newly inserted + * subvolume already (for things like backref walk etc). Not + * that common but still possible. In that case, we just need + * to free the anon_dev. 
+ */ + if (unlikely(anon_dev)) { + free_anon_bdev(anon_dev); + anon_dev = 0; + } + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dfed9dd9c2d7..ac3316e0d11c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3815,6 +3815,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto out; } + if (sa->create && is_fstree(sa->qgroupid)) { + ret = -EINVAL; + goto out; + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 63b426cc7798..5470e1cdf10c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1736,6 +1736,15 @@ out: return ret; } +static bool qgroup_has_usage(struct btrfs_qgroup *qgroup) +{ + return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 || + qgroup->excl > 0 || qgroup->excl_cmpr > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0); +} + int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -1755,6 +1764,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) goto out; } + if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) { + ret = -EBUSY; + goto out; + } + /* Check if there are no children of this qgroup */ if (!list_empty(&qgroup->members)) { ret = -EBUSY; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2d7519a6ce72..7902298c1f25 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -8111,7 +8111,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) } if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { - ret = -EINVAL; + ret = -EOPNOTSUPP; goto out; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a5d784872303..023571f8dd1b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -252,8 +252,10 @@ struct ext4_allocation_request { #define EXT4_MAP_MAPPED BIT(BH_Mapped) #define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten) #define EXT4_MAP_BOUNDARY BIT(BH_Boundary) +#define EXT4_MAP_DELAYED BIT(BH_Delay) #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ - EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY) + EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ + EXT4_MAP_DELAYED) struct ext4_map_blocks { ext4_fsblk_t m_pblk; @@ -2912,10 +2914,10 @@ extern const struct seq_operations ext4_mb_seq_groups_ops; extern const struct seq_operations ext4_mb_seq_structs_summary_ops; extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset); extern int ext4_mb_init(struct super_block *); -extern int ext4_mb_release(struct super_block *); +extern void ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); -extern void ext4_discard_preallocations(struct inode *, unsigned int); +extern void ext4_discard_preallocations(struct inode *); extern int __init ext4_init_mballoc(void); extern void ext4_exit_mballoc(void); extern ext4_group_t ext4_mb_prefetch(struct super_block *sb, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 01299b55a567..7669d154c05e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) * i_rwsem. So we can safely drop the i_data_sem here. 
*/ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; @@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode, /* - * ext4_ext_determine_hole - determine hole around given block + * ext4_ext_find_hole - find hole around given block according to the given path * @inode: inode we lookup in * @path: path in extent tree to @lblk * @lblk: pointer to logical block around which we want to determine hole @@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode, * The function returns the length of a hole starting at @lblk. We update @lblk * to the beginning of the hole if we managed to find it. */ -static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *lblk) +static ext4_lblk_t ext4_ext_find_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *lblk) { int depth = ext_depth(inode); struct ext4_extent *ex; @@ -2271,30 +2271,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, } /* - * ext4_ext_put_gap_in_cache: - * calculate boundaries of the gap that the requested block fits into - * and cache this gap - */ -static void -ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, - ext4_lblk_t hole_len) -{ - struct extent_status es; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, - hole_start + hole_len - 1, &es); - if (es.es_len) { - /* There's delayed extent containing lblock? */ - if (es.es_lblk <= hole_start) - return; - hole_len = min(es.es_lblk - hole_start, hole_len); - } - ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); - ext4_es_insert_extent(inode, hole_start, hole_len, ~0, - EXTENT_STATUS_HOLE); -} - -/* * ext4_ext_rm_idx: * removes index from the index block. */ @@ -4062,6 +4038,72 @@ static int get_implied_cluster_alloc(struct super_block *sb, return 0; } +/* + * Determine hole length around the given logical block, first try to + * locate and expand the hole from the given @path, and then adjust it + * if it's partially or completely converted to delayed extents, insert + * it into the extent cache tree if it's indeed a hole, finally return + * the length of the determined extent. + */ +static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t lblk) +{ + ext4_lblk_t hole_start, len; + struct extent_status es; + + hole_start = lblk; + len = ext4_ext_find_hole(inode, path, &hole_start); +again: + ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, + hole_start + len - 1, &es); + if (!es.es_len) + goto insert_hole; + + /* + * There's a delalloc extent in the hole, handle it if the delalloc + * extent is in front of, behind and straddle the queried range. + */ + if (lblk >= es.es_lblk + es.es_len) { + /* + * The delalloc extent is in front of the queried range, + * find again from the queried start block. + */ + len -= lblk - hole_start; + hole_start = lblk; + goto again; + } else if (in_range(lblk, es.es_lblk, es.es_len)) { + /* + * The delalloc extent containing lblk, it must have been + * added after ext4_map_blocks() checked the extent status + * tree so we are not holding i_rwsem and delalloc info is + * only stabilized by i_data_sem we are going to release + * soon. Don't modify the extent status tree and report + * extent as a hole, just adjust the length to the delalloc + * extent's after lblk. 
+ */ + len = es.es_lblk + es.es_len - lblk; + return len; + } else { + /* + * The delalloc extent is partially or completely behind + * the queried range, update hole length until the + * beginning of the delalloc extent. + */ + len = min(es.es_lblk - hole_start, len); + } + +insert_hole: + /* Put just found gap into cache to speed up subsequent requests */ + ext_debug(inode, " -> %u:%u\n", hole_start, len); + ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); + + /* Update hole_len to reflect hole size after lblk */ + if (hole_start != lblk) + len -= lblk - hole_start; + + return len; +} /* * Block allocation/map/preallocation routine for extents based files @@ -4179,22 +4221,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { - ext4_lblk_t hole_start, hole_len; + ext4_lblk_t len; - hole_start = map->m_lblk; - hole_len = ext4_ext_determine_hole(inode, path, &hole_start); - /* - * put just found gap into cache to speed up - * subsequent requests - */ - ext4_ext_put_gap_in_cache(inode, hole_start, hole_len); + len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk); - /* Update hole_len to reflect hole size after map->m_lblk */ - if (hole_start != map->m_lblk) - hole_len -= map->m_lblk - hole_start; map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, hole_len); - + map->m_len = min_t(unsigned int, map->m_len, len); goto out; } @@ -4313,7 +4345,7 @@ got_allocated_blocks: * not a good idea to call discard here directly, * but otherwise we'd need to call it every free(). */ - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; ext4_free_blocks(handle, inode, NULL, newblock, @@ -5357,7 +5389,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start); ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); @@ -5365,7 +5397,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ret = ext4_ext_shift_extents(inode, handle, punch_stop, punch_stop - punch_start, SHIFT_LEFT); @@ -5497,7 +5529,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) goto out_stop; down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); path = ext4_find_extent(inode, offset_lblk, NULL, 0); if (IS_ERR(path)) { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6aa15dafc677..54d6ff22585c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -174,7 +174,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) (atomic_read(&inode->i_writecount) == 1) && !EXT4_I(inode)->i_reserved_data_blocks) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); } if (is_dx(inode) && filp->private_data) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index a9f3716119d3..d8ca7f64f952 100644 --- a/fs/ext4/indirect.c +++ 
b/fs/ext4/indirect.c @@ -714,7 +714,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5af1b0b8680e..2ccf3b5e3a7c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -371,7 +371,7 @@ void ext4_da_update_reserve_space(struct inode *inode, */ if ((ei->i_reserved_data_blocks == 0) && !inode_is_open_for_write(inode)) - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); } static int __check_block_validity(struct inode *inode, const char *func, @@ -515,6 +515,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, map->m_len = retval; } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { map->m_pblk = 0; + map->m_flags |= ext4_es_is_delayed(&es) ? + EXT4_MAP_DELAYED : 0; retval = es.es_len - (map->m_lblk - es.es_lblk); if (retval > map->m_len) retval = map->m_len; @@ -1703,11 +1705,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { - if (ext4_es_is_hole(&es)) { - retval = 0; - down_read(&EXT4_I(inode)->i_data_sem); + if (ext4_es_is_hole(&es)) goto add_delayed; - } /* * Delayed extent could be allocated by fallocate. @@ -1749,26 +1748,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - -add_delayed: - if (retval == 0) { - int ret; - - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? 
- */ - - ret = ext4_insert_delayed_block(inode, map->m_lblk); - if (ret != 0) { - retval = ret; - goto out_unlock; - } - - map_bh(bh, inode->i_sb, invalid_block); - set_buffer_new(bh); - set_buffer_delay(bh); - } else if (retval > 0) { + if (retval < 0) { + up_read(&EXT4_I(inode)->i_data_sem); + return retval; + } + if (retval > 0) { unsigned int status; if (unlikely(retval != map->m_len)) { @@ -1783,11 +1767,21 @@ add_delayed: EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); + up_read(&EXT4_I(inode)->i_data_sem); + return retval; } + up_read(&EXT4_I(inode)->i_data_sem); -out_unlock: - up_read((&EXT4_I(inode)->i_data_sem)); +add_delayed: + down_write(&EXT4_I(inode)->i_data_sem); + retval = ext4_insert_delayed_block(inode, map->m_lblk); + up_write(&EXT4_I(inode)->i_data_sem); + if (retval) + return retval; + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); return retval; } @@ -3268,6 +3262,9 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, iomap->addr = (u64) map->m_pblk << blkbits; if (flags & IOMAP_DAX) iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; + } else if (map->m_flags & EXT4_MAP_DELAYED) { + iomap->type = IOMAP_DELALLOC; + iomap->addr = IOMAP_NULL_ADDR; } else { iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; @@ -3430,35 +3427,11 @@ const struct iomap_ops ext4_iomap_overwrite_ops = { .iomap_end = ext4_iomap_end, }; -static bool ext4_iomap_is_delalloc(struct inode *inode, - struct ext4_map_blocks *map) -{ - struct extent_status es; - ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, - map->m_lblk, end, &es); - - if (!es.es_len || es.es_lblk > end) - return false; - - if (es.es_lblk > map->m_lblk) { - map->m_len = es.es_lblk - map->m_lblk; - return false; - } - - offset = map->m_lblk - es.es_lblk; - map->m_len = es.es_len - offset; - - return true; -} - static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { int ret; - bool delalloc = false; struct ext4_map_blocks map; u8 blkbits = inode->i_blkbits; @@ -3499,13 +3472,8 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) return ret; - if (ret == 0) - delalloc = ext4_iomap_is_delalloc(inode, &map); - set_iomap: ext4_set_iomap(inode, iomap, &map, offset, length, flags); - if (delalloc && iomap->type == IOMAP_HOLE) - iomap->type = IOMAP_DELALLOC; return 0; } @@ -4015,12 +3983,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) /* If there are blocks to remove, do it */ if (stop_block > first_block) { + ext4_lblk_t hole_len = stop_block - first_block; down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); - ext4_es_remove_extent(inode, first_block, - stop_block - first_block); + ext4_es_remove_extent(inode, first_block, hole_len); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) ret = ext4_ext_remove_space(inode, first_block, @@ -4029,6 +3997,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) ret = ext4_ind_remove_space(handle, inode, first_block, stop_block); + ext4_es_insert_extent(inode, first_block, hole_len, ~0, + EXTENT_STATUS_HOLE); up_write(&EXT4_I(inode)->i_data_sem); } ext4_fc_track_range(handle, inode, first_block, stop_block); 
@@ -4170,7 +4140,7 @@ int ext4_truncate(struct inode *inode) down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) err = ext4_ext_truncate(handle, inode); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index aa6be510eb8f..7160a71044c8 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -467,7 +467,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_reset_inode_seed(inode); ext4_reset_inode_seed(inode_bl); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); err = ext4_mark_inode_dirty(handle, inode); if (err < 0) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f44f668e407f..e4f7cf9d89c4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(EXT4_SB(sb), first + i); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, "freeing block already freed " "(bit %u)", first + i); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); } mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); } @@ -677,7 +677,7 @@ do { \ } \ } while (0) -static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, +static void __mb_check_buddy(struct ext4_buddy *e4b, char *file, const char *function, int line) { struct super_block *sb = e4b->bd_sb; @@ -696,7 +696,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, void *buddy2; if (e4b->bd_info->bb_check_counter++ % 10) - return 0; + return; while (order > 1) { buddy = mb_find_buddy(e4b, order, &max); @@ -758,7 +758,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, grp = ext4_get_group_info(sb, e4b->bd_group); if (!grp) - return NULL; + return; list_for_each(cur, &grp->bb_prealloc_list) { ext4_group_t groupnr; struct ext4_prealloc_space *pa; @@ -768,7 +768,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, for (i = 0; i < pa->pa_len; i++) MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); } - return 0; } #undef MB_CHECK_ASSERT #define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ @@ -842,7 +841,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) struct ext4_sb_info *sbi = EXT4_SB(sb); int new_order; - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0) return; new_order = mb_avg_fragment_size_order(sb, @@ -871,7 +870,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) * cr level needs an update. */ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *iter; @@ -945,7 +944,7 @@ ext4_mb_find_good_group_avg_frag_lists(struct ext4_allocation_context *ac, int o * order. Updates *new_cr if cr level needs an update. 
*/ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = NULL; @@ -990,7 +989,7 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context * * much and fall to CR_GOAL_LEN_SLOW in that case. */ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = NULL; @@ -1125,11 +1124,11 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac, } if (*new_cr == CR_POWER2_ALIGNED) { - ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group); } else if (*new_cr == CR_GOAL_LEN_FAST) { - ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_goal_fast(ac, new_cr, group); } else if (*new_cr == CR_BEST_AVAIL_LEN) { - ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_best_avail(ac, new_cr, group); } else { /* * TODO: For CR=2, we can arrange groups in an rb tree sorted by @@ -1233,6 +1232,24 @@ void ext4_mb_generate_buddy(struct super_block *sb, atomic64_add(period, &sbi->s_mb_generation_time); } +static void mb_regenerate_buddy(struct ext4_buddy *e4b) +{ + int count; + int order = 1; + void *buddy; + + while ((buddy = mb_find_buddy(e4b, order++, &count))) + mb_set_bits(buddy, 0, count); + + e4b->bd_info->bb_fragments = 0; + memset(e4b->bd_info->bb_counters, 0, + sizeof(*e4b->bd_info->bb_counters) * + (e4b->bd_sb->s_blocksize_bits + 2)); + + ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, + e4b->bd_bitmap, e4b->bd_group, e4b->bd_info); +} + /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. The information involve @@ -1891,11 +1908,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); - this_cpu_inc(discard_pa_seq); - e4b->bd_info->bb_free += count; - if (first < e4b->bd_info->bb_first_free) - e4b->bd_info->bb_first_free = first; - /* access memory sequentially: check left neighbour, * clear range and then check right neighbour */ @@ -1909,21 +1921,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t blocknr; + /* + * Fastcommit replay can free already freed blocks which + * corrupts allocation info. Regenerate it. + */ + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + mb_regenerate_buddy(e4b); + goto check; + } + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { - ext4_grp_locked_error(sb, e4b->bd_group, - inode ? inode->i_ino : 0, - blocknr, - "freeing already freed block (bit %u); block bitmap corrupt.", - block); - ext4_mark_group_bitmap_corrupted( - sb, e4b->bd_group, + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); - } - goto done; + ext4_grp_locked_error(sb, e4b->bd_group, + inode ? 
inode->i_ino : 0, blocknr, + "freeing already freed block (bit %u); block bitmap corrupt.", + block); + return; } + this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free += count; + if (first < e4b->bd_info->bb_first_free) + e4b->bd_info->bb_first_free = first; + /* let's maintain fragments counter */ if (left_is_free && right_is_free) e4b->bd_info->bb_fragments--; @@ -1948,9 +1970,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (first <= last) mb_buddy_mark_free(e4b, first >> 1, last >> 1); -done: mb_set_largest_free_order(sb, e4b->bd_info); mb_update_avg_fragment_size(sb, e4b->bd_info); +check: mb_check_buddy(e4b); } @@ -2276,6 +2298,9 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, return; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -2283,6 +2308,7 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); } @@ -2309,12 +2335,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -2347,6 +2371,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); @@ -2380,12 +2405,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, k = mb_find_next_zero_bit(buddy, max, 0); if (k >= max) { + ext4_mark_group_bitmap_corrupted(ac->ac_sb, + e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0, "%d free clusters of order %d. But found 0", grp->bb_counters[i], i); - ext4_mark_group_bitmap_corrupted(ac->ac_sb, - e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); break; } ac->ac_found++; @@ -2436,12 +2461,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, * free blocks even though group info says we * have free blocks */ + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But bitmap says 0", free); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); break; } @@ -2467,12 +2492,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, if (WARN_ON(ex.fe_len <= 0)) break; if (free < ex.fe_len) { + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But got %d blocks", free, ex.fe_len); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); /* * The number of free blocks differs. This mostly * indicate that the bitmap is corrupt. 
So exit @@ -3725,7 +3750,7 @@ static int ext4_mb_cleanup_pa(struct ext4_group_info *grp) return count; } -int ext4_mb_release(struct super_block *sb) +void ext4_mb_release(struct super_block *sb) { ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; @@ -3801,8 +3826,6 @@ int ext4_mb_release(struct super_block *sb) } free_percpu(sbi->s_locality_groups); - - return 0; } static inline int ext4_issue_discard(struct super_block *sb, @@ -5284,7 +5307,7 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac) * the caller MUST hold group/inode locks. * TODO: optimize the case when there are no in-core structures yet */ -static noinline_for_stack int +static noinline_for_stack void ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, struct ext4_prealloc_space *pa) { @@ -5334,11 +5357,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, */ } atomic_add(free, &sbi->s_mb_discarded); - - return 0; } -static noinline_for_stack int +static noinline_for_stack void ext4_mb_release_group_pa(struct ext4_buddy *e4b, struct ext4_prealloc_space *pa) { @@ -5352,13 +5373,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) { ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu", e4b->bd_group, group, pa->pa_pstart); - return 0; + return; } mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); - - return 0; } /* @@ -5479,7 +5498,7 @@ out_dbg: * * FIXME!! Make sure it is valid at all the call sites */ -void ext4_discard_preallocations(struct inode *inode, unsigned int needed) +void ext4_discard_preallocations(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct super_block *sb = inode->i_sb; @@ -5491,9 +5510,8 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed) struct rb_node *iter; int err; - if (!S_ISREG(inode->i_mode)) { + if (!S_ISREG(inode->i_mode)) return; - } if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) return; @@ -5501,15 +5519,12 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed) mb_debug(sb, "discard preallocation for inode %lu\n", inode->i_ino); trace_ext4_discard_preallocations(inode, - atomic_read(&ei->i_prealloc_active), needed); - - if (needed == 0) - needed = UINT_MAX; + atomic_read(&ei->i_prealloc_active)); repeat: /* first, collect all pa's in the inode */ write_lock(&ei->i_prealloc_lock); - for (iter = rb_first(&ei->i_prealloc_node); iter && needed; + for (iter = rb_first(&ei->i_prealloc_node); iter; iter = rb_next(iter)) { pa = rb_entry(iter, struct ext4_prealloc_space, pa_node.inode_node); @@ -5533,7 +5548,6 @@ repeat: spin_unlock(&pa->pa_lock); rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node); list_add(&pa->u.pa_tmp_list, &list); - needed--; continue; } @@ -5943,7 +5957,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* * release all resource we used in allocation */ -static int ext4_mb_release_context(struct ext4_allocation_context *ac) +static void ext4_mb_release_context(struct ext4_allocation_context *ac) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_prealloc_space *pa = ac->ac_pa; @@ -5980,7 +5994,6 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) mutex_unlock(&ac->ac_lg->lg_mutex); ext4_mb_collect_stats(ac); - return 0; } 
static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) @@ -6761,6 +6774,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) bool set_trimmed = false; void *bitmap; + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + return 0; + last = ext4_last_grp_cluster(sb, e4b->bd_group); bitmap = e4b->bd_bitmap; if (start == 0 && max >= last) diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index d7aeb5da7d86..56938532b4ce 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -192,7 +192,6 @@ struct ext4_allocation_context { */ ext4_grpblk_t ac_orig_goal_len; - __u32 ac_groups_considered; __u32 ac_flags; /* allocation hints */ __u16 ac_groups_scanned; __u16 ac_groups_linear_remaining; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3aa57376d9c2..7cd4afa4de1d 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -618,6 +618,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, goto out; o_end = o_start + len; + *moved_len = 0; while (o_start < o_end) { struct ext4_extent *ex; ext4_lblk_t cur_blk, next_blk; @@ -672,7 +673,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, */ ext4_double_up_write_data_sem(orig_inode, donor_inode); /* Swap original branches with new branches */ - move_extent_per_page(o_filp, donor_inode, + *moved_len += move_extent_per_page(o_filp, donor_inode, orig_page_index, donor_page_index, offset_in_page, cur_len, unwritten, &ret); @@ -682,14 +683,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, o_start += cur_len; d_start += cur_len; } - *moved_len = o_start - orig_blk; - if (*moved_len > len) - *moved_len = len; out: if (*moved_len) { - ext4_discard_preallocations(orig_inode, 0); - ext4_discard_preallocations(donor_inode, 0); + ext4_discard_preallocations(orig_inode); + ext4_discard_preallocations(donor_inode); } ext4_free_ext_path(path); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dcba0f85dfe2..0f931d0c227d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1525,7 +1525,7 @@ void ext4_clear_inode(struct inode *inode) ext4_fc_del(inode); invalidate_inode_buffers(inode); clear_inode(inode); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); dquot_drop(inode); if (EXT4_I(inode)->jinode) { diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 177f1f41f225..2e215e8c3c88 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -32,25 +32,21 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *parent = NULL; + struct dentry *parent; struct gfs2_sbd *sdp; struct gfs2_inode *dip; - struct inode *dinode, *inode; + struct inode *inode; struct gfs2_holder d_gh; struct gfs2_inode *ip = NULL; int error, valid = 0; int had_lock = 0; - if (flags & LOOKUP_RCU) { - dinode = d_inode_rcu(READ_ONCE(dentry->d_parent)); - if (!dinode) - return -ECHILD; - } else { - parent = dget_parent(dentry); - dinode = d_inode(parent); - } - sdp = GFS2_SB(dinode); - dip = GFS2_I(dinode); + if (flags & LOOKUP_RCU) + return -ECHILD; + + parent = dget_parent(dentry); + sdp = GFS2_SB(d_inode(parent)); + dip = GFS2_I(d_inode(parent)); inode = d_inode(dentry); if (inode) { @@ -66,8 +62,7 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); if (!had_lock) { - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, - flags & LOOKUP_RCU ? 
GL_NOBLOCK : 0, &d_gh); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); if (error) goto out; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6bfc9383b7b8..1b95db2c3aac 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1882,10 +1882,10 @@ int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, WARN_ON_ONCE(!may_not_block); return -ECHILD; } - if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - int noblock = may_not_block ? GL_NOBLOCK : 0; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_ANY | noblock, &i_gh); + if (gfs2_glock_is_locked_by_me(gl) == NULL) { + if (may_not_block) + return -ECHILD; + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6dc6340e2852..7d6c657e0409 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4945,10 +4945,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); - spin_unlock(&fp->fi_lock); return false; } @@ -5557,12 +5555,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (status) goto out_unlock; + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + spin_lock(&state_lock); spin_lock(&fp->fi_lock); - if (fp->fi_had_conflict) - status = -EAGAIN; - else - status = hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 16befff4cbb4..c86a72c9d9ec 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -87,7 +87,7 @@ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ -#define MAX_COMPOUND 5 +#define MAX_COMPOUND 7 /* * Default number of credits to keep available for SMB3. @@ -1032,6 +1032,8 @@ struct cifs_chan { __u8 signkey[SMB3_SIGN_KEY_SIZE]; }; +#define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) + /* * Session structure. One of these for each uid session with a particular host */ @@ -1064,6 +1066,7 @@ struct cifs_ses { enum securityEnum sectype; /* what security flavor was specified? */ bool sign; /* is signing required? 
*/ bool domainAuto:1; + unsigned int flags; __u16 session_flags; __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE]; diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index cde81042bebd..ed4bd88dd528 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -75,6 +75,10 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, { unsigned int i; + /* if the channel is waiting for termination */ + if (server->terminate) + return CIFS_INVAL_CHAN_INDEX; + for (i = 0; i < ses->chan_count; i++) { if (ses->chans[i].server == server) return i; @@ -269,6 +273,8 @@ int cifs_try_adding_channels(struct cifs_ses *ses) &iface->sockaddr, rc); kref_put(&iface->refcount, release_iface); + /* failure to add chan should increase weight */ + iface->weight_fulfilled++; continue; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 86f6f35b7f32..c58fa44dd6b0 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -178,6 +178,7 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, } ses->chans[chan_index].server = NULL; + server->terminate = true; spin_unlock(&ses->chan_lock); /* @@ -188,7 +189,6 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, */ cifs_put_tcp_session(server, from_reconnect); - server->terminate = true; cifs_signal_cifsd_for_reconnect(server, false); /* mark primary server as needing reconnect */ @@ -399,6 +399,15 @@ skip_sess_setup: goto out; } + spin_lock(&ses->ses_lock); + if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) { + spin_unlock(&ses->ses_lock); + mutex_unlock(&ses->session_mutex); + goto skip_add_channels; + } + ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); + if (!rc && (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { mutex_unlock(&ses->session_mutex); @@ -410,7 +419,7 @@ skip_sess_setup: rc = SMB3_request_interfaces(xid, tcon, false); free_xid(xid); - if (rc == -EOPNOTSUPP) { + if (rc == -EOPNOTSUPP && ses->chan_count > 1) { /* * some servers like Azure SMB server do not advertise * that multichannel has been disabled with server @@ -428,17 +437,22 @@ skip_sess_setup: if (ses->chan_max > ses->chan_count && ses->iface_count && !SERVER_IS_CHAN(server)) { - if (ses->chan_count == 1) + if (ses->chan_count == 1) { cifs_server_dbg(VFS, "supports multichannel now\n"); + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + } cifs_try_adding_channels(ses); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); } } else { mutex_unlock(&ses->session_mutex); } + skip_add_channels: + spin_lock(&ses->ses_lock); + ses->flags &= ~CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); if (smb2_command != SMB2_INTERNAL_CMD) mod_delayed_work(cifsiod_wq, &server->reconnect, 0); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index e00278fcfa4f..994d70193432 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -435,8 +435,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (!(flags & CIFS_TRANSFORM_REQ)) return __smb_send_rqst(server, num_rqst, rqst); - if (num_rqst > MAX_COMPOUND - 1) - return -ENOMEM; + if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) + return -EIO; if (!server->ops->init_transform_rq) { cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 1c3dd0ad4660..110e8a272189 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ 
-62,6 +62,46 @@ enum { #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) +/* + * eventfs_inode reference count management. + * + * NOTE! We count only references from dentries, in the + * form 'dentry->d_fsdata'. There are also references from + * directory inodes ('ti->private'), but the dentry reference + * count is always a superset of the inode reference count. + */ +static void release_ei(struct kref *ref) +{ + struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); + + WARN_ON_ONCE(!ei->is_freed); + + kfree(ei->entry_attrs); + kfree_const(ei->name); + kfree_rcu(ei, rcu); +} + +static inline void put_ei(struct eventfs_inode *ei) +{ + if (ei) + kref_put(&ei->kref, release_ei); +} + +static inline void free_ei(struct eventfs_inode *ei) +{ + if (ei) { + ei->is_freed = 1; + put_ei(ei); + } +} + +static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) +{ + if (ei) + kref_get(&ei->kref); + return ei; +} + static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); @@ -156,33 +196,30 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, return ret; } -static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry) +static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb) { - struct inode *inode; + struct inode *root; /* Only update if the "events" was on the top level */ if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) return; /* Get the tracefs root inode. */ - inode = d_inode(dentry->d_sb->s_root); - ei->attr.uid = inode->i_uid; - ei->attr.gid = inode->i_gid; + root = d_inode(sb->s_root); + ei->attr.uid = root->i_uid; + ei->attr.gid = root->i_gid; } static void set_top_events_ownership(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); struct eventfs_inode *ei = ti->private; - struct dentry *dentry; /* The top events directory doesn't get automatically updated */ if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) return; - dentry = ei->dentry; - - update_top_events_attr(ei, dentry); + update_top_events_attr(ei, inode->i_sb); if (!(ei->attr.mode & EVENTFS_SAVE_UID)) inode->i_uid = ei->attr.uid; @@ -233,10 +270,11 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) { struct eventfs_inode *ei; - mutex_lock(&eventfs_mutex); do { - /* The parent always has an ei, except for events itself */ - ei = dentry->d_parent->d_fsdata; + // The parent is stable because we do not do renames + dentry = dentry->d_parent; + // ... and directories always have d_fsdata + ei = dentry->d_fsdata; /* * If the ei is being freed, the ownership of the children @@ -246,12 +284,10 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) ei = NULL; break; } - - dentry = ei->dentry; + // Walk upwards until you find the events inode } while (!ei->is_events); - mutex_unlock(&eventfs_mutex); - update_top_events_attr(ei, dentry); + update_top_events_attr(ei, dentry->d_sb); return ei; } @@ -282,11 +318,10 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, } /** - * create_file - create a file in the tracefs filesystem - * @name: the name of the file to create. + * lookup_file - look up a file in the tracefs filesystem + * @dentry: the dentry to look up * @mode: the permission that the file should have. * @attr: saved attributes changed by user - * @parent: parent dentry for this file. * @data: something that the caller will want to get to later on. 
* @fop: struct file_operations that should be used for this file. * @@ -294,30 +329,25 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, * directory. The inode.i_private pointer will point to @data in the open() * call. */ -static struct dentry *create_file(const char *name, umode_t mode, +static struct dentry *lookup_file(struct eventfs_inode *parent_ei, + struct dentry *dentry, + umode_t mode, struct eventfs_attr *attr, - struct dentry *parent, void *data, + void *data, const struct file_operations *fop) { struct tracefs_inode *ti; - struct dentry *dentry; struct inode *inode; if (!(mode & S_IFMT)) mode |= S_IFREG; if (WARN_ON_ONCE(!S_ISREG(mode))) - return NULL; - - WARN_ON_ONCE(!parent); - dentry = eventfs_start_creating(name, parent); - - if (IS_ERR(dentry)) - return dentry; + return ERR_PTR(-EIO); inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) - return eventfs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); /* If the user updated the directory's attributes, use them */ update_inode_attr(dentry, inode, attr, mode); @@ -331,32 +361,31 @@ static struct dentry *create_file(const char *name, umode_t mode, ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; - d_instantiate(dentry, inode); - fsnotify_create(dentry->d_parent->d_inode, dentry); - return eventfs_end_creating(dentry); + + // Files have their parent's ei as their fsdata + dentry->d_fsdata = get_ei(parent_ei); + + d_add(dentry, inode); + return NULL; }; /** - * create_dir - create a dir in the tracefs filesystem + * lookup_dir_entry - look up a dir in the tracefs filesystem + * @dentry: the directory to look up * @ei: the eventfs_inode that represents the directory to create - * @parent: parent dentry for this file. * - * This function will create a dentry for a directory represented by + * This function will look up a dentry for a directory represented by * a eventfs_inode. 
*/ -static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) +static struct dentry *lookup_dir_entry(struct dentry *dentry, + struct eventfs_inode *pei, struct eventfs_inode *ei) { struct tracefs_inode *ti; - struct dentry *dentry; struct inode *inode; - dentry = eventfs_start_creating(ei->name, parent); - if (IS_ERR(dentry)) - return dentry; - inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) - return eventfs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); /* If the user updated the directory's attributes, use them */ update_inode_attr(dentry, inode, &ei->attr, @@ -370,64 +399,46 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; + /* Only directories have ti->private set to an ei, not files */ + ti->private = ei; - inc_nlink(inode); - d_instantiate(dentry, inode); - inc_nlink(dentry->d_parent->d_inode); - fsnotify_mkdir(dentry->d_parent->d_inode, dentry); - return eventfs_end_creating(dentry); + dentry->d_fsdata = get_ei(ei); + + d_add(dentry, inode); + return NULL; } -static void free_ei(struct eventfs_inode *ei) +static inline struct eventfs_inode *alloc_ei(const char *name) { - kfree_const(ei->name); - kfree(ei->d_children); - kfree(ei->entry_attrs); - kfree(ei); + struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL); + + if (!ei) + return NULL; + + ei->name = kstrdup_const(name, GFP_KERNEL); + if (!ei->name) { + kfree(ei); + return NULL; + } + kref_init(&ei->kref); + return ei; } /** - * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode - * @ti: the tracefs_inode of the dentry + * eventfs_d_release - dentry is going away * @dentry: dentry which has the reference to remove. * * Remove the association between a dentry from an eventfs_inode. */ -void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) +void eventfs_d_release(struct dentry *dentry) { - struct eventfs_inode *ei; - int i; - - mutex_lock(&eventfs_mutex); - - ei = dentry->d_fsdata; - if (!ei) - goto out; - - /* This could belong to one of the files of the ei */ - if (ei->dentry != dentry) { - for (i = 0; i < ei->nr_entries; i++) { - if (ei->d_children[i] == dentry) - break; - } - if (WARN_ON_ONCE(i == ei->nr_entries)) - goto out; - ei->d_children[i] = NULL; - } else if (ei->is_freed) { - free_ei(ei); - } else { - ei->dentry = NULL; - } - - dentry->d_fsdata = NULL; - out: - mutex_unlock(&eventfs_mutex); + put_ei(dentry->d_fsdata); } /** - * create_file_dentry - create a dentry for a file of an eventfs_inode + * lookup_file_dentry - create a dentry for a file of an eventfs_inode * @ei: the eventfs_inode that the file will be created under - * @idx: the index into the d_children[] of the @ei + * @idx: the index into the entry_attrs[] of the @ei * @parent: The parent dentry of the created file. * @name: The name of the file to create * @mode: The mode of the file. @@ -438,163 +449,17 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) * address located at @e_dentry. 
*/ static struct dentry * -create_file_dentry(struct eventfs_inode *ei, int idx, - struct dentry *parent, const char *name, umode_t mode, void *data, +lookup_file_dentry(struct dentry *dentry, + struct eventfs_inode *ei, int idx, + umode_t mode, void *data, const struct file_operations *fops) { struct eventfs_attr *attr = NULL; - struct dentry **e_dentry = &ei->d_children[idx]; - struct dentry *dentry; - - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); - mutex_lock(&eventfs_mutex); - if (ei->is_freed) { - mutex_unlock(&eventfs_mutex); - return NULL; - } - /* If the e_dentry already has a dentry, use it */ - if (*e_dentry) { - dget(*e_dentry); - mutex_unlock(&eventfs_mutex); - return *e_dentry; - } - - /* ei->entry_attrs are protected by SRCU */ if (ei->entry_attrs) attr = &ei->entry_attrs[idx]; - mutex_unlock(&eventfs_mutex); - - dentry = create_file(name, mode, attr, parent, data, fops); - - mutex_lock(&eventfs_mutex); - - if (IS_ERR_OR_NULL(dentry)) { - /* - * When the mutex was released, something else could have - * created the dentry for this e_dentry. In which case - * use that one. - * - * If ei->is_freed is set, the e_dentry is currently on its - * way to being freed, don't return it. If e_dentry is NULL - * it means it was already freed. - */ - if (ei->is_freed) { - dentry = NULL; - } else { - dentry = *e_dentry; - dget(dentry); - } - mutex_unlock(&eventfs_mutex); - return dentry; - } - - if (!*e_dentry && !ei->is_freed) { - *e_dentry = dentry; - dentry->d_fsdata = ei; - } else { - /* - * Should never happen unless we get here due to being freed. - * Otherwise it means two dentries exist with the same name. - */ - WARN_ON_ONCE(!ei->is_freed); - dentry = NULL; - } - mutex_unlock(&eventfs_mutex); - - return dentry; -} - -/** - * eventfs_post_create_dir - post create dir routine - * @ei: eventfs_inode of recently created dir - * - * Map the meta-data of files within an eventfs dir to their parent dentry - */ -static void eventfs_post_create_dir(struct eventfs_inode *ei) -{ - struct eventfs_inode *ei_child; - struct tracefs_inode *ti; - - lockdep_assert_held(&eventfs_mutex); - - /* srcu lock already held */ - /* fill parent-child relation */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { - ei_child->d_parent = ei->dentry; - } - - ti = get_tracefs(ei->dentry->d_inode); - ti->private = ei; -} - -/** - * create_dir_dentry - Create a directory dentry for the eventfs_inode - * @pei: The eventfs_inode parent of ei. - * @ei: The eventfs_inode to create the directory for - * @parent: The dentry of the parent of this directory - * - * This creates and attaches a directory dentry to the eventfs_inode @ei. - */ -static struct dentry * -create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, - struct dentry *parent) -{ - struct dentry *dentry = NULL; - - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); - - mutex_lock(&eventfs_mutex); - if (pei->is_freed || ei->is_freed) { - mutex_unlock(&eventfs_mutex); - return NULL; - } - if (ei->dentry) { - /* If the eventfs_inode already has a dentry, use it */ - dentry = ei->dentry; - dget(dentry); - mutex_unlock(&eventfs_mutex); - return dentry; - } - mutex_unlock(&eventfs_mutex); - - dentry = create_dir(ei, parent); - - mutex_lock(&eventfs_mutex); - - if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { - /* - * When the mutex was released, something else could have - * created the dentry for this e_dentry. In which case - * use that one. 
- * - * If ei->is_freed is set, the e_dentry is currently on its - * way to being freed. - */ - dentry = ei->dentry; - if (dentry) - dget(dentry); - mutex_unlock(&eventfs_mutex); - return dentry; - } - - if (!ei->dentry && !ei->is_freed) { - ei->dentry = dentry; - eventfs_post_create_dir(ei); - dentry->d_fsdata = ei; - } else { - /* - * Should never happen unless we get here due to being freed. - * Otherwise it means two dentries exist with the same name. - */ - WARN_ON_ONCE(!ei->is_freed); - dentry = NULL; - } - mutex_unlock(&eventfs_mutex); - - return dentry; + return lookup_file(ei, dentry, mode, attr, data, fops); } /** @@ -611,79 +476,50 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - const struct file_operations *fops; - const struct eventfs_entry *entry; struct eventfs_inode *ei_child; struct tracefs_inode *ti; struct eventfs_inode *ei; - struct dentry *ei_dentry = NULL; - struct dentry *ret = NULL; - struct dentry *d; const char *name = dentry->d_name.name; - umode_t mode; - void *data; - int idx; - int i; - int r; + struct dentry *result = NULL; ti = get_tracefs(dir); if (!(ti->flags & TRACEFS_EVENT_INODE)) - return NULL; + return ERR_PTR(-EIO); - /* Grab srcu to prevent the ei from going away */ - idx = srcu_read_lock(&eventfs_srcu); - - /* - * Grab the eventfs_mutex to consistent value from ti->private. - * This s - */ mutex_lock(&eventfs_mutex); - ei = READ_ONCE(ti->private); - if (ei && !ei->is_freed) - ei_dentry = READ_ONCE(ei->dentry); - mutex_unlock(&eventfs_mutex); - if (!ei || !ei_dentry) + ei = ti->private; + if (!ei || ei->is_freed) goto out; - data = ei->data; - - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { + list_for_each_entry(ei_child, &ei->children, list) { if (strcmp(ei_child->name, name) != 0) continue; - ret = simple_lookup(dir, dentry, flags); - if (IS_ERR(ret)) + if (ei_child->is_freed) goto out; - d = create_dir_dentry(ei, ei_child, ei_dentry); - dput(d); + result = lookup_dir_entry(dentry, ei, ei_child); goto out; } - for (i = 0; i < ei->nr_entries; i++) { - entry = &ei->entries[i]; - if (strcmp(name, entry->name) == 0) { - void *cdata = data; - mutex_lock(&eventfs_mutex); - /* If ei->is_freed, then the event itself may be too */ - if (!ei->is_freed) - r = entry->callback(name, &mode, &cdata, &fops); - else - r = -1; - mutex_unlock(&eventfs_mutex); - if (r <= 0) - continue; - ret = simple_lookup(dir, dentry, flags); - if (IS_ERR(ret)) - goto out; - d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); - dput(d); - break; - } + for (int i = 0; i < ei->nr_entries; i++) { + void *data; + umode_t mode; + const struct file_operations *fops; + const struct eventfs_entry *entry = &ei->entries[i]; + + if (strcmp(name, entry->name) != 0) + continue; + + data = ei->data; + if (entry->callback(name, &mode, &data, &fops) <= 0) + goto out; + + result = lookup_file_dentry(dentry, ei, i, mode, data, fops); + goto out; } out: - srcu_read_unlock(&eventfs_srcu, idx); - return ret; + mutex_unlock(&eventfs_mutex); + return result; } /* @@ -833,25 +669,10 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode if (!parent) return ERR_PTR(-EINVAL); - ei = kzalloc(sizeof(*ei), GFP_KERNEL); + ei = alloc_ei(name); if (!ei) return ERR_PTR(-ENOMEM); - ei->name = kstrdup_const(name, GFP_KERNEL); - if (!ei->name) { - kfree(ei); - return ERR_PTR(-ENOMEM); - } - - if (size) { - ei->d_children = kcalloc(size, sizeof(*ei->d_children), 
GFP_KERNEL); - if (!ei->d_children) { - kfree_const(ei->name); - kfree(ei); - return ERR_PTR(-ENOMEM); - } - } - ei->entries = entries; ei->nr_entries = size; ei->data = data; @@ -859,10 +680,8 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode INIT_LIST_HEAD(&ei->list); mutex_lock(&eventfs_mutex); - if (!parent->is_freed) { + if (!parent->is_freed) list_add_tail(&ei->list, &parent->children); - ei->d_parent = parent->dentry; - } mutex_unlock(&eventfs_mutex); /* Was the parent freed? */ @@ -902,28 +721,20 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry if (IS_ERR(dentry)) return ERR_CAST(dentry); - ei = kzalloc(sizeof(*ei), GFP_KERNEL); + ei = alloc_ei(name); if (!ei) - goto fail_ei; + goto fail; inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) goto fail; - if (size) { - ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); - if (!ei->d_children) - goto fail; - } - - ei->dentry = dentry; + // Note: we have a ref to the dentry from tracefs_start_creating() + ei->events_dir = dentry; ei->entries = entries; ei->nr_entries = size; ei->is_events = 1; ei->data = data; - ei->name = kstrdup_const(name, GFP_KERNEL); - if (!ei->name) - goto fail; /* Save the ownership of this directory */ uid = d_inode(dentry->d_parent)->i_uid; @@ -954,11 +765,19 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry inode->i_op = &eventfs_root_dir_inode_operations; inode->i_fop = &eventfs_file_operations; - dentry->d_fsdata = ei; + dentry->d_fsdata = get_ei(ei); - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); + /* + * Keep all eventfs directories with i_nlink == 1. + * Due to the dynamic nature of the dentry creations and not + * wanting to add a pointer to the parent eventfs_inode in the + * eventfs_inode structure, keeping the i_nlink in sync with the + * number of directories would cause too much complexity for + * something not worth much. Keeping directory links at 1 + * tells userspace not to trust the link number. 
+ */ d_instantiate(dentry, inode); + /* The dentry of the "events" parent does keep track though */ inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); tracefs_end_creating(dentry); @@ -966,72 +785,11 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry return ei; fail: - kfree(ei->d_children); - kfree(ei); - fail_ei: + free_ei(ei); tracefs_failed_creating(dentry); return ERR_PTR(-ENOMEM); } -static LLIST_HEAD(free_list); - -static void eventfs_workfn(struct work_struct *work) -{ - struct eventfs_inode *ei, *tmp; - struct llist_node *llnode; - - llnode = llist_del_all(&free_list); - llist_for_each_entry_safe(ei, tmp, llnode, llist) { - /* This dput() matches the dget() from unhook_dentry() */ - for (int i = 0; i < ei->nr_entries; i++) { - if (ei->d_children[i]) - dput(ei->d_children[i]); - } - /* This should only get here if it had a dentry */ - if (!WARN_ON_ONCE(!ei->dentry)) - dput(ei->dentry); - } -} - -static DECLARE_WORK(eventfs_work, eventfs_workfn); - -static void free_rcu_ei(struct rcu_head *head) -{ - struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); - - if (ei->dentry) { - /* Do not free the ei until all references of dentry are gone */ - if (llist_add(&ei->llist, &free_list)) - queue_work(system_unbound_wq, &eventfs_work); - return; - } - - /* If the ei doesn't have a dentry, neither should its children */ - for (int i = 0; i < ei->nr_entries; i++) { - WARN_ON_ONCE(ei->d_children[i]); - } - - free_ei(ei); -} - -static void unhook_dentry(struct dentry *dentry) -{ - if (!dentry) - return; - /* - * Need to add a reference to the dentry that is expected by - * simple_recursive_removal(), which will include a dput(). - */ - dget(dentry); - - /* - * Also add a reference for the dput() in eventfs_workfn(). - * That is required as that dput() will free the ei after - * the SRCU grace period is over. - */ - dget(dentry); -} - /** * eventfs_remove_rec - remove eventfs dir or file from list * @ei: eventfs_inode to be removed. @@ -1044,8 +802,6 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) { struct eventfs_inode *ei_child; - if (!ei) - return; /* * Check recursion depth. It should never be greater than 3: * 0 - events/ @@ -1057,28 +813,11 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) return; /* search for nested folders or files */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - lockdep_is_held(&eventfs_mutex)) { - /* Children only have dentry if parent does */ - WARN_ON_ONCE(ei_child->dentry && !ei->dentry); + list_for_each_entry(ei_child, &ei->children, list) eventfs_remove_rec(ei_child, level + 1); - } - - ei->is_freed = 1; - - for (int i = 0; i < ei->nr_entries; i++) { - if (ei->d_children[i]) { - /* Children only have dentry if parent does */ - WARN_ON_ONCE(!ei->dentry); - unhook_dentry(ei->d_children[i]); - } - } - - unhook_dentry(ei->dentry); - - list_del_rcu(&ei->list); - call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); + list_del(&ei->list); + free_ei(ei); } /** @@ -1089,22 +828,12 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) */ void eventfs_remove_dir(struct eventfs_inode *ei) { - struct dentry *dentry; - if (!ei) return; mutex_lock(&eventfs_mutex); - dentry = ei->dentry; eventfs_remove_rec(ei, 0); mutex_unlock(&eventfs_mutex); - - /* - * If any of the ei children has a dentry, then the ei itself - * must have a dentry. 
- */ - if (dentry) - simple_recursive_removal(dentry, NULL); } /** @@ -1117,7 +846,11 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei) { struct dentry *dentry; - dentry = ei->dentry; + dentry = ei->events_dir; + if (!dentry) + return; + + ei->events_dir = NULL; eventfs_remove_dir(ei); /* @@ -1127,5 +860,6 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei) * sticks around while the other ei->dentry are created * and destroyed dynamically. */ + d_invalidate(dentry); dput(dentry); } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index e1b172c0e091..d65ffad4c327 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -38,8 +38,6 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) if (!ti) return NULL; - ti->flags = 0; - return &ti->vfs_inode; } @@ -379,21 +377,30 @@ static const struct super_operations tracefs_super_operations = { .show_options = tracefs_show_options, }; -static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode) +/* + * It would be cleaner if eventfs had its own dentry ops. + * + * Note that d_revalidate is called potentially under RCU, + * so it can't take the eventfs mutex etc. It's fine - if + * we open a file just as it's marked dead, things will + * still work just fine, and just see the old stale case. + */ +static void tracefs_d_release(struct dentry *dentry) { - struct tracefs_inode *ti; + if (dentry->d_fsdata) + eventfs_d_release(dentry); +} - if (!dentry || !inode) - return; +static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct eventfs_inode *ei = dentry->d_fsdata; - ti = get_tracefs(inode); - if (ti && ti->flags & TRACEFS_EVENT_INODE) - eventfs_set_ei_status_free(ti, dentry); - iput(inode); + return !(ei && ei->is_freed); } static const struct dentry_operations tracefs_dentry_operations = { - .d_iput = tracefs_dentry_iput, + .d_revalidate = tracefs_d_revalidate, + .d_release = tracefs_d_release, }; static int trace_fill_super(struct super_block *sb, void *data, int silent) @@ -497,75 +504,6 @@ struct dentry *tracefs_end_creating(struct dentry *dentry) return dentry; } -/** - * eventfs_start_creating - start the process of creating a dentry - * @name: Name of the file created for the dentry - * @parent: The parent dentry where this dentry will be created - * - * This is a simple helper function for the dynamically created eventfs - * files. When the directory of the eventfs files are accessed, their - * dentries are created on the fly. This function is used to start that - * process. - */ -struct dentry *eventfs_start_creating(const char *name, struct dentry *parent) -{ - struct dentry *dentry; - int error; - - /* Must always have a parent. */ - if (WARN_ON_ONCE(!parent)) - return ERR_PTR(-EINVAL); - - error = simple_pin_fs(&trace_fs_type, &tracefs_mount, - &tracefs_mount_count); - if (error) - return ERR_PTR(error); - - if (unlikely(IS_DEADDIR(parent->d_inode))) - dentry = ERR_PTR(-ENOENT); - else - dentry = lookup_one_len(name, parent, strlen(name)); - - if (!IS_ERR(dentry) && dentry->d_inode) { - dput(dentry); - dentry = ERR_PTR(-EEXIST); - } - - if (IS_ERR(dentry)) - simple_release_fs(&tracefs_mount, &tracefs_mount_count); - - return dentry; -} - -/** - * eventfs_failed_creating - clean up a failed eventfs dentry creation - * @dentry: The dentry to clean up - * - * If after calling eventfs_start_creating(), a failure is detected, the - * resources created by eventfs_start_creating() needs to be cleaned up. 
In - * that case, this function should be called to perform that clean up. - */ -struct dentry *eventfs_failed_creating(struct dentry *dentry) -{ - dput(dentry); - simple_release_fs(&tracefs_mount, &tracefs_mount_count); - return NULL; -} - -/** - * eventfs_end_creating - Finish the process of creating a eventfs dentry - * @dentry: The dentry that has successfully been created. - * - * This function is currently just a place holder to match - * eventfs_start_creating(). In case any synchronization needs to be added, - * this function will be used to implement that without having to modify - * the callers of eventfs_start_creating(). - */ -struct dentry *eventfs_end_creating(struct dentry *dentry) -{ - return dentry; -} - /* Find the inode that this will use for default */ static struct inode *instance_inode(struct dentry *parent, struct inode *inode) { @@ -779,7 +717,11 @@ static void init_once(void *foo) { struct tracefs_inode *ti = (struct tracefs_inode *) foo; + /* inode_init_once() calls memset() on the vfs_inode portion */ inode_init_once(&ti->vfs_inode); + + /* Zero out the rest */ + memset_after(ti, 0, vfs_inode); } static int __init tracefs_init(void) diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 91c2bf0b91d9..beb3dcd0e434 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -11,9 +11,10 @@ enum { }; struct tracefs_inode { + struct inode vfs_inode; + /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ unsigned long flags; void *private; - struct inode vfs_inode; }; /* @@ -31,43 +32,37 @@ struct eventfs_attr { /* * struct eventfs_inode - hold the properties of the eventfs directories. * @list: link list into the parent directory + * @rcu: Union with @list for freeing + * @children: link list into the child eventfs_inode * @entries: the array of entries representing the files in the directory * @name: the name of the directory to create - * @children: link list into the child eventfs_inode - * @dentry: the dentry of the directory - * @d_parent: pointer to the parent's dentry - * @d_children: The array of dentries to represent the files when created + * @events_dir: the dentry of the events directory * @entry_attrs: Saved mode and ownership of the @d_children - * @attr: Saved mode and ownership of eventfs_inode itself * @data: The private data to pass to the callbacks + * @attr: Saved mode and ownership of eventfs_inode itself * @is_freed: Flag set if the eventfs is on its way to be freed * Note if is_freed is set, then dentry is corrupted. 
+ * @is_events: Flag set for only the top level "events" directory * @nr_entries: The number of items in @entries + * @ino: The saved inode number */ struct eventfs_inode { - struct list_head list; + union { + struct list_head list; + struct rcu_head rcu; + }; + struct list_head children; const struct eventfs_entry *entries; const char *name; - struct list_head children; - struct dentry *dentry; /* Check is_freed to access */ - struct dentry *d_parent; - struct dentry **d_children; + struct dentry *events_dir; struct eventfs_attr *entry_attrs; - struct eventfs_attr attr; void *data; + struct eventfs_attr attr; + struct kref kref; unsigned int is_freed:1; unsigned int is_events:1; unsigned int nr_entries:30; unsigned int ino; - /* - * Union - used for deletion - * @llist: for calling dput() if needed after RCU - * @rcu: eventfs_inode to delete in RCU - */ - union { - struct llist_node llist; - struct rcu_head rcu; - }; }; static inline struct tracefs_inode *get_tracefs(const struct inode *inode) @@ -79,9 +74,7 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent); struct dentry *tracefs_end_creating(struct dentry *dentry); struct dentry *tracefs_failed_creating(struct dentry *dentry); struct inode *tracefs_get_inode(struct super_block *sb); -struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); -struct dentry *eventfs_failed_creating(struct dentry *dentry); -struct dentry *eventfs_end_creating(struct dentry *dentry); -void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); + +void eventfs_d_release(struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 9976a00a73f9..e965a48e7db9 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -421,10 +421,10 @@ xfs_attr_complete_op( bool do_replace = args->op_flags & XFS_DA_OP_REPLACE; args->op_flags &= ~XFS_DA_OP_REPLACE; - if (do_replace) { - args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + if (do_replace) return replace_state; - } + return XFS_DAS_DONE; } diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 31100120b2c5..e31663cb7b43 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1119,20 +1119,6 @@ xfs_rtbitmap_blockcount( } /* - * Compute the maximum level number of the realtime summary file, as defined by - * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct - * use of rt volumes with more than 2^32 extents. - */ -uint8_t -xfs_compute_rextslog( - xfs_rtbxlen_t rtextents) -{ - if (!rtextents) - return 0; - return xfs_highbit64(rtextents); -} - -/* * Compute the number of rtbitmap words needed to populate every block of a * bitmap that is large enough to track the given number of rt extents. */ diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index 274dc7dae1fa..152a66750af5 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -351,20 +351,6 @@ xfs_rtfree_extent( int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, xfs_filblks_t rtlen); -uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); - -/* Do we support an rt volume having this number of rtextents? 
*/ -static inline bool -xfs_validate_rtextents( - xfs_rtbxlen_t rtextents) -{ - /* No runt rt volumes */ - if (rtextents == 0) - return false; - - return true; -} - xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents); unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp, @@ -383,8 +369,6 @@ unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp, # define xfs_rtsummary_read_buf(a,b) (-ENOSYS) # define xfs_rtbuf_cache_relse(a) (0) # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) -# define xfs_compute_rextslog(rtx) (0) -# define xfs_validate_rtextents(rtx) (false) static inline xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) { diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 4a9e8588f4c9..5bb6e2bd6dee 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1377,3 +1377,17 @@ xfs_validate_stripe_geometry( } return true; } + +/* + * Compute the maximum level number of the realtime summary file, as defined by + * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct + * use of rt volumes with more than 2^32 extents. + */ +uint8_t +xfs_compute_rextslog( + xfs_rtbxlen_t rtextents) +{ + if (!rtextents) + return 0; + return xfs_highbit64(rtextents); +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 19134b23c10b..2e8e8d63d4eb 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -38,4 +38,6 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, __s64 sunit, __s64 swidth, int sectorsize, bool silent); +uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 20b5375f2d9c..62e02d5380ad 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -251,4 +251,16 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, xfs_fileoff_t len); +/* Do we support an rt volume having this number of rtextents? */ +static inline bool +xfs_validate_rtextents( + xfs_rtbxlen_t rtextents) +{ + /* No runt rt volumes */ + if (rtextents == 0) + return false; + + return true; +} + #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 441ca9977652..46583517377f 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -15,6 +15,7 @@ #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bit.h" +#include "xfs_sb.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/repair.h" diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index fabd0ed9dfa6..b1ff4f33324a 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -16,6 +16,7 @@ #include "xfs_rtbitmap.h" #include "xfs_bit.h" #include "xfs_bmap.h" +#include "xfs_sb.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" |
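
For readers following the tracefs_inode changes above: the struct now places vfs_inode first so that init_once(), the slab constructor, can let inode_init_once() handle the embedded inode and then zero the trailing fields (flags, private) with a single memset_after() call. Below is a minimal userspace sketch of that layout trick, offered only as an illustration: the memset_after() macro here is a simplified stand-in for the kernel helper in include/linux/string.h, and struct example_inode with its fields is hypothetical, not the real struct tracefs_inode.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for the kernel's memset_after(): set every byte of
 * *obj declared after 'member' (including trailing padding) to 'v'. */
#define memset_after(obj, v, member)                                         \
	memset((char *)(obj) +                                               \
	       offsetof(typeof(*(obj)), member) + sizeof((obj)->member),     \
	       (v),                                                          \
	       sizeof(*(obj)) -                                              \
	       offsetof(typeof(*(obj)), member) - sizeof((obj)->member))

struct example_inode {
	long vfs_inode;        /* stands in for the embedded struct inode  */
	unsigned long flags;   /* everything below is zeroed in one call   */
	void *private;
};

int main(void)
{
	struct example_inode ti = {
		.vfs_inode = 42,
		.flags = 0xdead,
		.private = &ti,
	};

	/* Leave the "inode" part alone, wipe the fields declared after it. */
	memset_after(&ti, 0, vfs_inode);

	printf("vfs_inode=%ld flags=%lu private=%p\n",
	       ti.vfs_inode, ti.flags, ti.private);
	return 0;
}

Ordering the struct this way (shared inode part first, zero-initialised tail last) is what lets the patch drop the explicit "ti->flags = 0;" from tracefs_alloc_inode() while still handing out tracefs inodes with a clean tail.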