summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/btrfs/block-group.c12
-rw-r--r--fs/btrfs/delayed-inode.c104
-rw-r--r--fs/btrfs/disk-io.c22
-rw-r--r--fs/btrfs/ioctl.c8
-rw-r--r--fs/btrfs/locking.h2
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/transaction.c39
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/nls/Kconfig7
-rw-r--r--fs/overlayfs/copy_up.c3
-rw-r--r--fs/overlayfs/file.c9
-rw-r--r--fs/smb/client/cached_dir.c11
-rw-r--r--fs/smb/client/cached_dir.h2
-rw-r--r--fs/smb/client/cifsfs.c12
-rw-r--r--fs/smb/client/cifsfs.h4
-rw-r--r--fs/smb/client/cifsglob.h2
-rw-r--r--fs/smb/client/connect.c1
-rw-r--r--fs/smb/client/fs_context.c11
-rw-r--r--fs/smb/client/fs_context.h4
-rw-r--r--fs/smb/client/fscache.c2
-rw-r--r--fs/smb/client/smb2ops.c1
-rw-r--r--fs/smb/client/trace.h2
-rw-r--r--fs/smb/common/smb2pdu.h2
-rw-r--r--fs/smb/server/Kconfig2
-rw-r--r--fs/smb/server/server.c2
-rw-r--r--fs/tracefs/event_inode.c59
-rw-r--r--fs/tracefs/inode.c5
-rw-r--r--fs/tracefs/internal.h5
29 files changed, 235 insertions, 103 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 3282adc84d52..a25c9910d90b 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -31,7 +31,7 @@ config BTRFS_FS
continue to be mountable and usable by newer kernels.
For more information, please see the web pages at
- http://btrfs.wiki.kernel.org.
+ https://btrfs.readthedocs.io
To compile this file system support as a module, choose M here. The
module will be called btrfs.
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 0cb1dee965a0..b2e5107b7cec 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -3028,8 +3028,16 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
fail:
btrfs_release_path(path);
- /* We didn't update the block group item, need to revert @commit_used. */
- if (ret < 0) {
+ /*
+ * We didn't update the block group item, need to revert commit_used
+ * unless the block group item didn't exist yet - this is to prevent a
+ * race with a concurrent insertion of the block group item, with
+ * insert_block_group_item(), that happened just after we attempted to
+ * update. In that case we would reset commit_used to 0 just after the
+ * insertion set it to a value greater than 0 - if the block group later
+ * becomes with 0 used bytes, we would incorrectly skip its update.
+ */
+ if (ret < 0 && ret != -ENOENT) {
spin_lock(&cache->lock);
cache->commit_used = old_commit_used;
spin_unlock(&cache->lock);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 53c1211dd60b..caf0bbd028d1 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -412,6 +412,7 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
+ struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node;
struct rb_root_cached *root;
struct btrfs_delayed_root *delayed_root;
@@ -419,18 +420,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
if (RB_EMPTY_NODE(&delayed_item->rb_node))
return;
- delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
+ /* If it's in a rbtree, then we need to have delayed node locked. */
+ lockdep_assert_held(&delayed_node->mutex);
+
+ delayed_root = delayed_node->root->fs_info->delayed_root;
BUG_ON(!delayed_root);
if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
- root = &delayed_item->delayed_node->ins_root;
+ root = &delayed_node->ins_root;
else
- root = &delayed_item->delayed_node->del_root;
+ root = &delayed_node->del_root;
rb_erase_cached(&delayed_item->rb_node, root);
RB_CLEAR_NODE(&delayed_item->rb_node);
- delayed_item->delayed_node->count--;
+ delayed_node->count--;
finish_one_item(delayed_root);
}
@@ -1153,20 +1157,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
- btrfs_release_delayed_node(curr_node);
- curr_node = NULL;
btrfs_abort_transaction(trans, ret);
break;
}
prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node);
+ /*
+ * See the comment below about releasing path before releasing
+ * node. If the commit of delayed items was successful the path
+ * should always be released, but in case of an error, it may
+ * point to locked extent buffers (a leaf at the very least).
+ */
+ ASSERT(path->nodes[0] == NULL);
btrfs_release_delayed_node(prev_node);
}
+ /*
+ * Release the path to avoid a potential deadlock and lockdep splat when
+ * releasing the delayed node, as that requires taking the delayed node's
+ * mutex. If another task starts running delayed items before we take
+ * the mutex, it will first lock the mutex and then it may try to lock
+ * the same btree path (leaf).
+ */
+ btrfs_free_path(path);
+
if (curr_node)
btrfs_release_delayed_node(curr_node);
- btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
@@ -1413,7 +1430,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}
-/* Will return 0 or -ENOMEM */
+static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
+
+ if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
+ return;
+
+ /*
+ * Adding the new dir index item does not require touching another
+ * leaf, so we can release 1 unit of metadata that was previously
+ * reserved when starting the transaction. This applies only to
+ * the case where we had a transaction start and excludes the
+ * transaction join case (when replaying log trees).
+ */
+ trace_btrfs_space_reservation(fs_info, "transaction",
+ trans->transid, bytes, 0);
+ btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
+ ASSERT(trans->bytes_reserved >= bytes);
+ trans->bytes_reserved -= bytes;
+}
+
+/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
const char *name, int name_len,
struct btrfs_inode *dir,
@@ -1455,6 +1494,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_lock(&delayed_node->mutex);
+ /*
+ * First attempt to insert the delayed item. This is to make the error
+ * handling path simpler in case we fail (-EEXIST). There's no risk of
+ * any other task coming in and running the delayed item before we do
+ * the metadata space reservation below, because we are holding the
+ * delayed node's mutex and that mutex must also be locked before the
+ * node's delayed items can be run.
+ */
+ ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
+ if (unlikely(ret)) {
+ btrfs_err(trans->fs_info,
+"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
+ name_len, name, index, btrfs_root_id(delayed_node->root),
+ delayed_node->inode_id, dir->index_cnt,
+ delayed_node->index_cnt, ret);
+ btrfs_release_delayed_item(delayed_item);
+ btrfs_release_dir_index_item_space(trans);
+ mutex_unlock(&delayed_node->mutex);
+ goto release_node;
+ }
+
if (delayed_node->index_item_leaves == 0 ||
delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
delayed_node->curr_index_batch_size = data_len;
@@ -1472,36 +1532,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
* impossible.
*/
if (WARN_ON(ret)) {
- mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_item(delayed_item);
+ mutex_unlock(&delayed_node->mutex);
goto release_node;
}
delayed_node->index_item_leaves++;
- } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
- const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
-
- /*
- * Adding the new dir index item does not require touching another
- * leaf, so we can release 1 unit of metadata that was previously
- * reserved when starting the transaction. This applies only to
- * the case where we had a transaction start and excludes the
- * transaction join case (when replaying log trees).
- */
- trace_btrfs_space_reservation(fs_info, "transaction",
- trans->transid, bytes, 0);
- btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
- ASSERT(trans->bytes_reserved >= bytes);
- trans->bytes_reserved -= bytes;
- }
-
- ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
- if (unlikely(ret)) {
- btrfs_err(trans->fs_info,
- "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
- name_len, name, delayed_node->root->root_key.objectid,
- delayed_node->inode_id, ret);
- BUG();
+ } else {
+ btrfs_release_dir_index_item_space(trans);
}
mutex_unlock(&delayed_node->mutex);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0a96ea8c1d3a..68f60d50e1fd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -520,6 +520,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
struct folio *folio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
+ struct btrfs_subpage_info *spi = fs_info->subpage_info;
struct btrfs_subpage *subpage;
struct extent_buffer *eb;
int cur_bit = 0;
@@ -533,18 +534,19 @@ static bool btree_dirty_folio(struct address_space *mapping,
btrfs_assert_tree_write_locked(eb);
return filemap_dirty_folio(mapping, folio);
}
+
+ ASSERT(spi);
subpage = folio_get_private(folio);
- ASSERT(subpage->dirty_bitmap);
- while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) {
+ for (cur_bit = spi->dirty_offset;
+ cur_bit < spi->dirty_offset + spi->bitmap_nr_bits;
+ cur_bit++) {
unsigned long flags;
u64 cur;
- u16 tmp = (1 << cur_bit);
spin_lock_irqsave(&subpage->lock, flags);
- if (!(tmp & subpage->dirty_bitmap)) {
+ if (!test_bit(cur_bit, subpage->bitmaps)) {
spin_unlock_irqrestore(&subpage->lock, flags);
- cur_bit++;
continue;
}
spin_unlock_irqrestore(&subpage->lock, flags);
@@ -557,7 +559,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
btrfs_assert_tree_write_locked(eb);
free_extent_buffer(eb);
- cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
+ cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits) - 1;
}
return filemap_dirty_folio(mapping, folio);
}
@@ -1547,7 +1549,7 @@ static int transaction_kthread(void *arg)
delta = ktime_get_seconds() - cur->start_time;
if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) &&
- cur->state < TRANS_STATE_COMMIT_START &&
+ cur->state < TRANS_STATE_COMMIT_PREP &&
delta < fs_info->commit_interval) {
spin_unlock(&fs_info->trans_lock);
delay -= msecs_to_jiffies((delta - 1) * 1000);
@@ -2682,8 +2684,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
- btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
- BTRFS_LOCKDEP_TRANS_COMMIT_START);
+ btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_prep,
+ BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
BTRFS_LOCKDEP_TRANS_UNBLOCKED);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
@@ -4870,7 +4872,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
while (!list_empty(&fs_info->trans_list)) {
t = list_first_entry(&fs_info->trans_list,
struct btrfs_transaction, list);
- if (t->state >= TRANS_STATE_COMMIT_START) {
+ if (t->state >= TRANS_STATE_COMMIT_PREP) {
refcount_inc(&t->use_count);
spin_unlock(&fs_info->trans_lock);
btrfs_wait_for_commit(fs_info, t->transid);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a18ee7b5a166..75ab766fe156 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1958,6 +1958,13 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
goto out_put;
}
+ /*
+ * We don't need the path anymore, so release it and
+ * avoid deadlocks and lockdep warnings in case
+ * btrfs_iget() needs to lookup the inode from its root
+ * btree and lock the same leaf.
+ */
+ btrfs_release_path(path);
temp_inode = btrfs_iget(sb, key2.objectid, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
@@ -1978,7 +1985,6 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
goto out_put;
}
- btrfs_release_path(path);
key.objectid = key.offset;
key.offset = (u64)-1;
dirid = key.objectid;
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index edb9b4a0dba1..7d6ee1e609bf 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -79,7 +79,7 @@ enum btrfs_lock_nesting {
};
enum btrfs_lockdep_trans_states {
- BTRFS_LOCKDEP_TRANS_COMMIT_START,
+ BTRFS_LOCKDEP_TRANS_COMMIT_PREP,
BTRFS_LOCKDEP_TRANS_UNBLOCKED,
BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
BTRFS_LOCKDEP_TRANS_COMPLETED,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index b46ab348e8e5..345c449d588c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -639,7 +639,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
refcount_inc(&trans->use_count);
spin_unlock(&fs_info->trans_lock);
- ASSERT(trans);
+ ASSERT(trans || BTRFS_FS_ERROR(fs_info));
if (trans) {
if (atomic_dec_and_test(&trans->pending_ordered))
wake_up(&trans->pending_wait);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 874e4394df86..0bf42dccb041 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -56,12 +56,17 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
* | Call btrfs_commit_transaction() on any trans handle attached to
* | transaction N
* V
- * Transaction N [[TRANS_STATE_COMMIT_START]]
+ * Transaction N [[TRANS_STATE_COMMIT_PREP]]
+ * |
+ * | If there are simultaneous calls to btrfs_commit_transaction() one will win
+ * | the race and the rest will wait for the winner to commit the transaction.
+ * |
+ * | The winner will wait for previous running transaction to completely finish
+ * | if there is one.
* |
- * | Will wait for previous running transaction to completely finish if there
- * | is one
+ * Transaction N [[TRANS_STATE_COMMIT_START]]
* |
- * | Then one of the following happes:
+ * | Then one of the following happens:
* | - Wait for all other trans handle holders to release.
* | The btrfs_commit_transaction() caller will do the commit work.
* | - Wait for current transaction to be committed by others.
@@ -112,6 +117,7 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
*/
static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
[TRANS_STATE_RUNNING] = 0U,
+ [TRANS_STATE_COMMIT_PREP] = 0U,
[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
[TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
__TRANS_ATTACH |
@@ -1982,7 +1988,7 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
* Wait for the current transaction commit to start and block
* subsequent transaction joins
*/
- btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
+ btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
wait_event(fs_info->transaction_blocked_wait,
cur_trans->state >= TRANS_STATE_COMMIT_START ||
TRANS_ABORTED(cur_trans));
@@ -2129,7 +2135,7 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans)
return;
lockdep_assert_held(&trans->fs_info->trans_lock);
- ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
+ ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_PREP);
list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
}
@@ -2153,7 +2159,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
ktime_t interval;
ASSERT(refcount_read(&trans->use_count) == 1);
- btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
+ btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
@@ -2213,7 +2219,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
}
spin_lock(&fs_info->trans_lock);
- if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
+ if (cur_trans->state >= TRANS_STATE_COMMIT_PREP) {
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
add_pending_snapshot(trans);
@@ -2225,7 +2231,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
want_state = TRANS_STATE_SUPER_COMMITTED;
btrfs_trans_state_lockdep_release(fs_info,
- BTRFS_LOCKDEP_TRANS_COMMIT_START);
+ BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
ret = btrfs_end_transaction(trans);
wait_for_commit(cur_trans, want_state);
@@ -2237,9 +2243,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
return ret;
}
- cur_trans->state = TRANS_STATE_COMMIT_START;
+ cur_trans->state = TRANS_STATE_COMMIT_PREP;
wake_up(&fs_info->transaction_blocked_wait);
- btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
+ btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
if (cur_trans->list.prev != &fs_info->trans_list) {
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
@@ -2260,11 +2266,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
btrfs_put_transaction(prev_trans);
if (ret)
goto lockdep_release;
- } else {
- spin_unlock(&fs_info->trans_lock);
+ spin_lock(&fs_info->trans_lock);
}
} else {
- spin_unlock(&fs_info->trans_lock);
/*
* The previous transaction was aborted and was already removed
* from the list of transactions at fs_info->trans_list. So we
@@ -2272,11 +2276,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* corrupt state (pointing to trees with unwritten nodes/leafs).
*/
if (BTRFS_FS_ERROR(fs_info)) {
+ spin_unlock(&fs_info->trans_lock);
ret = -EROFS;
goto lockdep_release;
}
}
+ cur_trans->state = TRANS_STATE_COMMIT_START;
+ wake_up(&fs_info->transaction_blocked_wait);
+ spin_unlock(&fs_info->trans_lock);
+
/*
* Get the time spent on the work done by the commit thread and not
* the time spent waiting on a previous commit
@@ -2586,7 +2595,7 @@ lockdep_release:
goto cleanup_transaction;
lockdep_trans_commit_start_release:
- btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
+ btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
btrfs_end_transaction(trans);
return ret;
}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 8e9fa23bd7fe..6b309f8a99a8 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -14,6 +14,7 @@
enum btrfs_trans_state {
TRANS_STATE_RUNNING,
+ TRANS_STATE_COMMIT_PREP,
TRANS_STATE_COMMIT_START,
TRANS_STATE_COMMIT_DOING,
TRANS_STATE_UNBLOCKED,
diff --git a/fs/nls/Kconfig b/fs/nls/Kconfig
index a0d0e2f7ec83..2a601af6f3bd 100644
--- a/fs/nls/Kconfig
+++ b/fs/nls/Kconfig
@@ -618,11 +618,6 @@ config NLS_UTF8
the Unicode/ISO9646 universal character set.
config NLS_UCS2_UTILS
- tristate "NLS UCS-2 UTILS"
- help
- Set of older UCS-2 conversion utilities and tables used by some
- filesystems including SMB/CIFS. This includes upper case conversion
- tables. This will automatically be selected when the filesystem
- that uses it is selected.
+ tristate
endif # NLS
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index bae404a1bad4..d1761ec5866a 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -618,7 +618,8 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
if (err)
return err;
- if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) {
+ if (inode->i_flags & OVL_COPY_I_FLAGS_MASK &&
+ (S_ISREG(c->stat.mode) || S_ISDIR(c->stat.mode))) {
/*
* Copy the fileattr inode flags that are the source of already
* copied i_flags
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 3b4cc633d763..4193633c4c7a 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -19,7 +19,6 @@ struct ovl_aio_req {
struct kiocb iocb;
refcount_t ref;
struct kiocb *orig_iocb;
- struct fd fd;
};
static struct kmem_cache *ovl_aio_request_cachep;
@@ -280,7 +279,7 @@ static rwf_t ovl_iocb_to_rwf(int ifl)
static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
{
if (refcount_dec_and_test(&aio_req->ref)) {
- fdput(aio_req->fd);
+ fput(aio_req->iocb.ki_filp);
kmem_cache_free(ovl_aio_request_cachep, aio_req);
}
}
@@ -342,10 +341,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
if (!aio_req)
goto out;
- aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
- kiocb_clone(&aio_req->iocb, iocb, real.file);
+ kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
refcount_set(&aio_req->ref, 2);
ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
@@ -409,10 +407,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (!aio_req)
goto out;
- aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
- kiocb_clone(&aio_req->iocb, iocb, real.file);
+ kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
refcount_set(&aio_req->ref, 2);
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 2d5e9a9d5b8b..b17f067e4ada 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -18,7 +18,8 @@ static void smb2_close_cached_fid(struct kref *ref);
static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
const char *path,
- bool lookup_only)
+ bool lookup_only,
+ __u32 max_cached_dirs)
{
struct cached_fid *cfid;
@@ -43,7 +44,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
spin_unlock(&cfids->cfid_list_lock);
return NULL;
}
- if (cfids->num_entries >= MAX_CACHED_FIDS) {
+ if (cfids->num_entries >= max_cached_dirs) {
spin_unlock(&cfids->cfid_list_lock);
return NULL;
}
@@ -145,7 +146,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
const char *npath;
if (tcon == NULL || tcon->cfids == NULL || tcon->nohandlecache ||
- is_smb1_server(tcon->ses->server))
+ is_smb1_server(tcon->ses->server) || (dir_cache_timeout == 0))
return -EOPNOTSUPP;
ses = tcon->ses;
@@ -162,7 +163,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
if (!utf16_path)
return -ENOMEM;
- cfid = find_or_create_cached_dir(cfids, path, lookup_only);
+ cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs);
if (cfid == NULL) {
kfree(utf16_path);
return -ENOENT;
@@ -582,7 +583,7 @@ cifs_cfids_laundromat_thread(void *p)
return 0;
spin_lock(&cfids->cfid_list_lock);
list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
- if (time_after(jiffies, cfid->time + HZ * 30)) {
+ if (time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
list_del(&cfid->entry);
list_add(&cfid->entry, &entry);
cfids->num_entries--;
diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
index facc9b154d00..a82ff2cea789 100644
--- a/fs/smb/client/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
@@ -49,7 +49,7 @@ struct cached_fid {
struct cached_dirents dirents;
};
-#define MAX_CACHED_FIDS 16
+/* default MAX_CACHED_FIDS is 16 */
struct cached_fids {
/* Must be held when:
* - accessing the cfids->entries list
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 73c44e097a69..22869cda1356 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -117,6 +117,10 @@ module_param(cifs_max_pending, uint, 0444);
MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
"CIFS/SMB1 dialect (N/A for SMB3) "
"Default: 32767 Range: 2 to 32767.");
+unsigned int dir_cache_timeout = 30;
+module_param(dir_cache_timeout, uint, 0644);
+MODULE_PARM_DESC(dir_cache_timeout, "Number of seconds to cache directory contents for which we have a lease. Default: 30 "
+ "Range: 1 to 65000 seconds, 0 to disable caching dir contents");
#ifdef CONFIG_CIFS_STATS2
unsigned int slow_rsp_threshold = 1;
module_param(slow_rsp_threshold, uint, 0644);
@@ -695,6 +699,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
if (tcon->handle_timeout)
seq_printf(s, ",handletimeout=%u", tcon->handle_timeout);
+ if (tcon->max_cached_dirs != MAX_CACHED_FIDS)
+ seq_printf(s, ",max_cached_dirs=%u", tcon->max_cached_dirs);
/*
* Display file and directory attribute timeout in seconds.
@@ -1679,6 +1685,12 @@ init_cifs(void)
CIFS_MAX_REQ);
}
+ /* Limit max to about 18 hours, and setting to zero disables directory entry caching */
+ if (dir_cache_timeout > 65000) {
+ dir_cache_timeout = 65000;
+ cifs_dbg(VFS, "dir_cache_timeout set to max of 65000 seconds\n");
+ }
+
cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
if (!cifsiod_wq) {
rc = -ENOMEM;
diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index 532c38fe07cd..41daebd220ff 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -152,6 +152,6 @@ extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
/* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 44
-#define CIFS_VERSION "2.44"
+#define SMB3_PRODUCT_BUILD 45
+#define CIFS_VERSION "2.45"
#endif /* _CIFSFS_H */
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 259e231f8b4f..032d8716f671 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1210,6 +1210,7 @@ struct cifs_tcon {
__u32 max_chunks;
__u32 max_bytes_chunk;
__u32 max_bytes_copy;
+ __u32 max_cached_dirs;
#ifdef CONFIG_CIFS_FSCACHE
u64 resource_id; /* server resource id */
struct fscache_volume *fscache; /* cookie for share */
@@ -2016,6 +2017,7 @@ extern unsigned int CIFSMaxBufSize; /* max size not including hdr */
extern unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
extern unsigned int cifs_min_small; /* min size of small buf pool */
extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
+extern unsigned int dir_cache_timeout; /* max time for directory lease caching of dir */
extern bool disable_legacy_dialects; /* forbid vers=1.0 and vers=2.0 mounts */
extern atomic_t mid_count;
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 3bd71f982170..687754791bf0 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -2657,6 +2657,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
tcon->retry = ctx->retry;
tcon->nocase = ctx->nocase;
tcon->broken_sparse_sup = ctx->no_sparse;
+ tcon->max_cached_dirs = ctx->max_cached_dirs;
if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING)
tcon->nohandlecache = ctx->nohandlecache;
else
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 67e16c2ac90e..e45ce31bbda7 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -150,6 +150,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_u32("closetimeo", Opt_closetimeo),
fsparam_u32("echo_interval", Opt_echo_interval),
fsparam_u32("max_credits", Opt_max_credits),
+ fsparam_u32("max_cached_dirs", Opt_max_cached_dirs),
fsparam_u32("handletimeout", Opt_handletimeout),
fsparam_u64("snapshot", Opt_snapshot),
fsparam_u32("max_channels", Opt_max_channels),
@@ -1165,6 +1166,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
if (result.uint_32 > 1)
ctx->multichannel = true;
break;
+ case Opt_max_cached_dirs:
+ if (result.uint_32 < 1) {
+ cifs_errorf(fc, "%s: Invalid max_cached_dirs, needs to be 1 or more\n",
+ __func__);
+ goto cifs_parse_mount_err;
+ }
+ ctx->max_cached_dirs = result.uint_32;
+ break;
case Opt_handletimeout:
ctx->handle_timeout = result.uint_32;
if (ctx->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) {
@@ -1592,7 +1601,7 @@ int smb3_init_fs_context(struct fs_context *fc)
ctx->acregmax = CIFS_DEF_ACTIMEO;
ctx->acdirmax = CIFS_DEF_ACTIMEO;
ctx->closetimeo = SMB3_DEF_DCLOSETIMEO;
-
+ ctx->max_cached_dirs = MAX_CACHED_FIDS;
/* Most clients set timeout to 0, allows server to use its default */
ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index f4eaf8558902..9d8d34af0211 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -128,6 +128,7 @@ enum cifs_param {
Opt_closetimeo,
Opt_echo_interval,
Opt_max_credits,
+ Opt_max_cached_dirs,
Opt_snapshot,
Opt_max_channels,
Opt_handletimeout,
@@ -261,6 +262,7 @@ struct smb3_fs_context {
__u32 handle_timeout; /* persistent and durable handle timeout in ms */
unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
unsigned int max_channels;
+ unsigned int max_cached_dirs;
__u16 compression; /* compression algorithm 0xFFFF default 0=disabled */
bool rootfs:1; /* if it's a SMB root file system */
bool witness:1; /* use witness protocol */
@@ -287,7 +289,7 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
*/
#define SMB3_MAX_DCLOSETIMEO (1 << 30)
#define SMB3_DEF_DCLOSETIMEO (1 * HZ) /* even 1 sec enough to help eg open/write/close/open/read */
-
+#define MAX_CACHED_FIDS 16
extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
#endif
diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
index 3677525ee993..e5cad149f5a2 100644
--- a/fs/smb/client/fscache.c
+++ b/fs/smb/client/fscache.c
@@ -48,7 +48,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
sharename = extract_sharename(tcon->tree_name);
if (IS_ERR(sharename)) {
cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
- return -EINVAL;
+ return PTR_ERR(sharename);
}
slen = strlen(sharename);
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index e3dd698854d6..d9eda2e958b4 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -2683,6 +2683,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
smb2_copy_fs_info_to_kstatfs(info, buf);
qfs_exit:
+ trace_smb3_qfs_done(xid, tcon->tid, tcon->ses->Suid, tcon->tree_name, rc);
free_rsp_buf(buftype, rsp_iov.iov_base);
return rc;
}
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index e671bd16f00c..a7e4755bed0f 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -691,7 +691,7 @@ DEFINE_EVENT(smb3_tcon_class, smb3_##name, \
TP_ARGS(xid, tid, sesid, unc_name, rc))
DEFINE_SMB3_TCON_EVENT(tcon);
-
+DEFINE_SMB3_TCON_EVENT(qfs_done);
/*
* For smb2/smb3 open (including create and mkdir) calls
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 2680251b9aac..319fb9ffc6a0 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -406,7 +406,7 @@ struct smb2_tree_disconnect_rsp {
/* Capabilities flags */
#define SMB2_GLOBAL_CAP_DFS 0x00000001
#define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */
-#define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */
+#define SMB2_GLOBAL_CAP_LARGE_MTU 0x00000004 /* Resp only New to SMB2.1 */
#define SMB2_GLOBAL_CAP_MULTI_CHANNEL 0x00000008 /* New to SMB3 */
#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */
#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING 0x00000020 /* New to SMB3 */
diff --git a/fs/smb/server/Kconfig b/fs/smb/server/Kconfig
index 793151ddd60e..cabe6a843c6a 100644
--- a/fs/smb/server/Kconfig
+++ b/fs/smb/server/Kconfig
@@ -1,5 +1,5 @@
config SMB_SERVER
- tristate "SMB3 server support (EXPERIMENTAL)"
+ tristate "SMB3 server support"
depends on INET
depends on MULTIUSER
depends on FILE_LOCKING
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index 801cd0929209..5ab2f52f9b35 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -590,8 +590,6 @@ static int __init ksmbd_server_init(void)
if (ret)
goto err_crypto_destroy;
- pr_warn_once("The ksmbd server is experimental\n");
-
return 0;
err_crypto_destroy:
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 237c6f370ad9..9f64e7332796 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -185,17 +185,49 @@ static struct dentry *create_dir(const char *name, struct dentry *parent, void *
/**
* eventfs_set_ef_status_free - set the ef->status to free
+ * @ti: the tracefs_inode of the dentry
* @dentry: dentry who's status to be freed
*
* eventfs_set_ef_status_free will be called if no more
* references remain
*/
-void eventfs_set_ef_status_free(struct dentry *dentry)
+void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry)
{
struct tracefs_inode *ti_parent;
- struct eventfs_file *ef;
+ struct eventfs_inode *ei;
+ struct eventfs_file *ef, *tmp;
+
+ /* The top level events directory may be freed by this */
+ if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
+ LIST_HEAD(ef_del_list);
+
+ mutex_lock(&eventfs_mutex);
+
+ ei = ti->private;
+
+ /* Record all the top level files */
+ list_for_each_entry_srcu(ef, &ei->e_top_files, list,
+ lockdep_is_held(&eventfs_mutex)) {
+ list_add_tail(&ef->del_list, &ef_del_list);
+ }
+
+ /* Nothing should access this, but just in case! */
+ ti->private = NULL;
+
+ mutex_unlock(&eventfs_mutex);
+
+ /* Now safely free the top level files and their children */
+ list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+ list_del(&ef->del_list);
+ eventfs_remove(ef);
+ }
+
+ kfree(ei);
+ return;
+ }
mutex_lock(&eventfs_mutex);
+
ti_parent = get_tracefs(dentry->d_parent->d_inode);
if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE))
goto out;
@@ -420,7 +452,8 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
ei = ti->private;
idx = srcu_read_lock(&eventfs_srcu);
- list_for_each_entry_rcu(ef, &ei->e_top_files, list) {
+ list_for_each_entry_srcu(ef, &ei->e_top_files, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
create_dentry(ef, dentry, false);
}
srcu_read_unlock(&eventfs_srcu, idx);
@@ -491,6 +524,9 @@ struct dentry *eventfs_create_events_dir(const char *name,
struct tracefs_inode *ti;
struct inode *inode;
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+
if (IS_ERR(dentry))
return dentry;
@@ -507,7 +543,7 @@ struct dentry *eventfs_create_events_dir(const char *name,
INIT_LIST_HEAD(&ei->e_top_files);
ti = get_tracefs(inode);
- ti->flags |= TRACEFS_EVENT_INODE;
+ ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
ti->private = ei;
inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
@@ -538,6 +574,9 @@ struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
struct eventfs_inode *ei_parent;
struct eventfs_file *ef;
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+
if (!parent)
return ERR_PTR(-EINVAL);
@@ -569,6 +608,9 @@ struct eventfs_file *eventfs_add_dir(const char *name,
{
struct eventfs_file *ef;
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+
if (!ef_parent)
return ERR_PTR(-EINVAL);
@@ -606,6 +648,9 @@ int eventfs_add_events_file(const char *name, umode_t mode,
struct eventfs_inode *ei;
struct eventfs_file *ef;
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return -ENODEV;
+
if (!parent)
return -EINVAL;
@@ -654,6 +699,9 @@ int eventfs_add_file(const char *name, umode_t mode,
{
struct eventfs_file *ef;
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return -ENODEV;
+
if (!ef_parent)
return -EINVAL;
@@ -791,7 +839,6 @@ void eventfs_remove(struct eventfs_file *ef)
void eventfs_remove_events_dir(struct dentry *dentry)
{
struct tracefs_inode *ti;
- struct eventfs_inode *ei;
if (!dentry || !dentry->d_inode)
return;
@@ -800,8 +847,6 @@ void eventfs_remove_events_dir(struct dentry *dentry)
if (!ti || !(ti->flags & TRACEFS_EVENT_INODE))
return;
- ei = ti->private;
d_invalidate(dentry);
dput(dentry);
- kfree(ei);
}
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index de5b72216b1a..891653ba9cf3 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -385,7 +385,7 @@ static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
ti = get_tracefs(inode);
if (ti && ti->flags & TRACEFS_EVENT_INODE)
- eventfs_set_ef_status_free(dentry);
+ eventfs_set_ef_status_free(ti, dentry);
iput(inode);
}
@@ -673,6 +673,9 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
*/
struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
{
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+
return __create_dir(name, parent, &simple_dir_inode_operations);
}
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
index 69c2b1d87c46..4f2e49e2197b 100644
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -3,7 +3,8 @@
#define _TRACEFS_INTERNAL_H
enum {
- TRACEFS_EVENT_INODE = BIT(1),
+ TRACEFS_EVENT_INODE = BIT(1),
+ TRACEFS_EVENT_TOP_INODE = BIT(2),
};
struct tracefs_inode {
@@ -24,6 +25,6 @@ struct inode *tracefs_get_inode(struct super_block *sb);
struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
struct dentry *eventfs_failed_creating(struct dentry *dentry);
struct dentry *eventfs_end_creating(struct dentry *dentry);
-void eventfs_set_ef_status_free(struct dentry *dentry);
+void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry);
#endif /* _TRACEFS_INTERNAL_H */