summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
author Max Krummenacher <max.krummenacher@toradex.com> 2024-02-29 21:24:13 +0100
committer Max Krummenacher <max.krummenacher@toradex.com> 2024-02-29 21:24:26 +0100
commit 99e9ab88844a2a2e1c048264315eb0d72eb0bf88 (patch)
tree 0d89b05b1816b8cac1be77ac9b3c297762502e67 /fs
parent 756e3199ba82d68ba07e5acf555d13aaa1da3da1 (diff)
parent d761b18f6bc83bce94251467e3c0974243318456 (diff)
Merge remote-tracking branch 'fslc/5.15-2.2.x-imx' into toradex_5.15-2.2.x-imx
Signed-off-by: Max Krummenacher <max.krummenacher@toradex.com>

Conflicts:
	drivers/gpu/drm/bridge/lontium-lt8912b.c
	drivers/usb/dwc3/drd.c
Diffstat (limited to 'fs')
-rw-r--r-- fs/9p/xattr.c 5
-rw-r--r-- fs/Kconfig 4
-rw-r--r-- fs/afs/cell.c 67
-rw-r--r-- fs/afs/cmservice.c 2
-rw-r--r-- fs/afs/dynroot.c 33
-rw-r--r-- fs/afs/internal.h 19
-rw-r--r-- fs/afs/proc.c 6
-rw-r--r-- fs/afs/rxrpc.c 28
-rw-r--r-- fs/afs/server.c 40
-rw-r--r-- fs/afs/server_list.c 2
-rw-r--r-- fs/afs/super.c 2
-rw-r--r-- fs/afs/vl_list.c 19
-rw-r--r-- fs/afs/vl_rotate.c 10
-rw-r--r-- fs/afs/volume.c 47
-rw-r--r-- fs/attr.c 20
-rw-r--r-- fs/autofs/waitq.c 3
-rw-r--r-- fs/binfmt_elf_fdpic.c 5
-rw-r--r-- fs/btrfs/ctree.c 52
-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/delalloc-space.c 3
-rw-r--r-- fs/btrfs/delayed-inode.c 26
-rw-r--r-- fs/btrfs/disk-io.c 13
-rw-r--r-- fs/btrfs/extent-tree.c 11
-rw-r--r-- fs/btrfs/extent_io.c 8
-rw-r--r-- fs/btrfs/inode.c 7
-rw-r--r-- fs/btrfs/ioctl.c 31
-rw-r--r-- fs/btrfs/ref-verify.c 2
-rw-r--r-- fs/btrfs/send.c 2
-rw-r--r-- fs/btrfs/super.c 2
-rw-r--r-- fs/btrfs/transaction.c 7
-rw-r--r-- fs/btrfs/tree-log.c 2
-rw-r--r-- fs/btrfs/volumes.c 17
-rw-r--r-- fs/btrfs/volumes.h 3
-rw-r--r-- fs/ceph/file.c 2
-rw-r--r-- fs/ceph/inode.c 4
-rw-r--r-- fs/cifs/cifs_spnego.c 4
-rw-r--r-- fs/cifs/cifsfs.c 5
-rw-r--r-- fs/cifs/inode.c 2
-rw-r--r-- fs/cifs/misc.c 4
-rw-r--r-- fs/cifs/smb2misc.c 26
-rw-r--r-- fs/cifs/smb2ops.c 34
-rw-r--r-- fs/cifs/smb2pdu.c 29
-rw-r--r-- fs/cifs/smb2pdu.h 2
-rw-r--r-- fs/cifs/smb2transport.c 5
-rw-r--r-- fs/cifs/xattr.c 5
-rw-r--r-- fs/debugfs/file.c 8
-rw-r--r-- fs/debugfs/inode.c 27
-rw-r--r-- fs/debugfs/internal.h 10
-rw-r--r-- fs/dlm/plock.c 6
-rw-r--r-- fs/efivarfs/super.c 12
-rw-r--r-- fs/erofs/zdata.c 2
-rw-r--r-- fs/eventfd.c 2
-rw-r--r-- fs/exfat/namei.c 29
-rw-r--r-- fs/ext2/xattr.c 4
-rw-r--r-- fs/ext4/acl.h 5
-rw-r--r-- fs/ext4/balloc.c 15
-rw-r--r-- fs/ext4/block_validity.c 8
-rw-r--r-- fs/ext4/ext4.h 4
-rw-r--r-- fs/ext4/extents.c 10
-rw-r--r-- fs/ext4/extents_status.c 310
-rw-r--r-- fs/ext4/mballoc.c 129
-rw-r--r-- fs/ext4/mballoc.h 14
-rw-r--r-- fs/ext4/namei.c 29
-rw-r--r-- fs/ext4/resize.c 23
-rw-r--r-- fs/f2fs/compress.c 2
-rw-r--r-- fs/f2fs/data.c 4
-rw-r--r-- fs/f2fs/file.c 13
-rw-r--r-- fs/f2fs/namei.c 2
-rw-r--r-- fs/f2fs/super.c 33
-rw-r--r-- fs/f2fs/xattr.c 6
-rw-r--r-- fs/fs-writeback.c 52
-rw-r--r-- fs/fuse/dax.c 1
-rw-r--r-- fs/fuse/fuse_i.h 15
-rw-r--r-- fs/fuse/inode.c 75
-rw-r--r-- fs/fuse/readdir.c 10
-rw-r--r-- fs/gfs2/aops.c 4
-rw-r--r-- fs/gfs2/inode.c 14
-rw-r--r-- fs/gfs2/log.c 25
-rw-r--r-- fs/gfs2/quota.c 11
-rw-r--r-- fs/gfs2/rgrp.c 2
-rw-r--r-- fs/gfs2/super.c 2
-rw-r--r-- fs/inode.c 16
-rw-r--r-- fs/iomap/direct-io.c 22
-rw-r--r-- fs/jbd2/checkpoint.c 28
-rw-r--r-- fs/jbd2/commit.c 50
-rw-r--r-- fs/jbd2/journal.c 38
-rw-r--r-- fs/jbd2/recovery.c 50
-rw-r--r-- fs/jbd2/revoke.c 8
-rw-r--r-- fs/jbd2/transaction.c 114
-rw-r--r-- fs/jfs/jfs_dmap.c 24
-rw-r--r-- fs/jfs/jfs_extent.c 5
-rw-r--r-- fs/jfs/jfs_imap.c 6
-rw-r--r-- fs/ksmbd/Kconfig 11
-rw-r--r-- fs/ksmbd/asn1.c 169
-rw-r--r-- fs/ksmbd/auth.c 72
-rw-r--r-- fs/ksmbd/auth.h 3
-rw-r--r-- fs/ksmbd/connection.c 169
-rw-r--r-- fs/ksmbd/connection.h 92
-rw-r--r-- fs/ksmbd/ksmbd_netlink.h 7
-rw-r--r-- fs/ksmbd/ksmbd_work.c 101
-rw-r--r-- fs/ksmbd/ksmbd_work.h 40
-rw-r--r-- fs/ksmbd/mgmt/share_config.c 56
-rw-r--r-- fs/ksmbd/mgmt/share_config.h 36
-rw-r--r-- fs/ksmbd/mgmt/tree_connect.c 80
-rw-r--r-- fs/ksmbd/mgmt/tree_connect.h 15
-rw-r--r-- fs/ksmbd/mgmt/user_config.h 1
-rw-r--r-- fs/ksmbd/mgmt/user_session.c 180
-rw-r--r-- fs/ksmbd/mgmt/user_session.h 8
-rw-r--r-- fs/ksmbd/misc.c 94
-rw-r--r-- fs/ksmbd/misc.h 6
-rw-r--r-- fs/ksmbd/oplock.c 369
-rw-r--r-- fs/ksmbd/oplock.h 12
-rw-r--r-- fs/ksmbd/server.c 54
-rw-r--r-- fs/ksmbd/smb2misc.c 19
-rw-r--r-- fs/ksmbd/smb2ops.c 19
-rw-r--r-- fs/ksmbd/smb2pdu.c 2047
-rw-r--r-- fs/ksmbd/smb2pdu.h 86
-rw-r--r-- fs/ksmbd/smb_common.c 180
-rw-r--r-- fs/ksmbd/smb_common.h 20
-rw-r--r-- fs/ksmbd/smbacl.c 55
-rw-r--r-- fs/ksmbd/smbacl.h 8
-rw-r--r-- fs/ksmbd/transport_ipc.c 4
-rw-r--r-- fs/ksmbd/transport_rdma.c 644
-rw-r--r-- fs/ksmbd/transport_rdma.h 6
-rw-r--r-- fs/ksmbd/transport_tcp.c 9
-rw-r--r-- fs/ksmbd/unicode.c 193
-rw-r--r-- fs/ksmbd/unicode.h 3
-rw-r--r-- fs/ksmbd/vfs.c 650
-rw-r--r-- fs/ksmbd/vfs.h 56
-rw-r--r-- fs/ksmbd/vfs_cache.c 73
-rw-r--r-- fs/ksmbd/vfs_cache.h 29
-rw-r--r-- fs/lockd/mon.c 3
-rw-r--r-- fs/locks.c 2
-rw-r--r-- fs/namei.c 127
-rw-r--r-- fs/namespace.c 11
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c 2
-rw-r--r-- fs/nfs/blocklayout/dev.c 4
-rw-r--r-- fs/nfs/direct.c 45
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c 1
-rw-r--r-- fs/nfs/nfs2xdr.c 2
-rw-r--r-- fs/nfs/nfs3xdr.c 2
-rw-r--r-- fs/nfs/nfs42proc.c 5
-rw-r--r-- fs/nfs/nfs4client.c 9
-rw-r--r-- fs/nfs/nfs4proc.c 14
-rw-r--r-- fs/nfs/nfs4state.c 47
-rw-r--r-- fs/nfs/pnfs.c 33
-rw-r--r-- fs/nfs/pnfs_dev.c 2
-rw-r--r-- fs/nfs/pnfs_nfs.c 2
-rw-r--r-- fs/nfs/sysfs.c 16
-rw-r--r-- fs/nfs/write.c 23
-rw-r--r-- fs/nfsd/blocklayoutxdr.c 9
-rw-r--r-- fs/nfsd/flexfilelayoutxdr.c 9
-rw-r--r-- fs/nfsd/nfs4proc.c 4
-rw-r--r-- fs/nfsd/nfs4state.c 2
-rw-r--r-- fs/nfsd/nfs4xdr.c 25
-rw-r--r-- fs/nfsd/vfs.c 12
-rw-r--r-- fs/nilfs2/alloc.c 3
-rw-r--r-- fs/nilfs2/gcinode.c 6
-rw-r--r-- fs/nilfs2/inode.c 7
-rw-r--r-- fs/nilfs2/segment.c 5
-rw-r--r-- fs/nilfs2/sufile.c 42
-rw-r--r-- fs/nilfs2/the_nilfs.c 6
-rw-r--r-- fs/nls/nls_base.c 4
-rw-r--r-- fs/ntfs3/attrib.c 6
-rw-r--r-- fs/ntfs3/attrlist.c 15
-rw-r--r-- fs/ntfs3/bitmap.c 3
-rw-r--r-- fs/ntfs3/dir.c 6
-rw-r--r-- fs/ntfs3/frecord.c 8
-rw-r--r-- fs/ntfs3/fslog.c 6
-rw-r--r-- fs/ntfs3/fsntfs.c 19
-rw-r--r-- fs/ntfs3/index.c 3
-rw-r--r-- fs/ntfs3/super.c 2
-rw-r--r-- fs/ntfs3/xattr.c 7
-rw-r--r-- fs/ocfs2/namei.c 4
-rw-r--r-- fs/overlayfs/copy_up.c 5
-rw-r--r-- fs/overlayfs/file.c 9
-rw-r--r-- fs/proc/base.c 3
-rw-r--r-- fs/proc/proc_sysctl.c 8
-rw-r--r-- fs/proc/task_nommu.c 27
-rw-r--r-- fs/pstore/platform.c 9
-rw-r--r-- fs/pstore/ram_core.c 4
-rw-r--r-- fs/quota/dquot.c 212
-rw-r--r-- fs/reiserfs/journal.c 4
-rw-r--r-- fs/tracefs/inode.c 3
-rw-r--r-- fs/udf/balloc.c 31
-rw-r--r-- fs/udf/inode.c 45
-rw-r--r-- fs/verity/signature.c 16
-rw-r--r-- fs/xfs/libxfs/xfs_dir2_leaf.c 9
-rw-r--r-- fs/xfs/libxfs/xfs_inode_fork.c 1
-rw-r--r-- fs/xfs/libxfs/xfs_log_recover.h 14
-rw-r--r-- fs/xfs/libxfs/xfs_trans_resv.c 2
-rw-r--r-- fs/xfs/scrub/common.c 25
-rw-r--r-- fs/xfs/scrub/common.h 2
-rw-r--r-- fs/xfs/scrub/fscounters.c 13
-rw-r--r-- fs/xfs/scrub/scrub.c 2
-rw-r--r-- fs/xfs/scrub/scrub.h 1
-rw-r--r-- fs/xfs/xfs_attr_inactive.c 8
-rw-r--r-- fs/xfs/xfs_buf_item_recover.c 66
-rw-r--r-- fs/xfs/xfs_error.c 9
-rw-r--r-- fs/xfs/xfs_icache.c 92
-rw-r--r-- fs/xfs/xfs_icache.h 1
-rw-r--r-- fs/xfs/xfs_inode.c 4
-rw-r--r-- fs/xfs/xfs_iops.c 34
-rw-r--r-- fs/xfs/xfs_log.c 9
-rw-r--r-- fs/xfs/xfs_log_priv.h 3
-rw-r--r-- fs/xfs/xfs_log_recover.c 44
-rw-r--r-- fs/xfs/xfs_mount.h 5
-rw-r--r-- fs/xfs/xfs_qm.c 7
-rw-r--r-- fs/xfs/xfs_qm_syscalls.c 9
-rw-r--r-- fs/xfs/xfs_reflink.c 197
-rw-r--r-- fs/xfs/xfs_super.c 12
-rw-r--r-- fs/xfs/xfs_symlink.c 29
-rw-r--r-- fs/xfs/xfs_sysfs.h 7
-rw-r--r-- fs/xfs/xfs_trace.h 1
214 files changed, 5547 insertions, 3575 deletions
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index ee331845e2c7..31799ac10e33 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -73,7 +73,7 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
struct p9_fid *fid;
int ret;
- p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n",
+ p9_debug(P9_DEBUG_VFS, "name = '%s' value_len = %zu\n",
name, buffer_size);
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
@@ -144,7 +144,8 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
- return v9fs_xattr_get(dentry, NULL, buffer, buffer_size);
+ /* Txattrwalk with an empty string lists xattrs instead */
+ return v9fs_xattr_get(dentry, "", buffer, buffer_size);
}
static int v9fs_xattr_handler_get(const struct xattr_handler *handler,
diff --git a/fs/Kconfig b/fs/Kconfig
index a6313a969bc5..971339ecc1a2 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -369,8 +369,8 @@ source "fs/ksmbd/Kconfig"
config SMBFS_COMMON
tristate
- default y if CIFS=y
- default m if CIFS=m
+ default y if CIFS=y || SMB_SERVER=y
+ default m if CIFS=m || SMB_SERVER=m
source "fs/coda/Kconfig"
source "fs/afs/Kconfig"
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index d88407fb9bc0..77571372888d 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -158,7 +158,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
cell->name[i] = tolower(name[i]);
cell->name[i] = 0;
- atomic_set(&cell->ref, 1);
+ refcount_set(&cell->ref, 1);
atomic_set(&cell->active, 0);
INIT_WORK(&cell->manager, afs_manage_cell_work);
cell->volumes = RB_ROOT;
@@ -287,7 +287,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
cell = candidate;
candidate = NULL;
atomic_set(&cell->active, 2);
- trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), 2, afs_cell_trace_insert);
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 2, afs_cell_trace_insert);
rb_link_node_rcu(&cell->net_node, parent, pp);
rb_insert_color(&cell->net_node, &net->cells);
up_write(&net->cells_lock);
@@ -295,7 +295,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
afs_queue_cell(cell, afs_cell_trace_get_queue_new);
wait_for_cell:
- trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), atomic_read(&cell->active),
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), atomic_read(&cell->active),
afs_cell_trace_wait);
_debug("wait_for_cell");
wait_var_event(&cell->state,
@@ -409,10 +409,12 @@ static int afs_update_cell(struct afs_cell *cell)
if (ret == -ENOMEM)
goto out_wake;
- ret = -ENOMEM;
vllist = afs_alloc_vlserver_list(0);
- if (!vllist)
+ if (!vllist) {
+ if (ret >= 0)
+ ret = -ENOMEM;
goto out_wake;
+ }
switch (ret) {
case -ENODATA:
@@ -490,13 +492,13 @@ static void afs_cell_destroy(struct rcu_head *rcu)
{
struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu);
struct afs_net *net = cell->net;
- int u;
+ int r;
_enter("%p{%s}", cell, cell->name);
- u = atomic_read(&cell->ref);
- ASSERTCMP(u, ==, 0);
- trace_afs_cell(cell->debug_id, u, atomic_read(&cell->active), afs_cell_trace_free);
+ r = refcount_read(&cell->ref);
+ ASSERTCMP(r, ==, 0);
+ trace_afs_cell(cell->debug_id, r, atomic_read(&cell->active), afs_cell_trace_free);
afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers));
afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias);
@@ -539,13 +541,10 @@ void afs_cells_timer(struct timer_list *timer)
*/
struct afs_cell *afs_get_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
- int u;
+ int r;
- if (atomic_read(&cell->ref) <= 0)
- BUG();
-
- u = atomic_inc_return(&cell->ref);
- trace_afs_cell(cell->debug_id, u, atomic_read(&cell->active), reason);
+ __refcount_inc(&cell->ref, &r);
+ trace_afs_cell(cell->debug_id, r + 1, atomic_read(&cell->active), reason);
return cell;
}
@@ -556,12 +555,14 @@ void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
if (cell) {
unsigned int debug_id = cell->debug_id;
- unsigned int u, a;
+ unsigned int a;
+ bool zero;
+ int r;
a = atomic_read(&cell->active);
- u = atomic_dec_return(&cell->ref);
- trace_afs_cell(debug_id, u, a, reason);
- if (u == 0) {
+ zero = __refcount_dec_and_test(&cell->ref, &r);
+ trace_afs_cell(debug_id, r - 1, a, reason);
+ if (zero) {
a = atomic_read(&cell->active);
WARN(a != 0, "Cell active count %u > 0\n", a);
call_rcu(&cell->rcu, afs_cell_destroy);
@@ -574,14 +575,12 @@ void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason)
*/
struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
- int u, a;
-
- if (atomic_read(&cell->ref) <= 0)
- BUG();
+ int r, a;
- u = atomic_read(&cell->ref);
+ r = refcount_read(&cell->ref);
+ WARN_ON(r == 0);
a = atomic_inc_return(&cell->active);
- trace_afs_cell(cell->debug_id, u, a, reason);
+ trace_afs_cell(cell->debug_id, r, a, reason);
return cell;
}
@@ -593,7 +592,7 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_tr
{
unsigned int debug_id;
time64_t now, expire_delay;
- int u, a;
+ int r, a;
if (!cell)
return;
@@ -607,9 +606,9 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_tr
expire_delay = afs_cell_gc_delay;
debug_id = cell->debug_id;
- u = atomic_read(&cell->ref);
+ r = refcount_read(&cell->ref);
a = atomic_dec_return(&cell->active);
- trace_afs_cell(debug_id, u, a, reason);
+ trace_afs_cell(debug_id, r, a, reason);
WARN_ON(a == 0);
if (a == 1)
/* 'cell' may now be garbage collected. */
@@ -621,11 +620,11 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_tr
*/
void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason)
{
- int u, a;
+ int r, a;
- u = atomic_read(&cell->ref);
+ r = refcount_read(&cell->ref);
a = atomic_read(&cell->active);
- trace_afs_cell(cell->debug_id, u, a, reason);
+ trace_afs_cell(cell->debug_id, r, a, reason);
}
/*
@@ -751,7 +750,7 @@ again:
active = 1;
if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) {
rb_erase(&cell->net_node, &net->cells);
- trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), 0,
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 0,
afs_cell_trace_unuse_delete);
smp_store_release(&cell->state, AFS_CELL_REMOVED);
}
@@ -878,7 +877,7 @@ void afs_manage_cells(struct work_struct *work)
bool sched_cell = false;
active = atomic_read(&cell->active);
- trace_afs_cell(cell->debug_id, atomic_read(&cell->ref),
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref),
active, afs_cell_trace_manage);
ASSERTCMP(active, >=, 1);
@@ -886,7 +885,7 @@ void afs_manage_cells(struct work_struct *work)
if (purging) {
if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) {
active = atomic_dec_return(&cell->active);
- trace_afs_cell(cell->debug_id, atomic_read(&cell->ref),
+ trace_afs_cell(cell->debug_id, refcount_read(&cell->ref),
active, afs_cell_trace_unuse_pin);
}
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index a3f5de28be79..cedd627e1fae 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -213,7 +213,7 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
*/
if (call->server) {
trace_afs_server(call->server,
- atomic_read(&call->server->ref),
+ refcount_read(&call->server->ref),
atomic_read(&call->server->active),
afs_server_trace_callback);
afs_break_callbacks(call->server, call->count, call->request);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index db832cc931c8..96b404d9e13a 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -113,6 +113,7 @@ static int afs_probe_cell_name(struct dentry *dentry)
struct afs_net *net = afs_d2net(dentry);
const char *name = dentry->d_name.name;
size_t len = dentry->d_name.len;
+ char *result = NULL;
int ret;
/* Names prefixed with a dot are R/W mounts. */
@@ -130,9 +131,22 @@ static int afs_probe_cell_name(struct dentry *dentry)
}
ret = dns_query(net->net, "afsdb", name, len, "srv=1",
- NULL, NULL, false);
- if (ret == -ENODATA)
- ret = -EDESTADDRREQ;
+ &result, NULL, false);
+ if (ret == -ENODATA || ret == -ENOKEY || ret == 0)
+ ret = -ENOENT;
+ if (ret > 0 && ret >= sizeof(struct dns_server_list_v1_header)) {
+ struct dns_server_list_v1_header *v1 = (void *)result;
+
+ if (v1->hdr.zero == 0 &&
+ v1->hdr.content == DNS_PAYLOAD_IS_SERVER_LIST &&
+ v1->hdr.version == 1 &&
+ (v1->status != DNS_LOOKUP_GOOD &&
+ v1->status != DNS_LOOKUP_GOOD_WITH_BAD))
+ return -ENOENT;
+
+ }
+
+ kfree(result);
return ret;
}
@@ -251,20 +265,9 @@ static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags)
return 1;
}
-/*
- * Allow the VFS to enquire as to whether a dentry should be unhashed (mustn't
- * sleep)
- * - called from dput() when d_count is going to 0.
- * - return 1 to request dentry be unhashed, 0 otherwise
- */
-static int afs_dynroot_d_delete(const struct dentry *dentry)
-{
- return d_really_is_positive(dentry);
-}
-
const struct dentry_operations afs_dynroot_dentry_operations = {
.d_revalidate = afs_dynroot_d_revalidate,
- .d_delete = afs_dynroot_d_delete,
+ .d_delete = always_delete_dentry,
.d_release = afs_d_release,
.d_automount = afs_d_automount,
};
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 567e61b553f5..0c03877cdaf7 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -123,7 +123,7 @@ struct afs_call {
};
struct afs_operation *op;
unsigned int server_index;
- atomic_t usage;
+ refcount_t ref;
enum afs_call_state state;
spinlock_t state_lock;
int error; /* error code */
@@ -368,7 +368,7 @@ struct afs_cell {
#endif
time64_t dns_expiry; /* Time AFSDB/SRV record expires */
time64_t last_inactive; /* Time of last drop of usage count */
- atomic_t ref; /* Struct refcount */
+ refcount_t ref; /* Struct refcount */
atomic_t active; /* Active usage counter */
unsigned long flags;
#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */
@@ -413,7 +413,7 @@ struct afs_vlserver {
#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */
#define AFS_VLSERVER_FL_RESPONDING 3 /* VL server is responding */
rwlock_t lock; /* Lock on addresses */
- atomic_t usage;
+ refcount_t ref;
unsigned int rtt; /* Server's current RTT in uS */
/* Probe state */
@@ -449,7 +449,7 @@ struct afs_vlserver_entry {
struct afs_vlserver_list {
struct rcu_head rcu;
- atomic_t usage;
+ refcount_t ref;
u8 nr_servers;
u8 index; /* Server currently in use */
u8 preferred; /* Preferred server */
@@ -520,7 +520,7 @@ struct afs_server {
#define AFS_SERVER_FL_NO_IBULK 17 /* Fileserver doesn't support FS.InlineBulkStatus */
#define AFS_SERVER_FL_NO_RM2 18 /* Fileserver doesn't support YFS.RemoveFile2 */
#define AFS_SERVER_FL_HAS_FS64 19 /* Fileserver supports FS.{Fetch,Store}Data64 */
- atomic_t ref; /* Object refcount */
+ refcount_t ref; /* Object refcount */
atomic_t active; /* Active user count */
u32 addr_version; /* Address list version */
unsigned int rtt; /* Server's current RTT in uS */
@@ -556,6 +556,7 @@ struct afs_server_entry {
};
struct afs_server_list {
+ struct rcu_head rcu;
afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */
refcount_t usage;
unsigned char nr_servers;
@@ -574,7 +575,7 @@ struct afs_volume {
struct rcu_head rcu;
afs_volid_t vid; /* volume ID */
};
- atomic_t usage;
+ refcount_t ref;
time64_t update_at; /* Time at which to next update */
struct afs_cell *cell; /* Cell to which belongs (pins ref) */
struct rb_node cell_node; /* Link in cell->volumes */
@@ -588,6 +589,7 @@ struct afs_volume {
#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */
#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */
#define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_RM_TREE 7 /* - Set if volume removed from cell->volumes */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_cookie *cache; /* caching cookie */
#endif
@@ -1482,14 +1484,14 @@ extern int afs_end_vlserver_operation(struct afs_vl_cursor *);
*/
static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver)
{
- atomic_inc(&vlserver->usage);
+ refcount_inc(&vlserver->ref);
return vlserver;
}
static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist)
{
if (vllist)
- atomic_inc(&vllist->usage);
+ refcount_inc(&vllist->ref);
return vllist;
}
@@ -1506,6 +1508,7 @@ extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
extern void afs_activate_volume(struct afs_volume *);
extern void afs_deactivate_volume(struct afs_volume *);
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason);
extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace);
extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace);
extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 065a28bfa3f1..254ccf1d592f 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -47,7 +47,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
/* display one cell per line on subsequent lines */
seq_printf(m, "%3u %3u %6lld %2u %2u %s\n",
- atomic_read(&cell->ref),
+ refcount_read(&cell->ref),
atomic_read(&cell->active),
cell->dns_expiry - ktime_get_real_seconds(),
vllist ? vllist->nr_servers : 0,
@@ -217,7 +217,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
}
seq_printf(m, "%3d %08llx %s %s\n",
- atomic_read(&vol->usage), vol->vid,
+ refcount_read(&vol->ref), vol->vid,
afs_vol_types[vol->type],
vol->name);
@@ -388,7 +388,7 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
alist = rcu_dereference(server->addresses);
seq_printf(m, "%pU %3d %3d\n",
&server->uuid,
- atomic_read(&server->ref),
+ refcount_read(&server->ref),
atomic_read(&server->active));
seq_printf(m, " - info: fl=%lx rtt=%u brk=%x\n",
server->flags, server->rtt, server->cb_s_break);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e3de7fea3643..ea40da937fcd 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -145,7 +145,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
call->type = type;
call->net = net;
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
- atomic_set(&call->usage, 1);
+ refcount_set(&call->ref, 1);
INIT_WORK(&call->async_work, afs_process_async_call);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->state_lock);
@@ -163,14 +163,15 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
void afs_put_call(struct afs_call *call)
{
struct afs_net *net = call->net;
- int n = atomic_dec_return(&call->usage);
- int o = atomic_read(&net->nr_outstanding_calls);
+ bool zero;
+ int r, o;
- trace_afs_call(call, afs_call_trace_put, n, o,
+ zero = __refcount_dec_and_test(&call->ref, &r);
+ o = atomic_read(&net->nr_outstanding_calls);
+ trace_afs_call(call, afs_call_trace_put, r - 1, o,
__builtin_return_address(0));
- ASSERTCMP(n, >=, 0);
- if (n == 0) {
+ if (zero) {
ASSERT(!work_pending(&call->async_work));
ASSERT(call->type->name != NULL);
@@ -198,9 +199,11 @@ void afs_put_call(struct afs_call *call)
static struct afs_call *afs_get_call(struct afs_call *call,
enum afs_call_trace why)
{
- int u = atomic_inc_return(&call->usage);
+ int r;
- trace_afs_call(call, why, u,
+ __refcount_inc(&call->ref, &r);
+
+ trace_afs_call(call, why, r + 1,
atomic_read(&call->net->nr_outstanding_calls),
__builtin_return_address(0));
return call;
@@ -420,7 +423,7 @@ error_kill_call:
if (call->async) {
if (cancel_work_sync(&call->async_work))
afs_put_call(call);
- afs_put_call(call);
+ afs_set_call_complete(call, ret, 0);
}
ac->error = ret;
@@ -663,14 +666,13 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
unsigned long call_user_ID)
{
struct afs_call *call = (struct afs_call *)call_user_ID;
- int u;
+ int r;
trace_afs_notify_call(rxcall, call);
call->need_attention = true;
- u = atomic_fetch_add_unless(&call->usage, 1, 0);
- if (u != 0) {
- trace_afs_call(call, afs_call_trace_wake, u + 1,
+ if (__refcount_inc_not_zero(&call->ref, &r)) {
+ trace_afs_call(call, afs_call_trace_wake, r + 1,
atomic_read(&call->net->nr_outstanding_calls),
__builtin_return_address(0));
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 6e5b9a19b234..ffed828622b6 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -228,7 +228,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
if (!server)
goto enomem;
- atomic_set(&server->ref, 1);
+ refcount_set(&server->ref, 1);
atomic_set(&server->active, 1);
server->debug_id = atomic_inc_return(&afs_server_debug_id);
RCU_INIT_POINTER(server->addresses, alist);
@@ -352,9 +352,10 @@ void afs_servers_timer(struct timer_list *timer)
struct afs_server *afs_get_server(struct afs_server *server,
enum afs_server_trace reason)
{
- unsigned int u = atomic_inc_return(&server->ref);
+ int r;
- trace_afs_server(server, u, atomic_read(&server->active), reason);
+ __refcount_inc(&server->ref, &r);
+ trace_afs_server(server, r + 1, atomic_read(&server->active), reason);
return server;
}
@@ -364,14 +365,14 @@ struct afs_server *afs_get_server(struct afs_server *server,
static struct afs_server *afs_maybe_use_server(struct afs_server *server,
enum afs_server_trace reason)
{
- unsigned int r = atomic_fetch_add_unless(&server->ref, 1, 0);
unsigned int a;
+ int r;
- if (r == 0)
+ if (!__refcount_inc_not_zero(&server->ref, &r))
return NULL;
a = atomic_inc_return(&server->active);
- trace_afs_server(server, r, a, reason);
+ trace_afs_server(server, r + 1, a, reason);
return server;
}
@@ -380,10 +381,13 @@ static struct afs_server *afs_maybe_use_server(struct afs_server *server,
*/
struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_trace reason)
{
- unsigned int r = atomic_inc_return(&server->ref);
- unsigned int a = atomic_inc_return(&server->active);
+ unsigned int a;
+ int r;
+
+ __refcount_inc(&server->ref, &r);
+ a = atomic_inc_return(&server->active);
- trace_afs_server(server, r, a, reason);
+ trace_afs_server(server, r + 1, a, reason);
return server;
}
@@ -393,14 +397,15 @@ struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_tra
void afs_put_server(struct afs_net *net, struct afs_server *server,
enum afs_server_trace reason)
{
- unsigned int usage;
+ bool zero;
+ int r;
if (!server)
return;
- usage = atomic_dec_return(&server->ref);
- trace_afs_server(server, usage, atomic_read(&server->active), reason);
- if (unlikely(usage == 0))
+ zero = __refcount_dec_and_test(&server->ref, &r);
+ trace_afs_server(server, r - 1, atomic_read(&server->active), reason);
+ if (unlikely(zero))
__afs_put_server(net, server);
}
@@ -436,7 +441,7 @@ static void afs_server_rcu(struct rcu_head *rcu)
{
struct afs_server *server = container_of(rcu, struct afs_server, rcu);
- trace_afs_server(server, atomic_read(&server->ref),
+ trace_afs_server(server, refcount_read(&server->ref),
atomic_read(&server->active), afs_server_trace_free);
afs_put_addrlist(rcu_access_pointer(server->addresses));
kfree(server);
@@ -487,7 +492,7 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
active = atomic_read(&server->active);
if (active == 0) {
- trace_afs_server(server, atomic_read(&server->ref),
+ trace_afs_server(server, refcount_read(&server->ref),
active, afs_server_trace_gc);
next = rcu_dereference_protected(
server->uuid_next, lockdep_is_held(&net->fs_lock.lock));
@@ -553,7 +558,7 @@ void afs_manage_servers(struct work_struct *work)
_debug("manage %pU %u", &server->uuid, active);
if (purging) {
- trace_afs_server(server, atomic_read(&server->ref),
+ trace_afs_server(server, refcount_read(&server->ref),
active, afs_server_trace_purging);
if (active != 0)
pr_notice("Can't purge s=%08x\n", server->debug_id);
@@ -633,7 +638,8 @@ static noinline bool afs_update_server_record(struct afs_operation *op,
_enter("");
- trace_afs_server(server, atomic_read(&server->ref), atomic_read(&server->active),
+ trace_afs_server(server, refcount_read(&server->ref),
+ atomic_read(&server->active),
afs_server_trace_update);
alist = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid);
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index ed9056703505..b59896b1de0a 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -17,7 +17,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
for (i = 0; i < slist->nr_servers; i++)
afs_unuse_server(net, slist->servers[i].server,
afs_server_trace_put_slist);
- kfree(slist);
+ kfree_rcu(slist, rcu);
}
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 34c68724c98b..910e73bb5a08 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -406,6 +406,8 @@ static int afs_validate_fc(struct fs_context *fc)
return PTR_ERR(volume);
ctx->volume = volume;
+ if (volume->type != AFSVL_RWVOL)
+ ctx->flock_mode = afs_flock_mode_local;
}
return 0;
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
index 38b2ba1d9ec0..acc48216136a 100644
--- a/fs/afs/vl_list.c
+++ b/fs/afs/vl_list.c
@@ -17,7 +17,7 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
vlserver = kzalloc(struct_size(vlserver, name, name_len + 1),
GFP_KERNEL);
if (vlserver) {
- atomic_set(&vlserver->usage, 1);
+ refcount_set(&vlserver->ref, 1);
rwlock_init(&vlserver->lock);
init_waitqueue_head(&vlserver->probe_wq);
spin_lock_init(&vlserver->probe_lock);
@@ -39,13 +39,9 @@ static void afs_vlserver_rcu(struct rcu_head *rcu)
void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver)
{
- if (vlserver) {
- unsigned int u = atomic_dec_return(&vlserver->usage);
- //_debug("VL PUT %p{%u}", vlserver, u);
-
- if (u == 0)
- call_rcu(&vlserver->rcu, afs_vlserver_rcu);
- }
+ if (vlserver &&
+ refcount_dec_and_test(&vlserver->ref))
+ call_rcu(&vlserver->rcu, afs_vlserver_rcu);
}
struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
@@ -54,7 +50,7 @@ struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL);
if (vllist) {
- atomic_set(&vllist->usage, 1);
+ refcount_set(&vllist->ref, 1);
rwlock_init(&vllist->lock);
}
@@ -64,10 +60,7 @@ struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist)
{
if (vllist) {
- unsigned int u = atomic_dec_return(&vllist->usage);
-
- //_debug("VLLS PUT %p{%u}", vllist, u);
- if (u == 0) {
+ if (refcount_dec_and_test(&vllist->ref)) {
int i;
for (i = 0; i < vllist->nr_servers; i++) {
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index 488e58490b16..eb415ce56360 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -58,6 +58,12 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
}
/* Status load is ordered after lookup counter load */
+ if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
+ pr_warn("No record of cell %s\n", cell->name);
+ vc->error = -ENOENT;
+ return false;
+ }
+
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
vc->error = -EDESTADDRREQ;
return false;
@@ -285,6 +291,7 @@ failed:
*/
static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
{
+ struct afs_cell *cell = vc->cell;
static int count;
int i;
@@ -294,6 +301,9 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
rcu_read_lock();
pr_notice("EDESTADDR occurred\n");
+ pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
+ pr_notice("DNS: src=%u st=%u lc=%x\n",
+ cell->dns_source, cell->dns_status, cell->dns_lookup_count);
pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index f84194b791d3..137a970c19fb 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -33,8 +33,13 @@ static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
} else if (p->vid > volume->vid) {
pp = &(*pp)->rb_right;
} else {
- volume = afs_get_volume(p, afs_volume_trace_get_cell_insert);
- goto found;
+ if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
+ volume = p;
+ goto found;
+ }
+
+ set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
+ rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
}
}
@@ -53,11 +58,12 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume)
struct afs_cell *cell = volume->cell;
if (!hlist_unhashed(&volume->proc_link)) {
- trace_afs_volume(volume->vid, atomic_read(&volume->usage),
+ trace_afs_volume(volume->vid, refcount_read(&cell->ref),
afs_volume_trace_remove);
write_seqlock(&cell->volume_lock);
hlist_del_rcu(&volume->proc_link);
- rb_erase(&volume->cell_node, &cell->volumes);
+ if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
+ rb_erase(&volume->cell_node, &cell->volumes);
write_sequnlock(&cell->volume_lock);
}
}
@@ -88,7 +94,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
volume->type_force = params->force;
volume->name_len = vldb->name_len;
- atomic_set(&volume->usage, 1);
+ refcount_set(&volume->ref, 1);
INIT_HLIST_NODE(&volume->proc_link);
rwlock_init(&volume->servers_lock);
rwlock_init(&volume->cb_v_break_lock);
@@ -229,7 +235,7 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
afs_remove_volume_from_cell(volume);
afs_put_serverlist(net, rcu_access_pointer(volume->servers));
afs_put_cell(volume->cell, afs_cell_trace_put_vol);
- trace_afs_volume(volume->vid, atomic_read(&volume->usage),
+ trace_afs_volume(volume->vid, refcount_read(&volume->ref),
afs_volume_trace_free);
kfree_rcu(volume, rcu);
@@ -237,14 +243,30 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
}
/*
+ * Try to get a reference on a volume record.
+ */
+bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
+{
+ int r;
+
+ if (__refcount_inc_not_zero(&volume->ref, &r)) {
+ trace_afs_volume(volume->vid, r + 1, reason);
+ return true;
+ }
+ return false;
+}
+
+/*
* Get a reference on a volume record.
*/
struct afs_volume *afs_get_volume(struct afs_volume *volume,
enum afs_volume_trace reason)
{
if (volume) {
- int u = atomic_inc_return(&volume->usage);
- trace_afs_volume(volume->vid, u, reason);
+ int r;
+
+ __refcount_inc(&volume->ref, &r);
+ trace_afs_volume(volume->vid, r + 1, reason);
}
return volume;
}
@@ -258,9 +280,12 @@ void afs_put_volume(struct afs_net *net, struct afs_volume *volume,
{
if (volume) {
afs_volid_t vid = volume->vid;
- int u = atomic_dec_return(&volume->usage);
- trace_afs_volume(vid, u, reason);
- if (u == 0)
+ bool zero;
+ int r;
+
+ zero = __refcount_dec_and_test(&volume->ref, &r);
+ trace_afs_volume(vid, r - 1, reason);
+ if (zero)
afs_destroy_volume(net, volume);
}
}
diff --git a/fs/attr.c b/fs/attr.c
index 28e953e86960..786d358dd699 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -402,9 +402,25 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
return error;
if ((ia_valid & ATTR_MODE)) {
- umode_t amode = attr->ia_mode;
+ /*
+ * Don't allow changing the mode of symlinks:
+ *
+ * (1) The vfs doesn't take the mode of symlinks into account
+ * during permission checking.
+ * (2) This has never worked correctly. Most major filesystems
+ * did return EOPNOTSUPP due to interactions with POSIX ACLs
+ * but still updated the mode of the symlink.
+ * This inconsistency led system call wrapper providers such
+ * as libc to block changing the mode of symlinks with
+ * EOPNOTSUPP already.
+ * (3) To even do this in the first place one would have to use
+ * specific file descriptors and quite some effort.
+ */
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
/* Flag setting protected by i_mutex */
- if (is_sxid(amode))
+ if (is_sxid(attr->ia_mode))
inode->i_flags &= ~S_NOSEC;
}
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 54c1f8b8b075..efdc76732fae 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -32,8 +32,9 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
wq->status = -ENOENT; /* Magic is gone - report failure */
kfree(wq->name.name - wq->offset);
wq->name.name = NULL;
- wq->wait_ctr--;
wake_up_interruptible(&wq->queue);
+ if (!--wq->wait_ctr)
+ kfree(wq);
wq = nwq;
}
fput(sbi->pipe); /* Close the pipe */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c316931fc99c..f51f6e4d1a32 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -345,10 +345,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
/* there's now no turning back... the old userspace image is dead,
* defunct, deceased, etc.
*/
+ SET_PERSONALITY(exec_params.hdr);
if (elf_check_fdpic(&exec_params.hdr))
- set_personality(PER_LINUX_FDPIC);
- else
- set_personality(PER_LINUX);
+ current->personality |= PER_LINUX_FDPIC;
if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
current->personality |= READ_IMPLIES_EXEC;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a648dff2bece..8b53313bf3b2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -545,18 +545,30 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
u64 search_start;
int ret;
- if (test_bit(BTRFS_ROOT_DELETING, &root->state))
- btrfs_err(fs_info,
- "COW'ing blocks on a fs root that's being dropped");
-
- if (trans->transaction != fs_info->running_transaction)
- WARN(1, KERN_CRIT "trans %llu running %llu\n",
- trans->transid,
- fs_info->running_transaction->transid);
+ if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) {
+ btrfs_abort_transaction(trans, -EUCLEAN);
+ btrfs_crit(fs_info,
+ "attempt to COW block %llu on root %llu that is being deleted",
+ buf->start, btrfs_root_id(root));
+ return -EUCLEAN;
+ }
- if (trans->transid != fs_info->generation)
- WARN(1, KERN_CRIT "trans %llu running %llu\n",
- trans->transid, fs_info->generation);
+ /*
+ * COWing must happen through a running transaction, which always
+ * matches the current fs generation (it's a transaction with a state
+ * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs
+ * into error state to prevent the commit of any transaction.
+ */
+ if (unlikely(trans->transaction != fs_info->running_transaction ||
+ trans->transid != fs_info->generation)) {
+ btrfs_abort_transaction(trans, -EUCLEAN);
+ btrfs_crit(fs_info,
+"unexpected transaction when attempting to COW block %llu on root %llu, transaction %llu running transaction %llu fs generation %llu",
+ buf->start, btrfs_root_id(root), trans->transid,
+ fs_info->running_transaction->transid,
+ fs_info->generation);
+ return -EUCLEAN;
+ }
if (!should_cow_block(trans, root, buf)) {
*cow_ret = buf;
@@ -668,8 +680,22 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
int progress_passed = 0;
struct btrfs_disk_key disk_key;
- WARN_ON(trans->transaction != fs_info->running_transaction);
- WARN_ON(trans->transid != fs_info->generation);
+ /*
+ * COWing must happen through a running transaction, which always
+ * matches the current fs generation (it's a transaction with a state
+ * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs
+ * into error state to prevent the commit of any transaction.
+ */
+ if (unlikely(trans->transaction != fs_info->running_transaction ||
+ trans->transid != fs_info->generation)) {
+ btrfs_abort_transaction(trans, -EUCLEAN);
+ btrfs_crit(fs_info,
+"unexpected transaction when attempting to reallocate parent %llu for root %llu, transaction %llu running transaction %llu fs generation %llu",
+ parent->start, btrfs_root_id(root), trans->transid,
+ fs_info->running_transaction->transid,
+ fs_info->generation);
+ return -EUCLEAN;
+ }
parent_nritems = btrfs_header_nritems(parent);
blocksize = fs_info->nodesize;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 02d3ee6c7d9b..1467bf439cb4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -536,8 +536,6 @@ struct btrfs_swapfile_pin {
int bg_extent_count;
};
-bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
-
enum {
BTRFS_FS_BARRIER,
BTRFS_FS_CLOSING_START,
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index b934429c2435..4feddabe40a4 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -312,9 +312,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
} else {
if (current->journal_info)
flush = BTRFS_RESERVE_FLUSH_LIMIT;
-
- if (btrfs_transaction_in_commit(fs_info))
- schedule_timeout(1);
}
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 1e08eb2b27f0..fd951aeaeac5 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1083,20 +1083,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
- btrfs_release_delayed_node(curr_node);
- curr_node = NULL;
btrfs_abort_transaction(trans, ret);
break;
}
prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node);
+ /*
+ * See the comment below about releasing path before releasing
+ * node. If the commit of delayed items was successful the path
+ * should always be released, but in case of an error, it may
+ * point to locked extent buffers (a leaf at the very least).
+ */
+ ASSERT(path->nodes[0] == NULL);
btrfs_release_delayed_node(prev_node);
}
+ /*
+ * Release the path to avoid a potential deadlock and lockdep splat when
+ * releasing the delayed node, as that requires taking the delayed node's
+ * mutex. If another task starts running delayed items before we take
+ * the mutex, it will first lock the mutex and then it may try to lock
+ * the same btree path (leaf).
+ */
+ btrfs_free_path(path);
+
if (curr_node)
btrfs_release_delayed_node(curr_node);
- btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
@@ -1388,9 +1401,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
- "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
- name_len, name, delayed_node->root->root_key.objectid,
- delayed_node->inode_id, ret);
+"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
+ name_len, name, index, btrfs_root_id(delayed_node->root),
+ delayed_node->inode_id, dir->index_cnt,
+ delayed_node->index_cnt, ret);
BUG();
}
mutex_unlock(&delayed_node->mutex);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4e35c6fb7be7..f0654fe80b34 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2598,21 +2598,18 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
- if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
- BTRFS_FSID_SIZE)) {
+ if (memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) {
btrfs_err(fs_info,
"superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
- fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
+ sb->fsid, fs_info->fs_devices->fsid);
ret = -EINVAL;
}
- if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
- memcmp(fs_info->fs_devices->metadata_uuid,
- fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
+ if (memcmp(fs_info->fs_devices->metadata_uuid, btrfs_sb_fsid_ptr(sb),
+ BTRFS_FSID_SIZE) != 0) {
btrfs_err(fs_info,
"superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
- fs_info->super_copy->metadata_uuid,
- fs_info->fs_devices->metadata_uuid);
+ btrfs_sb_fsid_ptr(sb), fs_info->fs_devices->metadata_uuid);
ret = -EINVAL;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 597cc2607481..a19bdb359740 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -860,6 +860,11 @@ again:
err = -ENOENT;
goto out;
} else if (WARN_ON(ret)) {
+ btrfs_print_leaf(path->nodes[0]);
+ btrfs_err(fs_info,
+"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
+ bytenr, num_bytes, parent, root_objectid, owner,
+ offset);
err = -EIO;
goto out;
}
@@ -1669,12 +1674,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
parent = ref->parent;
ref_root = ref->root;
- if (node->ref_mod != 1) {
+ if (unlikely(node->ref_mod != 1)) {
btrfs_err(trans->fs_info,
- "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
+ "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu",
node->bytenr, node->ref_mod, node->action, ref_root,
parent);
- return -EIO;
+ return -EUCLEAN;
}
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
BUG_ON(!extent_op || !extent_op->update_flags);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f9f6dfbc86bc..346fc46d019b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -6722,8 +6722,14 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
char *dst = (char *)dstv;
unsigned long i = get_eb_page_index(start);
- if (check_eb_range(eb, start, len))
+ if (check_eb_range(eb, start, len)) {
+ /*
+ * Invalid range hit, reset the memory, so callers won't get
+ * some random garbage for their uninitialized memory.
+ */
+ memset(dstv, 0, len);
return;
+ }
offset = get_eb_offset_in_page(eb, start);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f8a01964a216..95af29634e55 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3226,6 +3226,13 @@ out:
btrfs_free_reserved_extent(fs_info,
ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes, 1);
+ /*
+ * Actually free the qgroup rsv which was released when
+ * the ordered extent was created.
+ */
+ btrfs_qgroup_free_refroot(fs_info, inode->root->root_key.objectid,
+ ordered_extent->qgroup_rsv,
+ BTRFS_QGROUP_RSV_DATA);
}
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c9b3d99171b2..233e465647fd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1855,6 +1855,15 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
* are limited to own subvolumes only
*/
ret = -EPERM;
+ } else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * Snapshots must be made with the src_inode referring
+ * to the subvolume inode, otherwise the permission
+ * checking above is useless because we may have
+ * permission on a lower directory but not the subvol
+ * itself.
+ */
+ ret = -EINVAL;
} else {
ret = btrfs_mksnapshot(&file->f_path, mnt_userns,
name, namelen,
@@ -2094,7 +2103,7 @@ static noinline int key_in_sk(struct btrfs_key *key,
static noinline int copy_to_sk(struct btrfs_path *path,
struct btrfs_key *key,
struct btrfs_ioctl_search_key *sk,
- size_t *buf_size,
+ u64 *buf_size,
char __user *ubuf,
unsigned long *sk_offset,
int *num_found)
@@ -2226,7 +2235,7 @@ out:
static noinline int search_ioctl(struct inode *inode,
struct btrfs_ioctl_search_key *sk,
- size_t *buf_size,
+ u64 *buf_size,
char __user *ubuf)
{
struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
@@ -2295,7 +2304,7 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
struct btrfs_ioctl_search_key sk;
struct inode *inode;
int ret;
- size_t buf_size;
+ u64 buf_size;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -2329,8 +2338,8 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
struct btrfs_ioctl_search_args_v2 args;
struct inode *inode;
int ret;
- size_t buf_size;
- const size_t buf_limit = SZ_16M;
+ u64 buf_size;
+ const u64 buf_limit = SZ_16M;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -2526,6 +2535,13 @@ static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns,
goto out_put;
}
+ /*
+ * We don't need the path anymore, so release it and
+ * avoid deadlocks and lockdep warnings in case
+ * btrfs_iget() needs to lookup the inode from its root
+ * btree and lock the same leaf.
+ */
+ btrfs_release_path(path);
temp_inode = btrfs_iget(sb, key2.objectid, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
@@ -2546,7 +2562,6 @@ static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns,
goto out_put;
}
- btrfs_release_path(path);
key.objectid = key.offset;
key.offset = (u64)-1;
dirid = key.objectid;
@@ -3525,7 +3540,7 @@ static void get_block_group_info(struct list_head *groups_list,
static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
void __user *arg)
{
- struct btrfs_ioctl_space_args space_args;
+ struct btrfs_ioctl_space_args space_args = { 0 };
struct btrfs_ioctl_space_info space;
struct btrfs_ioctl_space_info *dest;
struct btrfs_ioctl_space_info *dest_orig;
@@ -4861,7 +4876,7 @@ static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
- struct btrfs_ioctl_send_args_32 args32;
+ struct btrfs_ioctl_send_args_32 args32 = { 0 };
ret = copy_from_user(&args32, argp, sizeof(args32));
if (ret)
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index d2062d5f71dd..50a5a5cfe38f 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -788,6 +788,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
dump_ref_action(fs_info, ra);
kfree(ref);
kfree(ra);
+ kfree(re);
goto out_unlock;
} else if (be->num_refs == 0) {
btrfs_err(fs_info,
@@ -797,6 +798,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
dump_ref_action(fs_info, ra);
kfree(ref);
kfree(ra);
+ kfree(re);
goto out_unlock;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 692ae2e2f8cc..c9fd598b0325 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7576,7 +7576,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
sctx->flags = arg->flags;
sctx->send_filp = fget(arg->send_fd);
- if (!sctx->send_filp) {
+ if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index ea23b83fc96b..2fd0ee0e6e93 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2364,7 +2364,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
* calculated f_bavail.
*/
if (!mixed && block_rsv->space_info->full &&
- total_free_meta - thresh < block_rsv->size)
+ (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size))
buf->f_bavail = 0;
buf->f_type = BTRFS_SUPER_MAGIC;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index daaed37bba9e..99cdd1d6a4bf 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -311,10 +311,11 @@ loop:
spin_unlock(&fs_info->trans_lock);
/*
- * If we are ATTACH, we just want to catch the current transaction,
- * and commit it. If there is no transaction, just return ENOENT.
+ * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
+ * current transaction, and commit it. If there is no transaction, just
+ * return ENOENT.
*/
- if (type == TRANS_ATTACH)
+ if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
return -ENOENT;
/*
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7c0c6fc0c536..dcf0dd2093f5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4446,7 +4446,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
int slot;
int ins_nr = 0;
- int start_slot;
+ int start_slot = 0;
int ret;
if (!(inode->flags & BTRFS_INODE_PREALLOC))
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e9236a745b8..cc18ba50a61c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -709,6 +709,14 @@ error_free_page:
return -EINVAL;
}
+u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
+{
+ bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
+ BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+
+ return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
+}
+
/*
* Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
* being created with a disk that has already completed its fsid change. Such
@@ -3056,15 +3064,16 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
read_unlock(&em_tree->lock);
if (!em) {
- btrfs_crit(fs_info, "unable to find logical %llu length %llu",
+ btrfs_crit(fs_info,
+ "unable to find chunk map for logical %llu length %llu",
logical, length);
return ERR_PTR(-EINVAL);
}
- if (em->start > logical || em->start + em->len < logical) {
+ if (em->start > logical || em->start + em->len <= logical) {
btrfs_crit(fs_info,
- "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
- logical, length, em->start, em->start + em->len);
+ "found a bad chunk map, wanted %llu-%llu, found %llu-%llu",
+ logical, logical + length, em->start, em->start + em->len);
free_extent_map(em);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index b49fa784e5ba..eb91d6eb78ce 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -622,4 +622,7 @@ const char *btrfs_bg_type_to_raid_name(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
int btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
+bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
+u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb);
+
#endif
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index cb87714fe886..f3fba3d27efa 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2436,7 +2436,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
ret = do_splice_direct(src_file, &src_off, dst_file,
&dst_off, src_objlen, flags);
/* Abort on short copies or on error */
- if (ret < src_objlen) {
+ if (ret < (long)src_objlen) {
dout("Failed partial copy (%zd)\n", ret);
goto out;
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 42e449d3f18b..28281c83cf5f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -647,9 +647,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
ci->i_truncate_seq = truncate_seq;
/* the MDS should have revoked these caps */
- WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL |
- CEPH_CAP_FILE_RD |
- CEPH_CAP_FILE_WR |
+ WARN_ON_ONCE(issued & (CEPH_CAP_FILE_RD |
CEPH_CAP_FILE_LAZYIO));
/*
* If we hold relevant caps, or in the case where we're
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 353bd0dd7026..66b4413b94f7 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -64,8 +64,8 @@ struct key_type cifs_spnego_key_type = {
* strlen(";sec=ntlmsspi") */
#define MAX_MECH_STR_LEN 13
-/* strlen of "host=" */
-#define HOST_KEY_LEN 5
+/* strlen of ";host=" */
+#define HOST_KEY_LEN 6
/* strlen of ";ip4=" or ";ip6=" */
#define IP_KEY_LEN 5
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index b5ae209539ff..9bbead15a028 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1127,6 +1127,7 @@ const struct inode_operations cifs_file_inode_ops = {
const struct inode_operations cifs_symlink_inode_ops = {
.get_link = cifs_get_link,
+ .setattr = cifs_setattr,
.permission = cifs_permission,
.listxattr = cifs_listxattr,
};
@@ -1143,7 +1144,9 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
unsigned int xid;
int rc;
- if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+ if (remap_flags & REMAP_FILE_DEDUP)
+ return -EOPNOTSUPP;
+ if (remap_flags & ~REMAP_FILE_ADVISORY)
return -EINVAL;
cifs_dbg(FYI, "clone range\n");
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 82848412ad85..30a9a89c141b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2531,7 +2531,7 @@ int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start,
}
cifsFileInfo_put(cfile);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
int cifs_truncate_page(struct address_space *mapping, loff_t from)
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 5e4dab5dfb7a..33328eae03d7 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -340,6 +340,10 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
cifs_dbg(VFS, "Length less than smb header size\n");
}
return -EIO;
+ } else if (total_read < sizeof(*smb) + 2 * smb->WordCount) {
+ cifs_dbg(VFS, "%s: can't read BCC due to invalid WordCount(%u)\n",
+ __func__, smb->WordCount);
+ return -EIO;
}
/* otherwise, there is enough to get to the BCC */
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 29b5554f6263..e2f401c8c5ce 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -298,6 +298,9 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = {
char *
smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr)
{
+ const int max_off = 4096;
+ const int max_len = 128 * 1024;
+
*off = 0;
*len = 0;
@@ -369,29 +372,20 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr)
* Invalid length or offset probably means data area is invalid, but
* we have little choice but to ignore the data area in this case.
*/
- if (*off > 4096) {
- cifs_dbg(VFS, "offset %d too large, data area ignored\n", *off);
- *len = 0;
- *off = 0;
- } else if (*off < 0) {
- cifs_dbg(VFS, "negative offset %d to data invalid ignore data area\n",
- *off);
+ if (unlikely(*off < 0 || *off > max_off ||
+ *len < 0 || *len > max_len)) {
+ cifs_dbg(VFS, "%s: invalid data area (off=%d len=%d)\n",
+ __func__, *off, *len);
*off = 0;
*len = 0;
- } else if (*len < 0) {
- cifs_dbg(VFS, "negative data length %d invalid, data area ignored\n",
- *len);
- *len = 0;
- } else if (*len > 128 * 1024) {
- cifs_dbg(VFS, "data area larger than 128K: %d\n", *len);
+ } else if (*off == 0) {
*len = 0;
}
/* return pointer to beginning of data area, ie offset from SMB start */
- if ((*off != 0) && (*len != 0))
+ if (*off > 0 && *len > 0)
return (char *)shdr + *off;
- else
- return NULL;
+ return NULL;
}
/*
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 560c4ababfe1..f31da2647d04 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -266,7 +266,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)",
credits->value, new_val);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
spin_lock(&server->req_lock);
@@ -1308,7 +1308,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
/* Use a fudge factor of 256 bytes in case we collide
* with a different set_EAs command.
*/
- if(CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE -
+ if (CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE -
MAX_SMB2_CLOSE_RESPONSE_SIZE - 256 <
used_len + ea_name_len + ea_value_len + 1) {
rc = -ENOSPC;
@@ -2926,6 +2926,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
usleep_range(512, 2048);
} while (++retry_count < 5);
+ if (!rc && !dfs_rsp)
+ rc = -EIO;
if (rc) {
if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP)
cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc);
@@ -3261,7 +3263,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
struct kvec close_iov[1];
struct smb2_ioctl_rsp *ioctl_rsp;
struct reparse_data_buffer *reparse_buf;
- u32 plen;
+ u32 off, count, len;
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
@@ -3341,16 +3343,22 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
*/
if (rc == 0) {
/* See MS-FSCC 2.3.23 */
+ off = le32_to_cpu(ioctl_rsp->OutputOffset);
+ count = le32_to_cpu(ioctl_rsp->OutputCount);
+ if (check_add_overflow(off, count, &len) ||
+ len > rsp_iov[1].iov_len) {
+ cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n",
+ __func__, off, count);
+ rc = -EIO;
+ goto query_rp_exit;
+ }
- reparse_buf = (struct reparse_data_buffer *)
- ((char *)ioctl_rsp +
- le32_to_cpu(ioctl_rsp->OutputOffset));
- plen = le32_to_cpu(ioctl_rsp->OutputCount);
-
- if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) >
- rsp_iov[1].iov_len) {
- cifs_tcon_dbg(FYI, "srv returned invalid ioctl len: %d\n",
- plen);
+ reparse_buf = (void *)((u8 *)ioctl_rsp + off);
+ len = sizeof(*reparse_buf);
+ if (count < len ||
+ count < le16_to_cpu(reparse_buf->ReparseDataLength) + len) {
+ cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n",
+ __func__, off, count);
rc = -EIO;
goto query_rp_exit;
}
@@ -4822,7 +4830,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
if (shdr->Command != SMB2_READ) {
cifs_server_dbg(VFS, "only big read responses are supported\n");
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
if (server->ops->is_session_expired &&
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index f51fea2e808d..6714e9db0ee8 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -364,10 +364,15 @@ static int __smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
void **request_buf, unsigned int *total_len)
{
/* BB eventually switch this to SMB2 specific small buf size */
- if (smb2_command == SMB2_SET_INFO)
+ switch (smb2_command) {
+ case SMB2_SET_INFO:
+ case SMB2_QUERY_INFO:
*request_buf = cifs_buf_get();
- else
+ break;
+ default:
*request_buf = cifs_small_buf_get();
+ break;
+ }
if (*request_buf == NULL) {
/* BB should we add a retry in here if not a writepage? */
return -ENOMEM;
@@ -3415,8 +3420,13 @@ SMB2_query_info_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server,
struct smb2_query_info_req *req;
struct kvec *iov = rqst->rq_iov;
unsigned int total_len;
+ size_t len;
int rc;
+ if (unlikely(check_add_overflow(input_len, sizeof(*req), &len) ||
+ len > CIFSMaxBufSize))
+ return -EINVAL;
+
rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, server,
(void **) &req, &total_len);
if (rc)
@@ -3438,7 +3448,7 @@ SMB2_query_info_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server,
iov[0].iov_base = (char *)req;
/* 1 for Buffer */
- iov[0].iov_len = total_len - 1 + input_len;
+ iov[0].iov_len = len;
return 0;
}
@@ -3446,7 +3456,7 @@ void
SMB2_query_info_free(struct smb_rqst *rqst)
{
if (rqst && rqst->rq_iov)
- cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+ cifs_buf_release(rqst->rq_iov[0].iov_base); /* request */
}
static int
@@ -5176,6 +5186,11 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon,
return 0;
}
+static inline void free_qfs_info_req(struct kvec *iov)
+{
+ cifs_buf_release(iov->iov_base);
+}
+
int
SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
@@ -5207,7 +5222,7 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(iov.iov_base);
+ free_qfs_info_req(&iov);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
goto posix_qfsinf_exit;
@@ -5258,7 +5273,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(iov.iov_base);
+ free_qfs_info_req(&iov);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
goto qfsinf_exit;
@@ -5325,7 +5340,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(iov.iov_base);
+ free_qfs_info_req(&iov);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
goto qfsattr_exit;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f32c99c9ba13..301c155c5267 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -779,7 +779,7 @@ struct smb2_tree_disconnect_rsp {
#define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */
#define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ"
#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC"
-#define SMB2_CREATE_ALLOCATION_SIZE "AISi"
+#define SMB2_CREATE_ALLOCATION_SIZE "AlSi"
#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc"
#define SMB2_CREATE_TIMEWARP_REQUEST "TWrp"
#define SMB2_CREATE_QUERY_ON_DISK_ID "QFid"
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 390cc5e8c746..0f2e0ce84a03 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -430,6 +430,8 @@ generate_smb3signingkey(struct cifs_ses *ses,
ptriplet->encryption.context,
ses->smb3encryptionkey,
SMB3_ENC_DEC_KEY_SIZE);
+ if (rc)
+ return rc;
rc = generate_key(ses, ptriplet->decryption.label,
ptriplet->decryption.context,
ses->smb3decryptionkey,
@@ -438,9 +440,6 @@ generate_smb3signingkey(struct cifs_ses *ses,
return rc;
}
- if (rc)
- return rc;
-
#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
cifs_dbg(VFS, "%s: dumping generated AES session keys\n", __func__);
/*
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 9d486fbbfbbd..6f719b9cf9e9 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -150,10 +150,13 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
goto out;
- if (pTcon->ses->server->ops->set_EA)
+ if (pTcon->ses->server->ops->set_EA) {
rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
full_path, name, value, (__u16)size,
cifs_sb->local_nls, cifs_sb);
+ if (rc == 0)
+ inode_set_ctime_current(inode);
+ }
break;
case XATTR_CIFS_ACL:
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 38930d9b0bb7..df5c2162e729 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -84,6 +84,14 @@ int debugfs_file_get(struct dentry *dentry)
struct debugfs_fsdata *fsd;
void *d_fsd;
+ /*
+ * This could only happen if some debugfs user erroneously calls
+ * debugfs_file_get() on a dentry that isn't even a file, let
+ * them know about it.
+ */
+ if (WARN_ON(!d_is_reg(dentry)))
+ return -EINVAL;
+
d_fsd = READ_ONCE(dentry->d_fsdata);
if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) {
fsd = d_fsd;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 26f9cd328291..5290a721a703 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -214,17 +214,19 @@ static const struct super_operations debugfs_super_operations = {
static void debugfs_release_dentry(struct dentry *dentry)
{
- void *fsd = dentry->d_fsdata;
+ struct debugfs_fsdata *fsd = dentry->d_fsdata;
- if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))
- kfree(dentry->d_fsdata);
+ if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
+ return;
+
+ kfree(fsd);
}
static struct vfsmount *debugfs_automount(struct path *path)
{
- debugfs_automount_t f;
- f = (debugfs_automount_t)path->dentry->d_fsdata;
- return f(path->dentry, d_inode(path->dentry)->i_private);
+ struct debugfs_fsdata *fsd = path->dentry->d_fsdata;
+
+ return fsd->automount(path->dentry, d_inode(path->dentry)->i_private);
}
static const struct dentry_operations debugfs_dops = {
@@ -602,13 +604,23 @@ struct dentry *debugfs_create_automount(const char *name,
void *data)
{
struct dentry *dentry = start_creating(name, parent);
+ struct debugfs_fsdata *fsd;
struct inode *inode;
if (IS_ERR(dentry))
return dentry;
+ fsd = kzalloc(sizeof(*fsd), GFP_KERNEL);
+ if (!fsd) {
+ failed_creating(dentry);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ fsd->automount = f;
+
if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
failed_creating(dentry);
+ kfree(fsd);
return ERR_PTR(-EPERM);
}
@@ -616,13 +628,14 @@ struct dentry *debugfs_create_automount(const char *name,
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create automount '%s'\n",
name);
+ kfree(fsd);
return failed_creating(dentry);
}
make_empty_dir_inode(inode);
inode->i_flags |= S_AUTOMOUNT;
inode->i_private = data;
- dentry->d_fsdata = (void *)f;
+ dentry->d_fsdata = fsd;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
d_instantiate(dentry, inode);
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index 92af8ae31313..f7c489b5a368 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -17,8 +17,14 @@ extern const struct file_operations debugfs_full_proxy_file_operations;
struct debugfs_fsdata {
const struct file_operations *real_fops;
- refcount_t active_users;
- struct completion active_users_drained;
+ union {
+ /* automount_fn is used when real_fops is NULL */
+ debugfs_automount_t automount;
+ struct {
+ refcount_t active_users;
+ struct completion active_users_drained;
+ };
+ };
};
/*
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 28735e8c5e20..5f2e2fa2ba09 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -466,7 +466,8 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
}
} else {
list_for_each_entry(iter, &recv_list, list) {
- if (!iter->info.wait) {
+ if (!iter->info.wait &&
+ iter->info.fsid == info.fsid) {
op = iter;
break;
}
@@ -478,8 +479,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
if (info.wait)
WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
else
- WARN_ON(op->info.fsid != info.fsid ||
- op->info.number != info.number ||
+ WARN_ON(op->info.number != info.number ||
op->info.owner != info.owner ||
op->info.optype != info.optype);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 15880a68faad..3626816b174a 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -13,6 +13,7 @@
#include <linux/ucs2_string.h>
#include <linux/slab.h>
#include <linux/magic.h>
+#include <linux/printk.h>
#include "internal.h"
@@ -231,8 +232,19 @@ static int efivarfs_get_tree(struct fs_context *fc)
return get_tree_single(fc, efivarfs_fill_super);
}
+static int efivarfs_reconfigure(struct fs_context *fc)
+{
+ if (!efivar_supports_writes() && !(fc->sb_flags & SB_RDONLY)) {
+ pr_err("Firmware does not support SetVariableRT. Can not remount with rw\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct fs_context_operations efivarfs_context_ops = {
.get_tree = efivarfs_get_tree,
+ .reconfigure = efivarfs_reconfigure,
};
static int efivarfs_init_fs_context(struct fs_context *fc)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index c247b1bf57cc..f6536b224586 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -716,6 +716,8 @@ hitted:
cur = end - min_t(erofs_off_t, offset + end - map->m_la, end);
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
zero_user_segment(page, cur, end);
+ ++spiltted;
+ tight = false;
goto next_part;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 249ca6c0b784..4a60ea932e3d 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -189,7 +189,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
{
lockdep_assert_held(&ctx->wqh.lock);
- *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+ *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
ctx->count -= *cnt;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index b22d6c984f8c..cfa46d8cf5b3 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -330,14 +330,20 @@ static int exfat_find_empty_entry(struct inode *inode,
if (exfat_check_max_dentries(inode))
return -ENOSPC;
- /* we trust p_dir->size regardless of FAT type */
- if (exfat_find_last_cluster(sb, p_dir, &last_clu))
- return -EIO;
-
/*
* Allocate new cluster to this directory
*/
- exfat_chain_set(&clu, last_clu + 1, 0, p_dir->flags);
+ if (ei->start_clu != EXFAT_EOF_CLUSTER) {
+ /* we trust p_dir->size regardless of FAT type */
+ if (exfat_find_last_cluster(sb, p_dir, &last_clu))
+ return -EIO;
+
+ exfat_chain_set(&clu, last_clu + 1, 0, p_dir->flags);
+ } else {
+ /* This directory is empty */
+ exfat_chain_set(&clu, EXFAT_EOF_CLUSTER, 0,
+ ALLOC_NO_FAT_CHAIN);
+ }
/* allocate a cluster */
ret = exfat_alloc_cluster(inode, 1, &clu, IS_DIRSYNC(inode));
@@ -347,6 +353,11 @@ static int exfat_find_empty_entry(struct inode *inode,
if (exfat_zeroed_cluster(inode, clu.dir))
return -EIO;
+ if (ei->start_clu == EXFAT_EOF_CLUSTER) {
+ ei->start_clu = clu.dir;
+ p_dir->dir = clu.dir;
+ }
+
/* append to the FAT chain */
if (clu.flags != p_dir->flags) {
/* no-fat-chain bit is disabled,
@@ -644,7 +655,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
info->type = exfat_get_entry_type(ep);
info->attr = le16_to_cpu(ep->dentry.file.attr);
info->size = le64_to_cpu(ep2->dentry.stream.valid_size);
- if ((info->type == TYPE_FILE) && (info->size == 0)) {
+ if (info->size == 0) {
info->flags = ALLOC_NO_FAT_CHAIN;
info->start_clu = EXFAT_EOF_CLUSTER;
} else {
@@ -891,6 +902,9 @@ static int exfat_check_dir_empty(struct super_block *sb,
dentries_per_clu = sbi->dentries_per_clu;
+ if (p_dir->dir == EXFAT_EOF_CLUSTER)
+ return 0;
+
exfat_chain_dup(&clu, p_dir);
while (clu.dir != EXFAT_EOF_CLUSTER) {
@@ -1274,7 +1288,8 @@ static int __exfat_rename(struct inode *old_parent_inode,
}
/* Free the clusters if new_inode is a dir(as if exfat_rmdir) */
- if (new_entry_type == TYPE_DIR) {
+ if (new_entry_type == TYPE_DIR &&
+ new_ei->start_clu != EXFAT_EOF_CLUSTER) {
/* new_ei, new_clu_to_free */
struct exfat_chain new_clu_to_free;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 841fa6d9d744..f1dc11dab0d8 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -694,10 +694,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
/* We need to allocate a new block */
ext2_fsblk_t goal = ext2_group_first_block_no(sb,
EXT2_I(inode)->i_block_group);
- int block = ext2_new_block(inode, goal, &error);
+ ext2_fsblk_t block = ext2_new_block(inode, goal, &error);
if (error)
goto cleanup;
- ea_idebug(inode, "creating block %d", block);
+ ea_idebug(inode, "creating block %lu", block);
new_bh = sb_getblk(sb, block);
if (unlikely(!new_bh)) {
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 3219669732bf..0db6ae0ca936 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -68,6 +68,11 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
+ /* usually, the umask is applied by posix_acl_create(), but if
+ ext4 ACL support is disabled at compile time, we need to do
+ it here, because posix_acl_create() will never be called */
+ inode->i_mode &= ~current_umask();
+
return 0;
}
#endif /* CONFIG_EXT4_FS_POSIX_ACL */
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 7649376fd93d..c23ac149601e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -909,11 +909,11 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
}
/*
- * This function returns the number of file system metadata clusters at
+ * This function returns the number of file system metadata blocks at
* the beginning of a block group, including the reserved gdt blocks.
*/
-static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
- ext4_group_t block_group)
+unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
+ ext4_group_t block_group)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned num;
@@ -931,8 +931,15 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
} else { /* For META_BG_BLOCK_GROUPS */
num += ext4_bg_num_gdb(sb, block_group);
}
- return EXT4_NUM_B2C(sbi, num);
+ return num;
}
+
+static unsigned int ext4_num_base_meta_clusters(struct super_block *sb,
+ ext4_group_t block_group)
+{
+ return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group));
+}
+
/**
* ext4_inode_to_goal_block - return a hint for block allocation
* @inode: inode for block allocation
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 5504f72bbbbe..6fe3c941b565 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_block *sb)
struct ext4_system_blocks *system_blks;
struct ext4_group_desc *gdp;
ext4_group_t i;
- int flex_size = ext4_flex_bg_size(sbi);
int ret;
system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
@@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_block *sb)
return -ENOMEM;
for (i=0; i < ngroups; i++) {
+ unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i);
+
cond_resched();
- if (ext4_bg_has_super(sb, i) &&
- ((i < 5) || ((i % flex_size) == 0))) {
+ if (meta_blks != 0) {
ret = add_system_zone(system_blks,
ext4_group_first_block_no(sb, i),
- ext4_bg_num_gdb(sb, i) + 1, 0);
+ meta_blks, 0);
if (ret)
goto err;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5e8d3d53777e..e1a5ec7362ad 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1656,7 +1656,7 @@ struct ext4_sb_info {
struct task_struct *s_mmp_tsk;
/* record the last minlen when FITRIM is called. */
- atomic_t s_last_trim_minblks;
+ unsigned long s_last_trim_minblks;
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
@@ -3120,6 +3120,8 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno,
extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
ext4_group_t block_group,
unsigned int flags);
+extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
+ ext4_group_t block_group);
extern __printf(7, 8)
void __ext4_error(struct super_block *, const char *, unsigned int, bool,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 13497bd4e14b..592be39e3d51 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1004,6 +1004,11 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
ix = curp->p_idx;
}
+ if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
+ EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
+ return -EFSCORRUPTED;
+ }
+
len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
BUG_ON(len < 0);
if (len > 0) {
@@ -1013,11 +1018,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
}
- if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
- EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
- return -EFSCORRUPTED;
- }
-
ix->ei_block = cpu_to_le32(logical);
ext4_idx_store_pblock(ix, ptr);
le16_add_cpu(&curp->p_hdr->eh_entries, 1);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index fee54ab42bba..cccbdfd49a86 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -144,14 +144,17 @@
static struct kmem_cache *ext4_es_cachep;
static struct kmem_cache *ext4_pending_cachep;
-static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
+static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
+ struct extent_status *prealloc);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t end, int *reserved);
+ ext4_lblk_t end, int *reserved,
+ struct extent_status *prealloc);
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
struct ext4_inode_info *locked_ei);
-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len);
+static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len,
+ struct pending_reservation **prealloc);
int __init ext4_init_es(void)
{
@@ -448,22 +451,49 @@ static void ext4_es_list_del(struct inode *inode)
spin_unlock(&sbi->s_es_lock);
}
-static struct extent_status *
-ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
- ext4_fsblk_t pblk)
+static inline struct pending_reservation *__alloc_pending(bool nofail)
+{
+ if (!nofail)
+ return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
+
+ return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
+}
+
+static inline void __free_pending(struct pending_reservation *pr)
+{
+ kmem_cache_free(ext4_pending_cachep, pr);
+}
+
+/*
+ * Returns true if we cannot fail to allocate memory for this extent_status
+ * entry and cannot reclaim it until its status changes.
+ */
+static inline bool ext4_es_must_keep(struct extent_status *es)
+{
+ /* fiemap, bigalloc, and seek_data/hole need to use it. */
+ if (ext4_es_is_delayed(es))
+ return true;
+
+ return false;
+}
+
+static inline struct extent_status *__es_alloc_extent(bool nofail)
+{
+ if (!nofail)
+ return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
+
+ return kmem_cache_zalloc(ext4_es_cachep, GFP_KERNEL | __GFP_NOFAIL);
+}
+
+static void ext4_es_init_extent(struct inode *inode, struct extent_status *es,
+ ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
{
- struct extent_status *es;
- es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
- if (es == NULL)
- return NULL;
es->es_lblk = lblk;
es->es_len = len;
es->es_pblk = pblk;
- /*
- * We don't count delayed extent because we never try to reclaim them
- */
- if (!ext4_es_is_delayed(es)) {
+ /* We never try to reclaim a must kept extent, so we don't count it. */
+ if (!ext4_es_must_keep(es)) {
if (!EXT4_I(inode)->i_es_shk_nr++)
ext4_es_list_add(inode);
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
@@ -472,8 +502,11 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
EXT4_I(inode)->i_es_all_nr++;
percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+}
- return es;
+static inline void __es_free_extent(struct extent_status *es)
+{
+ kmem_cache_free(ext4_es_cachep, es);
}
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
@@ -481,8 +514,8 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
EXT4_I(inode)->i_es_all_nr--;
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
- /* Decrease the shrink counter when this es is not delayed */
- if (!ext4_es_is_delayed(es)) {
+ /* Decrease the shrink counter when we can reclaim the extent. */
+ if (!ext4_es_must_keep(es)) {
BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
if (!--EXT4_I(inode)->i_es_shk_nr)
ext4_es_list_del(inode);
@@ -490,7 +523,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
s_es_stats.es_stats_shk_cnt);
}
- kmem_cache_free(ext4_es_cachep, es);
+ __es_free_extent(es);
}
/*
@@ -752,7 +785,8 @@ static inline void ext4_es_insert_extent_check(struct inode *inode,
}
#endif
-static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
+static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
+ struct extent_status *prealloc)
{
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
struct rb_node **p = &tree->root.rb_node;
@@ -792,10 +826,15 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
}
}
- es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
- newes->es_pblk);
+ if (prealloc)
+ es = prealloc;
+ else
+ es = __es_alloc_extent(false);
if (!es)
return -ENOMEM;
+ ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len,
+ newes->es_pblk);
+
rb_link_node(&es->rb_node, parent, p);
rb_insert_color(&es->rb_node, &tree->root);
@@ -816,8 +855,12 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
{
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
- int err = 0;
+ int err1 = 0, err2 = 0, err3 = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct extent_status *es1 = NULL;
+ struct extent_status *es2 = NULL;
+ struct pending_reservation *pr = NULL;
+ bool revise_pending = false;
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
return 0;
@@ -845,29 +888,57 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_es_insert_extent_check(inode, &newes);
+ revise_pending = sbi->s_cluster_ratio > 1 &&
+ test_opt(inode->i_sb, DELALLOC) &&
+ (status & (EXTENT_STATUS_WRITTEN |
+ EXTENT_STATUS_UNWRITTEN));
+retry:
+ if (err1 && !es1)
+ es1 = __es_alloc_extent(true);
+ if ((err1 || err2) && !es2)
+ es2 = __es_alloc_extent(true);
+ if ((err1 || err2 || err3) && revise_pending && !pr)
+ pr = __alloc_pending(true);
write_lock(&EXT4_I(inode)->i_es_lock);
- err = __es_remove_extent(inode, lblk, end, NULL);
- if (err != 0)
+
+ err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
+ if (err1 != 0)
goto error;
-retry:
- err = __es_insert_extent(inode, &newes);
- if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
- 128, EXT4_I(inode)))
- goto retry;
- if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
- err = 0;
+ /* Free preallocated extent if it didn't get used. */
+ if (es1) {
+ if (!es1->es_len)
+ __es_free_extent(es1);
+ es1 = NULL;
+ }
- if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
- (status & EXTENT_STATUS_WRITTEN ||
- status & EXTENT_STATUS_UNWRITTEN))
- __revise_pending(inode, lblk, len);
+ err2 = __es_insert_extent(inode, &newes, es2);
+ if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
+ err2 = 0;
+ if (err2 != 0)
+ goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es2) {
+ if (!es2->es_len)
+ __es_free_extent(es2);
+ es2 = NULL;
+ }
+ if (revise_pending) {
+ err3 = __revise_pending(inode, lblk, len, &pr);
+ if (err3 != 0)
+ goto error;
+ if (pr) {
+ __free_pending(pr);
+ pr = NULL;
+ }
+ }
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
+ if (err1 || err2 || err3)
+ goto retry;
ext4_es_print_tree(inode);
-
- return err;
+ return 0;
}
/*
@@ -900,7 +971,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
if (!es || es->es_lblk > end)
- __es_insert_extent(inode, &newes);
+ __es_insert_extent(inode, &newes, NULL);
write_unlock(&EXT4_I(inode)->i_es_lock);
}
@@ -1271,7 +1342,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
rc->ndelonly--;
node = rb_next(&pr->rb_node);
rb_erase(&pr->rb_node, &tree->root);
- kmem_cache_free(ext4_pending_cachep, pr);
+ __free_pending(pr);
if (!node)
break;
pr = rb_entry(node, struct pending_reservation,
@@ -1290,6 +1361,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
* @lblk - first block in range
* @end - last block in range
* @reserved - number of cluster reservations released
+ * @prealloc - pre-allocated es to avoid memory allocation failures
*
* If @reserved is not NULL and delayed allocation is enabled, counts
* block/cluster reservations freed by removing range and if bigalloc
@@ -1297,7 +1369,8 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
* error code on failure.
*/
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t end, int *reserved)
+ ext4_lblk_t end, int *reserved,
+ struct extent_status *prealloc)
{
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
struct rb_node *node;
@@ -1305,14 +1378,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status orig_es;
ext4_lblk_t len1, len2;
ext4_fsblk_t block;
- int err;
+ int err = 0;
bool count_reserved = true;
struct rsvd_count rc;
if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
count_reserved = false;
-retry:
- err = 0;
es = __es_tree_search(&tree->root, lblk);
if (!es)
@@ -1346,14 +1417,13 @@ retry:
orig_es.es_len - len2;
ext4_es_store_pblock_status(&newes, block,
ext4_es_status(&orig_es));
- err = __es_insert_extent(inode, &newes);
+ err = __es_insert_extent(inode, &newes, prealloc);
if (err) {
+ if (!ext4_es_must_keep(&newes))
+ return 0;
+
es->es_lblk = orig_es.es_lblk;
es->es_len = orig_es.es_len;
- if ((err == -ENOMEM) &&
- __es_shrink(EXT4_SB(inode->i_sb),
- 128, EXT4_I(inode)))
- goto retry;
goto out;
}
} else {
@@ -1366,8 +1436,8 @@ retry:
}
}
if (count_reserved)
- count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
- &orig_es, &rc);
+ count_rsvd(inode, orig_es.es_lblk + len1,
+ orig_es.es_len - len1 - len2, &orig_es, &rc);
goto out_get_reserved;
}
@@ -1433,6 +1503,7 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end;
int err = 0;
int reserved = 0;
+ struct extent_status *es = NULL;
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
return 0;
@@ -1447,17 +1518,29 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
end = lblk + len - 1;
BUG_ON(end < lblk);
+retry:
+ if (err && !es)
+ es = __es_alloc_extent(true);
/*
* ext4_clear_inode() depends on us taking i_es_lock unconditionally
* so that we are sure __es_shrink() is done with the inode before it
* is reclaimed.
*/
write_lock(&EXT4_I(inode)->i_es_lock);
- err = __es_remove_extent(inode, lblk, end, &reserved);
+ err = __es_remove_extent(inode, lblk, end, &reserved, es);
+ /* Free preallocated extent if it didn't get used. */
+ if (es) {
+ if (!es->es_len)
+ __es_free_extent(es);
+ es = NULL;
+ }
write_unlock(&EXT4_I(inode)->i_es_lock);
+ if (err)
+ goto retry;
+
ext4_es_print_tree(inode);
ext4_da_release_space(inode, reserved);
- return err;
+ return 0;
}
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
@@ -1704,11 +1787,8 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
(*nr_to_scan)--;
node = rb_next(&es->rb_node);
- /*
- * We can't reclaim delayed extent from status tree because
- * fiemap, bigallic, and seek_data/hole need to use it.
- */
- if (ext4_es_is_delayed(es))
+
+ if (ext4_es_must_keep(es))
goto next;
if (ext4_es_is_referenced(es)) {
ext4_es_clear_referenced(es);
@@ -1772,7 +1852,7 @@ void ext4_clear_inode_es(struct inode *inode)
while (node) {
es = rb_entry(node, struct extent_status, rb_node);
node = rb_next(node);
- if (!ext4_es_is_delayed(es)) {
+ if (!ext4_es_must_keep(es)) {
rb_erase(&es->rb_node, &tree->root);
ext4_es_free_extent(inode, es);
}
@@ -1859,11 +1939,13 @@ static struct pending_reservation *__get_pending(struct inode *inode,
*
* @inode - file containing the cluster
* @lblk - logical block in the cluster to be added
+ * @prealloc - preallocated pending entry
*
* Returns 0 on successful insertion and -ENOMEM on failure. If the
* pending reservation is already in the set, returns successfully.
*/
-static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
+static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
+ struct pending_reservation **prealloc)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
@@ -1889,10 +1971,15 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
}
}
- pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
- if (pr == NULL) {
- ret = -ENOMEM;
- goto out;
+ if (likely(*prealloc == NULL)) {
+ pr = __alloc_pending(false);
+ if (!pr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else {
+ pr = *prealloc;
+ *prealloc = NULL;
}
pr->lclu = lclu;
@@ -1922,7 +2009,7 @@ static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
if (pr != NULL) {
tree = &EXT4_I(inode)->i_pending_tree;
rb_erase(&pr->rb_node, &tree->root);
- kmem_cache_free(ext4_pending_cachep, pr);
+ __free_pending(pr);
}
}
@@ -1983,7 +2070,10 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
bool allocated)
{
struct extent_status newes;
- int err = 0;
+ int err1 = 0, err2 = 0, err3 = 0;
+ struct extent_status *es1 = NULL;
+ struct extent_status *es2 = NULL;
+ struct pending_reservation *pr = NULL;
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
return 0;
@@ -1998,29 +2088,52 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
ext4_es_insert_extent_check(inode, &newes);
+retry:
+ if (err1 && !es1)
+ es1 = __es_alloc_extent(true);
+ if ((err1 || err2) && !es2)
+ es2 = __es_alloc_extent(true);
+ if ((err1 || err2 || err3) && allocated && !pr)
+ pr = __alloc_pending(true);
write_lock(&EXT4_I(inode)->i_es_lock);
- err = __es_remove_extent(inode, lblk, lblk, NULL);
- if (err != 0)
- goto error;
-retry:
- err = __es_insert_extent(inode, &newes);
- if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
- 128, EXT4_I(inode)))
- goto retry;
- if (err != 0)
+ err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
+ if (err1 != 0)
goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es1) {
+ if (!es1->es_len)
+ __es_free_extent(es1);
+ es1 = NULL;
+ }
- if (allocated)
- __insert_pending(inode, lblk);
+ err2 = __es_insert_extent(inode, &newes, es2);
+ if (err2 != 0)
+ goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es2) {
+ if (!es2->es_len)
+ __es_free_extent(es2);
+ es2 = NULL;
+ }
+ if (allocated) {
+ err3 = __insert_pending(inode, lblk, &pr);
+ if (err3 != 0)
+ goto error;
+ if (pr) {
+ __free_pending(pr);
+ pr = NULL;
+ }
+ }
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
+ if (err1 || err2 || err3)
+ goto retry;
ext4_es_print_tree(inode);
ext4_print_pending_tree(inode);
-
- return err;
+ return 0;
}
/*
@@ -2121,21 +2234,24 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
* @inode - file containing the range
* @lblk - logical block defining the start of range
* @len - length of range in blocks
+ * @prealloc - preallocated pending entry
*
* Used after a newly allocated extent is added to the extents status tree.
* Requires that the extents in the range have either written or unwritten
* status. Must be called while holding i_es_lock.
*/
-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len)
+static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len,
+ struct pending_reservation **prealloc)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t end = lblk + len - 1;
ext4_lblk_t first, last;
bool f_del = false, l_del = false;
+ int ret = 0;
if (len == 0)
- return;
+ return 0;
/*
* Two cases - block range within single cluster and block range
@@ -2156,7 +2272,9 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
f_del = __es_scan_range(inode, &ext4_es_is_delonly,
first, lblk - 1);
if (f_del) {
- __insert_pending(inode, first);
+ ret = __insert_pending(inode, first, prealloc);
+ if (ret < 0)
+ goto out;
} else {
last = EXT4_LBLK_CMASK(sbi, end) +
sbi->s_cluster_ratio - 1;
@@ -2164,9 +2282,11 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
l_del = __es_scan_range(inode,
&ext4_es_is_delonly,
end + 1, last);
- if (l_del)
- __insert_pending(inode, last);
- else
+ if (l_del) {
+ ret = __insert_pending(inode, last, prealloc);
+ if (ret < 0)
+ goto out;
+ } else
__remove_pending(inode, last);
}
} else {
@@ -2174,18 +2294,24 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
if (first != lblk)
f_del = __es_scan_range(inode, &ext4_es_is_delonly,
first, lblk - 1);
- if (f_del)
- __insert_pending(inode, first);
- else
+ if (f_del) {
+ ret = __insert_pending(inode, first, prealloc);
+ if (ret < 0)
+ goto out;
+ } else
__remove_pending(inode, first);
last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
if (last != end)
l_del = __es_scan_range(inode, &ext4_es_is_delonly,
end + 1, last);
- if (l_del)
- __insert_pending(inode, last);
- else
+ if (l_del) {
+ ret = __insert_pending(inode, last, prealloc);
+ if (ret < 0)
+ goto out;
+ } else
__remove_pending(inode, last);
}
+out:
+ return ret;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 47c28e3582fd..e44c28ceb9cd 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/backing-dev.h>
+#include <linux/freezer.h>
#include <trace/events/ext4.h>
/*
@@ -1011,8 +1012,9 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
* Return next linear group for allocation. If linear traversal should not be
* performed, this function just returns the same group
*/
-static int
-next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
+static ext4_group_t
+next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group,
+ ext4_group_t ngroups)
{
if (!should_optimize_scan(ac))
goto inc_and_return;
@@ -2445,7 +2447,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
BUG_ON(cr < 0 || cr >= 4);
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp))
+ if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
return false;
free = grp->bb_free;
@@ -4047,8 +4049,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_super_block *es = sbi->s_es;
int bsbits, max;
- ext4_lblk_t end;
- loff_t size, start_off;
+ loff_t size, start_off, end;
loff_t orig_size __maybe_unused;
ext4_lblk_t start;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
@@ -4077,7 +4078,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* first, let's learn actual file size
* given current request is allocated */
- size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
+ size = extent_logical_end(sbi, &ac->ac_o_ex);
size = size << bsbits;
if (size < i_size_read(ac->ac_inode))
size = i_size_read(ac->ac_inode);
@@ -4136,6 +4137,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
start = max(start, rounddown(ac->ac_o_ex.fe_logical,
(ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));
+ /* avoid unnecessary preallocation that may trigger assertions */
+ if (start + size > EXT_MAX_BLOCKS)
+ size = EXT_MAX_BLOCKS - start;
+
/* don't cover already allocated blocks in selected range */
if (ar->pleft && start <= ar->lleft) {
size -= ar->lleft + 1 - start;
@@ -4156,7 +4161,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* check we don't cross already preallocated blocks */
rcu_read_lock();
list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
- ext4_lblk_t pa_end;
+ loff_t pa_end;
if (pa->pa_deleted)
continue;
@@ -4166,8 +4171,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
continue;
}
- pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
- pa->pa_len);
+ pa_end = pa_logical_end(EXT4_SB(ac->ac_sb), pa);
/* PA must not overlap original request */
BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
@@ -4196,12 +4200,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* XXX: extra loop to check we really don't overlap preallocations */
rcu_read_lock();
list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
- ext4_lblk_t pa_end;
+ loff_t pa_end;
spin_lock(&pa->pa_lock);
if (pa->pa_deleted == 0) {
- pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
- pa->pa_len);
+ pa_end = pa_logical_end(EXT4_SB(ac->ac_sb), pa);
BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
}
spin_unlock(&pa->pa_lock);
@@ -4417,8 +4420,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
/* all fields in this condition don't change,
* so we can skip locking for them */
if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
- ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
- EXT4_C2B(sbi, pa->pa_len)))
+ ac->ac_o_ex.fe_logical >= pa_logical_end(sbi, pa))
continue;
/* non-extent files can't have physical blocks past 2^32 */
@@ -4663,8 +4665,11 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
pa = ac->ac_pa;
if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
- int new_bex_start;
- int new_bex_end;
+ struct ext4_free_extent ex = {
+ .fe_logical = ac->ac_g_ex.fe_logical,
+ .fe_len = ac->ac_g_ex.fe_len,
+ };
+ loff_t orig_goal_end = extent_logical_end(sbi, &ex);
/* we can't allocate as much as normalizer wants.
* so, found space must get proper lstart
@@ -4683,29 +4688,23 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
* still cover original start
* 3. Else, keep the best ex at start of original request.
*/
- new_bex_end = ac->ac_g_ex.fe_logical +
- EXT4_C2B(sbi, ac->ac_g_ex.fe_len);
- new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (ac->ac_o_ex.fe_logical >= new_bex_start)
- goto adjust_bex;
+ ex.fe_len = ac->ac_b_ex.fe_len;
- new_bex_start = ac->ac_g_ex.fe_logical;
- new_bex_end =
- new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (ac->ac_o_ex.fe_logical < new_bex_end)
+ ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len);
+ if (ac->ac_o_ex.fe_logical >= ex.fe_logical)
goto adjust_bex;
- new_bex_start = ac->ac_o_ex.fe_logical;
- new_bex_end =
- new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+ ex.fe_logical = ac->ac_g_ex.fe_logical;
+ if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex))
+ goto adjust_bex;
+ ex.fe_logical = ac->ac_o_ex.fe_logical;
adjust_bex:
- ac->ac_b_ex.fe_logical = new_bex_start;
+ ac->ac_b_ex.fe_logical = ex.fe_logical;
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
- BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical +
- EXT4_C2B(sbi, ac->ac_g_ex.fe_len)));
+ BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end);
}
/* preallocation can change ac_b_ex, thus we store actually
@@ -5239,7 +5238,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
group_pa_eligible = sbi->s_mb_group_prealloc > 0;
inode_pa_eligible = true;
- size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
+ size = extent_logical_end(sbi, &ac->ac_o_ex);
isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
>> bsbits;
@@ -6432,6 +6431,21 @@ __acquires(bitlock)
return ret;
}
+/*
+ * Return the index of the last cluster in group @grp.
+ *
+ * For every group before the final one this is
+ * EXT4_CLUSTERS_PER_GROUP(sb) - 1.  For the final group it is derived from
+ * ext4_blocks_count() and the group's first block number.
+ */
+static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
+					   ext4_group_t grp)
+{
+	/*
+	 * Compare against the index of the last group (count - 1).  A plain
+	 * "grp < count" test holds for every valid group number, which would
+	 * leave the last-group computation below unreachable.
+	 */
+	if (grp < ext4_get_groups_count(sb) - 1)
+		return EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+	return (ext4_blocks_count(EXT4_SB(sb)->s_es) -
+		ext4_group_first_block_no(sb, grp) - 1) >>
+	       EXT4_CLUSTER_BITS(sb);
+}
+
+static bool ext4_trim_interrupted(void)
+{
+ return fatal_signal_pending(current) || freezing(current);
+}
+
static int ext4_try_to_trim_range(struct super_block *sb,
struct ext4_buddy *e4b, ext4_grpblk_t start,
ext4_grpblk_t max, ext4_grpblk_t minblocks)
@@ -6439,12 +6453,13 @@ __acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
__releases(ext4_group_lock_ptr(sb, e4b->bd_group))
{
ext4_grpblk_t next, count, free_count;
+ bool set_trimmed = false;
void *bitmap;
- int ret = 0;
bitmap = e4b->bd_bitmap;
- start = (e4b->bd_info->bb_first_free > start) ?
- e4b->bd_info->bb_first_free : start;
+ if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group))
+ set_trimmed = true;
+ start = max(e4b->bd_info->bb_first_free, start);
count = 0;
free_count = 0;
@@ -6455,19 +6470,17 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
next = mb_find_next_bit(bitmap, max + 1, start);
if ((next - start) >= minblocks) {
- ret = ext4_trim_extent(sb, start, next - start, e4b);
+ int ret = ext4_trim_extent(sb, start, next - start, e4b);
+
if (ret && ret != -EOPNOTSUPP)
- break;
- ret = 0;
+ return count;
count += next - start;
}
free_count += next - start;
start = next + 1;
- if (fatal_signal_pending(current)) {
- count = -ERESTARTSYS;
- break;
- }
+ if (ext4_trim_interrupted())
+ return count;
if (need_resched()) {
ext4_unlock_group(sb, e4b->bd_group);
@@ -6479,6 +6492,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
break;
}
+ if (set_trimmed)
+ EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info);
+
return count;
}
@@ -6489,7 +6505,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
* @start: first group block to examine
* @max: last group block to examine
* @minblocks: minimum extent block count
- * @set_trimmed: set the trimmed flag if at least one block is trimmed
*
* ext4_trim_all_free walks through group's block bitmap searching for free
* extents. When the free extent is found, mark it as used in group buddy
@@ -6499,7 +6514,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ext4_grpblk_t start, ext4_grpblk_t max,
- ext4_grpblk_t minblocks, bool set_trimmed)
+ ext4_grpblk_t minblocks)
{
struct ext4_buddy e4b;
int ret;
@@ -6516,13 +6531,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ext4_lock_group(sb, group);
if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
- minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) {
+ minblocks < EXT4_SB(sb)->s_last_trim_minblks)
ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
- if (ret >= 0 && set_trimmed)
- EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
- } else {
+ else
ret = 0;
- }
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
@@ -6555,7 +6567,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
ext4_fsblk_t first_data_blk =
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
- bool whole_group, eof = false;
int ret = 0;
start = range->start >> sb->s_blocksize_bits;
@@ -6574,10 +6585,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
goto out;
}
- if (end >= max_blks - 1) {
+ if (end >= max_blks - 1)
end = max_blks - 1;
- eof = true;
- }
if (end <= first_data_blk)
goto out;
if (start < first_data_blk)
@@ -6591,9 +6600,10 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
/* end now represents the last cluster to discard in this group */
end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
- whole_group = true;
for (group = first_group; group <= last_group; group++) {
+ if (ext4_trim_interrupted())
+ break;
grp = ext4_get_group_info(sb, group);
if (!grp)
continue;
@@ -6610,13 +6620,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
* change it for the last group, note that last_cluster is
* already computed earlier by ext4_get_group_no_and_offset()
*/
- if (group == last_group) {
+ if (group == last_group)
end = last_cluster;
- whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1;
- }
if (grp->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, group, first_cluster,
- end, minlen, whole_group);
+ end, minlen);
if (cnt < 0) {
ret = cnt;
break;
@@ -6632,7 +6640,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
}
if (!ret)
- atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
+ EXT4_SB(sb)->s_last_trim_minblks = minlen;
out:
range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
@@ -6661,8 +6669,7 @@ ext4_mballoc_query_range(
ext4_lock_group(sb, group);
- start = (e4b.bd_info->bb_first_free > start) ?
- e4b.bd_info->bb_first_free : start;
+ start = max(e4b.bd_info->bb_first_free, start);
if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 39da92ceabf8..bf048cbf3986 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -219,6 +219,20 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
(fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
}
+static inline loff_t extent_logical_end(struct ext4_sb_info *sbi,
+ struct ext4_free_extent *fex)
+{
+ /* Use loff_t to avoid end exceeding ext4_lblk_t max. */
+ return (loff_t)fex->fe_logical + EXT4_C2B(sbi, fex->fe_len);
+}
+
+static inline loff_t pa_logical_end(struct ext4_sb_info *sbi,
+ struct ext4_prealloc_space *pa)
+{
+ /* Use loff_t to avoid end exceeding ext4_lblk_t max. */
+ return (loff_t)pa->pa_lstart + EXT4_C2B(sbi, pa->pa_len);
+}
+
typedef int (*ext4_mballoc_query_range_fn)(
struct super_block *sb,
ext4_group_t agno,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2629e90f8dbb..14c977e1e4bb 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -343,17 +343,17 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
+ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
#ifdef PARANOID
struct ext4_dir_entry *d, *top;
d = (struct ext4_dir_entry *)bh->b_data;
top = (struct ext4_dir_entry *)(bh->b_data +
- (EXT4_BLOCK_SIZE(inode->i_sb) -
- sizeof(struct ext4_dir_entry_tail)));
- while (d < top && d->rec_len)
+ (blocksize - sizeof(struct ext4_dir_entry_tail)));
+ while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize))
d = (struct ext4_dir_entry *)(((void *)d) +
- le16_to_cpu(d->rec_len));
+ ext4_rec_len_from_disk(d->rec_len, blocksize));
if (d != top)
return NULL;
@@ -364,7 +364,8 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
#endif
if (t->det_reserved_zero1 ||
- le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
+ (ext4_rec_len_from_disk(t->det_rec_len, blocksize) !=
+ sizeof(struct ext4_dir_entry_tail)) ||
t->det_reserved_zero2 ||
t->det_reserved_ft != EXT4_FT_DIR_CSUM)
return NULL;
@@ -445,13 +446,14 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
struct ext4_dir_entry *dp;
struct dx_root_info *root;
int count_offset;
+ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
+ unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize);
- if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
+ if (rlen == blocksize)
count_offset = 8;
- else if (le16_to_cpu(dirent->rec_len) == 12) {
+ else if (rlen == 12) {
dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
- if (le16_to_cpu(dp->rec_len) !=
- EXT4_BLOCK_SIZE(inode->i_sb) - 12)
+ if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12)
return NULL;
root = (struct dx_root_info *)(((void *)dp + 12));
if (root->reserved_zero ||
@@ -1315,6 +1317,7 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
unsigned int buflen = bh->b_size;
char *base = bh->b_data;
struct dx_hash_info h = *hinfo;
+ int blocksize = EXT4_BLOCK_SIZE(dir->i_sb);
if (ext4_has_metadata_csum(dir->i_sb))
buflen -= sizeof(struct ext4_dir_entry_tail);
@@ -1335,11 +1338,12 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
- map_tail->size = le16_to_cpu(de->rec_len);
+ map_tail->size = ext4_rec_len_from_disk(de->rec_len,
+ blocksize);
count++;
cond_resched();
}
- de = ext4_next_entry(de, dir->i_sb->s_blocksize);
+ de = ext4_next_entry(de, blocksize);
}
return count;
}
@@ -2799,6 +2803,7 @@ static int ext4_add_nondir(handle_t *handle,
return err;
}
drop_nlink(inode);
+ ext4_mark_inode_dirty(handle, inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
return err;
@@ -3455,6 +3460,7 @@ err_drop_inode:
if (handle)
ext4_journal_stop(handle);
clear_nlink(inode);
+ ext4_mark_inode_dirty(handle, inode);
unlock_new_inode(inode);
iput(inode);
out_free_encrypted_link:
@@ -4028,6 +4034,7 @@ end_rename:
ext4_resetent(handle, &old,
old.inode->i_ino, old_file_type);
drop_nlink(whiteout);
+ ext4_mark_inode_dirty(handle, whiteout);
ext4_orphan_add(handle, whiteout);
}
unlock_new_inode(whiteout);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 589ed99856f3..d0c41343a5c4 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -556,13 +556,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
goto handle_itb;
- if (meta_bg == 1) {
- ext4_group_t first_group;
- first_group = ext4_meta_bg_first_group(sb, group);
- if (first_group != group + 1 &&
- first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
- goto handle_itb;
- }
+ if (meta_bg == 1)
+ goto handle_itb;
block = start + ext4_bg_has_super(sb, group);
/* Copy all of the GDT blocks into the backup in this group */
@@ -1165,8 +1160,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
ext4_group_first_block_no(sb, group));
BUFFER_TRACE(bh, "get_write_access");
if ((err = ext4_journal_get_write_access(handle, sb, bh,
- EXT4_JTR_NONE)))
+ EXT4_JTR_NONE))) {
+ brelse(bh);
break;
+ }
lock_buffer(bh);
memcpy(bh->b_data, data, size);
if (rest)
@@ -1555,6 +1552,8 @@ exit_journal:
int gdb_num_end = ((group + flex_gd->count - 1) /
EXT4_DESC_PER_BLOCK(sb));
int meta_bg = ext4_has_feature_meta_bg(sb);
+ sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr -
+ ext4_group_first_block_no(sb, 0);
sector_t old_gdb = 0;
update_backups(sb, ext4_group_first_block_no(sb, 0),
@@ -1566,8 +1565,8 @@ exit_journal:
gdb_num);
if (old_gdb == gdb_bh->b_blocknr)
continue;
- update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
- gdb_bh->b_size, meta_bg);
+ update_backups(sb, gdb_bh->b_blocknr - padding_blocks,
+ gdb_bh->b_data, gdb_bh->b_size, meta_bg);
old_gdb = gdb_bh->b_blocknr;
}
}
@@ -1936,9 +1935,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
errout:
ret = ext4_journal_stop(handle);
- if (!err)
- err = ret;
- return ret;
+ return err ? err : ret;
invalid_resize_inode:
ext4_error(sb, "corrupted/inconsistent resize inode");
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 455fac164fda..3982b4a7618c 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1906,7 +1906,7 @@ void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi)
int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
- char slab_name[32];
+ char slab_name[35];
sprintf(slab_name, "f2fs_page_array_entry-%u:%u", MAJOR(dev), MINOR(dev));
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3956852ad1de..73a7906a49b1 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2253,8 +2253,10 @@ skip_reading_dnode:
f2fs_wait_on_block_writeback(inode, blkaddr);
if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
- if (atomic_dec_and_test(&dic->remaining_pages))
+ if (atomic_dec_and_test(&dic->remaining_pages)) {
f2fs_decompress_cluster(dic);
+ break;
+ }
continue;
}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index e1131af0396b..d220c4523982 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -41,7 +41,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
vm_fault_t ret;
ret = filemap_fault(vmf);
- if (!ret)
+ if (ret & VM_FAULT_LOCKED)
f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO,
F2FS_BLKSIZE);
@@ -2780,6 +2780,11 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
goto out;
}
+ if (f2fs_compressed_file(src) || f2fs_compressed_file(dst)) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
ret = -EINVAL;
if (pos_in + len > src->i_size || pos_in + len < pos_in)
goto out_unlock;
@@ -3210,6 +3215,7 @@ int f2fs_precache_extents(struct inode *inode)
return -EOPNOTSUPP;
map.m_lblk = 0;
+ map.m_pblk = 0;
map.m_next_pgofs = NULL;
map.m_next_extent = &m_next_extent;
map.m_seg_type = NO_CHECK_TYPE;
@@ -4249,6 +4255,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ const loff_t orig_pos = iocb->ki_pos;
+ const size_t orig_count = iov_iter_count(from);
ssize_t ret;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4352,8 +4360,7 @@ write:
unlock:
inode_unlock(inode);
out:
- trace_f2fs_file_write_iter(inode, iocb->ki_pos,
- iov_iter_count(from), ret);
+ trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 7a86a8dcf4f1..0d6906644feb 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1088,7 +1088,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
if (old_dir_entry) {
- if (old_dir != new_dir && !whiteout)
+ if (old_dir != new_dir)
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
else
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 5c0b2b300aa1..d4a76c2deb80 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -540,6 +540,29 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb,
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
+static bool is_compress_extension_exist(struct f2fs_sb_info *sbi,
+ const char *new_ext, bool is_ext)
+{
+ unsigned char (*ext)[F2FS_EXTENSION_LEN];
+ int ext_cnt;
+ int i;
+
+ if (is_ext) {
+ ext = F2FS_OPTION(sbi).extensions;
+ ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+ } else {
+ ext = F2FS_OPTION(sbi).noextensions;
+ ext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+ }
+
+ for (i = 0; i < ext_cnt; i++) {
+ if (!strcasecmp(new_ext, ext[i]))
+ return true;
+ }
+
+ return false;
+}
+
/*
* 1. The same extension name cannot appear in both compress and non-compress extension
* at the same time.
@@ -1154,6 +1177,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
}
+ if (is_compress_extension_exist(sbi, name, true)) {
+ kfree(name);
+ break;
+ }
+
strcpy(ext[ext_cnt], name);
F2FS_OPTION(sbi).compress_ext_cnt++;
kfree(name);
@@ -1178,6 +1206,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
}
+ if (is_compress_extension_exist(sbi, name, false)) {
+ kfree(name);
+ break;
+ }
+
strcpy(noext[noext_cnt], name);
F2FS_OPTION(sbi).nocompress_ext_cnt++;
kfree(name);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 797ac505a075..1a18936bc583 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -745,6 +745,12 @@ static int __f2fs_setxattr(struct inode *inode, int index,
memcpy(pval, value, size);
last->e_value_size = cpu_to_le16(size);
new_hsize += newsize;
+ /*
+ * Explicitly add the null terminator. The unused xattr space
+ * is supposed to always be zeroed, which would make this
+ * unnecessary, but don't depend on that.
+ */
+ *(u32 *)((u8 *)last + newsize) = 0;
}
error = write_all_xattrs(inode, new_hsize, base_addr, ipage);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c76537a6826a..672d176524f5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -609,6 +609,24 @@ out_free:
kfree(isw);
}
+static bool isw_prepare_wbs_switch(struct inode_switch_wbs_context *isw,
+ struct list_head *list, int *nr)
+{
+ struct inode *inode;
+
+ list_for_each_entry(inode, list, i_io_list) {
+ if (!inode_prepare_wbs_switch(inode, isw->new_wb))
+ continue;
+
+ isw->inodes[*nr] = inode;
+ (*nr)++;
+
+ if (*nr >= WB_MAX_INODES_PER_ISW - 1)
+ return true;
+ }
+ return false;
+}
+
/**
* cleanup_offline_cgwb - detach associated inodes
* @wb: target wb
@@ -621,7 +639,6 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb)
{
struct cgroup_subsys_state *memcg_css;
struct inode_switch_wbs_context *isw;
- struct inode *inode;
int nr;
bool restart = false;
@@ -643,17 +660,17 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb)
nr = 0;
spin_lock(&wb->list_lock);
- list_for_each_entry(inode, &wb->b_attached, i_io_list) {
- if (!inode_prepare_wbs_switch(inode, isw->new_wb))
- continue;
-
- isw->inodes[nr++] = inode;
-
- if (nr >= WB_MAX_INODES_PER_ISW - 1) {
- restart = true;
- break;
- }
- }
+ /*
+ * In addition to the inodes that have completed writeback, also switch
+ * cgwbs for those inodes that have only dirty timestamps. Otherwise, those
+ * inodes won't be written back for a long time when lazytime is
+ * enabled, thus pinning the dying cgwbs. It won't break the
+ * bandwidth restrictions, as writeback of inode metadata is not
+ * accounted for.
+ */
+ restart = isw_prepare_wbs_switch(isw, &wb->b_attached, &nr);
+ if (!restart)
+ restart = isw_prepare_wbs_switch(isw, &wb->b_dirty_time, &nr);
spin_unlock(&wb->list_lock);
/* no attached inodes? bail out */
@@ -1557,10 +1574,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
if (wbc->pages_skipped) {
/*
- * writeback is not making progress due to locked
- * buffers. Skip this inode for now.
+ * Writeback is not making progress due to locked buffers.
+ * Skip this inode for now. Although having skipped pages
+ * is odd for clean inodes, it can happen for some
+ * filesystems so handle that gracefully.
*/
- redirty_tail_locked(inode, wb);
+ if (inode->i_state & I_DIRTY_ALL)
+ redirty_tail_locked(inode, wb);
+ else
+ inode_cgwb_move_to_attached(inode, wb);
return;
}
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 281d79f8b3d3..3e7aafe2e953 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -1227,6 +1227,7 @@ void fuse_dax_conn_free(struct fuse_conn *fc)
if (fc->dax) {
fuse_free_dax_mem_ranges(&fc->dax->free_ranges);
kfree(fc->dax);
+ fc->dax = NULL;
}
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index c3a87586a15f..4b8f094345e1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -63,6 +63,19 @@ struct fuse_forget_link {
struct fuse_forget_link *next;
};
+/* Submount lookup tracking */
+struct fuse_submount_lookup {
+ /** Refcount */
+ refcount_t count;
+
+ /** Unique ID, which identifies the inode between userspace
+ * and kernel */
+ u64 nodeid;
+
+ /** The request used for sending the FORGET message */
+ struct fuse_forget_link *forget;
+};
+
/** FUSE inode */
struct fuse_inode {
/** Inode data */
@@ -155,6 +168,8 @@ struct fuse_inode {
*/
struct fuse_inode_dax *dax;
#endif
+ /** Submount specific lookup tracking */
+ struct fuse_submount_lookup *submount_lookup;
};
/** FUSE inode state bits */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 50365143f50e..97dc24557bf2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -69,6 +69,24 @@ struct fuse_forget_link *fuse_alloc_forget(void)
return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
}
+static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
+{
+ struct fuse_submount_lookup *sl;
+
+ sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
+ if (!sl)
+ return NULL;
+ sl->forget = fuse_alloc_forget();
+ if (!sl->forget)
+ goto out_free;
+
+ return sl;
+
+out_free:
+ kfree(sl);
+ return NULL;
+}
+
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
struct fuse_inode *fi;
@@ -84,6 +102,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->attr_version = 0;
fi->orig_ino = 0;
fi->state = 0;
+ fi->submount_lookup = NULL;
mutex_init(&fi->mutex);
spin_lock_init(&fi->lock);
fi->forget = fuse_alloc_forget();
@@ -114,6 +133,17 @@ static void fuse_free_inode(struct inode *inode)
kmem_cache_free(fuse_inode_cachep, fi);
}
+static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
+ struct fuse_submount_lookup *sl)
+{
+ if (!refcount_dec_and_test(&sl->count))
+ return;
+
+ fuse_queue_forget(fc, sl->forget, sl->nodeid, 1);
+ sl->forget = NULL;
+ kfree(sl);
+}
+
static void fuse_evict_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -133,6 +163,11 @@ static void fuse_evict_inode(struct inode *inode)
fi->nlookup);
fi->forget = NULL;
}
+
+ if (fi->submount_lookup) {
+ fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
+ fi->submount_lookup = NULL;
+ }
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
WARN_ON(!list_empty(&fi->write_files));
@@ -279,6 +314,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
}
}
+static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
+ u64 nodeid)
+{
+ sl->nodeid = nodeid;
+ refcount_set(&sl->count, 1);
+}
+
static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
inode->i_mode = attr->mode & S_IFMT;
@@ -336,12 +378,22 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
*/
if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
S_ISDIR(attr->mode)) {
+ struct fuse_inode *fi;
+
inode = new_inode(sb);
if (!inode)
return NULL;
fuse_init_inode(inode, attr);
- get_fuse_inode(inode)->nodeid = nodeid;
+ fi = get_fuse_inode(inode);
+ fi->nodeid = nodeid;
+ fi->submount_lookup = fuse_alloc_submount_lookup();
+ if (!fi->submount_lookup) {
+ iput(inode);
+ return NULL;
+ }
+ /* Sets the refcount to 1 on fi->submount_lookup->count */
+ fuse_init_submount_lookup(fi->submount_lookup, nodeid);
inode->i_flags |= S_AUTOMOUNT;
goto done;
}
@@ -364,11 +416,11 @@ retry:
iput(inode);
goto retry;
}
-done:
fi = get_fuse_inode(inode);
spin_lock(&fi->lock);
fi->nlookup++;
spin_unlock(&fi->lock);
+done:
fuse_change_attributes(inode, attr, attr_valid, attr_version);
return inode;
@@ -1380,6 +1432,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
struct super_block *parent_sb = parent_fi->inode.i_sb;
struct fuse_attr root_attr;
struct inode *root;
+ struct fuse_submount_lookup *sl;
+ struct fuse_inode *fi;
fuse_sb_defaults(sb);
fm->sb = sb;
@@ -1402,12 +1456,27 @@ static int fuse_fill_super_submount(struct super_block *sb,
* its nlookup should not be incremented. fuse_iget() does
* that, though, so undo it here.
*/
- get_fuse_inode(root)->nlookup--;
+ fi = get_fuse_inode(root);
+ fi->nlookup--;
+
sb->s_d_op = &fuse_dentry_operations;
sb->s_root = d_make_root(root);
if (!sb->s_root)
return -ENOMEM;
+ /*
+ * Grab the parent's submount_lookup pointer and take a
+ * reference on the shared nlookup from the parent. This is to
+ * prevent the last forget for this nodeid from getting
+ * triggered until all users have finished with it.
+ */
+ sl = parent_fi->submount_lookup;
+ WARN_ON(!sl);
+ if (sl) {
+ refcount_inc(&sl->count);
+ fi->submount_lookup = sl;
+ }
+
return 0;
}
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index d5294e663df5..14e99ffa57af 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -243,8 +243,16 @@ retry:
dput(dentry);
dentry = alias;
}
- if (IS_ERR(dentry))
+ if (IS_ERR(dentry)) {
+ if (!IS_ERR(inode)) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+ fi->nlookup--;
+ spin_unlock(&fi->lock);
+ }
return PTR_ERR(dentry);
+ }
}
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ee212c9310ad..2b654c3b918a 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -207,13 +207,13 @@ static int gfs2_writepages(struct address_space *mapping,
int ret;
/*
- * Even if we didn't write any pages here, we might still be holding
+ * Even if we didn't write enough pages here, we might still be holding
* dirty pages in the ail. We forcibly flush the ail because we don't
* want balance_dirty_pages() to loop indefinitely trying to write out
* pages held in the ail that it can't find.
*/
ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
- if (ret == 0)
+ if (ret == 0 && wbc->nr_to_write > 0)
set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
return ret;
}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 97ee17843b4d..462e957eda8b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1848,16 +1848,24 @@ out:
int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
int mask)
{
+ int may_not_block = mask & MAY_NOT_BLOCK;
struct gfs2_inode *ip;
struct gfs2_holder i_gh;
+ struct gfs2_glock *gl;
int error;
gfs2_holder_mark_uninitialized(&i_gh);
ip = GFS2_I(inode);
- if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
- if (mask & MAY_NOT_BLOCK)
+ gl = rcu_dereference_check(ip->i_gl, !may_not_block);
+ if (unlikely(!gl)) {
+ /* inode is getting torn down, must be RCU mode */
+ WARN_ON_ONCE(!may_not_block);
+ return -ECHILD;
+ }
+ if (gfs2_glock_is_locked_by_me(gl) == NULL) {
+ if (may_not_block)
return -ECHILD;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+ error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return error;
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f0ee3ff6f9a8..9a96842aeab3 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -1277,9 +1277,6 @@ static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
{
unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
- if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
- return 1;
-
return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
atomic_read(&sdp->sd_log_thresh2);
}
@@ -1296,7 +1293,6 @@ int gfs2_logd(void *data)
{
struct gfs2_sbd *sdp = data;
unsigned long t = 1;
- DEFINE_WAIT(wait);
while (!kthread_should_stop()) {
@@ -1321,7 +1317,9 @@ int gfs2_logd(void *data)
GFS2_LFC_LOGD_JFLUSH_REQD);
}
- if (gfs2_ail_flush_reqd(sdp)) {
+ if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+ gfs2_ail_flush_reqd(sdp)) {
+ clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
gfs2_ail1_empty(sdp, 0);
@@ -1333,17 +1331,12 @@ int gfs2_logd(void *data)
try_to_freeze();
- do {
- prepare_to_wait(&sdp->sd_logd_waitq, &wait,
- TASK_INTERRUPTIBLE);
- if (!gfs2_ail_flush_reqd(sdp) &&
- !gfs2_jrnl_flush_reqd(sdp) &&
- !kthread_should_stop())
- t = schedule_timeout(t);
- } while(t && !gfs2_ail_flush_reqd(sdp) &&
- !gfs2_jrnl_flush_reqd(sdp) &&
- !kthread_should_stop());
- finish_wait(&sdp->sd_logd_waitq, &wait);
+ t = wait_event_interruptible_timeout(sdp->sd_logd_waitq,
+ test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+ gfs2_ail_flush_reqd(sdp) ||
+ gfs2_jrnl_flush_reqd(sdp) ||
+ kthread_should_stop(),
+ t);
}
return 0;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index dc77080a82bb..c381580095ba 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -431,6 +431,17 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd,
(sync_gen && (qd->qd_sync_gen >= *sync_gen)))
return 0;
+ /*
+ * If qd_change is 0 it means a pending quota change was negated.
+ * We should not sync it, but we still have a qd reference and slot
+ * reference taken by gfs2_quota_change -> do_qc that need to be put.
+ */
+ if (!qd->qd_change && test_and_clear_bit(QDF_CHANGE, &qd->qd_flags)) {
+ slot_put(qd);
+ qd_put(qd);
+ return 0;
+ }
+
if (!lockref_get_not_dead(&qd->qd_lockref))
return 0;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6901cd85f1df..e4e85010ab5b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2316,7 +2316,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt);
- if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
+ if (rgd->rd_sbd->sd_args.ar_rgrplvb && rgd->rd_rgl) {
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 51b44da4a0d6..268651ac9fc8 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1436,7 +1436,7 @@ out:
wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
gfs2_glock_add_to_lru(ip->i_gl);
gfs2_glock_put_eventually(ip->i_gl);
- ip->i_gl = NULL;
+ rcu_assign_pointer(ip->i_gl, NULL);
}
}
diff --git a/fs/inode.c b/fs/inode.c
index 7cb048a3b3bd..ec41a11e2f8f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2336,6 +2336,22 @@ struct timespec64 current_time(struct inode *inode)
EXPORT_SYMBOL(current_time);
/**
+ * inode_set_ctime_current - set the ctime to current_time
+ * @inode: inode
+ *
+ * Set the inode->i_ctime to the current value for the inode. Returns
+ * the current value that was assigned to i_ctime.
+ */
+struct timespec64 inode_set_ctime_current(struct inode *inode)
+{
+ struct timespec64 now = current_time(inode);
+
+ inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
+ return now;
+}
+EXPORT_SYMBOL(inode_set_ctime_current);
+
+/**
* in_group_or_capable - check whether caller is CAP_FSETID privileged
* @mnt_userns: user namespace of the mount @inode was found from
* @inode: inode to check
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 468dcbba45bc..4ee7790f7b2e 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -92,7 +92,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
if (offset + ret > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
ret = dio->i_size - offset;
- iocb->ki_pos += ret;
}
/*
@@ -118,18 +117,19 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
}
inode_dio_end(file_inode(iocb->ki_filp));
- /*
- * If this is a DSYNC write, make sure we push it to stable storage now
- * that we've written data.
- */
- if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
- ret = generic_write_sync(iocb, ret);
-
- if (ret > 0)
- ret += dio->done_before;
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ /*
+ * If this is a DSYNC write, make sure we push it to stable
+ * storage now that we've written data.
+ */
+ if (dio->flags & IOMAP_DIO_NEED_SYNC)
+ ret = generic_write_sync(iocb, ret);
+ if (ret > 0)
+ ret += dio->done_before;
+ }
kfree(dio);
-
return ret;
}
EXPORT_SYMBOL_GPL(iomap_dio_complete);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index fc6989e7a8c5..f033ac807013 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -165,7 +165,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
tid_t this_tid;
int result, batch_count = 0;
- jbd_debug(1, "Start checkpoint\n");
+ jbd2_debug(1, "Start checkpoint\n");
/*
* First thing: if there are any transactions in the log which
@@ -174,7 +174,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
*/
result = jbd2_cleanup_journal_tail(journal);
trace_jbd2_checkpoint(journal, result);
- jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
+ jbd2_debug(1, "cleanup_journal_tail returned %d\n", result);
if (result <= 0)
return result;
@@ -349,6 +349,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* Checkpoint list management */
+enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
+
/*
* journal_shrink_one_cp_list
*
@@ -360,7 +362,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
- bool destroy, bool *released)
+ enum shrink_type type,
+ bool *released)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
@@ -376,12 +379,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
jh = next_jh;
next_jh = jh->b_cpnext;
- if (destroy) {
+ if (type == SHRINK_DESTROY) {
ret = __jbd2_journal_remove_checkpoint(jh);
} else {
ret = jbd2_journal_try_remove_checkpoint(jh);
- if (ret < 0)
- continue;
+ if (ret < 0) {
+ if (type == SHRINK_BUSY_SKIP)
+ continue;
+ break;
+ }
}
nr_freed++;
@@ -445,7 +451,7 @@ again:
tid = transaction->t_tid;
freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
- false, &released);
+ SHRINK_BUSY_SKIP, &released);
nr_freed += freed;
(*nr_to_scan) -= min(*nr_to_scan, freed);
if (*nr_to_scan == 0)
@@ -485,19 +491,21 @@ out:
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
transaction_t *transaction, *last_transaction, *next_transaction;
+ enum shrink_type type;
bool released;
transaction = journal->j_checkpoint_transactions;
if (!transaction)
return;
+ type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do {
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
journal_shrink_one_cp_list(transaction->t_checkpoint_list,
- destroy, &released);
+ type, &released);
/*
* This function only frees up some memory if possible so we
* dont have an obligation to finish processing. Bail out if
@@ -631,6 +639,8 @@ int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
+ if (jh->b_transaction)
+ return -EBUSY;
if (!trylock_buffer(bh))
return -EBUSY;
if (buffer_dirty(bh)) {
@@ -715,5 +725,5 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
trace_jbd2_drop_transaction(journal, transaction);
- jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
+ jbd2_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 20294c1bbeab..f858d1152368 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -300,6 +300,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
if (!ret)
ret = err;
}
+ cond_resched();
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
smp_mb();
@@ -419,7 +420,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
/* Do we need to erase the effects of a prior jbd2_journal_flush? */
if (journal->j_flags & JBD2_FLUSHED) {
- jbd_debug(3, "super block updated\n");
+ jbd2_debug(3, "super block updated\n");
mutex_lock_io(&journal->j_checkpoint_mutex);
/*
* We hold j_checkpoint_mutex so tail cannot change under us.
@@ -433,7 +434,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
REQ_SYNC);
mutex_unlock(&journal->j_checkpoint_mutex);
} else {
- jbd_debug(3, "superblock not updated\n");
+ jbd2_debug(3, "superblock not updated\n");
}
J_ASSERT(journal->j_running_transaction != NULL);
@@ -465,7 +466,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction = journal->j_running_transaction;
trace_jbd2_start_commit(journal, commit_transaction);
- jbd_debug(1, "JBD2: starting commit of transaction %d\n",
+ jbd2_debug(1, "JBD2: starting commit of transaction %d\n",
commit_transaction->t_tid);
write_lock(&journal->j_state_lock);
@@ -484,22 +485,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
stats.run.rs_locked);
- spin_lock(&commit_transaction->t_handle_lock);
- while (atomic_read(&commit_transaction->t_updates)) {
- DEFINE_WAIT(wait);
+ // waits for any t_updates to finish
+ jbd2_journal_wait_updates(journal);
- prepare_to_wait(&journal->j_wait_updates, &wait,
- TASK_UNINTERRUPTIBLE);
- if (atomic_read(&commit_transaction->t_updates)) {
- spin_unlock(&commit_transaction->t_handle_lock);
- write_unlock(&journal->j_state_lock);
- schedule();
- write_lock(&journal->j_state_lock);
- spin_lock(&commit_transaction->t_handle_lock);
- }
- finish_wait(&journal->j_wait_updates, &wait);
- }
- spin_unlock(&commit_transaction->t_handle_lock);
commit_transaction->t_state = T_SWITCH;
J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
@@ -551,7 +539,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
__jbd2_journal_clean_checkpoint_list(journal, false);
spin_unlock(&journal->j_list_lock);
- jbd_debug(3, "JBD2: commit phase 1\n");
+ jbd2_debug(3, "JBD2: commit phase 1\n");
/*
* Clear revoked flag to reflect there is no revoked buffers
@@ -584,7 +572,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
wake_up_all(&journal->j_wait_transaction_locked);
write_unlock(&journal->j_state_lock);
- jbd_debug(3, "JBD2: commit phase 2a\n");
+ jbd2_debug(3, "JBD2: commit phase 2a\n");
/*
* Now start flushing things to disk, in the order they appear
@@ -597,7 +585,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
blk_start_plug(&plug);
jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
- jbd_debug(3, "JBD2: commit phase 2b\n");
+ jbd2_debug(3, "JBD2: commit phase 2b\n");
/*
* Way to go: we have now written out all of the data for a
@@ -653,7 +641,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (!descriptor) {
J_ASSERT (bufs == 0);
- jbd_debug(4, "JBD2: get descriptor\n");
+ jbd2_debug(4, "JBD2: get descriptor\n");
descriptor = jbd2_journal_get_descriptor_buffer(
commit_transaction,
@@ -663,7 +651,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
continue;
}
- jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
+ jbd2_debug(4, "JBD2: got buffer %llu (%p)\n",
(unsigned long long)descriptor->b_blocknr,
descriptor->b_data);
tagp = &descriptor->b_data[sizeof(journal_header_t)];
@@ -748,7 +736,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction->t_buffers == NULL ||
space_left < tag_bytes + 16 + csum_size) {
- jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
+ jbd2_debug(4, "JBD2: Submit %d IOs\n", bufs);
/* Write an end-of-descriptor marker before
submitting the IOs. "tag" still points to
@@ -819,7 +807,7 @@ start_journal_io:
commit_transaction->t_state = T_COMMIT_DFLUSH;
write_unlock(&journal->j_state_lock);
- /*
+ /*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
* the commit record
@@ -850,7 +838,7 @@ start_journal_io:
so we incur less scheduling load.
*/
- jbd_debug(3, "JBD2: commit phase 3\n");
+ jbd2_debug(3, "JBD2: commit phase 3\n");
while (!list_empty(&io_bufs)) {
struct buffer_head *bh = list_entry(io_bufs.prev,
@@ -893,7 +881,7 @@ start_journal_io:
J_ASSERT (commit_transaction->t_shadow_list == NULL);
- jbd_debug(3, "JBD2: commit phase 4\n");
+ jbd2_debug(3, "JBD2: commit phase 4\n");
/* Here we wait for the revoke record and descriptor record buffers */
while (!list_empty(&log_bufs)) {
@@ -917,7 +905,7 @@ start_journal_io:
if (err)
jbd2_journal_abort(journal, err);
- jbd_debug(3, "JBD2: commit phase 5\n");
+ jbd2_debug(3, "JBD2: commit phase 5\n");
write_lock(&journal->j_state_lock);
J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
commit_transaction->t_state = T_COMMIT_JFLUSH;
@@ -956,7 +944,7 @@ start_journal_io:
transaction can be removed from any checkpoint list it was on
before. */
- jbd_debug(3, "JBD2: commit phase 6\n");
+ jbd2_debug(3, "JBD2: commit phase 6\n");
J_ASSERT(list_empty(&commit_transaction->t_inode_list));
J_ASSERT(commit_transaction->t_buffers == NULL);
@@ -1133,7 +1121,7 @@ restart_loop:
/* Done with this transaction! */
- jbd_debug(3, "JBD2: commit phase 7\n");
+ jbd2_debug(3, "JBD2: commit phase 7\n");
J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
@@ -1175,7 +1163,7 @@ restart_loop:
journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
trace_jbd2_end_commit(journal, commit_transaction);
- jbd_debug(1, "JBD2: commit %d complete, head %d\n",
+ jbd2_debug(1, "JBD2: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
write_lock(&journal->j_state_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 580d2fdfe21f..b7af1727a016 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -203,11 +203,11 @@ loop:
if (journal->j_flags & JBD2_UNMOUNT)
goto end_loop;
- jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
+ jbd2_debug(1, "commit_sequence=%u, commit_request=%u\n",
journal->j_commit_sequence, journal->j_commit_request);
if (journal->j_commit_sequence != journal->j_commit_request) {
- jbd_debug(1, "OK, requests differ\n");
+ jbd2_debug(1, "OK, requests differ\n");
write_unlock(&journal->j_state_lock);
del_timer_sync(&journal->j_commit_timer);
jbd2_journal_commit_transaction(journal);
@@ -222,7 +222,7 @@ loop:
* good idea, because that depends on threads that may
* be already stopped.
*/
- jbd_debug(1, "Now suspending kjournald2\n");
+ jbd2_debug(1, "Now suspending kjournald2\n");
write_unlock(&journal->j_state_lock);
try_to_freeze();
write_lock(&journal->j_state_lock);
@@ -252,7 +252,7 @@ loop:
finish_wait(&journal->j_wait_commit, &wait);
}
- jbd_debug(1, "kjournald2 wakes\n");
+ jbd2_debug(1, "kjournald2 wakes\n");
/*
* Were we woken up by a commit wakeup event?
@@ -260,7 +260,7 @@ loop:
transaction = journal->j_running_transaction;
if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
journal->j_commit_request = transaction->t_tid;
- jbd_debug(1, "woke because of timeout\n");
+ jbd2_debug(1, "woke because of timeout\n");
}
goto loop;
@@ -268,7 +268,7 @@ end_loop:
del_timer_sync(&journal->j_commit_timer);
journal->j_task = NULL;
wake_up(&journal->j_wait_done_commit);
- jbd_debug(1, "Journal thread exiting.\n");
+ jbd2_debug(1, "Journal thread exiting.\n");
write_unlock(&journal->j_state_lock);
return 0;
}
@@ -500,7 +500,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
*/
journal->j_commit_request = target;
- jbd_debug(1, "JBD2: requesting commit %u/%u\n",
+ jbd2_debug(1, "JBD2: requesting commit %u/%u\n",
journal->j_commit_request,
journal->j_commit_sequence);
journal->j_running_transaction->t_requested = jiffies;
@@ -705,7 +705,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
}
#endif
while (tid_gt(tid, journal->j_commit_sequence)) {
- jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
+ jbd2_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
tid, journal->j_commit_sequence);
read_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_commit);
@@ -1123,7 +1123,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
freed += journal->j_last - journal->j_first;
trace_jbd2_update_log_tail(journal, tid, block, freed);
- jbd_debug(1,
+ jbd2_debug(1,
"Cleaning journal tail from %u to %u (offset %lu), "
"freeing %lu\n",
journal->j_tail_sequence, tid, block, freed);
@@ -1498,7 +1498,7 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
return NULL;
}
- jbd_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
+ jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
@@ -1577,7 +1577,7 @@ static int journal_reset(journal_t *journal)
* attempting a write to a potential-readonly device.
*/
if (sb->s_start == 0) {
- jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
+ jbd2_debug(1, "JBD2: Skipping superblock update on recovered sb "
"(start %ld, seq %u, errno %d)\n",
journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
@@ -1616,9 +1616,11 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
return -EIO;
}
- trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
+
+ trace_jbd2_write_superblock(journal, write_flags);
+
if (buffer_write_io_error(bh)) {
/*
* Oh, dear. A previous attempt to write the journal
@@ -1680,7 +1682,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
}
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
- jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
+ jbd2_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
tail_block, tail_tid);
lock_buffer(journal->j_sb_buffer);
@@ -1721,7 +1723,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
return;
}
- jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
+ jbd2_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -1867,7 +1869,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
errcode = journal->j_errno;
if (errcode == -ESHUTDOWN)
errcode = 0;
- jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
+ jbd2_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
sb->s_errno = cpu_to_be32(errcode);
jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
@@ -2339,7 +2341,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
compat & JBD2_FEATURE_COMPAT_CHECKSUM)
compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
- jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
+ jbd2_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
compat, ro, incompat);
sb = journal->j_superblock;
@@ -2408,7 +2410,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
{
journal_superblock_t *sb;
- jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
+ jbd2_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
compat, ro, incompat);
sb = journal->j_superblock;
@@ -2865,7 +2867,7 @@ static struct journal_head *journal_alloc_journal_head(void)
#endif
ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
if (!ret) {
- jbd_debug(1, "out of memory for journal_head\n");
+ jbd2_debug(1, "out of memory for journal_head\n");
pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
GFP_NOFS | __GFP_NOFAIL);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3c5dd010e39d..8a960c6edb6e 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -224,12 +224,8 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
/* Make sure we wrap around the log correctly! */
#define wrap(journal, var) \
do { \
- unsigned long _wrap_last = \
- jbd2_has_feature_fast_commit(journal) ? \
- (journal)->j_fc_last : (journal)->j_last; \
- \
- if (var >= _wrap_last) \
- var -= (_wrap_last - (journal)->j_first); \
+ if (var >= (journal)->j_last) \
+ var -= ((journal)->j_last - (journal)->j_first); \
} while (0)
static int fc_do_one_pass(journal_t *journal,
@@ -245,11 +241,11 @@ static int fc_do_one_pass(journal_t *journal,
return 0;
while (next_fc_block <= journal->j_fc_last) {
- jbd_debug(3, "Fast commit replay: next block %ld\n",
+ jbd2_debug(3, "Fast commit replay: next block %ld\n",
next_fc_block);
err = jread(&bh, journal, next_fc_block);
if (err) {
- jbd_debug(3, "Fast commit replay: read error\n");
+ jbd2_debug(3, "Fast commit replay: read error\n");
break;
}
@@ -264,7 +260,7 @@ static int fc_do_one_pass(journal_t *journal,
}
if (err)
- jbd_debug(3, "Fast commit replay failed, err = %d\n", err);
+ jbd2_debug(3, "Fast commit replay failed, err = %d\n", err);
return err;
}
@@ -287,6 +283,8 @@ int jbd2_journal_recover(journal_t *journal)
journal_superblock_t * sb;
struct recovery_info info;
+ errseq_t wb_err;
+ struct address_space *mapping;
memset(&info, 0, sizeof(info));
sb = journal->j_superblock;
@@ -298,22 +296,25 @@ int jbd2_journal_recover(journal_t *journal)
*/
if (!sb->s_start) {
- jbd_debug(1, "No recovery required, last transaction %d\n",
+ jbd2_debug(1, "No recovery required, last transaction %d\n",
be32_to_cpu(sb->s_sequence));
journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
return 0;
}
+ wb_err = 0;
+ mapping = journal->j_fs_dev->bd_inode->i_mapping;
+ errseq_check_and_advance(&mapping->wb_err, &wb_err);
err = do_one_pass(journal, &info, PASS_SCAN);
if (!err)
err = do_one_pass(journal, &info, PASS_REVOKE);
if (!err)
err = do_one_pass(journal, &info, PASS_REPLAY);
- jbd_debug(1, "JBD2: recovery, exit status %d, "
+ jbd2_debug(1, "JBD2: recovery, exit status %d, "
"recovered transactions %u to %u\n",
err, info.start_transaction, info.end_transaction);
- jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
+ jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
/* Restart the log at the next transaction ID, thus invalidating
@@ -324,6 +325,9 @@ int jbd2_journal_recover(journal_t *journal)
err2 = sync_blockdev(journal->j_fs_dev);
if (!err)
err = err2;
+ err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
+ if (!err)
+ err = err2;
/* Make sure all replayed data is on permanent storage */
if (journal->j_flags & JBD2_BARRIER) {
err2 = blkdev_issue_flush(journal->j_fs_dev);
@@ -363,7 +367,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
#ifdef CONFIG_JBD2_DEBUG
int dropped = info.end_transaction -
be32_to_cpu(journal->j_superblock->s_sequence);
- jbd_debug(1,
+ jbd2_debug(1,
"JBD2: ignoring %d transaction%s from the journal.\n",
dropped, (dropped == 1) ? "" : "s");
#endif
@@ -485,7 +489,7 @@ static int do_one_pass(journal_t *journal,
if (pass == PASS_SCAN)
info->start_transaction = first_commit_ID;
- jbd_debug(1, "Starting recovery pass %d\n", pass);
+ jbd2_debug(1, "Starting recovery pass %d\n", pass);
/*
* Now we walk through the log, transaction by transaction,
@@ -511,16 +515,14 @@ static int do_one_pass(journal_t *journal,
if (tid_geq(next_commit_ID, info->end_transaction))
break;
- jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
- next_commit_ID, next_log_block,
- jbd2_has_feature_fast_commit(journal) ?
- journal->j_fc_last : journal->j_last);
+ jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
+ next_commit_ID, next_log_block, journal->j_last);
/* Skip over each chunk of the transaction looking
* either the next descriptor block or the final commit
* record. */
- jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
+ jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block);
err = jread(&bh, journal, next_log_block);
if (err)
goto failed;
@@ -543,7 +545,7 @@ static int do_one_pass(journal_t *journal,
blocktype = be32_to_cpu(tmp->h_blocktype);
sequence = be32_to_cpu(tmp->h_sequence);
- jbd_debug(3, "Found magic %d, sequence %d\n",
+ jbd2_debug(3, "Found magic %d, sequence %d\n",
blocktype, sequence);
if (sequence != next_commit_ID) {
@@ -576,7 +578,7 @@ static int do_one_pass(journal_t *journal,
goto failed;
}
need_check_commit_time = true;
- jbd_debug(1,
+ jbd2_debug(1,
"invalid descriptor block found in %lu\n",
next_log_block);
}
@@ -759,7 +761,7 @@ static int do_one_pass(journal_t *journal,
* It likely does not belong to same journal,
* just end this recovery with success.
*/
- jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
+ jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
next_commit_ID);
brelse(bh);
goto done;
@@ -827,7 +829,7 @@ static int do_one_pass(journal_t *journal,
if (pass == PASS_SCAN &&
!jbd2_descriptor_block_csum_verify(journal,
bh->b_data)) {
- jbd_debug(1, "JBD2: invalid revoke block found in %lu\n",
+ jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n",
next_log_block);
need_check_commit_time = true;
}
@@ -846,7 +848,7 @@ static int do_one_pass(journal_t *journal,
continue;
default:
- jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
+ jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
blocktype);
brelse(bh);
goto done;
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index fa608788b93d..4556e4689024 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -398,7 +398,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
}
handle->h_revoke_credits--;
- jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in);
+ jbd2_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in);
err = insert_revoke_hash(journal, blocknr,
handle->h_transaction->t_tid);
BUFFER_TRACE(bh_in, "exit");
@@ -428,7 +428,7 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
int did_revoke = 0; /* akpm: debug */
struct buffer_head *bh = jh2bh(jh);
- jbd_debug(4, "journal_head %p, cancelling revoke\n", jh);
+ jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh);
/* Is the existing Revoke bit valid? If so, we trust it, and
* only perform the full cancel if the revoke bit is set. If
@@ -444,7 +444,7 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
if (need_cancel) {
record = find_revoke_record(journal, bh->b_blocknr);
if (record) {
- jbd_debug(4, "cancelled existing revoke on "
+ jbd2_debug(4, "cancelled existing revoke on "
"blocknr %llu\n", (unsigned long long)bh->b_blocknr);
spin_lock(&journal->j_revoke_lock);
list_del(&record->hash);
@@ -560,7 +560,7 @@ void jbd2_journal_write_revoke_records(transaction_t *transaction,
}
if (descriptor)
flush_descriptor(journal, descriptor, offset);
- jbd_debug(1, "Wrote %d revoke records\n", count);
+ jbd2_debug(1, "Wrote %d revoke records\n", count);
}
/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 62e68c5b8ec3..c2125203ef2d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal,
transaction->t_start_time = ktime_get();
transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval;
- spin_lock_init(&transaction->t_handle_lock);
atomic_set(&transaction->t_updates, 0);
atomic_set(&transaction->t_outstanding_credits,
jbd2_descriptor_blocks_per_trans(journal) +
@@ -139,24 +138,21 @@ static void jbd2_get_transaction(journal_t *journal,
/*
* Update transaction's maximum wait time, if debugging is enabled.
*
- * In order for t_max_wait to be reliable, it must be protected by a
- * lock. But doing so will mean that start_this_handle() can not be
- * run in parallel on SMP systems, which limits our scalability. So
- * unless debugging is enabled, we no longer update t_max_wait, which
- * means that maximum wait time reported by the jbd2_run_stats
- * tracepoint will always be zero.
+ * t_max_wait is carefully updated here with use of atomic compare exchange.
+ * Note that there could be multiple threads trying to do this simultaneously
+ * hence using cmpxchg to avoid any use of locks in this case.
*/
static inline void update_t_max_wait(transaction_t *transaction,
unsigned long ts)
{
#ifdef CONFIG_JBD2_DEBUG
+ unsigned long oldts, newts;
if (jbd2_journal_enable_debug &&
time_after(transaction->t_start, ts)) {
- ts = jbd2_time_diff(ts, transaction->t_start);
- spin_lock(&transaction->t_handle_lock);
- if (ts > transaction->t_max_wait)
- transaction->t_max_wait = ts;
- spin_unlock(&transaction->t_handle_lock);
+ newts = jbd2_time_diff(ts, transaction->t_start);
+ oldts = READ_ONCE(transaction->t_max_wait);
+ while (oldts < newts)
+ oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
}
#endif
}
@@ -378,7 +374,7 @@ alloc_transaction:
return -ENOMEM;
}
- jbd_debug(3, "New handle %p going live.\n", handle);
+ jbd2_debug(3, "New handle %p going live.\n", handle);
/*
* We need to hold j_state_lock until t_updates has been incremented,
@@ -449,7 +445,7 @@ repeat:
}
/* OK, account for the buffers that this operation expects to
- * use and add the handle to the running transaction.
+ * use and add the handle to the running transaction.
*/
update_t_max_wait(transaction, ts);
handle->h_transaction = transaction;
@@ -458,7 +454,7 @@ repeat:
handle->h_start_jiffies = jiffies;
atomic_inc(&transaction->t_updates);
atomic_inc(&transaction->t_handle_count);
- jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
+ jbd2_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
handle, blocks,
atomic_read(&transaction->t_outstanding_credits),
jbd2_log_space_left(journal));
@@ -679,7 +675,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
/* Don't extend a locked-down transaction! */
if (transaction->t_state != T_RUNNING) {
- jbd_debug(3, "denied handle %p %d blocks: "
+ jbd2_debug(3, "denied handle %p %d blocks: "
"transaction not running\n", handle, nblocks);
goto error_out;
}
@@ -690,15 +686,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
DIV_ROUND_UP(
handle->h_revoke_credits_requested,
journal->j_revoke_records_per_block);
- spin_lock(&transaction->t_handle_lock);
wanted = atomic_add_return(nblocks,
&transaction->t_outstanding_credits);
if (wanted > journal->j_max_transaction_buffers) {
- jbd_debug(3, "denied handle %p %d blocks: "
+ jbd2_debug(3, "denied handle %p %d blocks: "
"transaction too large\n", handle, nblocks);
atomic_sub(nblocks, &transaction->t_outstanding_credits);
- goto unlock;
+ goto error_out;
}
trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
@@ -713,9 +708,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
handle->h_revoke_credits_requested += revoke_records;
result = 0;
- jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
-unlock:
- spin_unlock(&transaction->t_handle_lock);
+ jbd2_debug(3, "extended handle %p by %d\n", handle, nblocks);
error_out:
read_unlock(&journal->j_state_lock);
return result;
@@ -803,7 +796,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
* First unlink the handle from its current transaction, and start the
* commit on that.
*/
- jbd_debug(2, "restarting handle %p\n", handle);
+ jbd2_debug(2, "restarting handle %p\n", handle);
stop_this_handle(handle);
handle->h_transaction = NULL;
@@ -836,6 +829,43 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
}
EXPORT_SYMBOL(jbd2_journal_restart);
+/*
+ * Waits for any outstanding t_updates to finish.
+ * This is called with write j_state_lock held.
+ */
+void jbd2_journal_wait_updates(journal_t *journal)
+{
+ DEFINE_WAIT(wait);
+
+ while (1) {
+ /*
+ * Note that the running transaction can get freed under us if
+ * this transaction is getting committed in
+ * jbd2_journal_commit_transaction() ->
+ * jbd2_journal_free_transaction(). This can only happen when we
+ * release j_state_lock -> schedule() -> acquire j_state_lock.
+ * Hence we should re-read j_running_transaction every time (i.e.
+ * after each j_state_lock release/acquire cycle), else it may
+ * lead to a use-after-free of the old, already freed transaction.
+ */
+ transaction_t *transaction = journal->j_running_transaction;
+
+ if (!transaction)
+ break;
+
+ prepare_to_wait(&journal->j_wait_updates, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (!atomic_read(&transaction->t_updates)) {
+ finish_wait(&journal->j_wait_updates, &wait);
+ break;
+ }
+ write_unlock(&journal->j_state_lock);
+ schedule();
+ finish_wait(&journal->j_wait_updates, &wait);
+ write_lock(&journal->j_state_lock);
+ }
+}
+
/**
* jbd2_journal_lock_updates () - establish a transaction barrier.
* @journal: Journal to establish a barrier on.
@@ -848,8 +878,6 @@ EXPORT_SYMBOL(jbd2_journal_restart);
*/
void jbd2_journal_lock_updates(journal_t *journal)
{
- DEFINE_WAIT(wait);
-
jbd2_might_wait_for_commit(journal);
write_lock(&journal->j_state_lock);
@@ -863,27 +891,9 @@ void jbd2_journal_lock_updates(journal_t *journal)
write_lock(&journal->j_state_lock);
}
- /* Wait until there are no running updates */
- while (1) {
- transaction_t *transaction = journal->j_running_transaction;
-
- if (!transaction)
- break;
+ /* Wait until there are no running t_updates */
+ jbd2_journal_wait_updates(journal);
- spin_lock(&transaction->t_handle_lock);
- prepare_to_wait(&journal->j_wait_updates, &wait,
- TASK_UNINTERRUPTIBLE);
- if (!atomic_read(&transaction->t_updates)) {
- spin_unlock(&transaction->t_handle_lock);
- finish_wait(&journal->j_wait_updates, &wait);
- break;
- }
- spin_unlock(&transaction->t_handle_lock);
- write_unlock(&journal->j_state_lock);
- schedule();
- finish_wait(&journal->j_wait_updates, &wait);
- write_lock(&journal->j_state_lock);
- }
write_unlock(&journal->j_state_lock);
/*
@@ -970,7 +980,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
journal = transaction->t_journal;
- jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
+ jbd2_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
JBUFFER_TRACE(jh, "entry");
repeat:
@@ -1270,7 +1280,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh = jbd2_journal_add_journal_head(bh);
int err;
- jbd_debug(5, "journal_head %p\n", jh);
+ jbd2_debug(5, "journal_head %p\n", jh);
err = -EROFS;
if (is_handle_aborted(handle))
goto out;
@@ -1493,7 +1503,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* of the running transaction.
*/
jh = bh2jh(bh);
- jbd_debug(5, "journal_head %p\n", jh);
+ jbd2_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry");
/*
@@ -1826,7 +1836,7 @@ int jbd2_journal_stop(handle_t *handle)
pid_t pid;
if (--handle->h_ref > 0) {
- jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
+ jbd2_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
handle->h_ref);
if (is_handle_aborted(handle))
return -EIO;
@@ -1846,7 +1856,7 @@ int jbd2_journal_stop(handle_t *handle)
if (is_handle_aborted(handle))
err = -EIO;
- jbd_debug(4, "Handle %p going down\n", handle);
+ jbd2_debug(4, "Handle %p going down\n", handle);
trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
tid, handle->h_type, handle->h_line_no,
jiffies - handle->h_start_jiffies,
@@ -1924,7 +1934,7 @@ int jbd2_journal_stop(handle_t *handle)
* completes the commit thread, it just doesn't write
* anything to disk. */
- jbd_debug(2, "transaction too old, requesting commit for "
+ jbd2_debug(2, "transaction too old, requesting commit for "
"handle %p\n", handle);
/* This is non-blocking */
jbd2_log_start_commit(journal, tid);
@@ -2668,7 +2678,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
return -EROFS;
journal = transaction->t_journal;
- jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
+ jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
transaction->t_tid);
spin_lock(&journal->j_list_lock);
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index f235a3d270a0..5b01026fff9b 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -87,7 +87,7 @@ static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno,
static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
static int dbFindBits(u32 word, int l2nb);
static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
-static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx);
+static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl);
static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
int nblocks);
static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
@@ -180,7 +180,8 @@ int dbMount(struct inode *ipbmap)
bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
- if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE) {
+ if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE ||
+ bmp->db_l2nbperpage < 0) {
err = -EINVAL;
goto err_release_metapage;
}
@@ -194,6 +195,12 @@ int dbMount(struct inode *ipbmap)
bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
+ if (bmp->db_maxag >= MAXAG || bmp->db_maxag < 0 ||
+ bmp->db_agpref >= MAXAG || bmp->db_agpref < 0) {
+ err = -EINVAL;
+ goto err_release_metapage;
+ }
+
bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
@@ -269,6 +276,7 @@ int dbUnmount(struct inode *ipbmap, int mounterror)
/* free the memory for the in-memory bmap. */
kfree(bmp);
+ JFS_SBI(ipbmap->i_sb)->bmap = NULL;
return (0);
}
@@ -1777,7 +1785,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
* dbFindLeaf() returns the index of the leaf at which
* free space was found.
*/
- rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx);
+ rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx, true);
/* release the buffer.
*/
@@ -2024,7 +2032,7 @@ dbAllocDmapLev(struct bmap * bmp,
* free space. if sufficient free space is found, dbFindLeaf()
* returns the index of the leaf at which free space was found.
*/
- if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
+ if (dbFindLeaf((dmtree_t *) &dp->tree, l2nb, &leafidx, false))
return -ENOSPC;
if (leafidx < 0)
@@ -2988,14 +2996,18 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
* leafidx - return pointer to be set to the index of the leaf
* describing at least l2nb free blocks if sufficient
* free blocks are found.
+ * is_ctl - determines if the tree is of type ctl
*
* RETURN VALUES:
* 0 - success
* -ENOSPC - insufficient free blocks.
*/
-static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
+static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl)
{
int ti, n = 0, k, x = 0;
+ int max_size;
+
+ max_size = is_ctl ? CTLTREESIZE : TREESIZE;
/* first check the root of the tree to see if there is
* sufficient free space.
@@ -3016,6 +3028,8 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
/* sufficient free space found. move to the next
* level (or quit if this is the last level).
*/
+ if (x + n > max_size)
+ return -ENOSPC;
if (l2nb <= tp->dmt_stree[x + n])
break;
}
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index bb4a342a193d..6420b6749d48 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -508,6 +508,11 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
* blocks in the map. in that case, we'll start off with the
* maximum free.
*/
+
+ /* give up if no space left */
+ if (bmp->db_maxfreebud == -1)
+ return -ENOSPC;
+
max = (s64) 1 << bmp->db_maxfreebud;
if (*nblocks >= max && *nblocks > nbperpage)
nb = nblks = (max > nbperpage) ? max : nbperpage;
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 799d3837e7c2..6ed2e1d4c894 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -193,6 +193,7 @@ int diUnmount(struct inode *ipimap, int mounterror)
* free in-memory control structure
*/
kfree(imap);
+ JFS_IP(ipimap)->i_imap = NULL;
return (0);
}
@@ -1319,7 +1320,7 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
int diAlloc(struct inode *pip, bool dir, struct inode *ip)
{
int rc, ino, iagno, addext, extno, bitno, sword;
- int nwords, rem, i, agno;
+ int nwords, rem, i, agno, dn_numag;
u32 mask, inosmap, extsmap;
struct inode *ipimap;
struct metapage *mp;
@@ -1355,6 +1356,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
/* get the ag number of this iag */
agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
+ dn_numag = JFS_SBI(pip->i_sb)->bmap->db_numag;
+ if (agno < 0 || agno > dn_numag)
+ return -EIO;
if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
/*
diff --git a/fs/ksmbd/Kconfig b/fs/ksmbd/Kconfig
index 6af339cfdc04..d036ab80fec3 100644
--- a/fs/ksmbd/Kconfig
+++ b/fs/ksmbd/Kconfig
@@ -1,12 +1,11 @@
config SMB_SERVER
- tristate "SMB3 server support (EXPERIMENTAL)"
+ tristate "SMB3 server support"
depends on INET
depends on MULTIUSER
depends on FILE_LOCKING
select NLS
select NLS_UTF8
select CRYPTO
- select CRYPTO_MD4
select CRYPTO_MD5
select CRYPTO_HMAC
select CRYPTO_ECB
@@ -34,14 +33,16 @@ config SMB_SERVER
in ksmbd-tools, available from
https://github.com/cifsd-team/ksmbd-tools.
More detail about how to run the ksmbd kernel server is
- available via README file
+ available via the README file
(https://github.com/cifsd-team/ksmbd-tools/blob/master/README).
ksmbd kernel server includes support for auto-negotiation,
Secure negotiate, Pre-authentication integrity, oplock/lease,
compound requests, multi-credit, packet signing, RDMA(smbdirect),
smb3 encryption, copy-offload, secure per-user session
- establishment via NTLM or NTLMv2.
+ establishment via Kerberos or NTLMv2.
+
+if SMB_SERVER
config SMB_SERVER_SMBDIRECT
bool "Support for SMB Direct protocol"
@@ -55,6 +56,8 @@ config SMB_SERVER_SMBDIRECT
SMB Direct allows transferring SMB packets over RDMA. If unsure,
say N.
+endif
+
config SMB_SERVER_CHECK_CAP_NET_ADMIN
bool "Enable check network administration capability"
depends on SMB_SERVER
diff --git a/fs/ksmbd/asn1.c b/fs/ksmbd/asn1.c
index b014f4638610..4a4b2b03ff33 100644
--- a/fs/ksmbd/asn1.c
+++ b/fs/ksmbd/asn1.c
@@ -21,101 +21,11 @@
#include "ksmbd_spnego_negtokeninit.asn1.h"
#include "ksmbd_spnego_negtokentarg.asn1.h"
-#define SPNEGO_OID_LEN 7
#define NTLMSSP_OID_LEN 10
-#define KRB5_OID_LEN 7
-#define KRB5U2U_OID_LEN 8
-#define MSKRB5_OID_LEN 7
-static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
-static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
-static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
-static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
-static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
static char NTLMSSP_OID_STR[NTLMSSP_OID_LEN] = { 0x2b, 0x06, 0x01, 0x04, 0x01,
0x82, 0x37, 0x02, 0x02, 0x0a };
-static bool
-asn1_subid_decode(const unsigned char **begin, const unsigned char *end,
- unsigned long *subid)
-{
- const unsigned char *ptr = *begin;
- unsigned char ch;
-
- *subid = 0;
-
- do {
- if (ptr >= end)
- return false;
-
- ch = *ptr++;
- *subid <<= 7;
- *subid |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
-
- *begin = ptr;
- return true;
-}
-
-static bool asn1_oid_decode(const unsigned char *value, size_t vlen,
- unsigned long **oid, size_t *oidlen)
-{
- const unsigned char *iptr = value, *end = value + vlen;
- unsigned long *optr;
- unsigned long subid;
-
- vlen += 1;
- if (vlen < 2 || vlen > UINT_MAX / sizeof(unsigned long))
- goto fail_nullify;
-
- *oid = kmalloc(vlen * sizeof(unsigned long), GFP_KERNEL);
- if (!*oid)
- return false;
-
- optr = *oid;
-
- if (!asn1_subid_decode(&iptr, end, &subid))
- goto fail;
-
- if (subid < 40) {
- optr[0] = 0;
- optr[1] = subid;
- } else if (subid < 80) {
- optr[0] = 1;
- optr[1] = subid - 40;
- } else {
- optr[0] = 2;
- optr[1] = subid - 80;
- }
-
- *oidlen = 2;
- optr += 2;
-
- while (iptr < end) {
- if (++(*oidlen) > vlen)
- goto fail;
-
- if (!asn1_subid_decode(&iptr, end, optr++))
- goto fail;
- }
- return true;
-
-fail:
- kfree(*oid);
-fail_nullify:
- *oid = NULL;
- return false;
-}
-
-static bool oid_eq(unsigned long *oid1, unsigned int oid1len,
- unsigned long *oid2, unsigned int oid2len)
-{
- if (oid1len != oid2len)
- return false;
-
- return memcmp(oid1, oid2, oid1len) == 0;
-}
-
int
ksmbd_decode_negTokenInit(unsigned char *security_blob, int length,
struct ksmbd_conn *conn)
@@ -252,26 +162,18 @@ int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
int ksmbd_gssapi_this_mech(void *context, size_t hdrlen, unsigned char tag,
const void *value, size_t vlen)
{
- unsigned long *oid;
- size_t oidlen;
- int err = 0;
-
- if (!asn1_oid_decode(value, vlen, &oid, &oidlen)) {
- err = -EBADMSG;
- goto out;
- }
+ enum OID oid;
- if (!oid_eq(oid, oidlen, SPNEGO_OID, SPNEGO_OID_LEN))
- err = -EBADMSG;
- kfree(oid);
-out:
- if (err) {
+ oid = look_up_OID(value, vlen);
+ if (oid != OID_spnego) {
char buf[50];
sprint_oid(value, vlen, buf, sizeof(buf));
ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ return -EBADMSG;
}
- return err;
+
+ return 0;
}
int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
@@ -279,65 +181,56 @@ int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
size_t vlen)
{
struct ksmbd_conn *conn = context;
- unsigned long *oid;
- size_t oidlen;
+ enum OID oid;
int mech_type;
- char buf[50];
-
- if (!asn1_oid_decode(value, vlen, &oid, &oidlen))
- goto fail;
- if (oid_eq(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN))
+ oid = look_up_OID(value, vlen);
+ if (oid == OID_ntlmssp) {
mech_type = KSMBD_AUTH_NTLMSSP;
- else if (oid_eq(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN))
+ } else if (oid == OID_mskrb5) {
mech_type = KSMBD_AUTH_MSKRB5;
- else if (oid_eq(oid, oidlen, KRB5_OID, KRB5_OID_LEN))
+ } else if (oid == OID_krb5) {
mech_type = KSMBD_AUTH_KRB5;
- else if (oid_eq(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN))
+ } else if (oid == OID_krb5u2u) {
mech_type = KSMBD_AUTH_KRB5U2U;
- else
- goto fail;
+ } else {
+ char buf[50];
+
+ sprint_oid(value, vlen, buf, sizeof(buf));
+ ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ return -EBADMSG;
+ }
conn->auth_mechs |= mech_type;
if (conn->preferred_auth_mech == 0)
conn->preferred_auth_mech = mech_type;
- kfree(oid);
return 0;
-
-fail:
- kfree(oid);
- sprint_oid(value, vlen, buf, sizeof(buf));
- ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
- return -EBADMSG;
}
-int ksmbd_neg_token_init_mech_token(void *context, size_t hdrlen,
- unsigned char tag, const void *value,
- size_t vlen)
+static int ksmbd_neg_token_alloc(void *context, size_t hdrlen,
+ unsigned char tag, const void *value,
+ size_t vlen)
{
struct ksmbd_conn *conn = context;
- conn->mechToken = kmalloc(vlen + 1, GFP_KERNEL);
+ conn->mechToken = kmemdup_nul(value, vlen, GFP_KERNEL);
if (!conn->mechToken)
return -ENOMEM;
- memcpy(conn->mechToken, value, vlen);
- conn->mechToken[vlen] = '\0';
return 0;
}
-int ksmbd_neg_token_targ_resp_token(void *context, size_t hdrlen,
+int ksmbd_neg_token_init_mech_token(void *context, size_t hdrlen,
unsigned char tag, const void *value,
size_t vlen)
{
- struct ksmbd_conn *conn = context;
-
- conn->mechToken = kmalloc(vlen + 1, GFP_KERNEL);
- if (!conn->mechToken)
- return -ENOMEM;
+ return ksmbd_neg_token_alloc(context, hdrlen, tag, value, vlen);
+}
- memcpy(conn->mechToken, value, vlen);
- conn->mechToken[vlen] = '\0';
- return 0;
+int ksmbd_neg_token_targ_resp_token(void *context, size_t hdrlen,
+ unsigned char tag, const void *value,
+ size_t vlen)
+{
+ return ksmbd_neg_token_alloc(context, hdrlen, tag, value, vlen);
}
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
index 59d205946746..9a08e6a90b94 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/ksmbd/auth.c
@@ -29,6 +29,7 @@
#include "mgmt/user_config.h"
#include "crypto_ctx.h"
#include "transport_ipc.h"
+#include "../smbfs_common/arc4.h"
/*
* Fixed format data defining GSS header and fixed string
@@ -342,6 +343,32 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
nt_len - CIFS_ENCPWD_SIZE,
domain_name, conn->ntlmssp.cryptkey);
kfree(domain_name);
+
+ /* The recovered secondary session key */
+ if (conn->ntlmssp.client_flags & NTLMSSP_NEGOTIATE_KEY_XCH) {
+ struct arc4_ctx *ctx_arc4;
+ unsigned int sess_key_off, sess_key_len;
+
+ sess_key_off = le32_to_cpu(authblob->SessionKey.BufferOffset);
+ sess_key_len = le16_to_cpu(authblob->SessionKey.Length);
+
+ if (blob_len < (u64)sess_key_off + sess_key_len)
+ return -EINVAL;
+
+ if (sess_key_len > CIFS_KEY_SIZE)
+ return -EINVAL;
+
+ ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL);
+ if (!ctx_arc4)
+ return -ENOMEM;
+
+ cifs_arc4_setkey(ctx_arc4, sess->sess_key,
+ SMB2_NTLMV2_SESSKEY_SIZE);
+ cifs_arc4_crypt(ctx_arc4, sess->sess_key,
+ (char *)authblob + sess_key_off, sess_key_len);
+ kfree_sensitive(ctx_arc4);
+ }
+
return ret;
}
@@ -404,6 +431,9 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
NTLMSSP_NEGOTIATE_56);
}
+ if (cflags & NTLMSSP_NEGOTIATE_SEAL && smb3_encryption_negotiated(conn))
+ flags |= NTLMSSP_NEGOTIATE_SEAL;
+
if (cflags & NTLMSSP_NEGOTIATE_ALWAYS_SIGN)
flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
@@ -414,6 +444,9 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
(cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+ if (cflags & NTLMSSP_NEGOTIATE_KEY_XCH)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+
chgblob->NegotiateFlags = cpu_to_le32(flags);
len = strlen(ksmbd_netbios_name());
name = kmalloc(2 + UNICODE_LEN(len), GFP_KERNEL);
@@ -700,8 +733,9 @@ static int generate_key(struct ksmbd_conn *conn, struct ksmbd_session *sess,
goto smb3signkey_ret;
}
- if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
- conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ if (key_size == SMB3_ENC_DEC_KEY_SIZE &&
+ (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
+ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L256, 4);
else
rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L128, 4);
@@ -884,9 +918,9 @@ int ksmbd_gen_preauth_integrity_hash(struct ksmbd_conn *conn, char *buf,
__u8 *pi_hash)
{
int rc;
- struct smb2_hdr *rcv_hdr = (struct smb2_hdr *)buf;
+ struct smb2_hdr *rcv_hdr = smb2_get_msg(buf);
char *all_bytes_msg = (char *)&rcv_hdr->ProtocolId;
- int msg_size = be32_to_cpu(rcv_hdr->smb2_buf_length);
+ int msg_size = get_rfc1002_len(buf);
struct ksmbd_crypto_ctx *ctx = NULL;
if (conn->preauth_info->Preauth_HashId !=
@@ -961,13 +995,16 @@ out:
return rc;
}
-static int ksmbd_get_encryption_key(struct ksmbd_conn *conn, __u64 ses_id,
+static int ksmbd_get_encryption_key(struct ksmbd_work *work, __u64 ses_id,
int enc, u8 *key)
{
struct ksmbd_session *sess;
u8 *ses_enc_key;
- sess = ksmbd_session_lookup_all(conn, ses_id);
+ if (enc)
+ sess = work->sess;
+ else
+ sess = ksmbd_session_lookup_all(work->conn, ses_id);
if (!sess)
return -EINVAL;
@@ -994,12 +1031,16 @@ static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec,
u8 *sign)
{
struct scatterlist *sg;
- unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
- int i, nr_entries[3] = {0}, total_entries = 0, sg_idx = 0;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
+ int i, *nr_entries, total_entries = 0, sg_idx = 0;
if (!nvec)
return NULL;
+ nr_entries = kcalloc(nvec, sizeof(int), GFP_KERNEL);
+ if (!nr_entries)
+ return NULL;
+
for (i = 0; i < nvec - 1; i++) {
unsigned long kaddr = (unsigned long)iov[i + 1].iov_base;
@@ -1017,8 +1058,10 @@ static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec,
total_entries += 2;
sg = kmalloc_array(total_entries, sizeof(struct scatterlist), GFP_KERNEL);
- if (!sg)
+ if (!sg) {
+ kfree(nr_entries);
return NULL;
+ }
sg_init_table(sg, total_entries);
smb2_sg_set_buf(&sg[sg_idx++], iov[0].iov_base + 24, assoc_data_len);
@@ -1052,15 +1095,16 @@ static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec,
}
}
smb2_sg_set_buf(&sg[sg_idx], sign, SMB2_SIGNATURE_SIZE);
+ kfree(nr_entries);
return sg;
}
-int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
+int ksmbd_crypt_message(struct ksmbd_work *work, struct kvec *iov,
unsigned int nvec, int enc)
{
- struct smb2_transform_hdr *tr_hdr =
- (struct smb2_transform_hdr *)iov[0].iov_base;
- unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_transform_hdr *tr_hdr = smb2_get_msg(iov[0].iov_base);
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
int rc;
struct scatterlist *sg;
u8 sign[SMB2_SIGNATURE_SIZE] = {};
@@ -1072,7 +1116,7 @@ int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
struct ksmbd_crypto_ctx *ctx;
- rc = ksmbd_get_encryption_key(conn,
+ rc = ksmbd_get_encryption_key(work,
le64_to_cpu(tr_hdr->SessionId),
enc,
key);
diff --git a/fs/ksmbd/auth.h b/fs/ksmbd/auth.h
index 25b772653de0..362b6159a6cf 100644
--- a/fs/ksmbd/auth.h
+++ b/fs/ksmbd/auth.h
@@ -33,9 +33,10 @@
struct ksmbd_session;
struct ksmbd_conn;
+struct ksmbd_work;
struct kvec;
-int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
+int ksmbd_crypt_message(struct ksmbd_work *work, struct kvec *iov,
unsigned int nvec, int enc);
void ksmbd_copy_gss_neg_header(void *buf);
int ksmbd_auth_ntlmv2(struct ksmbd_conn *conn, struct ksmbd_session *sess,
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index cab274b77727..0a7a30bd531f 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -20,7 +20,7 @@ static DEFINE_MUTEX(init_lock);
static struct ksmbd_conn_ops default_conn_ops;
LIST_HEAD(conn_list);
-DEFINE_RWLOCK(conn_list_lock);
+DECLARE_RWSEM(conn_list_lock);
/**
* ksmbd_conn_free() - free resources of the connection instance
@@ -32,9 +32,9 @@ DEFINE_RWLOCK(conn_list_lock);
*/
void ksmbd_conn_free(struct ksmbd_conn *conn)
{
- write_lock(&conn_list_lock);
+ down_write(&conn_list_lock);
list_del(&conn->conns_list);
- write_unlock(&conn_list_lock);
+ up_write(&conn_list_lock);
xa_destroy(&conn->sessions);
kvfree(conn->request_buf);
@@ -56,16 +56,23 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
return NULL;
conn->need_neg = true;
- conn->status = KSMBD_SESS_NEW;
+ ksmbd_conn_set_new(conn);
conn->local_nls = load_nls("utf8");
if (!conn->local_nls)
conn->local_nls = load_nls_default();
+ if (IS_ENABLED(CONFIG_UNICODE))
+ conn->um = utf8_load("12.1.0");
+ else
+ conn->um = ERR_PTR(-EOPNOTSUPP);
+ if (IS_ERR(conn->um))
+ conn->um = NULL;
atomic_set(&conn->req_running, 0);
atomic_set(&conn->r_count, 0);
conn->total_credits = 1;
conn->outstanding_credits = 0;
init_waitqueue_head(&conn->req_running_q);
+ init_waitqueue_head(&conn->r_count_q);
INIT_LIST_HEAD(&conn->conns_list);
INIT_LIST_HEAD(&conn->requests);
INIT_LIST_HEAD(&conn->async_requests);
@@ -77,9 +84,11 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
spin_lock_init(&conn->llist_lock);
INIT_LIST_HEAD(&conn->lock_list);
- write_lock(&conn_list_lock);
+ init_rwsem(&conn->session_lock);
+
+ down_write(&conn_list_lock);
list_add(&conn->conns_list, &conn_list);
- write_unlock(&conn_list_lock);
+ up_write(&conn_list_lock);
return conn;
}
@@ -88,7 +97,7 @@ bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c)
struct ksmbd_conn *t;
bool ret = false;
- read_lock(&conn_list_lock);
+ down_read(&conn_list_lock);
list_for_each_entry(t, &conn_list, conns_list) {
if (memcmp(t->ClientGUID, c->ClientGUID, SMB2_CLIENT_GUID_SIZE))
continue;
@@ -96,7 +105,7 @@ bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c)
ret = true;
break;
}
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
return ret;
}
@@ -105,10 +114,8 @@ void ksmbd_conn_enqueue_request(struct ksmbd_work *work)
struct ksmbd_conn *conn = work->conn;
struct list_head *requests_queue = NULL;
- if (conn->ops->get_cmd_val(work) != SMB2_CANCEL_HE) {
+ if (conn->ops->get_cmd_val(work) != SMB2_CANCEL_HE)
requests_queue = &conn->requests;
- work->syncronous = true;
- }
if (requests_queue) {
atomic_inc(&conn->req_running);
@@ -118,41 +125,47 @@ void ksmbd_conn_enqueue_request(struct ksmbd_work *work)
}
}
-int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work)
+void ksmbd_conn_try_dequeue_request(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
- int ret = 1;
if (list_empty(&work->request_entry) &&
list_empty(&work->async_request_entry))
- return 0;
+ return;
- if (!work->multiRsp)
- atomic_dec(&conn->req_running);
+ atomic_dec(&conn->req_running);
spin_lock(&conn->request_lock);
- if (!work->multiRsp) {
- list_del_init(&work->request_entry);
- if (work->syncronous == false)
- list_del_init(&work->async_request_entry);
- ret = 0;
- }
+ list_del_init(&work->request_entry);
spin_unlock(&conn->request_lock);
+ if (work->asynchronous)
+ release_async_work(work);
wake_up_all(&conn->req_running_q);
- return ret;
}
-static void ksmbd_conn_lock(struct ksmbd_conn *conn)
+void ksmbd_conn_lock(struct ksmbd_conn *conn)
{
mutex_lock(&conn->srv_mutex);
}
-static void ksmbd_conn_unlock(struct ksmbd_conn *conn)
+void ksmbd_conn_unlock(struct ksmbd_conn *conn)
{
mutex_unlock(&conn->srv_mutex);
}
-void ksmbd_conn_wait_idle(struct ksmbd_conn *conn)
+void ksmbd_all_conn_set_status(u64 sess_id, u32 status)
+{
+ struct ksmbd_conn *conn;
+
+ down_read(&conn_list_lock);
+ list_for_each_entry(conn, &conn_list, conns_list) {
+ if (conn->binding || xa_load(&conn->sessions, sess_id))
+ WRITE_ONCE(conn->status, status);
+ }
+ up_read(&conn_list_lock);
+}
+
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id)
{
wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2);
}
@@ -160,43 +173,25 @@ void ksmbd_conn_wait_idle(struct ksmbd_conn *conn)
int ksmbd_conn_write(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
- struct smb_hdr *rsp_hdr = work->response_buf;
- size_t len = 0;
int sent;
- struct kvec iov[3];
- int iov_idx = 0;
- ksmbd_conn_try_dequeue_request(work);
- if (!rsp_hdr) {
+ if (!work->response_buf) {
pr_err("NULL response header\n");
return -EINVAL;
}
- if (work->tr_buf) {
- iov[iov_idx] = (struct kvec) { work->tr_buf,
- sizeof(struct smb2_transform_hdr) };
- len += iov[iov_idx++].iov_len;
- }
+ if (work->send_no_response)
+ return 0;
- if (work->aux_payload_sz) {
- iov[iov_idx] = (struct kvec) { rsp_hdr, work->resp_hdr_sz };
- len += iov[iov_idx++].iov_len;
- iov[iov_idx] = (struct kvec) { work->aux_payload_buf, work->aux_payload_sz };
- len += iov[iov_idx++].iov_len;
- } else {
- if (work->tr_buf)
- iov[iov_idx].iov_len = work->resp_hdr_sz;
- else
- iov[iov_idx].iov_len = get_rfc1002_len(rsp_hdr) + 4;
- iov[iov_idx].iov_base = rsp_hdr;
- len += iov[iov_idx++].iov_len;
- }
+ if (!work->iov_idx)
+ return -EINVAL;
ksmbd_conn_lock(conn);
- sent = conn->transport->ops->writev(conn->transport, &iov[0],
- iov_idx, len,
- work->need_invalidate_rkey,
- work->remote_key);
+ sent = conn->transport->ops->writev(conn->transport, work->iov,
+ work->iov_cnt,
+ get_rfc1002_len(work->iov[0].iov_base) + 4,
+ work->need_invalidate_rkey,
+ work->remote_key);
ksmbd_conn_unlock(conn);
if (sent < 0) {
@@ -207,31 +202,31 @@ int ksmbd_conn_write(struct ksmbd_work *work)
return 0;
}
-int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf,
- unsigned int buflen, u32 remote_key, u64 remote_offset,
- u32 remote_len)
+int ksmbd_conn_rdma_read(struct ksmbd_conn *conn,
+ void *buf, unsigned int buflen,
+ struct smb2_buffer_desc_v1 *desc,
+ unsigned int desc_len)
{
int ret = -EINVAL;
if (conn->transport->ops->rdma_read)
ret = conn->transport->ops->rdma_read(conn->transport,
buf, buflen,
- remote_key, remote_offset,
- remote_len);
+ desc, desc_len);
return ret;
}
-int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf,
- unsigned int buflen, u32 remote_key,
- u64 remote_offset, u32 remote_len)
+int ksmbd_conn_rdma_write(struct ksmbd_conn *conn,
+ void *buf, unsigned int buflen,
+ struct smb2_buffer_desc_v1 *desc,
+ unsigned int desc_len)
{
int ret = -EINVAL;
if (conn->transport->ops->rdma_write)
ret = conn->transport->ops->rdma_write(conn->transport,
buf, buflen,
- remote_key, remote_offset,
- remote_len);
+ desc, desc_len);
return ret;
}
@@ -240,7 +235,7 @@ bool ksmbd_conn_alive(struct ksmbd_conn *conn)
if (!ksmbd_server_running())
return false;
- if (conn->status == KSMBD_SESS_EXITING)
+ if (ksmbd_conn_exiting(conn))
return false;
if (kthread_should_stop())
@@ -263,6 +258,9 @@ bool ksmbd_conn_alive(struct ksmbd_conn *conn)
return true;
}
+#define SMB1_MIN_SUPPORTED_HEADER_SIZE (sizeof(struct smb_hdr))
+#define SMB2_MIN_SUPPORTED_HEADER_SIZE (sizeof(struct smb2_hdr) + 4)
+
/**
* ksmbd_conn_handler_loop() - session thread to listen on new smb requests
* @p: connection instance
@@ -300,16 +298,16 @@ int ksmbd_conn_handler_loop(void *p)
pdu_size = get_rfc1002_len(hdr_buf);
ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size);
- if (conn->status == KSMBD_SESS_GOOD)
+ if (ksmbd_conn_good(conn))
max_allowed_pdu_size =
SMB3_MAX_MSGSIZE + conn->vals->max_write_size;
else
max_allowed_pdu_size = SMB3_MAX_MSGSIZE;
if (pdu_size > max_allowed_pdu_size) {
- pr_err_ratelimited("PDU length(%u) excceed maximum allowed pdu size(%u) on connection(%d)\n",
+ pr_err_ratelimited("PDU length(%u) exceeded maximum allowed pdu size(%u) on connection(%d)\n",
pdu_size, max_allowed_pdu_size,
- conn->status);
+ READ_ONCE(conn->status));
break;
}
@@ -319,6 +317,9 @@ int ksmbd_conn_handler_loop(void *p)
if (pdu_size > MAX_STREAM_PROT_LEN)
break;
+ if (pdu_size < SMB1_MIN_SUPPORTED_HEADER_SIZE)
+ break;
+
/* 4 for rfc1002 length field */
/* 1 for implied bcc[0] */
size = pdu_size + 4 + 1;
@@ -327,8 +328,6 @@ int ksmbd_conn_handler_loop(void *p)
break;
memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf));
- if (!ksmbd_smb_request(conn))
- break;
/*
* We already read 4 bytes to find out PDU size, now
@@ -346,6 +345,15 @@ int ksmbd_conn_handler_loop(void *p)
continue;
}
+ if (!ksmbd_smb_request(conn))
+ break;
+
+ if (((struct smb2_hdr *)smb2_get_msg(conn->request_buf))->ProtocolId ==
+ SMB2_PROTO_NUMBER) {
+ if (pdu_size < SMB2_MIN_SUPPORTED_HEADER_SIZE)
+ break;
+ }
+
if (!default_conn_ops.process_fn) {
pr_err("No connection request callback\n");
break;
@@ -358,10 +366,12 @@ int ksmbd_conn_handler_loop(void *p)
}
out:
+ ksmbd_conn_set_releasing(conn);
/* Wait till all reference dropped to the Server object*/
- while (atomic_read(&conn->r_count) > 0)
- schedule_timeout(HZ);
+ wait_event(conn->r_count_q, atomic_read(&conn->r_count) == 0);
+ if (IS_ENABLED(CONFIG_UNICODE))
+ utf8_unload(conn->um);
unload_nls(conn->local_nls);
if (default_conn_ops.terminate_fn)
default_conn_ops.terminate_fn(conn);
@@ -400,19 +410,26 @@ out:
static void stop_sessions(void)
{
struct ksmbd_conn *conn;
+ struct ksmbd_transport *t;
again:
- read_lock(&conn_list_lock);
+ down_read(&conn_list_lock);
list_for_each_entry(conn, &conn_list, conns_list) {
struct task_struct *task;
- task = conn->transport->handler;
+ t = conn->transport;
+ task = t->handler;
if (task)
ksmbd_debug(CONN, "Stop session handler %s/%d\n",
task->comm, task_pid_nr(task));
- conn->status = KSMBD_SESS_EXITING;
+ ksmbd_conn_set_exiting(conn);
+ if (t->ops->shutdown) {
+ up_read(&conn_list_lock);
+ t->ops->shutdown(t);
+ down_read(&conn_list_lock);
+ }
}
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
if (!list_empty(&conn_list)) {
schedule_timeout_interruptible(HZ / 10); /* 100ms */
diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
index 89eb41bbd160..3c005246a32e 100644
--- a/fs/ksmbd/connection.h
+++ b/fs/ksmbd/connection.h
@@ -14,6 +14,7 @@
#include <net/request_sock.h>
#include <linux/kthread.h>
#include <linux/nls.h>
+#include <linux/unicode.h>
#include "smb_common.h"
#include "ksmbd_work.h"
@@ -25,7 +26,8 @@ enum {
KSMBD_SESS_GOOD,
KSMBD_SESS_EXITING,
KSMBD_SESS_NEED_RECONNECT,
- KSMBD_SESS_NEED_NEGOTIATE
+ KSMBD_SESS_NEED_NEGOTIATE,
+ KSMBD_SESS_RELEASING
};
struct ksmbd_stats {
@@ -46,7 +48,9 @@ struct ksmbd_conn {
char *request_buf;
struct ksmbd_transport *transport;
struct nls_table *local_nls;
+ struct unicode_map *um;
struct list_head conns_list;
+ struct rw_semaphore session_lock;
/* smb session 1 per user */
struct xarray sessions;
unsigned long last_active;
@@ -58,6 +62,7 @@ struct ksmbd_conn {
unsigned int outstanding_credits;
spinlock_t credits_lock;
wait_queue_head_t req_running_q;
+ wait_queue_head_t r_count_q;
/* Lock to protect requests list*/
spinlock_t request_lock;
struct list_head requests;
@@ -110,16 +115,20 @@ struct ksmbd_conn_ops {
struct ksmbd_transport_ops {
int (*prepare)(struct ksmbd_transport *t);
void (*disconnect)(struct ksmbd_transport *t);
+ void (*shutdown)(struct ksmbd_transport *t);
int (*read)(struct ksmbd_transport *t, char *buf,
unsigned int size, int max_retries);
int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
int size, bool need_invalidate_rkey,
unsigned int remote_key);
- int (*rdma_read)(struct ksmbd_transport *t, void *buf, unsigned int len,
- u32 remote_key, u64 remote_offset, u32 remote_len);
- int (*rdma_write)(struct ksmbd_transport *t, void *buf,
- unsigned int len, u32 remote_key, u64 remote_offset,
- u32 remote_len);
+ int (*rdma_read)(struct ksmbd_transport *t,
+ void *buf, unsigned int len,
+ struct smb2_buffer_desc_v1 *desc,
+ unsigned int desc_len);
+ int (*rdma_write)(struct ksmbd_transport *t,
+ void *buf, unsigned int len,
+ struct smb2_buffer_desc_v1 *desc,
+ unsigned int desc_len);
};
struct ksmbd_transport {
@@ -133,26 +142,30 @@ struct ksmbd_transport {
#define KSMBD_TCP_PEER_SOCKADDR(c) ((struct sockaddr *)&((c)->peer_addr))
extern struct list_head conn_list;
-extern rwlock_t conn_list_lock;
+extern struct rw_semaphore conn_list_lock;
bool ksmbd_conn_alive(struct ksmbd_conn *conn);
-void ksmbd_conn_wait_idle(struct ksmbd_conn *conn);
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id);
struct ksmbd_conn *ksmbd_conn_alloc(void);
void ksmbd_conn_free(struct ksmbd_conn *conn);
bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c);
int ksmbd_conn_write(struct ksmbd_work *work);
-int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf,
- unsigned int buflen, u32 remote_key, u64 remote_offset,
- u32 remote_len);
-int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf,
- unsigned int buflen, u32 remote_key, u64 remote_offset,
- u32 remote_len);
+int ksmbd_conn_rdma_read(struct ksmbd_conn *conn,
+ void *buf, unsigned int buflen,
+ struct smb2_buffer_desc_v1 *desc,
+ unsigned int desc_len);
+int ksmbd_conn_rdma_write(struct ksmbd_conn *conn,
+ void *buf, unsigned int buflen,
+ struct smb2_buffer_desc_v1 *desc,
+ unsigned int desc_len);
void ksmbd_conn_enqueue_request(struct ksmbd_work *work);
-int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work);
+void ksmbd_conn_try_dequeue_request(struct ksmbd_work *work);
void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops);
int ksmbd_conn_handler_loop(void *p);
int ksmbd_conn_transport_init(void);
void ksmbd_conn_transport_destroy(void);
+void ksmbd_conn_lock(struct ksmbd_conn *conn);
+void ksmbd_conn_unlock(struct ksmbd_conn *conn);
/*
* WARNING
@@ -160,43 +173,60 @@ void ksmbd_conn_transport_destroy(void);
* This is a hack. We will move status to a proper place once we land
* a multi-sessions support.
*/
-static inline bool ksmbd_conn_good(struct ksmbd_work *work)
+static inline bool ksmbd_conn_good(struct ksmbd_conn *conn)
{
- return work->conn->status == KSMBD_SESS_GOOD;
+ return READ_ONCE(conn->status) == KSMBD_SESS_GOOD;
}
-static inline bool ksmbd_conn_need_negotiate(struct ksmbd_work *work)
+static inline bool ksmbd_conn_need_negotiate(struct ksmbd_conn *conn)
{
- return work->conn->status == KSMBD_SESS_NEED_NEGOTIATE;
+ return READ_ONCE(conn->status) == KSMBD_SESS_NEED_NEGOTIATE;
}
-static inline bool ksmbd_conn_need_reconnect(struct ksmbd_work *work)
+static inline bool ksmbd_conn_need_reconnect(struct ksmbd_conn *conn)
{
- return work->conn->status == KSMBD_SESS_NEED_RECONNECT;
+ return READ_ONCE(conn->status) == KSMBD_SESS_NEED_RECONNECT;
}
-static inline bool ksmbd_conn_exiting(struct ksmbd_work *work)
+static inline bool ksmbd_conn_exiting(struct ksmbd_conn *conn)
{
- return work->conn->status == KSMBD_SESS_EXITING;
+ return READ_ONCE(conn->status) == KSMBD_SESS_EXITING;
}
-static inline void ksmbd_conn_set_good(struct ksmbd_work *work)
+static inline bool ksmbd_conn_releasing(struct ksmbd_conn *conn)
{
- work->conn->status = KSMBD_SESS_GOOD;
+ return READ_ONCE(conn->status) == KSMBD_SESS_RELEASING;
}
-static inline void ksmbd_conn_set_need_negotiate(struct ksmbd_work *work)
+static inline void ksmbd_conn_set_new(struct ksmbd_conn *conn)
{
- work->conn->status = KSMBD_SESS_NEED_NEGOTIATE;
+ WRITE_ONCE(conn->status, KSMBD_SESS_NEW);
}
-static inline void ksmbd_conn_set_need_reconnect(struct ksmbd_work *work)
+static inline void ksmbd_conn_set_good(struct ksmbd_conn *conn)
{
- work->conn->status = KSMBD_SESS_NEED_RECONNECT;
+ WRITE_ONCE(conn->status, KSMBD_SESS_GOOD);
}
-static inline void ksmbd_conn_set_exiting(struct ksmbd_work *work)
+static inline void ksmbd_conn_set_need_negotiate(struct ksmbd_conn *conn)
{
- work->conn->status = KSMBD_SESS_EXITING;
+ WRITE_ONCE(conn->status, KSMBD_SESS_NEED_NEGOTIATE);
}
+
+static inline void ksmbd_conn_set_need_reconnect(struct ksmbd_conn *conn)
+{
+ WRITE_ONCE(conn->status, KSMBD_SESS_NEED_RECONNECT);
+}
+
+static inline void ksmbd_conn_set_exiting(struct ksmbd_conn *conn)
+{
+ WRITE_ONCE(conn->status, KSMBD_SESS_EXITING);
+}
+
+static inline void ksmbd_conn_set_releasing(struct ksmbd_conn *conn)
+{
+ WRITE_ONCE(conn->status, KSMBD_SESS_RELEASING);
+}
+
+void ksmbd_all_conn_set_status(u64 sess_id, u32 status);
#endif /* __CONNECTION_H__ */
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
index fae859d59c79..821ed8e3cbee 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -74,6 +74,7 @@ struct ksmbd_heartbeat {
#define KSMBD_GLOBAL_FLAG_SMB2_LEASES BIT(0)
#define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION BIT(1)
#define KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL BIT(2)
+#define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF BIT(3)
/*
* IPC request for ksmbd server startup
@@ -164,7 +165,8 @@ struct ksmbd_share_config_response {
__u16 force_directory_mode;
__u16 force_uid;
__u16 force_gid;
- __u32 reserved[128]; /* Reserved room */
+ __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME];
+ __u32 reserved[112]; /* Reserved room */
__u32 veto_list_sz;
__s8 ____payload[];
};
@@ -350,6 +352,8 @@ enum KSMBD_TREE_CONN_STATUS {
#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
+#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
+#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15)
/*
* Tree connect request flags.
@@ -365,6 +369,7 @@ enum KSMBD_TREE_CONN_STATUS {
#define KSMBD_TREE_CONN_FLAG_READ_ONLY BIT(1)
#define KSMBD_TREE_CONN_FLAG_WRITABLE BIT(2)
#define KSMBD_TREE_CONN_FLAG_ADMIN_ACCOUNT BIT(3)
+#define KSMBD_TREE_CONN_FLAG_UPDATE BIT(4)
/*
* RPC over IPC.
diff --git a/fs/ksmbd/ksmbd_work.c b/fs/ksmbd/ksmbd_work.c
index fd58eb4809f6..d7c676c151e2 100644
--- a/fs/ksmbd/ksmbd_work.c
+++ b/fs/ksmbd/ksmbd_work.c
@@ -27,18 +27,38 @@ struct ksmbd_work *ksmbd_alloc_work_struct(void)
INIT_LIST_HEAD(&work->async_request_entry);
INIT_LIST_HEAD(&work->fp_entry);
INIT_LIST_HEAD(&work->interim_entry);
+ INIT_LIST_HEAD(&work->aux_read_list);
+ work->iov_alloc_cnt = 4;
+ work->iov = kcalloc(work->iov_alloc_cnt, sizeof(struct kvec),
+ GFP_KERNEL);
+ if (!work->iov) {
+ kmem_cache_free(work_cache, work);
+ work = NULL;
+ }
}
return work;
}
void ksmbd_free_work_struct(struct ksmbd_work *work)
{
+ struct aux_read *ar, *tmp;
+
WARN_ON(work->saved_cred != NULL);
kvfree(work->response_buf);
- kvfree(work->aux_payload_buf);
+
+ list_for_each_entry_safe(ar, tmp, &work->aux_read_list, entry) {
+ kvfree(ar->buf);
+ list_del(&ar->entry);
+ kfree(ar);
+ }
+
kfree(work->tr_buf);
kvfree(work->request_buf);
+ kfree(work->iov);
+ if (!list_empty(&work->interim_entry))
+ list_del(&work->interim_entry);
+
if (work->async_id)
ksmbd_release_id(&work->conn->async_ida, work->async_id);
kmem_cache_free(work_cache, work);
@@ -69,7 +89,6 @@ int ksmbd_workqueue_init(void)
void ksmbd_workqueue_destroy(void)
{
- flush_workqueue(ksmbd_wq);
destroy_workqueue(ksmbd_wq);
ksmbd_wq = NULL;
}
@@ -78,3 +97,81 @@ bool ksmbd_queue_work(struct ksmbd_work *work)
{
return queue_work(ksmbd_wq, &work->work);
}
+
+static inline void __ksmbd_iov_pin(struct ksmbd_work *work, void *ib,
+ unsigned int ib_len)
+{
+ work->iov[++work->iov_idx].iov_base = ib;
+ work->iov[work->iov_idx].iov_len = ib_len;
+ work->iov_cnt++;
+}
+
+static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len,
+ void *aux_buf, unsigned int aux_size)
+{
+ struct aux_read *ar = NULL;
+ int need_iov_cnt = 1;
+
+ if (aux_size) {
+ need_iov_cnt++;
+ ar = kmalloc(sizeof(struct aux_read), GFP_KERNEL);
+ if (!ar)
+ return -ENOMEM;
+ }
+
+ if (work->iov_alloc_cnt < work->iov_cnt + need_iov_cnt) {
+ struct kvec *new;
+
+ work->iov_alloc_cnt += 4;
+ new = krealloc(work->iov,
+ sizeof(struct kvec) * work->iov_alloc_cnt,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!new) {
+ kfree(ar);
+ work->iov_alloc_cnt -= 4;
+ return -ENOMEM;
+ }
+ work->iov = new;
+ }
+
+ /* Plus rfc_length size on first iov */
+ if (!work->iov_idx) {
+ work->iov[work->iov_idx].iov_base = work->response_buf;
+ *(__be32 *)work->iov[0].iov_base = 0;
+ work->iov[work->iov_idx].iov_len = 4;
+ work->iov_cnt++;
+ }
+
+ __ksmbd_iov_pin(work, ib, len);
+ inc_rfc1001_len(work->iov[0].iov_base, len);
+
+ if (aux_size) {
+ __ksmbd_iov_pin(work, aux_buf, aux_size);
+ inc_rfc1001_len(work->iov[0].iov_base, aux_size);
+
+ ar->buf = aux_buf;
+ list_add(&ar->entry, &work->aux_read_list);
+ }
+
+ return 0;
+}
+
+int ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len)
+{
+ return __ksmbd_iov_pin_rsp(work, ib, len, NULL, 0);
+}
+
+int ksmbd_iov_pin_rsp_read(struct ksmbd_work *work, void *ib, int len,
+ void *aux_buf, unsigned int aux_size)
+{
+ return __ksmbd_iov_pin_rsp(work, ib, len, aux_buf, aux_size);
+}
+
+int allocate_interim_rsp_buf(struct ksmbd_work *work)
+{
+ work->response_buf = kzalloc(MAX_CIFS_SMALL_BUFFER_SIZE, GFP_KERNEL);
+ if (!work->response_buf)
+ return -ENOMEM;
+ work->response_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
+ return 0;
+}
diff --git a/fs/ksmbd/ksmbd_work.h b/fs/ksmbd/ksmbd_work.h
index f7156bc50049..8ca2c813246e 100644
--- a/fs/ksmbd/ksmbd_work.h
+++ b/fs/ksmbd/ksmbd_work.h
@@ -19,6 +19,11 @@ enum {
KSMBD_WORK_CLOSED,
};
+struct aux_read {
+ void *buf;
+ struct list_head entry;
+};
+
/* one of these for every pending CIFS request at the connection */
struct ksmbd_work {
/* Server corresponding to this mid */
@@ -31,13 +36,19 @@ struct ksmbd_work {
/* Response buffer */
void *response_buf;
- /* Read data buffer */
- void *aux_payload_buf;
+ struct list_head aux_read_list;
+
+ struct kvec *iov;
+ int iov_alloc_cnt;
+ int iov_cnt;
+ int iov_idx;
/* Next cmd hdr in compound req buf*/
int next_smb2_rcv_hdr_off;
/* Next cmd hdr in compound rsp buf*/
int next_smb2_rsp_hdr_off;
+ /* Current cmd hdr in compound rsp buf*/
+ int curr_smb2_rsp_hdr_off;
/*
* Current Local FID assigned compound response if SMB2 CREATE
@@ -53,22 +64,17 @@ struct ksmbd_work {
unsigned int credits_granted;
/* response smb header size */
- unsigned int resp_hdr_sz;
unsigned int response_sz;
- /* Read data count */
- unsigned int aux_payload_sz;
void *tr_buf;
unsigned char state;
- /* Multiple responses for one request e.g. SMB ECHO */
- bool multiRsp:1;
/* No response for cancelled request */
bool send_no_response:1;
/* Request is encrypted */
bool encrypted:1;
/* Is this SYNC or ASYNC ksmbd_work */
- bool syncronous:1;
+ bool asynchronous:1;
bool need_invalidate_rkey:1;
unsigned int remote_key;
@@ -92,7 +98,16 @@ struct ksmbd_work {
*/
static inline void *ksmbd_resp_buf_next(struct ksmbd_work *work)
{
- return work->response_buf + work->next_smb2_rsp_hdr_off;
+ return work->response_buf + work->next_smb2_rsp_hdr_off + 4;
+}
+
+/**
+ * ksmbd_resp_buf_curr - Get current buffer on compound response.
+ * @work: smb work containing response buffer
+ */
+static inline void *ksmbd_resp_buf_curr(struct ksmbd_work *work)
+{
+ return work->response_buf + work->curr_smb2_rsp_hdr_off + 4;
}
/**
@@ -101,7 +116,7 @@ static inline void *ksmbd_resp_buf_next(struct ksmbd_work *work)
*/
static inline void *ksmbd_req_buf_next(struct ksmbd_work *work)
{
- return work->request_buf + work->next_smb2_rcv_hdr_off;
+ return work->request_buf + work->next_smb2_rcv_hdr_off + 4;
}
struct ksmbd_work *ksmbd_alloc_work_struct(void);
@@ -113,5 +128,8 @@ int ksmbd_work_pool_init(void);
int ksmbd_workqueue_init(void);
void ksmbd_workqueue_destroy(void);
bool ksmbd_queue_work(struct ksmbd_work *work);
-
+int ksmbd_iov_pin_rsp_read(struct ksmbd_work *work, void *ib, int len,
+ void *aux_buf, unsigned int aux_size);
+int ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len);
+int allocate_interim_rsp_buf(struct ksmbd_work *work);
#endif /* __KSMBD_WORK_H__ */
diff --git a/fs/ksmbd/mgmt/share_config.c b/fs/ksmbd/mgmt/share_config.c
index cb72d30f5b71..328a412259dc 100644
--- a/fs/ksmbd/mgmt/share_config.c
+++ b/fs/ksmbd/mgmt/share_config.c
@@ -16,6 +16,7 @@
#include "user_config.h"
#include "user_session.h"
#include "../transport_ipc.h"
+#include "../misc.h"
#define SHARE_HASH_BITS 3
static DEFINE_HASHTABLE(shares_table, SHARE_HASH_BITS);
@@ -26,7 +27,7 @@ struct ksmbd_veto_pattern {
struct list_head list;
};
-static unsigned int share_name_hash(char *name)
+static unsigned int share_name_hash(const char *name)
{
return jhash(name, strlen(name), 0);
}
@@ -51,12 +52,16 @@ static void kill_share(struct ksmbd_share_config *share)
kfree(share);
}
-void __ksmbd_share_config_put(struct ksmbd_share_config *share)
+void ksmbd_share_config_del(struct ksmbd_share_config *share)
{
down_write(&shares_table_lock);
hash_del(&share->hlist);
up_write(&shares_table_lock);
+}
+void __ksmbd_share_config_put(struct ksmbd_share_config *share)
+{
+ ksmbd_share_config_del(share);
kill_share(share);
}
@@ -68,7 +73,7 @@ __get_share_config(struct ksmbd_share_config *share)
return share;
}
-static struct ksmbd_share_config *__share_lookup(char *name)
+static struct ksmbd_share_config *__share_lookup(const char *name)
{
struct ksmbd_share_config *share;
unsigned int key = share_name_hash(name);
@@ -115,7 +120,8 @@ static int parse_veto_list(struct ksmbd_share_config *share,
return 0;
}
-static struct ksmbd_share_config *share_config_request(char *name)
+static struct ksmbd_share_config *share_config_request(struct unicode_map *um,
+ const char *name)
{
struct ksmbd_share_config_response *resp;
struct ksmbd_share_config *share = NULL;
@@ -129,6 +135,19 @@ static struct ksmbd_share_config *share_config_request(char *name)
if (resp->flags == KSMBD_SHARE_FLAG_INVALID)
goto out;
+ if (*resp->share_name) {
+ char *cf_resp_name;
+ bool equal;
+
+ cf_resp_name = ksmbd_casefold_sharename(um, resp->share_name);
+ if (IS_ERR(cf_resp_name))
+ goto out;
+ equal = !strcmp(cf_resp_name, name);
+ kfree(cf_resp_name);
+ if (!equal)
+ goto out;
+ }
+
share = kzalloc(sizeof(struct ksmbd_share_config), GFP_KERNEL);
if (!share)
goto out;
@@ -186,20 +205,11 @@ out:
return share;
}
-static void strtolower(char *share_name)
-{
- while (*share_name) {
- *share_name = tolower(*share_name);
- share_name++;
- }
-}
-
-struct ksmbd_share_config *ksmbd_share_config_get(char *name)
+struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um,
+ const char *name)
{
struct ksmbd_share_config *share;
- strtolower(name);
-
down_read(&shares_table_lock);
share = __share_lookup(name);
if (share)
@@ -208,7 +218,7 @@ struct ksmbd_share_config *ksmbd_share_config_get(char *name)
if (share)
return share;
- return share_config_request(name);
+ return share_config_request(um, name);
}
bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
@@ -222,17 +232,3 @@ bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
}
return false;
}
-
-void ksmbd_share_configs_cleanup(void)
-{
- struct ksmbd_share_config *share;
- struct hlist_node *tmp;
- int i;
-
- down_write(&shares_table_lock);
- hash_for_each_safe(shares_table, i, tmp, share, hlist) {
- hash_del(&share->hlist);
- kill_share(share);
- }
- up_write(&shares_table_lock);
-}
diff --git a/fs/ksmbd/mgmt/share_config.h b/fs/ksmbd/mgmt/share_config.h
index 953befc94e84..5f591751b923 100644
--- a/fs/ksmbd/mgmt/share_config.h
+++ b/fs/ksmbd/mgmt/share_config.h
@@ -9,6 +9,7 @@
#include <linux/workqueue.h>
#include <linux/hashtable.h>
#include <linux/path.h>
+#include <linux/unicode.h>
struct ksmbd_share_config {
char *name;
@@ -33,29 +34,22 @@ struct ksmbd_share_config {
#define KSMBD_SHARE_INVALID_UID ((__u16)-1)
#define KSMBD_SHARE_INVALID_GID ((__u16)-1)
-static inline int share_config_create_mode(struct ksmbd_share_config *share,
- umode_t posix_mode)
+static inline umode_t
+share_config_create_mode(struct ksmbd_share_config *share,
+ umode_t posix_mode)
{
- if (!share->force_create_mode) {
- if (!posix_mode)
- return share->create_mask;
- else
- return posix_mode & share->create_mask;
- }
- return share->force_create_mode & share->create_mask;
+ umode_t mode = (posix_mode ?: (umode_t)-1) & share->create_mask;
+
+ return mode | share->force_create_mode;
}
-static inline int share_config_directory_mode(struct ksmbd_share_config *share,
- umode_t posix_mode)
+static inline umode_t
+share_config_directory_mode(struct ksmbd_share_config *share,
+ umode_t posix_mode)
{
- if (!share->force_directory_mode) {
- if (!posix_mode)
- return share->directory_mask;
- else
- return posix_mode & share->directory_mask;
- }
+ umode_t mode = (posix_mode ?: (umode_t)-1) & share->directory_mask;
- return share->force_directory_mode & share->directory_mask;
+ return mode | share->force_directory_mode;
}
static inline int test_share_config_flag(struct ksmbd_share_config *share,
@@ -64,6 +58,7 @@ static inline int test_share_config_flag(struct ksmbd_share_config *share,
return share->flags & flag;
}
+void ksmbd_share_config_del(struct ksmbd_share_config *share);
void __ksmbd_share_config_put(struct ksmbd_share_config *share);
static inline void ksmbd_share_config_put(struct ksmbd_share_config *share)
@@ -73,9 +68,8 @@ static inline void ksmbd_share_config_put(struct ksmbd_share_config *share)
__ksmbd_share_config_put(share);
}
-struct ksmbd_share_config *ksmbd_share_config_get(char *name);
+struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um,
+ const char *name);
bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
const char *filename);
-void ksmbd_share_configs_cleanup(void);
-
#endif /* __SHARE_CONFIG_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c
index dd262daa2c4a..d2c81a8a11dd 100644
--- a/fs/ksmbd/mgmt/tree_connect.c
+++ b/fs/ksmbd/mgmt/tree_connect.c
@@ -17,7 +17,7 @@
struct ksmbd_tree_conn_status
ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
- char *share_name)
+ const char *share_name)
{
struct ksmbd_tree_conn_status status = {-ENOENT, NULL};
struct ksmbd_tree_connect_response *resp = NULL;
@@ -26,7 +26,7 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
struct sockaddr *peer_addr;
int ret;
- sc = ksmbd_share_config_get(share_name);
+ sc = ksmbd_share_config_get(conn->um, share_name);
if (!sc)
return status;
@@ -57,9 +57,26 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
goto out_error;
tree_conn->flags = resp->connection_flags;
+ if (test_tree_conn_flag(tree_conn, KSMBD_TREE_CONN_FLAG_UPDATE)) {
+ struct ksmbd_share_config *new_sc;
+
+ ksmbd_share_config_del(sc);
+ new_sc = ksmbd_share_config_get(conn->um, share_name);
+ if (!new_sc) {
+ pr_err("Failed to update stale share config\n");
+ status.ret = -ESTALE;
+ goto out_error;
+ }
+ ksmbd_share_config_put(sc);
+ sc = new_sc;
+ }
+
tree_conn->user = sess->user;
tree_conn->share_conf = sc;
+ tree_conn->t_state = TREE_NEW;
status.tree_conn = tree_conn;
+ atomic_set(&tree_conn->refcount, 1);
+ init_waitqueue_head(&tree_conn->refcount_q);
ret = xa_err(xa_store(&sess->tree_conns, tree_conn->id, tree_conn,
GFP_KERNEL));
@@ -79,14 +96,33 @@ out_error:
return status;
}
+void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon)
+{
+ /*
+ * Checking waitqueue to releasing tree connect on
+ * tree disconnect. waitqueue_active is safe because it
+ * uses atomic operation for condition.
+ */
+ if (!atomic_dec_return(&tcon->refcount) &&
+ waitqueue_active(&tcon->refcount_q))
+ wake_up(&tcon->refcount_q);
+}
+
int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
struct ksmbd_tree_connect *tree_conn)
{
int ret;
+ write_lock(&sess->tree_conns_lock);
+ xa_erase(&sess->tree_conns, tree_conn->id);
+ write_unlock(&sess->tree_conns_lock);
+
+ if (!atomic_dec_and_test(&tree_conn->refcount))
+ wait_event(tree_conn->refcount_q,
+ atomic_read(&tree_conn->refcount) == 0);
+
ret = ksmbd_ipc_tree_disconnect_request(sess->id, tree_conn->id);
ksmbd_release_tree_conn_id(sess, tree_conn->id);
- xa_erase(&sess->tree_conns, tree_conn->id);
ksmbd_share_config_put(tree_conn->share_conf);
kfree(tree_conn);
return ret;
@@ -95,18 +131,19 @@ int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
struct ksmbd_tree_connect *ksmbd_tree_conn_lookup(struct ksmbd_session *sess,
unsigned int id)
{
- return xa_load(&sess->tree_conns, id);
-}
-
-struct ksmbd_share_config *ksmbd_tree_conn_share(struct ksmbd_session *sess,
- unsigned int id)
-{
- struct ksmbd_tree_connect *tc;
+ struct ksmbd_tree_connect *tcon;
+
+ read_lock(&sess->tree_conns_lock);
+ tcon = xa_load(&sess->tree_conns, id);
+ if (tcon) {
+ if (tcon->t_state != TREE_CONNECTED)
+ tcon = NULL;
+ else if (!atomic_inc_not_zero(&tcon->refcount))
+ tcon = NULL;
+ }
+ read_unlock(&sess->tree_conns_lock);
- tc = ksmbd_tree_conn_lookup(sess, id);
- if (tc)
- return tc->share_conf;
- return NULL;
+ return tcon;
}
int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess)
@@ -115,8 +152,21 @@ int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess)
struct ksmbd_tree_connect *tc;
unsigned long id;
- xa_for_each(&sess->tree_conns, id, tc)
+ if (!sess)
+ return -EINVAL;
+
+ xa_for_each(&sess->tree_conns, id, tc) {
+ write_lock(&sess->tree_conns_lock);
+ if (tc->t_state == TREE_DISCONNECTED) {
+ write_unlock(&sess->tree_conns_lock);
+ ret = -ENOENT;
+ continue;
+ }
+ tc->t_state = TREE_DISCONNECTED;
+ write_unlock(&sess->tree_conns_lock);
+
ret |= ksmbd_tree_conn_disconnect(sess, tc);
+ }
xa_destroy(&sess->tree_conns);
return ret;
}
diff --git a/fs/ksmbd/mgmt/tree_connect.h b/fs/ksmbd/mgmt/tree_connect.h
index 71e50271dccf..6377a70b811c 100644
--- a/fs/ksmbd/mgmt/tree_connect.h
+++ b/fs/ksmbd/mgmt/tree_connect.h
@@ -14,6 +14,12 @@ struct ksmbd_share_config;
struct ksmbd_user;
struct ksmbd_conn;
+enum {
+ TREE_NEW = 0,
+ TREE_CONNECTED,
+ TREE_DISCONNECTED
+};
+
struct ksmbd_tree_connect {
int id;
@@ -25,6 +31,9 @@ struct ksmbd_tree_connect {
int maximal_access;
bool posix_extensions;
+ atomic_t refcount;
+ wait_queue_head_t refcount_q;
+ unsigned int t_state;
};
struct ksmbd_tree_conn_status {
@@ -42,7 +51,8 @@ struct ksmbd_session;
struct ksmbd_tree_conn_status
ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
- char *share_name);
+ const char *share_name);
+void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon);
int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
struct ksmbd_tree_connect *tree_conn);
@@ -50,9 +60,6 @@ int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
struct ksmbd_tree_connect *ksmbd_tree_conn_lookup(struct ksmbd_session *sess,
unsigned int id);
-struct ksmbd_share_config *ksmbd_tree_conn_share(struct ksmbd_session *sess,
- unsigned int id);
-
int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess);
#endif /* __TREE_CONNECT_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h
index 6a44109617f1..e068a19fd904 100644
--- a/fs/ksmbd/mgmt/user_config.h
+++ b/fs/ksmbd/mgmt/user_config.h
@@ -18,7 +18,6 @@ struct ksmbd_user {
size_t passkey_sz;
char *passkey;
- unsigned int failed_login_count;
};
static inline bool user_guest(struct ksmbd_user *user)
diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/ksmbd/mgmt/user_session.c
index 92b1603b5abe..15f68ee05089 100644
--- a/fs/ksmbd/mgmt/user_session.c
+++ b/fs/ksmbd/mgmt/user_session.c
@@ -25,20 +25,19 @@ static DECLARE_RWSEM(sessions_table_lock);
struct ksmbd_session_rpc {
int id;
unsigned int method;
- struct list_head list;
};
static void free_channel_list(struct ksmbd_session *sess)
{
- struct channel *chann, *tmp;
+ struct channel *chann;
+ unsigned long index;
- write_lock(&sess->chann_lock);
- list_for_each_entry_safe(chann, tmp, &sess->ksmbd_chann_list,
- chann_list) {
- list_del(&chann->chann_list);
+ xa_for_each(&sess->ksmbd_chann_list, index, chann) {
+ xa_erase(&sess->ksmbd_chann_list, index);
kfree(chann);
}
- write_unlock(&sess->chann_lock);
+
+ xa_destroy(&sess->ksmbd_chann_list);
}
static void __session_rpc_close(struct ksmbd_session *sess,
@@ -58,15 +57,14 @@ static void __session_rpc_close(struct ksmbd_session *sess,
static void ksmbd_session_rpc_clear_list(struct ksmbd_session *sess)
{
struct ksmbd_session_rpc *entry;
+ long index;
- while (!list_empty(&sess->rpc_handle_list)) {
- entry = list_entry(sess->rpc_handle_list.next,
- struct ksmbd_session_rpc,
- list);
-
- list_del(&entry->list);
+ xa_for_each(&sess->rpc_handle_list, index, entry) {
+ xa_erase(&sess->rpc_handle_list, index);
__session_rpc_close(sess, entry);
}
+
+ xa_destroy(&sess->rpc_handle_list);
}
static int __rpc_method(char *rpc_name)
@@ -102,13 +100,13 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
entry = kzalloc(sizeof(struct ksmbd_session_rpc), GFP_KERNEL);
if (!entry)
- return -EINVAL;
+ return -ENOMEM;
- list_add(&entry->list, &sess->rpc_handle_list);
entry->method = method;
entry->id = ksmbd_ipc_id_alloc();
if (entry->id < 0)
goto free_entry;
+ xa_store(&sess->rpc_handle_list, entry->id, entry, GFP_KERNEL);
resp = ksmbd_rpc_open(sess, entry->id);
if (!resp)
@@ -117,9 +115,9 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
kvfree(resp);
return entry->id;
free_id:
+ xa_erase(&sess->rpc_handle_list, entry->id);
ksmbd_rpc_id_free(entry->id);
free_entry:
- list_del(&entry->list);
kfree(entry);
return -EINVAL;
}
@@ -128,24 +126,17 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id)
{
struct ksmbd_session_rpc *entry;
- list_for_each_entry(entry, &sess->rpc_handle_list, list) {
- if (entry->id == id) {
- list_del(&entry->list);
- __session_rpc_close(sess, entry);
- break;
- }
- }
+ entry = xa_erase(&sess->rpc_handle_list, id);
+ if (entry)
+ __session_rpc_close(sess, entry);
}
int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id)
{
struct ksmbd_session_rpc *entry;
- list_for_each_entry(entry, &sess->rpc_handle_list, list) {
- if (entry->id == id)
- return entry->method;
- }
- return 0;
+ entry = xa_load(&sess->rpc_handle_list, id);
+ return entry ? entry->method : 0;
}
void ksmbd_session_destroy(struct ksmbd_session *sess)
@@ -153,10 +144,6 @@ void ksmbd_session_destroy(struct ksmbd_session *sess)
if (!sess)
return;
- down_write(&sessions_table_lock);
- hash_del(&sess->hlist);
- up_write(&sessions_table_lock);
-
if (sess->user)
ksmbd_free_user(sess->user);
@@ -174,76 +161,105 @@ static struct ksmbd_session *__session_lookup(unsigned long long id)
struct ksmbd_session *sess;
hash_for_each_possible(sessions_table, sess, hlist, id) {
- if (id == sess->id)
+ if (id == sess->id) {
+ sess->last_active = jiffies;
return sess;
+ }
}
return NULL;
}
+static void ksmbd_expire_session(struct ksmbd_conn *conn)
+{
+ unsigned long id;
+ struct ksmbd_session *sess;
+
+ down_write(&conn->session_lock);
+ xa_for_each(&conn->sessions, id, sess) {
+ if (sess->state != SMB2_SESSION_VALID ||
+ time_after(jiffies,
+ sess->last_active + SMB2_SESSION_TIMEOUT)) {
+ xa_erase(&conn->sessions, sess->id);
+ hash_del(&sess->hlist);
+ ksmbd_session_destroy(sess);
+ continue;
+ }
+ }
+ up_write(&conn->session_lock);
+}
+
int ksmbd_session_register(struct ksmbd_conn *conn,
struct ksmbd_session *sess)
{
sess->dialect = conn->dialect;
memcpy(sess->ClientGUID, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
+ ksmbd_expire_session(conn);
return xa_err(xa_store(&conn->sessions, sess->id, sess, GFP_KERNEL));
}
static int ksmbd_chann_del(struct ksmbd_conn *conn, struct ksmbd_session *sess)
{
- struct channel *chann, *tmp;
-
- write_lock(&sess->chann_lock);
- list_for_each_entry_safe(chann, tmp, &sess->ksmbd_chann_list,
- chann_list) {
- if (chann->conn == conn) {
- list_del(&chann->chann_list);
- kfree(chann);
- write_unlock(&sess->chann_lock);
- return 0;
- }
- }
- write_unlock(&sess->chann_lock);
+ struct channel *chann;
+
+ chann = xa_erase(&sess->ksmbd_chann_list, (long)conn);
+ if (!chann)
+ return -ENOENT;
- return -ENOENT;
+ kfree(chann);
+ return 0;
}
void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
{
struct ksmbd_session *sess;
+ unsigned long id;
+ down_write(&sessions_table_lock);
if (conn->binding) {
int bkt;
+ struct hlist_node *tmp;
- down_write(&sessions_table_lock);
- hash_for_each(sessions_table, bkt, sess, hlist) {
- if (!ksmbd_chann_del(conn, sess)) {
- up_write(&sessions_table_lock);
- goto sess_destroy;
+ hash_for_each_safe(sessions_table, bkt, tmp, sess, hlist) {
+ if (!ksmbd_chann_del(conn, sess) &&
+ xa_empty(&sess->ksmbd_chann_list)) {
+ hash_del(&sess->hlist);
+ ksmbd_session_destroy(sess);
}
}
- up_write(&sessions_table_lock);
- } else {
- unsigned long id;
-
- xa_for_each(&conn->sessions, id, sess) {
- if (!ksmbd_chann_del(conn, sess))
- goto sess_destroy;
- }
}
+ up_write(&sessions_table_lock);
- return;
+ down_write(&conn->session_lock);
+ xa_for_each(&conn->sessions, id, sess) {
+ unsigned long chann_id;
+ struct channel *chann;
-sess_destroy:
- if (list_empty(&sess->ksmbd_chann_list)) {
- xa_erase(&conn->sessions, sess->id);
- ksmbd_session_destroy(sess);
+ xa_for_each(&sess->ksmbd_chann_list, chann_id, chann) {
+ if (chann->conn != conn)
+ ksmbd_conn_set_exiting(chann->conn);
+ }
+
+ ksmbd_chann_del(conn, sess);
+ if (xa_empty(&sess->ksmbd_chann_list)) {
+ xa_erase(&conn->sessions, sess->id);
+ hash_del(&sess->hlist);
+ ksmbd_session_destroy(sess);
+ }
}
+ up_write(&conn->session_lock);
}
struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
unsigned long long id)
{
- return xa_load(&conn->sessions, id);
+ struct ksmbd_session *sess;
+
+ down_read(&conn->session_lock);
+ sess = xa_load(&conn->sessions, id);
+ if (sess)
+ sess->last_active = jiffies;
+ up_read(&conn->session_lock);
+ return sess;
}
struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id)
@@ -252,6 +268,8 @@ struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id)
down_read(&sessions_table_lock);
sess = __session_lookup(id);
+ if (sess)
+ sess->last_active = jiffies;
up_read(&sessions_table_lock);
return sess;
@@ -320,6 +338,9 @@ static struct ksmbd_session *__session_create(int protocol)
struct ksmbd_session *sess;
int ret;
+ if (protocol != CIFDS_SESSION_FLAG_SMB2)
+ return NULL;
+
sess = kzalloc(sizeof(struct ksmbd_session), GFP_KERNEL);
if (!sess)
return NULL;
@@ -327,32 +348,25 @@ static struct ksmbd_session *__session_create(int protocol)
if (ksmbd_init_file_table(&sess->file_table))
goto error;
+ sess->last_active = jiffies;
+ sess->state = SMB2_SESSION_IN_PROGRESS;
set_session_flag(sess, protocol);
xa_init(&sess->tree_conns);
- INIT_LIST_HEAD(&sess->ksmbd_chann_list);
- INIT_LIST_HEAD(&sess->rpc_handle_list);
+ xa_init(&sess->ksmbd_chann_list);
+ xa_init(&sess->rpc_handle_list);
sess->sequence_number = 1;
- rwlock_init(&sess->chann_lock);
-
- switch (protocol) {
- case CIFDS_SESSION_FLAG_SMB2:
- ret = __init_smb2_session(sess);
- break;
- default:
- ret = -EINVAL;
- break;
- }
+ rwlock_init(&sess->tree_conns_lock);
+ ret = __init_smb2_session(sess);
if (ret)
goto error;
ida_init(&sess->tree_conn_ida);
- if (protocol == CIFDS_SESSION_FLAG_SMB2) {
- down_write(&sessions_table_lock);
- hash_add(sessions_table, &sess->hlist, sess->id);
- up_write(&sessions_table_lock);
- }
+ down_write(&sessions_table_lock);
+ hash_add(sessions_table, &sess->hlist, sess->id);
+ up_write(&sessions_table_lock);
+
return sess;
error:
diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/ksmbd/mgmt/user_session.h
index 8934b8ee275b..63cb08fffde8 100644
--- a/fs/ksmbd/mgmt/user_session.h
+++ b/fs/ksmbd/mgmt/user_session.h
@@ -21,7 +21,6 @@ struct ksmbd_file_table;
struct channel {
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
struct ksmbd_conn *conn;
- struct list_head chann_list;
};
struct preauth_session {
@@ -50,17 +49,18 @@ struct ksmbd_session {
char sess_key[CIFS_KEY_SIZE];
struct hlist_node hlist;
- rwlock_t chann_lock;
- struct list_head ksmbd_chann_list;
+ struct xarray ksmbd_chann_list;
struct xarray tree_conns;
struct ida tree_conn_ida;
- struct list_head rpc_handle_list;
+ struct xarray rpc_handle_list;
__u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
__u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE];
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
struct ksmbd_file_table file_table;
+ unsigned long last_active;
+ rwlock_t tree_conns_lock;
};
static inline int test_session_flag(struct ksmbd_session *sess, int bit)
diff --git a/fs/ksmbd/misc.c b/fs/ksmbd/misc.c
index 60e7ac62c917..9e8afaa686e3 100644
--- a/fs/ksmbd/misc.c
+++ b/fs/ksmbd/misc.c
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/xattr.h>
#include <linux/fs.h>
+#include <linux/unicode.h>
#include "misc.h"
#include "smb_common.h"
@@ -20,7 +21,7 @@
* wildcard '*' and '?'
* TODO : implement consideration about DOS_DOT, DOS_QM and DOS_STAR
*
- * @string: string to compare with a pattern
+ * @str: string to compare with a pattern
* @len: string length
* @pattern: pattern string which might include wildcard '*' and '?'
*
@@ -152,25 +153,47 @@ out:
/**
* convert_to_nt_pathname() - extract and return windows path string
* whose share directory prefix was removed from file path
- * @filename : unix filename
- * @sharepath: share path string
+ * @share: ksmbd_share_config pointer
+ * @path: path to report
*
* Return : windows path string or error
*/
-char *convert_to_nt_pathname(char *filename)
+char *convert_to_nt_pathname(struct ksmbd_share_config *share,
+ const struct path *path)
{
- char *ab_pathname;
+ char *pathname, *ab_pathname, *nt_pathname;
+ int share_path_len = share->path_sz;
- if (strlen(filename) == 0)
- filename = "\\";
+ pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!pathname)
+ return ERR_PTR(-EACCES);
- ab_pathname = kstrdup(filename, GFP_KERNEL);
- if (!ab_pathname)
- return NULL;
+ ab_pathname = d_path(path, pathname, PATH_MAX);
+ if (IS_ERR(ab_pathname)) {
+ nt_pathname = ERR_PTR(-EACCES);
+ goto free_pathname;
+ }
+
+ if (strncmp(ab_pathname, share->path, share_path_len)) {
+ nt_pathname = ERR_PTR(-EACCES);
+ goto free_pathname;
+ }
+
+ nt_pathname = kzalloc(strlen(&ab_pathname[share_path_len]) + 2, GFP_KERNEL);
+ if (!nt_pathname) {
+ nt_pathname = ERR_PTR(-ENOMEM);
+ goto free_pathname;
+ }
+ if (ab_pathname[share_path_len] == '\0')
+ strcpy(nt_pathname, "/");
+ strcat(nt_pathname, &ab_pathname[share_path_len]);
+
+ ksmbd_conv_path_to_windows(nt_pathname);
- ksmbd_conv_path_to_windows(ab_pathname);
- return ab_pathname;
+free_pathname:
+ kfree(pathname);
+ return nt_pathname;
}
int get_nlink(struct kstat *st)
@@ -204,32 +227,59 @@ void ksmbd_conv_path_to_windows(char *path)
strreplace(path, '/', '\\');
}
+/**
+ * ksmbd_casefold_sharename() - return a case-folded copy of a share name
+ * @um:		unicode map used for UTF-8 case folding, may be NULL
+ * @name:	NUL-terminated share name
+ *
+ * UTF-8 case folding is used when CONFIG_UNICODE is enabled and @um is set.
+ * When that is not possible, or utf8_casefold() fails, @name is copied and
+ * only its ASCII characters are lower-cased.
+ *
+ * Return: newly allocated folded name that the caller has to kfree(),
+ *	   ERR_PTR(-ENOMEM) if the buffer cannot be allocated, or
+ *	   ERR_PTR(-E2BIG) if copying @name into the
+ *	   KSMBD_REQ_MAX_SHARE_NAME byte buffer fails.
+ */
+char *ksmbd_casefold_sharename(struct unicode_map *um, const char *name)
+{
+	char *cf_name, *p;
+
+	cf_name = kzalloc(KSMBD_REQ_MAX_SHARE_NAME, GFP_KERNEL);
+	if (!cf_name)
+		return ERR_PTR(-ENOMEM);
+
+	if (IS_ENABLED(CONFIG_UNICODE) && um) {
+		const struct qstr q_name = {.name = name, .len = strlen(name)};
+
+		if (utf8_casefold(um, &q_name, cf_name,
+				  KSMBD_REQ_MAX_SHARE_NAME) >= 0)
+			return cf_name;
+		/* UTF-8 folding failed: fall back to ASCII lower-casing. */
+	}
+
+	if (strscpy(cf_name, name, KSMBD_REQ_MAX_SHARE_NAME) < 0) {
+		kfree(cf_name);
+		return ERR_PTR(-E2BIG);
+	}
+
+	/* Non-ASCII bytes are left untouched. */
+	for (p = cf_name; *p; p++) {
+		if (isascii(*p))
+			*p = tolower(*p);
+	}
+	return cf_name;
+}
+
/**
* ksmbd_extract_sharename() - get share name from tree connect request
* @treename: buffer containing tree name and share name
*
* Return: share name on success, otherwise error
*/
-char *ksmbd_extract_sharename(char *treename)
+char *ksmbd_extract_sharename(struct unicode_map *um, const char *treename)
{
- char *name = treename;
- char *dst;
- char *pos = strrchr(name, '\\');
+ const char *name = treename, *pos = strrchr(name, '\\');
if (pos)
name = (pos + 1);
/* caller has to free the memory */
- dst = kstrdup(name, GFP_KERNEL);
- if (!dst)
- return ERR_PTR(-ENOMEM);
- return dst;
+ return ksmbd_casefold_sharename(um, name);
}
/**
* convert_to_unix_name() - convert windows name to unix format
- * @path: name to be converted
- * @tid: tree id of mathing share
+ * @share: ksmbd_share_config pointer
+ * @name: file name that is relative to share
*
* Return: converted name on success, otherwise NULL
*/
diff --git a/fs/ksmbd/misc.h b/fs/ksmbd/misc.h
index 253366bd0951..1facfcd21200 100644
--- a/fs/ksmbd/misc.h
+++ b/fs/ksmbd/misc.h
@@ -14,12 +14,14 @@ struct ksmbd_file;
int match_pattern(const char *str, size_t len, const char *pattern);
int ksmbd_validate_filename(char *filename);
int parse_stream_name(char *filename, char **stream_name, int *s_type);
-char *convert_to_nt_pathname(char *filename);
+char *convert_to_nt_pathname(struct ksmbd_share_config *share,
+ const struct path *path);
int get_nlink(struct kstat *st);
void ksmbd_conv_path_to_unix(char *path);
void ksmbd_strip_last_slash(char *path);
void ksmbd_conv_path_to_windows(char *path);
-char *ksmbd_extract_sharename(char *treename);
+char *ksmbd_casefold_sharename(struct unicode_map *um, const char *name);
+char *ksmbd_extract_sharename(struct unicode_map *um, const char *treename);
char *convert_to_unix_name(struct ksmbd_share_config *share, const char *name);
#define KSMBD_DIR_INFO_ALIGNMENT 8
diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c
index 2e2df6ede583..2da256259722 100644
--- a/fs/ksmbd/oplock.c
+++ b/fs/ksmbd/oplock.c
@@ -102,9 +102,10 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx)
lease->new_state = 0;
lease->flags = lctx->flags;
lease->duration = lctx->duration;
+ lease->is_dir = lctx->is_dir;
memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE);
lease->version = lctx->version;
- lease->epoch = 0;
+ lease->epoch = le16_to_cpu(lctx->epoch);
INIT_LIST_HEAD(&opinfo->lease_entry);
opinfo->o_lease = lease;
@@ -157,13 +158,42 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
rcu_read_lock();
opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info,
op_entry);
- if (opinfo && !atomic_inc_not_zero(&opinfo->refcount))
- opinfo = NULL;
+ if (opinfo) {
+ if (!atomic_inc_not_zero(&opinfo->refcount))
+ opinfo = NULL;
+ else {
+ atomic_inc(&opinfo->conn->r_count);
+ if (ksmbd_conn_releasing(opinfo->conn)) {
+ atomic_dec(&opinfo->conn->r_count);
+ atomic_dec(&opinfo->refcount);
+ opinfo = NULL;
+ }
+ }
+ }
+
rcu_read_unlock();
return opinfo;
}
+/*
+ * opinfo_conn_put() - drop the connection request count and the oplock
+ * reference that were taken on @opinfo. A NULL @opinfo is ignored.
+ */
+static void opinfo_conn_put(struct oplock_info *opinfo)
+{
+	struct ksmbd_conn *conn;
+
+	if (!opinfo)
+		return;
+
+	conn = opinfo->conn;
+
+	/*
+	 * Check the waitqueue so that pending requests can be dropped on
+	 * disconnection. waitqueue_active() is safe here because the
+	 * condition is updated with an atomic operation.
+	 */
+	if (atomic_dec_return(&conn->r_count) == 0 &&
+	    waitqueue_active(&conn->r_count_q))
+		wake_up(&conn->r_count_q);
+
+	opinfo_put(opinfo);
+}
+
void opinfo_put(struct oplock_info *opinfo)
{
if (!atomic_dec_and_test(&opinfo->refcount))
@@ -366,8 +396,8 @@ void close_id_del_oplock(struct ksmbd_file *fp)
{
struct oplock_info *opinfo;
- if (S_ISDIR(file_inode(fp->filp)->i_mode))
- return;
+ if (fp->reserve_lease_break)
+ smb_lazy_parent_lease_break_close(fp);
opinfo = opinfo_get(fp);
if (!opinfo)
@@ -514,12 +544,13 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci,
/* upgrading lease */
if ((atomic_read(&ci->op_count) +
atomic_read(&ci->sop_count)) == 1) {
- if (lease->state ==
- (lctx->req_state & lease->state)) {
+ if (lease->state != SMB2_LEASE_NONE_LE &&
+ lease->state == (lctx->req_state & lease->state)) {
lease->state |= lctx->req_state;
if (lctx->req_state &
SMB2_LEASE_WRITE_CACHING_LE)
lease_read_to_write(opinfo);
+
}
} else if ((atomic_read(&ci->op_count) +
atomic_read(&ci->sop_count)) > 1) {
@@ -587,15 +618,6 @@ static int oplock_break_pending(struct oplock_info *opinfo, int req_op_level)
return 0;
}
-static inline int allocate_oplock_break_buf(struct ksmbd_work *work)
-{
- work->response_buf = kzalloc(MAX_CIFS_SMALL_BUFFER_SIZE, GFP_KERNEL);
- if (!work->response_buf)
- return -ENOMEM;
- work->response_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
- return 0;
-}
-
/**
* __smb2_oplock_break_noti() - send smb2 oplock break cmd from conn
* to client
@@ -610,30 +632,22 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
{
struct smb2_oplock_break *rsp = NULL;
struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
- struct ksmbd_conn *conn = work->conn;
struct oplock_break_info *br_info = work->request_buf;
struct smb2_hdr *rsp_hdr;
struct ksmbd_file *fp;
fp = ksmbd_lookup_durable_fd(br_info->fid);
- if (!fp) {
- atomic_dec(&conn->r_count);
- ksmbd_free_work_struct(work);
- return;
- }
+ if (!fp)
+ goto out;
- if (allocate_oplock_break_buf(work)) {
+ if (allocate_interim_rsp_buf(work)) {
pr_err("smb2_allocate_rsp_buf failed! ");
- atomic_dec(&conn->r_count);
ksmbd_fd_put(work, fp);
- ksmbd_free_work_struct(work);
- return;
+ goto out;
}
- rsp_hdr = work->response_buf;
+ rsp_hdr = smb2_get_msg(work->response_buf);
memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
- rsp_hdr->smb2_buf_length =
- cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
rsp_hdr->CreditRequest = cpu_to_le16(0);
@@ -646,7 +660,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
rsp_hdr->SessionId = 0;
memset(rsp_hdr->Signature, 0, 16);
- rsp = work->response_buf;
+ rsp = smb2_get_msg(work->response_buf);
rsp->StructureSize = cpu_to_le16(24);
if (!br_info->open_trunc &&
@@ -657,19 +671,22 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
rsp->OplockLevel = SMB2_OPLOCK_LEVEL_NONE;
rsp->Reserved = 0;
rsp->Reserved2 = 0;
- rsp->PersistentFid = cpu_to_le64(fp->persistent_id);
- rsp->VolatileFid = cpu_to_le64(fp->volatile_id);
+ rsp->PersistentFid = fp->persistent_id;
+ rsp->VolatileFid = fp->volatile_id;
- inc_rfc1001_len(rsp, 24);
+ ksmbd_fd_put(work, fp);
+ if (ksmbd_iov_pin_rsp(work, (void *)rsp,
+ sizeof(struct smb2_oplock_break)))
+ goto out;
ksmbd_debug(OPLOCK,
"sending oplock break v_id %llu p_id = %llu lock level = %d\n",
rsp->VolatileFid, rsp->PersistentFid, rsp->OplockLevel);
- ksmbd_fd_put(work, fp);
ksmbd_conn_write(work);
+
+out:
ksmbd_free_work_struct(work);
- atomic_dec(&conn->r_count);
}
/**
@@ -703,7 +720,6 @@ static int smb2_oplock_break_noti(struct oplock_info *opinfo)
work->conn = conn;
work->sess = opinfo->sess;
- atomic_inc(&conn->r_count);
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
INIT_WORK(&work->work, __smb2_oplock_break_noti);
ksmbd_queue_work(work);
@@ -727,20 +743,15 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
struct smb2_lease_break *rsp = NULL;
struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
struct lease_break_info *br_info = work->request_buf;
- struct ksmbd_conn *conn = work->conn;
struct smb2_hdr *rsp_hdr;
- if (allocate_oplock_break_buf(work)) {
+ if (allocate_interim_rsp_buf(work)) {
ksmbd_debug(OPLOCK, "smb2_allocate_rsp_buf failed! ");
- ksmbd_free_work_struct(work);
- atomic_dec(&conn->r_count);
- return;
+ goto out;
}
- rsp_hdr = work->response_buf;
+ rsp_hdr = smb2_get_msg(work->response_buf);
memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
- rsp_hdr->smb2_buf_length =
- cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
rsp_hdr->CreditRequest = cpu_to_le16(0);
@@ -753,7 +764,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
rsp_hdr->SessionId = 0;
memset(rsp_hdr->Signature, 0, 16);
- rsp = work->response_buf;
+ rsp = smb2_get_msg(work->response_buf);
rsp->StructureSize = cpu_to_le16(44);
rsp->Epoch = br_info->epoch;
rsp->Flags = 0;
@@ -769,11 +780,14 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
rsp->AccessMaskHint = 0;
rsp->ShareMaskHint = 0;
- inc_rfc1001_len(rsp, 44);
+ if (ksmbd_iov_pin_rsp(work, (void *)rsp,
+ sizeof(struct smb2_lease_break)))
+ goto out;
ksmbd_conn_write(work);
+
+out:
ksmbd_free_work_struct(work);
- atomic_dec(&conn->r_count);
}
/**
@@ -813,7 +827,6 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
work->conn = conn;
work->sess = opinfo->sess;
- atomic_inc(&conn->r_count);
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
list_for_each_safe(tmp, t, &opinfo->interim_list) {
struct ksmbd_work *in_work;
@@ -822,7 +835,8 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
interim_entry);
setup_async_work(in_work, NULL, NULL);
smb2_send_interim_resp(in_work, STATUS_PENDING);
- list_del(&in_work->interim_entry);
+ list_del_init(&in_work->interim_entry);
+ release_async_work(in_work);
}
INIT_WORK(&work->work, __smb2_lease_break_noti);
ksmbd_queue_work(work);
@@ -888,7 +902,8 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level)
lease->new_state =
SMB2_LEASE_READ_CACHING_LE;
} else {
- if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE)
+ if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE &&
+ !lease->is_dir)
lease->new_state =
SMB2_LEASE_READ_CACHING_LE;
else
@@ -1020,6 +1035,7 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2)
SMB2_LEASE_KEY_SIZE);
lease2->duration = lease1->duration;
lease2->flags = lease1->flags;
+ lease2->epoch = lease1->epoch++;
}
static int add_lease_global_list(struct oplock_info *opinfo)
@@ -1069,6 +1085,89 @@ static void set_oplock_level(struct oplock_info *opinfo, int level,
}
}
+/**
+ * smb_send_parent_lease_break_noti() - break leases held on the parent
+ *					 directory of a file being opened
+ * @fp:		ksmbd file whose parent directory is checked
+ * @lctx:	lease context of the open request
+ *
+ * Only lease v2 requests are handled. Each lease on the parent directory
+ * whose state is not NONE is broken to NONE, except when the request has
+ * SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE set and compare_guid_key()
+ * returns non-zero for that lease and the request's parent lease key.
+ */
+void smb_send_parent_lease_break_noti(struct ksmbd_file *fp,
+				      struct lease_ctx_info *lctx)
+{
+	struct oplock_info *opinfo;
+	struct ksmbd_inode *p_ci = NULL;
+
+	if (lctx->version != 2)
+		return;
+
+	p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent);
+	if (!p_ci)
+		return;
+
+	read_lock(&p_ci->m_lock);
+	list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) {
+		if (!opinfo->is_lease)
+			continue;
+
+		if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE &&
+		    (!(lctx->flags & SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) ||
+		     !compare_guid_key(opinfo, fp->conn->ClientGUID,
+				       lctx->parent_lease_key))) {
+			if (!atomic_inc_not_zero(&opinfo->refcount))
+				continue;
+
+			atomic_inc(&opinfo->conn->r_count);
+			if (ksmbd_conn_releasing(opinfo->conn)) {
+				/*
+				 * Undo both counts taken above, the same way
+				 * opinfo_get_list() does; dropping only
+				 * r_count would leak the opinfo reference.
+				 */
+				atomic_dec(&opinfo->conn->r_count);
+				atomic_dec(&opinfo->refcount);
+				continue;
+			}
+
+			/*
+			 * NOTE(review): m_lock is released around
+			 * oplock_break() and the reference is dropped before
+			 * the lock is re-taken - confirm the iterator stays
+			 * valid across this window.
+			 */
+			read_unlock(&p_ci->m_lock);
+			oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE);
+			opinfo_conn_put(opinfo);
+			read_lock(&p_ci->m_lock);
+		}
+	}
+	read_unlock(&p_ci->m_lock);
+
+	ksmbd_inode_put(p_ci);
+}
+
+/**
+ * smb_lazy_parent_lease_break_close() - break parent directory leases when
+ *					  a file is closed
+ * @fp:		ksmbd file being closed
+ *
+ * Nothing is done unless @fp has an oplock info that is a v2 lease. In that
+ * case every lease on the parent directory whose state is not NONE is
+ * broken to NONE.
+ */
+void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp)
+{
+	struct oplock_info *opinfo;
+	struct ksmbd_inode *p_ci = NULL;
+	bool is_lease_v2;
+
+	/*
+	 * fp->f_opinfo may be NULL (close_id_del_oplock() handles that case
+	 * right after calling us), so check it before dereferencing, and
+	 * inspect it while still inside the RCU read section that fetched it.
+	 */
+	rcu_read_lock();
+	opinfo = rcu_dereference(fp->f_opinfo);
+	is_lease_v2 = opinfo && opinfo->is_lease &&
+		      opinfo->o_lease->version == 2;
+	rcu_read_unlock();
+
+	if (!is_lease_v2)
+		return;
+
+	p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent);
+	if (!p_ci)
+		return;
+
+	read_lock(&p_ci->m_lock);
+	list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) {
+		if (!opinfo->is_lease)
+			continue;
+
+		if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE) {
+			if (!atomic_inc_not_zero(&opinfo->refcount))
+				continue;
+
+			atomic_inc(&opinfo->conn->r_count);
+			if (ksmbd_conn_releasing(opinfo->conn)) {
+				/*
+				 * Undo both counts taken above, the same way
+				 * opinfo_get_list() does; dropping only
+				 * r_count would leak the opinfo reference.
+				 */
+				atomic_dec(&opinfo->conn->r_count);
+				atomic_dec(&opinfo->refcount);
+				continue;
+			}
+			read_unlock(&p_ci->m_lock);
+			oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE);
+			opinfo_conn_put(opinfo);
+			read_lock(&p_ci->m_lock);
+		}
+	}
+	read_unlock(&p_ci->m_lock);
+
+	ksmbd_inode_put(p_ci);
+}
+
/**
* smb_grant_oplock() - handle oplock/lease request on file open
* @work: smb work
@@ -1092,10 +1191,6 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
bool prev_op_has_lease;
__le32 prev_op_state = 0;
- /* not support directory lease */
- if (S_ISDIR(file_inode(fp->filp)->i_mode))
- return 0;
-
opinfo = alloc_opinfo(work, pid, tid);
if (!opinfo)
return -ENOMEM;
@@ -1135,8 +1230,10 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
}
prev_opinfo = opinfo_get_list(ci);
if (!prev_opinfo ||
- (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx))
+ (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx)) {
+ opinfo_conn_put(prev_opinfo);
goto set_lev;
+ }
prev_op_has_lease = prev_opinfo->is_lease;
if (prev_op_has_lease)
prev_op_state = prev_opinfo->o_lease->state;
@@ -1144,19 +1241,19 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
if (share_ret < 0 &&
prev_opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
err = share_ret;
- opinfo_put(prev_opinfo);
+ opinfo_conn_put(prev_opinfo);
goto err_out;
}
if (prev_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
prev_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
- opinfo_put(prev_opinfo);
+ opinfo_conn_put(prev_opinfo);
goto op_break_not_needed;
}
list_add(&work->interim_entry, &prev_opinfo->interim_list);
err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II);
- opinfo_put(prev_opinfo);
+ opinfo_conn_put(prev_opinfo);
if (err == -ENOENT)
goto set_lev;
/* Check all oplock was freed by close */
@@ -1219,14 +1316,14 @@ static void smb_break_all_write_oplock(struct ksmbd_work *work,
return;
if (brk_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
brk_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
- opinfo_put(brk_opinfo);
+ opinfo_conn_put(brk_opinfo);
return;
}
brk_opinfo->open_trunc = is_trunc;
list_add(&work->interim_entry, &brk_opinfo->interim_list);
oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II);
- opinfo_put(brk_opinfo);
+ opinfo_conn_put(brk_opinfo);
}
/**
@@ -1254,6 +1351,13 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) {
if (!atomic_inc_not_zero(&brk_op->refcount))
continue;
+
+ atomic_inc(&brk_op->conn->r_count);
+ if (ksmbd_conn_releasing(brk_op->conn)) {
+ atomic_dec(&brk_op->conn->r_count);
+ continue;
+ }
+
rcu_read_unlock();
if (brk_op->is_lease && (brk_op->o_lease->state &
(~(SMB2_LEASE_READ_CACHING_LE |
@@ -1283,7 +1387,7 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
brk_op->open_trunc = is_trunc;
oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE);
next:
- opinfo_put(brk_op);
+ opinfo_conn_put(brk_op);
rcu_read_lock();
}
rcu_read_unlock();
@@ -1336,19 +1440,17 @@ __u8 smb2_map_lease_to_oplock(__le32 lease_state)
*/
void create_lease_buf(u8 *rbuf, struct lease *lease)
{
- char *LeaseKey = (char *)&lease->lease_key;
-
if (lease->version == 2) {
struct create_lease_v2 *buf = (struct create_lease_v2 *)rbuf;
- char *ParentLeaseKey = (char *)&lease->parent_lease_key;
memset(buf, 0, sizeof(struct create_lease_v2));
- buf->lcontext.LeaseKeyLow = *((__le64 *)LeaseKey);
- buf->lcontext.LeaseKeyHigh = *((__le64 *)(LeaseKey + 8));
+ memcpy(buf->lcontext.LeaseKey, lease->lease_key,
+ SMB2_LEASE_KEY_SIZE);
buf->lcontext.LeaseFlags = lease->flags;
+ buf->lcontext.Epoch = cpu_to_le16(++lease->epoch);
buf->lcontext.LeaseState = lease->state;
- buf->lcontext.ParentLeaseKeyLow = *((__le64 *)ParentLeaseKey);
- buf->lcontext.ParentLeaseKeyHigh = *((__le64 *)(ParentLeaseKey + 8));
+ memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key,
+ SMB2_LEASE_KEY_SIZE);
buf->ccontext.DataOffset = cpu_to_le16(offsetof
(struct create_lease_v2, lcontext));
buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2));
@@ -1363,8 +1465,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease)
struct create_lease *buf = (struct create_lease *)rbuf;
memset(buf, 0, sizeof(struct create_lease));
- buf->lcontext.LeaseKeyLow = *((__le64 *)LeaseKey);
- buf->lcontext.LeaseKeyHigh = *((__le64 *)(LeaseKey + 8));
+ memcpy(buf->lcontext.LeaseKey, lease->lease_key, SMB2_LEASE_KEY_SIZE);
buf->lcontext.LeaseFlags = lease->flags;
buf->lcontext.LeaseState = lease->state;
buf->ccontext.DataOffset = cpu_to_le16(offsetof
@@ -1383,63 +1484,50 @@ void create_lease_buf(u8 *rbuf, struct lease *lease)
/**
* parse_lease_state() - parse lease context contained in file open request
* @open_req: buffer containing smb2 file open(create) request
+ * @is_dir: whether leasing file is directory
*
* Return: allocated lease context info on success, NULL if no lease create
*         context is found or the allocation fails
*/
-struct lease_ctx_info *parse_lease_state(void *open_req)
+struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir)
{
- char *data_offset;
struct create_context *cc;
- unsigned int next = 0;
- char *name;
- bool found = false;
struct smb2_create_req *req = (struct smb2_create_req *)open_req;
- struct lease_ctx_info *lreq = kzalloc(sizeof(struct lease_ctx_info),
- GFP_KERNEL);
- if (!lreq)
+ struct lease_ctx_info *lreq;
+
+ cc = smb2_find_context_vals(req, SMB2_CREATE_REQUEST_LEASE, 4);
+ if (IS_ERR_OR_NULL(cc))
return NULL;
- data_offset = (char *)req + 4 + le32_to_cpu(req->CreateContextsOffset);
- cc = (struct create_context *)data_offset;
- do {
- cc = (struct create_context *)((char *)cc + next);
- name = le16_to_cpu(cc->NameOffset) + (char *)cc;
- if (le16_to_cpu(cc->NameLength) != 4 ||
- strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) {
- next = le32_to_cpu(cc->Next);
- continue;
- }
- found = true;
- break;
- } while (next != 0);
+ lreq = kzalloc(sizeof(struct lease_ctx_info), GFP_KERNEL);
+ if (!lreq)
+ return NULL;
- if (found) {
- if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) {
- struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
+ if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) {
+ struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
- *((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
- *((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
+ memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
+ if (is_dir) {
+ lreq->req_state = lc->lcontext.LeaseState &
+ ~SMB2_LEASE_WRITE_CACHING_LE;
+ lreq->is_dir = true;
+ } else
lreq->req_state = lc->lcontext.LeaseState;
- lreq->flags = lc->lcontext.LeaseFlags;
- lreq->duration = lc->lcontext.LeaseDuration;
- *((__le64 *)lreq->parent_lease_key) = lc->lcontext.ParentLeaseKeyLow;
- *((__le64 *)(lreq->parent_lease_key + 8)) = lc->lcontext.ParentLeaseKeyHigh;
- lreq->version = 2;
- } else {
- struct create_lease *lc = (struct create_lease *)cc;
+ lreq->flags = lc->lcontext.LeaseFlags;
+ lreq->epoch = lc->lcontext.Epoch;
+ lreq->duration = lc->lcontext.LeaseDuration;
+ memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey,
+ SMB2_LEASE_KEY_SIZE);
+ lreq->version = 2;
+ } else {
+ struct create_lease *lc = (struct create_lease *)cc;
- *((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
- *((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
- lreq->req_state = lc->lcontext.LeaseState;
- lreq->flags = lc->lcontext.LeaseFlags;
- lreq->duration = lc->lcontext.LeaseDuration;
- lreq->version = 1;
- }
- return lreq;
+ memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
+ lreq->req_state = lc->lcontext.LeaseState;
+ lreq->flags = lc->lcontext.LeaseFlags;
+ lreq->duration = lc->lcontext.LeaseDuration;
+ lreq->version = 1;
}
-
- kfree(lreq);
- return NULL;
+ return lreq;
}
/**
@@ -1464,7 +1552,7 @@ struct create_context *smb2_find_context_vals(void *open_req, const char *tag, i
* CreateContextsOffset and CreateContextsLength are guaranteed to
* be valid because of ksmbd_smb2_check_message().
*/
- cc = (struct create_context *)((char *)req + 4 +
+ cc = (struct create_context *)((char *)req +
le32_to_cpu(req->CreateContextsOffset));
remain_len = le32_to_cpu(req->CreateContextsLength);
do {
@@ -1485,7 +1573,7 @@ struct create_context *smb2_find_context_vals(void *open_req, const char *tag, i
name_len < 4 ||
name_off + name_len > cc_len ||
(value_off & 0x7) != 0 ||
- (value_off && (value_off < name_off + name_len)) ||
+ (value_len && value_off < name_off + (name_len < 8 ? 8 : name_len)) ||
((u64)value_off + value_len > cc_len))
return ERR_PTR(-EINVAL);
@@ -1612,7 +1700,11 @@ void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp)
memset(buf, 0, sizeof(struct create_posix_rsp));
buf->ccontext.DataOffset = cpu_to_le16(offsetof
(struct create_posix_rsp, nlink));
- buf->ccontext.DataLength = cpu_to_le32(52);
+ /*
+ * DataLength = nlink(4) + reparse_tag(4) + mode(4) +
+ * domain sid(28) + unix group sid(16).
+ */
+ buf->ccontext.DataLength = cpu_to_le32(56);
buf->ccontext.NameOffset = cpu_to_le16(offsetof
(struct create_posix_rsp, Name));
buf->ccontext.NameLength = cpu_to_le16(POSIX_CTXT_DATA_LEN);
@@ -1636,13 +1728,20 @@ void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp)
buf->nlink = cpu_to_le32(inode->i_nlink);
buf->reparse_tag = cpu_to_le32(fp->volatile_id);
- buf->mode = cpu_to_le32(inode->i_mode);
+ buf->mode = cpu_to_le32(inode->i_mode & 0777);
+ /*
+ * SidBuffer(44) contain two sids(Domain sid(28), UNIX group sid(16)).
+ * Domain sid(28) = revision(1) + num_subauth(1) + authority(6) +
+ * sub_auth(4 * 4(num_subauth)) + RID(4).
+ * UNIX group id(16) = revision(1) + num_subauth(1) + authority(6) +
+ * sub_auth(4 * 1(num_subauth)) + RID(4).
+ */
id_to_sid(from_kuid_munged(&init_user_ns,
i_uid_into_mnt(user_ns, inode)),
- SIDNFS_USER, (struct smb_sid *)&buf->SidBuffer[0]);
+ SIDOWNER, (struct smb_sid *)&buf->SidBuffer[0]);
id_to_sid(from_kgid_munged(&init_user_ns,
i_gid_into_mnt(user_ns, inode)),
- SIDNFS_GROUP, (struct smb_sid *)&buf->SidBuffer[20]);
+ SIDUNIX_GROUP, (struct smb_sid *)&buf->SidBuffer[28]);
}
/*
@@ -1702,33 +1801,3 @@ out:
read_unlock(&lease_list_lock);
return ret_op;
}
-
-int smb2_check_durable_oplock(struct ksmbd_file *fp,
- struct lease_ctx_info *lctx, char *name)
-{
- struct oplock_info *opinfo = opinfo_get(fp);
- int ret = 0;
-
- if (opinfo && opinfo->is_lease) {
- if (!lctx) {
- pr_err("open does not include lease\n");
- ret = -EBADF;
- goto out;
- }
- if (memcmp(opinfo->o_lease->lease_key, lctx->lease_key,
- SMB2_LEASE_KEY_SIZE)) {
- pr_err("invalid lease key\n");
- ret = -EBADF;
- goto out;
- }
- if (name && strcmp(fp->filename, name)) {
- pr_err("invalid name reconnect %s\n", name);
- ret = -EINVAL;
- goto out;
- }
- }
-out:
- if (opinfo)
- opinfo_put(opinfo);
- return ret;
-}
diff --git a/fs/ksmbd/oplock.h b/fs/ksmbd/oplock.h
index 2c4f4a0512b7..5b93ea9196c0 100644
--- a/fs/ksmbd/oplock.h
+++ b/fs/ksmbd/oplock.h
@@ -28,15 +28,15 @@
#define OPLOCK_WRITE_TO_NONE 0x04
#define OPLOCK_READ_TO_NONE 0x08
-#define SMB2_LEASE_KEY_SIZE 16
-
struct lease_ctx_info {
__u8 lease_key[SMB2_LEASE_KEY_SIZE];
__le32 req_state;
__le32 flags;
__le64 duration;
__u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
+ __le16 epoch;
int version;
+ bool is_dir;
};
struct lease_table {
@@ -55,6 +55,7 @@ struct lease {
__u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
int version;
unsigned short epoch;
+ bool is_dir;
struct lease_table *l_lb;
};
@@ -110,7 +111,7 @@ void opinfo_put(struct oplock_info *opinfo);
/* Lease related functions */
void create_lease_buf(u8 *rbuf, struct lease *lease);
-struct lease_ctx_info *parse_lease_state(void *open_req);
+struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir);
__u8 smb2_map_lease_to_oplock(__le32 lease_state);
int lease_read_to_write(struct oplock_info *opinfo);
@@ -126,6 +127,7 @@ struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
struct lease_ctx_info *lctx);
void destroy_lease_table(struct ksmbd_conn *conn);
-int smb2_check_durable_oplock(struct ksmbd_file *fp,
- struct lease_ctx_info *lctx, char *name);
+void smb_send_parent_lease_break_noti(struct ksmbd_file *fp,
+ struct lease_ctx_info *lctx);
+void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp);
#endif /* __KSMBD_OPLOCK_H */
diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c
index eb45d56b3577..11b201e6ee44 100644
--- a/fs/ksmbd/server.c
+++ b/fs/ksmbd/server.c
@@ -93,7 +93,8 @@ static inline int check_conn_state(struct ksmbd_work *work)
{
struct smb_hdr *rsp_hdr;
- if (ksmbd_conn_exiting(work) || ksmbd_conn_need_reconnect(work)) {
+ if (ksmbd_conn_exiting(work->conn) ||
+ ksmbd_conn_need_reconnect(work->conn)) {
rsp_hdr = work->response_buf;
rsp_hdr->Status.CifsError = STATUS_CONNECTION_DISCONNECTED;
return 1;
@@ -114,8 +115,10 @@ static int __process_request(struct ksmbd_work *work, struct ksmbd_conn *conn,
if (check_conn_state(work))
return SERVER_HANDLER_CONTINUE;
- if (ksmbd_verify_smb_message(work))
+ if (ksmbd_verify_smb_message(work)) {
+ conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
return SERVER_HANDLER_ABORT;
+ }
command = conn->ops->get_cmd_val(work);
*cmd = command;
@@ -162,6 +165,7 @@ static void __handle_ksmbd_work(struct ksmbd_work *work,
{
u16 command = 0;
int rc;
+ bool is_chained = false;
if (conn->ops->allocate_rsp_buf(work))
return;
@@ -228,16 +232,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work,
}
}
+ is_chained = is_chained_smb2_message(work);
+
if (work->sess &&
(work->sess->sign || smb3_11_final_sess_setup_resp(work) ||
conn->ops->is_sign_req(work, command)))
conn->ops->set_sign_rsp(work);
- } while (is_chained_smb2_message(work));
-
- if (work->send_no_response)
- return;
+ } while (is_chained == true);
send:
+ if (work->tcon)
+ ksmbd_tree_connect_put(work->tcon);
smb3_preauth_hash_rsp(work);
if (work->sess && work->sess->enc && work->encrypted &&
conn->ops->encrypt_resp) {
@@ -266,7 +271,13 @@ static void handle_ksmbd_work(struct work_struct *wk)
ksmbd_conn_try_dequeue_request(work);
ksmbd_free_work_struct(work);
- atomic_dec(&conn->r_count);
+ /*
+ * Checking waitqueue to dropping pending requests on
+ * disconnection. waitqueue_active is safe because it
+ * uses atomic operation for condition.
+ */
+ if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
+ wake_up(&conn->r_count_q);
}
/**
@@ -279,6 +290,7 @@ static void handle_ksmbd_work(struct work_struct *wk)
static int queue_ksmbd_work(struct ksmbd_conn *conn)
{
struct ksmbd_work *work;
+ int err;
work = ksmbd_alloc_work_struct();
if (!work) {
@@ -290,9 +302,10 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
work->request_buf = conn->request_buf;
conn->request_buf = NULL;
- if (ksmbd_init_smb_server(work)) {
+ err = ksmbd_init_smb_server(work);
+ if (err) {
ksmbd_free_work_struct(work);
- return -EINVAL;
+ return 0;
}
ksmbd_conn_enqueue_request(work);
@@ -433,11 +446,9 @@ static ssize_t stats_show(struct class *class, struct class_attribute *attr,
"reset",
"shutdown"
};
-
- ssize_t sz = scnprintf(buf, PAGE_SIZE, "%d %s %d %lu\n", stats_version,
- state[server_conf.state], server_conf.tcp_port,
- server_conf.ipc_last_active / HZ);
- return sz;
+ return sysfs_emit(buf, "%d %s %d %lu\n", stats_version,
+ state[server_conf.state], server_conf.tcp_port,
+ server_conf.ipc_last_active / HZ);
}
static ssize_t kill_server_store(struct class *class,
@@ -469,19 +480,13 @@ static ssize_t debug_show(struct class *class, struct class_attribute *attr,
for (i = 0; i < ARRAY_SIZE(debug_type_strings); i++) {
if ((ksmbd_debug_types >> i) & 1) {
- pos = scnprintf(buf + sz,
- PAGE_SIZE - sz,
- "[%s] ",
- debug_type_strings[i]);
+ pos = sysfs_emit_at(buf, sz, "[%s] ", debug_type_strings[i]);
} else {
- pos = scnprintf(buf + sz,
- PAGE_SIZE - sz,
- "%s ",
- debug_type_strings[i]);
+ pos = sysfs_emit_at(buf, sz, "%s ", debug_type_strings[i]);
}
sz += pos;
}
- sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n");
+ sz += sysfs_emit_at(buf, sz, "\n");
return sz;
}
@@ -590,8 +595,6 @@ static int __init ksmbd_server_init(void)
if (ret)
goto err_crypto_destroy;
- pr_warn_once("The ksmbd server is experimental, use at your own risk.\n");
-
return 0;
err_crypto_destroy:
@@ -628,7 +631,6 @@ MODULE_DESCRIPTION("Linux kernel CIFS/SMB SERVER");
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: ecb");
MODULE_SOFTDEP("pre: hmac");
-MODULE_SOFTDEP("pre: md4");
MODULE_SOFTDEP("pre: md5");
MODULE_SOFTDEP("pre: nls");
MODULE_SOFTDEP("pre: aes");
diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
index 8ef9503c4ab9..4d1211bde190 100644
--- a/fs/ksmbd/smb2misc.c
+++ b/fs/ksmbd/smb2misc.c
@@ -107,16 +107,25 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
break;
case SMB2_CREATE:
{
+ unsigned short int name_off =
+ le16_to_cpu(((struct smb2_create_req *)hdr)->NameOffset);
+ unsigned short int name_len =
+ le16_to_cpu(((struct smb2_create_req *)hdr)->NameLength);
+
if (((struct smb2_create_req *)hdr)->CreateContextsLength) {
*off = le32_to_cpu(((struct smb2_create_req *)
hdr)->CreateContextsOffset);
*len = le32_to_cpu(((struct smb2_create_req *)
hdr)->CreateContextsLength);
- break;
+ if (!name_len)
+ break;
+
+ if (name_off + name_len < (u64)*off + *len)
+ break;
}
- *off = le16_to_cpu(((struct smb2_create_req *)hdr)->NameOffset);
- *len = le16_to_cpu(((struct smb2_create_req *)hdr)->NameLength);
+ *off = name_off;
+ *len = name_len;
break;
}
case SMB2_QUERY_INFO:
@@ -441,10 +450,8 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
validate_credit:
if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) &&
- smb2_validate_credit_charge(work->conn, hdr)) {
- work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
+ smb2_validate_credit_charge(work->conn, hdr))
return 1;
- }
return 0;
}
diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
index f0a5b704f301..c69943d96565 100644
--- a/fs/ksmbd/smb2ops.c
+++ b/fs/ksmbd/smb2ops.c
@@ -222,7 +222,8 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
+ SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION &&
conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
@@ -246,10 +247,12 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
+ SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
- if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION &&
- conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+ (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
@@ -270,7 +273,13 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
+ SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+ (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 0fde3d12b346..8875c04e8382 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -45,8 +45,8 @@ static void __wbuf(struct ksmbd_work *work, void **req, void **rsp)
*req = ksmbd_req_buf_next(work);
*rsp = ksmbd_resp_buf_next(work);
} else {
- *req = work->request_buf;
- *rsp = work->response_buf;
+ *req = smb2_get_msg(work->request_buf);
+ *rsp = smb2_get_msg(work->response_buf);
}
}
@@ -75,14 +75,7 @@ static inline bool check_session_id(struct ksmbd_conn *conn, u64 id)
struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn *conn)
{
- struct channel *chann;
-
- list_for_each_entry(chann, &sess->ksmbd_chann_list, chann_list) {
- if (chann->conn == conn)
- return chann;
- }
-
- return NULL;
+ return xa_load(&sess->ksmbd_chann_list, (long)conn);
}
/**
@@ -94,12 +87,13 @@ struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn
*/
int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
{
- struct smb2_hdr *req_hdr = work->request_buf;
- int tree_id;
+ struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
+ unsigned int cmd = le16_to_cpu(req_hdr->Command);
+ unsigned int tree_id;
- if (work->conn->ops->get_cmd_val(work) == SMB2_TREE_CONNECT_HE ||
- work->conn->ops->get_cmd_val(work) == SMB2_CANCEL_HE ||
- work->conn->ops->get_cmd_val(work) == SMB2_LOGOFF_HE) {
+ if (cmd == SMB2_TREE_CONNECT_HE ||
+ cmd == SMB2_CANCEL_HE ||
+ cmd == SMB2_LOGOFF_HE) {
ksmbd_debug(SMB, "skip to check tree connect request\n");
return 0;
}
@@ -120,7 +114,7 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
pr_err("The first operation in the compound does not have tcon\n");
return -EINVAL;
}
- if (work->tcon->id != tree_id) {
+ if (tree_id != UINT_MAX && work->tcon->id != tree_id) {
pr_err("tree id(%u) is different with id(%u) in first operation\n",
tree_id, work->tcon->id);
return -EINVAL;
@@ -148,15 +142,21 @@ void smb2_set_err_rsp(struct ksmbd_work *work)
if (work->next_smb2_rcv_hdr_off)
err_rsp = ksmbd_resp_buf_next(work);
else
- err_rsp = work->response_buf;
+ err_rsp = smb2_get_msg(work->response_buf);
if (err_rsp->hdr.Status != STATUS_STOPPED_ON_SYMLINK) {
+ int err;
+
err_rsp->StructureSize = SMB2_ERROR_STRUCTURE_SIZE2_LE;
err_rsp->ErrorContextCount = 0;
err_rsp->Reserved = 0;
err_rsp->ByteCount = 0;
err_rsp->ErrorData[0] = 0;
- inc_rfc1001_len(work->response_buf, SMB2_ERROR_STRUCTURE_SIZE2);
+ err = ksmbd_iov_pin_rsp(work, (void *)err_rsp,
+ __SMB2_HEADER_STRUCTURE_SIZE +
+ SMB2_ERROR_STRUCTURE_SIZE2);
+ if (err)
+ work->send_no_response = 1;
}
}
@@ -168,7 +168,7 @@ void smb2_set_err_rsp(struct ksmbd_work *work)
*/
bool is_smb2_neg_cmd(struct ksmbd_work *work)
{
- struct smb2_hdr *hdr = work->request_buf;
+ struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
/* is it SMB2 header ? */
if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
@@ -192,7 +192,7 @@ bool is_smb2_neg_cmd(struct ksmbd_work *work)
*/
bool is_smb2_rsp(struct ksmbd_work *work)
{
- struct smb2_hdr *hdr = work->response_buf;
+ struct smb2_hdr *hdr = smb2_get_msg(work->response_buf);
/* is it SMB2 header ? */
if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
@@ -218,7 +218,7 @@ u16 get_smb2_cmd_val(struct ksmbd_work *work)
if (work->next_smb2_rcv_hdr_off)
rcv_hdr = ksmbd_req_buf_next(work);
else
- rcv_hdr = work->request_buf;
+ rcv_hdr = smb2_get_msg(work->request_buf);
return le16_to_cpu(rcv_hdr->Command);
}
@@ -231,11 +231,12 @@ void set_smb2_rsp_status(struct ksmbd_work *work, __le32 err)
{
struct smb2_hdr *rsp_hdr;
- if (work->next_smb2_rcv_hdr_off)
- rsp_hdr = ksmbd_resp_buf_next(work);
- else
- rsp_hdr = work->response_buf;
+ rsp_hdr = smb2_get_msg(work->response_buf);
rsp_hdr->Status = err;
+
+ work->iov_idx = 0;
+ work->iov_cnt = 0;
+ work->next_smb2_rcv_hdr_off = 0;
smb2_set_err_rsp(work);
}
@@ -251,17 +252,10 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
struct smb2_hdr *rsp_hdr;
struct smb2_negotiate_rsp *rsp;
struct ksmbd_conn *conn = work->conn;
+ int err;
- if (conn->need_neg == false)
- return -EINVAL;
-
- rsp_hdr = work->response_buf;
-
+ rsp_hdr = smb2_get_msg(work->response_buf);
memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
-
- rsp_hdr->smb2_buf_length =
- cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
-
rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
rsp_hdr->CreditRequest = cpu_to_le16(2);
@@ -274,9 +268,9 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
rsp_hdr->SessionId = 0;
memset(rsp_hdr->Signature, 0, 16);
- rsp = work->response_buf;
+ rsp = smb2_get_msg(work->response_buf);
- WARN_ON(ksmbd_conn_good(work));
+ WARN_ON(ksmbd_conn_good(conn));
rsp->StructureSize = cpu_to_le16(65);
ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
@@ -295,18 +289,19 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
rsp->SecurityBufferOffset = cpu_to_le16(128);
rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
- ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
- sizeof(rsp->hdr.smb2_buf_length)) +
+ ksmbd_copy_gss_neg_header((char *)(&rsp->hdr) +
le16_to_cpu(rsp->SecurityBufferOffset));
- inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
- sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
- AUTH_GSS_LENGTH);
rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
if (server_conf.signing == KSMBD_CONFIG_OPT_MANDATORY)
rsp->SecurityMode |= SMB2_NEGOTIATE_SIGNING_REQUIRED_LE;
+ err = ksmbd_iov_pin_rsp(work, rsp,
+ sizeof(struct smb2_negotiate_rsp) -
+ sizeof(rsp->Buffer) + AUTH_GSS_LENGTH);
+ if (err)
+ return err;
conn->use_spnego = true;
- ksmbd_conn_set_need_negotiate(work);
+ ksmbd_conn_set_need_negotiate(conn);
return 0;
}
@@ -393,12 +388,8 @@ static void init_chained_smb2_rsp(struct ksmbd_work *work)
* command in the compound request
*/
if (req->Command == SMB2_CREATE && rsp->Status == STATUS_SUCCESS) {
- work->compound_fid =
- le64_to_cpu(((struct smb2_create_rsp *)rsp)->
- VolatileFileId);
- work->compound_pfid =
- le64_to_cpu(((struct smb2_create_rsp *)rsp)->
- PersistentFileId);
+ work->compound_fid = ((struct smb2_create_rsp *)rsp)->VolatileFileId;
+ work->compound_pfid = ((struct smb2_create_rsp *)rsp)->PersistentFileId;
work->compound_sid = le64_to_cpu(rsp->SessionId);
}
@@ -406,11 +397,12 @@ static void init_chained_smb2_rsp(struct ksmbd_work *work)
next_hdr_offset = le32_to_cpu(req->NextCommand);
new_len = ALIGN(len, 8);
- inc_rfc1001_len(work->response_buf, ((sizeof(struct smb2_hdr) - 4)
- + new_len - len));
+ work->iov[work->iov_idx].iov_len += (new_len - len);
+ inc_rfc1001_len(work->response_buf, new_len - len);
rsp->NextCommand = cpu_to_le32(new_len);
work->next_smb2_rcv_hdr_off += next_hdr_offset;
+ work->curr_smb2_rsp_hdr_off = work->next_smb2_rsp_hdr_off;
work->next_smb2_rsp_hdr_off += new_len;
ksmbd_debug(SMB,
"Compound req new_len = %d rcv off = %d rsp off = %d\n",
@@ -425,7 +417,7 @@ static void init_chained_smb2_rsp(struct ksmbd_work *work)
work->compound_fid = KSMBD_NO_FID;
work->compound_pfid = KSMBD_NO_FID;
}
- memset((char *)rsp_hdr + 4, 0, sizeof(struct smb2_hdr) + 2);
+ memset((char *)rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
rsp_hdr->Command = rcv_hdr->Command;
@@ -451,7 +443,7 @@ static void init_chained_smb2_rsp(struct ksmbd_work *work)
*/
bool is_chained_smb2_message(struct ksmbd_work *work)
{
- struct smb2_hdr *hdr = work->request_buf;
+ struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
unsigned int len, next_cmd;
if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
@@ -486,10 +478,10 @@ bool is_chained_smb2_message(struct ksmbd_work *work)
len = len - get_rfc1002_len(work->response_buf);
if (len) {
ksmbd_debug(SMB, "padding len %u\n", len);
+ work->iov[work->iov_idx].iov_len += len;
inc_rfc1001_len(work->response_buf, len);
- if (work->aux_payload_sz)
- work->aux_payload_sz += len;
}
+ work->curr_smb2_rsp_hdr_off = work->next_smb2_rsp_hdr_off;
}
return false;
}
@@ -502,13 +494,10 @@ bool is_chained_smb2_message(struct ksmbd_work *work)
*/
int init_smb2_rsp_hdr(struct ksmbd_work *work)
{
- struct smb2_hdr *rsp_hdr = work->response_buf;
- struct smb2_hdr *rcv_hdr = work->request_buf;
- struct ksmbd_conn *conn = work->conn;
+ struct smb2_hdr *rsp_hdr = smb2_get_msg(work->response_buf);
+ struct smb2_hdr *rcv_hdr = smb2_get_msg(work->request_buf);
memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
- rsp_hdr->smb2_buf_length =
- cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
rsp_hdr->ProtocolId = rcv_hdr->ProtocolId;
rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
rsp_hdr->Command = rcv_hdr->Command;
@@ -524,12 +513,6 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work)
rsp_hdr->SessionId = rcv_hdr->SessionId;
memcpy(rsp_hdr->Signature, rcv_hdr->Signature, 16);
- work->syncronous = true;
- if (work->async_id) {
- ksmbd_release_id(&conn->async_ida, work->async_id);
- work->async_id = 0;
- }
-
return 0;
}
@@ -541,7 +524,7 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work)
*/
int smb2_allocate_rsp_buf(struct ksmbd_work *work)
{
- struct smb2_hdr *hdr = work->request_buf;
+ struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
size_t small_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
size_t large_sz = small_sz + work->conn->vals->max_trans_size;
size_t sz = small_sz;
@@ -553,7 +536,7 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
if (cmd == SMB2_QUERY_INFO_HE) {
struct smb2_query_info_req *req;
- req = work->request_buf;
+ req = smb2_get_msg(work->request_buf);
if ((req->InfoType == SMB2_O_INFO_FILE &&
(req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
req->FileInfoClass == FILE_ALL_INFORMATION)) ||
@@ -565,7 +548,7 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
if (le32_to_cpu(hdr->NextCommand) > 0)
sz = large_sz;
- work->response_buf = kvmalloc(sz, GFP_KERNEL | __GFP_ZERO);
+ work->response_buf = kvzalloc(sz, GFP_KERNEL);
if (!work->response_buf)
return -ENOMEM;
@@ -581,9 +564,9 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
*/
int smb2_check_user_session(struct ksmbd_work *work)
{
- struct smb2_hdr *req_hdr = work->request_buf;
+ struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
struct ksmbd_conn *conn = work->conn;
- unsigned int cmd = conn->ops->get_cmd_val(work);
+ unsigned int cmd = le16_to_cpu(req_hdr->Command);
unsigned long long sess_id;
/*
@@ -595,7 +578,7 @@ int smb2_check_user_session(struct ksmbd_work *work)
cmd == SMB2_SESSION_SETUP_HE)
return 0;
- if (!ksmbd_conn_good(work))
+ if (!ksmbd_conn_good(conn))
return -EIO;
sess_id = le64_to_cpu(req_hdr->SessionId);
@@ -609,7 +592,7 @@ int smb2_check_user_session(struct ksmbd_work *work)
pr_err("The first operation in the compound does not have sess\n");
return -EINVAL;
}
- if (work->sess->id != sess_id) {
+ if (sess_id != ULLONG_MAX && work->sess->id != sess_id) {
pr_err("session id(%llu) is different with the first operation(%lld)\n",
sess_id, work->sess->id);
return -EINVAL;
@@ -631,6 +614,7 @@ static void destroy_previous_session(struct ksmbd_conn *conn,
struct ksmbd_session *prev_sess = ksmbd_session_lookup_slowpath(id);
struct ksmbd_user *prev_user;
struct channel *chann;
+ long index;
if (!prev_sess)
return;
@@ -644,24 +628,20 @@ static void destroy_previous_session(struct ksmbd_conn *conn,
return;
prev_sess->state = SMB2_SESSION_EXPIRED;
- write_lock(&prev_sess->chann_lock);
- list_for_each_entry(chann, &prev_sess->ksmbd_chann_list, chann_list)
- chann->conn->status = KSMBD_SESS_EXITING;
- write_unlock(&prev_sess->chann_lock);
+ xa_for_each(&prev_sess->ksmbd_chann_list, index, chann)
+ ksmbd_conn_set_exiting(chann->conn);
}
/**
* smb2_get_name() - get filename string from on the wire smb format
- * @share: ksmbd_share_config pointer
* @src: source buffer
* @maxlen: maxlen of source string
- * @nls_table: nls_table pointer
+ * @local_nls: nls_table pointer
*
* Return: matching converted filename on success, otherwise error ptr
*/
static char *
-smb2_get_name(struct ksmbd_share_config *share, const char *src,
- const int maxlen, struct nls_table *local_nls)
+smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls)
{
char *name;
@@ -678,21 +658,16 @@ smb2_get_name(struct ksmbd_share_config *share, const char *src,
int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
{
- struct smb2_hdr *rsp_hdr;
struct ksmbd_conn *conn = work->conn;
int id;
- rsp_hdr = work->response_buf;
- rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
-
id = ksmbd_acquire_async_msg_id(&conn->async_ida);
if (id < 0) {
pr_err("Failed to alloc async message id\n");
return id;
}
- work->syncronous = false;
+ work->asynchronous = true;
work->async_id = id;
- rsp_hdr->Id.AsyncId = cpu_to_le64(id);
ksmbd_debug(SMB,
"Send interim Response to inform async request id : %d\n",
@@ -710,18 +685,47 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
return 0;
}
+void release_async_work(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+
+ spin_lock(&conn->request_lock);
+ list_del_init(&work->async_request_entry);
+ spin_unlock(&conn->request_lock);
+
+ work->asynchronous = 0;
+ work->cancel_fn = NULL;
+ kfree(work->cancel_argv);
+ work->cancel_argv = NULL;
+ if (work->async_id) {
+ ksmbd_release_id(&conn->async_ida, work->async_id);
+ work->async_id = 0;
+ }
+}
+
void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status)
{
struct smb2_hdr *rsp_hdr;
+ struct ksmbd_work *in_work = ksmbd_alloc_work_struct();
- rsp_hdr = work->response_buf;
- smb2_set_err_rsp(work);
+ if (allocate_interim_rsp_buf(in_work)) {
+ pr_err("smb_allocate_rsp_buf failed!\n");
+ ksmbd_free_work_struct(in_work);
+ return;
+ }
+
+ in_work->conn = work->conn;
+ memcpy(smb2_get_msg(in_work->response_buf), ksmbd_resp_buf_next(work),
+ __SMB2_HEADER_STRUCTURE_SIZE);
+
+ rsp_hdr = smb2_get_msg(in_work->response_buf);
+ rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
+ rsp_hdr->Id.AsyncId = cpu_to_le64(work->async_id);
+ smb2_set_err_rsp(in_work);
rsp_hdr->Status = status;
- work->multiRsp = 1;
- ksmbd_conn_write(work);
- rsp_hdr->Status = 0;
- work->multiRsp = 0;
+ ksmbd_conn_write(in_work);
+ ksmbd_free_work_struct(in_work);
}
static __le32 smb2_get_reparse_tag_special_file(umode_t mode)
@@ -793,19 +797,6 @@ static void build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt,
pneg_ctxt->Ciphers[0] = cipher_type;
}
-static void build_compression_ctxt(struct smb2_compression_ctx *pneg_ctxt,
- __le16 comp_algo)
-{
- pneg_ctxt->ContextType = SMB2_COMPRESSION_CAPABILITIES;
- pneg_ctxt->DataLength =
- cpu_to_le16(sizeof(struct smb2_compression_ctx)
- - sizeof(struct smb2_neg_context));
- pneg_ctxt->Reserved = cpu_to_le32(0);
- pneg_ctxt->CompressionAlgorithmCount = cpu_to_le16(1);
- pneg_ctxt->Reserved1 = cpu_to_le32(0);
- pneg_ctxt->CompressionAlgorithms[0] = comp_algo;
-}
-
static void build_sign_cap_ctxt(struct smb2_signing_capabilities *pneg_ctxt,
__le16 sign_algo)
{
@@ -841,12 +832,11 @@ static void build_posix_ctxt(struct smb2_posix_neg_context *pneg_ctxt)
pneg_ctxt->Name[15] = 0x7C;
}
-static void assemble_neg_contexts(struct ksmbd_conn *conn,
+static unsigned int assemble_neg_contexts(struct ksmbd_conn *conn,
struct smb2_negotiate_rsp *rsp)
{
- /* +4 is to account for the RFC1001 len field */
- char *pneg_ctxt = (char *)rsp +
- le32_to_cpu(rsp->NegotiateContextOffset) + 4;
+ char * const pneg_ctxt = (char *)rsp +
+ le32_to_cpu(rsp->NegotiateContextOffset);
int neg_ctxt_cnt = 1;
int ctxt_size;
@@ -854,94 +844,90 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn,
"assemble SMB2_PREAUTH_INTEGRITY_CAPABILITIES context\n");
build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt,
conn->preauth_info->Preauth_HashId);
- rsp->NegotiateContextCount = cpu_to_le16(neg_ctxt_cnt);
- inc_rfc1001_len(rsp, AUTH_GSS_PADDING);
ctxt_size = sizeof(struct smb2_preauth_neg_context);
- /* Round to 8 byte boundary */
- pneg_ctxt += round_up(sizeof(struct smb2_preauth_neg_context), 8);
if (conn->cipher_type) {
+ /* Round to 8 byte boundary */
ctxt_size = round_up(ctxt_size, 8);
ksmbd_debug(SMB,
"assemble SMB2_ENCRYPTION_CAPABILITIES context\n");
- build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt,
+ build_encrypt_ctxt((struct smb2_encryption_neg_context *)
+ (pneg_ctxt + ctxt_size),
conn->cipher_type);
- rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ neg_ctxt_cnt++;
ctxt_size += sizeof(struct smb2_encryption_neg_context) + 2;
- /* Round to 8 byte boundary */
- pneg_ctxt +=
- round_up(sizeof(struct smb2_encryption_neg_context) + 2,
- 8);
- }
-
- if (conn->compress_algorithm) {
- ctxt_size = round_up(ctxt_size, 8);
- ksmbd_debug(SMB,
- "assemble SMB2_COMPRESSION_CAPABILITIES context\n");
- /* Temporarily set to SMB3_COMPRESS_NONE */
- build_compression_ctxt((struct smb2_compression_ctx *)pneg_ctxt,
- conn->compress_algorithm);
- rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
- ctxt_size += sizeof(struct smb2_compression_ctx) + 2;
- /* Round to 8 byte boundary */
- pneg_ctxt += round_up(sizeof(struct smb2_compression_ctx) + 2,
- 8);
}
+ /* compression context not yet supported */
+ WARN_ON(conn->compress_algorithm != SMB3_COMPRESS_NONE);
if (conn->posix_ext_supported) {
ctxt_size = round_up(ctxt_size, 8);
ksmbd_debug(SMB,
"assemble SMB2_POSIX_EXTENSIONS_AVAILABLE context\n");
- build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
- rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ build_posix_ctxt((struct smb2_posix_neg_context *)
+ (pneg_ctxt + ctxt_size));
+ neg_ctxt_cnt++;
ctxt_size += sizeof(struct smb2_posix_neg_context);
- /* Round to 8 byte boundary */
- pneg_ctxt += round_up(sizeof(struct smb2_posix_neg_context), 8);
}
if (conn->signing_negotiated) {
ctxt_size = round_up(ctxt_size, 8);
ksmbd_debug(SMB,
"assemble SMB2_SIGNING_CAPABILITIES context\n");
- build_sign_cap_ctxt((struct smb2_signing_capabilities *)pneg_ctxt,
+ build_sign_cap_ctxt((struct smb2_signing_capabilities *)
+ (pneg_ctxt + ctxt_size),
conn->signing_algorithm);
- rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ neg_ctxt_cnt++;
ctxt_size += sizeof(struct smb2_signing_capabilities) + 2;
}
- inc_rfc1001_len(rsp, ctxt_size);
+ rsp->NegotiateContextCount = cpu_to_le16(neg_ctxt_cnt);
+ return ctxt_size + AUTH_GSS_PADDING;
}
static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
- struct smb2_preauth_neg_context *pneg_ctxt)
+ struct smb2_preauth_neg_context *pneg_ctxt,
+ int ctxt_len)
{
- __le32 err = STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP;
+ /*
+ * sizeof(smb2_preauth_neg_context) assumes SMB311_SALT_SIZE Salt,
+ * which may not be present. Only check for used HashAlgorithms[1].
+ */
+ if (ctxt_len <
+ sizeof(struct smb2_neg_context) + 6)
+ return STATUS_INVALID_PARAMETER;
- if (pneg_ctxt->HashAlgorithms == SMB2_PREAUTH_INTEGRITY_SHA512) {
- conn->preauth_info->Preauth_HashId =
- SMB2_PREAUTH_INTEGRITY_SHA512;
- err = STATUS_SUCCESS;
- }
+ if (pneg_ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512)
+ return STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP;
- return err;
+ conn->preauth_info->Preauth_HashId = SMB2_PREAUTH_INTEGRITY_SHA512;
+ return STATUS_SUCCESS;
}
static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
struct smb2_encryption_neg_context *pneg_ctxt,
- int len_of_ctxts)
+ int ctxt_len)
{
- int cph_cnt = le16_to_cpu(pneg_ctxt->CipherCount);
- int i, cphs_size = cph_cnt * sizeof(__le16);
+ int cph_cnt;
+ int i, cphs_size;
+
+ if (sizeof(struct smb2_encryption_neg_context) > ctxt_len) {
+ pr_err("Invalid SMB2_ENCRYPTION_CAPABILITIES context size\n");
+ return;
+ }
conn->cipher_type = 0;
+ cph_cnt = le16_to_cpu(pneg_ctxt->CipherCount);
+ cphs_size = cph_cnt * sizeof(__le16);
+
if (sizeof(struct smb2_encryption_neg_context) + cphs_size >
- len_of_ctxts) {
+ ctxt_len) {
pr_err("Invalid cipher count(%d)\n", cph_cnt);
return;
}
- if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION))
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF)
return;
for (i = 0; i < cph_cnt; i++) {
@@ -963,7 +949,7 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
*
* Return: true if connection should be encrypted, else false
*/
-static bool smb3_encryption_negotiated(struct ksmbd_conn *conn)
+bool smb3_encryption_negotiated(struct ksmbd_conn *conn)
{
if (!conn->ops->generate_encryptionkey)
return false;
@@ -984,15 +970,22 @@ static void decode_compress_ctxt(struct ksmbd_conn *conn,
static void decode_sign_cap_ctxt(struct ksmbd_conn *conn,
struct smb2_signing_capabilities *pneg_ctxt,
- int len_of_ctxts)
+ int ctxt_len)
{
- int sign_algo_cnt = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount);
- int i, sign_alos_size = sign_algo_cnt * sizeof(__le16);
+ int sign_algo_cnt;
+ int i, sign_alos_size;
+
+ if (sizeof(struct smb2_signing_capabilities) > ctxt_len) {
+ pr_err("Invalid SMB2_SIGNING_CAPABILITIES context length\n");
+ return;
+ }
conn->signing_negotiated = false;
+ sign_algo_cnt = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount);
+ sign_alos_size = sign_algo_cnt * sizeof(__le16);
if (sizeof(struct smb2_signing_capabilities) + sign_alos_size >
- len_of_ctxts) {
+ ctxt_len) {
pr_err("Invalid signing algorithm count(%d)\n", sign_algo_cnt);
return;
}
@@ -1011,14 +1004,14 @@ static void decode_sign_cap_ctxt(struct ksmbd_conn *conn,
}
static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
- struct smb2_negotiate_req *req)
+ struct smb2_negotiate_req *req,
+ unsigned int len_of_smb)
{
/* +4 is to account for the RFC1001 len field */
- struct smb2_neg_context *pctx = (struct smb2_neg_context *)((char *)req + 4);
+ struct smb2_neg_context *pctx = (struct smb2_neg_context *)req;
int i = 0, len_of_ctxts;
- int offset = le32_to_cpu(req->NegotiateContextOffset);
- int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount);
- int len_of_smb = be32_to_cpu(req->hdr.smb2_buf_length);
+ unsigned int offset = le32_to_cpu(req->NegotiateContextOffset);
+ unsigned int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount);
__le32 status = STATUS_INVALID_PARAMETER;
ksmbd_debug(SMB, "decoding %d negotiate contexts\n", neg_ctxt_cnt);
@@ -1030,18 +1023,16 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
len_of_ctxts = len_of_smb - offset;
while (i++ < neg_ctxt_cnt) {
- int clen;
+ int clen, ctxt_len;
- /* check that offset is not beyond end of SMB */
- if (len_of_ctxts == 0)
- break;
-
- if (len_of_ctxts < sizeof(struct smb2_neg_context))
+ if (len_of_ctxts < (int)sizeof(struct smb2_neg_context))
break;
pctx = (struct smb2_neg_context *)((char *)pctx + offset);
clen = le16_to_cpu(pctx->DataLength);
- if (clen + sizeof(struct smb2_neg_context) > len_of_ctxts)
+ ctxt_len = clen + sizeof(struct smb2_neg_context);
+
+ if (ctxt_len > len_of_ctxts)
break;
if (pctx->ContextType == SMB2_PREAUTH_INTEGRITY_CAPABILITIES) {
@@ -1051,7 +1042,8 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
break;
status = decode_preauth_ctxt(conn,
- (struct smb2_preauth_neg_context *)pctx);
+ (struct smb2_preauth_neg_context *)pctx,
+ ctxt_len);
if (status != STATUS_SUCCESS)
break;
} else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) {
@@ -1062,7 +1054,7 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
decode_encrypt_ctxt(conn,
(struct smb2_encryption_neg_context *)pctx,
- len_of_ctxts);
+ ctxt_len);
} else if (pctx->ContextType == SMB2_COMPRESSION_CAPABILITIES) {
ksmbd_debug(SMB,
"deassemble SMB2_COMPRESSION_CAPABILITIES context\n");
@@ -1081,15 +1073,15 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
} else if (pctx->ContextType == SMB2_SIGNING_CAPABILITIES) {
ksmbd_debug(SMB,
"deassemble SMB2_SIGNING_CAPABILITIES context\n");
+
decode_sign_cap_ctxt(conn,
(struct smb2_signing_capabilities *)pctx,
- len_of_ctxts);
+ ctxt_len);
}
/* offsets must be 8 byte aligned */
- clen = (clen + 7) & ~0x7;
- offset = clen + sizeof(struct smb2_neg_context);
- len_of_ctxts -= clen + sizeof(struct smb2_neg_context);
+ offset = (ctxt_len + 7) & ~0x7;
+ len_of_ctxts -= offset;
}
return status;
}
@@ -1103,22 +1095,22 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
int smb2_handle_negotiate(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
- struct smb2_negotiate_req *req = work->request_buf;
- struct smb2_negotiate_rsp *rsp = work->response_buf;
+ struct smb2_negotiate_req *req = smb2_get_msg(work->request_buf);
+ struct smb2_negotiate_rsp *rsp = smb2_get_msg(work->response_buf);
int rc = 0;
- unsigned int smb2_buf_len, smb2_neg_size;
+ unsigned int smb2_buf_len, smb2_neg_size, neg_ctxt_len = 0;
__le32 status;
ksmbd_debug(SMB, "Received negotiate request\n");
conn->need_neg = false;
- if (ksmbd_conn_good(work)) {
+ if (ksmbd_conn_good(conn)) {
pr_err("conn->tcp_status is already in CifsGood State\n");
work->send_no_response = 1;
return rc;
}
smb2_buf_len = get_rfc1002_len(work->request_buf);
- smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects) - 4;
+ smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects);
if (smb2_neg_size > smb2_buf_len) {
rsp->hdr.Status = STATUS_INVALID_PARAMETER;
rc = -EINVAL;
@@ -1174,7 +1166,8 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
goto err_out;
}
- status = deassemble_neg_contexts(conn, req);
+ status = deassemble_neg_contexts(conn, req,
+ get_rfc1002_len(work->request_buf));
if (status != STATUS_SUCCESS) {
pr_err("deassemble_neg_contexts error(0x%x)\n",
status);
@@ -1198,7 +1191,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
conn->preauth_info->Preauth_HashValue);
rsp->NegotiateContextOffset =
cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
- assemble_neg_contexts(conn, rsp);
+ neg_ctxt_len = assemble_neg_contexts(conn, rsp);
break;
case SMB302_PROT_ID:
init_smb3_02_server(conn);
@@ -1246,12 +1239,8 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
rsp->SecurityBufferOffset = cpu_to_le16(128);
rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
- ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
- sizeof(rsp->hdr.smb2_buf_length)) +
- le16_to_cpu(rsp->SecurityBufferOffset));
- inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
- sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
- AUTH_GSS_LENGTH);
+ ksmbd_copy_gss_neg_header((char *)(&rsp->hdr) +
+ le16_to_cpu(rsp->SecurityBufferOffset));
rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
conn->use_spnego = true;
@@ -1266,12 +1255,19 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
}
conn->srv_sec_mode = le16_to_cpu(rsp->SecurityMode);
- ksmbd_conn_set_need_negotiate(work);
+ ksmbd_conn_set_need_negotiate(conn);
err_out:
+ if (rc)
+ rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ if (!rc)
+ rc = ksmbd_iov_pin_rsp(work, rsp,
+ sizeof(struct smb2_negotiate_rsp) -
+ sizeof(rsp->Buffer) +
+ AUTH_GSS_LENGTH + neg_ctxt_len);
if (rc < 0)
smb2_set_err_rsp(work);
-
return rc;
}
@@ -1339,9 +1335,8 @@ static int decode_negotiation_token(struct ksmbd_conn *conn,
static int ntlm_negotiate(struct ksmbd_work *work,
struct negotiate_message *negblob,
- size_t negblob_len)
+ size_t negblob_len, struct smb2_sess_setup_rsp *rsp)
{
- struct smb2_sess_setup_rsp *rsp = work->response_buf;
struct challenge_message *chgblob;
unsigned char *spnego_blob = NULL;
u16 spnego_blob_len;
@@ -1446,10 +1441,10 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
return user;
}
-static int ntlm_authenticate(struct ksmbd_work *work)
+static int ntlm_authenticate(struct ksmbd_work *work,
+ struct smb2_sess_setup_req *req,
+ struct smb2_sess_setup_rsp *rsp)
{
- struct smb2_sess_setup_req *req = work->request_buf;
- struct smb2_sess_setup_rsp *rsp = work->response_buf;
struct ksmbd_conn *conn = work->conn;
struct ksmbd_session *sess = work->sess;
struct channel *chann = NULL;
@@ -1472,7 +1467,6 @@ static int ntlm_authenticate(struct ksmbd_work *work)
memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len);
rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
kfree(spnego_blob);
- inc_rfc1001_len(rsp, spnego_blob_len - 1);
}
user = session_user(conn, req);
@@ -1545,7 +1539,8 @@ static int ntlm_authenticate(struct ksmbd_work *work)
return -EINVAL;
}
sess->enc = true;
- rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION)
+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
/*
* signing is disable if encryption is enable
* on this session
@@ -1555,19 +1550,14 @@ static int ntlm_authenticate(struct ksmbd_work *work)
binding_session:
if (conn->dialect >= SMB30_PROT_ID) {
- read_lock(&sess->chann_lock);
chann = lookup_chann_list(sess, conn);
- read_unlock(&sess->chann_lock);
if (!chann) {
chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
if (!chann)
return -ENOMEM;
chann->conn = conn;
- INIT_LIST_HEAD(&chann->chann_list);
- write_lock(&sess->chann_lock);
- list_add(&chann->chann_list, &sess->ksmbd_chann_list);
- write_unlock(&sess->chann_lock);
+ xa_store(&sess->ksmbd_chann_list, (long)conn, chann, GFP_KERNEL);
}
}
@@ -1587,10 +1577,10 @@ binding_session:
}
#ifdef CONFIG_SMB_SERVER_KERBEROS5
-static int krb5_authenticate(struct ksmbd_work *work)
+static int krb5_authenticate(struct ksmbd_work *work,
+ struct smb2_sess_setup_req *req,
+ struct smb2_sess_setup_rsp *rsp)
{
- struct smb2_sess_setup_req *req = work->request_buf;
- struct smb2_sess_setup_rsp *rsp = work->response_buf;
struct ksmbd_conn *conn = work->conn;
struct ksmbd_session *sess = work->sess;
char *in_blob, *out_blob;
@@ -1605,8 +1595,7 @@ static int krb5_authenticate(struct ksmbd_work *work)
out_blob = (char *)&rsp->hdr.ProtocolId +
le16_to_cpu(rsp->SecurityBufferOffset);
out_len = work->response_sz -
- offsetof(struct smb2_hdr, smb2_buf_length) -
- le16_to_cpu(rsp->SecurityBufferOffset);
+ (le16_to_cpu(rsp->SecurityBufferOffset) + 4);
/* Check previous session */
prev_sess_id = le64_to_cpu(req->PreviousSessionId);
@@ -1623,7 +1612,6 @@ static int krb5_authenticate(struct ksmbd_work *work)
return -EINVAL;
}
rsp->SecurityBufferLength = cpu_to_le16(out_len);
- inc_rfc1001_len(rsp, out_len - 1);
if ((conn->sign || server_conf.enforced_signing) ||
(req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
@@ -1637,24 +1625,20 @@ static int krb5_authenticate(struct ksmbd_work *work)
return -EINVAL;
}
sess->enc = true;
- rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION)
+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
sess->sign = false;
}
if (conn->dialect >= SMB30_PROT_ID) {
- read_lock(&sess->chann_lock);
chann = lookup_chann_list(sess, conn);
- read_unlock(&sess->chann_lock);
if (!chann) {
chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
if (!chann)
return -ENOMEM;
chann->conn = conn;
- INIT_LIST_HEAD(&chann->chann_list);
- write_lock(&sess->chann_lock);
- list_add(&chann->chann_list, &sess->ksmbd_chann_list);
- write_unlock(&sess->chann_lock);
+ xa_store(&sess->ksmbd_chann_list, (long)conn, chann, GFP_KERNEL);
}
}
@@ -1673,7 +1657,9 @@ static int krb5_authenticate(struct ksmbd_work *work)
return 0;
}
#else
-static int krb5_authenticate(struct ksmbd_work *work)
+static int krb5_authenticate(struct ksmbd_work *work,
+ struct smb2_sess_setup_req *req,
+ struct smb2_sess_setup_rsp *rsp)
{
return -EOPNOTSUPP;
}
@@ -1682,8 +1668,8 @@ static int krb5_authenticate(struct ksmbd_work *work)
int smb2_sess_setup(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
- struct smb2_sess_setup_req *req = work->request_buf;
- struct smb2_sess_setup_rsp *rsp = work->response_buf;
+ struct smb2_sess_setup_req *req;
+ struct smb2_sess_setup_rsp *rsp;
struct ksmbd_session *sess;
struct negotiate_message *negblob;
unsigned int negblob_len, negblob_off;
@@ -1691,12 +1677,14 @@ int smb2_sess_setup(struct ksmbd_work *work)
ksmbd_debug(SMB, "Received request for session setup\n");
+ WORK_BUFFERS(work, req, rsp);
+
rsp->StructureSize = cpu_to_le16(9);
rsp->SessionFlags = 0;
rsp->SecurityBufferOffset = cpu_to_le16(72);
rsp->SecurityBufferLength = 0;
- inc_rfc1001_len(rsp, 9);
+ ksmbd_conn_lock(conn);
if (!req->hdr.SessionId) {
sess = ksmbd_smb2_session_create();
if (!sess) {
@@ -1744,6 +1732,12 @@ int smb2_sess_setup(struct ksmbd_work *work)
goto out_err;
}
+ if (ksmbd_conn_need_reconnect(conn)) {
+ rc = -EFAULT;
+ sess = NULL;
+ goto out_err;
+ }
+
if (ksmbd_session_lookup(conn, sess_id)) {
rc = -EACCES;
goto out_err;
@@ -1768,15 +1762,23 @@ int smb2_sess_setup(struct ksmbd_work *work)
rc = -ENOENT;
goto out_err;
}
+
+ if (sess->state == SMB2_SESSION_EXPIRED) {
+ rc = -EFAULT;
+ goto out_err;
+ }
+
+ if (ksmbd_conn_need_reconnect(conn)) {
+ rc = -EFAULT;
+ sess = NULL;
+ goto out_err;
+ }
}
work->sess = sess;
- if (sess->state == SMB2_SESSION_EXPIRED)
- sess->state = SMB2_SESSION_IN_PROGRESS;
-
negblob_off = le16_to_cpu(req->SecurityBufferOffset);
negblob_len = le16_to_cpu(req->SecurityBufferLength);
- if (negblob_off < (offsetof(struct smb2_sess_setup_req, Buffer) - 4) ||
+ if (negblob_off < offsetof(struct smb2_sess_setup_req, Buffer) ||
negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) {
rc = -EINVAL;
goto out_err;
@@ -1797,36 +1799,34 @@ int smb2_sess_setup(struct ksmbd_work *work)
if (conn->preferred_auth_mech &
(KSMBD_AUTH_KRB5 | KSMBD_AUTH_MSKRB5)) {
- rc = krb5_authenticate(work);
+ rc = krb5_authenticate(work, req, rsp);
if (rc) {
rc = -EINVAL;
goto out_err;
}
- ksmbd_conn_set_good(work);
- sess->state = SMB2_SESSION_VALID;
+ if (!ksmbd_conn_need_reconnect(conn)) {
+ ksmbd_conn_set_good(conn);
+ sess->state = SMB2_SESSION_VALID;
+ }
kfree(sess->Preauth_HashValue);
sess->Preauth_HashValue = NULL;
} else if (conn->preferred_auth_mech == KSMBD_AUTH_NTLMSSP) {
if (negblob->MessageType == NtLmNegotiate) {
- rc = ntlm_negotiate(work, negblob, negblob_len);
+ rc = ntlm_negotiate(work, negblob, negblob_len, rsp);
if (rc)
goto out_err;
rsp->hdr.Status =
STATUS_MORE_PROCESSING_REQUIRED;
- /*
- * Note: here total size -1 is done as an
- * adjustment for 0 size blob
- */
- inc_rfc1001_len(rsp, le16_to_cpu(rsp->SecurityBufferLength) - 1);
-
} else if (negblob->MessageType == NtLmAuthenticate) {
- rc = ntlm_authenticate(work);
+ rc = ntlm_authenticate(work, req, rsp);
if (rc)
goto out_err;
- ksmbd_conn_set_good(work);
- sess->state = SMB2_SESSION_VALID;
+ if (!ksmbd_conn_need_reconnect(conn)) {
+ ksmbd_conn_set_good(conn);
+ sess->state = SMB2_SESSION_VALID;
+ }
if (conn->binding) {
struct preauth_session *preauth_sess;
@@ -1894,14 +1894,29 @@ out_err:
if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION)
try_delay = true;
- xa_erase(&conn->sessions, sess->id);
- ksmbd_session_destroy(sess);
- work->sess = NULL;
- if (try_delay)
+ sess->last_active = jiffies;
+ sess->state = SMB2_SESSION_EXPIRED;
+ if (try_delay) {
+ ksmbd_conn_set_need_reconnect(conn);
ssleep(5);
+ ksmbd_conn_set_need_negotiate(conn);
+ }
}
+ smb2_set_err_rsp(work);
+ } else {
+ unsigned int iov_len;
+
+ if (rsp->SecurityBufferLength)
+ iov_len = offsetof(struct smb2_sess_setup_rsp, Buffer) +
+ le16_to_cpu(rsp->SecurityBufferLength);
+ else
+ iov_len = sizeof(struct smb2_sess_setup_rsp);
+ rc = ksmbd_iov_pin_rsp(work, rsp, iov_len);
+ if (rc)
+ rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
}
+ ksmbd_conn_unlock(conn);
return rc;
}
@@ -1914,14 +1929,16 @@ out_err:
int smb2_tree_connect(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
- struct smb2_tree_connect_req *req = work->request_buf;
- struct smb2_tree_connect_rsp *rsp = work->response_buf;
+ struct smb2_tree_connect_req *req;
+ struct smb2_tree_connect_rsp *rsp;
struct ksmbd_session *sess = work->sess;
char *treename = NULL, *name = NULL;
struct ksmbd_tree_conn_status status;
struct ksmbd_share_config *share;
int rc = -EINVAL;
+ WORK_BUFFERS(work, req, rsp);
+
treename = smb_strndup_from_utf16(req->Buffer,
le16_to_cpu(req->PathLength), true,
conn->local_nls);
@@ -1931,7 +1948,7 @@ int smb2_tree_connect(struct ksmbd_work *work)
goto out_err1;
}
- name = ksmbd_extract_sharename(treename);
+ name = ksmbd_extract_sharename(conn->um, treename);
if (IS_ERR(name)) {
status.ret = KSMBD_TREE_CONN_STATUS_ERROR;
goto out_err1;
@@ -1974,13 +1991,19 @@ int smb2_tree_connect(struct ksmbd_work *work)
if (conn->posix_ext_supported)
status.tree_conn->posix_extensions = true;
-out_err1:
+ write_lock(&sess->tree_conns_lock);
+ status.tree_conn->t_state = TREE_CONNECTED;
+ write_unlock(&sess->tree_conns_lock);
rsp->StructureSize = cpu_to_le16(16);
+out_err1:
rsp->Capabilities = 0;
rsp->Reserved = 0;
/* default manual caching */
rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
- inc_rfc1001_len(rsp, 16);
+
+ rc = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_tree_connect_rsp));
+ if (rc)
+ status.ret = KSMBD_TREE_CONN_STATUS_NOMEM;
if (!IS_ERR(treename))
kfree(treename);
@@ -1992,6 +2015,7 @@ out_err1:
rsp->hdr.Status = STATUS_SUCCESS;
rc = 0;
break;
+ case -ESTALE:
case -ENOENT:
case KSMBD_TREE_CONN_STATUS_NO_SHARE:
rsp->hdr.Status = STATUS_BAD_NETWORK_NAME;
@@ -2012,6 +2036,9 @@ out_err1:
rsp->hdr.Status = STATUS_ACCESS_DENIED;
}
+ if (status.ret != KSMBD_TREE_CONN_STATUS_OK)
+ smb2_set_err_rsp(work);
+
return rc;
}
@@ -2086,28 +2113,60 @@ static int smb2_create_open_flags(bool file_present, __le32 access,
*/
int smb2_tree_disconnect(struct ksmbd_work *work)
{
- struct smb2_tree_disconnect_rsp *rsp = work->response_buf;
+ struct smb2_tree_disconnect_rsp *rsp;
+ struct smb2_tree_disconnect_req *req;
struct ksmbd_session *sess = work->sess;
struct ksmbd_tree_connect *tcon = work->tcon;
+ int err;
- rsp->StructureSize = cpu_to_le16(4);
- inc_rfc1001_len(rsp, 4);
+ WORK_BUFFERS(work, req, rsp);
ksmbd_debug(SMB, "request\n");
if (!tcon) {
- struct smb2_tree_disconnect_req *req = work->request_buf;
-
ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId);
+
rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
- smb2_set_err_rsp(work);
- return 0;
+ err = -ENOENT;
+ goto err_out;
}
ksmbd_close_tree_conn_fds(work);
- ksmbd_tree_conn_disconnect(sess, tcon);
+
+ write_lock(&sess->tree_conns_lock);
+ if (tcon->t_state == TREE_DISCONNECTED) {
+ write_unlock(&sess->tree_conns_lock);
+ rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ WARN_ON_ONCE(atomic_dec_and_test(&tcon->refcount));
+ tcon->t_state = TREE_DISCONNECTED;
+ write_unlock(&sess->tree_conns_lock);
+
+ err = ksmbd_tree_conn_disconnect(sess, tcon);
+ if (err) {
+ rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
+ goto err_out;
+ }
+
work->tcon = NULL;
+
+ rsp->StructureSize = cpu_to_le16(4);
+ err = ksmbd_iov_pin_rsp(work, rsp,
+ sizeof(struct smb2_tree_disconnect_rsp));
+ if (err) {
+ rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto err_out;
+ }
+
return 0;
+
+err_out:
+ smb2_set_err_rsp(work);
+ return err;
+
}
/**
@@ -2119,26 +2178,40 @@ int smb2_tree_disconnect(struct ksmbd_work *work)
int smb2_session_logoff(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
- struct smb2_logoff_rsp *rsp = work->response_buf;
- struct ksmbd_session *sess = work->sess;
+ struct smb2_logoff_req *req;
+ struct smb2_logoff_rsp *rsp;
+ struct ksmbd_session *sess;
+ u64 sess_id;
+ int err;
- rsp->StructureSize = cpu_to_le16(4);
- inc_rfc1001_len(rsp, 4);
+ WORK_BUFFERS(work, req, rsp);
ksmbd_debug(SMB, "request\n");
- /* setting CifsExiting here may race with start_tcp_sess */
- ksmbd_conn_set_need_reconnect(work);
+ ksmbd_conn_lock(conn);
+ if (!ksmbd_conn_good(conn)) {
+ ksmbd_conn_unlock(conn);
+ rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
+ smb2_set_err_rsp(work);
+ return -ENOENT;
+ }
+ sess_id = le64_to_cpu(req->hdr.SessionId);
+ ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_RECONNECT);
+ ksmbd_conn_unlock(conn);
+
ksmbd_close_session_fds(work);
- ksmbd_conn_wait_idle(conn);
+ ksmbd_conn_wait_idle(conn, sess_id);
+ /*
+ * Re-lookup session to validate if session is deleted
+ * while waiting request complete
+ */
+ sess = ksmbd_session_lookup_all(conn, sess_id);
if (ksmbd_tree_conn_session_logoff(sess)) {
- struct smb2_logoff_req *req = work->request_buf;
-
ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId);
rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
smb2_set_err_rsp(work);
- return 0;
+ return -ENOENT;
}
ksmbd_destroy_file_table(&sess->file_table);
@@ -2146,9 +2219,15 @@ int smb2_session_logoff(struct ksmbd_work *work)
ksmbd_free_user(sess->user);
sess->user = NULL;
+ ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_NEGOTIATE);
- /* let start_tcp_sess free connection info now */
- ksmbd_conn_set_need_negotiate(work);
+ rsp->StructureSize = cpu_to_le16(4);
+ err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_logoff_rsp));
+ if (err) {
+ rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+ smb2_set_err_rsp(work);
+ return err;
+ }
return 0;
}
@@ -2160,12 +2239,14 @@ int smb2_session_logoff(struct ksmbd_work *work)
*/
static noinline int create_smb2_pipe(struct ksmbd_work *work)
{
- struct smb2_create_rsp *rsp = work->response_buf;
- struct smb2_create_req *req = work->request_buf;
+ struct smb2_create_rsp *rsp;
+ struct smb2_create_req *req;
int id;
int err;
char *name;
+ WORK_BUFFERS(work, req, rsp);
+
name = smb_strndup_from_utf16(req->Buffer, le16_to_cpu(req->NameLength),
1, work->conn->local_nls);
if (IS_ERR(name)) {
@@ -2194,12 +2275,15 @@ static noinline int create_smb2_pipe(struct ksmbd_work *work)
rsp->EndofFile = cpu_to_le64(0);
rsp->FileAttributes = ATTR_NORMAL_LE;
rsp->Reserved2 = 0;
- rsp->VolatileFileId = cpu_to_le64(id);
+ rsp->VolatileFileId = id;
rsp->PersistentFileId = 0;
rsp->CreateContextsOffset = 0;
rsp->CreateContextsLength = 0;
- inc_rfc1001_len(rsp, 88); /* StructureSize - 1*/
+ err = ksmbd_iov_pin_rsp(work, rsp, offsetof(struct smb2_create_rsp, Buffer));
+ if (err)
+ goto out;
+
kfree(name);
return 0;
@@ -2231,7 +2315,7 @@ out:
* Return: 0 on success, otherwise error
*/
static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
- struct path *path)
+ const struct path *path)
{
struct user_namespace *user_ns = mnt_user_ns(path->mnt);
char *attr_name = NULL, *value;
@@ -2278,7 +2362,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
/* delete the EA only when it exits */
if (rc > 0) {
rc = ksmbd_vfs_remove_xattr(user_ns,
- path->dentry,
+ path,
attr_name);
if (rc < 0) {
@@ -2292,9 +2376,9 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
/* if the EA doesn't exist, just do nothing. */
rc = 0;
} else {
- rc = ksmbd_vfs_setxattr(user_ns,
- path->dentry, attr_name, value,
- le16_to_cpu(eabuf->EaValueLength), 0);
+ rc = ksmbd_vfs_setxattr(user_ns, path, attr_name, value,
+ le16_to_cpu(eabuf->EaValueLength),
+ 0, true);
if (rc < 0) {
ksmbd_debug(SMB,
"ksmbd_vfs_setxattr is failed(%d)\n",
@@ -2325,7 +2409,7 @@ next:
return rc;
}
-static noinline int smb2_set_stream_name_xattr(struct path *path,
+static noinline int smb2_set_stream_name_xattr(const struct path *path,
struct ksmbd_file *fp,
char *stream_name, int s_type)
{
@@ -2357,14 +2441,13 @@ static noinline int smb2_set_stream_name_xattr(struct path *path,
return -EBADF;
}
- rc = ksmbd_vfs_setxattr(user_ns, path->dentry,
- xattr_stream_name, NULL, 0, 0);
+ rc = ksmbd_vfs_setxattr(user_ns, path, xattr_stream_name, NULL, 0, 0, false);
if (rc < 0)
pr_err("Failed to store XATTR stream name :%d\n", rc);
return 0;
}
-static int smb2_remove_smb_xattrs(struct path *path)
+static int smb2_remove_smb_xattrs(const struct path *path)
{
struct user_namespace *user_ns = mnt_user_ns(path->mnt);
char *name, *xattr_list = NULL;
@@ -2386,7 +2469,7 @@ static int smb2_remove_smb_xattrs(struct path *path)
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
!strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX,
STREAM_PREFIX_LEN)) {
- err = ksmbd_vfs_remove_xattr(user_ns, path->dentry,
+ err = ksmbd_vfs_remove_xattr(user_ns, path,
name);
if (err)
ksmbd_debug(SMB, "remove xattr failed : %s\n",
@@ -2398,7 +2481,7 @@ out:
return err;
}
-static int smb2_create_truncate(struct path *path)
+static int smb2_create_truncate(const struct path *path)
{
int rc = vfs_truncate(path, 0);
@@ -2417,7 +2500,7 @@ static int smb2_create_truncate(struct path *path)
return rc;
}
-static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, struct path *path,
+static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path *path,
struct ksmbd_file *fp)
{
struct xattr_dos_attrib da = {0};
@@ -2433,14 +2516,13 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, struct path *path,
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_user_ns(path->mnt),
- path->dentry, &da);
+ rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_user_ns(path->mnt), path, &da, true);
if (rc)
ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
}
static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
- struct path *path, struct ksmbd_file *fp)
+ const struct path *path, struct ksmbd_file *fp)
{
struct xattr_dos_attrib da;
int rc;
@@ -2461,8 +2543,9 @@ static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
}
}
-static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
- int open_flags, umode_t posix_mode, bool is_dir)
+static int smb2_creat(struct ksmbd_work *work, struct path *parent_path,
+ struct path *path, char *name, int open_flags,
+ umode_t posix_mode, bool is_dir)
{
struct ksmbd_tree_connect *tcon = work->tcon;
struct ksmbd_share_config *share = tcon->share_conf;
@@ -2489,7 +2572,7 @@ static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
return rc;
}
- rc = ksmbd_vfs_kern_path(work, name, 0, path, 0);
+ rc = ksmbd_vfs_kern_path_locked(work, name, 0, parent_path, path, 0);
if (rc) {
pr_err("cannot get linux path (%s), err = %d\n",
name, rc);
@@ -2500,7 +2583,7 @@ static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
static int smb2_create_sd_buffer(struct ksmbd_work *work,
struct smb2_create_req *req,
- struct path *path)
+ const struct path *path)
{
struct create_context *context;
struct create_sd_buf_req *sd_buf;
@@ -2523,7 +2606,7 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
sizeof(struct create_sd_buf_req))
return -EINVAL;
return set_info_sec(work->conn, work->tcon, path, &sd_buf->ntsd,
- le32_to_cpu(sd_buf->ccontext.DataLength), true);
+ le32_to_cpu(sd_buf->ccontext.DataLength), true, false);
}
static void ksmbd_acls_fattr(struct smb_fattr *fattr,
@@ -2555,8 +2638,8 @@ int smb2_open(struct ksmbd_work *work)
struct ksmbd_session *sess = work->sess;
struct ksmbd_tree_connect *tcon = work->tcon;
struct smb2_create_req *req;
- struct smb2_create_rsp *rsp, *rsp_org;
- struct path path;
+ struct smb2_create_rsp *rsp;
+ struct path path, parent_path;
struct ksmbd_share_config *share = tcon->share_conf;
struct ksmbd_file *fp = NULL;
struct file *filp = NULL;
@@ -2580,8 +2663,8 @@ int smb2_open(struct ksmbd_work *work)
u64 time;
umode_t posix_mode = 0;
__le32 daccess, maximal_access = 0;
+ int iov_len = 0;
- rsp_org = work->response_buf;
WORK_BUFFERS(work, req, rsp);
if (req->hdr.NextCommand && !work->next_smb2_rcv_hdr_off &&
@@ -2602,11 +2685,10 @@ int smb2_open(struct ksmbd_work *work)
*(char *)req->Buffer == '\\') {
pr_err("not allow directory name included leading slash\n");
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
- name = smb2_get_name(share,
- req->Buffer,
+ name = smb2_get_name(req->Buffer,
le16_to_cpu(req->NameLength),
work->conn->local_nls);
if (IS_ERR(name)) {
@@ -2614,7 +2696,7 @@ int smb2_open(struct ksmbd_work *work)
if (rc != -ENOMEM)
rc = -ENOENT;
name = NULL;
- goto err_out1;
+ goto err_out2;
}
ksmbd_debug(SMB, "converted name = %s\n", name);
@@ -2622,48 +2704,44 @@ int smb2_open(struct ksmbd_work *work)
if (!test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_STREAMS)) {
rc = -EBADF;
- goto err_out1;
+ goto err_out2;
}
rc = parse_stream_name(name, &stream_name, &s_type);
if (rc < 0)
- goto err_out1;
+ goto err_out2;
}
rc = ksmbd_validate_filename(name);
if (rc < 0)
- goto err_out1;
+ goto err_out2;
if (ksmbd_share_veto_filename(share, name)) {
rc = -ENOENT;
ksmbd_debug(SMB, "Reject open(), vetoed file: %s\n",
name);
- goto err_out1;
+ goto err_out2;
}
} else {
name = kstrdup("", GFP_KERNEL);
if (!name) {
rc = -ENOMEM;
- goto err_out1;
+ goto err_out2;
}
}
- req_op_level = req->RequestedOplockLevel;
- if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
- lc = parse_lease_state(req);
-
if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE_LE)) {
pr_err("Invalid impersonationlevel : 0x%x\n",
le32_to_cpu(req->ImpersonationLevel));
rc = -EIO;
rsp->hdr.Status = STATUS_BAD_IMPERSONATION_LEVEL;
- goto err_out1;
+ goto err_out2;
}
if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK)) {
pr_err("Invalid create options : 0x%x\n",
le32_to_cpu(req->CreateOptions));
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
} else {
if (req->CreateOptions & FILE_SEQUENTIAL_ONLY_LE &&
req->CreateOptions & FILE_RANDOM_ACCESS_LE)
@@ -2673,13 +2751,13 @@ int smb2_open(struct ksmbd_work *work)
(FILE_OPEN_BY_FILE_ID_LE | CREATE_TREE_CONNECTION |
FILE_RESERVE_OPFILTER_LE)) {
rc = -EOPNOTSUPP;
- goto err_out1;
+ goto err_out2;
}
if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
if (req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE) {
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
} else if (req->CreateOptions & FILE_NO_COMPRESSION_LE) {
req->CreateOptions = ~(FILE_NO_COMPRESSION_LE);
}
@@ -2691,21 +2769,21 @@ int smb2_open(struct ksmbd_work *work)
pr_err("Invalid create disposition : 0x%x\n",
le32_to_cpu(req->CreateDisposition));
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
if (!(req->DesiredAccess & DESIRED_ACCESS_MASK)) {
pr_err("Invalid desired access : 0x%x\n",
le32_to_cpu(req->DesiredAccess));
rc = -EACCES;
- goto err_out1;
+ goto err_out2;
}
if (req->FileAttributes && !(req->FileAttributes & ATTR_MASK_LE)) {
pr_err("Invalid file attribute : 0x%x\n",
le32_to_cpu(req->FileAttributes));
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
if (req->CreateContextsOffset) {
@@ -2713,19 +2791,19 @@ int smb2_open(struct ksmbd_work *work)
context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out1;
+ goto err_out2;
} else if (context) {
ea_buf = (struct create_ea_buf_req *)context;
if (le16_to_cpu(context->DataOffset) +
le32_to_cpu(context->DataLength) <
sizeof(struct create_ea_buf_req)) {
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
if (req->CreateOptions & FILE_NO_EA_KNOWLEDGE_LE) {
rsp->hdr.Status = STATUS_ACCESS_DENIED;
rc = -EACCES;
- goto err_out1;
+ goto err_out2;
}
}
@@ -2733,7 +2811,7 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out1;
+ goto err_out2;
} else if (context) {
ksmbd_debug(SMB,
"get query maximal access context\n");
@@ -2744,11 +2822,11 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_TIMEWARP_REQUEST, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out1;
+ goto err_out2;
} else if (context) {
ksmbd_debug(SMB, "get timewarp context\n");
rc = -EBADF;
- goto err_out1;
+ goto err_out2;
}
if (tcon->posix_extensions) {
@@ -2756,7 +2834,7 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_TAG_POSIX, 16);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out1;
+ goto err_out2;
} else if (context) {
struct create_posix *posix =
(struct create_posix *)context;
@@ -2764,7 +2842,7 @@ int smb2_open(struct ksmbd_work *work)
le32_to_cpu(context->DataLength) <
sizeof(struct create_posix) - 4) {
rc = -EINVAL;
- goto err_out1;
+ goto err_out2;
}
ksmbd_debug(SMB, "get posix context\n");
@@ -2776,11 +2854,14 @@ int smb2_open(struct ksmbd_work *work)
if (ksmbd_override_fsids(work)) {
rc = -ENOMEM;
- goto err_out1;
+ goto err_out2;
}
- rc = ksmbd_vfs_kern_path(work, name, LOOKUP_NO_SYMLINKS, &path, 1);
+ rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS,
+ &parent_path, &path, 1);
if (!rc) {
+ file_present = true;
+
if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE) {
/*
* If file exists with under flags, return access
@@ -2789,7 +2870,6 @@ int smb2_open(struct ksmbd_work *work)
if (req->CreateDisposition == FILE_OVERWRITE_IF_LE ||
req->CreateDisposition == FILE_OPEN_IF_LE) {
rc = -EACCES;
- path_put(&path);
goto err_out;
}
@@ -2797,27 +2877,23 @@ int smb2_open(struct ksmbd_work *work)
ksmbd_debug(SMB,
"User does not have write permission\n");
rc = -EACCES;
- path_put(&path);
goto err_out;
}
} else if (d_is_symlink(path.dentry)) {
rc = -EACCES;
- path_put(&path);
goto err_out;
}
- }
- if (rc) {
+ file_present = true;
+ user_ns = mnt_user_ns(path.mnt);
+ } else {
if (rc != -ENOENT)
goto err_out;
ksmbd_debug(SMB, "can not get linux path for %s, rc = %d\n",
name, rc);
rc = 0;
- } else {
- file_present = true;
- user_ns = mnt_user_ns(path.mnt);
- generic_fillattr(user_ns, d_inode(path.dentry), &stat);
}
+
if (stream_name) {
if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
if (s_type == DATA_STREAM) {
@@ -2825,7 +2901,8 @@ int smb2_open(struct ksmbd_work *work)
rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
}
} else {
- if (S_ISDIR(stat.mode) && s_type == DATA_STREAM) {
+ if (file_present && S_ISDIR(d_inode(path.dentry)->i_mode) &&
+ s_type == DATA_STREAM) {
rc = -EIO;
rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
}
@@ -2842,7 +2919,8 @@ int smb2_open(struct ksmbd_work *work)
}
if (file_present && req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE &&
- S_ISDIR(stat.mode) && !(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
+ S_ISDIR(d_inode(path.dentry)->i_mode) &&
+ !(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
ksmbd_debug(SMB, "open() argument is a directory: %s, %x\n",
name, req->CreateOptions);
rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
@@ -2852,7 +2930,7 @@ int smb2_open(struct ksmbd_work *work)
if (file_present && (req->CreateOptions & FILE_DIRECTORY_FILE_LE) &&
!(req->CreateDisposition == FILE_CREATE_LE) &&
- !S_ISDIR(stat.mode)) {
+ !S_ISDIR(d_inode(path.dentry)->i_mode)) {
rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
rc = -EIO;
goto err_out;
@@ -2877,11 +2955,9 @@ int smb2_open(struct ksmbd_work *work)
if (!file_present) {
daccess = cpu_to_le32(GENERIC_ALL_FLAGS);
} else {
- rc = ksmbd_vfs_query_maximal_access(user_ns,
+ ksmbd_vfs_query_maximal_access(user_ns,
path.dentry,
&daccess);
- if (rc)
- goto err_out;
already_permitted = true;
}
maximal_access = daccess;
@@ -2902,7 +2978,8 @@ int smb2_open(struct ksmbd_work *work)
/*create file if not present */
if (!file_present) {
- rc = smb2_creat(work, &path, name, open_flags, posix_mode,
+ rc = smb2_creat(work, &parent_path, &path, name, open_flags,
+ posix_mode,
req->CreateOptions & FILE_DIRECTORY_FILE_LE);
if (rc) {
if (rc == -ENOENT) {
@@ -2943,15 +3020,16 @@ int smb2_open(struct ksmbd_work *work)
if ((daccess & FILE_DELETE_LE) ||
(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
- rc = ksmbd_vfs_may_delete(user_ns,
- path.dentry);
+ rc = inode_permission(user_ns,
+ d_inode(path.dentry->d_parent),
+ MAY_EXEC | MAY_WRITE);
if (rc)
goto err_out;
}
}
}
- rc = ksmbd_query_inode_status(d_inode(path.dentry->d_parent));
+ rc = ksmbd_query_inode_status(path.dentry->d_parent);
if (rc == KSMBD_INODE_STATUS_PENDING_DELETE) {
rc = -EBUSY;
goto err_out;
@@ -2996,7 +3074,6 @@ int smb2_open(struct ksmbd_work *work)
goto err_out;
}
- fp->filename = name;
fp->cdoption = req->CreateDisposition;
fp->daccess = daccess;
fp->saccess = req->ShareAccess;
@@ -3008,7 +3085,7 @@ int smb2_open(struct ksmbd_work *work)
struct inode *inode = d_inode(path.dentry);
posix_acl_rc = ksmbd_vfs_inherit_posix_acl(user_ns,
- inode,
+ &path,
d_inode(path.dentry->d_parent));
if (posix_acl_rc)
ksmbd_debug(SMB, "inherit posix acl failed : %d\n", posix_acl_rc);
@@ -3024,7 +3101,7 @@ int smb2_open(struct ksmbd_work *work)
if (rc) {
if (posix_acl_rc)
ksmbd_vfs_set_init_posix_acl(user_ns,
- inode);
+ &path);
if (test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_ACL_XATTR)) {
@@ -3064,9 +3141,10 @@ int smb2_open(struct ksmbd_work *work)
rc = ksmbd_vfs_set_sd_xattr(conn,
user_ns,
- path.dentry,
+ &path,
pntsd,
- pntsd_size);
+ pntsd_size,
+ false);
kfree(pntsd);
if (rc)
pr_err("failed to store ntacl in xattr : %d\n",
@@ -3089,11 +3167,6 @@ int smb2_open(struct ksmbd_work *work)
fp->attrib_only = !(req->DesiredAccess & ~(FILE_READ_ATTRIBUTES_LE |
FILE_WRITE_ATTRIBUTES_LE | FILE_SYNCHRONIZE_LE));
- if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
- !fp->attrib_only && !stream_name) {
- smb_break_all_oplock(work, fp);
- need_truncate = 1;
- }
/* fp should be searchable through ksmbd_inode.m_fp_list
* after daccess, saccess, attrib_only, and stream are
@@ -3103,35 +3176,49 @@ int smb2_open(struct ksmbd_work *work)
list_add(&fp->node, &fp->f_ci->m_fp_list);
write_unlock(&fp->f_ci->m_lock);
- rc = ksmbd_vfs_getattr(&path, &stat);
- if (rc) {
- generic_fillattr(user_ns, d_inode(path.dentry), &stat);
- rc = 0;
- }
-
/* Check delete pending among previous fp before oplock break */
if (ksmbd_inode_pending_delete(fp)) {
rc = -EBUSY;
goto err_out;
}
+ if (file_present || created)
+ ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
+ if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
+ !fp->attrib_only && !stream_name) {
+ smb_break_all_oplock(work, fp);
+ need_truncate = 1;
+ }
+
+ req_op_level = req->RequestedOplockLevel;
+ if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
+ lc = parse_lease_state(req, S_ISDIR(file_inode(filp)->i_mode));
+
share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp);
if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) ||
(req_op_level == SMB2_OPLOCK_LEVEL_LEASE &&
!(conn->vals->capabilities & SMB2_GLOBAL_CAP_LEASING))) {
if (share_ret < 0 && !S_ISDIR(file_inode(fp->filp)->i_mode)) {
rc = share_ret;
- goto err_out;
+ goto err_out1;
}
} else {
if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) {
+ /*
+ * Compare parent lease using parent key. If there is no
+ * a lease that has same parent key, Send lease break
+ * notification.
+ */
+ smb_send_parent_lease_break_noti(fp, lc);
+
req_op_level = smb2_map_lease_to_oplock(lc->req_state);
ksmbd_debug(SMB,
"lease req for(%s) req oplock state 0x%x, lease state 0x%x\n",
name, req_op_level, lc->req_state);
rc = find_same_lease_key(sess, fp->f_ci, lc);
if (rc)
- goto err_out;
+ goto err_out1;
} else if (open_flags == O_RDONLY &&
(req_op_level == SMB2_OPLOCK_LEVEL_BATCH ||
req_op_level == SMB2_OPLOCK_LEVEL_EXCLUSIVE))
@@ -3142,16 +3229,16 @@ int smb2_open(struct ksmbd_work *work)
le32_to_cpu(req->hdr.Id.SyncId.TreeId),
lc, share_ret);
if (rc < 0)
- goto err_out;
+ goto err_out1;
}
if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)
ksmbd_fd_set_delete_on_close(fp, file_info);
if (need_truncate) {
- rc = smb2_create_truncate(&path);
+ rc = smb2_create_truncate(&fp->filp->f_path);
if (rc)
- goto err_out;
+ goto err_out1;
}
if (req->CreateContextsOffset) {
@@ -3161,7 +3248,7 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_ALLOCATION_SIZE, 4);
if (IS_ERR(az_req)) {
rc = PTR_ERR(az_req);
- goto err_out;
+ goto err_out1;
} else if (az_req) {
loff_t alloc_size;
int err;
@@ -3170,7 +3257,7 @@ int smb2_open(struct ksmbd_work *work)
le32_to_cpu(az_req->ccontext.DataLength) <
sizeof(struct create_alloc_size_req)) {
rc = -EINVAL;
- goto err_out;
+ goto err_out1;
}
alloc_size = le64_to_cpu(az_req->AllocationSize);
ksmbd_debug(SMB,
@@ -3188,13 +3275,17 @@ int smb2_open(struct ksmbd_work *work)
context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
- goto err_out;
+ goto err_out1;
} else if (context) {
ksmbd_debug(SMB, "get query on disk id context\n");
query_disk_id = 1;
}
}
+ rc = ksmbd_vfs_getattr(&path, &stat);
+ if (rc)
+ goto err_out1;
+
if (stat.result_mask & STATX_BTIME)
fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
else
@@ -3210,9 +3301,6 @@ int smb2_open(struct ksmbd_work *work)
memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
- generic_fillattr(user_ns, file_inode(fp->filp),
- &stat);
-
rsp->StructureSize = cpu_to_le16(89);
rcu_read_lock();
opinfo = rcu_dereference(fp->f_opinfo);
@@ -3234,12 +3322,12 @@ int smb2_open(struct ksmbd_work *work)
rsp->Reserved2 = 0;
- rsp->PersistentFileId = cpu_to_le64(fp->persistent_id);
- rsp->VolatileFileId = cpu_to_le64(fp->volatile_id);
+ rsp->PersistentFileId = fp->persistent_id;
+ rsp->VolatileFileId = fp->volatile_id;
rsp->CreateContextsOffset = 0;
rsp->CreateContextsLength = 0;
- inc_rfc1001_len(rsp_org, 88); /* StructureSize - 1*/
+ iov_len = offsetof(struct smb2_create_rsp, Buffer);
/* If lease is request send lease context response */
if (opinfo && opinfo->is_lease) {
@@ -3254,7 +3342,7 @@ int smb2_open(struct ksmbd_work *work)
create_lease_buf(rsp->Buffer, opinfo->o_lease);
le32_add_cpu(&rsp->CreateContextsLength,
conn->vals->create_lease_size);
- inc_rfc1001_len(rsp_org, conn->vals->create_lease_size);
+ iov_len += conn->vals->create_lease_size;
next_ptr = &lease_ccontext->Next;
next_off = conn->vals->create_lease_size;
}
@@ -3274,7 +3362,7 @@ int smb2_open(struct ksmbd_work *work)
le32_to_cpu(maximal_access));
le32_add_cpu(&rsp->CreateContextsLength,
conn->vals->create_mxac_size);
- inc_rfc1001_len(rsp_org, conn->vals->create_mxac_size);
+ iov_len += conn->vals->create_mxac_size;
if (next_ptr)
*next_ptr = cpu_to_le32(next_off);
next_ptr = &mxac_ccontext->Next;
@@ -3292,7 +3380,7 @@ int smb2_open(struct ksmbd_work *work)
stat.ino, tcon->id);
le32_add_cpu(&rsp->CreateContextsLength,
conn->vals->create_disk_id_size);
- inc_rfc1001_len(rsp_org, conn->vals->create_disk_id_size);
+ iov_len += conn->vals->create_disk_id_size;
if (next_ptr)
*next_ptr = cpu_to_le32(next_off);
next_ptr = &disk_id_ccontext->Next;
@@ -3306,22 +3394,28 @@ int smb2_open(struct ksmbd_work *work)
fp);
le32_add_cpu(&rsp->CreateContextsLength,
conn->vals->create_posix_size);
- inc_rfc1001_len(rsp_org, conn->vals->create_posix_size);
+ iov_len += conn->vals->create_posix_size;
if (next_ptr)
*next_ptr = cpu_to_le32(next_off);
}
if (contxt_cnt > 0) {
rsp->CreateContextsOffset =
- cpu_to_le32(offsetof(struct smb2_create_rsp, Buffer)
- - 4);
+ cpu_to_le32(offsetof(struct smb2_create_rsp, Buffer));
}
err_out:
- if (file_present || created)
- path_put(&path);
- ksmbd_revert_fsids(work);
+ if (rc && (file_present || created))
+ ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
err_out1:
+ ksmbd_revert_fsids(work);
+
+err_out2:
+ if (!rc) {
+ ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED);
+ rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len);
+ }
if (rc) {
if (rc == -EINVAL)
rsp->hdr.Status = STATUS_INVALID_PARAMETER;
@@ -3348,14 +3442,13 @@ err_out1:
if (!rsp->hdr.Status)
rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
- if (!fp || !fp->filename)
- kfree(name);
if (fp)
ksmbd_fd_put(work, fp);
smb2_set_err_rsp(work);
ksmbd_debug(SMB, "Error response: %x\n", rsp->hdr.Status);
}
+ kfree(name);
kfree(lc);
return 0;
@@ -3467,7 +3560,6 @@ static int dentry_name(struct ksmbd_dir_info *d_info, int info_level)
* @conn: connection instance
* @info_level: smb information level
* @d_info: structure included variables for query dir
- * @user_ns: user namespace
* @ksmbd_kstat: ksmbd wrapper of dirent stat information
*
* if directory has many entries, find first can't read it fully.
@@ -3497,7 +3589,7 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
goto free_conv_name;
}
- struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len;
+ struct_sz = readdir_info_level_struct_sz(info_level) + conv_len;
next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT);
d_info->last_entry_off_align = next_entry_offset - struct_sz;
@@ -3624,16 +3716,21 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
posix_info->AllocationSize = cpu_to_le64(ksmbd_kstat->kstat->blocks << 9);
posix_info->DeviceId = cpu_to_le32(ksmbd_kstat->kstat->rdev);
posix_info->HardLinks = cpu_to_le32(ksmbd_kstat->kstat->nlink);
- posix_info->Mode = cpu_to_le32(ksmbd_kstat->kstat->mode);
+ posix_info->Mode = cpu_to_le32(ksmbd_kstat->kstat->mode & 0777);
posix_info->Inode = cpu_to_le64(ksmbd_kstat->kstat->ino);
posix_info->DosAttributes =
S_ISDIR(ksmbd_kstat->kstat->mode) ? ATTR_DIRECTORY_LE : ATTR_ARCHIVE_LE;
if (d_info->hide_dot_file && d_info->name[0] == '.')
posix_info->DosAttributes |= ATTR_HIDDEN_LE;
+ /*
+ * SidBuffer(32) contain two sids(Domain sid(16), UNIX group sid(16)).
+ * UNIX sid(16) = revision(1) + num_subauth(1) + authority(6) +
+ * sub_auth(4 * 1(num_subauth)) + RID(4).
+ */
id_to_sid(from_kuid_munged(&init_user_ns, ksmbd_kstat->kstat->uid),
- SIDNFS_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
+ SIDUNIX_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
id_to_sid(from_kgid_munged(&init_user_ns, ksmbd_kstat->kstat->gid),
- SIDNFS_GROUP, (struct smb_sid *)&posix_info->SidBuffer[20]);
+ SIDUNIX_GROUP, (struct smb_sid *)&posix_info->SidBuffer[16]);
memcpy(posix_info->name, conv_name, conv_len);
posix_info->name_len = cpu_to_le32(conv_len);
posix_info->NextEntryOffset = cpu_to_le32(next_entry_offset);
@@ -3743,7 +3840,7 @@ static int reserve_populate_dentry(struct ksmbd_dir_info *d_info,
return -EOPNOTSUPP;
conv_len = (d_info->name_len + 1) * 2;
- next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
+ next_entry_offset = ALIGN(struct_sz + conv_len,
KSMBD_DIR_INFO_ALIGNMENT);
if (next_entry_offset > d_info->out_buf_len) {
@@ -3917,7 +4014,7 @@ int smb2_query_dir(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
struct smb2_query_directory_req *req;
- struct smb2_query_directory_rsp *rsp, *rsp_org;
+ struct smb2_query_directory_rsp *rsp;
struct ksmbd_share_config *share = work->tcon->share_conf;
struct ksmbd_file *dir_fp = NULL;
struct ksmbd_dir_info d_info;
@@ -3927,7 +4024,6 @@ int smb2_query_dir(struct ksmbd_work *work)
int buffer_sz;
struct smb2_query_dir_private query_dir_private = {NULL, };
- rsp_org = work->response_buf;
WORK_BUFFERS(work, req, rsp);
if (ksmbd_override_fsids(work)) {
@@ -3942,9 +4038,7 @@ int smb2_query_dir(struct ksmbd_work *work)
goto err_out2;
}
- dir_fp = ksmbd_lookup_fd_slow(work,
- le64_to_cpu(req->VolatileFileId),
- le64_to_cpu(req->PersistentFileId));
+ dir_fp = ksmbd_lookup_fd_slow(work, req->VolatileFileId, req->PersistentFileId);
if (!dir_fp) {
rc = -EBADF;
goto err_out2;
@@ -3954,8 +4048,7 @@ int smb2_query_dir(struct ksmbd_work *work)
inode_permission(file_mnt_user_ns(dir_fp->filp),
file_inode(dir_fp->filp),
MAY_READ | MAY_EXEC)) {
- pr_err("no right to enumerate directory (%pd)\n",
- dir_fp->filp->f_path.dentry);
+ pr_err("no right to enumerate directory (%pD)\n", dir_fp->filp);
rc = -EACCES;
goto err_out2;
}
@@ -3978,8 +4071,6 @@ int smb2_query_dir(struct ksmbd_work *work)
ksmbd_debug(SMB, "Search pattern is %s\n", srch_ptr);
}
- ksmbd_debug(SMB, "Directory name is %s\n", dir_fp->filename);
-
if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) {
ksmbd_debug(SMB, "Restart directory scan\n");
generic_file_llseek(dir_fp->filp, 0, SEEK_SET);
@@ -4051,7 +4142,10 @@ int smb2_query_dir(struct ksmbd_work *work)
rsp->OutputBufferOffset = cpu_to_le16(0);
rsp->OutputBufferLength = cpu_to_le32(0);
rsp->Buffer[0] = 0;
- inc_rfc1001_len(rsp_org, 9);
+ rc = ksmbd_iov_pin_rsp(work, (void *)rsp,
+ sizeof(struct smb2_query_directory_rsp));
+ if (rc)
+ goto err_out;
} else {
no_buf_len:
((struct file_directory_info *)
@@ -4063,7 +4157,11 @@ no_buf_len:
rsp->StructureSize = cpu_to_le16(9);
rsp->OutputBufferOffset = cpu_to_le16(72);
rsp->OutputBufferLength = cpu_to_le32(d_info.data_count);
- inc_rfc1001_len(rsp_org, 8 + d_info.data_count);
+ rc = ksmbd_iov_pin_rsp(work, (void *)rsp,
+ offsetof(struct smb2_query_directory_rsp, Buffer) +
+ d_info.data_count);
+ if (rc)
+ goto err_out;
}
kfree(srch_ptr);
@@ -4103,31 +4201,25 @@ err_out2:
* buffer_check_err() - helper function to check buffer errors
* @reqOutputBufferLength: max buffer length expected in command response
* @rsp: query info response buffer contains output buffer length
- * @infoclass_size: query info class response buffer size
+ * @rsp_org: base response buffer pointer in case of chained response
*
* Return: 0 on success, otherwise error
*/
static int buffer_check_err(int reqOutputBufferLength,
- struct smb2_query_info_rsp *rsp, int infoclass_size)
+ struct smb2_query_info_rsp *rsp,
+ void *rsp_org)
{
if (reqOutputBufferLength < le32_to_cpu(rsp->OutputBufferLength)) {
- if (reqOutputBufferLength < infoclass_size) {
- pr_err("Invalid Buffer Size Requested\n");
- rsp->hdr.Status = STATUS_INFO_LENGTH_MISMATCH;
- rsp->hdr.smb2_buf_length = cpu_to_be32(sizeof(struct smb2_hdr) - 4);
- return -EINVAL;
- }
-
- ksmbd_debug(SMB, "Buffer Overflow\n");
- rsp->hdr.Status = STATUS_BUFFER_OVERFLOW;
- rsp->hdr.smb2_buf_length = cpu_to_be32(sizeof(struct smb2_hdr) - 4 +
- reqOutputBufferLength);
- rsp->OutputBufferLength = cpu_to_le32(reqOutputBufferLength);
+ pr_err("Invalid Buffer Size Requested\n");
+ rsp->hdr.Status = STATUS_INFO_LENGTH_MISMATCH;
+ *(__be32 *)rsp_org = cpu_to_be32(sizeof(struct smb2_hdr));
+ return -EINVAL;
}
return 0;
}
-static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp)
+static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp,
+ void *rsp_org)
{
struct smb2_file_standard_info *sinfo;
@@ -4140,10 +4232,10 @@ static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp)
sinfo->Directory = 0;
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_standard_info));
- inc_rfc1001_len(rsp, sizeof(struct smb2_file_standard_info));
}
-static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num)
+static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num,
+ void *rsp_org)
{
struct smb2_file_internal_info *file_info;
@@ -4153,12 +4245,12 @@ static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num)
file_info->IndexNumber = cpu_to_le64(num | (1ULL << 63));
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_internal_info));
- inc_rfc1001_len(rsp, sizeof(struct smb2_file_internal_info));
}
static int smb2_get_info_file_pipe(struct ksmbd_session *sess,
struct smb2_query_info_req *req,
- struct smb2_query_info_rsp *rsp)
+ struct smb2_query_info_rsp *rsp,
+ void *rsp_org)
{
u64 id;
int rc;
@@ -4167,23 +4259,23 @@ static int smb2_get_info_file_pipe(struct ksmbd_session *sess,
* Windows can sometime send query file info request on
* pipe without opening it, checking error condition here
*/
- id = le64_to_cpu(req->VolatileFileId);
+ id = req->VolatileFileId;
if (!ksmbd_session_rpc_method(sess, id))
return -ENOENT;
ksmbd_debug(SMB, "FileInfoClass %u, FileId 0x%llx\n",
- req->FileInfoClass, le64_to_cpu(req->VolatileFileId));
+ req->FileInfoClass, req->VolatileFileId);
switch (req->FileInfoClass) {
case FILE_STANDARD_INFORMATION:
- get_standard_info_pipe(rsp);
+ get_standard_info_pipe(rsp, rsp_org);
rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
- rsp, FILE_STANDARD_INFORMATION_SIZE);
+ rsp, rsp_org);
break;
case FILE_INTERNAL_INFORMATION:
- get_internal_info_pipe(rsp, id);
+ get_internal_info_pipe(rsp, id, rsp_org);
rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
- rsp, FILE_INTERNAL_INFORMATION_SIZE);
+ rsp, rsp_org);
break;
default:
ksmbd_debug(SMB, "smb2_info_file_pipe for %u not supported\n",
@@ -4212,7 +4304,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
int rc, name_len, value_len, xattr_list_len, idx;
ssize_t buf_free_len, alignment_bytes, next_offset, rsp_data_cnt = 0;
struct smb2_ea_info_req *ea_req = NULL;
- struct path *path;
+ const struct path *path;
struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
if (!(fp->daccess & FILE_READ_EA_LE)) {
@@ -4289,7 +4381,7 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
name_len -= XATTR_USER_PREFIX_LEN;
- ptr = (char *)(&eainfo->name + name_len + 1);
+ ptr = eainfo->name + name_len + 1;
buf_free_len -= (offsetof(struct smb2_ea_info, name) +
name_len + 1);
/* bailout if xattr can't fit in buf_free_len */
@@ -4351,7 +4443,6 @@ done:
if (rsp_data_cnt == 0)
rsp->hdr.Status = STATUS_NO_EAS_ON_FILE;
rsp->OutputBufferLength = cpu_to_le32(rsp_data_cnt);
- inc_rfc1001_len(rsp_org, rsp_data_cnt);
out:
kvfree(xattr_list);
return rc;
@@ -4366,7 +4457,6 @@ static void get_file_access_info(struct smb2_query_info_rsp *rsp,
file_info->AccessFlags = fp->daccess;
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_access_info));
- inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_access_info));
}
static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
@@ -4396,7 +4486,6 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
basic_info->Pad1 = 0;
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_basic_info));
- inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_basic_info));
return 0;
}
@@ -4421,8 +4510,6 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0;
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_standard_info));
- inc_rfc1001_len(rsp_org,
- sizeof(struct smb2_file_standard_info));
}
static void get_file_alignment_info(struct smb2_query_info_rsp *rsp,
@@ -4434,8 +4521,6 @@ static void get_file_alignment_info(struct smb2_query_info_rsp *rsp,
file_info->AlignmentRequirement = 0;
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_alignment_info));
- inc_rfc1001_len(rsp_org,
- sizeof(struct smb2_file_alignment_info));
}
static int get_file_all_info(struct ksmbd_work *work,
@@ -4458,9 +4543,9 @@ static int get_file_all_info(struct ksmbd_work *work,
return -EACCES;
}
- filename = convert_to_nt_pathname(fp->filename);
- if (!filename)
- return -ENOMEM;
+ filename = convert_to_nt_pathname(work->tcon->share_conf, &fp->filp->f_path);
+ if (IS_ERR(filename))
+ return PTR_ERR(filename);
inode = file_inode(fp->filp);
generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
@@ -4499,7 +4584,6 @@ static int get_file_all_info(struct ksmbd_work *work,
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_all_info) + conv_len - 1);
kfree(filename);
- inc_rfc1001_len(rsp_org, le32_to_cpu(rsp->OutputBufferLength));
return 0;
}
@@ -4522,7 +4606,6 @@ static void get_file_alternate_info(struct ksmbd_work *work,
file_info->FileNameLength = cpu_to_le32(conv_len);
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_alt_name_info) + conv_len);
- inc_rfc1001_len(rsp_org, le32_to_cpu(rsp->OutputBufferLength));
}
static void get_file_stream_info(struct ksmbd_work *work,
@@ -4534,7 +4617,7 @@ static void get_file_stream_info(struct ksmbd_work *work,
struct smb2_file_stream_info *file_info;
char *stream_name, *xattr_list = NULL, *stream_buf;
struct kstat stat;
- struct path *path = &fp->filp->f_path;
+ const struct path *path = &fp->filp->f_path;
ssize_t xattr_list_len;
int nbytes = 0, streamlen, stream_name_len, next, idx = 0;
int buf_free_len;
@@ -4622,7 +4705,6 @@ out:
kvfree(xattr_list);
rsp->OutputBufferLength = cpu_to_le32(nbytes);
- inc_rfc1001_len(rsp_org, nbytes);
}
static void get_file_internal_info(struct smb2_query_info_rsp *rsp,
@@ -4637,7 +4719,6 @@ static void get_file_internal_info(struct smb2_query_info_rsp *rsp,
file_info->IndexNumber = cpu_to_le64(stat.ino);
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_internal_info));
- inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_internal_info));
}
static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
@@ -4673,7 +4754,6 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
file_info->Reserved = cpu_to_le32(0);
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_ntwrk_info));
- inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_ntwrk_info));
return 0;
}
@@ -4685,7 +4765,6 @@ static void get_file_ea_info(struct smb2_query_info_rsp *rsp, void *rsp_org)
file_info->EASize = 0;
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_ea_info));
- inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_ea_info));
}
static void get_file_position_info(struct smb2_query_info_rsp *rsp,
@@ -4697,7 +4776,6 @@ static void get_file_position_info(struct smb2_query_info_rsp *rsp,
file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_pos_info));
- inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_pos_info));
}
static void get_file_mode_info(struct smb2_query_info_rsp *rsp,
@@ -4709,7 +4787,6 @@ static void get_file_mode_info(struct smb2_query_info_rsp *rsp,
file_info->Mode = fp->coption & FILE_MODE_INFO_MASK;
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_mode_info));
- inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_mode_info));
}
static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
@@ -4731,7 +4808,6 @@ static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_comp_info));
- inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_comp_info));
}
static int get_file_attribute_tag_info(struct smb2_query_info_rsp *rsp,
@@ -4750,16 +4826,17 @@ static int get_file_attribute_tag_info(struct smb2_query_info_rsp *rsp,
file_info->ReparseTag = 0;
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_attr_tag_info));
- inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_attr_tag_info));
return 0;
}
-static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
+static void find_file_posix_info(struct smb2_query_info_rsp *rsp,
struct ksmbd_file *fp, void *rsp_org)
{
struct smb311_posix_qinfo *file_info;
struct inode *inode = file_inode(fp->filp);
+ struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
u64 time;
+ int out_buf_len = sizeof(struct smb311_posix_qinfo) + 32;
file_info = (struct smb311_posix_qinfo *)rsp->Buffer;
file_info->CreationTime = cpu_to_le64(fp->create_time);
@@ -4774,32 +4851,44 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
file_info->EndOfFile = cpu_to_le64(inode->i_size);
file_info->AllocationSize = cpu_to_le64(inode->i_blocks << 9);
file_info->HardLinks = cpu_to_le32(inode->i_nlink);
- file_info->Mode = cpu_to_le32(inode->i_mode);
+ file_info->Mode = cpu_to_le32(inode->i_mode & 0777);
file_info->DeviceId = cpu_to_le32(inode->i_rdev);
- rsp->OutputBufferLength =
- cpu_to_le32(sizeof(struct smb311_posix_qinfo));
- inc_rfc1001_len(rsp_org, sizeof(struct smb311_posix_qinfo));
- return 0;
+
+ /*
+ * Sids(32) contain two sids(Domain sid(16), UNIX group sid(16)).
+ * UNIX sid(16) = revision(1) + num_subauth(1) + authority(6) +
+ * sub_auth(4 * 1(num_subauth)) + RID(4).
+ */
+ id_to_sid(from_kuid_munged(&init_user_ns,
+ i_uid_into_mnt(user_ns, inode)),
+ SIDUNIX_USER,
+ (struct smb_sid *)&file_info->Sids[0]);
+ id_to_sid(from_kgid_munged(&init_user_ns,
+ i_gid_into_mnt(user_ns, inode)),
+ SIDUNIX_GROUP,
+ (struct smb_sid *)&file_info->Sids[16]);
+
+ rsp->OutputBufferLength = cpu_to_le32(out_buf_len);
}
static int smb2_get_info_file(struct ksmbd_work *work,
struct smb2_query_info_req *req,
- struct smb2_query_info_rsp *rsp, void *rsp_org)
+ struct smb2_query_info_rsp *rsp)
{
struct ksmbd_file *fp;
int fileinfoclass = 0;
int rc = 0;
- int file_infoclass_size;
unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
if (test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_PIPE)) {
/* smb2 info file called for pipe */
- return smb2_get_info_file_pipe(work->sess, req, rsp);
+ return smb2_get_info_file_pipe(work->sess, req, rsp,
+ work->response_buf);
}
if (work->next_smb2_rcv_hdr_off) {
- if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ if (!has_file_id(req->VolatileFileId)) {
ksmbd_debug(SMB, "Compound request set FID = %llu\n",
work->compound_fid);
id = work->compound_fid;
@@ -4808,8 +4897,8 @@ static int smb2_get_info_file(struct ksmbd_work *work,
}
if (!has_file_id(id)) {
- id = le64_to_cpu(req->VolatileFileId);
- pid = le64_to_cpu(req->PersistentFileId);
+ id = req->VolatileFileId;
+ pid = req->PersistentFileId;
}
fp = ksmbd_lookup_fd_slow(work, id, pid);
@@ -4820,86 +4909,70 @@ static int smb2_get_info_file(struct ksmbd_work *work,
switch (fileinfoclass) {
case FILE_ACCESS_INFORMATION:
- get_file_access_info(rsp, fp, rsp_org);
- file_infoclass_size = FILE_ACCESS_INFORMATION_SIZE;
+ get_file_access_info(rsp, fp, work->response_buf);
break;
case FILE_BASIC_INFORMATION:
- rc = get_file_basic_info(rsp, fp, rsp_org);
- file_infoclass_size = FILE_BASIC_INFORMATION_SIZE;
+ rc = get_file_basic_info(rsp, fp, work->response_buf);
break;
case FILE_STANDARD_INFORMATION:
- get_file_standard_info(rsp, fp, rsp_org);
- file_infoclass_size = FILE_STANDARD_INFORMATION_SIZE;
+ get_file_standard_info(rsp, fp, work->response_buf);
break;
case FILE_ALIGNMENT_INFORMATION:
- get_file_alignment_info(rsp, rsp_org);
- file_infoclass_size = FILE_ALIGNMENT_INFORMATION_SIZE;
+ get_file_alignment_info(rsp, work->response_buf);
break;
case FILE_ALL_INFORMATION:
- rc = get_file_all_info(work, rsp, fp, rsp_org);
- file_infoclass_size = FILE_ALL_INFORMATION_SIZE;
+ rc = get_file_all_info(work, rsp, fp, work->response_buf);
break;
case FILE_ALTERNATE_NAME_INFORMATION:
- get_file_alternate_info(work, rsp, fp, rsp_org);
- file_infoclass_size = FILE_ALTERNATE_NAME_INFORMATION_SIZE;
+ get_file_alternate_info(work, rsp, fp, work->response_buf);
break;
case FILE_STREAM_INFORMATION:
- get_file_stream_info(work, rsp, fp, rsp_org);
- file_infoclass_size = FILE_STREAM_INFORMATION_SIZE;
+ get_file_stream_info(work, rsp, fp, work->response_buf);
break;
case FILE_INTERNAL_INFORMATION:
- get_file_internal_info(rsp, fp, rsp_org);
- file_infoclass_size = FILE_INTERNAL_INFORMATION_SIZE;
+ get_file_internal_info(rsp, fp, work->response_buf);
break;
case FILE_NETWORK_OPEN_INFORMATION:
- rc = get_file_network_open_info(rsp, fp, rsp_org);
- file_infoclass_size = FILE_NETWORK_OPEN_INFORMATION_SIZE;
+ rc = get_file_network_open_info(rsp, fp, work->response_buf);
break;
case FILE_EA_INFORMATION:
- get_file_ea_info(rsp, rsp_org);
- file_infoclass_size = FILE_EA_INFORMATION_SIZE;
+ get_file_ea_info(rsp, work->response_buf);
break;
case FILE_FULL_EA_INFORMATION:
- rc = smb2_get_ea(work, fp, req, rsp, rsp_org);
- file_infoclass_size = FILE_FULL_EA_INFORMATION_SIZE;
+ rc = smb2_get_ea(work, fp, req, rsp, work->response_buf);
break;
case FILE_POSITION_INFORMATION:
- get_file_position_info(rsp, fp, rsp_org);
- file_infoclass_size = FILE_POSITION_INFORMATION_SIZE;
+ get_file_position_info(rsp, fp, work->response_buf);
break;
case FILE_MODE_INFORMATION:
- get_file_mode_info(rsp, fp, rsp_org);
- file_infoclass_size = FILE_MODE_INFORMATION_SIZE;
+ get_file_mode_info(rsp, fp, work->response_buf);
break;
case FILE_COMPRESSION_INFORMATION:
- get_file_compression_info(rsp, fp, rsp_org);
- file_infoclass_size = FILE_COMPRESSION_INFORMATION_SIZE;
+ get_file_compression_info(rsp, fp, work->response_buf);
break;
case FILE_ATTRIBUTE_TAG_INFORMATION:
- rc = get_file_attribute_tag_info(rsp, fp, rsp_org);
- file_infoclass_size = FILE_ATTRIBUTE_TAG_INFORMATION_SIZE;
+ rc = get_file_attribute_tag_info(rsp, fp, work->response_buf);
break;
case SMB_FIND_FILE_POSIX_INFO:
if (!work->tcon->posix_extensions) {
pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
rc = -EOPNOTSUPP;
} else {
- rc = find_file_posix_info(rsp, fp, rsp_org);
- file_infoclass_size = sizeof(struct smb311_posix_qinfo);
+ find_file_posix_info(rsp, fp, work->response_buf);
}
break;
default:
@@ -4909,15 +4982,14 @@ static int smb2_get_info_file(struct ksmbd_work *work,
}
if (!rc)
rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
- rsp,
- file_infoclass_size);
+ rsp, work->response_buf);
ksmbd_fd_put(work, fp);
return rc;
}
static int smb2_get_info_filesystem(struct ksmbd_work *work,
struct smb2_query_info_req *req,
- struct smb2_query_info_rsp *rsp, void *rsp_org)
+ struct smb2_query_info_rsp *rsp)
{
struct ksmbd_session *sess = work->sess;
struct ksmbd_conn *conn = work->conn;
@@ -4926,7 +4998,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
struct kstatfs stfs;
struct path path;
int rc = 0, len;
- int fs_infoclass_size = 0;
if (!share->path)
return -EIO;
@@ -4956,8 +5027,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info->DeviceType = cpu_to_le32(stfs.f_type);
info->DeviceCharacteristics = cpu_to_le32(0x00000020);
rsp->OutputBufferLength = cpu_to_le32(8);
- inc_rfc1001_len(rsp_org, 8);
- fs_infoclass_size = FS_DEVICE_INFORMATION_SIZE;
break;
}
case FS_ATTRIBUTE_INFORMATION:
@@ -4986,8 +5055,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info->FileSystemNameLen = cpu_to_le32(len);
sz = sizeof(struct filesystem_attribute_info) - 2 + len;
rsp->OutputBufferLength = cpu_to_le32(sz);
- inc_rfc1001_len(rsp_org, sz);
- fs_infoclass_size = FS_ATTRIBUTE_INFORMATION_SIZE;
break;
}
case FS_VOLUME_INFORMATION:
@@ -5014,8 +5081,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info->Reserved = 0;
sz = sizeof(struct filesystem_vol_info) - 2 + len;
rsp->OutputBufferLength = cpu_to_le32(sz);
- inc_rfc1001_len(rsp_org, sz);
- fs_infoclass_size = FS_VOLUME_INFORMATION_SIZE;
break;
}
case FS_SIZE_INFORMATION:
@@ -5028,8 +5093,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info->SectorsPerAllocationUnit = cpu_to_le32(1);
info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
rsp->OutputBufferLength = cpu_to_le32(24);
- inc_rfc1001_len(rsp_org, 24);
- fs_infoclass_size = FS_SIZE_INFORMATION_SIZE;
break;
}
case FS_FULL_SIZE_INFORMATION:
@@ -5045,8 +5108,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info->SectorsPerAllocationUnit = cpu_to_le32(1);
info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
rsp->OutputBufferLength = cpu_to_le32(32);
- inc_rfc1001_len(rsp_org, 32);
- fs_infoclass_size = FS_FULL_SIZE_INFORMATION_SIZE;
break;
}
case FS_OBJECT_ID_INFORMATION:
@@ -5066,8 +5127,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info->extended_info.rel_date = 0;
memcpy(info->extended_info.version_string, "1.1.0", strlen("1.1.0"));
rsp->OutputBufferLength = cpu_to_le32(64);
- inc_rfc1001_len(rsp_org, 64);
- fs_infoclass_size = FS_OBJECT_ID_INFORMATION_SIZE;
break;
}
case FS_SECTOR_SIZE_INFORMATION:
@@ -5089,8 +5148,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info->ByteOffsetForSectorAlignment = 0;
info->ByteOffsetForPartitionAlignment = 0;
rsp->OutputBufferLength = cpu_to_le32(28);
- inc_rfc1001_len(rsp_org, 28);
- fs_infoclass_size = FS_SECTOR_SIZE_INFORMATION_SIZE;
break;
}
case FS_CONTROL_INFORMATION:
@@ -5111,8 +5168,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info->DefaultQuotaLimit = cpu_to_le64(SMB2_NO_FID);
info->Padding = 0;
rsp->OutputBufferLength = cpu_to_le32(48);
- inc_rfc1001_len(rsp_org, 48);
- fs_infoclass_size = FS_CONTROL_INFORMATION_SIZE;
break;
}
case FS_POSIX_INFORMATION:
@@ -5132,8 +5187,6 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info->TotalFileNodes = cpu_to_le64(stfs.f_files);
info->FreeFileNodes = cpu_to_le64(stfs.f_ffree);
rsp->OutputBufferLength = cpu_to_le32(56);
- inc_rfc1001_len(rsp_org, 56);
- fs_infoclass_size = FS_POSIX_INFORMATION_SIZE;
}
break;
}
@@ -5142,15 +5195,14 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
return -EOPNOTSUPP;
}
rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
- rsp,
- fs_infoclass_size);
+ rsp, work->response_buf);
path_put(&path);
return rc;
}
static int smb2_get_info_sec(struct ksmbd_work *work,
struct smb2_query_info_req *req,
- struct smb2_query_info_rsp *rsp, void *rsp_org)
+ struct smb2_query_info_rsp *rsp)
{
struct ksmbd_file *fp;
struct user_namespace *user_ns;
@@ -5177,13 +5229,12 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
secdesclen = sizeof(struct smb_ntsd);
rsp->OutputBufferLength = cpu_to_le32(secdesclen);
- inc_rfc1001_len(rsp_org, secdesclen);
return 0;
}
if (work->next_smb2_rcv_hdr_off) {
- if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ if (!has_file_id(req->VolatileFileId)) {
ksmbd_debug(SMB, "Compound request set FID = %llu\n",
work->compound_fid);
id = work->compound_fid;
@@ -5192,8 +5243,8 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
}
if (!has_file_id(id)) {
- id = le64_to_cpu(req->VolatileFileId);
- pid = le64_to_cpu(req->PersistentFileId);
+ id = req->VolatileFileId;
+ pid = req->PersistentFileId;
}
fp = ksmbd_lookup_fd_slow(work, id, pid);
@@ -5222,7 +5273,6 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
return rc;
rsp->OutputBufferLength = cpu_to_le32(secdesclen);
- inc_rfc1001_len(rsp_org, secdesclen);
return 0;
}
@@ -5235,10 +5285,9 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
int smb2_query_info(struct ksmbd_work *work)
{
struct smb2_query_info_req *req;
- struct smb2_query_info_rsp *rsp, *rsp_org;
+ struct smb2_query_info_rsp *rsp;
int rc = 0;
- rsp_org = work->response_buf;
WORK_BUFFERS(work, req, rsp);
ksmbd_debug(SMB, "GOT query info request\n");
@@ -5246,15 +5295,15 @@ int smb2_query_info(struct ksmbd_work *work)
switch (req->InfoType) {
case SMB2_O_INFO_FILE:
ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n");
- rc = smb2_get_info_file(work, req, rsp, (void *)rsp_org);
+ rc = smb2_get_info_file(work, req, rsp);
break;
case SMB2_O_INFO_FILESYSTEM:
ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILESYSTEM\n");
- rc = smb2_get_info_filesystem(work, req, rsp, (void *)rsp_org);
+ rc = smb2_get_info_filesystem(work, req, rsp);
break;
case SMB2_O_INFO_SECURITY:
ksmbd_debug(SMB, "GOT SMB2_O_INFO_SECURITY\n");
- rc = smb2_get_info_sec(work, req, rsp, (void *)rsp_org);
+ rc = smb2_get_info_sec(work, req, rsp);
break;
default:
ksmbd_debug(SMB, "InfoType %d not supported yet\n",
@@ -5262,6 +5311,14 @@ int smb2_query_info(struct ksmbd_work *work)
rc = -EOPNOTSUPP;
}
+ if (!rc) {
+ rsp->StructureSize = cpu_to_le16(9);
+ rsp->OutputBufferOffset = cpu_to_le16(72);
+ rc = ksmbd_iov_pin_rsp(work, (void *)rsp,
+ offsetof(struct smb2_query_info_rsp, Buffer) +
+ le32_to_cpu(rsp->OutputBufferLength));
+ }
+
if (rc < 0) {
if (rc == -EACCES)
rsp->hdr.Status = STATUS_ACCESS_DENIED;
@@ -5269,6 +5326,8 @@ int smb2_query_info(struct ksmbd_work *work)
rsp->hdr.Status = STATUS_FILE_CLOSED;
else if (rc == -EIO)
rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+ else if (rc == -ENOMEM)
+ rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
else if (rc == -EOPNOTSUPP || rsp->hdr.Status == 0)
rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
smb2_set_err_rsp(work);
@@ -5277,9 +5336,6 @@ int smb2_query_info(struct ksmbd_work *work)
rc);
return rc;
}
- rsp->StructureSize = cpu_to_le16(9);
- rsp->OutputBufferOffset = cpu_to_le16(72);
- inc_rfc1001_len(rsp_org, 8);
return 0;
}
@@ -5292,10 +5348,12 @@ int smb2_query_info(struct ksmbd_work *work)
static noinline int smb2_close_pipe(struct ksmbd_work *work)
{
u64 id;
- struct smb2_close_req *req = work->request_buf;
- struct smb2_close_rsp *rsp = work->response_buf;
+ struct smb2_close_req *req;
+ struct smb2_close_rsp *rsp;
- id = le64_to_cpu(req->VolatileFileId);
+ WORK_BUFFERS(work, req, rsp);
+
+ id = req->VolatileFileId;
ksmbd_session_rpc_close(work->sess, id);
rsp->StructureSize = cpu_to_le16(60);
@@ -5308,8 +5366,9 @@ static noinline int smb2_close_pipe(struct ksmbd_work *work)
rsp->AllocationSize = 0;
rsp->EndOfFile = 0;
rsp->Attributes = 0;
- inc_rfc1001_len(rsp, 60);
- return 0;
+
+ return ksmbd_iov_pin_rsp(work, (void *)rsp,
+ sizeof(struct smb2_close_rsp));
}
/**
@@ -5324,14 +5383,12 @@ int smb2_close(struct ksmbd_work *work)
u64 sess_id;
struct smb2_close_req *req;
struct smb2_close_rsp *rsp;
- struct smb2_close_rsp *rsp_org;
struct ksmbd_conn *conn = work->conn;
struct ksmbd_file *fp;
struct inode *inode;
u64 time;
int err = 0;
- rsp_org = work->response_buf;
WORK_BUFFERS(work, req, rsp);
if (test_share_config_flag(work->tcon->share_conf,
@@ -5356,7 +5413,7 @@ int smb2_close(struct ksmbd_work *work)
}
if (work->next_smb2_rcv_hdr_off &&
- !has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ !has_file_id(req->VolatileFileId)) {
if (!has_file_id(work->compound_fid)) {
/* file already closed, return FILE_CLOSED */
ksmbd_debug(SMB, "file already closed\n");
@@ -5375,7 +5432,7 @@ int smb2_close(struct ksmbd_work *work)
work->compound_pfid = KSMBD_NO_FID;
}
} else {
- volatile_id = le64_to_cpu(req->VolatileFileId);
+ volatile_id = req->VolatileFileId;
}
ksmbd_debug(SMB, "volatile_id = %llu\n", volatile_id);
@@ -5416,15 +5473,17 @@ int smb2_close(struct ksmbd_work *work)
err = ksmbd_close_fd(work, volatile_id);
out:
+ if (!err)
+ err = ksmbd_iov_pin_rsp(work, (void *)rsp,
+ sizeof(struct smb2_close_rsp));
+
if (err) {
if (rsp->hdr.Status == 0)
rsp->hdr.Status = STATUS_FILE_CLOSED;
smb2_set_err_rsp(work);
- } else {
- inc_rfc1001_len(rsp_org, 60);
}
- return 0;
+ return err;
}
/**
@@ -5435,55 +5494,31 @@ out:
*/
int smb2_echo(struct ksmbd_work *work)
{
- struct smb2_echo_rsp *rsp = work->response_buf;
+ struct smb2_echo_rsp *rsp = smb2_get_msg(work->response_buf);
+
+ if (work->next_smb2_rcv_hdr_off)
+ rsp = ksmbd_resp_buf_next(work);
rsp->StructureSize = cpu_to_le16(4);
rsp->Reserved = 0;
- inc_rfc1001_len(rsp, 4);
- return 0;
+ return ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_echo_rsp));
}
static int smb2_rename(struct ksmbd_work *work,
struct ksmbd_file *fp,
- struct user_namespace *user_ns,
struct smb2_file_rename_info *file_info,
struct nls_table *local_nls)
{
struct ksmbd_share_config *share = fp->tcon->share_conf;
- char *new_name = NULL, *abs_oldname = NULL, *old_name = NULL;
- char *pathname = NULL;
- struct path path;
- bool file_present = true;
- int rc;
+ char *new_name = NULL;
+ int rc, flags = 0;
ksmbd_debug(SMB, "setting FILE_RENAME_INFO\n");
- pathname = kmalloc(PATH_MAX, GFP_KERNEL);
- if (!pathname)
- return -ENOMEM;
-
- abs_oldname = d_path(&fp->filp->f_path, pathname, PATH_MAX);
- if (IS_ERR(abs_oldname)) {
- rc = -EINVAL;
- goto out;
- }
- old_name = strrchr(abs_oldname, '/');
- if (old_name && old_name[1] != '\0') {
- old_name++;
- } else {
- ksmbd_debug(SMB, "can't get last component in path %s\n",
- abs_oldname);
- rc = -ENOENT;
- goto out;
- }
-
- new_name = smb2_get_name(share,
- file_info->FileName,
+ new_name = smb2_get_name(file_info->FileName,
le32_to_cpu(file_info->FileNameLength),
local_nls);
- if (IS_ERR(new_name)) {
- rc = PTR_ERR(new_name);
- goto out;
- }
+ if (IS_ERR(new_name))
+ return PTR_ERR(new_name);
if (strchr(new_name, ':')) {
int s_type;
@@ -5509,10 +5544,10 @@ static int smb2_rename(struct ksmbd_work *work,
if (rc)
goto out;
- rc = ksmbd_vfs_setxattr(user_ns,
- fp->filp->f_path.dentry,
+ rc = ksmbd_vfs_setxattr(file_mnt_user_ns(fp->filp),
+ &fp->filp->f_path,
xattr_stream_name,
- NULL, 0, 0);
+ NULL, 0, 0, true);
if (rc < 0) {
pr_err("failed to store stream name in xattr: %d\n",
rc);
@@ -5524,47 +5559,18 @@ static int smb2_rename(struct ksmbd_work *work,
}
ksmbd_debug(SMB, "new name %s\n", new_name);
- rc = ksmbd_vfs_kern_path(work, new_name, LOOKUP_NO_SYMLINKS, &path, 1);
- if (rc) {
- if (rc != -ENOENT)
- goto out;
- file_present = false;
- } else {
- path_put(&path);
- }
-
if (ksmbd_share_veto_filename(share, new_name)) {
rc = -ENOENT;
ksmbd_debug(SMB, "Can't rename vetoed file: %s\n", new_name);
goto out;
}
- if (file_info->ReplaceIfExists) {
- if (file_present) {
- rc = ksmbd_vfs_remove_file(work, new_name);
- if (rc) {
- if (rc != -ENOTEMPTY)
- rc = -EINVAL;
- ksmbd_debug(SMB, "cannot delete %s, rc %d\n",
- new_name, rc);
- goto out;
- }
- }
- } else {
- if (file_present &&
- strncmp(old_name, path.dentry->d_name.name, strlen(old_name))) {
- rc = -EEXIST;
- ksmbd_debug(SMB,
- "cannot rename already existing file\n");
- goto out;
- }
- }
+ if (!file_info->ReplaceIfExists)
+ flags = RENAME_NOREPLACE;
- rc = ksmbd_vfs_fp_rename(work, fp, new_name);
+ rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags);
out:
- kfree(pathname);
- if (!IS_ERR(new_name))
- kfree(new_name);
+ kfree(new_name);
return rc;
}
@@ -5575,8 +5581,8 @@ static int smb2_create_link(struct ksmbd_work *work,
struct nls_table *local_nls)
{
char *link_name = NULL, *target_name = NULL, *pathname = NULL;
- struct path path;
- bool file_present = true;
+ struct path path, parent_path;
+ bool file_present = false;
int rc;
if (buf_len < (u64)sizeof(struct smb2_file_link_info) +
@@ -5588,8 +5594,7 @@ static int smb2_create_link(struct ksmbd_work *work,
if (!pathname)
return -ENOMEM;
- link_name = smb2_get_name(share,
- file_info->FileName,
+ link_name = smb2_get_name(file_info->FileName,
le32_to_cpu(file_info->FileNameLength),
local_nls);
if (IS_ERR(link_name) || S_ISDIR(file_inode(filp)->i_mode)) {
@@ -5598,25 +5603,24 @@ static int smb2_create_link(struct ksmbd_work *work,
}
ksmbd_debug(SMB, "link name is %s\n", link_name);
- target_name = d_path(&filp->f_path, pathname, PATH_MAX);
+ target_name = file_path(filp, pathname, PATH_MAX);
if (IS_ERR(target_name)) {
rc = -EINVAL;
goto out;
}
ksmbd_debug(SMB, "target name is %s\n", target_name);
- rc = ksmbd_vfs_kern_path(work, link_name, LOOKUP_NO_SYMLINKS, &path, 0);
+ rc = ksmbd_vfs_kern_path_locked(work, link_name, LOOKUP_NO_SYMLINKS,
+ &parent_path, &path, 0);
if (rc) {
if (rc != -ENOENT)
goto out;
- file_present = false;
- } else {
- path_put(&path);
- }
+ } else
+ file_present = true;
if (file_info->ReplaceIfExists) {
if (file_present) {
- rc = ksmbd_vfs_remove_file(work, link_name);
+ rc = ksmbd_vfs_remove_file(work, &path);
if (rc) {
rc = -EINVAL;
ksmbd_debug(SMB, "cannot delete %s\n",
@@ -5636,6 +5640,9 @@ static int smb2_create_link(struct ksmbd_work *work,
if (rc)
rc = -EINVAL;
out:
+ if (file_present)
+ ksmbd_vfs_kern_path_unlock(&parent_path, &path);
+
if (!IS_ERR(link_name))
kfree(link_name);
kfree(pathname);
@@ -5702,8 +5709,8 @@ static int set_file_basic_info(struct ksmbd_file *fp,
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(user_ns,
- filp->f_path.dentry, &da);
+ rc = ksmbd_vfs_set_dos_attrib_xattr(user_ns, &filp->f_path, &da,
+ true);
if (rc)
ksmbd_debug(SMB,
"failed to restore file attribute in EA\n");
@@ -5767,8 +5774,7 @@ static int set_file_allocation_info(struct ksmbd_work *work,
size = i_size_read(inode);
rc = ksmbd_vfs_truncate(work, fp, alloc_blks * 512);
if (rc) {
- pr_err("truncate failed! filename : %s, err %d\n",
- fp->filename, rc);
+ pr_err("truncate failed!, err %d\n", rc);
return rc;
}
if (size < alloc_blks * 512)
@@ -5798,12 +5804,10 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp,
* truncated range.
*/
if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) {
- ksmbd_debug(SMB, "filename : %s truncated to newsize %lld\n",
- fp->filename, newsize);
+ ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize);
rc = ksmbd_vfs_truncate(work, fp, newsize);
if (rc) {
- ksmbd_debug(SMB, "truncate failed! filename : %s err %d\n",
- fp->filename, rc);
+ ksmbd_debug(SMB, "truncate failed!, err %d\n", rc);
if (rc != -EAGAIN)
rc = -EBADF;
return rc;
@@ -5816,12 +5820,6 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
struct smb2_file_rename_info *rename_info,
unsigned int buf_len)
{
- struct user_namespace *user_ns;
- struct ksmbd_file *parent_fp;
- struct dentry *parent;
- struct dentry *dentry = fp->filp->f_path.dentry;
- int ret;
-
if (!(fp->daccess & FILE_DELETE_LE)) {
pr_err("no right to delete : 0x%x\n", fp->daccess);
return -EACCES;
@@ -5831,32 +5829,10 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
le32_to_cpu(rename_info->FileNameLength))
return -EINVAL;
- user_ns = file_mnt_user_ns(fp->filp);
- if (ksmbd_stream_fd(fp))
- goto next;
-
- parent = dget_parent(dentry);
- ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
- if (ret) {
- dput(parent);
- return ret;
- }
-
- parent_fp = ksmbd_lookup_fd_inode(d_inode(parent));
- inode_unlock(d_inode(parent));
- dput(parent);
+ if (!le32_to_cpu(rename_info->FileNameLength))
+ return -EINVAL;
- if (parent_fp) {
- if (parent_fp->daccess & FILE_DELETE_LE) {
- pr_err("parent dir is opened with delete access\n");
- ksmbd_fd_put(work, parent_fp);
- return -ESHARE;
- }
- ksmbd_fd_put(work, parent_fp);
- }
-next:
- return smb2_rename(work, fp, user_ns, rename_info,
- work->conn->local_nls);
+ return smb2_rename(work, fp, rename_info, work->conn->local_nls);
}
static int set_file_disposition_info(struct ksmbd_file *fp,
@@ -5931,7 +5907,7 @@ static int set_file_mode_info(struct ksmbd_file *fp,
* smb2_set_info_file() - handler for smb2 set info command
* @work: smb work containing set info command buffer
* @fp: ksmbd_file pointer
- * @info_class: smb2 set info class
+ * @req: request buffer pointer
* @share: ksmbd_share_config pointer
*
* Return: 0 on success, otherwise error
@@ -6048,7 +6024,7 @@ static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info,
fp->saccess |= FILE_SHARE_DELETE_LE;
return set_info_sec(fp->conn, fp->tcon, &fp->filp->f_path, pntsd,
- buf_len, false);
+ buf_len, false, true);
}
/**
@@ -6060,31 +6036,30 @@ static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info,
int smb2_set_info(struct ksmbd_work *work)
{
struct smb2_set_info_req *req;
- struct smb2_set_info_rsp *rsp, *rsp_org;
+ struct smb2_set_info_rsp *rsp;
struct ksmbd_file *fp;
int rc = 0;
unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
ksmbd_debug(SMB, "Received set info request\n");
- rsp_org = work->response_buf;
if (work->next_smb2_rcv_hdr_off) {
req = ksmbd_req_buf_next(work);
rsp = ksmbd_resp_buf_next(work);
- if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ if (!has_file_id(req->VolatileFileId)) {
ksmbd_debug(SMB, "Compound request set FID = %llu\n",
work->compound_fid);
id = work->compound_fid;
pid = work->compound_pfid;
}
} else {
- req = work->request_buf;
- rsp = work->response_buf;
+ req = smb2_get_msg(work->request_buf);
+ rsp = smb2_get_msg(work->response_buf);
}
if (!has_file_id(id)) {
- id = le64_to_cpu(req->VolatileFileId);
- pid = le64_to_cpu(req->PersistentFileId);
+ id = req->VolatileFileId;
+ pid = req->PersistentFileId;
}
fp = ksmbd_lookup_fd_slow(work, id, pid);
@@ -6119,7 +6094,10 @@ int smb2_set_info(struct ksmbd_work *work)
goto err_out;
rsp->StructureSize = cpu_to_le16(2);
- inc_rfc1001_len(rsp_org, 2);
+ rc = ksmbd_iov_pin_rsp(work, (void *)rsp,
+ sizeof(struct smb2_set_info_rsp));
+ if (rc)
+ goto err_out;
ksmbd_fd_put(work, fp);
return 0;
@@ -6159,33 +6137,43 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
int nbytes = 0, err;
u64 id;
struct ksmbd_rpc_command *rpc_resp;
- struct smb2_read_req *req = work->request_buf;
- struct smb2_read_rsp *rsp = work->response_buf;
+ struct smb2_read_req *req;
+ struct smb2_read_rsp *rsp;
- id = le64_to_cpu(req->VolatileFileId);
+ WORK_BUFFERS(work, req, rsp);
+
+ id = req->VolatileFileId;
- inc_rfc1001_len(rsp, 16);
rpc_resp = ksmbd_rpc_read(work->sess, id);
if (rpc_resp) {
+ void *aux_payload_buf;
+
if (rpc_resp->flags != KSMBD_RPC_OK) {
err = -EINVAL;
goto out;
}
- work->aux_payload_buf =
- kvmalloc(rpc_resp->payload_sz, GFP_KERNEL | __GFP_ZERO);
- if (!work->aux_payload_buf) {
+ aux_payload_buf =
+ kvmalloc(rpc_resp->payload_sz, GFP_KERNEL);
+ if (!aux_payload_buf) {
err = -ENOMEM;
goto out;
}
- memcpy(work->aux_payload_buf, rpc_resp->payload,
- rpc_resp->payload_sz);
+ memcpy(aux_payload_buf, rpc_resp->payload, rpc_resp->payload_sz);
nbytes = rpc_resp->payload_sz;
- work->resp_hdr_sz = get_rfc1002_len(rsp) + 4;
- work->aux_payload_sz = nbytes;
+ err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
+ offsetof(struct smb2_read_rsp, Buffer),
+ aux_payload_buf, nbytes);
+ if (err)
+ goto out;
kvfree(rpc_resp);
+ } else {
+ err = ksmbd_iov_pin_rsp(work, (void *)rsp,
+ offsetof(struct smb2_read_rsp, Buffer));
+ if (err)
+ goto out;
}
rsp->StructureSize = cpu_to_le16(17);
@@ -6194,7 +6182,6 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
rsp->DataLength = cpu_to_le32(nbytes);
rsp->DataRemaining = 0;
rsp->Reserved2 = 0;
- inc_rfc1001_len(rsp, nbytes);
return 0;
out:
@@ -6204,30 +6191,46 @@ out:
return err;
}
-static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
- struct smb2_read_req *req, void *data_buf,
- size_t length)
+static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
+ struct smb2_buffer_desc_v1 *desc,
+ __le32 Channel,
+ __le16 ChannelInfoLength)
{
- struct smb2_buffer_desc_v1 *desc =
- (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
- int err;
+ unsigned int i, ch_count;
if (work->conn->dialect == SMB30_PROT_ID &&
- req->Channel != SMB2_CHANNEL_RDMA_V1)
+ Channel != SMB2_CHANNEL_RDMA_V1)
return -EINVAL;
- if (req->ReadChannelInfoOffset == 0 ||
- le16_to_cpu(req->ReadChannelInfoLength) < sizeof(*desc))
+ ch_count = le16_to_cpu(ChannelInfoLength) / sizeof(*desc);
+ if (ksmbd_debug_types & KSMBD_DEBUG_RDMA) {
+ for (i = 0; i < ch_count; i++) {
+ pr_info("RDMA r/w request %#x: token %#x, length %#x\n",
+ i,
+ le32_to_cpu(desc[i].token),
+ le32_to_cpu(desc[i].length));
+ }
+ }
+ if (!ch_count)
return -EINVAL;
work->need_invalidate_rkey =
- (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
- work->remote_key = le32_to_cpu(desc->token);
+ (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+ if (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE)
+ work->remote_key = le32_to_cpu(desc->token);
+ return 0;
+}
+
+static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
+ struct smb2_read_req *req, void *data_buf,
+ size_t length)
+{
+ int err;
err = ksmbd_conn_rdma_write(work->conn, data_buf, length,
- le32_to_cpu(desc->token),
- le64_to_cpu(desc->offset),
- le32_to_cpu(desc->length));
+ (struct smb2_buffer_desc_v1 *)
+ ((char *)req + le16_to_cpu(req->ReadChannelInfoOffset)),
+ le16_to_cpu(req->ReadChannelInfoLength));
if (err)
return err;
@@ -6244,15 +6247,16 @@ int smb2_read(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
struct smb2_read_req *req;
- struct smb2_read_rsp *rsp, *rsp_org;
- struct ksmbd_file *fp;
+ struct smb2_read_rsp *rsp;
+ struct ksmbd_file *fp = NULL;
loff_t offset;
size_t length, mincount;
ssize_t nbytes = 0, remain_bytes = 0;
int err = 0;
-
- rsp_org = work->response_buf;
- WORK_BUFFERS(work, req, rsp);
+ bool is_rdma_channel = false;
+ unsigned int max_read_size = conn->vals->max_read_size;
+ unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+ void *aux_payload_buf;
if (test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_PIPE)) {
@@ -6260,8 +6264,48 @@ int smb2_read(struct ksmbd_work *work)
return smb2_read_pipe(work);
}
- fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
- le64_to_cpu(req->PersistentFileId));
+ if (work->next_smb2_rcv_hdr_off) {
+ req = ksmbd_req_buf_next(work);
+ rsp = ksmbd_resp_buf_next(work);
+ if (!has_file_id(req->VolatileFileId)) {
+ ksmbd_debug(SMB, "Compound request set FID = %llu\n",
+ work->compound_fid);
+ id = work->compound_fid;
+ pid = work->compound_pfid;
+ }
+ } else {
+ req = smb2_get_msg(work->request_buf);
+ rsp = smb2_get_msg(work->response_buf);
+ }
+
+ if (!has_file_id(id)) {
+ id = req->VolatileFileId;
+ pid = req->PersistentFileId;
+ }
+
+ if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
+ req->Channel == SMB2_CHANNEL_RDMA_V1) {
+ is_rdma_channel = true;
+ max_read_size = get_smbd_max_read_write_size();
+ }
+
+ if (is_rdma_channel == true) {
+ unsigned int ch_offset = le16_to_cpu(req->ReadChannelInfoOffset);
+
+ if (ch_offset < offsetof(struct smb2_read_req, Buffer)) {
+ err = -EINVAL;
+ goto out;
+ }
+ err = smb2_set_remote_key_for_rdma(work,
+ (struct smb2_buffer_desc_v1 *)
+ ((char *)req + ch_offset),
+ req->Channel,
+ req->ReadChannelInfoLength);
+ if (err)
+ goto out;
+ }
+
+ fp = ksmbd_lookup_fd_slow(work, id, pid);
if (!fp) {
err = -ENOENT;
goto out;
@@ -6277,31 +6321,30 @@ int smb2_read(struct ksmbd_work *work)
length = le32_to_cpu(req->Length);
mincount = le32_to_cpu(req->MinimumCount);
- if (length > conn->vals->max_read_size) {
+ if (length > max_read_size) {
ksmbd_debug(SMB, "limiting read size to max size(%u)\n",
- conn->vals->max_read_size);
+ max_read_size);
err = -EINVAL;
goto out;
}
- ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
- fp->filp->f_path.dentry, offset, length);
+ ksmbd_debug(SMB, "filename %pD, offset %lld, len %zu\n",
+ fp->filp, offset, length);
- work->aux_payload_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
- if (!work->aux_payload_buf) {
+ aux_payload_buf = kvzalloc(length, GFP_KERNEL);
+ if (!aux_payload_buf) {
err = -ENOMEM;
goto out;
}
- nbytes = ksmbd_vfs_read(work, fp, length, &offset);
+ nbytes = ksmbd_vfs_read(work, fp, length, &offset, aux_payload_buf);
if (nbytes < 0) {
err = nbytes;
goto out;
}
if ((nbytes == 0 && length != 0) || nbytes < mincount) {
- kvfree(work->aux_payload_buf);
- work->aux_payload_buf = NULL;
+ kvfree(aux_payload_buf);
rsp->hdr.Status = STATUS_END_OF_FILE;
smb2_set_err_rsp(work);
ksmbd_fd_put(work, fp);
@@ -6311,15 +6354,13 @@ int smb2_read(struct ksmbd_work *work)
ksmbd_debug(SMB, "nbytes %zu, offset %lld mincount %zu\n",
nbytes, offset, mincount);
- if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
- req->Channel == SMB2_CHANNEL_RDMA_V1) {
+ if (is_rdma_channel == true) {
/* write data to the client using rdma channel */
remain_bytes = smb2_read_rdma_channel(work, req,
- work->aux_payload_buf,
+ aux_payload_buf,
nbytes);
- kvfree(work->aux_payload_buf);
- work->aux_payload_buf = NULL;
-
+ kvfree(aux_payload_buf);
+ aux_payload_buf = NULL;
nbytes = 0;
if (remain_bytes < 0) {
err = (int)remain_bytes;
@@ -6333,10 +6374,11 @@ int smb2_read(struct ksmbd_work *work)
rsp->DataLength = cpu_to_le32(nbytes);
rsp->DataRemaining = cpu_to_le32(remain_bytes);
rsp->Reserved2 = 0;
- inc_rfc1001_len(rsp_org, 16);
- work->resp_hdr_sz = get_rfc1002_len(rsp_org) + 4;
- work->aux_payload_sz = nbytes;
- inc_rfc1001_len(rsp_org, nbytes);
+ err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
+ offsetof(struct smb2_read_rsp, Buffer),
+ aux_payload_buf, nbytes);
+ if (err)
+ goto out;
ksmbd_fd_put(work, fp);
return 0;
@@ -6371,33 +6413,31 @@ out:
*/
static noinline int smb2_write_pipe(struct ksmbd_work *work)
{
- struct smb2_write_req *req = work->request_buf;
- struct smb2_write_rsp *rsp = work->response_buf;
+ struct smb2_write_req *req;
+ struct smb2_write_rsp *rsp;
struct ksmbd_rpc_command *rpc_resp;
u64 id = 0;
int err = 0, ret = 0;
char *data_buf;
size_t length;
- length = le32_to_cpu(req->Length);
- id = le64_to_cpu(req->VolatileFileId);
+ WORK_BUFFERS(work, req, rsp);
- if (le16_to_cpu(req->DataOffset) ==
- (offsetof(struct smb2_write_req, Buffer) - 4)) {
- data_buf = (char *)&req->Buffer[0];
- } else {
- if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
- pr_err("invalid write data offset %u, smb_len %u\n",
- le16_to_cpu(req->DataOffset),
- get_rfc1002_len(req));
- err = -EINVAL;
- goto out;
- }
+ length = le32_to_cpu(req->Length);
+ id = req->VolatileFileId;
- data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
- le16_to_cpu(req->DataOffset));
+ if ((u64)le16_to_cpu(req->DataOffset) + length >
+ get_rfc1002_len(work->request_buf)) {
+ pr_err("invalid write data offset %u, smb_len %u\n",
+ le16_to_cpu(req->DataOffset),
+ get_rfc1002_len(work->request_buf));
+ err = -EINVAL;
+ goto out;
}
+ data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
+ le16_to_cpu(req->DataOffset));
+
rpc_resp = ksmbd_rpc_write(work->sess, id, data_buf, length);
if (rpc_resp) {
if (rpc_resp->flags == KSMBD_RPC_ENOTIMPLEMENTED) {
@@ -6421,8 +6461,8 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work)
rsp->DataLength = cpu_to_le32(length);
rsp->DataRemaining = 0;
rsp->Reserved2 = 0;
- inc_rfc1001_len(rsp, 16);
- return 0;
+ err = ksmbd_iov_pin_rsp(work, (void *)rsp,
+ offsetof(struct smb2_write_rsp, Buffer));
out:
if (err) {
rsp->hdr.Status = STATUS_INVALID_HANDLE;
@@ -6437,36 +6477,18 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
struct ksmbd_file *fp,
loff_t offset, size_t length, bool sync)
{
- struct smb2_buffer_desc_v1 *desc;
char *data_buf;
int ret;
ssize_t nbytes;
- desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
-
- if (work->conn->dialect == SMB30_PROT_ID &&
- req->Channel != SMB2_CHANNEL_RDMA_V1)
- return -EINVAL;
-
- if (req->Length != 0 || req->DataOffset != 0)
- return -EINVAL;
-
- if (req->WriteChannelInfoOffset == 0 ||
- le16_to_cpu(req->WriteChannelInfoLength) < sizeof(*desc))
- return -EINVAL;
-
- work->need_invalidate_rkey =
- (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
- work->remote_key = le32_to_cpu(desc->token);
-
- data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
+ data_buf = kvzalloc(length, GFP_KERNEL);
if (!data_buf)
return -ENOMEM;
ret = ksmbd_conn_rdma_read(work->conn, data_buf, length,
- le32_to_cpu(desc->token),
- le64_to_cpu(desc->offset),
- le32_to_cpu(desc->length));
+ (struct smb2_buffer_desc_v1 *)
+ ((char *)req + le16_to_cpu(req->WriteChannelInfoOffset)),
+ le16_to_cpu(req->WriteChannelInfoLength));
if (ret < 0) {
kvfree(data_buf);
return ret;
@@ -6489,16 +6511,16 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
int smb2_write(struct ksmbd_work *work)
{
struct smb2_write_req *req;
- struct smb2_write_rsp *rsp, *rsp_org;
+ struct smb2_write_rsp *rsp;
struct ksmbd_file *fp = NULL;
loff_t offset;
size_t length;
ssize_t nbytes;
char *data_buf;
- bool writethrough = false;
+ bool writethrough = false, is_rdma_channel = false;
int err = 0;
+ unsigned int max_write_size = work->conn->vals->max_write_size;
- rsp_org = work->response_buf;
WORK_BUFFERS(work, req, rsp);
if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_PIPE)) {
@@ -6506,14 +6528,40 @@ int smb2_write(struct ksmbd_work *work)
return smb2_write_pipe(work);
}
+ offset = le64_to_cpu(req->Offset);
+ length = le32_to_cpu(req->Length);
+
+ if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
+ req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
+ is_rdma_channel = true;
+ max_write_size = get_smbd_max_read_write_size();
+ length = le32_to_cpu(req->RemainingBytes);
+ }
+
+ if (is_rdma_channel == true) {
+ unsigned int ch_offset = le16_to_cpu(req->WriteChannelInfoOffset);
+
+ if (req->Length != 0 || req->DataOffset != 0 ||
+ ch_offset < offsetof(struct smb2_write_req, Buffer)) {
+ err = -EINVAL;
+ goto out;
+ }
+ err = smb2_set_remote_key_for_rdma(work,
+ (struct smb2_buffer_desc_v1 *)
+ ((char *)req + ch_offset),
+ req->Channel,
+ req->WriteChannelInfoLength);
+ if (err)
+ goto out;
+ }
+
if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
ksmbd_debug(SMB, "User does not have write permission\n");
err = -EACCES;
goto out;
}
- fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
- le64_to_cpu(req->PersistentFileId));
+ fp = ksmbd_lookup_fd_slow(work, req->VolatileFileId, req->PersistentFileId);
if (!fp) {
err = -ENOENT;
goto out;
@@ -6525,41 +6573,29 @@ int smb2_write(struct ksmbd_work *work)
goto out;
}
- offset = le64_to_cpu(req->Offset);
- length = le32_to_cpu(req->Length);
-
- if (length > work->conn->vals->max_write_size) {
+ if (length > max_write_size) {
ksmbd_debug(SMB, "limiting write size to max size(%u)\n",
- work->conn->vals->max_write_size);
+ max_write_size);
err = -EINVAL;
goto out;
}
+ ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
writethrough = true;
- if (req->Channel != SMB2_CHANNEL_RDMA_V1 &&
- req->Channel != SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
- if (le16_to_cpu(req->DataOffset) ==
- (offsetof(struct smb2_write_req, Buffer) - 4)) {
- data_buf = (char *)&req->Buffer[0];
- } else {
- if (le16_to_cpu(req->DataOffset) <
- offsetof(struct smb2_write_req, Buffer)) {
- err = -EINVAL;
- goto out;
- }
-
- data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
- le16_to_cpu(req->DataOffset));
+ if (is_rdma_channel == false) {
+ if (le16_to_cpu(req->DataOffset) <
+ offsetof(struct smb2_write_req, Buffer)) {
+ err = -EINVAL;
+ goto out;
}
- ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
- if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
- writethrough = true;
+ data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
+ le16_to_cpu(req->DataOffset));
- ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
- fp->filp->f_path.dentry, offset, length);
+ ksmbd_debug(SMB, "filename %pD, offset %lld, len %zu\n",
+ fp->filp, offset, length);
err = ksmbd_vfs_write(work, fp, data_buf, length, &offset,
writethrough, &nbytes);
if (err < 0)
@@ -6568,8 +6604,7 @@ int smb2_write(struct ksmbd_work *work)
/* read data from the client using rdma channel, and
* write the data.
*/
- nbytes = smb2_write_rdma_channel(work, req, fp, offset,
- le32_to_cpu(req->RemainingBytes),
+ nbytes = smb2_write_rdma_channel(work, req, fp, offset, length,
writethrough);
if (nbytes < 0) {
err = (int)nbytes;
@@ -6583,7 +6618,9 @@ int smb2_write(struct ksmbd_work *work)
rsp->DataLength = cpu_to_le32(nbytes);
rsp->DataRemaining = 0;
rsp->Reserved2 = 0;
- inc_rfc1001_len(rsp_org, 16);
+ err = ksmbd_iov_pin_rsp(work, rsp, offsetof(struct smb2_write_rsp, Buffer));
+ if (err)
+ goto out;
ksmbd_fd_put(work, fp);
return 0;
@@ -6617,32 +6654,24 @@ out:
int smb2_flush(struct ksmbd_work *work)
{
struct smb2_flush_req *req;
- struct smb2_flush_rsp *rsp, *rsp_org;
+ struct smb2_flush_rsp *rsp;
int err;
- rsp_org = work->response_buf;
WORK_BUFFERS(work, req, rsp);
- ksmbd_debug(SMB, "SMB2_FLUSH called for fid %llu\n",
- le64_to_cpu(req->VolatileFileId));
+ ksmbd_debug(SMB, "SMB2_FLUSH called for fid %llu\n", req->VolatileFileId);
- err = ksmbd_vfs_fsync(work,
- le64_to_cpu(req->VolatileFileId),
- le64_to_cpu(req->PersistentFileId));
+ err = ksmbd_vfs_fsync(work, req->VolatileFileId, req->PersistentFileId);
if (err)
goto out;
rsp->StructureSize = cpu_to_le16(4);
rsp->Reserved = 0;
- inc_rfc1001_len(rsp_org, 4);
- return 0;
+ return ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_flush_rsp));
out:
- if (err) {
- rsp->hdr.Status = STATUS_INVALID_HANDLE;
- smb2_set_err_rsp(work);
- }
-
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ smb2_set_err_rsp(work);
return err;
}
@@ -6655,12 +6684,14 @@ out:
int smb2_cancel(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
- struct smb2_hdr *hdr = work->request_buf;
+ struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
struct smb2_hdr *chdr;
- struct ksmbd_work *cancel_work = NULL;
- int canceled = 0;
+ struct ksmbd_work *iter;
struct list_head *command_list;
+ if (work->next_smb2_rcv_hdr_off)
+ hdr = ksmbd_resp_buf_next(work);
+
ksmbd_debug(SMB, "smb2 cancel called on mid %llu, async flags 0x%x\n",
hdr->MessageId, hdr->Flags);
@@ -6668,11 +6699,11 @@ int smb2_cancel(struct ksmbd_work *work)
command_list = &conn->async_requests;
spin_lock(&conn->request_lock);
- list_for_each_entry(cancel_work, command_list,
+ list_for_each_entry(iter, command_list,
async_request_entry) {
- chdr = cancel_work->request_buf;
+ chdr = smb2_get_msg(iter->request_buf);
- if (cancel_work->async_id !=
+ if (iter->async_id !=
le64_to_cpu(hdr->Id.AsyncId))
continue;
@@ -6680,7 +6711,9 @@ int smb2_cancel(struct ksmbd_work *work)
"smb2 with AsyncId %llu cancelled command = 0x%x\n",
le64_to_cpu(hdr->Id.AsyncId),
le16_to_cpu(chdr->Command));
- canceled = 1;
+ iter->state = KSMBD_WORK_CANCELLED;
+ if (iter->cancel_fn)
+ iter->cancel_fn(iter->cancel_argv);
break;
}
spin_unlock(&conn->request_lock);
@@ -6688,29 +6721,23 @@ int smb2_cancel(struct ksmbd_work *work)
command_list = &conn->requests;
spin_lock(&conn->request_lock);
- list_for_each_entry(cancel_work, command_list, request_entry) {
- chdr = cancel_work->request_buf;
+ list_for_each_entry(iter, command_list, request_entry) {
+ chdr = smb2_get_msg(iter->request_buf);
if (chdr->MessageId != hdr->MessageId ||
- cancel_work == work)
+ iter == work)
continue;
ksmbd_debug(SMB,
"smb2 with mid %llu cancelled command = 0x%x\n",
le64_to_cpu(hdr->MessageId),
le16_to_cpu(chdr->Command));
- canceled = 1;
+ iter->state = KSMBD_WORK_CANCELLED;
break;
}
spin_unlock(&conn->request_lock);
}
- if (canceled) {
- cancel_work->state = KSMBD_WORK_CANCELLED;
- if (cancel_work->cancel_fn)
- cancel_work->cancel_fn(cancel_work->cancel_argv);
- }
-
/* For SMB2_CANCEL command itself send no response*/
work->send_no_response = 1;
return 0;
@@ -6770,7 +6797,7 @@ static int smb2_set_flock_flags(struct file_lock *flock, int flags)
case SMB2_LOCKFLAG_UNLOCK:
ksmbd_debug(SMB, "received unlock request\n");
flock->fl_type = F_UNLCK;
- cmd = 0;
+ cmd = F_SETLK;
break;
}
@@ -6824,8 +6851,8 @@ static inline bool lock_defer_pending(struct file_lock *fl)
*/
int smb2_lock(struct ksmbd_work *work)
{
- struct smb2_lock_req *req = work->request_buf;
- struct smb2_lock_rsp *rsp = work->response_buf;
+ struct smb2_lock_req *req;
+ struct smb2_lock_rsp *rsp;
struct smb2_lock_element *lock_ele;
struct ksmbd_file *fp = NULL;
struct file_lock *flock = NULL;
@@ -6842,13 +6869,12 @@ int smb2_lock(struct ksmbd_work *work)
LIST_HEAD(rollback_list);
int prior_lock = 0;
+ WORK_BUFFERS(work, req, rsp);
+
ksmbd_debug(SMB, "Received lock request\n");
- fp = ksmbd_lookup_fd_slow(work,
- le64_to_cpu(req->VolatileFileId),
- le64_to_cpu(req->PersistentFileId));
+ fp = ksmbd_lookup_fd_slow(work, req->VolatileFileId, req->PersistentFileId);
if (!fp) {
- ksmbd_debug(SMB, "Invalid file id for lock : %llu\n",
- le64_to_cpu(req->VolatileFileId));
+ ksmbd_debug(SMB, "Invalid file id for lock : %llu\n", req->VolatileFileId);
err = -ENOENT;
goto out2;
}
@@ -6877,6 +6903,7 @@ int smb2_lock(struct ksmbd_work *work)
if (lock_start > U64_MAX - lock_length) {
pr_err("Invalid lock range requested\n");
rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE;
+ locks_free_lock(flock);
goto out;
}
@@ -6896,6 +6923,7 @@ int smb2_lock(struct ksmbd_work *work)
"the end offset(%llx) is smaller than the start offset(%llx)\n",
flock->fl_end, flock->fl_start);
rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE;
+ locks_free_lock(flock);
goto out;
}
@@ -6907,6 +6935,7 @@ int smb2_lock(struct ksmbd_work *work)
flock->fl_type != F_UNLCK) {
pr_err("conflict two locks in one request\n");
err = -EINVAL;
+ locks_free_lock(flock);
goto out;
}
}
@@ -6915,6 +6944,7 @@ int smb2_lock(struct ksmbd_work *work)
smb_lock = smb2_lock_init(flock, cmd, flags, &lock_list);
if (!smb_lock) {
err = -EINVAL;
+ locks_free_lock(flock);
goto out;
}
}
@@ -6946,7 +6976,7 @@ int smb2_lock(struct ksmbd_work *work)
nolock = 1;
/* check locks in connection list */
- read_lock(&conn_list_lock);
+ down_read(&conn_list_lock);
list_for_each_entry(conn, &conn_list, conns_list) {
spin_lock(&conn->llist_lock);
list_for_each_entry_safe(cmp_lock, tmp2, &conn->lock_list, clist) {
@@ -6963,7 +6993,7 @@ int smb2_lock(struct ksmbd_work *work)
list_del(&cmp_lock->flist);
list_del(&cmp_lock->clist);
spin_unlock(&conn->llist_lock);
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
locks_free_lock(cmp_lock->fl);
kfree(cmp_lock);
@@ -6985,7 +7015,7 @@ int smb2_lock(struct ksmbd_work *work)
cmp_lock->start > smb_lock->start &&
cmp_lock->start < smb_lock->end) {
spin_unlock(&conn->llist_lock);
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
pr_err("previous lock conflict with zero byte lock range\n");
goto out;
}
@@ -6994,7 +7024,7 @@ int smb2_lock(struct ksmbd_work *work)
smb_lock->start > cmp_lock->start &&
smb_lock->start < cmp_lock->end) {
spin_unlock(&conn->llist_lock);
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
pr_err("current lock conflict with zero byte lock range\n");
goto out;
}
@@ -7005,14 +7035,14 @@ int smb2_lock(struct ksmbd_work *work)
cmp_lock->end >= smb_lock->end)) &&
!cmp_lock->zero_len && !smb_lock->zero_len) {
spin_unlock(&conn->llist_lock);
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
pr_err("Not allow lock operation on exclusive lock range\n");
goto out;
}
}
spin_unlock(&conn->llist_lock);
}
- read_unlock(&conn_list_lock);
+ up_read(&conn_list_lock);
out_check_cl:
if (smb_lock->fl->fl_type == F_UNLCK && nolock) {
pr_err("Try to unlock nolocked range\n");
@@ -7046,10 +7076,6 @@ skip:
ksmbd_debug(SMB,
"would have to wait for getting lock\n");
- spin_lock(&work->conn->llist_lock);
- list_add_tail(&smb_lock->clist,
- &work->conn->lock_list);
- spin_unlock(&work->conn->llist_lock);
list_add(&smb_lock->llist, &rollback_list);
argv = kmalloc(sizeof(void *), GFP_KERNEL);
@@ -7063,6 +7089,7 @@ skip:
smb2_remove_blocked_lock,
argv);
if (rc) {
+ kfree(argv);
err = -ENOMEM;
goto out;
}
@@ -7074,17 +7101,15 @@ skip:
ksmbd_vfs_posix_lock_wait(flock);
+ spin_lock(&fp->f_lock);
+ list_del(&work->fp_entry);
+ spin_unlock(&fp->f_lock);
+
if (work->state != KSMBD_WORK_ACTIVE) {
list_del(&smb_lock->llist);
- spin_lock(&work->conn->llist_lock);
- list_del(&smb_lock->clist);
- spin_unlock(&work->conn->llist_lock);
locks_free_lock(flock);
if (work->state == KSMBD_WORK_CANCELLED) {
- spin_lock(&fp->f_lock);
- list_del(&work->fp_entry);
- spin_unlock(&fp->f_lock);
rsp->hdr.Status =
STATUS_CANCELLED;
kfree(smb_lock);
@@ -7093,8 +7118,7 @@ skip:
work->send_no_response = 1;
goto out;
}
- init_smb2_rsp_hdr(work);
- smb2_set_err_rsp(work);
+
rsp->hdr.Status =
STATUS_RANGE_NOT_LOCKED;
kfree(smb_lock);
@@ -7102,22 +7126,16 @@ skip:
}
list_del(&smb_lock->llist);
- spin_lock(&work->conn->llist_lock);
- list_del(&smb_lock->clist);
- spin_unlock(&work->conn->llist_lock);
-
- spin_lock(&fp->f_lock);
- list_del(&work->fp_entry);
- spin_unlock(&fp->f_lock);
+ release_async_work(work);
goto retry;
} else if (!rc) {
+ list_add(&smb_lock->llist, &rollback_list);
spin_lock(&work->conn->llist_lock);
list_add_tail(&smb_lock->clist,
&work->conn->lock_list);
list_add_tail(&smb_lock->flist,
&fp->lock_list);
spin_unlock(&work->conn->llist_lock);
- list_add(&smb_lock->llist, &rollback_list);
ksmbd_debug(SMB, "successful in taking lock\n");
} else {
goto out;
@@ -7132,7 +7150,10 @@ skip:
ksmbd_debug(SMB, "successful in taking lock\n");
rsp->hdr.Status = STATUS_SUCCESS;
rsp->Reserved = 0;
- inc_rfc1001_len(rsp, 4);
+ err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lock_rsp));
+ if (err)
+ goto out;
+
ksmbd_fd_put(work, fp);
return 0;
@@ -7151,7 +7172,7 @@ out:
rlock->fl_start = smb_lock->start;
rlock->fl_end = smb_lock->end;
- rc = vfs_lock_file(filp, 0, rlock, NULL);
+ rc = vfs_lock_file(filp, F_SETLK, rlock, NULL);
if (rc)
pr_err("rollback unlock fail : %d\n", rc);
@@ -7203,8 +7224,8 @@ static int fsctl_copychunk(struct ksmbd_work *work,
ci_rsp = (struct copychunk_ioctl_rsp *)&rsp->Buffer[0];
- rsp->VolatileFileId = cpu_to_le64(volatile_id);
- rsp->PersistentFileId = cpu_to_le64(persistent_id);
+ rsp->VolatileFileId = volatile_id;
+ rsp->PersistentFileId = persistent_id;
ci_rsp->ChunksWritten =
cpu_to_le32(ksmbd_server_side_copy_max_chunk_count());
ci_rsp->ChunkBytesWritten =
@@ -7325,15 +7346,10 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
struct sockaddr_storage_rsp *sockaddr_storage;
unsigned int flags;
unsigned long long speed;
- struct sockaddr_in6 *csin6 = (struct sockaddr_in6 *)&conn->peer_addr;
rtnl_lock();
for_each_netdev(&init_net, netdev) {
- if (out_buf_len <
- nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
- rtnl_unlock();
- return -ENOSPC;
- }
+ bool ipv4_set = false;
if (netdev->type == ARPHRD_LOOPBACK)
continue;
@@ -7341,6 +7357,12 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
flags = dev_get_flags(netdev);
if (!(flags & IFF_RUNNING))
continue;
+ipv6_retry:
+ if (out_buf_len <
+ nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
+ rtnl_unlock();
+ return -ENOSPC;
+ }
nii_rsp = (struct network_interface_info_ioctl_rsp *)
&rsp->Buffer[nbytes];
@@ -7373,8 +7395,7 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
nii_rsp->SockAddr_Storage;
memset(sockaddr_storage, 0, 128);
- if (conn->peer_addr.ss_family == PF_INET ||
- ipv6_addr_v4mapped(&csin6->sin6_addr)) {
+ if (!ipv4_set) {
struct in_device *idev;
sockaddr_storage->Family = cpu_to_le16(INTERNETWORK);
@@ -7385,6 +7406,9 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
continue;
sockaddr_storage->addr4.IPv4address =
idev_ipv4_address(idev);
+ nbytes += sizeof(struct network_interface_info_ioctl_rsp);
+ ipv4_set = true;
+ goto ipv6_retry;
} else {
struct inet6_dev *idev6;
struct inet6_ifaddr *ifa;
@@ -7406,9 +7430,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
break;
}
sockaddr_storage->addr6.ScopeId = 0;
+ nbytes += sizeof(struct network_interface_info_ioctl_rsp);
}
-
- nbytes += sizeof(struct network_interface_info_ioctl_rsp);
}
rtnl_unlock();
@@ -7416,8 +7439,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
if (nii_rsp)
nii_rsp->Next = 0;
- rsp->PersistentFileId = cpu_to_le64(SMB2_NO_FID);
- rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID);
+ rsp->PersistentFileId = SMB2_NO_FID;
+ rsp->VolatileFileId = SMB2_NO_FID;
return nbytes;
}
@@ -7571,7 +7594,8 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
da.attr = le32_to_cpu(fp->f_ci->m_fattr);
ret = ksmbd_vfs_set_dos_attrib_xattr(user_ns,
- fp->filp->f_path.dentry, &da);
+ &fp->filp->f_path,
+ &da, true);
if (ret)
fp->f_ci->m_fattr = old_fattr;
}
@@ -7587,9 +7611,7 @@ static int fsctl_request_resume_key(struct ksmbd_work *work,
{
struct ksmbd_file *fp;
- fp = ksmbd_lookup_fd_slow(work,
- le64_to_cpu(req->VolatileFileId),
- le64_to_cpu(req->PersistentFileId));
+ fp = ksmbd_lookup_fd_slow(work, req->VolatileFileId, req->PersistentFileId);
if (!fp)
return -ENOENT;
@@ -7610,28 +7632,27 @@ static int fsctl_request_resume_key(struct ksmbd_work *work,
int smb2_ioctl(struct ksmbd_work *work)
{
struct smb2_ioctl_req *req;
- struct smb2_ioctl_rsp *rsp, *rsp_org;
+ struct smb2_ioctl_rsp *rsp;
unsigned int cnt_code, nbytes = 0, out_buf_len, in_buf_len;
u64 id = KSMBD_NO_FID;
struct ksmbd_conn *conn = work->conn;
int ret = 0;
- rsp_org = work->response_buf;
if (work->next_smb2_rcv_hdr_off) {
req = ksmbd_req_buf_next(work);
rsp = ksmbd_resp_buf_next(work);
- if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ if (!has_file_id(req->VolatileFileId)) {
ksmbd_debug(SMB, "Compound request set FID = %llu\n",
work->compound_fid);
id = work->compound_fid;
}
} else {
- req = work->request_buf;
- rsp = work->response_buf;
+ req = smb2_get_msg(work->request_buf);
+ rsp = smb2_get_msg(work->response_buf);
}
if (!has_file_id(id))
- id = le64_to_cpu(req->VolatileFileId);
+ id = req->VolatileFileId;
if (req->Flags != cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL)) {
rsp->hdr.Status = STATUS_NOT_SUPPORTED;
@@ -7702,8 +7723,8 @@ int smb2_ioctl(struct ksmbd_work *work)
goto out;
nbytes = sizeof(struct validate_negotiate_info_rsp);
- rsp->PersistentFileId = cpu_to_le64(SMB2_NO_FID);
- rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID);
+ rsp->PersistentFileId = SMB2_NO_FID;
+ rsp->VolatileFileId = SMB2_NO_FID;
break;
case FSCTL_QUERY_NETWORK_INTERFACE_INFO:
ret = fsctl_query_iface_info_ioctl(conn, rsp, out_buf_len);
@@ -7751,8 +7772,8 @@ int smb2_ioctl(struct ksmbd_work *work)
(struct copychunk_ioctl_req *)&req->Buffer[0],
le32_to_cpu(req->CntCode),
le32_to_cpu(req->InputCount),
- le64_to_cpu(req->VolatileFileId),
- le64_to_cpu(req->PersistentFileId),
+ req->VolatileFileId,
+ req->PersistentFileId,
rsp);
break;
case FSCTL_SET_SPARSE:
@@ -7929,9 +7950,9 @@ dup_ext_out:
rsp->Reserved = cpu_to_le16(0);
rsp->Flags = cpu_to_le32(0);
rsp->Reserved2 = cpu_to_le32(0);
- inc_rfc1001_len(rsp_org, 48 + nbytes);
-
- return 0;
+ ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_ioctl_rsp) + nbytes);
+ if (!ret)
+ return ret;
out:
if (ret == -EACCES)
@@ -7956,8 +7977,8 @@ out:
*/
static void smb20_oplock_break_ack(struct ksmbd_work *work)
{
- struct smb2_oplock_break *req = work->request_buf;
- struct smb2_oplock_break *rsp = work->response_buf;
+ struct smb2_oplock_break *req;
+ struct smb2_oplock_break *rsp;
struct ksmbd_file *fp;
struct oplock_info *opinfo = NULL;
__le32 err = 0;
@@ -7966,8 +7987,10 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
char req_oplevel = 0, rsp_oplevel = 0;
unsigned int oplock_change_type;
- volatile_id = le64_to_cpu(req->VolatileFid);
- persistent_id = le64_to_cpu(req->PersistentFid);
+ WORK_BUFFERS(work, req, rsp);
+
+ volatile_id = req->VolatileFid;
+ persistent_id = req->PersistentFid;
req_oplevel = req->OplockLevel;
ksmbd_debug(OPLOCK, "v_id %llu, p_id %llu request oplock level %d\n",
volatile_id, persistent_id, req_oplevel);
@@ -8053,19 +8076,20 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
goto err_out;
}
- opinfo_put(opinfo);
- ksmbd_fd_put(work, fp);
opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
+ opinfo_put(opinfo);
+ ksmbd_fd_put(work, fp);
rsp->StructureSize = cpu_to_le16(24);
rsp->OplockLevel = rsp_oplevel;
rsp->Reserved = 0;
rsp->Reserved2 = 0;
- rsp->VolatileFid = cpu_to_le64(volatile_id);
- rsp->PersistentFid = cpu_to_le64(persistent_id);
- inc_rfc1001_len(rsp, 24);
- return;
+ rsp->VolatileFid = volatile_id;
+ rsp->PersistentFid = persistent_id;
+ ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break));
+ if (!ret)
+ return;
err_out:
opinfo->op_state = OPLOCK_STATE_NONE;
@@ -8100,8 +8124,8 @@ static int check_lease_state(struct lease *lease, __le32 req_state)
static void smb21_lease_break_ack(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
- struct smb2_lease_ack *req = work->request_buf;
- struct smb2_lease_ack *rsp = work->response_buf;
+ struct smb2_lease_ack *req;
+ struct smb2_lease_ack *rsp;
struct oplock_info *opinfo;
__le32 err = 0;
int ret = 0;
@@ -8109,6 +8133,8 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
__le32 lease_state;
struct lease *lease;
+ WORK_BUFFERS(work, req, rsp);
+
ksmbd_debug(OPLOCK, "smb21 lease break, lease state(0x%x)\n",
le32_to_cpu(req->LeaseState));
opinfo = lookup_lease_in_table(conn, req->LeaseKey);
@@ -8195,6 +8221,11 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
le32_to_cpu(req->LeaseState));
}
+ if (ret < 0) {
+ rsp->hdr.Status = err;
+ goto err_out;
+ }
+
lease_state = lease->state;
opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
@@ -8202,22 +8233,17 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
wake_up_interruptible_all(&opinfo->oplock_brk);
opinfo_put(opinfo);
- if (ret < 0) {
- rsp->hdr.Status = err;
- goto err_out;
- }
-
rsp->StructureSize = cpu_to_le16(36);
rsp->Reserved = 0;
rsp->Flags = 0;
memcpy(rsp->LeaseKey, req->LeaseKey, 16);
rsp->LeaseState = lease_state;
rsp->LeaseDuration = 0;
- inc_rfc1001_len(rsp, 36);
- return;
+ ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack));
+ if (!ret)
+ return;
err_out:
- opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
atomic_dec(&opinfo->breaking_cnt);
wake_up_interruptible_all(&opinfo->oplock_brk);
@@ -8234,8 +8260,10 @@ err_out:
*/
int smb2_oplock_break(struct ksmbd_work *work)
{
- struct smb2_oplock_break *req = work->request_buf;
- struct smb2_oplock_break *rsp = work->response_buf;
+ struct smb2_oplock_break *req;
+ struct smb2_oplock_break *rsp;
+
+ WORK_BUFFERS(work, req, rsp);
switch (le16_to_cpu(req->StructureSize)) {
case OP_BREAK_STRUCT_SIZE_20:
@@ -8287,7 +8315,7 @@ int smb2_notify(struct ksmbd_work *work)
*/
bool smb2_is_sign_req(struct ksmbd_work *work, unsigned int command)
{
- struct smb2_hdr *rcv_hdr2 = work->request_buf;
+ struct smb2_hdr *rcv_hdr2 = smb2_get_msg(work->request_buf);
if ((rcv_hdr2->Flags & SMB2_FLAGS_SIGNED) &&
command != SMB2_NEGOTIATE_HE &&
@@ -8306,22 +8334,22 @@ bool smb2_is_sign_req(struct ksmbd_work *work, unsigned int command)
*/
int smb2_check_sign_req(struct ksmbd_work *work)
{
- struct smb2_hdr *hdr, *hdr_org;
+ struct smb2_hdr *hdr;
char signature_req[SMB2_SIGNATURE_SIZE];
char signature[SMB2_HMACSHA256_SIZE];
struct kvec iov[1];
size_t len;
- hdr_org = hdr = work->request_buf;
+ hdr = smb2_get_msg(work->request_buf);
if (work->next_smb2_rcv_hdr_off)
hdr = ksmbd_req_buf_next(work);
if (!hdr->NextCommand && !work->next_smb2_rcv_hdr_off)
- len = be32_to_cpu(hdr_org->smb2_buf_length);
+ len = get_rfc1002_len(work->request_buf);
else if (hdr->NextCommand)
len = le32_to_cpu(hdr->NextCommand);
else
- len = be32_to_cpu(hdr_org->smb2_buf_length) -
+ len = get_rfc1002_len(work->request_buf) -
work->next_smb2_rcv_hdr_off;
memcpy(signature_req, hdr->Signature, SMB2_SIGNATURE_SIZE);
@@ -8349,43 +8377,20 @@ int smb2_check_sign_req(struct ksmbd_work *work)
*/
void smb2_set_sign_rsp(struct ksmbd_work *work)
{
- struct smb2_hdr *hdr, *hdr_org;
- struct smb2_hdr *req_hdr;
+ struct smb2_hdr *hdr;
char signature[SMB2_HMACSHA256_SIZE];
- struct kvec iov[2];
- size_t len;
+ struct kvec *iov;
int n_vec = 1;
- hdr_org = hdr = work->response_buf;
- if (work->next_smb2_rsp_hdr_off)
- hdr = ksmbd_resp_buf_next(work);
-
- req_hdr = ksmbd_req_buf_next(work);
-
- if (!work->next_smb2_rsp_hdr_off) {
- len = get_rfc1002_len(hdr_org);
- if (req_hdr->NextCommand)
- len = ALIGN(len, 8);
- } else {
- len = get_rfc1002_len(hdr_org) - work->next_smb2_rsp_hdr_off;
- len = ALIGN(len, 8);
- }
-
- if (req_hdr->NextCommand)
- hdr->NextCommand = cpu_to_le32(len);
-
+ hdr = ksmbd_resp_buf_curr(work);
hdr->Flags |= SMB2_FLAGS_SIGNED;
memset(hdr->Signature, 0, SMB2_SIGNATURE_SIZE);
- iov[0].iov_base = (char *)&hdr->ProtocolId;
- iov[0].iov_len = len;
-
- if (work->aux_payload_sz) {
- iov[0].iov_len -= work->aux_payload_sz;
-
- iov[1].iov_base = work->aux_payload_buf;
- iov[1].iov_len = work->aux_payload_sz;
+ if (hdr->Command == SMB2_READ) {
+ iov = &work->iov[work->iov_idx - 1];
n_vec++;
+ } else {
+ iov = &work->iov[work->iov_idx];
}
if (!ksmbd_sign_smb2_pdu(work->conn, work->sess->sess_key, iov, n_vec,
@@ -8403,36 +8408,33 @@ int smb3_check_sign_req(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
char *signing_key;
- struct smb2_hdr *hdr, *hdr_org;
+ struct smb2_hdr *hdr;
struct channel *chann;
char signature_req[SMB2_SIGNATURE_SIZE];
char signature[SMB2_CMACAES_SIZE];
struct kvec iov[1];
size_t len;
- hdr_org = hdr = work->request_buf;
+ hdr = smb2_get_msg(work->request_buf);
if (work->next_smb2_rcv_hdr_off)
hdr = ksmbd_req_buf_next(work);
if (!hdr->NextCommand && !work->next_smb2_rcv_hdr_off)
- len = be32_to_cpu(hdr_org->smb2_buf_length);
+ len = get_rfc1002_len(work->request_buf);
else if (hdr->NextCommand)
len = le32_to_cpu(hdr->NextCommand);
else
- len = be32_to_cpu(hdr_org->smb2_buf_length) -
+ len = get_rfc1002_len(work->request_buf) -
work->next_smb2_rcv_hdr_off;
if (le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
signing_key = work->sess->smb3signingkey;
} else {
- read_lock(&work->sess->chann_lock);
chann = lookup_chann_list(work->sess, conn);
if (!chann) {
- read_unlock(&work->sess->chann_lock);
return 0;
}
signing_key = chann->smb3signingkey;
- read_unlock(&work->sess->chann_lock);
}
if (!signing_key) {
@@ -8464,62 +8466,41 @@ int smb3_check_sign_req(struct ksmbd_work *work)
void smb3_set_sign_rsp(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
- struct smb2_hdr *req_hdr;
- struct smb2_hdr *hdr, *hdr_org;
+ struct smb2_hdr *hdr;
struct channel *chann;
char signature[SMB2_CMACAES_SIZE];
- struct kvec iov[2];
+ struct kvec *iov;
int n_vec = 1;
- size_t len;
char *signing_key;
- hdr_org = hdr = work->response_buf;
- if (work->next_smb2_rsp_hdr_off)
- hdr = ksmbd_resp_buf_next(work);
-
- req_hdr = ksmbd_req_buf_next(work);
-
- if (!work->next_smb2_rsp_hdr_off) {
- len = get_rfc1002_len(hdr_org);
- if (req_hdr->NextCommand)
- len = ALIGN(len, 8);
- } else {
- len = get_rfc1002_len(hdr_org) - work->next_smb2_rsp_hdr_off;
- len = ALIGN(len, 8);
- }
+ hdr = ksmbd_resp_buf_curr(work);
if (conn->binding == false &&
le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
signing_key = work->sess->smb3signingkey;
} else {
- read_lock(&work->sess->chann_lock);
chann = lookup_chann_list(work->sess, work->conn);
if (!chann) {
- read_unlock(&work->sess->chann_lock);
return;
}
signing_key = chann->smb3signingkey;
- read_unlock(&work->sess->chann_lock);
}
if (!signing_key)
return;
- if (req_hdr->NextCommand)
- hdr->NextCommand = cpu_to_le32(len);
-
hdr->Flags |= SMB2_FLAGS_SIGNED;
memset(hdr->Signature, 0, SMB2_SIGNATURE_SIZE);
- iov[0].iov_base = (char *)&hdr->ProtocolId;
- iov[0].iov_len = len;
- if (work->aux_payload_sz) {
- iov[0].iov_len -= work->aux_payload_sz;
- iov[1].iov_base = work->aux_payload_buf;
- iov[1].iov_len = work->aux_payload_sz;
+
+ if (hdr->Command == SMB2_READ) {
+ iov = &work->iov[work->iov_idx - 1];
n_vec++;
+ } else {
+ iov = &work->iov[work->iov_idx];
}
- if (!ksmbd_sign_smb3_pdu(conn, signing_key, iov, n_vec, signature))
+ if (!ksmbd_sign_smb3_pdu(conn, signing_key, iov, n_vec,
+ signature))
memcpy(hdr->Signature, signature, SMB2_SIGNATURE_SIZE);
}
@@ -8541,7 +8522,7 @@ void smb3_preauth_hash_rsp(struct ksmbd_work *work)
if (le16_to_cpu(req->Command) == SMB2_NEGOTIATE_HE &&
conn->preauth_info)
- ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp,
+ ksmbd_gen_preauth_integrity_hash(conn, work->response_buf,
conn->preauth_info->Preauth_HashValue);
if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE && sess) {
@@ -8559,18 +8540,18 @@ void smb3_preauth_hash_rsp(struct ksmbd_work *work)
if (!hash_value)
return;
}
- ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp,
+ ksmbd_gen_preauth_integrity_hash(conn, work->response_buf,
hash_value);
}
}
-static void fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, char *old_buf,
- __le16 cipher_type)
+static void fill_transform_hdr(void *tr_buf, char *old_buf, __le16 cipher_type)
{
- struct smb2_hdr *hdr = (struct smb2_hdr *)old_buf;
+ struct smb2_transform_hdr *tr_hdr = tr_buf + 4;
+ struct smb2_hdr *hdr = smb2_get_msg(old_buf);
unsigned int orig_len = get_rfc1002_len(old_buf);
- memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr));
+ /* tr_buf must be cleared by the caller */
tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
tr_hdr->OriginalMessageSize = cpu_to_le32(orig_len);
tr_hdr->Flags = cpu_to_le16(0x01);
@@ -8580,76 +8561,49 @@ static void fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, char *old_buf,
else
get_random_bytes(&tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
memcpy(&tr_hdr->SessionId, &hdr->SessionId, 8);
- inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - 4);
- inc_rfc1001_len(tr_hdr, orig_len);
+ inc_rfc1001_len(tr_buf, sizeof(struct smb2_transform_hdr));
+ inc_rfc1001_len(tr_buf, orig_len);
}
int smb3_encrypt_resp(struct ksmbd_work *work)
{
- char *buf = work->response_buf;
- struct smb2_transform_hdr *tr_hdr;
- struct kvec iov[3];
+ struct kvec *iov = work->iov;
int rc = -ENOMEM;
- int buf_size = 0, rq_nvec = 2 + (work->aux_payload_sz ? 1 : 0);
-
- if (ARRAY_SIZE(iov) < rq_nvec)
- return -ENOMEM;
+ void *tr_buf;
- tr_hdr = kzalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
- if (!tr_hdr)
+ tr_buf = kzalloc(sizeof(struct smb2_transform_hdr) + 4, GFP_KERNEL);
+ if (!tr_buf)
return rc;
/* fill transform header */
- fill_transform_hdr(tr_hdr, buf, work->conn->cipher_type);
-
- iov[0].iov_base = tr_hdr;
- iov[0].iov_len = sizeof(struct smb2_transform_hdr);
- buf_size += iov[0].iov_len - 4;
+ fill_transform_hdr(tr_buf, work->response_buf, work->conn->cipher_type);
- iov[1].iov_base = buf + 4;
- iov[1].iov_len = get_rfc1002_len(buf);
- if (work->aux_payload_sz) {
- iov[1].iov_len = work->resp_hdr_sz - 4;
+ iov[0].iov_base = tr_buf;
+ iov[0].iov_len = sizeof(struct smb2_transform_hdr) + 4;
+ work->tr_buf = tr_buf;
- iov[2].iov_base = work->aux_payload_buf;
- iov[2].iov_len = work->aux_payload_sz;
- buf_size += iov[2].iov_len;
- }
- buf_size += iov[1].iov_len;
- work->resp_hdr_sz = iov[1].iov_len;
-
- rc = ksmbd_crypt_message(work->conn, iov, rq_nvec, 1);
- if (rc)
- return rc;
-
- memmove(buf, iov[1].iov_base, iov[1].iov_len);
- tr_hdr->smb2_buf_length = cpu_to_be32(buf_size);
- work->tr_buf = tr_hdr;
-
- return rc;
+ return ksmbd_crypt_message(work, iov, work->iov_idx + 1, 1);
}
bool smb3_is_transform_hdr(void *buf)
{
- struct smb2_transform_hdr *trhdr = buf;
+ struct smb2_transform_hdr *trhdr = smb2_get_msg(buf);
return trhdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM;
}
int smb3_decrypt_req(struct ksmbd_work *work)
{
- struct ksmbd_conn *conn = work->conn;
struct ksmbd_session *sess;
char *buf = work->request_buf;
- struct smb2_hdr *hdr;
unsigned int pdu_length = get_rfc1002_len(buf);
struct kvec iov[2];
- int buf_data_size = pdu_length + 4 -
- sizeof(struct smb2_transform_hdr);
- struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
+ int buf_data_size = pdu_length - sizeof(struct smb2_transform_hdr);
+ struct smb2_transform_hdr *tr_hdr = smb2_get_msg(buf);
int rc = 0;
- if (buf_data_size < sizeof(struct smb2_hdr)) {
+ if (pdu_length < sizeof(struct smb2_transform_hdr) ||
+ buf_data_size < sizeof(struct smb2_hdr)) {
pr_err("Transform message is too small (%u)\n",
pdu_length);
return -ECONNABORTED;
@@ -8660,7 +8614,7 @@ int smb3_decrypt_req(struct ksmbd_work *work)
return -ECONNABORTED;
}
- sess = ksmbd_session_lookup_all(conn, le64_to_cpu(tr_hdr->SessionId));
+ sess = ksmbd_session_lookup_all(work->conn, le64_to_cpu(tr_hdr->SessionId));
if (!sess) {
pr_err("invalid session id(%llx) in transform header\n",
le64_to_cpu(tr_hdr->SessionId));
@@ -8668,16 +8622,15 @@ int smb3_decrypt_req(struct ksmbd_work *work)
}
iov[0].iov_base = buf;
- iov[0].iov_len = sizeof(struct smb2_transform_hdr);
- iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
+ iov[0].iov_len = sizeof(struct smb2_transform_hdr) + 4;
+ iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr) + 4;
iov[1].iov_len = buf_data_size;
- rc = ksmbd_crypt_message(conn, iov, 2, 0);
+ rc = ksmbd_crypt_message(work, iov, 2, 0);
if (rc)
return rc;
memmove(buf + 4, iov[1].iov_base, buf_data_size);
- hdr = (struct smb2_hdr *)buf;
- hdr->smb2_buf_length = cpu_to_be32(buf_data_size);
+ *(__be32 *)buf = cpu_to_be32(buf_data_size);
return rc;
}
@@ -8686,7 +8639,7 @@ bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
struct ksmbd_session *sess = work->sess;
- struct smb2_hdr *rsp = work->response_buf;
+ struct smb2_hdr *rsp = smb2_get_msg(work->response_buf);
if (conn->dialect < SMB30_PROT_ID)
return false;
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index ddc3cea9c905..912bd94257ec 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -109,6 +109,7 @@
#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) /* 'B''M''S' */
#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
+#define SMB2_COMPRESSION_TRANSFORM_ID cpu_to_le32(0x424d53fc)
#define SMB21_DEFAULT_IOSIZE (1024 * 1024)
#define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024)
@@ -131,11 +132,6 @@
cpu_to_le16(__SMB2_HEADER_STRUCTURE_SIZE)
struct smb2_hdr {
- __be32 smb2_buf_length; /* big endian on wire */
- /*
- * length is only two or three bytes - with
- * one or two byte type preceding it that MBZ
- */
__le32 ProtocolId; /* 0xFE 'S' 'M' 'B' */
__le16 StructureSize; /* 64 */
__le16 CreditCharge; /* MBZ */
@@ -165,11 +161,6 @@ struct smb2_pdu {
#define SMB3_AES_GCM_NONCE 12
struct smb2_transform_hdr {
- __be32 smb2_buf_length; /* big endian on wire */
- /*
- * length is only two or three bytes - with
- * one or two byte type preceding it that MBZ
- */
__le32 ProtocolId; /* 0xFD 'S' 'M' 'B' */
__u8 Signature[16];
__u8 Nonce[16];
@@ -254,14 +245,14 @@ struct preauth_integrity_info {
__u8 Preauth_HashValue[PREAUTH_HASHVALUE_SIZE];
};
-/* offset is sizeof smb2_negotiate_rsp - 4 but rounded up to 8 bytes. */
+/* offset is sizeof smb2_negotiate_rsp but rounded up to 8 bytes. */
#ifdef CONFIG_SMB_SERVER_KERBEROS5
-/* sizeof(struct smb2_negotiate_rsp) - 4 =
+/* sizeof(struct smb2_negotiate_rsp) =
* header(64) + response(64) + GSS_LENGTH(96) + GSS_PADDING(0)
*/
#define OFFSET_OF_NEG_CONTEXT 0xe0
#else
-/* sizeof(struct smb2_negotiate_rsp) - 4 =
+/* sizeof(struct smb2_negotiate_rsp) =
* header(64) + response(64) + GSS_LENGTH(74) + GSS_PADDING(6)
*/
#define OFFSET_OF_NEG_CONTEXT 0xd0
@@ -629,6 +620,8 @@ struct create_context {
__u8 Buffer[0];
} __packed;
+#define SMB2_SESSION_TIMEOUT (10 * HZ)
+
struct create_durable_req_v2 {
struct create_context ccontext;
__u8 Name[8];
@@ -644,8 +637,8 @@ struct create_durable_reconn_req {
union {
__u8 Reserved[16];
struct {
- __le64 PersistentFileId;
- __le64 VolatileFileId;
+ __u64 PersistentFileId;
+ __u64 VolatileFileId;
} Fid;
} Data;
} __packed;
@@ -654,8 +647,8 @@ struct create_durable_reconn_v2_req {
struct create_context ccontext;
__u8 Name[8];
struct {
- __le64 PersistentFileId;
- __le64 VolatileFileId;
+ __u64 PersistentFileId;
+ __u64 VolatileFileId;
} Fid;
__u8 CreateGuid[16];
__le32 Flags;
@@ -734,7 +727,8 @@ struct create_posix_rsp {
__le32 nlink;
__le32 reparse_tag;
__le32 mode;
- u8 SidBuffer[40];
+ /* SidBuffer contain two sids(Domain sid(28), UNIX group sid(16)) */
+ u8 SidBuffer[44];
} __packed;
#define SMB2_LEASE_NONE_LE cpu_to_le32(0x00)
@@ -743,23 +737,23 @@ struct create_posix_rsp {
#define SMB2_LEASE_WRITE_CACHING_LE cpu_to_le32(0x04)
#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE cpu_to_le32(0x04)
+
+#define SMB2_LEASE_KEY_SIZE 16
struct lease_context {
- __le64 LeaseKeyLow;
- __le64 LeaseKeyHigh;
+ __u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
__le32 LeaseState;
__le32 LeaseFlags;
__le64 LeaseDuration;
} __packed;
struct lease_context_v2 {
- __le64 LeaseKeyLow;
- __le64 LeaseKeyHigh;
+ __u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
__le32 LeaseState;
__le32 LeaseFlags;
__le64 LeaseDuration;
- __le64 ParentLeaseKeyLow;
- __le64 ParentLeaseKeyHigh;
+ __u8 ParentLeaseKey[SMB2_LEASE_KEY_SIZE];
__le16 Epoch;
__le16 Reserved;
} __packed;
@@ -900,8 +894,8 @@ struct smb2_ioctl_req {
__le16 StructureSize; /* Must be 57 */
__le16 Reserved; /* offset from start of SMB2 header to write data */
__le32 CntCode;
- __le64 PersistentFileId;
- __le64 VolatileFileId;
+ __u64 PersistentFileId;
+ __u64 VolatileFileId;
__le32 InputOffset; /* Reserved MBZ */
__le32 InputCount;
__le32 MaxInputResponse;
@@ -918,8 +912,8 @@ struct smb2_ioctl_rsp {
__le16 StructureSize; /* Must be 49 */
__le16 Reserved; /* offset from start of SMB2 header to write data */
__le32 CntCode;
- __le64 PersistentFileId;
- __le64 VolatileFileId;
+ __u64 PersistentFileId;
+ __u64 VolatileFileId;
__le32 InputOffset; /* Reserved MBZ */
__le32 InputCount;
__le32 OutputOffset;
@@ -988,7 +982,7 @@ struct file_object_buf_type1_ioctl_rsp {
} __packed;
struct resume_key_ioctl_rsp {
- __le64 ResumeKey[3];
+ __u64 ResumeKey[3];
__le32 ContextLength;
__u8 Context[4]; /* ignored, Windows sets to 4 bytes of zero */
} __packed;
@@ -1100,8 +1094,8 @@ struct smb2_lock_req {
__le16 StructureSize; /* Must be 48 */
__le16 LockCount;
__le32 Reserved;
- __le64 PersistentFileId;
- __le64 VolatileFileId;
+ __u64 PersistentFileId;
+ __u64 VolatileFileId;
/* Followed by at least one */
struct smb2_lock_element locks[1];
} __packed;
@@ -1136,8 +1130,8 @@ struct smb2_query_directory_req {
__u8 FileInformationClass;
__u8 Flags;
__le32 FileIndex;
- __le64 PersistentFileId;
- __le64 VolatileFileId;
+ __u64 PersistentFileId;
+ __u64 VolatileFileId;
__le16 FileNameOffset;
__le16 FileNameLength;
__le32 OutputBufferLength;
@@ -1183,8 +1177,8 @@ struct smb2_query_info_req {
__le32 InputBufferLength;
__le32 AdditionalInformation;
__le32 Flags;
- __le64 PersistentFileId;
- __le64 VolatileFileId;
+ __u64 PersistentFileId;
+ __u64 VolatileFileId;
__u8 Buffer[1];
} __packed;
@@ -1205,8 +1199,8 @@ struct smb2_set_info_req {
__le16 BufferOffset;
__u16 Reserved;
__le32 AdditionalInformation;
- __le64 PersistentFileId;
- __le64 VolatileFileId;
+ __u64 PersistentFileId;
+ __u64 VolatileFileId;
__u8 Buffer[1];
} __packed;
@@ -1567,7 +1561,7 @@ struct smb2_ea_info {
__u8 Flags;
__u8 EaNameLength;
__le16 EaValueLength;
- char name[1];
+ char name[];
/* optionally followed by value */
} __packed; /* level 15 Query */
@@ -1628,9 +1622,10 @@ struct smb2_posix_info {
__le32 HardLinks;
__le32 ReparseTag;
__le32 Mode;
- u8 SidBuffer[40];
+ /* SidBuffer contain two sids (UNIX user sid(16), UNIX group sid(16)) */
+ u8 SidBuffer[32];
__le32 name_len;
- u8 name[1];
+ u8 name[];
/*
* var sized owner SID
* var sized group SID
@@ -1672,6 +1667,7 @@ int find_matching_smb2_dialect(int start_index, __le16 *cli_dialects,
struct file_lock *smb_flock_init(struct file *f);
int setup_async_work(struct ksmbd_work *work, void (*fn)(void **),
void **arg);
+void release_async_work(struct ksmbd_work *work);
void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status);
struct channel *lookup_chann_list(struct ksmbd_session *sess,
struct ksmbd_conn *conn);
@@ -1681,6 +1677,7 @@ int smb3_decrypt_req(struct ksmbd_work *work);
int smb3_encrypt_resp(struct ksmbd_work *work);
bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work);
int smb2_set_rsp_credits(struct ksmbd_work *work);
+bool smb3_encryption_negotiated(struct ksmbd_conn *conn);
/* smb2 misc functions */
int ksmbd_smb2_check_message(struct ksmbd_work *work);
@@ -1707,4 +1704,13 @@ int smb2_ioctl(struct ksmbd_work *work);
int smb2_oplock_break(struct ksmbd_work *work);
int smb2_notify(struct ksmbd_work *ksmbd_work);
+/*
+ * Get the body of the smb2 message excluding the 4 byte rfc1002 headers
+ * from request/response buffer.
+ */
+static inline void *smb2_get_msg(void *buf)
+{
+ return buf + 4;
+}
+
#endif /* _SMB2PDU_H */
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
index af583e426621..e90a1e8c1951 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/ksmbd/smb_common.c
@@ -134,7 +134,7 @@ int ksmbd_lookup_protocol_idx(char *str)
*/
int ksmbd_verify_smb_message(struct ksmbd_work *work)
{
- struct smb2_hdr *smb2_hdr = work->request_buf + work->next_smb2_rcv_hdr_off;
+ struct smb2_hdr *smb2_hdr = ksmbd_req_buf_next(work);
struct smb_hdr *hdr;
if (smb2_hdr->ProtocolId == SMB2_PROTO_NUMBER)
@@ -158,7 +158,23 @@ int ksmbd_verify_smb_message(struct ksmbd_work *work)
*/
bool ksmbd_smb_request(struct ksmbd_conn *conn)
{
- return conn->request_buf[0] == 0;
+ __le32 *proto;
+
+ if (conn->request_buf[0] != 0)
+ return false;
+
+ proto = (__le32 *)smb2_get_msg(conn->request_buf);
+ if (*proto == SMB2_COMPRESSION_TRANSFORM_ID) {
+ pr_err_ratelimited("smb2 compression not support yet");
+ return false;
+ }
+
+ if (*proto != SMB1_PROTO_NUMBER &&
+ *proto != SMB2_PROTO_NUMBER &&
+ *proto != SMB2_TRANSFORM_PROTO_NUM)
+ return false;
+
+ return true;
}
static bool supported_protocol(int idx)
@@ -243,18 +259,18 @@ int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count)
static int ksmbd_negotiate_smb_dialect(void *buf)
{
int smb_buf_length = get_rfc1002_len(buf);
- __le32 proto = ((struct smb2_hdr *)buf)->ProtocolId;
+ __le32 proto = ((struct smb2_hdr *)smb2_get_msg(buf))->ProtocolId;
if (proto == SMB2_PROTO_NUMBER) {
struct smb2_negotiate_req *req;
int smb2_neg_size =
- offsetof(struct smb2_negotiate_req, Dialects) - 4;
+ offsetof(struct smb2_negotiate_req, Dialects);
- req = (struct smb2_negotiate_req *)buf;
+ req = (struct smb2_negotiate_req *)smb2_get_msg(buf);
if (smb2_neg_size > smb_buf_length)
goto err_out;
- if (smb2_neg_size + le16_to_cpu(req->DialectCount) * sizeof(__le16) >
+ if (struct_size(req, Dialects, le16_to_cpu(req->DialectCount)) >
smb_buf_length)
goto err_out;
@@ -283,20 +299,129 @@ err_out:
return BAD_PROT_ID;
}
-int ksmbd_init_smb_server(struct ksmbd_work *work)
+#define SMB_COM_NEGOTIATE_EX 0x0
+
+/**
+ * get_smb1_cmd_val() - get smb command value from smb header
+ * @work: smb work containing smb header
+ *
+ * Return: smb command value
+ */
+static u16 get_smb1_cmd_val(struct ksmbd_work *work)
{
- struct ksmbd_conn *conn = work->conn;
+ return SMB_COM_NEGOTIATE_EX;
+}
- if (conn->need_neg == false)
+/**
+ * init_smb1_rsp_hdr() - initialize smb negotiate response header
+ * @work: smb work containing smb request
+ *
+ * Return: 0 on success, otherwise -EINVAL
+ */
+static int init_smb1_rsp_hdr(struct ksmbd_work *work)
+{
+ struct smb_hdr *rsp_hdr = (struct smb_hdr *)work->response_buf;
+ struct smb_hdr *rcv_hdr = (struct smb_hdr *)work->request_buf;
+
+ rsp_hdr->Command = SMB_COM_NEGOTIATE;
+ *(__le32 *)rsp_hdr->Protocol = SMB1_PROTO_NUMBER;
+ rsp_hdr->Flags = SMBFLG_RESPONSE;
+ rsp_hdr->Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS |
+ SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME;
+ rsp_hdr->Pid = rcv_hdr->Pid;
+ rsp_hdr->Mid = rcv_hdr->Mid;
+ return 0;
+}
+
+/**
+ * smb1_check_user_session() - check for valid session for a user
+ * @work: smb work containing smb request buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int smb1_check_user_session(struct ksmbd_work *work)
+{
+ unsigned int cmd = work->conn->ops->get_cmd_val(work);
+
+ if (cmd == SMB_COM_NEGOTIATE_EX)
return 0;
- init_smb3_11_server(conn);
+ return -EINVAL;
+}
+
+/**
+ * smb1_allocate_rsp_buf() - allocate response buffer for a command
+ * @work: smb work containing smb request
+ *
+ * Return: 0 on success, otherwise -ENOMEM
+ */
+static int smb1_allocate_rsp_buf(struct ksmbd_work *work)
+{
+ work->response_buf = kzalloc(MAX_CIFS_SMALL_BUFFER_SIZE,
+ GFP_KERNEL);
+ work->response_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
+
+ if (!work->response_buf) {
+ pr_err("Failed to allocate %u bytes buffer\n",
+ MAX_CIFS_SMALL_BUFFER_SIZE);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * set_smb1_rsp_status() - set error type in smb response header
+ * @work: smb work containing smb response header
+ * @err: error code to set in response
+ */
+static void set_smb1_rsp_status(struct ksmbd_work *work, __le32 err)
+{
+ work->send_no_response = 1;
+}
+
+static struct smb_version_ops smb1_server_ops = {
+ .get_cmd_val = get_smb1_cmd_val,
+ .init_rsp_hdr = init_smb1_rsp_hdr,
+ .allocate_rsp_buf = smb1_allocate_rsp_buf,
+ .check_user_session = smb1_check_user_session,
+ .set_rsp_status = set_smb1_rsp_status,
+};
- if (conn->ops->get_cmd_val(work) != SMB_COM_NEGOTIATE)
- conn->need_neg = false;
+static int smb1_negotiate(struct ksmbd_work *work)
+{
+ return ksmbd_smb_negotiate_common(work, SMB_COM_NEGOTIATE);
+}
+
+static struct smb_version_cmds smb1_server_cmds[1] = {
+ [SMB_COM_NEGOTIATE_EX] = { .proc = smb1_negotiate, },
+};
+
+static int init_smb1_server(struct ksmbd_conn *conn)
+{
+ conn->ops = &smb1_server_ops;
+ conn->cmds = smb1_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb1_server_cmds);
return 0;
}
+int ksmbd_init_smb_server(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ __le32 proto;
+
+ proto = *(__le32 *)((struct smb_hdr *)work->request_buf)->Protocol;
+ if (conn->need_neg == false) {
+ if (proto == SMB1_PROTO_NUMBER)
+ return -EINVAL;
+ return 0;
+ }
+
+ if (proto == SMB1_PROTO_NUMBER)
+ return init_smb1_server(conn);
+ return init_smb3_11_server(conn);
+}
+
int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
struct ksmbd_file *dir,
struct ksmbd_dir_info *d_info,
@@ -444,20 +569,11 @@ static int smb_handle_negotiate(struct ksmbd_work *work)
ksmbd_debug(SMB, "Unsupported SMB1 protocol\n");
- /*
- * Remove 4 byte direct TCP header, add 2 byte bcc and
- * 2 byte DialectIndex.
- */
- *(__be32 *)work->response_buf =
- cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2 + 2);
- neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS;
-
- neg_rsp->hdr.Command = SMB_COM_NEGOTIATE;
- *(__le32 *)neg_rsp->hdr.Protocol = SMB1_PROTO_NUMBER;
- neg_rsp->hdr.Flags = SMBFLG_RESPONSE;
- neg_rsp->hdr.Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS |
- SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME;
+ if (ksmbd_iov_pin_rsp(work, (void *)neg_rsp,
+ sizeof(struct smb_negotiate_rsp) - 4))
+ return -ENOMEM;
+ neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS;
neg_rsp->hdr.WordCount = 1;
neg_rsp->DialectIndex = cpu_to_le16(work->conn->dialect);
neg_rsp->ByteCount = 0;
@@ -469,27 +585,17 @@ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command)
struct ksmbd_conn *conn = work->conn;
int ret;
- conn->dialect = ksmbd_negotiate_smb_dialect(work->request_buf);
+ conn->dialect =
+ ksmbd_negotiate_smb_dialect(work->request_buf);
ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
if (command == SMB2_NEGOTIATE_HE) {
- struct smb2_hdr *smb2_hdr = work->request_buf;
-
- if (smb2_hdr->ProtocolId != SMB2_PROTO_NUMBER) {
- ksmbd_debug(SMB, "Downgrade to SMB1 negotiation\n");
- command = SMB_COM_NEGOTIATE;
- }
- }
-
- if (command == SMB2_NEGOTIATE_HE) {
ret = smb2_handle_negotiate(work);
- init_smb2_neg_rsp(work);
return ret;
}
if (command == SMB_COM_NEGOTIATE) {
if (__smb2_negotiate(conn)) {
- conn->need_neg = true;
init_smb3_11_server(conn);
init_smb2_neg_rsp(work);
ksmbd_debug(SMB, "Upgrade to SMB2 negotiation\n");
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
index 48cbaa032140..c4978579c541 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/ksmbd/smb_common.h
@@ -247,7 +247,7 @@ struct smb_hdr {
struct smb_negotiate_req {
struct smb_hdr hdr; /* wct = 0 */
__le16 ByteCount;
- unsigned char DialectsArray[1];
+ unsigned char DialectsArray[];
} __packed;
struct smb_negotiate_rsp {
@@ -310,14 +310,14 @@ struct file_directory_info {
__le64 AllocationSize;
__le32 ExtFileAttributes;
__le32 FileNameLength;
- char FileName[1];
+ char FileName[];
} __packed; /* level 0x101 FF resp data */
struct file_names_info {
__le32 NextEntryOffset;
__u32 FileIndex;
__le32 FileNameLength;
- char FileName[1];
+ char FileName[];
} __packed; /* level 0xc FF resp data */
struct file_full_directory_info {
@@ -332,7 +332,7 @@ struct file_full_directory_info {
__le32 ExtFileAttributes;
__le32 FileNameLength;
__le32 EaSize;
- char FileName[1];
+ char FileName[];
} __packed; /* level 0x102 FF resp */
struct file_both_directory_info {
@@ -350,7 +350,7 @@ struct file_both_directory_info {
__u8 ShortNameLength;
__u8 Reserved;
__u8 ShortName[24];
- char FileName[1];
+ char FileName[];
} __packed; /* level 0x104 FFrsp data */
struct file_id_both_directory_info {
@@ -370,7 +370,7 @@ struct file_id_both_directory_info {
__u8 ShortName[24];
__le16 Reserved2;
__le64 UniqueId;
- char FileName[1];
+ char FileName[];
} __packed;
struct file_id_full_dir_info {
@@ -387,7 +387,7 @@ struct file_id_full_dir_info {
__le32 EaSize; /* EA size */
__le32 Reserved;
__le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
- char FileName[1];
+ char FileName[];
} __packed; /* level 0x105 FF rsp data */
struct smb_version_values {
@@ -464,12 +464,6 @@ struct smb_version_cmds {
int (*proc)(struct ksmbd_work *swork);
};
-static inline size_t
-smb2_hdr_size_no_buflen(struct smb_version_values *vals)
-{
- return vals->header_size - 4;
-}
-
int ksmbd_min_protocol(void);
int ksmbd_max_protocol(void);
diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c
index 3781bca2c8fc..9ace5027684d 100644
--- a/fs/ksmbd/smbacl.c
+++ b/fs/ksmbd/smbacl.c
@@ -97,7 +97,7 @@ int compare_sids(const struct smb_sid *ctsid, const struct smb_sid *cwsid)
/* compare all of the subauth values if any */
num_sat = ctsid->num_subauth;
num_saw = cwsid->num_subauth;
- num_subauth = num_sat < num_saw ? num_sat : num_saw;
+ num_subauth = min(num_sat, num_saw);
if (num_subauth) {
for (i = 0; i < num_subauth; ++i) {
if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
@@ -991,7 +991,7 @@ static void smb_set_ace(struct smb_ace *ace, const struct smb_sid *sid, u8 type,
}
int smb_inherit_dacl(struct ksmbd_conn *conn,
- struct path *path,
+ const struct path *path,
unsigned int uid, unsigned int gid)
{
const struct smb_sid *psid, *creator = NULL;
@@ -1105,6 +1105,7 @@ pass:
struct smb_acl *pdacl;
struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL;
int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size;
+ int pntsd_alloc_size;
if (parent_pntsd->osidoffset) {
powner_sid = (struct smb_sid *)((char *)parent_pntsd +
@@ -1117,9 +1118,10 @@ pass:
pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4);
}
- pntsd = kzalloc(sizeof(struct smb_ntsd) + powner_sid_size +
- pgroup_sid_size + sizeof(struct smb_acl) +
- nt_size, GFP_KERNEL);
+ pntsd_alloc_size = sizeof(struct smb_ntsd) + powner_sid_size +
+ pgroup_sid_size + sizeof(struct smb_acl) + nt_size;
+
+ pntsd = kzalloc(pntsd_alloc_size, GFP_KERNEL);
if (!pntsd) {
rc = -ENOMEM;
goto free_aces_base;
@@ -1134,6 +1136,27 @@ pass:
pntsd->gsidoffset = parent_pntsd->gsidoffset;
pntsd->dacloffset = parent_pntsd->dacloffset;
+ if ((u64)le32_to_cpu(pntsd->osidoffset) + powner_sid_size >
+ pntsd_alloc_size) {
+ rc = -EINVAL;
+ kfree(pntsd);
+ goto free_aces_base;
+ }
+
+ if ((u64)le32_to_cpu(pntsd->gsidoffset) + pgroup_sid_size >
+ pntsd_alloc_size) {
+ rc = -EINVAL;
+ kfree(pntsd);
+ goto free_aces_base;
+ }
+
+ if ((u64)le32_to_cpu(pntsd->dacloffset) + sizeof(struct smb_acl) + nt_size >
+ pntsd_alloc_size) {
+ rc = -EINVAL;
+ kfree(pntsd);
+ goto free_aces_base;
+ }
+
if (pntsd->osidoffset) {
struct smb_sid *owner_sid = (struct smb_sid *)((char *)pntsd +
le32_to_cpu(pntsd->osidoffset));
@@ -1160,8 +1183,7 @@ pass:
pntsd_size += sizeof(struct smb_acl) + nt_size;
}
- ksmbd_vfs_set_sd_xattr(conn, user_ns,
- path->dentry, pntsd, pntsd_size);
+ ksmbd_vfs_set_sd_xattr(conn, user_ns, path, pntsd, pntsd_size, false);
kfree(pntsd);
}
@@ -1185,7 +1207,7 @@ bool smb_inherit_flags(int flags, bool is_dir)
return false;
}
-int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
__le32 *pdaccess, int uid)
{
struct user_namespace *user_ns = mnt_user_ns(path->mnt);
@@ -1288,7 +1310,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) {
posix_acls = get_acl(d_inode(path->dentry), ACL_TYPE_ACCESS);
- if (posix_acls && !found) {
+ if (!IS_ERR_OR_NULL(posix_acls) && !found) {
unsigned int id = -1;
pa_entry = posix_acls->a_entries;
@@ -1312,7 +1334,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
}
}
}
- if (posix_acls)
+ if (!IS_ERR_OR_NULL(posix_acls))
posix_acl_release(posix_acls);
}
@@ -1352,8 +1374,8 @@ err_out:
}
int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
- struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
- bool type_check)
+ const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
+ bool type_check, bool get_write)
{
int rc;
struct smb_fattr fattr = {{0}};
@@ -1381,7 +1403,7 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
newattrs.ia_valid |= ATTR_MODE;
newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777);
- ksmbd_vfs_remove_acl_xattrs(user_ns, path->dentry);
+ ksmbd_vfs_remove_acl_xattrs(user_ns, path);
/* Update posix acls */
if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) {
rc = set_posix_acl(user_ns, inode,
@@ -1412,15 +1434,14 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) {
/* Update WinACL in xattr */
- ksmbd_vfs_remove_sd_xattrs(user_ns, path->dentry);
- ksmbd_vfs_set_sd_xattr(conn, user_ns,
- path->dentry, pntsd, ntsd_len);
+ ksmbd_vfs_remove_sd_xattrs(user_ns, path);
+ ksmbd_vfs_set_sd_xattr(conn, user_ns, path, pntsd, ntsd_len,
+ get_write);
}
out:
posix_acl_release(fattr.cf_acls);
posix_acl_release(fattr.cf_dacls);
- mark_inode_dirty(inode);
return rc;
}
diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h
index fcb2c83f2992..17f81a510f23 100644
--- a/fs/ksmbd/smbacl.h
+++ b/fs/ksmbd/smbacl.h
@@ -201,13 +201,13 @@ void posix_state_to_acl(struct posix_acl_state *state,
struct posix_acl_entry *pace);
int compare_sids(const struct smb_sid *ctsid, const struct smb_sid *cwsid);
bool smb_inherit_flags(int flags, bool is_dir);
-int smb_inherit_dacl(struct ksmbd_conn *conn, struct path *path,
+int smb_inherit_dacl(struct ksmbd_conn *conn, const struct path *path,
unsigned int uid, unsigned int gid);
-int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
__le32 *pdaccess, int uid);
int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
- struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
- bool type_check);
+ const struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
+ bool type_check, bool get_write);
void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
void ksmbd_init_domain(u32 *sub_auth);
diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
index a8313eed4f10..9560c704033e 100644
--- a/fs/ksmbd/transport_ipc.c
+++ b/fs/ksmbd/transport_ipc.c
@@ -228,7 +228,7 @@ static struct ksmbd_ipc_msg *ipc_msg_alloc(size_t sz)
struct ksmbd_ipc_msg *msg;
size_t msg_sz = sz + sizeof(struct ksmbd_ipc_msg);
- msg = kvmalloc(msg_sz, GFP_KERNEL | __GFP_ZERO);
+ msg = kvzalloc(msg_sz, GFP_KERNEL);
if (msg)
msg->sz = sz;
return msg;
@@ -267,7 +267,7 @@ static int handle_response(int type, void *payload, size_t sz)
entry->type + 1, type);
}
- entry->response = kvmalloc(sz, GFP_KERNEL | __GFP_ZERO);
+ entry->response = kvzalloc(sz, GFP_KERNEL);
if (!entry->response) {
ret = -ENOMEM;
break;
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index 9ca29cdb7898..252a1e7afcc0 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -5,16 +5,6 @@
*
* Author(s): Long Li <longli@microsoft.com>,
* Hyunchul Lee <hyc.lee@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
*/
#define SUBMOD_NAME "smb_direct"
@@ -34,14 +24,15 @@
#include "smbstatus.h"
#include "transport_rdma.h"
-#define SMB_DIRECT_PORT 5445
+#define SMB_DIRECT_PORT_IWARP 5445
+#define SMB_DIRECT_PORT_INFINIBAND 445
#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
/* SMB_DIRECT negotiation timeout in seconds */
#define SMB_DIRECT_NEGOTIATE_TIMEOUT 120
-#define SMB_DIRECT_MAX_SEND_SGES 8
+#define SMB_DIRECT_MAX_SEND_SGES 6
#define SMB_DIRECT_MAX_RECV_SGES 1
/*
@@ -60,6 +51,10 @@
* as defined in [MS-SMBD] 3.1.1.1
* Those may change after a SMB_DIRECT negotiation
*/
+
+/* Set 445 port to SMB Direct port by default */
+static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
+
/* The local peer's maximum number of credits to grant to the peer */
static int smb_direct_receive_credit_max = 255;
@@ -67,17 +62,23 @@ static int smb_direct_receive_credit_max = 255;
static int smb_direct_send_credit_target = 255;
/* The maximum single message size can be sent to remote peer */
-static int smb_direct_max_send_size = 8192;
+static int smb_direct_max_send_size = 1364;
/* The maximum fragmented upper-layer payload receive size supported */
static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
/* The maximum single-message size which can be received */
-static int smb_direct_max_receive_size = 8192;
+static int smb_direct_max_receive_size = 1364;
static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
-static int smb_direct_max_outstanding_rw_ops = 8;
+static LIST_HEAD(smb_direct_device_list);
+static DEFINE_RWLOCK(smb_direct_device_lock);
+
+struct smb_direct_device {
+ struct ib_device *ib_dev;
+ struct list_head list;
+};
static struct smb_direct_listener {
struct rdma_cm_id *cm_id;
@@ -134,18 +135,18 @@ struct smb_direct_transport {
atomic_t send_credits;
spinlock_t lock_new_recv_credits;
int new_recv_credits;
- atomic_t rw_avail_ops;
+ int max_rw_credits;
+ int pages_per_rw_credit;
+ atomic_t rw_credits;
wait_queue_head_t wait_send_credits;
- wait_queue_head_t wait_rw_avail_ops;
+ wait_queue_head_t wait_rw_credits;
mempool_t *sendmsg_mempool;
struct kmem_cache *sendmsg_cache;
mempool_t *recvmsg_mempool;
struct kmem_cache *recvmsg_cache;
- wait_queue_head_t wait_send_payload_pending;
- atomic_t send_payload_pending;
wait_queue_head_t wait_send_pending;
atomic_t send_pending;
@@ -195,7 +196,9 @@ struct smb_direct_recvmsg {
struct smb_direct_rdma_rw_msg {
struct smb_direct_transport *t;
struct ib_cqe cqe;
+ int status;
struct completion *completion;
+ struct list_head list;
struct rdma_rw_ctx rw_ctx;
struct sg_table sgt;
struct scatterlist sg_list[0];
@@ -207,6 +210,11 @@ void init_smbd_max_io_size(unsigned int sz)
smb_direct_max_read_write_size = sz;
}
+unsigned int get_smbd_max_read_write_size(void)
+{
+ return smb_direct_max_read_write_size;
+}
+
static inline int get_buf_page_count(void *buf, int size)
{
return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
@@ -370,7 +378,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
t->reassembly_queue_length = 0;
init_waitqueue_head(&t->wait_reassembly_queue);
init_waitqueue_head(&t->wait_send_credits);
- init_waitqueue_head(&t->wait_rw_avail_ops);
+ init_waitqueue_head(&t->wait_rw_credits);
spin_lock_init(&t->receive_credit_lock);
spin_lock_init(&t->recvmsg_queue_lock);
@@ -379,8 +387,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
spin_lock_init(&t->empty_recvmsg_queue_lock);
INIT_LIST_HEAD(&t->empty_recvmsg_queue);
- init_waitqueue_head(&t->wait_send_payload_pending);
- atomic_set(&t->send_payload_pending, 0);
init_waitqueue_head(&t->wait_send_pending);
atomic_set(&t->send_pending, 0);
@@ -410,8 +416,6 @@ static void free_transport(struct smb_direct_transport *t)
wake_up_interruptible(&t->wait_send_credits);
ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
- wait_event(t->wait_send_payload_pending,
- atomic_read(&t->send_payload_pending) == 0);
wait_event(t->wait_send_pending,
atomic_read(&t->send_pending) == 0);
@@ -421,6 +425,7 @@ static void free_transport(struct smb_direct_transport *t)
if (t->qp) {
ib_drain_qp(t->qp);
+ ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
ib_destroy_qp(t->qp);
}
@@ -490,7 +495,7 @@ static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
struct smb_direct_data_transfer *req =
(struct smb_direct_data_transfer *)recvmsg->packet;
struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
- + le32_to_cpu(req->data_offset) - 4);
+ + le32_to_cpu(req->data_offset));
ksmbd_debug(RDMA,
"CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
le16_to_cpu(req->credits_granted),
@@ -561,6 +566,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
t->negotiation_requested = true;
t->full_packet_received = true;
+ t->status = SMB_DIRECT_CS_CONNECTED;
+ enqueue_reassembly(t, recvmsg, 0);
wake_up_interruptible(&t->wait_status);
break;
case SMB_DIRECT_MSG_DATA_TRANSFER: {
@@ -864,13 +871,8 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
smb_direct_disconnect_rdma_connection(t);
}
- if (sendmsg->num_sge > 1) {
- if (atomic_dec_and_test(&t->send_payload_pending))
- wake_up(&t->wait_send_payload_pending);
- } else {
- if (atomic_dec_and_test(&t->send_pending))
- wake_up(&t->wait_send_pending);
- }
+ if (atomic_dec_and_test(&t->send_pending))
+ wake_up(&t->wait_send_pending);
/* iterate and free the list of messages in reverse. the list's head
* is invalid.
@@ -902,21 +904,12 @@ static int smb_direct_post_send(struct smb_direct_transport *t,
{
int ret;
- if (wr->num_sge > 1)
- atomic_inc(&t->send_payload_pending);
- else
- atomic_inc(&t->send_pending);
-
+ atomic_inc(&t->send_pending);
ret = ib_post_send(t->qp, wr, NULL);
if (ret) {
pr_err("failed to post send: %d\n", ret);
- if (wr->num_sge > 1) {
- if (atomic_dec_and_test(&t->send_payload_pending))
- wake_up(&t->wait_send_payload_pending);
- } else {
- if (atomic_dec_and_test(&t->send_pending))
- wake_up(&t->wait_send_pending);
- }
+ if (atomic_dec_and_test(&t->send_pending))
+ wake_up(&t->wait_send_pending);
smb_direct_disconnect_rdma_connection(t);
}
return ret;
@@ -974,18 +967,19 @@ static int smb_direct_flush_send_list(struct smb_direct_transport *t,
}
static int wait_for_credits(struct smb_direct_transport *t,
- wait_queue_head_t *waitq, atomic_t *credits)
+ wait_queue_head_t *waitq, atomic_t *total_credits,
+ int needed)
{
int ret;
do {
- if (atomic_dec_return(credits) >= 0)
+ if (atomic_sub_return(needed, total_credits) >= 0)
return 0;
- atomic_inc(credits);
+ atomic_add(needed, total_credits);
ret = wait_event_interruptible(*waitq,
- atomic_read(credits) > 0 ||
- t->status != SMB_DIRECT_CS_CONNECTED);
+ atomic_read(total_credits) >= needed ||
+ t->status != SMB_DIRECT_CS_CONNECTED);
if (t->status != SMB_DIRECT_CS_CONNECTED)
return -ENOTCONN;
@@ -1006,7 +1000,19 @@ static int wait_for_send_credits(struct smb_direct_transport *t,
return ret;
}
- return wait_for_credits(t, &t->wait_send_credits, &t->send_credits);
+ return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1);
+}
+
+static int wait_for_rw_credits(struct smb_direct_transport *t, int credits)
+{
+ return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits);
+}
+
+static int calc_rw_credits(struct smb_direct_transport *t,
+ char *buf, unsigned int len)
+{
+ return DIV_ROUND_UP(get_buf_page_count(buf, len),
+ t->pages_per_rw_credit);
}
static int smb_direct_create_header(struct smb_direct_transport *t,
@@ -1077,7 +1083,7 @@ static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nen
int offset, len;
int i = 0;
- if (nentries < get_buf_page_count(buf, size))
+ if (size <= 0 || nentries < get_buf_page_count(buf, size))
return -EINVAL;
offset = offset_in_page(buf);
@@ -1109,7 +1115,7 @@ static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
int npages;
npages = get_sg_list(buf, size, sg_list, nentries);
- if (npages <= 0)
+ if (npages < 0)
return -EINVAL;
return ib_dma_map_sg(device, sg_list, npages, dir);
}
@@ -1235,14 +1241,12 @@ static int smb_direct_writev(struct ksmbd_transport *t,
//FIXME: skip RFC1002 header..
buflen -= 4;
- iov[0].iov_base += 4;
- iov[0].iov_len -= 4;
remaining_data_length = buflen;
ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
- start = i = 0;
+ start = i = 1;
buflen = 0;
while (true) {
buflen += iov[i].iov_len;
@@ -1304,11 +1308,21 @@ done:
* that means all the I/Os have been out and we are good to return
*/
- wait_event(st->wait_send_payload_pending,
- atomic_read(&st->send_payload_pending) == 0);
+ wait_event(st->wait_send_pending,
+ atomic_read(&st->send_pending) == 0);
return ret;
}
+static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
+ struct smb_direct_rdma_rw_msg *msg,
+ enum dma_data_direction dir)
+{
+ rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
+ msg->sgt.sgl, msg->sgt.nents, dir);
+ sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
+ kfree(msg);
+}
+
static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
enum dma_data_direction dir)
{
@@ -1317,19 +1331,14 @@ static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
struct smb_direct_transport *t = msg->t;
if (wc->status != IB_WC_SUCCESS) {
+ msg->status = -EIO;
pr_err("read/write error. opcode = %d, status = %s(%d)\n",
wc->opcode, ib_wc_status_msg(wc->status), wc->status);
- smb_direct_disconnect_rdma_connection(t);
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ smb_direct_disconnect_rdma_connection(t);
}
- if (atomic_inc_return(&t->rw_avail_ops) > 0)
- wake_up(&t->wait_rw_avail_ops);
-
- rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
- msg->sg_list, msg->sgt.nents, dir);
- sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
complete(msg->completion);
- kfree(msg);
}
static void read_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1342,94 +1351,152 @@ static void write_done(struct ib_cq *cq, struct ib_wc *wc)
read_write_done(cq, wc, DMA_TO_DEVICE);
}
-static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf,
- int buf_len, u32 remote_key, u64 remote_offset,
- u32 remote_len, bool is_read)
+static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
+ void *buf, int buf_len,
+ struct smb2_buffer_desc_v1 *desc,
+ unsigned int desc_len,
+ bool is_read)
{
- struct smb_direct_rdma_rw_msg *msg;
- int ret;
+ struct smb_direct_rdma_rw_msg *msg, *next_msg;
+ int i, ret;
DECLARE_COMPLETION_ONSTACK(completion);
- struct ib_send_wr *first_wr = NULL;
+ struct ib_send_wr *first_wr;
+ LIST_HEAD(msg_list);
+ char *desc_buf;
+ int credits_needed;
+ unsigned int desc_buf_len, desc_num = 0;
+
+ if (t->status != SMB_DIRECT_CS_CONNECTED)
+ return -ENOTCONN;
+
+ if (buf_len > t->max_rdma_rw_size)
+ return -EINVAL;
+
+ /* calculate needed credits */
+ credits_needed = 0;
+ desc_buf = buf;
+ for (i = 0; i < desc_len / sizeof(*desc); i++) {
+ if (!buf_len)
+ break;
+
+ desc_buf_len = le32_to_cpu(desc[i].length);
+ if (!desc_buf_len)
+ return -EINVAL;
+
+ if (desc_buf_len > buf_len) {
+ desc_buf_len = buf_len;
+ desc[i].length = cpu_to_le32(desc_buf_len);
+ buf_len = 0;
+ }
+
+ credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len);
+ desc_buf += desc_buf_len;
+ buf_len -= desc_buf_len;
+ desc_num++;
+ }
+
+ ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
+ is_read ? "read" : "write", buf_len, credits_needed);
- ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops);
+ ret = wait_for_rw_credits(t, credits_needed);
if (ret < 0)
return ret;
- /* TODO: mempool */
- msg = kmalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
- sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
- if (!msg) {
- atomic_inc(&t->rw_avail_ops);
- return -ENOMEM;
- }
+ /* build rdma_rw_ctx for each descriptor */
+ desc_buf = buf;
+ for (i = 0; i < desc_num; i++) {
+ msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
+ sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
- msg->sgt.sgl = &msg->sg_list[0];
- ret = sg_alloc_table_chained(&msg->sgt,
- get_buf_page_count(buf, buf_len),
- msg->sg_list, SG_CHUNK_SIZE);
- if (ret) {
- atomic_inc(&t->rw_avail_ops);
- kfree(msg);
- return -ENOMEM;
- }
+ desc_buf_len = le32_to_cpu(desc[i].length);
- ret = get_sg_list(buf, buf_len, msg->sgt.sgl, msg->sgt.orig_nents);
- if (ret <= 0) {
- pr_err("failed to get pages\n");
- goto err;
- }
+ msg->t = t;
+ msg->cqe.done = is_read ? read_done : write_done;
+ msg->completion = &completion;
- ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
- msg->sg_list, get_buf_page_count(buf, buf_len),
- 0, remote_offset, remote_key,
- is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
- if (ret < 0) {
- pr_err("failed to init rdma_rw_ctx: %d\n", ret);
- goto err;
+ msg->sgt.sgl = &msg->sg_list[0];
+ ret = sg_alloc_table_chained(&msg->sgt,
+ get_buf_page_count(desc_buf, desc_buf_len),
+ msg->sg_list, SG_CHUNK_SIZE);
+ if (ret) {
+ kfree(msg);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = get_sg_list(desc_buf, desc_buf_len,
+ msg->sgt.sgl, msg->sgt.orig_nents);
+ if (ret < 0) {
+ sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
+ kfree(msg);
+ goto out;
+ }
+
+ ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
+ msg->sgt.sgl,
+ get_buf_page_count(desc_buf, desc_buf_len),
+ 0,
+ le64_to_cpu(desc[i].offset),
+ le32_to_cpu(desc[i].token),
+ is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ if (ret < 0) {
+ pr_err("failed to init rdma_rw_ctx: %d\n", ret);
+ sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
+ kfree(msg);
+ goto out;
+ }
+
+ list_add_tail(&msg->list, &msg_list);
+ desc_buf += desc_buf_len;
}
- msg->t = t;
- msg->cqe.done = is_read ? read_done : write_done;
- msg->completion = &completion;
- first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
- &msg->cqe, NULL);
+ /* concatenate work requests of rdma_rw_ctxs */
+ first_wr = NULL;
+ list_for_each_entry_reverse(msg, &msg_list, list) {
+ first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
+ &msg->cqe, first_wr);
+ }
ret = ib_post_send(t->qp, first_wr, NULL);
if (ret) {
- pr_err("failed to post send wr: %d\n", ret);
- goto err;
+ pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
+ goto out;
}
+ msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list);
wait_for_completion(&completion);
- return 0;
-
-err:
- atomic_inc(&t->rw_avail_ops);
- if (first_wr)
- rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
- msg->sg_list, msg->sgt.nents,
- is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
- sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
- kfree(msg);
+ ret = msg->status;
+out:
+ list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
+ list_del(&msg->list);
+ smb_direct_free_rdma_rw_msg(t, msg,
+ is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ }
+ atomic_add(credits_needed, &t->rw_credits);
+ wake_up(&t->wait_rw_credits);
return ret;
}
-static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf,
- unsigned int buflen, u32 remote_key,
- u64 remote_offset, u32 remote_len)
+static int smb_direct_rdma_write(struct ksmbd_transport *t,
+ void *buf, unsigned int buflen,
+ struct smb2_buffer_desc_v1 *desc,
+ unsigned int desc_len)
{
return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
- remote_key, remote_offset,
- remote_len, false);
+ desc, desc_len, false);
}
-static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf,
- unsigned int buflen, u32 remote_key,
- u64 remote_offset, u32 remote_len)
+static int smb_direct_rdma_read(struct ksmbd_transport *t,
+ void *buf, unsigned int buflen,
+ struct smb2_buffer_desc_v1 *desc,
+ unsigned int desc_len)
{
return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
- remote_key, remote_offset,
- remote_len, true);
+ desc, desc_len, true);
}
static void smb_direct_disconnect(struct ksmbd_transport *t)
@@ -1444,6 +1511,15 @@ static void smb_direct_disconnect(struct ksmbd_transport *t)
free_transport(st);
}
+static void smb_direct_shutdown(struct ksmbd_transport *t)
+{
+ struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+
+ ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
+
+ smb_direct_disconnect_rdma_work(&st->disconnect_work);
+}
+
static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -1460,6 +1536,8 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
}
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_DISCONNECTED: {
+ ib_drain_qp(t->qp);
+
t->status = SMB_DIRECT_CS_DISCONNECTED;
wake_up_interruptible(&t->wait_status);
wake_up_interruptible(&t->wait_reassembly_queue);
@@ -1587,19 +1665,13 @@ static int smb_direct_accept_client(struct smb_direct_transport *t)
pr_err("error at rdma_accept: %d\n", ret);
return ret;
}
-
- wait_event_interruptible(t->wait_status,
- t->status != SMB_DIRECT_CS_NEW);
- if (t->status != SMB_DIRECT_CS_CONNECTED)
- return -ENOTCONN;
return 0;
}
-static int smb_direct_negotiate(struct smb_direct_transport *t)
+static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
{
int ret;
struct smb_direct_recvmsg *recvmsg;
- struct smb_direct_negotiate_req *req;
recvmsg = get_free_recvmsg(t);
if (!recvmsg)
@@ -1609,82 +1681,74 @@ static int smb_direct_negotiate(struct smb_direct_transport *t)
ret = smb_direct_post_recv(t, recvmsg);
if (ret) {
pr_err("Can't post recv: %d\n", ret);
- goto out;
+ goto out_err;
}
t->negotiation_requested = false;
ret = smb_direct_accept_client(t);
if (ret) {
pr_err("Can't accept client\n");
- goto out;
+ goto out_err;
}
smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
-
- ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
- ret = wait_event_interruptible_timeout(t->wait_status,
- t->negotiation_requested ||
- t->status == SMB_DIRECT_CS_DISCONNECTED,
- SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
- if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
- ret = ret < 0 ? ret : -ETIMEDOUT;
- goto out;
- }
-
- ret = smb_direct_check_recvmsg(recvmsg);
- if (ret == -ECONNABORTED)
- goto out;
-
- req = (struct smb_direct_negotiate_req *)recvmsg->packet;
- t->max_recv_size = min_t(int, t->max_recv_size,
- le32_to_cpu(req->preferred_send_size));
- t->max_send_size = min_t(int, t->max_send_size,
- le32_to_cpu(req->max_receive_size));
- t->max_fragmented_send_size =
- le32_to_cpu(req->max_fragmented_size);
-
- ret = smb_direct_send_negotiate_response(t, ret);
-out:
- if (recvmsg)
- put_recvmsg(t, recvmsg);
+ return 0;
+out_err:
+ put_recvmsg(t, recvmsg);
return ret;
}
+static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
+{
+ return min_t(unsigned int,
+ t->cm_id->device->attrs.max_fast_reg_page_list_len,
+ 256);
+}
+
static int smb_direct_init_params(struct smb_direct_transport *t,
struct ib_qp_cap *cap)
{
struct ib_device *device = t->cm_id->device;
- int max_send_sges, max_pages, max_rw_wrs, max_send_wrs;
+ int max_send_sges, max_rw_wrs, max_send_wrs;
+ unsigned int max_sge_per_wr, wrs_per_credit;
- /* need 2 more sge. because a SMB_DIRECT header will be mapped,
- * and maybe a send buffer could be not page aligned.
+ /* need 3 more sge. because a SMB_DIRECT header, SMB2 header,
+ * SMB2 response could be mapped.
*/
t->max_send_size = smb_direct_max_send_size;
- max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 2;
+ max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3;
if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
pr_err("max_send_size %d is too large\n", t->max_send_size);
return -EINVAL;
}
- /*
- * allow smb_direct_max_outstanding_rw_ops of in-flight RDMA
- * read/writes. HCA guarantees at least max_send_sge of sges for
- * a RDMA read/write work request, and if memory registration is used,
- * we need reg_mr, local_inv wrs for each read/write.
+ /* Calculate the number of work requests for RDMA R/W.
+ * The maximum number of pages which can be registered
+ * with one Memory region can be transferred with one
+ * R/W credit. And at least 4 work requests for each credit
+ * are needed for MR registration, RDMA R/W, local & remote
+ * MR invalidation.
*/
t->max_rdma_rw_size = smb_direct_max_read_write_size;
- max_pages = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
- max_rw_wrs = DIV_ROUND_UP(max_pages, SMB_DIRECT_MAX_SEND_SGES);
- max_rw_wrs += rdma_rw_mr_factor(device, t->cm_id->port_num,
- max_pages) * 2;
- max_rw_wrs *= smb_direct_max_outstanding_rw_ops;
+ t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
+ t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
+ (t->pages_per_rw_credit - 1) *
+ PAGE_SIZE);
+
+ max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
+ device->attrs.max_sge_rd);
+ max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
+ max_send_sges);
+ wrs_per_credit = max_t(unsigned int, 4,
+ DIV_ROUND_UP(t->pages_per_rw_credit,
+ max_sge_per_wr) + 1);
+ max_rw_wrs = t->max_rw_credits * wrs_per_credit;
max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
if (max_send_wrs > device->attrs.max_cqe ||
max_send_wrs > device->attrs.max_qp_wr) {
- pr_err("consider lowering send_credit_target = %d, or max_outstanding_rw_ops = %d\n",
- smb_direct_send_credit_target,
- smb_direct_max_outstanding_rw_ops);
+ pr_err("consider lowering send_credit_target = %d\n",
+ smb_direct_send_credit_target);
pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
device->attrs.max_cqe, device->attrs.max_qp_wr);
return -EINVAL;
@@ -1699,11 +1763,6 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
return -EINVAL;
}
- if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
- pr_err("warning: device max_send_sge = %d too small\n",
- device->attrs.max_send_sge);
- return -EINVAL;
- }
if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
pr_err("warning: device max_recv_sge = %d too small\n",
device->attrs.max_recv_sge);
@@ -1719,7 +1778,7 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
t->send_credit_target = smb_direct_send_credit_target;
atomic_set(&t->send_credits, 0);
- atomic_set(&t->rw_avail_ops, smb_direct_max_outstanding_rw_ops);
+ atomic_set(&t->rw_credits, t->max_rw_credits);
t->max_send_size = smb_direct_max_send_size;
t->max_recv_size = smb_direct_max_receive_size;
@@ -1727,10 +1786,10 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
cap->max_send_wr = max_send_wrs;
cap->max_recv_wr = t->recv_credit_max;
- cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
+ cap->max_send_sge = max_sge_per_wr;
cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
cap->max_inline_data = 0;
- cap->max_rdma_ctxs = 0;
+ cap->max_rdma_ctxs = t->max_rw_credits;
return 0;
}
@@ -1812,6 +1871,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
{
int ret;
struct ib_qp_init_attr qp_attr;
+ int pages_per_rw;
t->pd = ib_alloc_pd(t->cm_id->device, 0);
if (IS_ERR(t->pd)) {
@@ -1822,7 +1882,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
}
t->send_cq = ib_alloc_cq(t->cm_id->device, t,
- t->send_credit_target, 0, IB_POLL_WORKQUEUE);
+ smb_direct_send_credit_target + cap->max_rdma_ctxs,
+ 0, IB_POLL_WORKQUEUE);
if (IS_ERR(t->send_cq)) {
pr_err("Can't create RDMA send CQ\n");
ret = PTR_ERR(t->send_cq);
@@ -1831,8 +1892,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
}
t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
- cap->max_send_wr + cap->max_rdma_ctxs,
- 0, IB_POLL_WORKQUEUE);
+ t->recv_credit_max, 0, IB_POLL_WORKQUEUE);
if (IS_ERR(t->recv_cq)) {
pr_err("Can't create RDMA recv CQ\n");
ret = PTR_ERR(t->recv_cq);
@@ -1859,6 +1919,18 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
t->qp = t->cm_id->qp;
t->cm_id->event_handler = smb_direct_cm_handler;
+ pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
+ if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
+ ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs,
+ t->max_rw_credits, IB_MR_TYPE_MEM_REG,
+ t->pages_per_rw_credit, 0);
+ if (ret) {
+ pr_err("failed to init mr pool count %d pages %d\n",
+ t->max_rw_credits, t->pages_per_rw_credit);
+ goto err;
+ }
+ }
+
return 0;
err:
if (t->qp) {
@@ -1883,6 +1955,49 @@ err:
static int smb_direct_prepare(struct ksmbd_transport *t)
{
struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_negotiate_req *req;
+ int ret;
+
+ ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
+ ret = wait_event_interruptible_timeout(st->wait_status,
+ st->negotiation_requested ||
+ st->status == SMB_DIRECT_CS_DISCONNECTED,
+ SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
+ if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
+ return ret < 0 ? ret : -ETIMEDOUT;
+
+ recvmsg = get_first_reassembly(st);
+ if (!recvmsg)
+ return -ECONNABORTED;
+
+ ret = smb_direct_check_recvmsg(recvmsg);
+ if (ret == -ECONNABORTED)
+ goto out;
+
+ req = (struct smb_direct_negotiate_req *)recvmsg->packet;
+ st->max_recv_size = min_t(int, st->max_recv_size,
+ le32_to_cpu(req->preferred_send_size));
+ st->max_send_size = min_t(int, st->max_send_size,
+ le32_to_cpu(req->max_receive_size));
+ st->max_fragmented_send_size =
+ le32_to_cpu(req->max_fragmented_size);
+ st->max_fragmented_recv_size =
+ (st->recv_credit_max * st->max_recv_size) / 2;
+
+ ret = smb_direct_send_negotiate_response(st, ret);
+out:
+ spin_lock_irq(&st->reassembly_queue_lock);
+ st->reassembly_queue_length--;
+ list_del(&recvmsg->list);
+ spin_unlock_irq(&st->reassembly_queue_lock);
+ put_recvmsg(st, recvmsg);
+
+ return ret;
+}
+
+static int smb_direct_connect(struct smb_direct_transport *st)
+{
int ret;
struct ib_qp_cap qp_cap;
@@ -1904,13 +2019,11 @@ static int smb_direct_prepare(struct ksmbd_transport *t)
return ret;
}
- ret = smb_direct_negotiate(st);
+ ret = smb_direct_prepare_negotiation(st);
if (ret) {
pr_err("Can't negotiate: %d\n", ret);
return ret;
}
-
- st->status = SMB_DIRECT_CS_CONNECTED;
return 0;
}
@@ -1926,6 +2039,7 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
struct smb_direct_transport *t;
+ int ret;
if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
ksmbd_debug(RDMA,
@@ -1938,18 +2052,23 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
if (!t)
return -ENOMEM;
+ ret = smb_direct_connect(t);
+ if (ret)
+ goto out_err;
+
KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
KSMBD_TRANS(t)->conn, "ksmbd:r%u",
- SMB_DIRECT_PORT);
+ smb_direct_port);
if (IS_ERR(KSMBD_TRANS(t)->handler)) {
- int ret = PTR_ERR(KSMBD_TRANS(t)->handler);
-
+ ret = PTR_ERR(KSMBD_TRANS(t)->handler);
pr_err("Can't start thread\n");
- free_transport(t);
- return ret;
+ goto out_err;
}
return 0;
+out_err:
+ free_transport(t);
+ return ret;
}
static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
@@ -2013,12 +2132,64 @@ err:
return ret;
}
+static int smb_direct_ib_client_add(struct ib_device *ib_dev)
+{
+ struct smb_direct_device *smb_dev;
+
+ /* Set 5445 port if device type is iWARP(No IB) */
+ if (ib_dev->node_type != RDMA_NODE_IB_CA)
+ smb_direct_port = SMB_DIRECT_PORT_IWARP;
+
+ if (!rdma_frwr_is_supported(&ib_dev->attrs))
+ return 0;
+
+ smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
+ if (!smb_dev)
+ return -ENOMEM;
+ smb_dev->ib_dev = ib_dev;
+
+ write_lock(&smb_direct_device_lock);
+ list_add(&smb_dev->list, &smb_direct_device_list);
+ write_unlock(&smb_direct_device_lock);
+
+ ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
+ return 0;
+}
+
+static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
+ void *client_data)
+{
+ struct smb_direct_device *smb_dev, *tmp;
+
+ write_lock(&smb_direct_device_lock);
+ list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
+ if (smb_dev->ib_dev == ib_dev) {
+ list_del(&smb_dev->list);
+ kfree(smb_dev);
+ break;
+ }
+ }
+ write_unlock(&smb_direct_device_lock);
+}
+
+static struct ib_client smb_direct_ib_client = {
+ .name = "ksmbd_smb_direct_ib",
+ .add = smb_direct_ib_client_add,
+ .remove = smb_direct_ib_client_remove,
+};
+
int ksmbd_rdma_init(void)
{
int ret;
smb_direct_listener.cm_id = NULL;
+ ret = ib_register_client(&smb_direct_ib_client);
+ if (ret) {
+ pr_err("failed to ib_register_client\n");
+ return ret;
+ }
+
/* When a client is running out of send credits, the credits are
* granted by the server's sending a packet using this queue.
* This avoids the situation that a clients cannot send packets
@@ -2029,7 +2200,7 @@ int ksmbd_rdma_init(void)
if (!smb_direct_wq)
return -ENOMEM;
- ret = smb_direct_listen(SMB_DIRECT_PORT);
+ ret = smb_direct_listen(smb_direct_port);
if (ret) {
destroy_workqueue(smb_direct_wq);
smb_direct_wq = NULL;
@@ -2042,37 +2213,88 @@ int ksmbd_rdma_init(void)
return 0;
}
-int ksmbd_rdma_destroy(void)
+void ksmbd_rdma_destroy(void)
{
- if (smb_direct_listener.cm_id)
- rdma_destroy_id(smb_direct_listener.cm_id);
+ if (!smb_direct_listener.cm_id)
+ return;
+
+ ib_unregister_client(&smb_direct_ib_client);
+ rdma_destroy_id(smb_direct_listener.cm_id);
+
smb_direct_listener.cm_id = NULL;
if (smb_direct_wq) {
- flush_workqueue(smb_direct_wq);
destroy_workqueue(smb_direct_wq);
smb_direct_wq = NULL;
}
- return 0;
}
bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
- struct ib_device *ibdev;
+ struct smb_direct_device *smb_dev;
+ int i;
bool rdma_capable = false;
- ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
- if (ibdev) {
- if (rdma_frwr_is_supported(&ibdev->attrs))
- rdma_capable = true;
- ib_device_put(ibdev);
+ read_lock(&smb_direct_device_lock);
+ list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
+ for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
+ struct net_device *ndev;
+
+ if (smb_dev->ib_dev->ops.get_netdev) {
+ ndev = smb_dev->ib_dev->ops.get_netdev(
+ smb_dev->ib_dev, i + 1);
+ if (!ndev)
+ continue;
+
+ if (ndev == netdev) {
+ dev_put(ndev);
+ rdma_capable = true;
+ goto out;
+ }
+ dev_put(ndev);
+ /* if ib_dev does not implement ops.get_netdev
+ * check for matching infiniband GUID in hw_addr
+ */
+ } else if (netdev->type == ARPHRD_INFINIBAND) {
+ struct netdev_hw_addr *ha;
+ union ib_gid gid;
+ u32 port_num;
+ int ret;
+
+ netdev_hw_addr_list_for_each(
+ ha, &netdev->dev_addrs) {
+ memcpy(&gid, ha->addr + 4, sizeof(gid));
+ ret = ib_find_gid(smb_dev->ib_dev, &gid,
+ &port_num, NULL);
+ if (!ret) {
+ rdma_capable = true;
+ goto out;
+ }
+ }
+ }
+ }
}
+out:
+ read_unlock(&smb_direct_device_lock);
+
+ if (rdma_capable == false) {
+ struct ib_device *ibdev;
+
+ ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
+ if (ibdev) {
+ if (rdma_frwr_is_supported(&ibdev->attrs))
+ rdma_capable = true;
+ ib_device_put(ibdev);
+ }
+ }
+
return rdma_capable;
}
static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
.prepare = smb_direct_prepare,
.disconnect = smb_direct_disconnect,
+ .shutdown = smb_direct_shutdown,
.writev = smb_direct_writev,
.read = smb_direct_read,
.rdma_read = smb_direct_rdma_read,
diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h
index 04a7a37685c3..77aee4e5c9dc 100644
--- a/fs/ksmbd/transport_rdma.h
+++ b/fs/ksmbd/transport_rdma.h
@@ -7,8 +7,6 @@
#ifndef __KSMBD_TRANSPORT_RDMA_H__
#define __KSMBD_TRANSPORT_RDMA_H__
-#define SMB_DIRECT_PORT 5445
-
#define SMBD_DEFAULT_IOSIZE (8 * 1024 * 1024)
#define SMBD_MIN_IOSIZE (512 * 1024)
#define SMBD_MAX_IOSIZE (16 * 1024 * 1024)
@@ -56,14 +54,16 @@ struct smb_direct_data_transfer {
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
int ksmbd_rdma_init(void);
-int ksmbd_rdma_destroy(void);
+void ksmbd_rdma_destroy(void);
bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
void init_smbd_max_io_size(unsigned int sz);
+unsigned int get_smbd_max_read_write_size(void);
#else
static inline int ksmbd_rdma_init(void) { return 0; }
static inline int ksmbd_rdma_destroy(void) { return 0; }
static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
static inline void init_smbd_max_io_size(unsigned int sz) { }
+static inline unsigned int get_smbd_max_read_write_size(void) { return 0; }
#endif
#endif /* __KSMBD_TRANSPORT_RDMA_H__ */
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
index d1d7954368a5..eff7a1d793f0 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/ksmbd/transport_tcp.c
@@ -333,7 +333,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
if (length == -EINTR) {
total_read = -ESHUTDOWN;
break;
- } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) {
+ } else if (ksmbd_conn_need_reconnect(conn)) {
total_read = -EAGAIN;
break;
} else if (length == -ERESTARTSYS || length == -EAGAIN) {
@@ -428,7 +428,8 @@ static int create_socket(struct interface *iface)
ret = sock_create(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &ksmbd_socket);
if (ret) {
- pr_err("Can't create socket for ipv6, try ipv4: %d\n", ret);
+ if (ret != -EAFNOSUPPORT)
+ pr_err("Can't create socket for ipv6, fallback to ipv4: %d\n", ret);
ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP,
&ksmbd_socket);
if (ret) {
@@ -505,7 +506,7 @@ static int ksmbd_netdev_event(struct notifier_block *nb, unsigned long event,
switch (event) {
case NETDEV_UP:
- if (netdev->priv_flags & IFF_BRIDGE_PORT)
+ if (netif_is_bridge_port(netdev))
return NOTIFY_OK;
list_for_each_entry(iface, &iface_list, entry) {
@@ -614,7 +615,7 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz)
rtnl_lock();
for_each_netdev(&init_net, netdev) {
- if (netdev->priv_flags & IFF_BRIDGE_PORT)
+ if (netif_is_bridge_port(netdev))
continue;
if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL)))
return -ENOMEM;
diff --git a/fs/ksmbd/unicode.c b/fs/ksmbd/unicode.c
index a0db699ddafd..33fc6d45c0f3 100644
--- a/fs/ksmbd/unicode.c
+++ b/fs/ksmbd/unicode.c
@@ -15,45 +15,9 @@
#include "smb_common.h"
/*
- * smb_utf16_bytes() - how long will a string be after conversion?
- * @from: pointer to input string
- * @maxbytes: don't go past this many bytes of input string
- * @codepage: destination codepage
- *
- * Walk a utf16le string and return the number of bytes that the string will
- * be after being converted to the given charset, not including any null
- * termination required. Don't walk past maxbytes in the source buffer.
- *
- * Return: string length after conversion
- */
-static int smb_utf16_bytes(const __le16 *from, int maxbytes,
- const struct nls_table *codepage)
-{
- int i;
- int charlen, outlen = 0;
- int maxwords = maxbytes / 2;
- char tmp[NLS_MAX_CHARSET_SIZE];
- __u16 ftmp;
-
- for (i = 0; i < maxwords; i++) {
- ftmp = get_unaligned_le16(&from[i]);
- if (ftmp == 0)
- break;
-
- charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
- if (charlen > 0)
- outlen += charlen;
- else
- outlen++;
- }
-
- return outlen;
-}
-
-/*
* cifs_mapchar() - convert a host-endian char to proper char in codepage
* @target: where converted character should be copied
- * @src_char: 2 byte host-endian source character
+ * @from: host-endian source string
* @cp: codepage to which character should be converted
* @mapchar: should character be mapped according to mapchars mount option?
*
@@ -64,10 +28,13 @@ static int smb_utf16_bytes(const __le16 *from, int maxbytes,
* Return: string length after conversion
*/
static int
-cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
+cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
bool mapchar)
{
int len = 1;
+ __u16 src_char;
+
+ src_char = *from;
if (!mapchar)
goto cp_convert;
@@ -105,30 +72,66 @@ out:
cp_convert:
len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
- if (len <= 0) {
- *target = '?';
- len = 1;
- }
+ if (len <= 0)
+ goto surrogate_pair;
+
+ goto out;
+
+surrogate_pair:
+ /* convert SURROGATE_PAIR and IVS */
+ if (strcmp(cp->charset, "utf8"))
+ goto unknown;
+ len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
+ if (len <= 0)
+ goto unknown;
+ return len;
+unknown:
+ *target = '?';
+ len = 1;
goto out;
}
/*
- * is_char_allowed() - check for valid character
- * @ch: input character to be checked
+ * smb_utf16_bytes() - compute converted string length
+ * @from: pointer to input string
+ * @maxbytes: input string length
+ * @codepage: destination codepage
+ *
+ * Walk a utf16le string and return the number of bytes that the string will
+ * be after being converted to the given charset, not including any null
+ * termination required. Don't walk past maxbytes in the source buffer.
*
- * Return: 1 if char is allowed, otherwise 0
+ * Return: string length after conversion
*/
-static inline int is_char_allowed(char *ch)
+static int smb_utf16_bytes(const __le16 *from, int maxbytes,
+ const struct nls_table *codepage)
{
- /* check for control chars, wildcards etc. */
- if (!(*ch & 0x80) &&
- (*ch <= 0x1f ||
- *ch == '?' || *ch == '"' || *ch == '<' ||
- *ch == '>' || *ch == '|'))
- return 0;
-
- return 1;
+ int i, j;
+ int charlen, outlen = 0;
+ int maxwords = maxbytes / 2;
+ char tmp[NLS_MAX_CHARSET_SIZE];
+ __u16 ftmp[3];
+
+ for (i = 0; i < maxwords; i++) {
+ ftmp[0] = get_unaligned_le16(&from[i]);
+ if (ftmp[0] == 0)
+ break;
+ for (j = 1; j <= 2; j++) {
+ if (i + j < maxwords)
+ ftmp[j] = get_unaligned_le16(&from[i + j]);
+ else
+ ftmp[j] = 0;
+ }
+
+ charlen = cifs_mapchar(tmp, ftmp, codepage, 0);
+ if (charlen > 0)
+ outlen += charlen;
+ else
+ outlen++;
+ }
+
+ return outlen;
}
/*
@@ -158,12 +161,12 @@ static inline int is_char_allowed(char *ch)
static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
const struct nls_table *codepage, bool mapchar)
{
- int i, charlen, safelen;
+ int i, j, charlen, safelen;
int outlen = 0;
int nullsize = nls_nullsize(codepage);
int fromwords = fromlen / 2;
char tmp[NLS_MAX_CHARSET_SIZE];
- __u16 ftmp;
+ __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
/*
* because the chars can be of varying widths, we need to take care
@@ -174,9 +177,15 @@ static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
for (i = 0; i < fromwords; i++) {
- ftmp = get_unaligned_le16(&from[i]);
- if (ftmp == 0)
+ ftmp[0] = get_unaligned_le16(&from[i]);
+ if (ftmp[0] == 0)
break;
+ for (j = 1; j <= 2; j++) {
+ if (i + j < fromwords)
+ ftmp[j] = get_unaligned_le16(&from[i + j]);
+ else
+ ftmp[j] = 0;
+ }
/*
* check to see if converting this character might make the
@@ -191,6 +200,19 @@ static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
/* put converted char into 'to' buffer */
charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
outlen += charlen;
+
+ /*
+ * charlen (=bytes of UTF-8 for 1 character)
+ * 4bytes UTF-8(surrogate pair) is charlen=4
+ * (4bytes UTF-16 code)
+ * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
+ * (2 UTF-8 pairs divided to 2 UTF-16 pairs)
+ */
+ if (charlen == 4)
+ i++;
+ else if (charlen >= 5)
+ /* 5-6bytes UTF-8 */
+ i += 2;
}
/* properly null-terminate string */
@@ -325,6 +347,9 @@ int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
char src_char;
__le16 dst_char;
wchar_t tmp;
+ wchar_t wchar_to[6]; /* UTF-16 */
+ int ret;
+ unicode_t u;
if (!mapchars)
return smb_strtoUTF16(target, source, srclen, cp);
@@ -367,11 +392,57 @@ int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
* if no match, use question mark, which at least in
* some cases serves as wild card
*/
- if (charlen < 1) {
- dst_char = cpu_to_le16(0x003f);
- charlen = 1;
+ if (charlen > 0)
+ goto ctoUTF16;
+
+ /* convert SURROGATE_PAIR */
+ if (strcmp(cp->charset, "utf8"))
+ goto unknown;
+ if (*(source + i) & 0x80) {
+ charlen = utf8_to_utf32(source + i, 6, &u);
+ if (charlen < 0)
+ goto unknown;
+ } else
+ goto unknown;
+ ret = utf8s_to_utf16s(source + i, charlen,
+ UTF16_LITTLE_ENDIAN,
+ wchar_to, 6);
+ if (ret < 0)
+ goto unknown;
+
+ i += charlen;
+ dst_char = cpu_to_le16(*wchar_to);
+ if (charlen <= 3)
+ /* 1-3bytes UTF-8 to 2bytes UTF-16 */
+ put_unaligned(dst_char, &target[j]);
+ else if (charlen == 4) {
+ /*
+ * 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
+ * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
+ * (charlen=3+4 or 4+4)
+ */
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 1));
+ j++;
+ put_unaligned(dst_char, &target[j]);
+ } else if (charlen >= 5) {
+ /* 5-6bytes UTF-8 to 6bytes UTF-16 */
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 1));
+ j++;
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 2));
+ j++;
+ put_unaligned(dst_char, &target[j]);
}
+ continue;
+
+unknown:
+ dst_char = cpu_to_le16(0x003f);
+ charlen = 1;
}
+
+ctoUTF16:
/*
* character may take more than one byte in the source string,
* but will take exactly two bytes in the target string
diff --git a/fs/ksmbd/unicode.h b/fs/ksmbd/unicode.h
index 5593024230ae..076f6034a789 100644
--- a/fs/ksmbd/unicode.h
+++ b/fs/ksmbd/unicode.h
@@ -24,6 +24,7 @@
#include <asm/byteorder.h>
#include <linux/types.h>
#include <linux/nls.h>
+#include <linux/unicode.h>
#define UNIUPR_NOLOWER /* Example to not expand lower case tables */
@@ -69,7 +70,7 @@ char *smb_strndup_from_utf16(const char *src, const int maxlen,
const struct nls_table *codepage);
int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
const struct nls_table *cp, int mapchars);
-char *ksmbd_extract_sharename(char *treename);
+char *ksmbd_extract_sharename(struct unicode_map *um, const char *treename);
#endif
/*
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index f76acd83c294..173a488bfeee 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -18,6 +18,7 @@
#include <linux/vmalloc.h>
#include <linux/sched/xacct.h>
#include <linux/crc32c.h>
+#include <linux/namei.h>
#include "glob.h"
#include "oplock.h"
@@ -35,19 +36,6 @@
#include "mgmt/user_session.h"
#include "mgmt/user_config.h"
-static char *extract_last_component(char *path)
-{
- char *p = strrchr(path, '/');
-
- if (p && p[1] != '\0') {
- *p = '\0';
- p++;
- } else {
- p = NULL;
- }
- return p;
-}
-
static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
struct inode *parent_inode,
struct inode *inode)
@@ -61,67 +49,96 @@ static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
/**
* ksmbd_vfs_lock_parent() - lock parent dentry if it is stable
- *
- * the parent dentry got by dget_parent or @parent could be
- * unstable, we try to lock a parent inode and lookup the
- * child dentry again.
- *
- * the reference count of @parent isn't incremented.
*/
-int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
- struct dentry *child)
+int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child)
{
- struct dentry *dentry;
- int ret = 0;
-
inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
- dentry = lookup_one(user_ns, child->d_name.name, parent,
- child->d_name.len);
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
- goto out_err;
- }
-
- if (dentry != child) {
- ret = -ESTALE;
- dput(dentry);
- goto out_err;
+ if (child->d_parent != parent) {
+ inode_unlock(d_inode(parent));
+ return -ENOENT;
}
- dput(dentry);
return 0;
-out_err:
- inode_unlock(d_inode(parent));
- return ret;
}
-int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
- struct dentry *dentry)
+static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
+ char *pathname, unsigned int flags,
+ struct path *parent_path,
+ struct path *path)
{
- struct dentry *parent;
- int ret;
+ struct qstr last;
+ struct filename *filename;
+ struct path *root_share_path = &share_conf->vfs_path;
+ int err, type;
+ struct dentry *d;
+
+ if (pathname[0] == '\0') {
+ pathname = share_conf->path;
+ root_share_path = NULL;
+ } else {
+ flags |= LOOKUP_BENEATH;
+ }
+
+ filename = getname_kernel(pathname);
+ if (IS_ERR(filename))
+ return PTR_ERR(filename);
- parent = dget_parent(dentry);
- ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
- if (ret) {
- dput(parent);
- return ret;
+ err = vfs_path_parent_lookup(filename, flags,
+ parent_path, &last, &type,
+ root_share_path);
+ if (err) {
+ putname(filename);
+ return err;
}
- ret = inode_permission(user_ns, d_inode(parent),
- MAY_EXEC | MAY_WRITE);
+ if (unlikely(type != LAST_NORM)) {
+ path_put(parent_path);
+ putname(filename);
+ return -ENOENT;
+ }
- inode_unlock(d_inode(parent));
- dput(parent);
- return ret;
+ err = mnt_want_write(parent_path->mnt);
+ if (err) {
+ path_put(parent_path);
+ putname(filename);
+ return -ENOENT;
+ }
+
+ inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT);
+ d = lookup_one_qstr_excl(&last, parent_path->dentry, 0);
+ if (IS_ERR(d))
+ goto err_out;
+
+ if (d_is_negative(d)) {
+ dput(d);
+ goto err_out;
+ }
+
+ path->dentry = d;
+ path->mnt = mntget(parent_path->mnt);
+
+ if (test_share_config_flag(share_conf, KSMBD_SHARE_FLAG_CROSSMNT)) {
+ err = follow_down(path);
+ if (err < 0) {
+ path_put(path);
+ goto err_out;
+ }
+ }
+
+ putname(filename);
+ return 0;
+
+err_out:
+ inode_unlock(d_inode(parent_path->dentry));
+ mnt_drop_write(parent_path->mnt);
+ path_put(parent_path);
+ putname(filename);
+ return -ENOENT;
}
-int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
+void ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
struct dentry *dentry, __le32 *daccess)
{
- struct dentry *parent;
- int ret = 0;
-
*daccess = cpu_to_le32(FILE_READ_ATTRIBUTES | READ_CONTROL);
if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_WRITE))
@@ -136,19 +153,8 @@ int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_EXEC))
*daccess |= FILE_EXECUTE_LE;
- parent = dget_parent(dentry);
- ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
- if (ret) {
- dput(parent);
- return ret;
- }
-
- if (!inode_permission(user_ns, d_inode(parent), MAY_EXEC | MAY_WRITE))
+ if (!inode_permission(user_ns, d_inode(dentry->d_parent), MAY_EXEC | MAY_WRITE))
*daccess |= FILE_DELETE_LE;
-
- inode_unlock(d_inode(parent));
- dput(parent);
- return ret;
}
/**
@@ -184,6 +190,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
} else {
pr_err("File(%s): creation failed (err:%d)\n", name, err);
}
+
done_path_create(&path, dentry);
return err;
}
@@ -217,27 +224,26 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
user_ns = mnt_user_ns(path.mnt);
mode |= S_IFDIR;
err = vfs_mkdir(user_ns, d_inode(path.dentry), dentry, mode);
- if (err) {
- goto out;
- } else if (d_unhashed(dentry)) {
+ if (!err && d_unhashed(dentry)) {
struct dentry *d;
d = lookup_one(user_ns, dentry->d_name.name, dentry->d_parent,
dentry->d_name.len);
if (IS_ERR(d)) {
err = PTR_ERR(d);
- goto out;
+ goto out_err;
}
if (unlikely(d_is_negative(d))) {
dput(d);
err = -ENOENT;
- goto out;
+ goto out_err;
}
ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d));
dput(d);
}
-out:
+
+out_err:
done_path_create(&path, dentry);
if (err)
pr_err("mkdir(%s): creation failed (err:%d)\n", name, err);
@@ -357,15 +363,15 @@ out:
* @fid: file id of open file
* @count: read byte count
* @pos: file pos
+ * @rbuf: read data buffer
*
* Return: number of read bytes on success, otherwise error
*/
int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
- loff_t *pos)
+ loff_t *pos, char *rbuf)
{
struct file *filp = fp->filp;
ssize_t nbytes = 0;
- char *rbuf = work->aux_payload_buf;
struct inode *inode = file_inode(filp);
if (S_ISDIR(inode->i_mode))
@@ -376,8 +382,7 @@ int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
if (work->conn->connection_type) {
if (!(fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
- pr_err("no right to read(%pd)\n",
- fp->filp->f_path.dentry);
+ pr_err("no right to read(%pD)\n", fp->filp);
return -EACCES;
}
}
@@ -397,8 +402,7 @@ int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
nbytes = kernel_read(filp, rbuf, count, pos);
if (nbytes < 0) {
- pr_err("smb read failed for (%s), err = %zd\n",
- fp->filename, nbytes);
+ pr_err("smb read failed, err = %zd\n", nbytes);
return nbytes;
}
@@ -411,7 +415,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
{
char *stream_buf = NULL, *wbuf;
struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
- size_t size, v_len;
+ size_t size;
+ ssize_t v_len;
int err = 0;
ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n",
@@ -428,14 +433,14 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
fp->stream.name,
fp->stream.size,
&stream_buf);
- if ((int)v_len < 0) {
+ if (v_len < 0) {
pr_err("not found stream in xattr : %zd\n", v_len);
- err = (int)v_len;
+ err = v_len;
goto out;
}
if (v_len < size) {
- wbuf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ wbuf = kvzalloc(size, GFP_KERNEL);
if (!wbuf) {
err = -ENOMEM;
goto out;
@@ -450,11 +455,12 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
memcpy(&stream_buf[*pos], buf, count);
err = ksmbd_vfs_setxattr(user_ns,
- fp->filp->f_path.dentry,
+ &fp->filp->f_path,
fp->stream.name,
(void *)stream_buf,
size,
- 0);
+ 0,
+ true);
if (err < 0)
goto out;
@@ -487,8 +493,7 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
if (work->conn->connection_type) {
if (!(fp->daccess & FILE_WRITE_DATA_LE)) {
- pr_err("no right to write(%pd)\n",
- fp->filp->f_path.dentry);
+ pr_err("no right to write(%pD)\n", fp->filp);
err = -EACCES;
goto out;
}
@@ -512,6 +517,9 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
}
}
+ /* Reserve lease break for parent dir at closing time */
+ fp->reserve_lease_break = true;
+
/* Do we need to break any of a levelII oplock? */
smb_break_all_levII_oplock(work, fp, 1);
@@ -527,8 +535,8 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
if (sync) {
err = vfs_fsync_range(filp, offset, offset + *written, 0);
if (err < 0)
- pr_err("fsync failed for filename = %pd, err = %d\n",
- fp->filp->f_path.dentry, err);
+ pr_err("fsync failed for filename = %pD, err = %d\n",
+ fp->filp, err);
}
out:
@@ -543,7 +551,7 @@ out:
*
* Return: 0 on success, otherwise error
*/
-int ksmbd_vfs_getattr(struct path *path, struct kstat *stat)
+int ksmbd_vfs_getattr(const struct path *path, struct kstat *stat)
{
int err;
@@ -583,54 +591,32 @@ int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id)
*
* Return: 0 on success, otherwise error
*/
-int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
+int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
{
struct user_namespace *user_ns;
- struct path path;
- struct dentry *parent;
+ struct dentry *parent = path->dentry->d_parent;
int err;
if (ksmbd_override_fsids(work))
return -ENOMEM;
- err = ksmbd_vfs_kern_path(work, name, LOOKUP_NO_SYMLINKS, &path, false);
- if (err) {
- ksmbd_debug(VFS, "can't get %s, err %d\n", name, err);
- ksmbd_revert_fsids(work);
- return err;
- }
-
- user_ns = mnt_user_ns(path.mnt);
- parent = dget_parent(path.dentry);
- err = ksmbd_vfs_lock_parent(user_ns, parent, path.dentry);
- if (err) {
- dput(parent);
- path_put(&path);
- ksmbd_revert_fsids(work);
- return err;
- }
-
- if (!d_inode(path.dentry)->i_nlink) {
+ if (!d_inode(path->dentry)->i_nlink) {
err = -ENOENT;
goto out_err;
}
- if (S_ISDIR(d_inode(path.dentry)->i_mode)) {
- err = vfs_rmdir(user_ns, d_inode(parent), path.dentry);
+ user_ns = mnt_user_ns(path->mnt);
+ if (S_ISDIR(d_inode(path->dentry)->i_mode)) {
+ err = vfs_rmdir(user_ns, d_inode(parent), path->dentry);
if (err && err != -ENOTEMPTY)
- ksmbd_debug(VFS, "%s: rmdir failed, err %d\n", name,
- err);
+ ksmbd_debug(VFS, "rmdir failed, err %d\n", err);
} else {
- err = vfs_unlink(user_ns, d_inode(parent), path.dentry, NULL);
+ err = vfs_unlink(user_ns, d_inode(parent), path->dentry, NULL);
if (err)
- ksmbd_debug(VFS, "%s: unlink failed, err %d\n", name,
- err);
+ ksmbd_debug(VFS, "unlink failed, err %d\n", err);
}
out_err:
- inode_unlock(d_inode(parent));
- dput(parent);
- path_put(&path);
ksmbd_revert_fsids(work);
return err;
}
@@ -689,149 +675,120 @@ out1:
return err;
}
-static int ksmbd_validate_entry_in_use(struct dentry *src_dent)
+int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
+ char *newname, int flags)
{
- struct dentry *dst_dent;
+ struct dentry *old_parent, *new_dentry, *trap;
+ struct dentry *old_child = old_path->dentry;
+ struct path new_path;
+ struct qstr new_last;
+ struct renamedata rd;
+ struct filename *to;
+ struct ksmbd_share_config *share_conf = work->tcon->share_conf;
+ struct ksmbd_file *parent_fp;
+ int new_type;
+ int err, lookup_flags = LOOKUP_NO_SYMLINKS;
+
+ if (ksmbd_override_fsids(work))
+ return -ENOMEM;
- spin_lock(&src_dent->d_lock);
- list_for_each_entry(dst_dent, &src_dent->d_subdirs, d_child) {
- struct ksmbd_file *child_fp;
+ to = getname_kernel(newname);
+ if (IS_ERR(to)) {
+ err = PTR_ERR(to);
+ goto revert_fsids;
+ }
- if (d_really_is_negative(dst_dent))
- continue;
+retry:
+ err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
+ &new_path, &new_last, &new_type,
+ &share_conf->vfs_path);
+ if (err)
+ goto out1;
- child_fp = ksmbd_lookup_fd_inode(d_inode(dst_dent));
- if (child_fp) {
- spin_unlock(&src_dent->d_lock);
- ksmbd_debug(VFS, "Forbid rename, sub file/dir is in use\n");
- return -EACCES;
- }
+ if (old_path->mnt != new_path.mnt) {
+ err = -EXDEV;
+ goto out2;
}
- spin_unlock(&src_dent->d_lock);
- return 0;
-}
+ err = mnt_want_write(old_path->mnt);
+ if (err)
+ goto out2;
-static int __ksmbd_vfs_rename(struct ksmbd_work *work,
- struct user_namespace *src_user_ns,
- struct dentry *src_dent_parent,
- struct dentry *src_dent,
- struct user_namespace *dst_user_ns,
- struct dentry *dst_dent_parent,
- struct dentry *trap_dent,
- char *dst_name)
-{
- struct dentry *dst_dent;
- int err;
+ trap = lock_rename_child(old_child, new_path.dentry);
- if (!work->tcon->posix_extensions) {
- err = ksmbd_validate_entry_in_use(src_dent);
- if (err)
- return err;
+ old_parent = dget(old_child->d_parent);
+ if (d_unhashed(old_child)) {
+ err = -EINVAL;
+ goto out3;
}
- if (d_really_is_negative(src_dent_parent))
- return -ENOENT;
- if (d_really_is_negative(dst_dent_parent))
- return -ENOENT;
- if (d_really_is_negative(src_dent))
- return -ENOENT;
- if (src_dent == trap_dent)
- return -EINVAL;
-
- if (ksmbd_override_fsids(work))
- return -ENOMEM;
+ parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent);
+ if (parent_fp) {
+ if (parent_fp->daccess & FILE_DELETE_LE) {
+ pr_err("parent dir is opened with delete access\n");
+ err = -ESHARE;
+ ksmbd_fd_put(work, parent_fp);
+ goto out3;
+ }
+ ksmbd_fd_put(work, parent_fp);
+ }
- dst_dent = lookup_one(dst_user_ns, dst_name, dst_dent_parent,
- strlen(dst_name));
- err = PTR_ERR(dst_dent);
- if (IS_ERR(dst_dent)) {
- pr_err("lookup failed %s [%d]\n", dst_name, err);
- goto out;
+ new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
+ lookup_flags | LOOKUP_RENAME_TARGET);
+ if (IS_ERR(new_dentry)) {
+ err = PTR_ERR(new_dentry);
+ goto out3;
}
- err = -ENOTEMPTY;
- if (dst_dent != trap_dent && !d_really_is_positive(dst_dent)) {
- struct renamedata rd = {
- .old_mnt_userns = src_user_ns,
- .old_dir = d_inode(src_dent_parent),
- .old_dentry = src_dent,
- .new_mnt_userns = dst_user_ns,
- .new_dir = d_inode(dst_dent_parent),
- .new_dentry = dst_dent,
- };
- err = vfs_rename(&rd);
+ if (d_is_symlink(new_dentry)) {
+ err = -EACCES;
+ goto out4;
}
- if (err)
- pr_err("vfs_rename failed err %d\n", err);
- if (dst_dent)
- dput(dst_dent);
-out:
- ksmbd_revert_fsids(work);
- return err;
-}
-int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
- char *newname)
-{
- struct user_namespace *user_ns;
- struct path dst_path;
- struct dentry *src_dent_parent, *dst_dent_parent;
- struct dentry *src_dent, *trap_dent, *src_child;
- char *dst_name;
- int err;
+ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
+ err = -EEXIST;
+ goto out4;
+ }
- dst_name = extract_last_component(newname);
- if (!dst_name) {
- dst_name = newname;
- newname = "";
+ if (old_child == trap) {
+ err = -EINVAL;
+ goto out4;
}
- src_dent_parent = dget_parent(fp->filp->f_path.dentry);
- src_dent = fp->filp->f_path.dentry;
+ if (new_dentry == trap) {
+ err = -ENOTEMPTY;
+ goto out4;
+ }
+
+ rd.old_mnt_userns = mnt_user_ns(old_path->mnt),
+ rd.old_dir = d_inode(old_parent),
+ rd.old_dentry = old_child,
+ rd.new_mnt_userns = mnt_user_ns(new_path.mnt),
+ rd.new_dir = new_path.dentry->d_inode,
+ rd.new_dentry = new_dentry,
+ rd.flags = flags,
+ rd.delegated_inode = NULL,
+ err = vfs_rename(&rd);
+ if (err)
+ ksmbd_debug(VFS, "vfs_rename failed err %d\n", err);
+
+out4:
+ dput(new_dentry);
+out3:
+ dput(old_parent);
+ unlock_rename(old_parent, new_path.dentry);
+ mnt_drop_write(old_path->mnt);
+out2:
+ path_put(&new_path);
- err = ksmbd_vfs_kern_path(work, newname,
- LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY,
- &dst_path, false);
- if (err) {
- ksmbd_debug(VFS, "Cannot get path for %s [%d]\n", newname, err);
- goto out;
+ if (retry_estale(err, lookup_flags)) {
+ lookup_flags |= LOOKUP_REVAL;
+ goto retry;
}
- dst_dent_parent = dst_path.dentry;
-
- trap_dent = lock_rename(src_dent_parent, dst_dent_parent);
- dget(src_dent);
- dget(dst_dent_parent);
- user_ns = file_mnt_user_ns(fp->filp);
- src_child = lookup_one(user_ns, src_dent->d_name.name, src_dent_parent,
- src_dent->d_name.len);
- if (IS_ERR(src_child)) {
- err = PTR_ERR(src_child);
- goto out_lock;
- }
-
- if (src_child != src_dent) {
- err = -ESTALE;
- dput(src_child);
- goto out_lock;
- }
- dput(src_child);
-
- err = __ksmbd_vfs_rename(work,
- user_ns,
- src_dent_parent,
- src_dent,
- mnt_user_ns(dst_path.mnt),
- dst_dent_parent,
- trap_dent,
- dst_name);
-out_lock:
- dput(src_dent);
- dput(dst_dent_parent);
- unlock_rename(src_dent_parent, dst_dent_parent);
- path_put(&dst_path);
-out:
- dput(src_dent_parent);
+out1:
+ putname(to);
+revert_fsids:
+ ksmbd_revert_fsids(work);
return err;
}
@@ -873,8 +830,7 @@ int ksmbd_vfs_truncate(struct ksmbd_work *work,
err = vfs_truncate(&filp->f_path, size);
if (err)
- pr_err("truncate failed for filename : %s err %d\n",
- fp->filename, err);
+ pr_err("truncate failed, err %d\n", err);
return err;
}
@@ -895,7 +851,7 @@ ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list)
if (size <= 0)
return size;
- vlist = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ vlist = kvzalloc(size, GFP_KERNEL);
if (!vlist)
return -ENOMEM;
@@ -953,28 +909,38 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
/**
* ksmbd_vfs_setxattr() - vfs helper for smb set extended attributes value
* @user_ns: user namespace
- * @dentry: dentry to set XATTR at
- * @name: xattr name for setxattr
- * @value: xattr value to set
- * @size: size of xattr value
+ * @path: path of dentry to set XATTR at
+ * @attr_name: xattr name for setxattr
+ * @attr_value: xattr value to set
+ * @attr_size: size of xattr value
* @flags: destination buffer length
+ * @get_write: get write access to a mount
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
- struct dentry *dentry, const char *attr_name,
- const void *attr_value, size_t attr_size, int flags)
+ const struct path *path, const char *attr_name,
+ const void *attr_value, size_t attr_size, int flags,
+ bool get_write)
{
int err;
+ if (get_write == true) {
+ err = mnt_want_write(path->mnt);
+ if (err)
+ return err;
+ }
+
err = vfs_setxattr(user_ns,
- dentry,
+ path->dentry,
attr_name,
attr_value,
attr_size,
flags);
if (err)
ksmbd_debug(VFS, "setxattr failed, err %d\n", err);
+ if (get_write == true)
+ mnt_drop_write(path->mnt);
return err;
}
@@ -1078,19 +1044,34 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
}
int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
- struct dentry *dentry, char *attr_name)
+ const struct path *path, char *attr_name)
{
- return vfs_removexattr(user_ns, dentry, attr_name);
+ int err;
+
+ err = mnt_want_write(path->mnt);
+ if (err)
+ return err;
+
+ err = vfs_removexattr(user_ns, path->dentry, attr_name);
+ mnt_drop_write(path->mnt);
+
+ return err;
}
-int ksmbd_vfs_unlink(struct user_namespace *user_ns,
- struct dentry *dir, struct dentry *dentry)
+int ksmbd_vfs_unlink(struct file *filp)
{
int err = 0;
+ struct dentry *dir, *dentry = filp->f_path.dentry;
+ struct user_namespace *user_ns = file_mnt_user_ns(filp);
- err = ksmbd_vfs_lock_parent(user_ns, dir, dentry);
+ err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
+
+ dir = dget_parent(dentry);
+ err = ksmbd_vfs_lock_parent(dir, dentry);
+ if (err)
+ goto out;
dget(dentry);
if (S_ISDIR(d_inode(dentry)->i_mode))
@@ -1102,6 +1083,9 @@ int ksmbd_vfs_unlink(struct user_namespace *user_ns,
inode_unlock(d_inode(dir));
if (err)
ksmbd_debug(VFS, "failed to delete, err %d\n", err);
+out:
+ dput(dir);
+ mnt_drop_write(filp->f_path.mnt);
return err;
}
@@ -1148,12 +1132,23 @@ static int __caseless_lookup(struct dir_context *ctx, const char *name,
unsigned int d_type)
{
struct ksmbd_readdir_data *buf;
+ int cmp = -EINVAL;
buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
if (buf->used != namlen)
return 0;
- if (!strncasecmp((char *)buf->private, name, namlen)) {
+ if (IS_ENABLED(CONFIG_UNICODE) && buf->um) {
+ const struct qstr q_buf = {.name = buf->private,
+ .len = buf->used};
+ const struct qstr q_name = {.name = name,
+ .len = namlen};
+
+ cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name);
+ }
+ if (cmp < 0)
+ cmp = strncasecmp((char *)buf->private, name, namlen);
+ if (!cmp) {
memcpy((char *)buf->private, name, namlen);
buf->dirent_count = 1;
return -EEXIST;
@@ -1169,7 +1164,8 @@ static int __caseless_lookup(struct dir_context *ctx, const char *name,
*
* Return: 0 on success, otherwise error
*/
-static int ksmbd_vfs_lookup_in_dir(struct path *dir, char *name, size_t namelen)
+static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
+ size_t namelen, struct unicode_map *um)
{
int ret;
struct file *dfilp;
@@ -1179,6 +1175,7 @@ static int ksmbd_vfs_lookup_in_dir(struct path *dir, char *name, size_t namelen)
.private = name,
.used = namelen,
.dirent_count = 0,
+ .um = um,
};
dfilp = dentry_open(dir, flags, current_cred());
@@ -1193,32 +1190,29 @@ static int ksmbd_vfs_lookup_in_dir(struct path *dir, char *name, size_t namelen)
}
/**
- * ksmbd_vfs_kern_path() - lookup a file and get path info
- * @name: file path that is relative to share
- * @flags: lookup flags
- * @path: if lookup succeed, return path info
+ * ksmbd_vfs_kern_path_locked() - lookup a file and get path info
+ * @name: file path that is relative to share
+ * @flags: lookup flags
+ * @parent_path: if lookup succeed, return parent_path info
+ * @path: if lookup succeed, return path info
* @caseless: caseless filename lookup
*
* Return: 0 on success, otherwise error
*/
-int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name,
- unsigned int flags, struct path *path, bool caseless)
+int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
+ unsigned int flags, struct path *parent_path,
+ struct path *path, bool caseless)
{
struct ksmbd_share_config *share_conf = work->tcon->share_conf;
int err;
- flags |= LOOKUP_BENEATH;
- err = vfs_path_lookup(share_conf->vfs_path.dentry,
- share_conf->vfs_path.mnt,
- name,
- flags,
- path);
+ err = ksmbd_vfs_path_lookup_locked(share_conf, name, flags, parent_path,
+ path);
if (!err)
return 0;
if (caseless) {
char *filepath;
- struct path parent;
size_t path_len, remain_len;
filepath = kstrdup(name, GFP_KERNEL);
@@ -1228,10 +1222,10 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name,
path_len = strlen(filepath);
remain_len = path_len;
- parent = share_conf->vfs_path;
- path_get(&parent);
+ *parent_path = share_conf->vfs_path;
+ path_get(parent_path);
- while (d_can_lookup(parent.dentry)) {
+ while (d_can_lookup(parent_path->dentry)) {
char *filename = filepath + path_len - remain_len;
char *next = strchrnul(filename, '/');
size_t filename_len = next - filename;
@@ -1240,11 +1234,11 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name,
if (filename_len == 0)
break;
- err = ksmbd_vfs_lookup_in_dir(&parent, filename,
- filename_len);
- path_put(&parent);
+ err = ksmbd_vfs_lookup_in_dir(parent_path, filename,
+ filename_len,
+ work->conn->um);
if (err)
- goto out;
+ goto out2;
next[0] = '\0';
@@ -1252,26 +1246,50 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name,
share_conf->vfs_path.mnt,
filepath,
flags,
- &parent);
+ path);
if (err)
- goto out;
- else if (is_last) {
- *path = parent;
- goto out;
- }
+ goto out2;
+ else if (is_last)
+ goto out1;
+ path_put(parent_path);
+ *parent_path = *path;
next[0] = '/';
remain_len -= filename_len + 1;
}
- path_put(&parent);
err = -EINVAL;
-out:
+out2:
+ path_put(parent_path);
+out1:
kfree(filepath);
}
+
+ if (!err) {
+ err = mnt_want_write(parent_path->mnt);
+ if (err) {
+ path_put(path);
+ path_put(parent_path);
+ return err;
+ }
+
+ err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
+ if (err) {
+ path_put(path);
+ path_put(parent_path);
+ }
+ }
return err;
}
+void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path)
+{
+ inode_unlock(d_inode(parent_path->dentry));
+ mnt_drop_write(parent_path->mnt);
+ path_put(path);
+ path_put(parent_path);
+}
+
struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
const char *name,
unsigned int flags,
@@ -1290,13 +1308,13 @@ struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
}
int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
- struct dentry *dentry)
+ const struct path *path)
{
char *name, *xattr_list = NULL;
ssize_t xattr_list_len;
int err = 0;
- xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
if (xattr_list_len < 0) {
goto out;
} else if (!xattr_list_len) {
@@ -1312,25 +1330,25 @@ int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) ||
!strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) {
- err = ksmbd_vfs_remove_xattr(user_ns, dentry, name);
+ err = ksmbd_vfs_remove_xattr(user_ns, path, name);
if (err)
ksmbd_debug(SMB,
"remove acl xattr failed : %s\n", name);
}
}
+
out:
kvfree(xattr_list);
return err;
}
-int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
- struct dentry *dentry)
+int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns, const struct path *path)
{
char *name, *xattr_list = NULL;
ssize_t xattr_list_len;
int err = 0;
- xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
if (xattr_list_len < 0) {
goto out;
} else if (!xattr_list_len) {
@@ -1343,7 +1361,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) {
- err = ksmbd_vfs_remove_xattr(user_ns, dentry, name);
+ err = ksmbd_vfs_remove_xattr(user_ns, path, name);
if (err)
ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
}
@@ -1367,7 +1385,7 @@ static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespac
return NULL;
posix_acls = get_acl(inode, acl_type);
- if (!posix_acls)
+ if (IS_ERR_OR_NULL(posix_acls))
return NULL;
smb_acl = kzalloc(sizeof(struct xattr_smb_acl) +
@@ -1420,13 +1438,15 @@ out:
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
struct user_namespace *user_ns,
- struct dentry *dentry,
- struct smb_ntsd *pntsd, int len)
+ const struct path *path,
+ struct smb_ntsd *pntsd, int len,
+ bool get_write)
{
int rc;
struct ndr sd_ndr = {0}, acl_ndr = {0};
struct xattr_ntacl acl = {0};
struct xattr_smb_acl *smb_acl, *def_smb_acl = NULL;
+ struct dentry *dentry = path->dentry;
struct inode *inode = d_inode(dentry);
acl.version = 4;
@@ -1478,9 +1498,9 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
goto out;
}
- rc = ksmbd_vfs_setxattr(user_ns, dentry,
+ rc = ksmbd_vfs_setxattr(user_ns, path,
XATTR_NAME_SD, sd_ndr.data,
- sd_ndr.offset, 0);
+ sd_ndr.offset, 0, get_write);
if (rc < 0)
pr_err("Failed to store XATTR ntacl :%d\n", rc);
@@ -1568,8 +1588,9 @@ free_n_data:
}
int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
- struct dentry *dentry,
- struct xattr_dos_attrib *da)
+ const struct path *path,
+ struct xattr_dos_attrib *da,
+ bool get_write)
{
struct ndr n;
int err;
@@ -1578,8 +1599,8 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
if (err)
return err;
- err = ksmbd_vfs_setxattr(user_ns, dentry, XATTR_NAME_DOS_ATTRIBUTE,
- (void *)n.data, n.offset, 0);
+ err = ksmbd_vfs_setxattr(user_ns, path, XATTR_NAME_DOS_ATTRIBUTE,
+ (void *)n.data, n.offset, 0, get_write);
if (err)
ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
kfree(n.data);
@@ -1744,11 +1765,11 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
*total_size_written = 0;
if (!(src_fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
- pr_err("no right to read(%pd)\n", src_fp->filp->f_path.dentry);
+ pr_err("no right to read(%pD)\n", src_fp->filp);
return -EACCES;
}
if (!(dst_fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) {
- pr_err("no right to write(%pd)\n", dst_fp->filp->f_path.dentry);
+ pr_err("no right to write(%pD)\n", dst_fp->filp);
return -EACCES;
}
@@ -1815,10 +1836,11 @@ void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock)
}
int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
- struct inode *inode)
+ struct path *path)
{
struct posix_acl_state acl_state;
struct posix_acl *acls;
+ struct inode *inode = d_inode(path->dentry);
int rc;
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
@@ -1847,6 +1869,7 @@ int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
return -ENOMEM;
}
posix_state_to_acl(&acl_state, acls->a_entries);
+
rc = set_posix_acl(user_ns, inode, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1859,23 +1882,25 @@ int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
+
free_acl_state(&acl_state);
posix_acl_release(acls);
return rc;
}
int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
- struct inode *inode, struct inode *parent_inode)
+ struct path *path, struct inode *parent_inode)
{
struct posix_acl *acls;
struct posix_acl_entry *pace;
+ struct inode *inode = d_inode(path->dentry);
int rc, i;
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
return -EOPNOTSUPP;
acls = get_acl(parent_inode, ACL_TYPE_DEFAULT);
- if (!acls)
+ if (IS_ERR_OR_NULL(acls))
return -ENOENT;
pace = acls->a_entries;
@@ -1897,6 +1922,7 @@ int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
+
posix_acl_release(acls);
return rc;
}
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index 432c94773177..6d108cba7e0c 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -12,6 +12,7 @@
#include <linux/namei.h>
#include <uapi/linux/xattr.h>
#include <linux/posix_acl.h>
+#include <linux/unicode.h>
#include "smbacl.h"
#include "xattr.h"
@@ -99,6 +100,7 @@ struct ksmbd_readdir_data {
unsigned int used;
unsigned int dirent_count;
unsigned int file_attr;
+ struct unicode_map *um;
};
/* ksmbd kstat wrapper to get valid create time when reading dir entry */
@@ -108,25 +110,23 @@ struct ksmbd_kstat {
__le32 file_attributes;
};
-int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
- struct dentry *child);
-int ksmbd_vfs_may_delete(struct user_namespace *user_ns, struct dentry *dentry);
-int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
+int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child);
+void ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
struct dentry *dentry, __le32 *daccess);
int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode);
int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode);
-int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp,
- size_t count, loff_t *pos);
+int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
+ loff_t *pos, char *rbuf);
int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
char *buf, size_t count, loff_t *pos, bool sync,
ssize_t *written);
int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id);
-int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name);
+int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path);
int ksmbd_vfs_link(struct ksmbd_work *work,
const char *oldname, const char *newname);
-int ksmbd_vfs_getattr(struct path *path, struct kstat *stat);
-int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
- char *newname);
+int ksmbd_vfs_getattr(const struct path *path, struct kstat *stat);
+int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
+ char *newname, int flags);
int ksmbd_vfs_truncate(struct ksmbd_work *work,
struct ksmbd_file *fp, loff_t size);
struct srv_copychunk;
@@ -147,15 +147,17 @@ ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
struct dentry *dentry, char *attr_name,
int attr_name_len);
int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
- struct dentry *dentry, const char *attr_name,
- const void *attr_value, size_t attr_size, int flags);
+ const struct path *path, const char *attr_name,
+ const void *attr_value, size_t attr_size, int flags,
+ bool get_write);
int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
size_t *xattr_stream_name_size, int s_type);
int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
- struct dentry *dentry, char *attr_name);
-int ksmbd_vfs_kern_path(struct ksmbd_work *work,
- char *name, unsigned int flags, struct path *path,
- bool caseless);
+ const struct path *path, char *attr_name);
+int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
+ unsigned int flags, struct path *parent_path,
+ struct path *path, bool caseless);
+void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path);
struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
const char *name,
unsigned int flags,
@@ -168,8 +170,7 @@ struct file_allocated_range_buffer;
int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
struct file_allocated_range_buffer *ranges,
unsigned int in_count, unsigned int *out_count);
-int ksmbd_vfs_unlink(struct user_namespace *user_ns,
- struct dentry *dir, struct dentry *dentry);
+int ksmbd_vfs_unlink(struct file *filp);
void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat);
int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
struct user_namespace *user_ns,
@@ -179,26 +180,27 @@ void ksmbd_vfs_posix_lock_wait(struct file_lock *flock);
int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout);
void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock);
int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
- struct dentry *dentry);
-int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
- struct dentry *dentry);
+ const struct path *path);
+int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns, const struct path *path);
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
struct user_namespace *user_ns,
- struct dentry *dentry,
- struct smb_ntsd *pntsd, int len);
+ const struct path *path,
+ struct smb_ntsd *pntsd, int len,
+ bool get_write);
int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
struct user_namespace *user_ns,
struct dentry *dentry,
struct smb_ntsd **pntsd);
int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
- struct dentry *dentry,
- struct xattr_dos_attrib *da);
+ const struct path *path,
+ struct xattr_dos_attrib *da,
+ bool get_write);
int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns,
struct dentry *dentry,
struct xattr_dos_attrib *da);
int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
- struct inode *inode);
+ struct path *path);
int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
- struct inode *inode,
+ struct path *path,
struct inode *parent_inode);
#endif /* __KSMBD_VFS_H__ */
diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c
index 0df8467af39a..2528ce8aeebb 100644
--- a/fs/ksmbd/vfs_cache.c
+++ b/fs/ksmbd/vfs_cache.c
@@ -65,14 +65,14 @@ static unsigned long inode_hash(struct super_block *sb, unsigned long hashval)
return tmp & inode_hash_mask;
}
-static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
+static struct ksmbd_inode *__ksmbd_inode_lookup(struct dentry *de)
{
struct hlist_head *head = inode_hashtable +
- inode_hash(inode->i_sb, inode->i_ino);
+ inode_hash(d_inode(de)->i_sb, (unsigned long)de);
struct ksmbd_inode *ci = NULL, *ret_ci = NULL;
hlist_for_each_entry(ci, head, m_hash) {
- if (ci->m_inode == inode) {
+ if (ci->m_de == de) {
if (atomic_inc_not_zero(&ci->m_count))
ret_ci = ci;
break;
@@ -83,29 +83,30 @@ static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp)
{
- return __ksmbd_inode_lookup(file_inode(fp->filp));
+ return __ksmbd_inode_lookup(fp->filp->f_path.dentry);
}
-static struct ksmbd_inode *ksmbd_inode_lookup_by_vfsinode(struct inode *inode)
+struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d)
{
struct ksmbd_inode *ci;
read_lock(&inode_hash_lock);
- ci = __ksmbd_inode_lookup(inode);
+ ci = __ksmbd_inode_lookup(d);
read_unlock(&inode_hash_lock);
+
return ci;
}
-int ksmbd_query_inode_status(struct inode *inode)
+int ksmbd_query_inode_status(struct dentry *dentry)
{
struct ksmbd_inode *ci;
int ret = KSMBD_INODE_STATUS_UNKNOWN;
read_lock(&inode_hash_lock);
- ci = __ksmbd_inode_lookup(inode);
+ ci = __ksmbd_inode_lookup(dentry);
if (ci) {
ret = KSMBD_INODE_STATUS_OK;
- if (ci->m_flags & S_DEL_PENDING)
+ if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS))
ret = KSMBD_INODE_STATUS_PENDING_DELETE;
atomic_dec(&ci->m_count);
}
@@ -115,7 +116,7 @@ int ksmbd_query_inode_status(struct inode *inode)
bool ksmbd_inode_pending_delete(struct ksmbd_file *fp)
{
- return (fp->f_ci->m_flags & S_DEL_PENDING);
+ return (fp->f_ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS));
}
void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp)
@@ -142,7 +143,7 @@ void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp,
static void ksmbd_inode_hash(struct ksmbd_inode *ci)
{
struct hlist_head *b = inode_hashtable +
- inode_hash(ci->m_inode->i_sb, ci->m_inode->i_ino);
+ inode_hash(d_inode(ci->m_de)->i_sb, (unsigned long)ci->m_de);
hlist_add_head(&ci->m_hash, b);
}
@@ -156,7 +157,6 @@ static void ksmbd_inode_unhash(struct ksmbd_inode *ci)
static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
{
- ci->m_inode = file_inode(fp->filp);
atomic_set(&ci->m_count, 1);
atomic_set(&ci->op_count, 0);
atomic_set(&ci->sop_count, 0);
@@ -165,6 +165,7 @@ static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
INIT_LIST_HEAD(&ci->m_fp_list);
INIT_LIST_HEAD(&ci->m_op_list);
rwlock_init(&ci->m_lock);
+ ci->m_de = fp->filp->f_path.dentry;
return 0;
}
@@ -208,7 +209,7 @@ static void ksmbd_inode_free(struct ksmbd_inode *ci)
kfree(ci);
}
-static void ksmbd_inode_put(struct ksmbd_inode *ci)
+void ksmbd_inode_put(struct ksmbd_inode *ci)
{
if (atomic_dec_and_test(&ci->m_count))
ksmbd_inode_free(ci);
@@ -243,7 +244,6 @@ void ksmbd_release_inode_hash(void)
static void __ksmbd_inode_close(struct ksmbd_file *fp)
{
- struct dentry *dir, *dentry;
struct ksmbd_inode *ci = fp->f_ci;
int err;
struct file *filp;
@@ -252,7 +252,7 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp)
if (ksmbd_stream_fd(fp) && (ci->m_flags & S_DEL_ON_CLS_STREAM)) {
ci->m_flags &= ~S_DEL_ON_CLS_STREAM;
err = ksmbd_vfs_remove_xattr(file_mnt_user_ns(filp),
- filp->f_path.dentry,
+ &filp->f_path,
fp->stream.name);
if (err)
pr_err("remove xattr failed : %s\n",
@@ -262,11 +262,9 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp)
if (atomic_dec_and_test(&ci->m_count)) {
write_lock(&ci->m_lock);
if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) {
- dentry = filp->f_path.dentry;
- dir = dentry->d_parent;
ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING);
write_unlock(&ci->m_lock);
- ksmbd_vfs_unlink(file_mnt_user_ns(filp), dir, dentry);
+ ksmbd_vfs_unlink(filp);
write_lock(&ci->m_lock);
}
write_unlock(&ci->m_lock);
@@ -328,7 +326,6 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
kfree(smb_lock);
}
- kfree(fp->filename);
if (ksmbd_stream_fd(fp))
kfree(fp->stream.name);
kmem_cache_free(filp_cache, fp);
@@ -336,6 +333,9 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
static struct ksmbd_file *ksmbd_fp_get(struct ksmbd_file *fp)
{
+ if (fp->f_state != FP_INITED)
+ return NULL;
+
if (!atomic_inc_not_zero(&fp->refcount))
return NULL;
return fp;
@@ -365,12 +365,11 @@ static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp)
static void set_close_state_blocked_works(struct ksmbd_file *fp)
{
- struct ksmbd_work *cancel_work, *ctmp;
+ struct ksmbd_work *cancel_work;
spin_lock(&fp->f_lock);
- list_for_each_entry_safe(cancel_work, ctmp, &fp->blocked_works,
+ list_for_each_entry(cancel_work, &fp->blocked_works,
fp_entry) {
- list_del(&cancel_work->fp_entry);
cancel_work->state = KSMBD_WORK_CLOSED;
cancel_work->cancel_fn(cancel_work->cancel_argv);
}
@@ -386,15 +385,20 @@ int ksmbd_close_fd(struct ksmbd_work *work, u64 id)
return 0;
ft = &work->sess->file_table;
- read_lock(&ft->lock);
+ write_lock(&ft->lock);
fp = idr_find(ft->idr, id);
if (fp) {
set_close_state_blocked_works(fp);
- if (!atomic_dec_and_test(&fp->refcount))
+ if (fp->f_state != FP_INITED)
fp = NULL;
+ else {
+ fp->f_state = FP_CLOSED;
+ if (!atomic_dec_and_test(&fp->refcount))
+ fp = NULL;
+ }
}
- read_unlock(&ft->lock);
+ write_unlock(&ft->lock);
if (!fp)
return -EINVAL;
@@ -484,12 +488,15 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid)
return fp;
}
-struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode)
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry)
{
struct ksmbd_file *lfp;
struct ksmbd_inode *ci;
+ struct inode *inode = d_inode(dentry);
- ci = ksmbd_inode_lookup_by_vfsinode(inode);
+ read_lock(&inode_hash_lock);
+ ci = __ksmbd_inode_lookup(dentry);
+ read_unlock(&inode_hash_lock);
if (!ci)
return NULL;
@@ -574,6 +581,7 @@ struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp)
fp->tcon = work->tcon;
fp->volatile_id = KSMBD_NO_FID;
fp->persistent_id = KSMBD_NO_FID;
+ fp->f_state = FP_NEW;
fp->f_ci = ksmbd_inode_get(fp);
if (!fp->f_ci) {
@@ -595,6 +603,17 @@ err_out:
return ERR_PTR(ret);
}
+void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp,
+ unsigned int state)
+{
+ if (!fp)
+ return;
+
+ write_lock(&ft->lock);
+ fp->f_state = state;
+ write_unlock(&ft->lock);
+}
+
static int
__close_file_table_ids(struct ksmbd_file_table *ft,
struct ksmbd_tree_connect *tcon,
diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h
index 448576fbe4b7..a528f0cc775a 100644
--- a/fs/ksmbd/vfs_cache.h
+++ b/fs/ksmbd/vfs_cache.h
@@ -51,7 +51,7 @@ struct ksmbd_inode {
atomic_t op_count;
/* opinfo count for streams */
atomic_t sop_count;
- struct inode *m_inode;
+ struct dentry *m_de;
unsigned int m_flags;
struct hlist_node m_hash;
struct list_head m_fp_list;
@@ -60,9 +60,14 @@ struct ksmbd_inode {
__le32 m_fattr;
};
+enum {
+ FP_NEW = 0,
+ FP_INITED,
+ FP_CLOSED
+};
+
struct ksmbd_file {
struct file *filp;
- char *filename;
u64 persistent_id;
u64 volatile_id;
@@ -96,19 +101,11 @@ struct ksmbd_file {
int durable_timeout;
- /* for SMB1 */
- int pid;
-
- /* conflict lock fail count for SMB1 */
- unsigned int cflock_cnt;
- /* last lock failure start offset for SMB1 */
- unsigned long long llock_fstart;
-
- int dirent_offset;
-
/* if ls is happening on directory, below is valid*/
struct ksmbd_readdir_data readdir_data;
int dot_dotdot[2];
+ unsigned int f_state;
+ bool reserve_lease_break;
};
static inline void set_ctx_actor(struct dir_context *ctx,
@@ -142,9 +139,11 @@ struct ksmbd_file *ksmbd_lookup_foreign_fd(struct ksmbd_work *work, u64 id);
struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
u64 pid);
void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp);
+struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d);
+void ksmbd_inode_put(struct ksmbd_inode *ci);
struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id);
struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
-struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode);
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry);
unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp);
struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp);
void ksmbd_close_tree_conn_fds(struct ksmbd_work *work);
@@ -153,6 +152,8 @@ int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode);
int ksmbd_init_global_file_table(void);
void ksmbd_free_global_file_table(void);
void ksmbd_set_fd_limit(unsigned long limit);
+void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp,
+ unsigned int state);
/*
* INODE hash
@@ -166,7 +167,7 @@ enum KSMBD_INODE_STATUS {
KSMBD_INODE_STATUS_PENDING_DELETE,
};
-int ksmbd_query_inode_status(struct inode *inode);
+int ksmbd_query_inode_status(struct dentry *dentry);
bool ksmbd_inode_pending_delete(struct ksmbd_file *fp);
void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp);
void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp);
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1d9488cf0534..87a0f207df0b 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -276,6 +276,9 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
{
struct nsm_handle *new;
+ if (!hostname)
+ return NULL;
+
new = kzalloc(sizeof(*new) + hostname_len + 1, GFP_KERNEL);
if (unlikely(new == NULL))
return NULL;
diff --git a/fs/locks.c b/fs/locks.c
index 881fd16905c6..4899a4666f24 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1339,6 +1339,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
out:
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
+ trace_posix_lock_inode(inode, request, error);
/*
* Free any unused locks.
*/
@@ -1347,7 +1348,6 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
- trace_posix_lock_inode(inode, request, error);
return error;
}
diff --git a/fs/namei.c b/fs/namei.c
index 885b0e93ee96..a7f88c5b3d90 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -252,6 +252,7 @@ getname_kernel(const char * filename)
return result;
}
+EXPORT_SYMBOL(getname_kernel);
void putname(struct filename *name)
{
@@ -269,6 +270,7 @@ void putname(struct filename *name)
} else
__putname(name);
}
+EXPORT_SYMBOL(putname);
/**
* check_acl - perform ACL permission checking
@@ -1539,8 +1541,9 @@ static struct dentry *lookup_dcache(const struct qstr *name,
* when directory is guaranteed to have no in-lookup children
* at all.
*/
-static struct dentry *__lookup_hash(const struct qstr *name,
- struct dentry *base, unsigned int flags)
+struct dentry *lookup_one_qstr_excl(const struct qstr *name,
+ struct dentry *base,
+ unsigned int flags)
{
struct dentry *dentry = lookup_dcache(name, base, flags);
struct dentry *old;
@@ -1564,6 +1567,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
}
return dentry;
}
+EXPORT_SYMBOL(lookup_one_qstr_excl);
static struct dentry *lookup_fast(struct nameidata *nd,
struct inode **inode,
@@ -2508,16 +2512,17 @@ static int path_parentat(struct nameidata *nd, unsigned flags,
}
/* Note: this does not consume "name" */
-static int filename_parentat(int dfd, struct filename *name,
- unsigned int flags, struct path *parent,
- struct qstr *last, int *type)
+static int __filename_parentat(int dfd, struct filename *name,
+ unsigned int flags, struct path *parent,
+ struct qstr *last, int *type,
+ const struct path *root)
{
int retval;
struct nameidata nd;
if (IS_ERR(name))
return PTR_ERR(name);
- set_nameidata(&nd, dfd, name, NULL);
+ set_nameidata(&nd, dfd, name, root);
retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
if (unlikely(retval == -ECHILD))
retval = path_parentat(&nd, flags, parent);
@@ -2532,6 +2537,13 @@ static int filename_parentat(int dfd, struct filename *name,
return retval;
}
+static int filename_parentat(int dfd, struct filename *name,
+ unsigned int flags, struct path *parent,
+ struct qstr *last, int *type)
+{
+ return __filename_parentat(dfd, name, flags, parent, last, type, NULL);
+}
+
/* does lookup, returns the object with parent locked */
static struct dentry *__kern_path_locked(struct filename *name, struct path *path)
{
@@ -2547,7 +2559,7 @@ static struct dentry *__kern_path_locked(struct filename *name, struct path *pat
return ERR_PTR(-EINVAL);
}
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
- d = __lookup_hash(&last, path->dentry, 0);
+ d = lookup_one_qstr_excl(&last, path->dentry, 0);
if (IS_ERR(d)) {
inode_unlock(path->dentry->d_inode);
path_put(path);
@@ -2576,6 +2588,24 @@ int kern_path(const char *name, unsigned int flags, struct path *path)
EXPORT_SYMBOL(kern_path);
/**
+ * vfs_path_parent_lookup - lookup a parent path relative to a dentry-vfsmount pair
+ * @filename: filename structure
+ * @flags: lookup flags
+ * @parent: pointer to struct path to fill
+ * @last: last component
+ * @type: type of the last component
+ * @root: pointer to struct path of the base directory
+ */
+int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
+ struct path *parent, struct qstr *last, int *type,
+ const struct path *root)
+{
+ return __filename_parentat(AT_FDCWD, filename, flags, parent, last,
+ type, root);
+}
+EXPORT_SYMBOL(vfs_path_parent_lookup);
+
+/**
* vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
* @dentry: pointer to dentry of the base directory
* @mnt: pointer to vfs mount of the base directory
@@ -2836,7 +2866,7 @@ int path_pts(struct path *path)
dput(path->dentry);
path->dentry = parent;
child = d_hash_and_lookup(parent, &this);
- if (!child)
+ if (IS_ERR_OR_NULL(child))
return -ENOENT;
path->dentry = child;
@@ -2956,20 +2986,10 @@ static inline int may_create(struct user_namespace *mnt_userns,
return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
}
-/*
- * p1 and p2 should be directories on the same fs.
- */
-struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
+static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
{
struct dentry *p;
- if (p1 == p2) {
- inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
- return NULL;
- }
-
- mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
-
p = d_ancestor(p2, p1);
if (p) {
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
@@ -2988,8 +3008,64 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
I_MUTEX_PARENT, I_MUTEX_PARENT2);
return NULL;
}
+
+/*
+ * p1 and p2 should be directories on the same fs.
+ */
+struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
+{
+ if (p1 == p2) {
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ return NULL;
+ }
+
+ mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
+ return lock_two_directories(p1, p2);
+}
EXPORT_SYMBOL(lock_rename);
+/*
+ * c1 and p2 should be on the same fs.
+ */
+struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2)
+{
+ if (READ_ONCE(c1->d_parent) == p2) {
+ /*
+ * hopefully won't need to touch ->s_vfs_rename_mutex at all.
+ */
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+ /*
+ * now that p2 is locked, nobody can move in or out of it,
+ * so the test below is safe.
+ */
+ if (likely(c1->d_parent == p2))
+ return NULL;
+
+ /*
+ * c1 got moved out of p2 while we'd been taking locks;
+ * unlock and fall back to slow case.
+ */
+ inode_unlock(p2->d_inode);
+ }
+
+ mutex_lock(&c1->d_sb->s_vfs_rename_mutex);
+ /*
+ * nobody can move out of any directories on this fs.
+ */
+ if (likely(c1->d_parent != p2))
+ return lock_two_directories(c1->d_parent, p2);
+
+ /*
+ * c1 got moved into p2 while we were taking locks;
+ * we need p2 locked and ->s_vfs_rename_mutex unlocked,
+ * for consistency with lock_rename().
+ */
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+ mutex_unlock(&c1->d_sb->s_vfs_rename_mutex);
+ return NULL;
+}
+EXPORT_SYMBOL(lock_rename_child);
+
void unlock_rename(struct dentry *p1, struct dentry *p2)
{
inode_unlock(p1->d_inode);
@@ -3763,7 +3839,8 @@ static struct dentry *filename_create(int dfd, struct filename *name,
if (last.name[last.len] && !want_dir)
create_flags = 0;
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
- dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags);
+ dentry = lookup_one_qstr_excl(&last, path->dentry,
+ reval_flag | create_flags);
if (IS_ERR(dentry))
goto unlock;
@@ -4124,7 +4201,7 @@ retry:
goto exit2;
inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
- dentry = __lookup_hash(&last, path.dentry, lookup_flags);
+ dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit3;
@@ -4258,7 +4335,7 @@ retry:
goto exit2;
retry_deleg:
inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
- dentry = __lookup_hash(&last, path.dentry, lookup_flags);
+ dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
struct user_namespace *mnt_userns;
@@ -4832,7 +4909,8 @@ retry:
retry_deleg:
trap = lock_rename(new_path.dentry, old_path.dentry);
- old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags);
+ old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
+ lookup_flags);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
@@ -4840,7 +4918,8 @@ retry_deleg:
error = -ENOENT;
if (d_is_negative(old_dentry))
goto exit4;
- new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags | target_flags);
+ new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
+ lookup_flags | target_flags);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
diff --git a/fs/namespace.c b/fs/namespace.c
index 1a9df6afb90b..932986448a98 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2647,7 +2647,12 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
if (IS_ERR(fc))
return PTR_ERR(fc);
+ /*
+ * Indicate to the filesystem that the remount request is coming
+ * from the legacy mount system call.
+ */
fc->oldapi = true;
+
err = parse_monolithic_mount_data(fc, data);
if (!err) {
down_write(&sb->s_umount);
@@ -2981,6 +2986,12 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
if (IS_ERR(fc))
return PTR_ERR(fc);
+ /*
+ * Indicate to the filesystem that the mount request is coming
+ * from the legacy mount system call.
+ */
+ fc->oldapi = true;
+
if (subtype)
err = vfs_parse_fs_string(fc, "subtype",
subtype, strlen(subtype));
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index fe860c538747..dc657b12822d 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -599,6 +599,8 @@ retry:
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
goto retry;
}
+
+ nfs4_put_deviceid_node(node);
return ERR_PTR(-ENODEV);
}
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index acb1d22907da..16412d6636e8 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -422,7 +422,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
int ret, i;
d->children = kcalloc(v->concat.volumes_count,
- sizeof(struct pnfs_block_dev), GFP_KERNEL);
+ sizeof(struct pnfs_block_dev), gfp_mask);
if (!d->children)
return -ENOMEM;
@@ -451,7 +451,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
int ret, i;
d->children = kcalloc(v->stripe.volumes_count,
- sizeof(struct pnfs_block_dev), GFP_KERNEL);
+ sizeof(struct pnfs_block_dev), gfp_mask);
if (!d->children)
return -ENOMEM;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fbc7304bed56..bbe2a5cc49f6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -509,13 +509,33 @@ out:
return result;
}
-static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
+static void nfs_direct_add_page_head(struct list_head *list,
+ struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ if (!list_empty(&head->wb_list) || !nfs_lock_request(head))
+ return;
+ if (!list_empty(&head->wb_list)) {
+ nfs_unlock_request(head);
+ return;
+ }
+ list_add(&head->wb_list, list);
+ kref_get(&head->wb_kref);
+ kref_get(&head->wb_kref);
+}
+
+static void nfs_direct_join_group(struct list_head *list,
+ struct nfs_commit_info *cinfo,
+ struct inode *inode)
{
struct nfs_page *req, *subreq;
list_for_each_entry(req, list, wb_list) {
- if (req->wb_head != req)
+ if (req->wb_head != req) {
+ nfs_direct_add_page_head(&req->wb_list, req);
continue;
+ }
subreq = req->wb_this_page;
if (subreq == req)
continue;
@@ -529,7 +549,7 @@ static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
nfs_release_request(subreq);
}
} while ((subreq = subreq->wb_this_page) != req);
- nfs_join_page_group(req, inode);
+ nfs_join_page_group(req, cinfo, inode);
}
}
@@ -555,7 +575,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
nfs_init_cinfo_from_dreq(&cinfo, dreq);
nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
- nfs_direct_join_group(&reqs, dreq->inode);
+ nfs_direct_join_group(&reqs, &cinfo, dreq->inode);
dreq->count = 0;
dreq->max_count = 0;
@@ -766,16 +786,21 @@ static void nfs_write_sync_pgio_error(struct list_head *head, int error)
static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
+ struct nfs_page *req;
+ struct nfs_commit_info cinfo;
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
spin_lock(&dreq->lock);
- if (dreq->error == 0) {
+ if (dreq->error == 0)
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- /* fake unstable write to let common nfs resend pages */
- hdr->verf.committed = NFS_UNSTABLE;
- hdr->good_bytes = hdr->args.offset + hdr->args.count -
- hdr->io_start;
- }
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
spin_unlock(&dreq->lock);
+ while (!list_empty(&hdr->pages)) {
+ req = nfs_list_entry(hdr->pages.next);
+ nfs_list_remove_request(req);
+ nfs_unlock_request(req);
+ nfs_mark_request_commit(req, NULL, &cinfo, 0);
+ }
}
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index ceef75b4d249..4269df0f0ffa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1238,6 +1238,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
case -EOPNOTSUPP:
+ case -EINVAL:
case -ECONNREFUSED:
case -ECONNRESET:
case -EHOSTDOWN:
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 3d5ba43f44bb..266a4badf1df 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -949,7 +949,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
error = decode_filename_inline(xdr, &entry->name, &entry->len);
if (unlikely(error))
- return -EAGAIN;
+ return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN;
/*
* The type (size and byte order) of nfscookie isn't defined in
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 7ab60ad98776..d48db2f6f4f0 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1990,7 +1990,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
error = decode_inline_filename3(xdr, &entry->name, &entry->len);
if (unlikely(error))
- return -EAGAIN;
+ return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN;
error = decode_cookie3(xdr, &new_cookie);
if (unlikely(error))
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index bc07012741cb..2975bbc33d28 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -462,8 +462,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
continue;
}
break;
- } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) {
- args.sync = true;
+ } else if (err == -NFS4ERR_OFFLOAD_NO_REQS &&
+ args.sync != res.synchronous) {
+ args.sync = res.synchronous;
dst_exception.retry = 1;
continue;
} else if ((err == -ESTALE ||
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 1bf7a72ebda6..cba8b4c1fb4a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -231,6 +231,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
+ if (test_bit(NFS_CS_DS, &cl_init->init_flags))
+ __set_bit(NFS_CS_DS, &clp->cl_flags);
/*
* Set up the connection to the server before we add add to the
* global list.
@@ -414,6 +416,8 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
.net = old->cl_net,
.servername = old->cl_hostname,
};
+ int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ?
+ clp->cl_max_connect : old->cl_max_connect;
if (clp->cl_proto != old->cl_proto)
return;
@@ -427,7 +431,7 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
xprt_args.addrlen = clp_salen;
rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
- rpc_clnt_test_and_add_xprt, NULL);
+ rpc_clnt_test_and_add_xprt, &max_connect);
}
/**
@@ -993,6 +997,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ __set_bit(NFS_CS_DS, &cl_init.init_flags);
+ __set_bit(NFS_CS_PNFS, &cl_init.init_flags);
+ cl_init.max_connect = NFS_MAX_TRANSPORTS;
/*
* Set an authflavor equual to the MDS value. Use the MDS nfs_client
* cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a21e25cbd451..a865b384f0fc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -173,6 +173,7 @@ static int nfs4_map_errors(int err)
case -NFS4ERR_RESOURCE:
case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
+ case -NFS4ERR_RETURNCONFLICT:
return -EREMOTEIO;
case -NFS4ERR_WRONGSEC:
case -NFS4ERR_WRONG_CRED:
@@ -560,6 +561,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
case -NFS4ERR_GRACE:
case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
+ case -NFS4ERR_RETURNCONFLICT:
exception->delay = 1;
return 0;
@@ -5608,7 +5610,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
- nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr);
+ nfs4_state_protect_write(hdr->ds_clp ? hdr->ds_clp : server->nfs_client, clnt, msg, hdr);
}
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -5649,7 +5651,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0);
- nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
+ nfs4_state_protect(data->ds_clp ? data->ds_clp : server->nfs_client,
+ NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
}
static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args,
@@ -8715,6 +8718,8 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
#ifdef CONFIG_NFS_V4_1_MIGRATION
calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
#endif
+ if (test_bit(NFS_CS_DS, &clp->cl_flags))
+ calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata;
@@ -9585,6 +9590,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
status = -EBUSY;
break;
case -NFS4ERR_RECALLCONFLICT:
+ case -NFS4ERR_RETURNCONFLICT:
status = -ERECALLCONFLICT;
break;
case -NFS4ERR_DELEG_REVOKED:
@@ -10553,7 +10559,9 @@ static void nfs4_disable_swap(struct inode *inode)
*/
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
- nfs4_schedule_state_manager(clp);
+ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ wake_up_var(&clp->cl_state);
}
static const struct inode_operations nfs4_dir_inode_operations = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 15ba6ad1c571..258e6b167285 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1213,17 +1213,23 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
{
struct task_struct *task;
char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
- struct rpc_clnt *cl = clp->cl_rpcclient;
-
- while (cl != cl->cl_parent)
- cl = cl->cl_parent;
+ struct rpc_clnt *clnt = clp->cl_rpcclient;
+ bool swapon = false;
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
- if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
- wake_up_var(&clp->cl_state);
- return;
+
+ if (atomic_read(&clnt->cl_swapper)) {
+ swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE,
+ &clp->cl_state);
+ if (!swapon) {
+ wake_up_var(&clp->cl_state);
+ return;
+ }
}
- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
+
+ if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
+ return;
+
__module_get(THIS_MODULE);
refcount_inc(&clp->cl_count);
@@ -1240,8 +1246,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
__func__, PTR_ERR(task));
if (!nfs_client_init_is_complete(clp))
nfs_mark_client_ready(clp, PTR_ERR(task));
+ if (swapon)
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
nfs4_clear_state_manager_bit(clp);
- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
nfs_put_client(clp);
module_put(THIS_MODULE);
}
@@ -2692,6 +2699,13 @@ static void nfs4_state_manager(struct nfs_client *clp)
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
+ if (test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
+ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING,
+ &clp->cl_state)) {
+ memflags = memalloc_nofs_save();
+ continue;
+ }
+
if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) {
if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
nfs_client_return_marked_delegations(clp);
@@ -2730,22 +2744,25 @@ static int nfs4_run_state_manager(void *ptr)
allow_signal(SIGKILL);
again:
- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
nfs4_state_manager(clp);
- if (atomic_read(&cl->cl_swapper)) {
+
+ if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) &&
+ !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) {
wait_var_event_interruptible(&clp->cl_state,
test_bit(NFS4CLNT_RUN_MANAGER,
&clp->cl_state));
- if (atomic_read(&cl->cl_swapper) &&
- test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
+ if (!atomic_read(&cl->cl_swapper))
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
+ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
goto again;
/* Either no longer a swapper, or were signalled */
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
}
- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
- !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
+ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
goto again;
nfs_put_client(clp);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 7217f3eeb069..9f6776c7062e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2629,31 +2629,44 @@ pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
return mode == 0;
}
-static int
-pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
+static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
+ void *data)
{
const struct pnfs_layout_range *range = data;
+ const struct cred *cred;
struct pnfs_layout_hdr *lo;
struct inode *inode;
+ nfs4_stateid stateid;
+ enum pnfs_iomode iomode;
+
restart:
rcu_read_lock();
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
- if (!pnfs_layout_can_be_returned(lo) ||
+ inode = lo->plh_inode;
+ if (!inode || !pnfs_layout_can_be_returned(lo) ||
test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
continue;
- inode = lo->plh_inode;
spin_lock(&inode->i_lock);
- if (!pnfs_should_return_unused_layout(lo, range)) {
+ if (!lo->plh_inode ||
+ !pnfs_should_return_unused_layout(lo, range)) {
spin_unlock(&inode->i_lock);
continue;
}
+ pnfs_get_layout_hdr(lo);
+ pnfs_set_plh_return_info(lo, range->iomode, 0);
+ if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
+ range, 0) != 0 ||
+ !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ pnfs_put_layout_hdr(lo);
+ cond_resched();
+ goto restart;
+ }
spin_unlock(&inode->i_lock);
- inode = pnfs_grab_inode_layout_hdr(lo);
- if (!inode)
- continue;
rcu_read_unlock();
- pnfs_mark_layout_for_return(inode, range);
- iput(inode);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_put_layout_hdr(lo);
cond_resched();
goto restart;
}
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index ddbbf4fcda86..178001c90156 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -154,7 +154,7 @@ nfs4_get_device_info(struct nfs_server *server,
set_bit(NFS_DEVICEID_NOCACHE, &d->flags);
out_free_pages:
- for (i = 0; i < max_pages; i++)
+ while (--i >= 0)
__free_page(pages[i]);
kfree(pages);
out_free_pdev:
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 657c242a18ff..6b681f0c5df0 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -943,7 +943,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
* Test this address for session trunking and
* add as an alias
*/
- xprtdata.cred = nfs4_get_clid_cred(clp),
+ xprtdata.cred = nfs4_get_clid_cred(clp);
rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
rpc_clnt_setup_test_and_add_xprt,
&rpcdata);
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 8cb70755e3c9..f7f778e3e5ca 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -18,7 +18,7 @@
#include "sysfs.h"
struct kobject *nfs_client_kobj;
-static struct kset *nfs_client_kset;
+static struct kset *nfs_kset;
static void nfs_netns_object_release(struct kobject *kobj)
{
@@ -55,13 +55,13 @@ static struct kobject *nfs_netns_object_alloc(const char *name,
int nfs_sysfs_init(void)
{
- nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj);
- if (!nfs_client_kset)
+ nfs_kset = kset_create_and_add("nfs", NULL, fs_kobj);
+ if (!nfs_kset)
return -ENOMEM;
- nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL);
+ nfs_client_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL);
if (!nfs_client_kobj) {
- kset_unregister(nfs_client_kset);
- nfs_client_kset = NULL;
+ kset_unregister(nfs_kset);
+ nfs_kset = NULL;
return -ENOMEM;
}
return 0;
@@ -70,7 +70,7 @@ int nfs_sysfs_init(void)
void nfs_sysfs_exit(void)
{
kobject_put(nfs_client_kobj);
- kset_unregister(nfs_client_kset);
+ kset_unregister(nfs_kset);
}
static ssize_t nfs_netns_identifier_show(struct kobject *kobj,
@@ -158,7 +158,7 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent,
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (p) {
p->net = net;
- p->kobject.kset = nfs_client_kset;
+ p->kobject.kset = nfs_kset;
if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type,
parent, "nfs_client") == 0)
return p;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index be70874bc329..4231d51fc1ad 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -58,7 +58,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
static void nfs_inode_remove_request(struct nfs_page *req);
-static void nfs_clear_request_commit(struct nfs_page *req);
+static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
+ struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode);
static struct nfs_page *
@@ -500,8 +501,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
* the (former) group. All subrequests are removed from any write or commit
* lists, unlinked from the group and destroyed.
*/
-void
-nfs_join_page_group(struct nfs_page *head, struct inode *inode)
+void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
+ struct inode *inode)
{
struct nfs_page *subreq;
struct nfs_page *destroy_list = NULL;
@@ -531,7 +532,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
* Commit list removal accounting is done after locks are dropped */
subreq = head;
do {
- nfs_clear_request_commit(subreq);
+ nfs_clear_request_commit(cinfo, subreq);
subreq = subreq->wb_this_page;
} while (subreq != head);
@@ -565,8 +566,10 @@ nfs_lock_and_join_requests(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *head;
+ struct nfs_commit_info cinfo;
int ret;
+ nfs_init_cinfo_from_inode(&cinfo, inode);
/*
* A reference is taken only on the head request which acts as a
* reference to the whole page group - the group will not be destroyed
@@ -583,7 +586,7 @@ nfs_lock_and_join_requests(struct page *page)
return ERR_PTR(ret);
}
- nfs_join_page_group(head, inode);
+ nfs_join_page_group(head, &cinfo, inode);
return head;
}
@@ -945,18 +948,16 @@ nfs_clear_page_commit(struct page *page)
}
/* Called holding the request lock on @req */
-static void
-nfs_clear_request_commit(struct nfs_page *req)
+static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
{
if (test_bit(PG_CLEAN, &req->wb_flags)) {
struct nfs_open_context *ctx = nfs_req_openctx(req);
struct inode *inode = d_inode(ctx->dentry);
- struct nfs_commit_info cinfo;
- nfs_init_cinfo_from_inode(&cinfo, inode);
mutex_lock(&NFS_I(inode)->commit_mutex);
- if (!pnfs_clear_request_commit(req, &cinfo)) {
- nfs_request_remove_commit_list(req, &cinfo);
+ if (!pnfs_clear_request_commit(req, cinfo)) {
+ nfs_request_remove_commit_list(req, cinfo);
}
mutex_unlock(&NFS_I(inode)->commit_mutex);
nfs_clear_page_commit(req->wb_page);
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 442543304930..2455dc8be18a 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -82,6 +82,15 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
int len = sizeof(__be32), ret, i;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
p = xdr_reserve_space(xdr, len + sizeof(__be32));
if (!p)
return nfserr_resource;
diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c
index e81d2a5cf381..bb205328e043 100644
--- a/fs/nfsd/flexfilelayoutxdr.c
+++ b/fs/nfsd/flexfilelayoutxdr.c
@@ -85,6 +85,15 @@ nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
int addr_len;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
/* len + padding for two strings */
addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len;
ver_len = 20;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3eb500adcda2..f71af990e1e8 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -895,8 +895,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
rename->rn_tname, rename->rn_tnamelen);
if (status)
return status;
- set_change_info(&rename->rn_sinfo, &cstate->current_fh);
- set_change_info(&rename->rn_tinfo, &cstate->save_fh);
+ set_change_info(&rename->rn_sinfo, &cstate->save_fh);
+ set_change_info(&rename->rn_tinfo, &cstate->current_fh);
return nfs_ok;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f54ef526f25d..9b660491f393 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2686,7 +2686,7 @@ static int client_opens_release(struct inode *inode, struct file *file)
/* XXX: alternatively, we could get/drop in seq start/stop */
drop_client(clp);
- return 0;
+ return seq_release(inode, file);
}
static const struct file_operations client_states_fops = {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e8132a17eeb3..d28b75909de8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4533,20 +4533,17 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
*p++ = cpu_to_be32(gdev->gd_layout_type);
- /* If maxcount is 0 then just update notifications */
- if (gdev->gd_maxcount != 0) {
- ops = nfsd4_layout_ops[gdev->gd_layout_type];
- nfserr = ops->encode_getdeviceinfo(xdr, gdev);
- if (nfserr) {
- /*
- * We don't bother to burden the layout drivers with
- * enforcing gd_maxcount, just tell the client to
- * come back with a bigger buffer if it's not enough.
- */
- if (xdr->buf->len + 4 > gdev->gd_maxcount)
- goto toosmall;
- return nfserr;
- }
+ ops = nfsd4_layout_ops[gdev->gd_layout_type];
+ nfserr = ops->encode_getdeviceinfo(xdr, gdev);
+ if (nfserr) {
+ /*
+ * We don't bother to burden the layout drivers with
+ * enforcing gd_maxcount, just tell the client to
+ * come back with a bigger buffer if it's not enough.
+ */
+ if (xdr->buf->len + 4 > gdev->gd_maxcount)
+ goto toosmall;
+ return nfserr;
}
if (gdev->gd_notify_types) {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 15a86876e3d9..c39b8a653804 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1762,6 +1762,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out;
+ err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
+ if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
+ goto out;
+ if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
+ goto out;
+
retry:
host_err = fh_want_write(ffhp);
if (host_err) {
@@ -1796,12 +1802,6 @@ retry:
if (ndentry == trap)
goto out_dput_new;
- host_err = -EXDEV;
- if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
- goto out_dput_new;
- if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
- goto out_dput_new;
-
if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) &&
nfsd_has_cached_files(ndentry)) {
close_cached = true;
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index adf3bb0a8048..279d945d4ebe 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -205,7 +205,8 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
int ret;
spin_lock(lock);
- if (prev->bh && blkoff == prev->blkoff) {
+ if (prev->bh && blkoff == prev->blkoff &&
+ likely(buffer_uptodate(prev->bh))) {
get_bh(prev->bh);
*bhp = prev->bh;
spin_unlock(lock);
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index aadea660c66c..b0077f5f7112 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -73,10 +73,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
- if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
- brelse(bh);
+ if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */
goto failed;
- }
}
lock_buffer(bh);
@@ -102,6 +100,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
failed:
unlock_page(bh->b_page);
put_page(bh->b_page);
+ if (unlikely(err))
+ brelse(bh);
return err;
}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b908216f306d..324e23236c34 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -1029,7 +1029,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
int err;
spin_lock(&nilfs->ns_inode_lock);
- if (ii->i_bh == NULL) {
+ if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
spin_unlock(&nilfs->ns_inode_lock);
err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
inode->i_ino, pbh);
@@ -1038,7 +1038,10 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
spin_lock(&nilfs->ns_inode_lock);
if (ii->i_bh == NULL)
ii->i_bh = *pbh;
- else {
+ else if (unlikely(!buffer_uptodate(ii->i_bh))) {
+ __brelse(ii->i_bh);
+ ii->i_bh = *pbh;
+ } else {
brelse(*pbh);
*pbh = ii->i_bh;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 9e865732d352..d61d702215db 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -725,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
struct page *page = pvec.pages[i];
lock_page(page);
+ if (unlikely(page->mapping != mapping)) {
+ /* Exclude pages removed from the address space */
+ unlock_page(page);
+ continue;
+ }
if (!page_has_buffers(page))
create_empty_buffers(page, i_blocksize(inode), 0);
unlock_page(page);
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index b3abe69382fd..23b4b8863e7f 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -501,15 +501,38 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
- if (!ret) {
- mark_buffer_dirty(bh);
- nilfs_mdt_mark_dirty(sufile);
- kaddr = kmap_atomic(bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ if (ret)
+ goto out_sem;
+
+ kaddr = kmap_atomic(bh->b_page);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ if (unlikely(nilfs_segment_usage_error(su))) {
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+
+ kunmap_atomic(kaddr);
+ brelse(bh);
+ if (nilfs_segment_is_active(nilfs, segnum)) {
+ nilfs_error(sufile->i_sb,
+ "active segment %llu is erroneous",
+ (unsigned long long)segnum);
+ } else {
+ /*
+ * Segments marked erroneous are never allocated by
+ * nilfs_sufile_alloc(); only active segments, ie,
+ * the segments indexed by ns_segnum or ns_nextnum,
+ * can be erroneous here.
+ */
+ WARN_ON_ONCE(1);
+ }
+ ret = -EIO;
+ } else {
nilfs_segment_usage_set_dirty(su);
kunmap_atomic(kaddr);
+ mark_buffer_dirty(bh);
+ nilfs_mdt_mark_dirty(sufile);
brelse(bh);
}
+out_sem:
up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
@@ -536,9 +559,14 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
kaddr = kmap_atomic(bh->b_page);
su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
- WARN_ON(nilfs_segment_usage_error(su));
- if (modtime)
+ if (modtime) {
+ /*
+ * Check segusage error and set su_lastmod only when updating
+ * this entry with a valid timestamp, not for cancellation.
+ */
+ WARN_ON_ONCE(nilfs_segment_usage_error(su));
su->su_lastmod = cpu_to_le64(modtime);
+ }
su->su_nblocks = cpu_to_le32(nblocks);
kunmap_atomic(kaddr);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index fe2e7197268b..a07e20147abc 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -717,7 +717,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
goto failed_sbh;
}
nilfs_release_super_block(nilfs);
- sb_set_blocksize(sb, blocksize);
+ if (!sb_set_blocksize(sb, blocksize)) {
+ nilfs_err(sb, "bad blocksize %d", blocksize);
+ err = -EINVAL;
+ goto out;
+ }
err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
if (err)
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 52ccd34b1e79..a026dbd3593f 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -272,7 +272,7 @@ int unregister_nls(struct nls_table * nls)
return -EINVAL;
}
-static struct nls_table *find_nls(char *charset)
+static struct nls_table *find_nls(const char *charset)
{
struct nls_table *nls;
spin_lock(&nls_lock);
@@ -288,7 +288,7 @@ static struct nls_table *find_nls(char *charset)
return nls;
}
-struct nls_table *load_nls(char *charset)
+struct nls_table *load_nls(const char *charset)
{
return try_then_request_module(find_nls(charset), "nls_%s", charset);
}
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index 321d55b3ca17..1d5ac2164d94 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -1583,10 +1583,8 @@ repack:
le_b = NULL;
attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL,
0, NULL, &mi_b);
- if (!attr_b) {
- err = -ENOENT;
- goto out;
- }
+ if (!attr_b)
+ return -ENOENT;
attr = attr_b;
le = le_b;
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
index 81c22df27c72..0c6a68e71e7d 100644
--- a/fs/ntfs3/attrlist.c
+++ b/fs/ntfs3/attrlist.c
@@ -52,7 +52,8 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
if (!attr->non_res) {
lsize = le32_to_cpu(attr->res.data_size);
- le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN);
+ /* attr is resident: lsize < record_size (1K or 4K) */
+ le = kvmalloc(al_aligned(lsize), GFP_KERNEL);
if (!le) {
err = -ENOMEM;
goto out;
@@ -80,7 +81,17 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
if (err < 0)
goto out;
- le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN);
+ /* attr is nonresident.
+ * The worst case:
+ * 1T (2^40) extremely fragmented file.
+ * cluster = 4K (2^12) => 2^28 fragments
+ * 2^9 fragments per one record => 2^19 records
+ * 2^5 bytes of ATTR_LIST_ENTRY per one record => 2^24 bytes.
+ *
+ * the result is 16M bytes per attribute list.
+ * Use kvmalloc to allocate in range [several Kbytes - dozen Mbytes]
+ */
+ le = kvmalloc(al_aligned(lsize), GFP_KERNEL);
if (!le) {
err = -ENOMEM;
goto out;
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index 3261b69cdac3..21536b72aa5e 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -667,7 +667,8 @@ int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
wnd->bits_last = wbits;
wnd->free_bits =
- kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN);
+ kvmalloc_array(wnd->nwnd, sizeof(u16), GFP_KERNEL | __GFP_ZERO);
+
if (!wnd->free_bits)
return -ENOMEM;
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index fb438d604040..d4d9f4ffb6d9 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -309,7 +309,11 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
return 0;
}
- dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+ /* NTFS: symlinks are "dir + reparse" or "file + reparse" */
+ if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT)
+ dt_type = DT_LNK;
+ else
+ dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
}
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 9a1744955d1c..b02778cbb1d3 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -2100,7 +2100,7 @@ out1:
for (i = 0; i < pages_per_frame; i++) {
pg = pages[i];
- if (i == idx)
+ if (i == idx || !pg)
continue;
unlock_page(pg);
put_page(pg);
@@ -3144,6 +3144,12 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup,
if (!fname || !memcmp(&fname->dup, dup, sizeof(fname->dup)))
continue;
+ /* Check simple case when parent inode equals current inode. */
+ if (ino_get(&fname->home) == ni->vfs_inode.i_ino) {
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ continue;
+ }
+
/* ntfs_iget5 may sleep. */
dir = ntfs_iget5(sb, &fname->home, NULL);
if (IS_ERR(dir)) {
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index 59f813cbdaa8..8b95c06e5a4c 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -2169,8 +2169,10 @@ file_is_valid:
if (!page) {
page = kmalloc(log->page_size, GFP_NOFS);
- if (!page)
- return -ENOMEM;
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
}
/*
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 0ae70010b01d..4413f6da68e6 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -958,18 +958,11 @@ out:
if (err)
return err;
- mark_inode_dirty(&ni->vfs_inode);
+ mark_inode_dirty_sync(&ni->vfs_inode);
/* verify(!ntfs_update_mftmirr()); */
- /*
- * If we used wait=1, sync_inode_metadata waits for the io for the
- * inode to finish. It hangs when media is removed.
- * So wait=0 is sent down to sync_inode_metadata
- * and filemap_fdatawrite is used for the data blocks.
- */
- err = sync_inode_metadata(&ni->vfs_inode, 0);
- if (!err)
- err = filemap_fdatawrite(ni->vfs_inode.i_mapping);
+ /* write mft record on disk. */
+ err = _ni_write_inode(&ni->vfs_inode, 1);
return err;
}
@@ -2458,10 +2451,12 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim)
{
CLST end, i;
struct wnd_bitmap *wnd = &sbi->used.bitmap;
+ bool dirty = false;
down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
if (!wnd_is_used(wnd, lcn, len)) {
- ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ /* mark volume as dirty out of wnd->rw_lock */
+ dirty = true;
end = lcn + len;
len = 0;
@@ -2493,6 +2488,8 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim)
out:
up_write(&wnd->rw_lock);
+ if (dirty)
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
}
/*
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 7705adc926b8..b49e62e2080b 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -729,6 +729,9 @@ static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx,
u32 total = le32_to_cpu(hdr->total);
u16 offs[128];
+ if (unlikely(!cmp))
+ return NULL;
+
fill_table:
if (end > total)
return NULL;
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 33b1833ad525..fbaf1c84311b 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -1136,7 +1136,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
goto put_inode_out;
}
bytes = inode->i_size;
- sbi->def_table = t = kmalloc(bytes, GFP_NOFS | __GFP_NOWARN);
+ sbi->def_table = t = kvmalloc(bytes, GFP_KERNEL);
if (!t) {
err = -ENOMEM;
goto put_inode_out;
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index aaf4bafa2c70..8e739023e305 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -209,7 +209,8 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
size = le32_to_cpu(info->size);
/* Enumerate all xattrs. */
- for (ret = 0, off = 0; off < size; off += ea_size) {
+ ret = 0;
+ for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) {
ea = Add2Ptr(ea_all, off);
ea_size = unpacked_ea_size(ea);
@@ -217,6 +218,10 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
break;
if (buffer) {
+ /* Check if we can use field ea->name */
+ if (off + ea_size > size)
+ break;
+
if (ret + ea->name_len + 1 > bytes_per_buffer) {
err = -ERANGE;
goto out;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3c24eceeb4ff..0b4f3d287cbc 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1535,6 +1535,10 @@ static int ocfs2_rename(struct user_namespace *mnt_userns,
status = ocfs2_add_entry(handle, new_dentry, old_inode,
OCFS2_I(old_inode)->ip_blkno,
new_dir_bh, &target_insert);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
}
old_inode->i_ctime = current_time(old_inode);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 46cc429c44f7..0ed70eff9cb9 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -306,7 +306,7 @@ static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
.ia_valid =
- ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
+ ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME,
.ia_atime = stat->atime,
.ia_mtime = stat->mtime,
};
@@ -583,7 +583,8 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
if (err)
return err;
- if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) {
+ if (inode->i_flags & OVL_COPY_I_FLAGS_MASK &&
+ (S_ISREG(c->stat.mode) || S_ISDIR(c->stat.mode))) {
/*
* Copy the fileattr inode flags that are the source of already
* copied i_flags
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 28cb05ef018c..49d3feded593 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -19,7 +19,6 @@ struct ovl_aio_req {
struct kiocb iocb;
refcount_t ref;
struct kiocb *orig_iocb;
- struct fd fd;
};
static struct kmem_cache *ovl_aio_request_cachep;
@@ -256,7 +255,7 @@ static rwf_t ovl_iocb_to_rwf(int ifl)
static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
{
if (refcount_dec_and_test(&aio_req->ref)) {
- fdput(aio_req->fd);
+ fput(aio_req->iocb.ki_filp);
kmem_cache_free(ovl_aio_request_cachep, aio_req);
}
}
@@ -322,10 +321,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
if (!aio_req)
goto out;
- aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
- kiocb_clone(&aio_req->iocb, iocb, real.file);
+ kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
refcount_set(&aio_req->ref, 2);
ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
@@ -394,10 +392,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
/* Pacify lockdep, same trick as done in aio_write() */
__sb_writers_release(file_inode(real.file)->i_sb,
SB_FREEZE_WRITE);
- aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
- kiocb_clone(&aio_req->iocb, iocb, real.file);
+ kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
refcount_set(&aio_req->ref, 2);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 300d53ee7040..e5d7a5a75aff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3544,7 +3544,8 @@ static int proc_tid_comm_permission(struct user_namespace *mnt_userns,
}
static const struct inode_operations proc_tid_comm_inode_operations = {
- .permission = proc_tid_comm_permission,
+ .setattr = proc_setattr,
+ .permission = proc_tid_comm_permission,
};
/*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 0b7a00ed6c49..4192fe6ec3da 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1765,7 +1765,6 @@ static const struct sysctl_alias sysctl_aliases[] = {
{"hung_task_panic", "kernel.hung_task_panic" },
{"numa_zonelist_order", "vm.numa_zonelist_order" },
{"softlockup_all_cpu_backtrace", "kernel.softlockup_all_cpu_backtrace" },
- {"softlockup_panic", "kernel.softlockup_panic" },
{ }
};
@@ -1781,6 +1780,13 @@ static const char *sysctl_find_alias(char *param)
return NULL;
}
+bool sysctl_is_alias(char *param)
+{
+ const char *alias = sysctl_find_alias(param);
+
+ return alias != NULL;
+}
+
/* Set sysctl value passed on kernel command line. */
static int process_sysctl_arg(char *param, char *val,
const char *unused, void *arg)
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index a6d21fc0033c..97f387d30e74 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -208,11 +208,16 @@ static void *m_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !mmget_not_zero(mm))
+ if (!mm || !mmget_not_zero(mm)) {
+ put_task_struct(priv->task);
+ priv->task = NULL;
return NULL;
+ }
if (mmap_read_lock_killable(mm)) {
mmput(mm);
+ put_task_struct(priv->task);
+ priv->task = NULL;
return ERR_PTR(-EINTR);
}
@@ -221,23 +226,21 @@ static void *m_start(struct seq_file *m, loff_t *pos)
if (n-- == 0)
return p;
- mmap_read_unlock(mm);
- mmput(mm);
return NULL;
}
-static void m_stop(struct seq_file *m, void *_vml)
+static void m_stop(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
+ struct mm_struct *mm = priv->mm;
- if (!IS_ERR_OR_NULL(_vml)) {
- mmap_read_unlock(priv->mm);
- mmput(priv->mm);
- }
- if (priv->task) {
- put_task_struct(priv->task);
- priv->task = NULL;
- }
+ if (!priv->task)
+ return;
+
+ mmap_read_unlock(mm);
+ mmput(mm);
+ put_task_struct(priv->task);
+ priv->task = NULL;
}
static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index ad96ba97d8f9..3fc4739f82d8 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -561,6 +561,8 @@ out:
*/
int pstore_register(struct pstore_info *psi)
{
+ char *new_backend;
+
if (backend && strcmp(backend, psi->name)) {
pr_warn("ignoring unexpected backend '%s'\n", psi->name);
return -EPERM;
@@ -580,11 +582,16 @@ int pstore_register(struct pstore_info *psi)
return -EINVAL;
}
+ new_backend = kstrdup(psi->name, GFP_KERNEL);
+ if (!new_backend)
+ return -ENOMEM;
+
mutex_lock(&psinfo_lock);
if (psinfo) {
pr_warn("backend '%s' already loaded: ignoring '%s'\n",
psinfo->name, psi->name);
mutex_unlock(&psinfo_lock);
+ kfree(new_backend);
return -EBUSY;
}
@@ -617,7 +624,7 @@ int pstore_register(struct pstore_info *psi)
* Update the module parameter backend, so it is visible
* through /sys/module/pstore/parameters/backend
*/
- backend = kstrdup(psi->name, GFP_KERNEL);
+ backend = new_backend;
pr_info("Registered %s as persistent store backend\n", psi->name);
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index fd9bab137685..ec321722384d 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -190,7 +190,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
{
int numerr;
struct persistent_ram_buffer *buffer = prz->buffer;
- int ecc_blocks;
+ size_t ecc_blocks;
size_t ecc_total;
if (!ecc_info || !ecc_info->ecc_size)
@@ -518,7 +518,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
sig ^= PERSISTENT_RAM_SIG;
if (prz->buffer->sig == sig) {
- if (buffer_size(prz) == 0) {
+ if (buffer_size(prz) == 0 && buffer_start(prz) == 0) {
pr_debug("found existing empty buffer\n");
return 0;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b88f5a2f6032..75e593b1c03e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -225,18 +225,26 @@ static void put_quota_format(struct quota_format_type *fmt)
/*
* Dquot List Management:
- * The quota code uses four lists for dquot management: the inuse_list,
- * free_dquots, dqi_dirty_list, and dquot_hash[] array. A single dquot
- * structure may be on some of those lists, depending on its current state.
+ * The quota code uses five lists for dquot management: the inuse_list,
+ * releasing_dquots, free_dquots, dqi_dirty_list, and dquot_hash[] array.
+ * A single dquot structure may be on some of those lists, depending on
+ * its current state.
*
* All dquots are placed to the end of inuse_list when first created, and this
* list is used for invalidate operation, which must look at every dquot.
*
- * Unused dquots (dq_count == 0) are added to the free_dquots list when freed,
- * and this list is searched whenever we need an available dquot. Dquots are
- * removed from the list as soon as they are used again, and
- * dqstats.free_dquots gives the number of dquots on the list. When
- * dquot is invalidated it's completely released from memory.
+ * When the last reference of a dquot is dropped, the dquot is added to
+ * releasing_dquots. We'll then queue work item which will call
+ * synchronize_srcu() and after that perform the final cleanup of all the
+ * dquots on the list. Each cleaned up dquot is moved to free_dquots list.
+ * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot
+ * struct.
+ *
+ * Unused and cleaned up dquots are in the free_dquots list and this list is
+ * searched whenever we need an available dquot. Dquots are removed from the
+ * list as soon as they are used again and dqstats.free_dquots gives the number
+ * of dquots on the list. When dquot is invalidated it's completely released
+ * from memory.
*
* Dirty dquots are added to the dqi_dirty_list of quota_info when mark
* dirtied, and this list is searched when writing dirty dquots back to
@@ -250,6 +258,7 @@ static void put_quota_format(struct quota_format_type *fmt)
static LIST_HEAD(inuse_list);
static LIST_HEAD(free_dquots);
+static LIST_HEAD(releasing_dquots);
static unsigned int dq_hash_bits, dq_hash_mask;
static struct hlist_head *dquot_hash;
@@ -260,6 +269,9 @@ static qsize_t inode_get_rsv_space(struct inode *inode);
static qsize_t __inode_get_rsv_space(struct inode *inode);
static int __dquot_initialize(struct inode *inode, int type);
+static void quota_release_workfn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(quota_release_work, quota_release_workfn);
+
static inline unsigned int
hashfn(const struct super_block *sb, struct kqid qid)
{
@@ -305,12 +317,21 @@ static inline void put_dquot_last(struct dquot *dquot)
dqstats_inc(DQST_FREE_DQUOTS);
}
+static inline void put_releasing_dquots(struct dquot *dquot)
+{
+ list_add_tail(&dquot->dq_free, &releasing_dquots);
+ set_bit(DQ_RELEASING_B, &dquot->dq_flags);
+}
+
static inline void remove_free_dquot(struct dquot *dquot)
{
if (list_empty(&dquot->dq_free))
return;
list_del_init(&dquot->dq_free);
- dqstats_dec(DQST_FREE_DQUOTS);
+ if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags))
+ dqstats_dec(DQST_FREE_DQUOTS);
+ else
+ clear_bit(DQ_RELEASING_B, &dquot->dq_flags);
}
static inline void put_inuse(struct dquot *dquot)
@@ -336,6 +357,11 @@ static void wait_on_dquot(struct dquot *dquot)
mutex_unlock(&dquot->dq_lock);
}
+static inline int dquot_active(struct dquot *dquot)
+{
+ return test_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+}
+
static inline int dquot_dirty(struct dquot *dquot)
{
return test_bit(DQ_MOD_B, &dquot->dq_flags);
@@ -351,14 +377,14 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
{
int ret = 1;
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ if (!dquot_active(dquot))
return 0;
if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY)
return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags);
/* If quota is dirty already, we don't have to acquire dq_list_lock */
- if (test_bit(DQ_MOD_B, &dquot->dq_flags))
+ if (dquot_dirty(dquot))
return 1;
spin_lock(&dq_list_lock);
@@ -440,7 +466,7 @@ int dquot_acquire(struct dquot *dquot)
smp_mb__before_atomic();
set_bit(DQ_READ_B, &dquot->dq_flags);
/* Instantiate dquot if needed */
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) {
+ if (!dquot_active(dquot) && !dquot->dq_off) {
ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
/* Write the info if needed */
if (info_dirty(&dqopt->info[dquot->dq_id.type])) {
@@ -482,7 +508,7 @@ int dquot_commit(struct dquot *dquot)
goto out_lock;
/* Inactive dquot can be only if there was error during read/init
* => we have better not writing it */
- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ if (dquot_active(dquot))
ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
else
ret = -EIO;
@@ -547,6 +573,8 @@ static void invalidate_dquots(struct super_block *sb, int type)
struct dquot *dquot, *tmp;
restart:
+ flush_delayed_work(&quota_release_work);
+
spin_lock(&dq_list_lock);
list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
if (dquot->dq_sb != sb)
@@ -574,6 +602,15 @@ restart:
goto restart;
}
/*
+ * The last user already dropped its reference but dquot didn't
+ * get fully cleaned up yet. Restart the scan which flushes the
+ * work cleaning up released dquots.
+ */
+ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) {
+ spin_unlock(&dq_list_lock);
+ goto restart;
+ }
+ /*
* Quota now has no users and it has been written on last
* dqput()
*/
@@ -597,7 +634,7 @@ int dquot_scan_active(struct super_block *sb,
spin_lock(&dq_list_lock);
list_for_each_entry(dquot, &inuse_list, dq_inuse) {
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ if (!dquot_active(dquot))
continue;
if (dquot->dq_sb != sb)
continue;
@@ -612,7 +649,7 @@ int dquot_scan_active(struct super_block *sb,
* outstanding call and recheck the DQ_ACTIVE_B after that.
*/
wait_on_dquot(dquot);
- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ if (dquot_active(dquot)) {
ret = fn(dquot, priv);
if (ret < 0)
goto out;
@@ -628,6 +665,18 @@ out:
}
EXPORT_SYMBOL(dquot_scan_active);
+static inline int dquot_write_dquot(struct dquot *dquot)
+{
+ int ret = dquot->dq_sb->dq_op->write_dquot(dquot);
+ if (ret < 0) {
+ quota_error(dquot->dq_sb, "Can't write quota structure "
+ "(error %d). Quota may get out of sync!", ret);
+ /* Clear dirty bit anyway to avoid infinite loop. */
+ clear_dquot_dirty(dquot);
+ }
+ return ret;
+}
+
/* Write all dquot structures to quota files */
int dquot_writeback_dquots(struct super_block *sb, int type)
{
@@ -651,23 +700,23 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
dquot = list_first_entry(&dirty, struct dquot,
dq_dirty);
- WARN_ON(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags));
+ WARN_ON(!dquot_active(dquot));
+ /* If the dquot is releasing we should not touch it */
+ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) {
+ spin_unlock(&dq_list_lock);
+ flush_delayed_work(&quota_release_work);
+ spin_lock(&dq_list_lock);
+ continue;
+ }
/* Now we have active dquot from which someone is
* holding reference so we can safely just increase
* use count */
dqgrab(dquot);
spin_unlock(&dq_list_lock);
- err = sb->dq_op->write_dquot(dquot);
- if (err) {
- /*
- * Clear dirty bit anyway to avoid infinite
- * loop here.
- */
- clear_dquot_dirty(dquot);
- if (!ret)
- ret = err;
- }
+ err = dquot_write_dquot(dquot);
+ if (err && !ret)
+ ret = err;
dqput(dquot);
spin_lock(&dq_list_lock);
}
@@ -761,12 +810,52 @@ static struct shrinker dqcache_shrinker = {
};
/*
+ * Safely release dquot and put reference to dquot.
+ */
+static void quota_release_workfn(struct work_struct *work)
+{
+ struct dquot *dquot;
+ struct list_head rls_head;
+
+ spin_lock(&dq_list_lock);
+ /* Exchange the list head to avoid livelock. */
+ list_replace_init(&releasing_dquots, &rls_head);
+ spin_unlock(&dq_list_lock);
+ synchronize_srcu(&dquot_srcu);
+
+restart:
+ spin_lock(&dq_list_lock);
+ while (!list_empty(&rls_head)) {
+ dquot = list_first_entry(&rls_head, struct dquot, dq_free);
+ WARN_ON_ONCE(atomic_read(&dquot->dq_count));
+ /*
+ * Note that DQ_RELEASING_B protects us from racing with
+ * invalidate_dquots() calls so we are safe to work with the
+ * dquot even after we drop dq_list_lock.
+ */
+ if (dquot_dirty(dquot)) {
+ spin_unlock(&dq_list_lock);
+ /* Commit dquot before releasing */
+ dquot_write_dquot(dquot);
+ goto restart;
+ }
+ if (dquot_active(dquot)) {
+ spin_unlock(&dq_list_lock);
+ dquot->dq_sb->dq_op->release_dquot(dquot);
+ goto restart;
+ }
+ /* Dquot is inactive and clean, now move it to free list */
+ remove_free_dquot(dquot);
+ put_dquot_last(dquot);
+ }
+ spin_unlock(&dq_list_lock);
+}
+
+/*
* Put reference to dquot
*/
void dqput(struct dquot *dquot)
{
- int ret;
-
if (!dquot)
return;
#ifdef CONFIG_QUOTA_DEBUG
@@ -778,7 +867,7 @@ void dqput(struct dquot *dquot)
}
#endif
dqstats_inc(DQST_DROPS);
-we_slept:
+
spin_lock(&dq_list_lock);
if (atomic_read(&dquot->dq_count) > 1) {
/* We have more than one user... nothing to do */
@@ -790,35 +879,16 @@ we_slept:
spin_unlock(&dq_list_lock);
return;
}
+
/* Need to release dquot? */
- if (dquot_dirty(dquot)) {
- spin_unlock(&dq_list_lock);
- /* Commit dquot before releasing */
- ret = dquot->dq_sb->dq_op->write_dquot(dquot);
- if (ret < 0) {
- quota_error(dquot->dq_sb, "Can't write quota structure"
- " (error %d). Quota may get out of sync!",
- ret);
- /*
- * We clear dirty bit anyway, so that we avoid
- * infinite loop here
- */
- clear_dquot_dirty(dquot);
- }
- goto we_slept;
- }
- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
- spin_unlock(&dq_list_lock);
- dquot->dq_sb->dq_op->release_dquot(dquot);
- goto we_slept;
- }
- atomic_dec(&dquot->dq_count);
#ifdef CONFIG_QUOTA_DEBUG
/* sanity check */
BUG_ON(!list_empty(&dquot->dq_free));
#endif
- put_dquot_last(dquot);
+ put_releasing_dquots(dquot);
+ atomic_dec(&dquot->dq_count);
spin_unlock(&dq_list_lock);
+ queue_delayed_work(system_unbound_wq, &quota_release_work, 1);
}
EXPORT_SYMBOL(dqput);
@@ -905,10 +975,10 @@ we_slept:
dqstats_inc(DQST_LOOKUPS);
}
/* Wait for dq_lock - after this we know that either dquot_release() is
- * already finished or it will be canceled due to dq_count > 1 test */
+ * already finished or it will be canceled due to dq_count > 0 test */
wait_on_dquot(dquot);
/* Read the dquot / allocate space in quota file */
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ if (!dquot_active(dquot)) {
int err;
err = sb->dq_op->acquire_dquot(dquot);
@@ -1425,7 +1495,7 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
return QUOTA_NL_NOWARN;
}
-static int dquot_active(const struct inode *inode)
+static int inode_quota_active(const struct inode *inode)
{
struct super_block *sb = inode->i_sb;
@@ -1448,7 +1518,7 @@ static int __dquot_initialize(struct inode *inode, int type)
qsize_t rsv;
int ret = 0;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return 0;
dquots = i_dquot(inode);
@@ -1556,7 +1626,7 @@ bool dquot_initialize_needed(struct inode *inode)
struct dquot **dquots;
int i;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return false;
dquots = i_dquot(inode);
@@ -1667,7 +1737,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
int reserve = flags & DQUOT_SPACE_RESERVE;
struct dquot **dquots;
- if (!dquot_active(inode)) {
+ if (!inode_quota_active(inode)) {
if (reserve) {
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) += number;
@@ -1737,7 +1807,7 @@ int dquot_alloc_inode(struct inode *inode)
struct dquot_warn warn[MAXQUOTAS];
struct dquot * const *dquots;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return 0;
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
@@ -1780,7 +1850,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
struct dquot **dquots;
int cnt, index;
- if (!dquot_active(inode)) {
+ if (!inode_quota_active(inode)) {
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) -= number;
__inode_add_bytes(inode, number);
@@ -1822,7 +1892,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
struct dquot **dquots;
int cnt, index;
- if (!dquot_active(inode)) {
+ if (!inode_quota_active(inode)) {
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) += number;
__inode_sub_bytes(inode, number);
@@ -1866,7 +1936,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
struct dquot **dquots;
int reserve = flags & DQUOT_SPACE_RESERVE, index;
- if (!dquot_active(inode)) {
+ if (!inode_quota_active(inode)) {
if (reserve) {
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) -= number;
@@ -1921,7 +1991,7 @@ void dquot_free_inode(struct inode *inode)
struct dquot * const *dquots;
int index;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return;
dquots = i_dquot(inode);
@@ -2092,7 +2162,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
struct super_block *sb = inode->i_sb;
int ret;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return 0;
if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)){
@@ -2326,6 +2396,20 @@ static int vfs_setup_quota_inode(struct inode *inode, int type)
if (sb_has_quota_loaded(sb, type))
return -EBUSY;
+ /*
+ * Quota files should never be encrypted. They should be thought of as
+ * filesystem metadata, not user data. New-style internal quota files
+ * cannot be encrypted by users anyway, but old-style external quota
+ * files could potentially be incorrectly created in an encrypted
+ * directory, hence this explicit check. Some reasons why encrypted
+ * quota files don't work include: (1) some filesystems that support
+ * encryption don't handle it in their quota_read and quota_write, and
+ * (2) cleaning up encrypted quota files at unmount would need special
+ * consideration, as quota files are cleaned up later than user files.
+ */
+ if (IS_ENCRYPTED(inode))
+ return -EINVAL;
+
dqopt->files[type] = igrab(inode);
if (!dqopt->files[type])
return -EIO;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 0834b101c316..86a1dee6e2e7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2323,7 +2323,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
int i, j;
bh = __getblk(dev, block, bufsize);
- if (buffer_uptodate(bh))
+ if (!bh || buffer_uptodate(bh))
return (bh);
if (block + BUFNR > max_block) {
@@ -2333,6 +2333,8 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
j = 1;
for (i = 1; i < blocks; i++) {
bh = __getblk(dev, block + i, bufsize);
+ if (!bh)
+ break;
if (buffer_uptodate(bh)) {
brelse(bh);
break;
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 066e8344934d..7c1aa4a60dc6 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -556,6 +556,9 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
*/
struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
{
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+
return __create_dir(name, parent, &simple_dir_inode_operations);
}
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 8e597db4d971..f416b7fe092f 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -36,18 +36,41 @@ static int read_block_bitmap(struct super_block *sb,
unsigned long bitmap_nr)
{
struct buffer_head *bh = NULL;
- int retval = 0;
+ int i;
+ int max_bits, off, count;
struct kernel_lb_addr loc;
loc.logicalBlockNum = bitmap->s_extPosition;
loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
bh = udf_tread(sb, udf_get_lb_pblock(sb, &loc, block));
+ bitmap->s_block_bitmap[bitmap_nr] = bh;
if (!bh)
- retval = -EIO;
+ return -EIO;
- bitmap->s_block_bitmap[bitmap_nr] = bh;
- return retval;
+ /* Check consistency of Space Bitmap buffer. */
+ max_bits = sb->s_blocksize * 8;
+ if (!bitmap_nr) {
+ off = sizeof(struct spaceBitmapDesc) << 3;
+ count = min(max_bits - off, bitmap->s_nr_groups);
+ } else {
+ /*
+ * Rough check if bitmap number is too big to have any bitmap
+ * blocks reserved.
+ */
+ if (bitmap_nr >
+ (bitmap->s_nr_groups >> (sb->s_blocksize_bits + 3)) + 2)
+ return 0;
+ off = 0;
+ count = bitmap->s_nr_groups - bitmap_nr * max_bits +
+ (sizeof(struct spaceBitmapDesc) << 3);
+ count = min(count, max_bits);
+ }
+
+ for (i = 0; i < count; i++)
+ if (udf_test_bit(i + off, bh->b_data))
+ return -EFSCORRUPTED;
+ return 0;
}
static int __load_block_bitmap(struct super_block *sb,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 594d22458881..da6fb28b4eea 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -57,15 +57,15 @@ static int udf_update_inode(struct inode *, int);
static int udf_sync_inode(struct inode *inode);
static int udf_alloc_i_data(struct inode *inode, size_t size);
static sector_t inode_getblk(struct inode *, sector_t, int *, int *);
-static int8_t udf_insert_aext(struct inode *, struct extent_position,
- struct kernel_lb_addr, uint32_t);
+static int udf_insert_aext(struct inode *, struct extent_position,
+ struct kernel_lb_addr, uint32_t);
static void udf_split_extents(struct inode *, int *, int, udf_pblk_t,
struct kernel_long_ad *, int *);
static void udf_prealloc_extents(struct inode *, int, int,
struct kernel_long_ad *, int *);
static void udf_merge_extents(struct inode *, struct kernel_long_ad *, int *);
-static void udf_update_extents(struct inode *, struct kernel_long_ad *, int,
- int, struct extent_position *);
+static int udf_update_extents(struct inode *, struct kernel_long_ad *, int,
+ int, struct extent_position *);
static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
static void __udf_clear_extent_cache(struct inode *inode)
@@ -695,7 +695,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
struct kernel_lb_addr eloc, tmpeloc;
int c = 1;
loff_t lbcount = 0, b_off = 0;
- udf_pblk_t newblocknum, newblock;
+ udf_pblk_t newblocknum, newblock = 0;
sector_t offset = 0;
int8_t etype;
struct udf_inode_info *iinfo = UDF_I(inode);
@@ -798,7 +798,6 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len);
if (ret < 0) {
*err = ret;
- newblock = 0;
goto out_free;
}
c = 0;
@@ -861,7 +860,6 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
goal, err);
if (!newblocknum) {
*err = -ENOSPC;
- newblock = 0;
goto out_free;
}
if (isBeyondEOF)
@@ -887,7 +885,9 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* write back the new extents, inserting new extents if the new number
* of extents is greater than the old number, and deleting extents if
* the new number of extents is less than the old number */
- udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
+ *err = udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
+ if (*err < 0)
+ goto out_free;
newblock = udf_get_pblock(inode->i_sb, newblocknum,
iinfo->i_location.partitionReferenceNum, 0);
@@ -1155,21 +1155,30 @@ static void udf_merge_extents(struct inode *inode, struct kernel_long_ad *laarr,
}
}
-static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
- int startnum, int endnum,
- struct extent_position *epos)
+static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
+ int startnum, int endnum,
+ struct extent_position *epos)
{
int start = 0, i;
struct kernel_lb_addr tmploc;
uint32_t tmplen;
+ int err;
if (startnum > endnum) {
for (i = 0; i < (startnum - endnum); i++)
udf_delete_aext(inode, *epos);
} else if (startnum < endnum) {
for (i = 0; i < (endnum - startnum); i++) {
- udf_insert_aext(inode, *epos, laarr[i].extLocation,
- laarr[i].extLength);
+ err = udf_insert_aext(inode, *epos,
+ laarr[i].extLocation,
+ laarr[i].extLength);
+ /*
+ * If we fail here, we are likely corrupting the extent
+ * list and leaking blocks. At least stop early to
+ * limit the damage.
+ */
+ if (err < 0)
+ return err;
udf_next_aext(inode, epos, &laarr[i].extLocation,
&laarr[i].extLength, 1);
start++;
@@ -1181,6 +1190,7 @@ static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr
udf_write_aext(inode, epos, &laarr[i].extLocation,
laarr[i].extLength, 1);
}
+ return 0;
}
struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block,
@@ -2215,12 +2225,13 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
return etype;
}
-static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
- struct kernel_lb_addr neloc, uint32_t nelen)
+static int udf_insert_aext(struct inode *inode, struct extent_position epos,
+ struct kernel_lb_addr neloc, uint32_t nelen)
{
struct kernel_lb_addr oeloc;
uint32_t oelen;
int8_t etype;
+ int err;
if (epos.bh)
get_bh(epos.bh);
@@ -2230,10 +2241,10 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
neloc = oeloc;
nelen = (etype << 30) | oelen;
}
- udf_add_aext(inode, &epos, &neloc, nelen, 1);
+ err = udf_add_aext(inode, &epos, &neloc, nelen, 1);
brelse(epos.bh);
- return (nelen >> 30);
+ return err;
}
int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
index 143a530a8008..b59de03055e1 100644
--- a/fs/verity/signature.c
+++ b/fs/verity/signature.c
@@ -54,6 +54,22 @@ int fsverity_verify_signature(const struct fsverity_info *vi,
return 0;
}
+ if (fsverity_keyring->keys.nr_leaves_on_tree == 0) {
+ /*
+ * The ".fs-verity" keyring is empty, due to builtin signatures
+ * being supported by the kernel but not actually being used.
+ * In this case, verify_pkcs7_signature() would always return an
+ * error, usually ENOKEY. It could also be EBADMSG if the
+ * PKCS#7 is malformed, but that isn't very important to
+ * distinguish. So, just skip to ENOKEY to avoid the attack
+ * surface of the PKCS#7 parser, which would otherwise be
+ * reachable by any task able to execute FS_IOC_ENABLE_VERITY.
+ */
+ fsverity_err(inode,
+ "fs-verity keyring is empty, rejecting signed file!");
+ return -ENOKEY;
+ }
+
d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL);
if (!d)
return -ENOMEM;
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index d9b66306a9a7..cb9e950a911d 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -146,6 +146,8 @@ xfs_dir3_leaf_check_int(
xfs_dir2_leaf_tail_t *ltp;
int stale;
int i;
+ bool isleaf1 = (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
+ hdr->magic == XFS_DIR3_LEAF1_MAGIC);
ltp = xfs_dir2_leaf_tail_p(geo, leaf);
@@ -158,8 +160,7 @@ xfs_dir3_leaf_check_int(
return __this_address;
/* Leaves and bests don't overlap in leaf format. */
- if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
- hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
+ if (isleaf1 &&
(char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
return __this_address;
@@ -175,6 +176,10 @@ xfs_dir3_leaf_check_int(
}
if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
+ if (isleaf1 && xfs_dir2_dataptr_to_db(geo,
+ be32_to_cpu(hdr->ents[i].address)) >=
+ be32_to_cpu(ltp->bestcount))
+ return __this_address;
}
if (hdr->stale != stale)
return __this_address;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 20095233d7bc..c1f965af8432 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -330,6 +330,7 @@ xfs_iformat_attr_fork(
}
if (error) {
+ xfs_idestroy_fork(ip->i_afp);
kmem_cache_free(xfs_ifork_zone, ip->i_afp);
ip->i_afp = NULL;
}
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index ff69a0000817..81a065b0b571 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -108,12 +108,6 @@ struct xlog_recover {
#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr)
-/*
- * This is the number of entries in the l_buf_cancel_table used during
- * recovery.
- */
-#define XLOG_BC_TABLE_SIZE 64
-
#define XLOG_RECOVER_CRCPASS 0
#define XLOG_RECOVER_PASS1 1
#define XLOG_RECOVER_PASS2 2
@@ -126,5 +120,13 @@ int xlog_recover_iget(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_inode **ipp);
void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
uint64_t intent_id);
+int xlog_alloc_buf_cancel_table(struct xlog *log);
+void xlog_free_buf_cancel_table(struct xlog *log);
+
+#ifdef DEBUG
+void xlog_check_buf_cancel_table(struct xlog *log);
+#else
+#define xlog_check_buf_cancel_table(log) do { } while (0)
+#endif
#endif /* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 5e300daa2559..2db9d9d12344 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -423,7 +423,7 @@ xfs_calc_remove_reservation(
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_iunlink_add_reservation(mp) +
- max((xfs_calc_inode_res(mp, 1) +
+ max((xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index bf1f3607d0b6..08df23edea72 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -864,28 +864,3 @@ xchk_ilock_inverted(
return -EDEADLOCK;
}
-/* Pause background reaping of resources. */
-void
-xchk_stop_reaping(
- struct xfs_scrub *sc)
-{
- sc->flags |= XCHK_REAPING_DISABLED;
- xfs_blockgc_stop(sc->mp);
- xfs_inodegc_stop(sc->mp);
-}
-
-/* Restart background reaping of resources. */
-void
-xchk_start_reaping(
- struct xfs_scrub *sc)
-{
- /*
- * Readonly filesystems do not perform inactivation or speculative
- * preallocation, so there's no need to restart the workers.
- */
- if (!xfs_is_readonly(sc->mp)) {
- xfs_inodegc_start(sc->mp);
- xfs_blockgc_start(sc->mp);
- }
- sc->flags &= ~XCHK_REAPING_DISABLED;
-}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 454145db10e7..2ca80102e704 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -148,7 +148,5 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
-void xchk_stop_reaping(struct xfs_scrub *sc);
-void xchk_start_reaping(struct xfs_scrub *sc);
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 48a6cbdf95d0..037541339d80 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -128,13 +128,6 @@ xchk_setup_fscounters(
if (error)
return error;
- /*
- * Pause background reclaim while we're scrubbing to reduce the
- * likelihood of background perturbations to the counters throwing off
- * our calculations.
- */
- xchk_stop_reaping(sc);
-
return xchk_trans_alloc(sc, 0);
}
@@ -354,6 +347,12 @@ xchk_fscounters(
xchk_set_corrupt(sc);
/*
+ * XXX: We can't quiesce percpu counter updates, so exit early.
+ * This can be re-enabled when we gain exclusive freeze functionality.
+ */
+ return 0;
+
+ /*
* If ifree exceeds icount by more than the minimum variance then
* something's probably wrong with the counters.
*/
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 51e4c61916d2..e4d2a41983f7 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -171,8 +171,6 @@ xchk_teardown(
}
if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
mnt_drop_write_file(sc->file);
- if (sc->flags & XCHK_REAPING_DISABLED)
- xchk_start_reaping(sc);
if (sc->flags & XCHK_HAS_QUOTAOFFLOCK) {
mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
sc->flags &= ~XCHK_HAS_QUOTAOFFLOCK;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 80e5026bba44..e8d9fe9de26e 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -89,7 +89,6 @@ struct xfs_scrub {
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
#define XCHK_HAS_QUOTAOFFLOCK (1 << 1) /* we hold the quotaoff lock */
-#define XCHK_REAPING_DISABLED (1 << 2) /* background block reaping paused */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
/* Metadata scrubbers */
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 2b5da6218977..2afa6d9a7f8f 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -158,6 +158,7 @@ xfs_attr3_node_inactive(
}
child_fsb = be32_to_cpu(ichdr.btree[0].before);
xfs_trans_brelse(*trans, bp); /* no locks for later trans */
+ bp = NULL;
/*
* If this is the node level just above the leaves, simply loop
@@ -211,12 +212,8 @@ xfs_attr3_node_inactive(
&child_bp);
if (error)
return error;
- error = bp->b_error;
- if (error) {
- xfs_trans_brelse(*trans, child_bp);
- return error;
- }
xfs_trans_binval(*trans, child_bp);
+ child_bp = NULL;
/*
* If we're not done, re-read the parent to get the next
@@ -233,6 +230,7 @@ xfs_attr3_node_inactive(
bp->b_addr);
child_fsb = be32_to_cpu(phdr.btree[i + 1].before);
xfs_trans_brelse(*trans, bp);
+ bp = NULL;
}
/*
* Atomically commit the whole invalidate stuff.
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index e04e44ef14c6..31cbe7deebfa 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -24,6 +24,15 @@
#include "xfs_quota.h"
/*
+ * This is the number of entries in the l_buf_cancel_table used during
+ * recovery.
+ */
+#define XLOG_BC_TABLE_SIZE 64
+
+#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
+ ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
+
+/*
* This structure is used during recovery to record the buf log items which
* have been canceled and should not be replayed.
*/
@@ -1003,3 +1012,60 @@ const struct xlog_recover_item_ops xlog_buf_item_ops = {
.commit_pass1 = xlog_recover_buf_commit_pass1,
.commit_pass2 = xlog_recover_buf_commit_pass2,
};
+
+#ifdef DEBUG
+void
+xlog_check_buf_cancel_table(
+ struct xlog *log)
+{
+ int i;
+
+ for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
+ ASSERT(list_empty(&log->l_buf_cancel_table[i]));
+}
+#endif
+
+int
+xlog_alloc_buf_cancel_table(
+ struct xlog *log)
+{
+ void *p;
+ int i;
+
+ ASSERT(log->l_buf_cancel_table == NULL);
+
+ p = kmalloc_array(XLOG_BC_TABLE_SIZE, sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ log->l_buf_cancel_table = p;
+ for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
+ INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
+
+ return 0;
+}
+
+void
+xlog_free_buf_cancel_table(
+ struct xlog *log)
+{
+ int i;
+
+ if (!log->l_buf_cancel_table)
+ return;
+
+ for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) {
+ struct xfs_buf_cancel *bc;
+
+ while ((bc = list_first_entry_or_null(
+ &log->l_buf_cancel_table[i],
+ struct xfs_buf_cancel, bc_list))) {
+ list_del(&bc->bc_list);
+ kmem_free(bc);
+ }
+ }
+
+ kmem_free(log->l_buf_cancel_table);
+ log->l_buf_cancel_table = NULL;
+}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 81c445e9489b..b0ccec92e015 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -224,13 +224,18 @@ int
xfs_errortag_init(
struct xfs_mount *mp)
{
+ int ret;
+
mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
KM_MAYFAIL);
if (!mp->m_errortag)
return -ENOMEM;
- return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
- &mp->m_kobj, "errortag");
+ ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
+ &mp->m_kobj, "errortag");
+ if (ret)
+ kmem_free(mp->m_errortag);
+ return ret;
}
void
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 5e44d7bbd8fc..eab98d76dbe1 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -448,18 +448,23 @@ xfs_iget_check_free_state(
}
/* Make all pending inactivation work start immediately. */
-static void
+static bool
xfs_inodegc_queue_all(
struct xfs_mount *mp)
{
struct xfs_inodegc *gc;
int cpu;
+ bool ret = false;
for_each_online_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
- if (!llist_empty(&gc->list))
- queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
+ if (!llist_empty(&gc->list)) {
+ mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
+ ret = true;
+ }
}
+
+ return ret;
}
/*
@@ -1851,11 +1856,13 @@ void
xfs_inodegc_worker(
struct work_struct *work)
{
- struct xfs_inodegc *gc = container_of(work, struct xfs_inodegc,
- work);
+ struct xfs_inodegc *gc = container_of(to_delayed_work(work),
+ struct xfs_inodegc, work);
struct llist_node *node = llist_del_all(&gc->list);
struct xfs_inode *ip, *n;
+ ASSERT(gc->cpu == smp_processor_id());
+
WRITE_ONCE(gc->items, 0);
if (!node)
@@ -1872,42 +1879,69 @@ xfs_inodegc_worker(
}
/*
- * Force all currently queued inode inactivation work to run immediately and
- * wait for the work to finish.
+ * Expedite all pending inodegc work to run immediately. This does not wait for
+ * completion of the work.
*/
void
-xfs_inodegc_flush(
+xfs_inodegc_push(
struct xfs_mount *mp)
{
if (!xfs_is_inodegc_enabled(mp))
return;
+ trace_xfs_inodegc_push(mp, __return_address);
+ xfs_inodegc_queue_all(mp);
+}
+/*
+ * Force all currently queued inode inactivation work to run immediately and
+ * wait for the work to finish.
+ */
+void
+xfs_inodegc_flush(
+ struct xfs_mount *mp)
+{
+ xfs_inodegc_push(mp);
trace_xfs_inodegc_flush(mp, __return_address);
-
- xfs_inodegc_queue_all(mp);
flush_workqueue(mp->m_inodegc_wq);
}
/*
* Flush all the pending work and then disable the inode inactivation background
- * workers and wait for them to stop.
+ * workers and wait for them to stop. Caller must hold sb->s_umount to
+ * coordinate changes in the inodegc_enabled state.
*/
void
xfs_inodegc_stop(
struct xfs_mount *mp)
{
+ bool rerun;
+
if (!xfs_clear_inodegc_enabled(mp))
return;
+ /*
+ * Drain all pending inodegc work, including inodes that could be
+ * queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan
+ * threads that sample the inodegc state just prior to us clearing it.
+ * The inodegc flag state prevents new threads from queuing more
+ * inodes, so we queue pending work items and flush the workqueue until
+ * all inodegc lists are empty. IOWs, we cannot use drain_workqueue
+ * here because it does not allow other unserialized mechanisms to
+ * reschedule inodegc work while this draining is in progress.
+ */
xfs_inodegc_queue_all(mp);
- drain_workqueue(mp->m_inodegc_wq);
+ do {
+ flush_workqueue(mp->m_inodegc_wq);
+ rerun = xfs_inodegc_queue_all(mp);
+ } while (rerun);
trace_xfs_inodegc_stop(mp, __return_address);
}
/*
* Enable the inode inactivation background workers and schedule deferred inode
- * inactivation work if there is any.
+ * inactivation work if there is any. Caller must hold sb->s_umount to
+ * coordinate changes in the inodegc_enabled state.
*/
void
xfs_inodegc_start(
@@ -2021,6 +2055,7 @@ xfs_inodegc_queue(
struct xfs_inodegc *gc;
int items;
unsigned int shrinker_hits;
+ unsigned long queue_delay = 1;
trace_xfs_inode_set_need_inactive(ip);
spin_lock(&ip->i_flags_lock);
@@ -2032,19 +2067,27 @@ xfs_inodegc_queue(
items = READ_ONCE(gc->items);
WRITE_ONCE(gc->items, items + 1);
shrinker_hits = READ_ONCE(gc->shrinker_hits);
- put_cpu_ptr(gc);
- if (!xfs_is_inodegc_enabled(mp))
+ /*
+ * We queue the work while holding the current CPU so that the work
+ * is scheduled to run on this CPU.
+ */
+ if (!xfs_is_inodegc_enabled(mp)) {
+ put_cpu_ptr(gc);
return;
-
- if (xfs_inodegc_want_queue_work(ip, items)) {
- trace_xfs_inodegc_queue(mp, __return_address);
- queue_work(mp->m_inodegc_wq, &gc->work);
}
+ if (xfs_inodegc_want_queue_work(ip, items))
+ queue_delay = 0;
+
+ trace_xfs_inodegc_queue(mp, __return_address);
+ mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
+ queue_delay);
+ put_cpu_ptr(gc);
+
if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
trace_xfs_inodegc_throttle(mp, __return_address);
- flush_work(&gc->work);
+ flush_delayed_work(&gc->work);
}
}
@@ -2061,7 +2104,7 @@ xfs_inodegc_cpu_dead(
unsigned int count = 0;
dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu);
- cancel_work_sync(&dead_gc->work);
+ cancel_delayed_work_sync(&dead_gc->work);
if (llist_empty(&dead_gc->list))
return;
@@ -2080,12 +2123,13 @@ xfs_inodegc_cpu_dead(
llist_add_batch(first, last, &gc->list);
count += READ_ONCE(gc->items);
WRITE_ONCE(gc->items, count);
- put_cpu_ptr(gc);
if (xfs_is_inodegc_enabled(mp)) {
trace_xfs_inodegc_queue(mp, __return_address);
- queue_work(mp->m_inodegc_wq, &gc->work);
+ mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
+ 0);
}
+ put_cpu_ptr(gc);
}
/*
@@ -2180,7 +2224,7 @@ xfs_inodegc_shrinker_scan(
unsigned int h = READ_ONCE(gc->shrinker_hits);
WRITE_ONCE(gc->shrinker_hits, h + 1);
- queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
+ mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
no_items = false;
}
}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 2e4cfddf8b8e..6cd180721659 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -76,6 +76,7 @@ void xfs_blockgc_stop(struct xfs_mount *mp);
void xfs_blockgc_start(struct xfs_mount *mp);
void xfs_inodegc_worker(struct work_struct *work);
+void xfs_inodegc_push(struct xfs_mount *mp);
void xfs_inodegc_flush(struct xfs_mount *mp);
void xfs_inodegc_stop(struct xfs_mount *mp);
void xfs_inodegc_start(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b2ea85318214..df64b902842d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -378,8 +378,8 @@ xfs_isilocked(
}
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
- return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
- (lock_flags & XFS_IOLOCK_SHARED));
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock,
+ (lock_flags & XFS_MMAPLOCK_SHARED));
}
if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1eb71275e5b0..8696d6551200 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -511,27 +511,6 @@ xfs_vn_get_link(
return ERR_PTR(error);
}
-STATIC const char *
-xfs_vn_get_link_inline(
- struct dentry *dentry,
- struct inode *inode,
- struct delayed_call *done)
-{
- struct xfs_inode *ip = XFS_I(inode);
- char *link;
-
- ASSERT(ip->i_df.if_format == XFS_DINODE_FMT_LOCAL);
-
- /*
- * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if
- * if_data is junk.
- */
- link = ip->i_df.if_u1.if_data;
- if (XFS_IS_CORRUPT(ip->i_mount, !link))
- return ERR_PTR(-EFSCORRUPTED);
- return link;
-}
-
static uint32_t
xfs_stat_blksize(
struct xfs_inode *ip)
@@ -1200,14 +1179,6 @@ static const struct inode_operations xfs_symlink_inode_operations = {
.update_time = xfs_vn_update_time,
};
-static const struct inode_operations xfs_inline_symlink_inode_operations = {
- .get_link = xfs_vn_get_link_inline,
- .getattr = xfs_vn_getattr,
- .setattr = xfs_vn_setattr,
- .listxattr = xfs_vn_listxattr,
- .update_time = xfs_vn_update_time,
-};
-
/* Figure out if this file actually supports DAX. */
static bool
xfs_inode_supports_dax(
@@ -1358,10 +1329,7 @@ xfs_setup_iops(
inode->i_fop = &xfs_dir_file_operations;
break;
case S_IFLNK:
- if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL)
- inode->i_op = &xfs_inline_symlink_inode_operations;
- else
- inode->i_op = &xfs_symlink_inode_operations;
+ inode->i_op = &xfs_symlink_inode_operations;
break;
default:
inode->i_op = &xfs_inode_operations;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 0fb7d05ca308..eba295f666ac 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2061,8 +2061,6 @@ xlog_dealloc_log(
xlog_in_core_t *iclog, *next_iclog;
int i;
- xlog_cil_destroy(log);
-
/*
* Cycle all the iclogbuf locks to make sure all log IO completion
* is done before we tear down these buffers.
@@ -2074,6 +2072,13 @@ xlog_dealloc_log(
iclog = iclog->ic_next;
}
+ /*
+ * Destroy the CIL after waiting for iclog IO completion because an
+ * iclog EIO error will try to shut down the log, which accesses the
+ * CIL to wake up the waiters.
+ */
+ xlog_cil_destroy(log);
+
iclog = log->l_iclog;
for (i = 0; i < log->l_iclog_bufs; i++) {
next_iclog = iclog->ic_next;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index f3d68ca39f45..03393595676f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -454,9 +454,6 @@ struct xlog {
struct rw_semaphore l_incompat_users;
};
-#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
- ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
-
/*
* Bits for operational state
*/
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 581aeb288b32..3d844a250b71 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2560,6 +2560,7 @@ xlog_recover_process_intents(
for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
lip != NULL;
lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
+ const struct xfs_item_ops *ops;
/*
* We're done when we see something other than an intent.
* There should be no intents left in the AIL now.
@@ -2584,13 +2585,17 @@ xlog_recover_process_intents(
* deferred ops, you /must/ attach them to the capture list in
* the recover routine or else those subsequent intents will be
* replayed in the wrong order!
+ *
+ * The recovery function can free the log item, so we must not
+ * access lip after it returns.
*/
spin_unlock(&ailp->ail_lock);
- error = lip->li_ops->iop_recover(lip, &capture_list);
+ ops = lip->li_ops;
+ error = ops->iop_recover(lip, &capture_list);
spin_lock(&ailp->ail_lock);
if (error) {
trace_xlog_intent_recovery_failed(log->l_mp, error,
- lip->li_ops->iop_recover);
+ ops->iop_recover);
break;
}
}
@@ -2739,6 +2744,7 @@ xlog_recover_process_one_iunlink(
* Call xlog_recover_clear_agi_bucket() to perform a transaction to
* clear the inode pointer in the bucket.
*/
+ xfs_inodegc_flush(mp);
xlog_recover_clear_agi_bucket(mp, agno, bucket);
return NULLAGINO;
}
@@ -3248,7 +3254,7 @@ xlog_do_log_recovery(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk)
{
- int error, i;
+ int error;
ASSERT(head_blk != tail_blk);
@@ -3256,37 +3262,25 @@ xlog_do_log_recovery(
* First do a pass to find all of the cancelled buf log items.
* Store them in the buf_cancel_table for use in the second pass.
*/
- log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
- sizeof(struct list_head),
- 0);
- for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
- INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
+ error = xlog_alloc_buf_cancel_table(log);
+ if (error)
+ return error;
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS1, NULL);
- if (error != 0) {
- kmem_free(log->l_buf_cancel_table);
- log->l_buf_cancel_table = NULL;
- return error;
- }
+ if (error != 0)
+ goto out_cancel;
+
/*
* Then do a second pass to actually recover the items in the log.
* When it is complete free the table of buf cancel items.
*/
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
XLOG_RECOVER_PASS2, NULL);
-#ifdef DEBUG
- if (!error) {
- int i;
-
- for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
- ASSERT(list_empty(&log->l_buf_cancel_table[i]));
- }
-#endif /* DEBUG */
-
- kmem_free(log->l_buf_cancel_table);
- log->l_buf_cancel_table = NULL;
-
+ if (!error)
+ xlog_check_buf_cancel_table(log);
+out_cancel:
+ xlog_free_buf_cancel_table(log);
return error;
}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 86564295fce6..29f35169bf9c 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -61,11 +61,14 @@ struct xfs_error_cfg {
*/
struct xfs_inodegc {
struct llist_head list;
- struct work_struct work;
+ struct delayed_work work;
/* approximate count of inodes in the list */
unsigned int items;
unsigned int shrinker_hits;
+#if defined(DEBUG) || defined(XFS_WARN)
+ unsigned int cpu;
+#endif
};
/*
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 623244650a2f..792736e29a37 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1244,6 +1244,13 @@ xfs_qm_flush_one(
error = -EINVAL;
goto out_unlock;
}
+
+ if (!(bp->b_flags & _XBF_DELWRI_Q)) {
+ error = -EAGAIN;
+ xfs_buf_relse(bp);
+ goto out_unlock;
+ }
+
xfs_buf_unlock(bp);
xfs_buf_delwri_pushbuf(bp, buffer_list);
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 47fe60e1a887..322a111dfbc0 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -481,9 +481,12 @@ xfs_qm_scall_getquota(
struct xfs_dquot *dqp;
int error;
- /* Flush inodegc work at the start of a quota reporting scan. */
+ /*
+ * Expedite pending inodegc work at the start of a quota reporting
+ * scan but don't block waiting for it to complete.
+ */
if (id == 0)
- xfs_inodegc_flush(mp);
+ xfs_inodegc_push(mp);
/*
* Try to get the dquot. We don't want it allocated on disk, so don't
@@ -525,7 +528,7 @@ xfs_qm_scall_getquota_next(
/* Flush inodegc work at the start of a quota reporting scan. */
if (*id == 0)
- xfs_inodegc_flush(mp);
+ xfs_inodegc_push(mp);
error = xfs_qm_dqget_next(mp, *id, type, &dqp);
if (error)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 36832e4bc803..793bdf5ac2f7 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -340,9 +340,41 @@ xfs_find_trim_cow_extent(
return 0;
}
-/* Allocate all CoW reservations covering a range of blocks in a file. */
-int
-xfs_reflink_allocate_cow(
+static int
+xfs_reflink_convert_unwritten(
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap,
+ struct xfs_bmbt_irec *cmap,
+ bool convert_now)
+{
+ xfs_fileoff_t offset_fsb = imap->br_startoff;
+ xfs_filblks_t count_fsb = imap->br_blockcount;
+ int error;
+
+ /*
+ * cmap might be larger than imap due to cowextsize hint.
+ */
+ xfs_trim_extent(cmap, offset_fsb, count_fsb);
+
+ /*
+ * COW fork extents are supposed to remain unwritten until we're ready
+ * to initiate a disk write. For direct I/O we are going to write the
+ * data and need the conversion, but for buffered writes we're done.
+ */
+ if (!convert_now || cmap->br_state == XFS_EXT_NORM)
+ return 0;
+
+ trace_xfs_reflink_convert_cow(ip, cmap);
+
+ error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
+ if (!error)
+ cmap->br_state = XFS_EXT_NORM;
+
+ return error;
+}
+
+static int
+xfs_reflink_fill_cow_hole(
struct xfs_inode *ip,
struct xfs_bmbt_irec *imap,
struct xfs_bmbt_irec *cmap,
@@ -351,25 +383,12 @@ xfs_reflink_allocate_cow(
bool convert_now)
{
struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t offset_fsb = imap->br_startoff;
- xfs_filblks_t count_fsb = imap->br_blockcount;
struct xfs_trans *tp;
- int nimaps, error = 0;
- bool found;
xfs_filblks_t resaligned;
- xfs_extlen_t resblks = 0;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (!ip->i_cowfp) {
- ASSERT(!xfs_is_reflink_inode(ip));
- xfs_ifork_init_cow(ip);
- }
-
- error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
- if (error || !*shared)
- return error;
- if (found)
- goto convert;
+ xfs_extlen_t resblks;
+ int nimaps;
+ int error;
+ bool found;
resaligned = xfs_aligned_fsb_count(imap->br_startoff,
imap->br_blockcount, xfs_get_cowextsz_hint(ip));
@@ -385,17 +404,17 @@ xfs_reflink_allocate_cow(
*lockmode = XFS_ILOCK_EXCL;
- /*
- * Check for an overlapping extent again now that we dropped the ilock.
- */
error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
if (error || !*shared)
goto out_trans_cancel;
+
if (found) {
xfs_trans_cancel(tp);
goto convert;
}
+ ASSERT(cmap->br_startoff > imap->br_startoff);
+
/* Allocate the entire reservation as unwritten blocks. */
nimaps = 1;
error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
@@ -415,23 +434,135 @@ xfs_reflink_allocate_cow(
*/
if (nimaps == 0)
return -ENOSPC;
+
convert:
- xfs_trim_extent(cmap, offset_fsb, count_fsb);
- /*
- * COW fork extents are supposed to remain unwritten until we're ready
- * to initiate a disk write. For direct I/O we are going to write the
- * data and need the conversion, but for buffered writes we're done.
- */
- if (!convert_now || cmap->br_state == XFS_EXT_NORM)
- return 0;
- trace_xfs_reflink_convert_cow(ip, cmap);
- return xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
+ return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
out_trans_cancel:
xfs_trans_cancel(tp);
return error;
}
+static int
+xfs_reflink_fill_delalloc(
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap,
+ struct xfs_bmbt_irec *cmap,
+ bool *shared,
+ uint *lockmode,
+ bool convert_now)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int nimaps;
+ int error;
+ bool found;
+
+ do {
+ xfs_iunlock(ip, *lockmode);
+ *lockmode = 0;
+
+ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, 0, 0,
+ false, &tp);
+ if (error)
+ return error;
+
+ *lockmode = XFS_ILOCK_EXCL;
+
+ error = xfs_find_trim_cow_extent(ip, imap, cmap, shared,
+ &found);
+ if (error || !*shared)
+ goto out_trans_cancel;
+
+ if (found) {
+ xfs_trans_cancel(tp);
+ break;
+ }
+
+ ASSERT(isnullstartblock(cmap->br_startblock) ||
+ cmap->br_startblock == DELAYSTARTBLOCK);
+
+ /*
+ * Replace delalloc reservation with an unwritten extent.
+ */
+ nimaps = 1;
+ error = xfs_bmapi_write(tp, ip, cmap->br_startoff,
+ cmap->br_blockcount,
+ XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0,
+ cmap, &nimaps);
+ if (error)
+ goto out_trans_cancel;
+
+ xfs_inode_set_cowblocks_tag(ip);
+ error = xfs_trans_commit(tp);
+ if (error)
+ return error;
+
+ /*
+ * Allocation succeeded but the requested range was not even
+ * partially satisfied? Bail out!
+ */
+ if (nimaps == 0)
+ return -ENOSPC;
+ } while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff);
+
+ return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
+
+out_trans_cancel:
+ xfs_trans_cancel(tp);
+ return error;
+}
+
+/* Allocate all CoW reservations covering a range of blocks in a file. */
+int
+xfs_reflink_allocate_cow(
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap,
+ struct xfs_bmbt_irec *cmap,
+ bool *shared,
+ uint *lockmode,
+ bool convert_now)
+{
+ int error;
+ bool found;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ if (!ip->i_cowfp) {
+ ASSERT(!xfs_is_reflink_inode(ip));
+ xfs_ifork_init_cow(ip);
+ }
+
+ error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
+ if (error || !*shared)
+ return error;
+
+ /* CoW fork has a real extent */
+ if (found)
+ return xfs_reflink_convert_unwritten(ip, imap, cmap,
+ convert_now);
+
+ /*
+ * CoW fork does not have an extent and data extent is shared.
+ * Allocate a real extent in the CoW fork.
+ */
+ if (cmap->br_startoff > imap->br_startoff)
+ return xfs_reflink_fill_cow_hole(ip, imap, cmap, shared,
+ lockmode, convert_now);
+
+ /*
+ * CoW fork has a delalloc reservation. Replace it with a real extent.
+ * There may or may not be a data fork mapping.
+ */
+ if (isnullstartblock(cmap->br_startblock) ||
+ cmap->br_startblock == DELAYSTARTBLOCK)
+ return xfs_reflink_fill_delalloc(ip, imap, cmap, shared,
+ lockmode, convert_now);
+
+ /* Shouldn't get here. */
+ ASSERT(0);
+ return -EFSCORRUPTED;
+}
+
/*
* Cancel CoW reservations for some block range of an inode.
*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index df1d6be61bfa..569960e4ea3a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -795,8 +795,11 @@ xfs_fs_statfs(
xfs_extlen_t lsize;
int64_t ffree;
- /* Wait for whatever inactivations are in progress. */
- xfs_inodegc_flush(mp);
+ /*
+ * Expedite background inodegc but don't wait. We do not want to block
+ * here waiting hours for a billion extent file to be truncated.
+ */
+ xfs_inodegc_push(mp);
statp->f_type = XFS_SUPER_MAGIC;
statp->f_namelen = MAXNAMELEN - 1;
@@ -1059,9 +1062,12 @@ xfs_inodegc_init_percpu(
for_each_possible_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
+#if defined(DEBUG) || defined(XFS_WARN)
+ gc->cpu = cpu;
+#endif
init_llist_head(&gc->list);
gc->items = 0;
- INIT_WORK(&gc->work, xfs_inodegc_worker);
+ INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
}
return 0;
}
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index a31d2e5d0321..affbedf78160 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -22,6 +22,7 @@
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
+#include "xfs_error.h"
/* ----- Kernel only functions below ----- */
int
@@ -96,17 +97,15 @@ xfs_readlink_bmap_ilocked(
int
xfs_readlink(
- struct xfs_inode *ip,
- char *link)
+ struct xfs_inode *ip,
+ char *link)
{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fsize_t pathlen;
- int error = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fsize_t pathlen;
+ int error = -EFSCORRUPTED;
trace_xfs_readlink(ip);
- ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL);
-
if (xfs_is_shutdown(mp))
return -EIO;
@@ -121,12 +120,22 @@ xfs_readlink(
__func__, (unsigned long long) ip->i_ino,
(long long) pathlen);
ASSERT(0);
- error = -EFSCORRUPTED;
goto out;
}
-
- error = xfs_readlink_bmap_ilocked(ip, link);
+ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
+ /*
+ * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED
+ * if if_data is junk.
+ */
+ if (XFS_IS_CORRUPT(ip->i_mount, !ip->i_df.if_u1.if_data))
+ goto out;
+
+ memcpy(link, ip->i_df.if_u1.if_data, pathlen + 1);
+ error = 0;
+ } else {
+ error = xfs_readlink_bmap_ilocked(ip, link);
+ }
out:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index 43585850f154..513095e353a5 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -33,10 +33,15 @@ xfs_sysfs_init(
const char *name)
{
struct kobject *parent;
+ int err;
parent = parent_kobj ? &parent_kobj->kobject : NULL;
init_completion(&kobj->complete);
- return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
+ err = kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
+ if (err)
+ kobject_put(&kobj->kobject);
+
+ return err;
}
static inline void
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 1033a95fbf8e..ebd17ddba024 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -240,6 +240,7 @@ DEFINE_EVENT(xfs_fs_class, name, \
TP_PROTO(struct xfs_mount *mp, void *caller_ip), \
TP_ARGS(mp, caller_ip))
DEFINE_FS_EVENT(xfs_inodegc_flush);
+DEFINE_FS_EVENT(xfs_inodegc_push);
DEFINE_FS_EVENT(xfs_inodegc_start);
DEFINE_FS_EVENT(xfs_inodegc_stop);
DEFINE_FS_EVENT(xfs_inodegc_queue);