summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/affs/file.c9
-rw-r--r--fs/binfmt_elf.c273
-rw-r--r--fs/btrfs/block-group.c18
-rw-r--r--fs/btrfs/discard.c34
-rw-r--r--fs/btrfs/disk-io.c40
-rw-r--r--fs/btrfs/extent-tree.c30
-rw-r--r--fs/btrfs/extent_io.c7
-rw-r--r--fs/btrfs/file.c9
-rw-r--r--fs/btrfs/inode.c15
-rw-r--r--fs/btrfs/relocation.c6
-rw-r--r--fs/btrfs/scrub.c38
-rw-r--r--fs/btrfs/send.c6
-rw-r--r--fs/btrfs/super.c3
-rw-r--r--fs/btrfs/tree-checker.c30
-rw-r--r--fs/btrfs/tree-checker.h1
-rw-r--r--fs/btrfs/zoned.c6
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/coredump.c81
-rw-r--r--fs/dlm/lowcomms.c4
-rw-r--r--fs/exec.c15
-rw-r--r--fs/exfat/fatent.c2
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/block_validity.c5
-rw-r--r--fs/ext4/dir.c3
-rw-r--r--fs/ext4/ext4.h5
-rw-r--r--fs/ext4/extents.c19
-rw-r--r--fs/ext4/inode.c158
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/ext4/page-io.c16
-rw-r--r--fs/ext4/super.c63
-rw-r--r--fs/ext4/xattr.c11
-rw-r--r--fs/f2fs/checkpoint.c21
-rw-r--r--fs/f2fs/data.c4
-rw-r--r--fs/f2fs/f2fs.h42
-rw-r--r--fs/f2fs/inode.c8
-rw-r--r--fs/f2fs/namei.c10
-rw-r--r--fs/f2fs/node.c110
-rw-r--r--fs/f2fs/super.c8
-rw-r--r--fs/f2fs/sysfs.c74
-rw-r--r--fs/file.c26
-rw-r--r--fs/filesystems.c14
-rw-r--r--fs/fuse/dax.c1
-rw-r--r--fs/fuse/dir.c4
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/fuse/virtio_fs.c3
-rw-r--r--fs/gfs2/glock.c11
-rw-r--r--fs/gfs2/inode.c3
-rw-r--r--fs/hfs/bnode.c6
-rw-r--r--fs/hfsplus/bnode.c6
-rw-r--r--fs/hostfs/hostfs.h2
-rw-r--r--fs/hostfs/hostfs_kern.c7
-rw-r--r--fs/hostfs/hostfs_user.c59
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/isofs/dir.c3
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/jbd2/journal.c1
-rw-r--r--fs/jbd2/recovery.c11
-rw-r--r--fs/jfs/jfs_dmap.c10
-rw-r--r--fs/jfs/jfs_dtree.c3
-rw-r--r--fs/jfs/jfs_imap.c4
-rw-r--r--fs/jfs/xattr.c15
-rw-r--r--fs/kernfs/dir.c5
-rw-r--r--fs/kernfs/file.c3
-rw-r--r--fs/namespace.c106
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/delegation.c36
-rw-r--r--fs/nfs/dir.c15
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c6
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c6
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h8
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs4proc.c18
-rw-r--r--fs/nfs/nfs4session.h4
-rw-r--r--fs/nfs/nfs4state.c10
-rw-r--r--fs/nfs/pnfs.c9
-rw-r--r--fs/nfs/pnfs.h4
-rw-r--r--fs/nfs/pnfs_nfs.c9
-rw-r--r--fs/nfs/super.c19
-rw-r--r--fs/nfs/sysfs.c22
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/nfs4state.c31
-rw-r--r--fs/nfsd/nfsfh.h7
-rw-r--r--fs/nilfs2/btree.c4
-rw-r--r--fs/nilfs2/direct.c3
-rw-r--r--fs/ntfs3/file.c1
-rw-r--r--fs/ntfs3/index.c12
-rw-r--r--fs/ntfs3/ntfs.h2
-rw-r--r--fs/ocfs2/alloc.c8
-rw-r--r--fs/ocfs2/journal.c80
-rw-r--r--fs/ocfs2/journal.h1
-rw-r--r--fs/ocfs2/ocfs2.h17
-rw-r--r--fs/ocfs2/quota_local.c11
-rw-r--r--fs/ocfs2/super.c3
-rw-r--r--fs/orangefs/inode.c7
-rw-r--r--fs/overlayfs/overlayfs.h2
-rw-r--r--fs/overlayfs/super.c5
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/generic.c10
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/internal.h14
-rw-r--r--fs/pstore/inode.c2
-rw-r--r--fs/pstore/internal.h4
-rw-r--r--fs/pstore/platform.c11
-rw-r--r--fs/smb/client/cached_dir.c10
-rw-r--r--fs/smb/client/cifsacl.c25
-rw-r--r--fs/smb/client/cifspdu.h5
-rw-r--r--fs/smb/client/cifsproto.h9
-rw-r--r--fs/smb/client/cifssmb.c77
-rw-r--r--fs/smb/client/connect.c68
-rw-r--r--fs/smb/client/file.c28
-rw-r--r--fs/smb/client/fs_context.c7
-rw-r--r--fs/smb/client/fs_context.h3
-rw-r--r--fs/smb/client/inode.c10
-rw-r--r--fs/smb/client/link.c8
-rw-r--r--fs/smb/client/readdir.c7
-rw-r--r--fs/smb/client/reparse.c4
-rw-r--r--fs/smb/client/sess.c60
-rw-r--r--fs/smb/client/smb1ops.c264
-rw-r--r--fs/smb/client/smb2file.c11
-rw-r--r--fs/smb/client/smb2misc.c9
-rw-r--r--fs/smb/client/smb2ops.c30
-rw-r--r--fs/smb/client/smb2pdu.c3
-rw-r--r--fs/smb/client/transport.c2
-rw-r--r--fs/smb/common/smb2pdu.h3
-rw-r--r--fs/smb/server/auth.c20
-rw-r--r--fs/smb/server/mgmt/user_session.c33
-rw-r--r--fs/smb/server/mgmt/user_session.h2
-rw-r--r--fs/smb/server/oplock.c49
-rw-r--r--fs/smb/server/oplock.h1
-rw-r--r--fs/smb/server/smb2pdu.c48
-rw-r--r--fs/smb/server/smbacl.c10
-rw-r--r--fs/smb/server/transport_ipc.c7
-rw-r--r--fs/smb/server/vfs.c10
-rw-r--r--fs/smb/server/vfs_cache.c33
-rw-r--r--fs/splice.c2
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/udf/inode.c1
-rw-r--r--fs/udf/truncate.c2
-rw-r--r--fs/userfaultfd.c51
-rw-r--r--fs/xattr.c24
144 files changed, 2004 insertions, 860 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 02a9237807a7..85ae4953f2d7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -365,6 +365,7 @@ config GRACE_PERIOD
config LOCKD
tristate
depends on FILE_LOCKING
+ select CRC32
select GRACE_PERIOD
config LOCKD_V4
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 04c018e19602..93b319917c9a 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -597,7 +597,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
BUG_ON(tmp > bsize);
AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
- AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
+ AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1);
AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp);
affs_fix_checksum(sb, bh);
bh->b_state &= ~(1UL << BH_New);
@@ -726,7 +726,8 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
tmp = min(bsize - boff, to - from);
BUG_ON(boff + tmp > bsize || tmp > bsize);
memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
- be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
+ AFFS_DATA_HEAD(bh)->size = cpu_to_be32(
+ max(boff + tmp, be32_to_cpu(AFFS_DATA_HEAD(bh)->size)));
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
written += tmp;
@@ -748,7 +749,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
if (buffer_new(bh)) {
AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
- AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
+ AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1);
AFFS_DATA_HEAD(bh)->size = cpu_to_be32(bsize);
AFFS_DATA_HEAD(bh)->next = 0;
bh->b_state &= ~(1UL << BH_New);
@@ -782,7 +783,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
if (buffer_new(bh)) {
AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
- AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
+ AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1);
AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp);
AFFS_DATA_HEAD(bh)->next = 0;
bh->b_state &= ~(1UL << BH_New);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fb2c8d14327a..3ff7d2e47c7e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -110,25 +110,6 @@ static struct linux_binfmt elf_format = {
#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
-static int set_brk(unsigned long start, unsigned long end, int prot)
-{
- start = ELF_PAGEALIGN(start);
- end = ELF_PAGEALIGN(end);
- if (end > start) {
- /*
- * Map the last of the bss segment.
- * If the header is requesting these pages to be
- * executable, honour that (ppc32 needs this).
- */
- int error = vm_brk_flags(start, end - start,
- prot & PROT_EXEC ? VM_EXEC : 0);
- if (error)
- return error;
- }
- current->mm->start_brk = current->mm->brk = end;
- return 0;
-}
-
/* We need to explicitly zero any fractional pages
after the data section (i.e. bss). This would
contain the junk from the file that should not
@@ -406,6 +387,51 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
return(map_addr);
}
+static unsigned long elf_load(struct file *filep, unsigned long addr,
+ const struct elf_phdr *eppnt, int prot, int type,
+ unsigned long total_size)
+{
+ unsigned long zero_start, zero_end;
+ unsigned long map_addr;
+
+ if (eppnt->p_filesz) {
+ map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
+ if (BAD_ADDR(map_addr))
+ return map_addr;
+ if (eppnt->p_memsz > eppnt->p_filesz) {
+ zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+ eppnt->p_filesz;
+ zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+ eppnt->p_memsz;
+
+ /* Zero the end of the last mapped page */
+ padzero(zero_start);
+ }
+ } else {
+ map_addr = zero_start = ELF_PAGESTART(addr);
+ zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
+ eppnt->p_memsz;
+ }
+ if (eppnt->p_memsz > eppnt->p_filesz) {
+ /*
+ * Map the last of the segment.
+ * If the header is requesting these pages to be
+ * executable, honour that (ppc32 needs this).
+ */
+ int error;
+
+ zero_start = ELF_PAGEALIGN(zero_start);
+ zero_end = ELF_PAGEALIGN(zero_end);
+
+ error = vm_brk_flags(zero_start, zero_end - zero_start,
+ prot & PROT_EXEC ? VM_EXEC : 0);
+ if (error)
+ map_addr = error;
+ }
+ return map_addr;
+}
+
+
static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
{
elf_addr_t min_addr = -1;
@@ -828,8 +854,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
struct elf_phdr *elf_property_phdata = NULL;
- unsigned long elf_bss, elf_brk;
- int bss_prot = 0;
+ unsigned long elf_brk;
+ bool brk_moved = false;
int retval, i;
unsigned long elf_entry;
unsigned long e_entry;
@@ -1021,7 +1047,6 @@ out_free_interp:
if (retval < 0)
goto out_free_dentry;
- elf_bss = 0;
elf_brk = 0;
start_code = ~0UL;
@@ -1041,33 +1066,6 @@ out_free_interp:
if (elf_ppnt->p_type != PT_LOAD)
continue;
- if (unlikely (elf_brk > elf_bss)) {
- unsigned long nbyte;
-
- /* There was a PT_LOAD segment with p_memsz > p_filesz
- before this one. Map anonymous pages, if needed,
- and clear the area. */
- retval = set_brk(elf_bss + load_bias,
- elf_brk + load_bias,
- bss_prot);
- if (retval)
- goto out_free_dentry;
- nbyte = ELF_PAGEOFFSET(elf_bss);
- if (nbyte) {
- nbyte = ELF_MIN_ALIGN - nbyte;
- if (nbyte > elf_brk - elf_bss)
- nbyte = elf_brk - elf_bss;
- if (clear_user((void __user *)elf_bss +
- load_bias, nbyte)) {
- /*
- * This bss-zeroing can fail if the ELF
- * file specifies odd protections. So
- * we don't check the return value
- */
- }
- }
- }
-
elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
!!interpreter, false);
@@ -1095,15 +1093,49 @@ out_free_interp:
* Header for ET_DYN binaries to calculate the
* randomization (load_bias) for all the LOAD
* Program Headers.
+ */
+
+ /*
+ * Calculate the entire size of the ELF mapping
+ * (total_size), used for the initial mapping,
+ * due to load_addr_set which is set to true later
+ * once the initial mapping is performed.
+ *
+ * Note that this is only sensible when the LOAD
+ * segments are contiguous (or overlapping). If
+ * used for LOADs that are far apart, this would
+ * cause the holes between LOADs to be mapped,
+ * running the risk of having the mapping fail,
+ * as it would be larger than the ELF file itself.
+ *
+ * As a result, only ET_DYN does this, since
+ * some ET_EXEC (e.g. ia64) may have large virtual
+ * memory holes between LOADs.
+ *
+ */
+ total_size = total_mapping_size(elf_phdata,
+ elf_ex->e_phnum);
+ if (!total_size) {
+ retval = -EINVAL;
+ goto out_free_dentry;
+ }
+
+ /* Calculate any requested alignment. */
+ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+
+ /**
+ * DOC: PIE handling
*
- * There are effectively two types of ET_DYN
- * binaries: programs (i.e. PIE: ET_DYN with INTERP)
- * and loaders (ET_DYN without INTERP, since they
- * _are_ the ELF interpreter). The loaders must
- * be loaded away from programs since the program
- * may otherwise collide with the loader (especially
- * for ET_EXEC which does not have a randomized
- * position). For example to handle invocations of
+ * There are effectively two types of ET_DYN ELF
+ * binaries: programs (i.e. PIE: ET_DYN with
+ * PT_INTERP) and loaders (i.e. static PIE: ET_DYN
+ * without PT_INTERP, usually the ELF interpreter
+ * itself). Loaders must be loaded away from programs
+ * since the program may otherwise collide with the
+ * loader (especially for ET_EXEC which does not have
+ * a randomized position).
+ *
+ * For example, to handle invocations of
* "./ld.so someprog" to test out a new version of
* the loader, the subsequent program that the
* loader loads must avoid the loader itself, so
@@ -1116,17 +1148,49 @@ out_free_interp:
* ELF_ET_DYN_BASE and loaders are loaded into the
* independently randomized mmap region (0 load_bias
* without MAP_FIXED nor MAP_FIXED_NOREPLACE).
+ *
+ * See below for "brk" handling details, which is
+ * also affected by program vs loader and ASLR.
*/
if (interpreter) {
+ /* On ET_DYN with PT_INTERP, we do the ASLR. */
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
- alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+ /* Adjust alignment as requested. */
if (alignment)
load_bias &= ~(alignment - 1);
elf_flags |= MAP_FIXED_NOREPLACE;
- } else
- load_bias = 0;
+ } else {
+ /*
+ * For ET_DYN without PT_INTERP, we rely on
+ * the architectures's (potentially ASLR) mmap
+ * base address (via a load_bias of 0).
+ *
+ * When a large alignment is requested, we
+ * must do the allocation at address "0" right
+ * now to discover where things will load so
+ * that we can adjust the resulting alignment.
+ * In this case (load_bias != 0), we can use
+ * MAP_FIXED_NOREPLACE to make sure the mapping
+ * doesn't collide with anything.
+ */
+ if (alignment > ELF_MIN_ALIGN) {
+ load_bias = elf_load(bprm->file, 0, elf_ppnt,
+ elf_prot, elf_flags, total_size);
+ if (BAD_ADDR(load_bias)) {
+ retval = IS_ERR_VALUE(load_bias) ?
+ PTR_ERR((void*)load_bias) : -EINVAL;
+ goto out_free_dentry;
+ }
+ vm_munmap(load_bias, total_size);
+ /* Adjust alignment as requested. */
+ if (alignment)
+ load_bias &= ~(alignment - 1);
+ elf_flags |= MAP_FIXED_NOREPLACE;
+ } else
+ load_bias = 0;
+ }
/*
* Since load_bias is used for all subsequent loading
@@ -1136,34 +1200,9 @@ out_free_interp:
* is then page aligned.
*/
load_bias = ELF_PAGESTART(load_bias - vaddr);
-
- /*
- * Calculate the entire size of the ELF mapping
- * (total_size), used for the initial mapping,
- * due to load_addr_set which is set to true later
- * once the initial mapping is performed.
- *
- * Note that this is only sensible when the LOAD
- * segments are contiguous (or overlapping). If
- * used for LOADs that are far apart, this would
- * cause the holes between LOADs to be mapped,
- * running the risk of having the mapping fail,
- * as it would be larger than the ELF file itself.
- *
- * As a result, only ET_DYN does this, since
- * some ET_EXEC (e.g. ia64) may have large virtual
- * memory holes between LOADs.
- *
- */
- total_size = total_mapping_size(elf_phdata,
- elf_ex->e_phnum);
- if (!total_size) {
- retval = -EINVAL;
- goto out_free_dentry;
- }
}
- error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
+ error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
retval = IS_ERR_VALUE(error) ?
@@ -1211,41 +1250,23 @@ out_free_interp:
k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
- if (k > elf_bss)
- elf_bss = k;
if ((elf_ppnt->p_flags & PF_X) && end_code < k)
end_code = k;
if (end_data < k)
end_data = k;
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
- if (k > elf_brk) {
- bss_prot = elf_prot;
+ if (k > elf_brk)
elf_brk = k;
- }
}
e_entry = elf_ex->e_entry + load_bias;
phdr_addr += load_bias;
- elf_bss += load_bias;
elf_brk += load_bias;
start_code += load_bias;
end_code += load_bias;
start_data += load_bias;
end_data += load_bias;
- /* Calling set_brk effectively mmaps the pages that we need
- * for the bss and break sections. We must do this before
- * mapping in the interpreter, to make sure it doesn't wind
- * up getting placed where the bss needs to go.
- */
- retval = set_brk(elf_bss, elf_brk, bss_prot);
- if (retval)
- goto out_free_dentry;
- if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
- retval = -EFAULT; /* Nobody gets to see this, but.. */
- goto out_free_dentry;
- }
-
if (interpreter) {
elf_entry = load_elf_interp(interp_elf_ex,
interpreter,
@@ -1301,24 +1322,44 @@ out_free_interp:
mm->end_data = end_data;
mm->start_stack = bprm->p;
- if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) {
+ /**
+ * DOC: "brk" handling
+ *
+ * For architectures with ELF randomization, when executing a
+ * loader directly (i.e. static PIE: ET_DYN without PT_INTERP),
+ * move the brk area out of the mmap region and into the unused
+ * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
+ * early with the stack growing down or other regions being put
+ * into the mmap region by the kernel (e.g. vdso).
+ *
+ * In the CONFIG_COMPAT_BRK case, though, everything is turned
+ * off because we're not allowed to move the brk at all.
+ */
+ if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
+ IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
+ elf_ex->e_type == ET_DYN && !interpreter) {
+ elf_brk = ELF_ET_DYN_BASE;
+ /* This counts as moving the brk, so let brk(2) know. */
+ brk_moved = true;
+ }
+ mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);
+
+ if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
/*
- * For architectures with ELF randomization, when executing
- * a loader directly (i.e. no interpreter listed in ELF
- * headers), move the brk area out of the mmap region
- * (since it grows up, and may collide early with the stack
- * growing down), and into the unused ELF_ET_DYN_BASE region.
+ * If we didn't move the brk to ELF_ET_DYN_BASE (above),
+ * leave a gap between .bss and brk.
*/
- if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
- elf_ex->e_type == ET_DYN && !interpreter) {
- mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
- }
+ if (!brk_moved)
+ mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
mm->brk = mm->start_brk = arch_randomize_brk(mm);
+ brk_moved = true;
+ }
+
#ifdef compat_brk_randomized
+ if (brk_moved)
current->brk_randomized = 1;
#endif
- }
if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 434cf3d5f4cf..226e6434a58a 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1885,6 +1885,17 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
up_write(&space_info->groups_sem);
goto next;
}
+
+ /*
+ * Cache the zone_unusable value before turning the block group
+ * to read only. As soon as the block group is read only it's
+ * zone_unusable value gets moved to the block group's read-only
+ * bytes and isn't available for calculations anymore. We also
+ * cache it before unlocking the block group, to prevent races
+ * (reports from KCSAN and such tools) with tasks updating it.
+ */
+ zone_unusable = bg->zone_unusable;
+
spin_unlock(&bg->lock);
/*
@@ -1900,13 +1911,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
goto next;
}
- /*
- * Cache the zone_unusable value before turning the block group
- * to read only. As soon as the blog group is read only it's
- * zone_unusable value gets moved to the block group's read-only
- * bytes and isn't available for calculations anymore.
- */
- zone_unusable = bg->zone_unusable;
ret = inc_block_group_ro(bg, 0);
up_write(&space_info->groups_sem);
if (ret < 0)
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index 944a7340f6a4..3981c941f5b5 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -167,13 +167,7 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
block_group->discard_eligible_time = 0;
queued = !list_empty(&block_group->discard_list);
list_del_init(&block_group->discard_list);
- /*
- * If the block group is currently running in the discard workfn, we
- * don't want to deref it, since it's still being used by the workfn.
- * The workfn will notice this case and deref the block group when it is
- * finished.
- */
- if (queued && !running)
+ if (queued)
btrfs_put_block_group(block_group);
spin_unlock(&discard_ctl->lock);
@@ -260,9 +254,10 @@ again:
block_group->discard_cursor = block_group->start;
block_group->discard_state = BTRFS_DISCARD_EXTENTS;
}
- discard_ctl->block_group = block_group;
}
if (block_group) {
+ btrfs_get_block_group(block_group);
+ discard_ctl->block_group = block_group;
*discard_state = block_group->discard_state;
*discard_index = block_group->discard_index;
}
@@ -493,9 +488,20 @@ static void btrfs_discard_workfn(struct work_struct *work)
block_group = peek_discard_list(discard_ctl, &discard_state,
&discard_index, now);
- if (!block_group || !btrfs_run_discard_work(discard_ctl))
+ if (!block_group)
return;
+ if (!btrfs_run_discard_work(discard_ctl)) {
+ spin_lock(&discard_ctl->lock);
+ btrfs_put_block_group(block_group);
+ discard_ctl->block_group = NULL;
+ spin_unlock(&discard_ctl->lock);
+ return;
+ }
if (now < block_group->discard_eligible_time) {
+ spin_lock(&discard_ctl->lock);
+ btrfs_put_block_group(block_group);
+ discard_ctl->block_group = NULL;
+ spin_unlock(&discard_ctl->lock);
btrfs_discard_schedule_work(discard_ctl, false);
return;
}
@@ -547,15 +553,7 @@ static void btrfs_discard_workfn(struct work_struct *work)
spin_lock(&discard_ctl->lock);
discard_ctl->prev_discard = trimmed;
discard_ctl->prev_discard_time = now;
- /*
- * If the block group was removed from the discard list while it was
- * running in this workfn, then we didn't deref it, since this function
- * still owned that reference. But we set the discard_ctl->block_group
- * back to NULL, so we can use that condition to know that now we need
- * to deref the block_group.
- */
- if (discard_ctl->block_group == NULL)
- btrfs_put_block_group(block_group);
+ btrfs_put_block_group(block_group);
discard_ctl->block_group = NULL;
__btrfs_discard_schedule_work(discard_ctl, now, false);
spin_unlock(&discard_ctl->lock);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 967c6b5dd0a4..34a30d61b470 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4314,6 +4314,14 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_cleanup_defrag_inodes(fs_info);
/*
+ * Handle the error fs first, as it will flush and wait for all ordered
+ * extents. This will generate delayed iputs, thus we want to handle
+ * it first.
+ */
+ if (unlikely(BTRFS_FS_ERROR(fs_info)))
+ btrfs_error_commit_super(fs_info);
+
+ /*
* Wait for any fixup workers to complete.
* If we don't wait for them here and they are still running by the time
* we call kthread_stop() against the cleaner kthread further below, we
@@ -4334,6 +4342,31 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_flush_workqueue(fs_info->delalloc_workers);
/*
+ * We can have ordered extents getting their last reference dropped from
+ * the fs_info->workers queue because for async writes for data bios we
+ * queue a work for that queue, at btrfs_wq_submit_bio(), that runs
+ * run_one_async_done() which calls btrfs_bio_end_io() in case the bio
+ * has an error, and that later function can do the final
+ * btrfs_put_ordered_extent() on the ordered extent attached to the bio,
+ * which adds a delayed iput for the inode. So we must flush the queue
+ * so that we don't have delayed iputs after committing the current
+ * transaction below and stopping the cleaner and transaction kthreads.
+ */
+ btrfs_flush_workqueue(fs_info->workers);
+
+ /*
+ * When finishing a compressed write bio we schedule a work queue item
+ * to finish an ordered extent - btrfs_finish_compressed_write_work()
+ * calls btrfs_finish_ordered_extent() which in turns does a call to
+ * btrfs_queue_ordered_fn(), and that queues the ordered extent
+ * completion either in the endio_write_workers work queue or in the
+ * fs_info->endio_freespace_worker work queue. We flush those queues
+ * below, so before we flush them we must flush this queue for the
+ * workers of compressed writes.
+ */
+ flush_workqueue(fs_info->compressed_write_workers);
+
+ /*
* After we parked the cleaner kthread, ordered extents may have
* completed and created new delayed iputs. If one of the async reclaim
* tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
@@ -4390,9 +4423,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "commit super ret %d", ret);
}
- if (BTRFS_FS_ERROR(fs_info))
- btrfs_error_commit_super(fs_info);
-
kthread_stop(fs_info->transaction_kthread);
kthread_stop(fs_info->cleaner_kthread);
@@ -4529,10 +4559,6 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
/* cleanup FS via transaction */
btrfs_cleanup_transaction(fs_info);
- mutex_lock(&fs_info->cleaner_mutex);
- btrfs_run_delayed_iputs(fs_info);
- mutex_unlock(&fs_info->cleaner_mutex);
-
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 021cf468274b..ef77d4208510 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -164,6 +164,14 @@ search_again:
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei);
+ if (unlikely(num_refs == 0)) {
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected zero reference count for extent item (%llu %u %llu)",
+ key.objectid, key.type, key.offset);
+ btrfs_abort_transaction(trans, ret);
+ goto out_free;
+ }
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
ret = -EUCLEAN;
@@ -177,8 +185,6 @@ search_again:
goto out_free;
}
-
- BUG_ON(num_refs == 0);
} else {
num_refs = 0;
extent_flags = 0;
@@ -208,10 +214,19 @@ search_again:
goto search_again;
}
spin_lock(&head->lock);
- if (head->extent_op && head->extent_op->update_flags)
+ if (head->extent_op && head->extent_op->update_flags) {
extent_flags |= head->extent_op->flags_to_set;
- else
- BUG_ON(num_refs == 0);
+ } else if (unlikely(num_refs == 0)) {
+ spin_unlock(&head->lock);
+ mutex_unlock(&head->mutex);
+ spin_unlock(&delayed_refs->lock);
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected zero reference count for extent %llu (%s)",
+ bytenr, metadata ? "metadata" : "data");
+ btrfs_abort_transaction(trans, ret);
+ goto out_free;
+ }
num_refs += head->ref_mod;
spin_unlock(&head->lock);
@@ -5540,7 +5555,10 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, eb, 1);
else
ret = btrfs_dec_ref(trans, root, eb, 0);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
if (is_fstree(root->root_key.objectid)) {
ret = btrfs_qgroup_trace_leaf_items(trans, eb);
if (ret) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b2ae50dcca0f..ed08d8e5639f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3565,10 +3565,10 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
return eb;
}
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start)
{
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *eb, *exists = NULL;
int ret;
@@ -3604,8 +3604,11 @@ again:
free_eb:
btrfs_release_extent_buffer(eb);
return exists;
-}
+#else
+ /* Stub to avoid linker error when compiled with optimizations turned off. */
+ return NULL;
#endif
+}
static struct extent_buffer *grab_extent_buffer(
struct btrfs_fs_info *fs_info, struct page *page)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 68092b64e29e..e794606e7c78 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2225,15 +2225,20 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
* will always return true.
* So here we need to do extra page alignment for
* filemap_range_has_page().
+ *
+ * And do not decrease page_lockend right now, as it can be 0.
*/
const u64 page_lockstart = round_up(lockstart, PAGE_SIZE);
- const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1;
+ const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE);
while (1) {
truncate_pagecache_range(inode, lockstart, lockend);
lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
cached_state);
+ /* The same page or adjacent pages. */
+ if (page_lockend <= page_lockstart)
+ break;
/*
* We can't have ordered extents in the range, nor dirty/writeback
* pages, because we have locked the inode's VFS lock in exclusive
@@ -2245,7 +2250,7 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
* we do, unlock the range and retry.
*/
if (!filemap_range_has_page(inode->i_mapping, page_lockstart,
- page_lockend))
+ page_lockend - 1))
break;
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cedffa567a75..48d257923672 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1546,6 +1546,7 @@ out_unlock:
locked_page,
clear_bits,
page_ops);
+ btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
start += cur_alloc_size;
}
@@ -1559,6 +1560,7 @@ out_unlock:
clear_bits |= EXTENT_CLEAR_DATA_RESV;
extent_clear_unlock_delalloc(inode, start, end, locked_page,
clear_bits, page_ops);
+ btrfs_qgroup_free_data(inode, NULL, start, end - start + 1, NULL);
}
return ret;
}
@@ -2056,12 +2058,13 @@ next_slot:
/*
* If the found extent starts after requested offset, then
- * adjust extent_end to be right before this extent begins
+ * adjust cur_offset to be right before this extent begins.
*/
if (found_key.offset > cur_offset) {
- extent_end = found_key.offset;
- extent_type = 0;
- goto must_cow;
+ if (cow_start == (u64)-1)
+ cow_start = cur_offset;
+ cur_offset = found_key.offset;
+ goto next_slot;
}
/*
@@ -2222,13 +2225,15 @@ error:
*/
if (cow_start != (u64)-1)
cur_offset = cow_start;
- if (cur_offset < end)
+ if (cur_offset < end) {
extent_clear_unlock_delalloc(inode, cur_offset, end,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
+ btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
+ }
btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 537e184b4b1d..474758c878fc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2931,6 +2931,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
int ret;
ASSERT(page_index <= last_index);
+again:
page = find_lock_page(inode->i_mapping, page_index);
if (!page) {
page_cache_sync_readahead(inode->i_mapping, ra, NULL,
@@ -2952,6 +2953,11 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
ret = -EIO;
goto release_page;
}
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ put_page(page);
+ goto again;
+ }
}
/*
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6be092bb814f..7632d652a125 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -153,12 +153,14 @@ struct scrub_stripe {
unsigned int init_nr_io_errors;
unsigned int init_nr_csum_errors;
unsigned int init_nr_meta_errors;
+ unsigned int init_nr_meta_gen_errors;
/*
* The following error bitmaps are all for the current status.
* Every time we submit a new read, these bitmaps may be updated.
*
- * error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap;
+ * error_bitmap = io_error_bitmap | csum_error_bitmap |
+ * meta_error_bitmap | meta_generation_bitmap;
*
* IO and csum errors can happen for both metadata and data.
*/
@@ -166,6 +168,7 @@ struct scrub_stripe {
unsigned long io_error_bitmap;
unsigned long csum_error_bitmap;
unsigned long meta_error_bitmap;
+ unsigned long meta_gen_error_bitmap;
/* For writeback (repair or replace) error reporting. */
unsigned long write_error_bitmap;
@@ -617,7 +620,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
memcpy(on_disk_csum, header->csum, fs_info->csum_size);
if (logical != btrfs_stack_header_bytenr(header)) {
- bitmap_set(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad bytenr, has %llu want %llu",
@@ -673,7 +676,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
}
if (stripe->sectors[sector_nr].generation !=
btrfs_stack_header_generation(header)) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_set(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad generation, has %llu want %llu",
@@ -685,6 +688,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree);
bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_clear(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
}
static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
@@ -973,8 +977,22 @@ skip:
if (__ratelimit(&rs) && dev)
scrub_print_common_warning("header error", dev, false,
stripe->logical, physical);
+ if (test_bit(sector_nr, &stripe->meta_gen_error_bitmap))
+ if (__ratelimit(&rs) && dev)
+ scrub_print_common_warning("generation error", dev, false,
+ stripe->logical, physical);
}
+ /* Update the device stats. */
+ for (int i = 0; i < stripe->init_nr_io_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_READ_ERRS);
+ for (int i = 0; i < stripe->init_nr_csum_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ /* Generation mismatch error is based on each metadata, not each block. */
+ for (int i = 0; i < stripe->init_nr_meta_gen_errors;
+ i += (fs_info->nodesize >> fs_info->sectorsize_bits))
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_GENERATION_ERRS);
+
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
@@ -983,7 +1001,8 @@ skip:
sctx->stat.no_csum += nr_nodatacsum_sectors;
sctx->stat.read_errors += stripe->init_nr_io_errors;
sctx->stat.csum_errors += stripe->init_nr_csum_errors;
- sctx->stat.verify_errors += stripe->init_nr_meta_errors;
+ sctx->stat.verify_errors += stripe->init_nr_meta_errors +
+ stripe->init_nr_meta_gen_errors;
sctx->stat.uncorrectable_errors +=
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
sctx->stat.corrected_errors += nr_repaired_sectors;
@@ -1029,6 +1048,8 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
stripe->nr_sectors);
stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
stripe->nr_sectors);
+ stripe->init_nr_meta_gen_errors = bitmap_weight(&stripe->meta_gen_error_bitmap,
+ stripe->nr_sectors);
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
goto out;
@@ -1143,6 +1164,9 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
bitmap_set(&stripe->write_error_bitmap, sector_nr,
bio_size >> fs_info->sectorsize_bits);
spin_unlock_irqrestore(&stripe->write_error_lock, flags);
+ for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev,
+ BTRFS_DEV_STAT_WRITE_ERRS);
}
bio_put(&bbio->bio);
@@ -1505,10 +1529,12 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
stripe->init_nr_io_errors = 0;
stripe->init_nr_csum_errors = 0;
stripe->init_nr_meta_errors = 0;
+ stripe->init_nr_meta_gen_errors = 0;
stripe->error_bitmap = 0;
stripe->io_error_bitmap = 0;
stripe->csum_error_bitmap = 0;
stripe->meta_error_bitmap = 0;
+ stripe->meta_gen_error_bitmap = 0;
}
/*
@@ -1538,8 +1564,8 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
u64 extent_gen;
int ret;
- if (unlikely(!extent_root)) {
- btrfs_err(fs_info, "no valid extent root for scrub");
+ if (unlikely(!extent_root || !csum_root)) {
+ btrfs_err(fs_info, "no valid extent or csum root for scrub");
return -EUCLEAN;
}
memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) *
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index aa1e6d88a72c..e2ead36e5be4 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -487,10 +487,8 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
if (p->buf_len >= len)
return 0;
- if (len > PATH_MAX) {
- WARN_ON(1);
- return -ENOMEM;
- }
+ if (WARN_ON(len > PATH_MAX))
+ return -ENAMETOOLONG;
path_len = p->end - p->start;
old_buf_len = p->buf_len;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index fb4992570c2b..2045ac3d94c4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1336,8 +1336,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
subvol_name = btrfs_get_subvol_name_from_objectid(info,
BTRFS_I(d_inode(dentry))->root->root_key.objectid);
if (!IS_ERR(subvol_name)) {
- seq_puts(seq, ",subvol=");
- seq_escape(seq, subvol_name, " \t\n\\");
+ seq_show_option(seq, "subvol", subvol_name);
kfree(subvol_name);
}
return 0;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 53c74010140e..6d16506bbdc0 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1889,6 +1889,11 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
return BTRFS_TREE_BLOCK_INVALID_LEVEL;
}
+ if (unlikely(!btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN))) {
+ generic_err(leaf, 0, "invalid flag for leaf, WRITTEN not set");
+ return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
+ }
+
/*
* Extent buffers from a relocation tree have a owner field that
* corresponds to the subvolume tree they are based on. So just from an
@@ -1950,6 +1955,7 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
for (slot = 0; slot < nritems; slot++) {
u32 item_end_expected;
u64 item_data_end;
+ enum btrfs_tree_block_status ret;
btrfs_item_key_to_cpu(leaf, &key, slot);
@@ -2005,21 +2011,10 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
- /*
- * We only want to do this if WRITTEN is set, otherwise the leaf
- * may be in some intermediate state and won't appear valid.
- */
- if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) {
- enum btrfs_tree_block_status ret;
-
- /*
- * Check if the item size and content meet other
- * criteria
- */
- ret = check_leaf_item(leaf, &key, slot, &prev_key);
- if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
- return ret;
- }
+ /* Check if the item size and content meet other criteria. */
+ ret = check_leaf_item(leaf, &key, slot, &prev_key);
+ if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
+ return ret;
prev_key.objectid = key.objectid;
prev_key.type = key.type;
@@ -2049,6 +2044,11 @@ enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
int level = btrfs_header_level(node);
u64 bytenr;
+ if (unlikely(!btrfs_header_flag(node, BTRFS_HEADER_FLAG_WRITTEN))) {
+ generic_err(node, 0, "invalid flag for node, WRITTEN not set");
+ return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
+ }
+
if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
generic_err(node, 0,
"invalid level for node, have %d expect [1, %d]",
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index 3c2a02a72f64..43f2ceb78f34 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -51,6 +51,7 @@ enum btrfs_tree_block_status {
BTRFS_TREE_BLOCK_INVALID_BLOCKPTR,
BTRFS_TREE_BLOCK_INVALID_ITEM,
BTRFS_TREE_BLOCK_INVALID_OWNER,
+ BTRFS_TREE_BLOCK_WRITTEN_NOT_SET,
};
/*
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index c4463c3f2068..197dfafbf401 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2006,6 +2006,9 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
physical = map->stripes[i].physical;
zinfo = device->zone_info;
+ if (!device->bdev)
+ continue;
+
if (zinfo->max_active_zones == 0)
continue;
@@ -2165,6 +2168,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
const u64 physical = map->stripes[i].physical;
struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ if (!device->bdev)
+ continue;
+
if (zinfo->max_active_zones == 0)
continue;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index db6977c15c28..f0befbeb6cb8 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2319,7 +2319,7 @@ static int fill_fscrypt_truncate(struct inode *inode,
/* Try to writeback the dirty pagecaches */
if (issued & (CEPH_CAP_FILE_BUFFER)) {
- loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1;
+ loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1;
ret = filemap_write_and_wait_range(inode->i_mapping,
orig_pos, lend);
diff --git a/fs/coredump.c b/fs/coredump.c
index 9d235fa14ab9..d3a4f5dc2e36 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -42,6 +42,7 @@
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
+#include <uapi/linux/pidfd.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -56,6 +57,13 @@
static bool dump_vma_snapshot(struct coredump_params *cprm);
static void free_vma_snapshot(struct coredump_params *cprm);
+/*
+ * File descriptor number for the pidfd for the thread-group leader of
+ * the coredumping task installed into the usermode helper's file
+ * descriptor table.
+ */
+#define COREDUMP_PIDFD_NUMBER 3
+
static int core_uses_pid;
static unsigned int core_pipe_limit;
static char core_pattern[CORENAME_MAX_SIZE] = "core";
@@ -332,6 +340,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
case 'C':
err = cn_printf(cn, "%d", cprm->cpu);
break;
+ /* pidfd number */
+ case 'F': {
+ /*
+ * Installing a pidfd only makes sense if
+ * we actually spawn a usermode helper.
+ */
+ if (!ispipe)
+ break;
+
+ /*
+ * Note that we'll install a pidfd for the
+ * thread-group leader. We know that task
+ * linkage hasn't been removed yet and even if
+ * this @current isn't the actual thread-group
+ * leader we know that the thread-group leader
+ * cannot be reaped until @current has exited.
+ */
+ cprm->pid = task_tgid(current);
+ err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER);
+ break;
+ }
default:
break;
}
@@ -488,7 +517,7 @@ static void wait_for_dump_helpers(struct file *file)
}
/*
- * umh_pipe_setup
+ * umh_coredump_setup
* helper function to customize the process used
* to collect the core in userspace. Specifically
* it sets up a pipe and installs it as fd 0 (stdin)
@@ -498,21 +527,61 @@ static void wait_for_dump_helpers(struct file *file)
* is a special value that we use to trap recursive
* core dumps
*/
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
{
struct file *files[2];
+ struct file *pidfs_file = NULL;
struct coredump_params *cp = (struct coredump_params *)info->data;
- int err = create_pipe_files(files, 0);
+ int err;
+
+ if (cp->pid) {
+ int fd;
+
+ fd = pidfd_prepare(cp->pid, 0, &pidfs_file);
+ if (fd < 0)
+ return fd;
+
+ /*
+ * We don't care about the fd. We also cannot simply
+ * replace it below because dup2() will refuse to close
+ * this file descriptor if its in a larval state. So
+ * close it!
+ */
+ put_unused_fd(fd);
+
+ /*
+ * Usermode helpers are childen of either
+ * system_unbound_wq or of kthreadd. So we know that
+ * we're starting off with a clean file descriptor
+ * table. So we should always be able to use
+ * COREDUMP_PIDFD_NUMBER as our file descriptor value.
+ */
+ err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0);
+ if (err < 0)
+ goto out_fail;
+
+ pidfs_file = NULL;
+ }
+
+ err = create_pipe_files(files, 0);
if (err)
- return err;
+ goto out_fail;
cp->file = files[1];
err = replace_fd(0, files[0], 0);
fput(files[0]);
+ if (err < 0)
+ goto out_fail;
+
/* and disallow core files too */
current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
+ err = 0;
+
+out_fail:
+ if (pidfs_file)
+ fput(pidfs_file);
return err;
}
@@ -589,7 +658,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
}
if (cprm.limit == 1) {
- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+ /* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
*
* Normally core limits are irrelevant to pipes, since
* we're not writing to the file system, but we use
@@ -634,7 +703,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
retval = -ENOMEM;
sub_info = call_usermodehelper_setup(helper_argv[0],
helper_argv, NULL, GFP_KERNEL,
- umh_pipe_setup, NULL, &cprm);
+ umh_coredump_setup, NULL, &cprm);
if (sub_info)
retval = call_usermodehelper_exec(sub_info,
UMH_WAIT_EXEC);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0618af36f550..3c9ab6461579 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1826,8 +1826,8 @@ static int dlm_tcp_listen_validate(void)
{
/* We don't support multi-homed hosts */
if (dlm_local_count > 1) {
- log_print("TCP protocol can't handle multi-homed hosts, try SCTP");
- return -EINVAL;
+ log_print("Detect multi-homed hosts but use only the first IP address.");
+ log_print("Try SCTP, if you want to enable multi-link.");
}
return 0;
diff --git a/fs/exec.c b/fs/exec.c
index 4a6255aa4ea7..ee71a315cc51 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1257,13 +1257,12 @@ int begin_new_exec(struct linux_binprm * bprm)
*/
bprm->point_of_no_return = true;
- /*
- * Make this the only thread in the thread group.
- */
+ /* Make this the only thread in the thread group */
retval = de_thread(me);
if (retval)
goto out;
-
+ /* see the comment in check_unsafe_exec() */
+ current->fs->in_exec = 0;
/*
* Cancel any io_uring activity across execve
*/
@@ -1516,6 +1515,8 @@ static void free_bprm(struct linux_binprm *bprm)
}
free_arg_pages(bprm);
if (bprm->cred) {
+ /* in case exec fails before de_thread() succeeds */
+ current->fs->in_exec = 0;
mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
@@ -1604,6 +1605,10 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
* suid exec because the differently privileged task
* will be able to manipulate the current directory, etc.
* It would be nice to force an unshare instead...
+ *
+ * Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS)
+ * from another sub-thread until de_thread() succeeds, this
+ * state is protected by cred_guard_mutex we hold.
*/
t = p;
n_fs = 1;
@@ -1890,7 +1895,6 @@ static int bprm_execve(struct linux_binprm *bprm,
sched_mm_cid_after_execve(current);
/* execve succeeded */
- current->fs->in_exec = 0;
current->in_execve = 0;
rseq_execve(current);
user_events_execve(current);
@@ -1910,7 +1914,6 @@ out:
out_unmark:
sched_mm_cid_after_execve(current);
- current->fs->in_exec = 0;
current->in_execve = 0;
return retval;
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index 74590041fb2c..24e1e05f9f34 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -265,7 +265,7 @@ int exfat_find_last_cluster(struct super_block *sb, struct exfat_chain *p_chain,
clu = next;
if (exfat_ent_get(sb, clu, &next))
return -EIO;
- } while (next != EXFAT_EOF_CLUSTER);
+ } while (next != EXFAT_EOF_CLUSTER && count <= p_chain->size);
if (p_chain->size != count) {
exfat_fs_error(sb,
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 396474e9e2bf..3a2dfc59fb40 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -641,8 +641,8 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
/* Hm, nope. Are (enough) root reserved clusters available? */
if (uid_eq(sbi->s_resuid, current_fsuid()) ||
(!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
- capable(CAP_SYS_RESOURCE) ||
- (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
+ (flags & EXT4_MB_USE_ROOT_BLOCKS) ||
+ capable(CAP_SYS_RESOURCE)) {
if (free_clusters >= (nclusters + dirty_clusters +
resv_clusters))
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 6fe3c941b565..4d6ba140276b 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -351,10 +351,9 @@ int ext4_check_blockref(const char *function, unsigned int line,
{
__le32 *bref = p;
unsigned int blk;
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
- if (ext4_has_feature_journal(inode->i_sb) &&
- (inode->i_ino ==
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ if (journal && inode == journal->j_inode)
return 0;
while (bref < p+max) {
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 7ea33c3fe94e..6682b8ab11f1 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -104,6 +104,9 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
else if (unlikely(le32_to_cpu(de->inode) >
le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
+ else if (unlikely(next_offset == size && de->name_len == 1 &&
+ de->name[0] == '.'))
+ error_msg = "'.' directory cannot be the last in data block";
else
return 0;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 60455c84a937..81fe87fcbfa0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -273,7 +273,8 @@ struct ext4_system_blocks {
/*
* Flags for ext4_io_end->flags
*/
-#define EXT4_IO_END_UNWRITTEN 0x0001
+#define EXT4_IO_END_UNWRITTEN 0x0001
+#define EXT4_IO_END_FAILED 0x0002
struct ext4_io_end_vec {
struct list_head list; /* list of io_end_vec */
@@ -2994,6 +2995,8 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
+extern int ext4_truncate_page_cache_block_range(struct inode *inode,
+ loff_t start, loff_t end);
extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 32218ac7f50f..39e3661a80c4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4660,22 +4660,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,
goto out_mutex;
}
- /*
- * For journalled data we need to write (and checkpoint) pages
- * before discarding page cache to avoid inconsitent data on
- * disk in case of crash before zeroing trans is committed.
- */
- if (ext4_should_journal_data(inode)) {
- ret = filemap_write_and_wait_range(mapping, start,
- end - 1);
- if (ret) {
- filemap_invalidate_unlock(mapping);
- goto out_mutex;
- }
+ /* Now release the pages and zero block aligned part of pages */
+ ret = ext4_truncate_page_cache_block_range(inode, start, end);
+ if (ret) {
+ filemap_invalidate_unlock(mapping);
+ goto out_mutex;
}
- /* Now release the pages and zero block aligned part of pages */
- truncate_pagecache_range(inode, start, end - 1);
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 19d7bcf16ebb..86245e27be18 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -31,6 +31,7 @@
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
+#include <linux/rmap.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
@@ -378,10 +379,11 @@ static int __check_block_validity(struct inode *inode, const char *func,
unsigned int line,
struct ext4_map_blocks *map)
{
- if (ext4_has_feature_journal(inode->i_sb) &&
- (inode->i_ino ==
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+
+ if (journal && inode == journal->j_inode)
return 0;
+
if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
"lblock %lu mapped to illegal pblock %llu "
@@ -3891,6 +3893,68 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
return ret;
}
+static inline void ext4_truncate_folio(struct inode *inode,
+ loff_t start, loff_t end)
+{
+ unsigned long blocksize = i_blocksize(inode);
+ struct folio *folio;
+
+ /* Nothing to be done if no complete block needs to be truncated. */
+ if (round_up(start, blocksize) >= round_down(end, blocksize))
+ return;
+
+ folio = filemap_lock_folio(inode->i_mapping, start >> PAGE_SHIFT);
+ if (IS_ERR(folio))
+ return;
+
+ if (folio_mkclean(folio))
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+}
+
+int ext4_truncate_page_cache_block_range(struct inode *inode,
+ loff_t start, loff_t end)
+{
+ unsigned long blocksize = i_blocksize(inode);
+ int ret;
+
+ /*
+ * For journalled data we need to write (and checkpoint) pages
+ * before discarding page cache to avoid inconsitent data on disk
+ * in case of crash before freeing or unwritten converting trans
+ * is committed.
+ */
+ if (ext4_should_journal_data(inode)) {
+ ret = filemap_write_and_wait_range(inode->i_mapping, start,
+ end - 1);
+ if (ret)
+ return ret;
+ goto truncate_pagecache;
+ }
+
+ /*
+ * If the block size is less than the page size, the file's mapped
+ * blocks within one page could be freed or converted to unwritten.
+ * So it's necessary to remove writable userspace mappings, and then
+ * ext4_page_mkwrite() can be called during subsequent write access
+ * to these partial folios.
+ */
+ if (!IS_ALIGNED(start | end, PAGE_SIZE) &&
+ blocksize < PAGE_SIZE && start < inode->i_size) {
+ loff_t page_boundary = round_up(start, PAGE_SIZE);
+
+ ext4_truncate_folio(inode, start, min(page_boundary, end));
+ if (end > page_boundary)
+ ext4_truncate_folio(inode,
+ round_down(end, PAGE_SIZE), end);
+ }
+
+truncate_pagecache:
+ truncate_pagecache_range(inode, start, end - 1);
+ return 0;
+}
+
static void ext4_wait_dax_page(struct inode *inode)
{
filemap_invalidate_unlock(inode->i_mapping);
@@ -3945,17 +4009,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
trace_ext4_punch_hole(inode, offset, length, 0);
- /*
- * Write out all dirty pages to avoid race conditions
- * Then release them.
- */
- if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- ret = filemap_write_and_wait_range(mapping, offset,
- offset + length - 1);
- if (ret)
- return ret;
- }
-
inode_lock(inode);
/* No need to punch hole beyond i_size */
@@ -4017,8 +4070,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
ret = ext4_update_disksize_before_punch(inode, offset, length);
if (ret)
goto out_dio;
- truncate_pagecache_range(inode, first_block_offset,
- last_block_offset);
+
+ ret = ext4_truncate_page_cache_block_range(inode,
+ first_block_offset, last_block_offset + 1);
+ if (ret)
+ goto out_dio;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4692,22 +4748,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
inode_set_iversion_queried(inode, val);
}
-static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
-
+static int check_igot_inode(struct inode *inode, ext4_iget_flags flags,
+ const char *function, unsigned int line)
{
+ const char *err_str;
+
if (flags & EXT4_IGET_EA_INODE) {
- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
- return "missing EA_INODE flag";
+ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+ err_str = "missing EA_INODE flag";
+ goto error;
+ }
if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
- EXT4_I(inode)->i_file_acl)
- return "ea_inode with extended attributes";
+ EXT4_I(inode)->i_file_acl) {
+ err_str = "ea_inode with extended attributes";
+ goto error;
+ }
} else {
- if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
- return "unexpected EA_INODE flag";
+ if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+ /*
+ * open_by_handle_at() could provide an old inode number
+ * that has since been reused for an ea_inode; this does
+ * not indicate filesystem corruption
+ */
+ if (flags & EXT4_IGET_HANDLE)
+ return -ESTALE;
+ err_str = "unexpected EA_INODE flag";
+ goto error;
+ }
}
- if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
- return "unexpected bad inode w/o EXT4_IGET_BAD";
- return NULL;
+ if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
+ err_str = "unexpected bad inode w/o EXT4_IGET_BAD";
+ goto error;
+ }
+ return 0;
+
+error:
+ ext4_error_inode(inode, function, line, 0, err_str);
+ return -EFSCORRUPTED;
}
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
@@ -4719,7 +4796,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
struct ext4_inode_info *ei;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct inode *inode;
- const char *err_str;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
loff_t size;
@@ -4748,10 +4824,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (!inode)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW)) {
- if ((err_str = check_igot_inode(inode, flags)) != NULL) {
- ext4_error_inode(inode, function, line, 0, err_str);
+ ret = check_igot_inode(inode, flags, function, line);
+ if (ret) {
iput(inode);
- return ERR_PTR(-EFSCORRUPTED);
+ return ERR_PTR(ret);
}
return inode;
}
@@ -5023,13 +5099,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ret = -EFSCORRUPTED;
goto bad_inode;
}
- if ((err_str = check_igot_inode(inode, flags)) != NULL) {
- ext4_error_inode(inode, function, line, 0, err_str);
- ret = -EFSCORRUPTED;
- goto bad_inode;
+ ret = check_igot_inode(inode, flags, function, line);
+ /*
+ * -ESTALE here means there is nothing inherently wrong with the inode,
+ * it's just not an inode we can return for an fhandle lookup.
+ */
+ if (ret == -ESTALE) {
+ brelse(iloc.bh);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(-ESTALE);
}
-
+ if (ret)
+ goto bad_inode;
brelse(iloc.bh);
+
unlock_new_inode(inode);
return inode;
@@ -5450,7 +5534,7 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
oldsize & (inode->i_sb->s_blocksize - 1)) {
error = ext4_inode_attach_jinode(inode);
if (error)
- goto err_out;
+ goto out_mmap_sem;
}
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 96a048d3f51b..2e4575becd4f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2041,7 +2041,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
* split it in half by count; each resulting block will have at least
* half the space free.
*/
- if (i > 0)
+ if (i >= 0)
split = count - move;
else
split = count/2;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7ab4f5a9bf5b..7287dbfe13f1 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -181,14 +181,25 @@ static int ext4_end_io_end(ext4_io_end_t *io_end)
"list->prev 0x%p\n",
io_end, inode->i_ino, io_end->list.next, io_end->list.prev);
- io_end->handle = NULL; /* Following call will use up the handle */
- ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
+ /*
+ * Do not convert the unwritten extents if data writeback fails,
+ * or stale data may be exposed.
+ */
+ io_end->handle = NULL; /* Following call will use up the handle */
+ if (unlikely(io_end->flag & EXT4_IO_END_FAILED)) {
+ ret = -EIO;
+ if (handle)
+ jbd2_journal_free_reserved(handle);
+ } else {
+ ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
+ }
if (ret < 0 && !ext4_forced_shutdown(inode->i_sb)) {
ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written "
"extents -- potential data loss! "
"(inode %lu, error %d)", inode->i_ino, ret);
}
+
ext4_clear_io_unwritten_flag(io_end);
ext4_release_io_end(io_end);
return ret;
@@ -344,6 +355,7 @@ static void ext4_end_bio(struct bio *bio)
bio->bi_status, inode->i_ino,
(unsigned long long)
bi_sector >> (inode->i_blkbits - 9));
+ io_end->flag |= EXT4_IO_END_FAILED;
mapping_set_error(inode->i_mapping,
blk_status_to_errno(bio->bi_status));
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f019ce64eba4..d2b58f940aab 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2821,6 +2821,13 @@ static int ext4_check_opt_consistency(struct fs_context *fc,
}
if (is_remount) {
+ if (!sbi->s_journal &&
+ ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Remounting fs w/o journal so ignoring data_err option");
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT);
+ }
+
if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
ext4_msg(NULL, KERN_ERR, "can't mount with "
@@ -5421,6 +5428,11 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
"data=, fs mounted w/o journal");
goto failed_mount3a;
}
+ if (test_opt(sb, DATA_ERR_ABORT)) {
+ ext4_msg(sb, KERN_ERR,
+ "can't mount with data_err=abort, fs mounted w/o journal");
+ goto failed_mount3a;
+ }
sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
clear_opt(sb, JOURNAL_CHECKSUM);
clear_opt(sb, DATA_FLAGS);
@@ -6771,6 +6783,7 @@ static int ext4_reconfigure(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
int ret;
+ bool old_ro = sb_rdonly(sb);
fc->s_fs_info = EXT4_SB(sb);
@@ -6782,9 +6795,9 @@ static int ext4_reconfigure(struct fs_context *fc)
if (ret < 0)
return ret;
- ext4_msg(sb, KERN_INFO, "re-mounted %pU %s. Quota mode: %s.",
- &sb->s_uuid, sb_rdonly(sb) ? "ro" : "r/w",
- ext4_quota_mode(sb));
+ ext4_msg(sb, KERN_INFO, "re-mounted %pU%s.",
+ &sb->s_uuid,
+ (old_ro != sb_rdonly(sb)) ? (sb_rdonly(sb) ? " ro" : " r/w") : "");
return 0;
}
@@ -6808,22 +6821,29 @@ static int ext4_statfs_project(struct super_block *sb,
dquot->dq_dqb.dqb_bhardlimit);
limit >>= sb->s_blocksize_bits;
- if (limit && buf->f_blocks > limit) {
+ if (limit) {
+ uint64_t remaining = 0;
+
curblock = (dquot->dq_dqb.dqb_curspace +
dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
- buf->f_blocks = limit;
- buf->f_bfree = buf->f_bavail =
- (buf->f_blocks > curblock) ?
- (buf->f_blocks - curblock) : 0;
+ if (limit > curblock)
+ remaining = limit - curblock;
+
+ buf->f_blocks = min(buf->f_blocks, limit);
+ buf->f_bfree = min(buf->f_bfree, remaining);
+ buf->f_bavail = min(buf->f_bavail, remaining);
}
limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
dquot->dq_dqb.dqb_ihardlimit);
- if (limit && buf->f_files > limit) {
- buf->f_files = limit;
- buf->f_ffree =
- (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
- (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+ if (limit) {
+ uint64_t remaining = 0;
+
+ if (limit > dquot->dq_dqb.dqb_curinodes)
+ remaining = limit - dquot->dq_dqb.dqb_curinodes;
+
+ buf->f_files = min(buf->f_files, limit);
+ buf->f_ffree = min(buf->f_ffree, remaining);
}
spin_unlock(&dquot->dq_dqb_lock);
@@ -6926,12 +6946,25 @@ static int ext4_release_dquot(struct dquot *dquot)
{
int ret, err;
handle_t *handle;
+ bool freeze_protected = false;
+
+ /*
+ * Trying to sb_start_intwrite() in a running transaction
+ * can result in a deadlock. Further, running transactions
+ * are already protected from freezing.
+ */
+ if (!ext4_journal_current_handle()) {
+ sb_start_intwrite(dquot->dq_sb);
+ freeze_protected = true;
+ }
handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
if (IS_ERR(handle)) {
/* Release dquot anyway to avoid endless cycle in dqput() */
dquot_release(dquot);
+ if (freeze_protected)
+ sb_end_intwrite(dquot->dq_sb);
return PTR_ERR(handle);
}
ret = dquot_release(dquot);
@@ -6942,6 +6975,10 @@ static int ext4_release_dquot(struct dquot *dquot)
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
+
+ if (freeze_protected)
+ sb_end_intwrite(dquot->dq_sb);
+
return ret;
}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index df5ab1a75fc4..ca22aa9e04b4 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1176,15 +1176,24 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
{
struct inode *ea_inode;
struct ext4_xattr_entry *entry;
+ struct ext4_iloc iloc;
bool dirty = false;
unsigned int ea_ino;
int err;
int credits;
+ void *end;
+
+ if (block_csum)
+ end = (void *)bh->b_data + bh->b_size;
+ else {
+ ext4_get_inode_loc(parent, &iloc);
+ end = (void *)ext4_raw_inode(&iloc) + EXT4_SB(parent->i_sb)->s_inode_size;
+ }
/* One credit for dec ref on ea_inode, one for orphan list addition, */
credits = 2 + extra_credits;
- for (entry = first; !IS_LAST_ENTRY(entry);
+ for (entry = first; (void *)entry < end && !IS_LAST_ENTRY(entry);
entry = EXT4_XATTR_NEXT(entry)) {
if (!entry->e_value_inum)
continue;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c6317596e695..3ec815e615e7 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1327,21 +1327,13 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long flags;
- if (cpc->reason & CP_UMOUNT) {
- if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
- NM_I(sbi)->nat_bits_blocks > BLKS_PER_SEG(sbi)) {
- clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
- f2fs_notice(sbi, "Disable nat_bits due to no space");
- } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
- f2fs_nat_bitmap_enabled(sbi)) {
- f2fs_enable_nat_bits(sbi);
- set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
- f2fs_notice(sbi, "Rebuild and enable nat_bits");
- }
- }
-
spin_lock_irqsave(&sbi->cp_lock, flags);
+ if ((cpc->reason & CP_UMOUNT) &&
+ le32_to_cpu(ckpt->cp_pack_total_block_count) >
+ sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
+ disable_nat_bits(sbi, false);
+
if (cpc->reason & CP_TRIMMED)
__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
else
@@ -1524,8 +1516,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_next_addr(sbi);
/* write nat bits */
- if ((cpc->reason & CP_UMOUNT) &&
- is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
+ if (enabled_nat_bits(sbi, cpc)) {
__u64 cp_ver = cur_cp_version(ckpt);
block_t blk;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index acd0764b0286..a123bb26acd8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -54,8 +54,8 @@ bool f2fs_is_cp_guaranteed(struct page *page)
struct inode *inode;
struct f2fs_sb_info *sbi;
- if (!mapping)
- return false;
+ if (fscrypt_is_bounce_page(page))
+ return page_private_gcing(fscrypt_pagecache_page(page));
inode = mapping->host;
sbi = F2FS_I_SB(inode);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d28e3df61cc4..911c4c64d729 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2197,6 +2197,36 @@ static inline void f2fs_up_write(struct f2fs_rwsem *sem)
#endif
}
+static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
+{
+ unsigned long flags;
+ unsigned char *nat_bits;
+
+ /*
+ * In order to re-enable nat_bits we need to call fsck.f2fs by
+ * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost,
+ * so let's rely on regular fsck or unclean shutdown.
+ */
+
+ if (lock)
+ spin_lock_irqsave(&sbi->cp_lock, flags);
+ __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
+ nat_bits = NM_I(sbi)->nat_bits;
+ NM_I(sbi)->nat_bits = NULL;
+ if (lock)
+ spin_unlock_irqrestore(&sbi->cp_lock, flags);
+
+ kvfree(nat_bits);
+}
+
+static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc)
+{
+ bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+
+ return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set;
+}
+
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
f2fs_down_read(&sbi->cp_rwsem);
@@ -2444,8 +2474,14 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK;
spin_lock(&sbi->stat_lock);
- f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
- sbi->total_valid_block_count -= (block_t)count;
+ if (unlikely(sbi->total_valid_block_count < count)) {
+ f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%lu, count:%u",
+ sbi->total_valid_block_count, inode->i_ino, count);
+ sbi->total_valid_block_count = 0;
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ } else {
+ sbi->total_valid_block_count -= count;
+ }
if (sbi->reserved_blocks &&
sbi->current_reserved_blocks < sbi->reserved_blocks)
sbi->current_reserved_blocks = min(sbi->reserved_blocks,
@@ -3623,7 +3659,6 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
int f2fs_truncate_xattr_node(struct inode *inode);
int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
unsigned int seq_id);
-bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi);
int f2fs_remove_inode_page(struct inode *inode);
struct page *f2fs_new_inode_page(struct inode *inode);
struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
@@ -3648,7 +3683,6 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
-void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi);
int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 7ad4a9241759..06941705e893 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -35,10 +35,8 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
if (f2fs_inode_dirtied(inode, sync))
return;
- if (f2fs_is_atomic_file(inode)) {
- set_inode_flag(inode, FI_ATOMIC_DIRTIED);
+ if (f2fs_is_atomic_file(inode))
return;
- }
mark_inode_dirty_sync(inode);
}
@@ -764,8 +762,12 @@ retry:
if (err == -ENOENT)
return;
+ if (err == -EFSCORRUPTED)
+ goto stop_checkpoint;
+
if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT)
goto retry;
+stop_checkpoint:
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_UPDATE_INODE);
return;
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 2e08e1fdf485..4d6f0a6365fe 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -411,7 +411,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
if (is_inode_flag_set(dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(dir)->i_projid,
- F2FS_I(old_dentry->d_inode)->i_projid)))
+ F2FS_I(inode)->i_projid)))
return -EXDEV;
err = f2fs_dquot_initialize(dir);
@@ -905,7 +905,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(new_dir)->i_projid,
- F2FS_I(old_dentry->d_inode)->i_projid)))
+ F2FS_I(old_inode)->i_projid)))
return -EXDEV;
/*
@@ -1101,10 +1101,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(new_dir)->i_projid,
- F2FS_I(old_dentry->d_inode)->i_projid)) ||
- (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
+ F2FS_I(old_inode)->i_projid)) ||
+ (is_inode_flag_set(old_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(old_dir)->i_projid,
- F2FS_I(new_dentry->d_inode)->i_projid)))
+ F2FS_I(new_inode)->i_projid)))
return -EXDEV;
err = f2fs_dquot_initialize(old_dir);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index dedba481b66d..b00d66b95321 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1134,7 +1134,14 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
trace_f2fs_truncate_inode_blocks_enter(inode, from);
level = get_node_path(inode, from, offset, noffset);
- if (level < 0) {
+ if (level <= 0) {
+ if (!level) {
+ level = -EFSCORRUPTED;
+ f2fs_err(sbi, "%s: inode ino=%lx has corrupted node block, from:%lu addrs:%u",
+ __func__, inode->i_ino,
+ from, ADDRS_PER_INODE(inode));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ }
trace_f2fs_truncate_inode_blocks_exit(inode, level);
return level;
}
@@ -2258,24 +2265,6 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
}
}
-bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi)
-{
- struct f2fs_nm_info *nm_i = NM_I(sbi);
- unsigned int i;
- bool ret = true;
-
- f2fs_down_read(&nm_i->nat_tree_lock);
- for (i = 0; i < nm_i->nat_blocks; i++) {
- if (!test_bit_le(i, nm_i->nat_block_bitmap)) {
- ret = false;
- break;
- }
- }
- f2fs_up_read(&nm_i->nat_tree_lock);
-
- return ret;
-}
-
static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
bool set, bool build)
{
@@ -2954,23 +2943,7 @@ add_out:
list_add_tail(&nes->set_list, head);
}
-static void __update_nat_bits(struct f2fs_nm_info *nm_i, unsigned int nat_ofs,
- unsigned int valid)
-{
- if (valid == 0) {
- __set_bit_le(nat_ofs, nm_i->empty_nat_bits);
- __clear_bit_le(nat_ofs, nm_i->full_nat_bits);
- return;
- }
-
- __clear_bit_le(nat_ofs, nm_i->empty_nat_bits);
- if (valid == NAT_ENTRY_PER_BLOCK)
- __set_bit_le(nat_ofs, nm_i->full_nat_bits);
- else
- __clear_bit_le(nat_ofs, nm_i->full_nat_bits);
-}
-
-static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
struct page *page)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2979,7 +2952,7 @@ static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
int valid = 0;
int i = 0;
- if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+ if (!enabled_nat_bits(sbi, NULL))
return;
if (nat_index == 0) {
@@ -2990,36 +2963,17 @@ static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
valid++;
}
-
- __update_nat_bits(nm_i, nat_index, valid);
-}
-
-void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
-{
- struct f2fs_nm_info *nm_i = NM_I(sbi);
- unsigned int nat_ofs;
-
- f2fs_down_read(&nm_i->nat_tree_lock);
-
- for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) {
- unsigned int valid = 0, nid_ofs = 0;
-
- /* handle nid zero due to it should never be used */
- if (unlikely(nat_ofs == 0)) {
- valid = 1;
- nid_ofs = 1;
- }
-
- for (; nid_ofs < NAT_ENTRY_PER_BLOCK; nid_ofs++) {
- if (!test_bit_le(nid_ofs,
- nm_i->free_nid_bitmap[nat_ofs]))
- valid++;
- }
-
- __update_nat_bits(nm_i, nat_ofs, valid);
+ if (valid == 0) {
+ __set_bit_le(nat_index, nm_i->empty_nat_bits);
+ __clear_bit_le(nat_index, nm_i->full_nat_bits);
+ return;
}
- f2fs_up_read(&nm_i->nat_tree_lock);
+ __clear_bit_le(nat_index, nm_i->empty_nat_bits);
+ if (valid == NAT_ENTRY_PER_BLOCK)
+ __set_bit_le(nat_index, nm_i->full_nat_bits);
+ else
+ __clear_bit_le(nat_index, nm_i->full_nat_bits);
}
static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -3038,7 +2992,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
* #1, flush nat entries to journal in current hot data summary block.
* #2, flush nat entries to nat page.
*/
- if ((cpc->reason & CP_UMOUNT) ||
+ if (enabled_nat_bits(sbi, cpc) ||
!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
to_journal = false;
@@ -3085,7 +3039,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
if (to_journal) {
up_write(&curseg->journal_rwsem);
} else {
- update_nat_bits(sbi, start_nid, page);
+ __update_nat_bits(sbi, start_nid, page);
f2fs_put_page(page, 1);
}
@@ -3116,7 +3070,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* during unmount, let's flush nat_bits before checking
* nat_cnt[DIRTY_NAT].
*/
- if (cpc->reason & CP_UMOUNT) {
+ if (enabled_nat_bits(sbi, cpc)) {
f2fs_down_write(&nm_i->nat_tree_lock);
remove_nats_in_journal(sbi);
f2fs_up_write(&nm_i->nat_tree_lock);
@@ -3132,7 +3086,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
- if (cpc->reason & CP_UMOUNT ||
+ if (enabled_nat_bits(sbi, cpc) ||
!__has_cursum_space(journal,
nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
remove_nats_in_journal(sbi);
@@ -3169,18 +3123,15 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
__u64 cp_ver = cur_cp_version(ckpt);
block_t nat_bits_addr;
+ if (!enabled_nat_bits(sbi, NULL))
+ return 0;
+
nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
nm_i->nat_bits = f2fs_kvzalloc(sbi,
nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
if (!nm_i->nat_bits)
return -ENOMEM;
- nm_i->full_nat_bits = nm_i->nat_bits + 8;
- nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
-
- if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
- return 0;
-
nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
@@ -3197,12 +3148,13 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
cp_ver |= (cur_cp_crc(ckpt) << 32);
if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
- clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
- f2fs_notice(sbi, "Disable nat_bits due to incorrect cp_ver (%llu, %llu)",
- cp_ver, le64_to_cpu(*(__le64 *)nm_i->nat_bits));
+ disable_nat_bits(sbi, true);
return 0;
}
+ nm_i->full_nat_bits = nm_i->nat_bits + 8;
+ nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
f2fs_notice(sbi, "Found nat_bits in checkpoint");
return 0;
}
@@ -3213,7 +3165,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
unsigned int i = 0;
nid_t nid, last_nid;
- if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+ if (!enabled_nat_bits(sbi, NULL))
return;
for (i = 0; i < nm_i->nat_blocks; i++) {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index aa0e7cc2489a..6b3cafbe9867 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1499,6 +1499,10 @@ int f2fs_inode_dirtied(struct inode *inode, bool sync)
inc_page_count(sbi, F2FS_DIRTY_IMETA);
}
spin_unlock(&sbi->inode_lock[DIRTY_META]);
+
+ if (!ret && f2fs_is_atomic_file(inode))
+ set_inode_flag(inode, FI_ATOMIC_DIRTIED);
+
return ret;
}
@@ -1845,9 +1849,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid = u64_to_fsid(id);
#ifdef CONFIG_QUOTA
- if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) &&
+ if (is_inode_flag_set(d_inode(dentry), FI_PROJ_INHERIT) &&
sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
- f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf);
+ f2fs_statfs_project(sb, F2FS_I(d_inode(dentry))->i_projid, buf);
}
#endif
return 0;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 180feefc4a9c..c4b0661888a1 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -61,6 +61,12 @@ struct f2fs_attr {
int id;
};
+struct f2fs_base_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct f2fs_base_attr *a, char *buf);
+ ssize_t (*store)(struct f2fs_base_attr *a, const char *buf, size_t len);
+};
+
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf);
@@ -791,6 +797,25 @@ static void f2fs_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
+static ssize_t f2fs_base_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct f2fs_base_attr *a = container_of(attr,
+ struct f2fs_base_attr, attr);
+
+ return a->show ? a->show(a, buf) : 0;
+}
+
+static ssize_t f2fs_base_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct f2fs_base_attr *a = container_of(attr,
+ struct f2fs_base_attr, attr);
+
+ return a->store ? a->store(a, buf, len) : 0;
+}
+
/*
* Note that there are three feature list entries:
* 1) /sys/fs/f2fs/features
@@ -809,14 +834,13 @@ static void f2fs_sb_release(struct kobject *kobj)
* please add new on-disk feature in this list only.
* - ref. F2FS_SB_FEATURE_RO_ATTR()
*/
-static ssize_t f2fs_feature_show(struct f2fs_attr *a,
- struct f2fs_sb_info *sbi, char *buf)
+static ssize_t f2fs_feature_show(struct f2fs_base_attr *a, char *buf)
{
return sysfs_emit(buf, "supported\n");
}
#define F2FS_FEATURE_RO_ATTR(_name) \
-static struct f2fs_attr f2fs_attr_##_name = { \
+static struct f2fs_base_attr f2fs_base_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = 0444 }, \
.show = f2fs_feature_show, \
}
@@ -1166,37 +1190,38 @@ static struct attribute *f2fs_attrs[] = {
};
ATTRIBUTE_GROUPS(f2fs);
+#define BASE_ATTR_LIST(name) (&f2fs_base_attr_##name.attr)
static struct attribute *f2fs_feat_attrs[] = {
#ifdef CONFIG_FS_ENCRYPTION
- ATTR_LIST(encryption),
- ATTR_LIST(test_dummy_encryption_v2),
+ BASE_ATTR_LIST(encryption),
+ BASE_ATTR_LIST(test_dummy_encryption_v2),
#if IS_ENABLED(CONFIG_UNICODE)
- ATTR_LIST(encrypted_casefold),
+ BASE_ATTR_LIST(encrypted_casefold),
#endif
#endif /* CONFIG_FS_ENCRYPTION */
#ifdef CONFIG_BLK_DEV_ZONED
- ATTR_LIST(block_zoned),
+ BASE_ATTR_LIST(block_zoned),
#endif
- ATTR_LIST(atomic_write),
- ATTR_LIST(extra_attr),
- ATTR_LIST(project_quota),
- ATTR_LIST(inode_checksum),
- ATTR_LIST(flexible_inline_xattr),
- ATTR_LIST(quota_ino),
- ATTR_LIST(inode_crtime),
- ATTR_LIST(lost_found),
+ BASE_ATTR_LIST(atomic_write),
+ BASE_ATTR_LIST(extra_attr),
+ BASE_ATTR_LIST(project_quota),
+ BASE_ATTR_LIST(inode_checksum),
+ BASE_ATTR_LIST(flexible_inline_xattr),
+ BASE_ATTR_LIST(quota_ino),
+ BASE_ATTR_LIST(inode_crtime),
+ BASE_ATTR_LIST(lost_found),
#ifdef CONFIG_FS_VERITY
- ATTR_LIST(verity),
+ BASE_ATTR_LIST(verity),
#endif
- ATTR_LIST(sb_checksum),
+ BASE_ATTR_LIST(sb_checksum),
#if IS_ENABLED(CONFIG_UNICODE)
- ATTR_LIST(casefold),
+ BASE_ATTR_LIST(casefold),
#endif
- ATTR_LIST(readonly),
+ BASE_ATTR_LIST(readonly),
#ifdef CONFIG_F2FS_FS_COMPRESSION
- ATTR_LIST(compression),
+ BASE_ATTR_LIST(compression),
#endif
- ATTR_LIST(pin_file),
+ BASE_ATTR_LIST(pin_file),
NULL,
};
ATTRIBUTE_GROUPS(f2fs_feat);
@@ -1263,9 +1288,14 @@ static struct kset f2fs_kset = {
.kobj = {.ktype = &f2fs_ktype},
};
+static const struct sysfs_ops f2fs_feat_attr_ops = {
+ .show = f2fs_base_attr_show,
+ .store = f2fs_base_attr_store,
+};
+
static const struct kobj_type f2fs_feat_ktype = {
.default_groups = f2fs_feat_groups,
- .sysfs_ops = &f2fs_attr_ops,
+ .sysfs_ops = &f2fs_feat_attr_ops,
};
static struct kobject f2fs_feat = {
diff --git a/fs/file.c b/fs/file.c
index a178efc8cf4b..f8cf6728c6a0 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -362,17 +362,25 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho
old_fds = old_fdt->fd;
new_fds = new_fdt->fd;
+ /*
+ * We may be racing against fd allocation from other threads using this
+ * files_struct, despite holding ->file_lock.
+ *
+ * alloc_fd() might have already claimed a slot, while fd_install()
+ * did not populate it yet. Note the latter operates locklessly, so
+ * the file can show up as we are walking the array below.
+ *
+ * At the same time we know no files will disappear as all other
+ * operations take the lock.
+ *
+ * Instead of trying to placate userspace racing with itself, we
+ * ref the file if we see it and mark the fd slot as unused otherwise.
+ */
for (i = open_files; i != 0; i--) {
- struct file *f = *old_fds++;
+ struct file *f = rcu_dereference_raw(*old_fds++);
if (f) {
get_file(f);
} else {
- /*
- * The fd may be claimed in the fd bitmap but not yet
- * instantiated in the files array if a sibling thread
- * is partway through open(). So make sure that this
- * fd is available to the new process.
- */
__clear_open_fd(open_files - i, new_fdt);
}
rcu_assign_pointer(*new_fds++, f);
@@ -625,7 +633,7 @@ static struct file *pick_file(struct files_struct *files, unsigned fd)
return NULL;
fd = array_index_nospec(fd, fdt->max_fds);
- file = fdt->fd[fd];
+ file = rcu_dereference_raw(fdt->fd[fd]);
if (file) {
rcu_assign_pointer(fdt->fd[fd], NULL);
__put_unused_fd(files, fd);
@@ -1095,7 +1103,7 @@ __releases(&files->file_lock)
*/
fdt = files_fdtable(files);
fd = array_index_nospec(fd, fdt->max_fds);
- tofree = fdt->fd[fd];
+ tofree = rcu_dereference_raw(fdt->fd[fd]);
if (!tofree && fd_is_open(fd, fdt))
goto Ebusy;
get_file(file);
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 58b9067b2391..95e5256821a5 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -156,15 +156,19 @@ static int fs_index(const char __user * __name)
static int fs_name(unsigned int index, char __user * buf)
{
struct file_system_type * tmp;
- int len, res;
+ int len, res = -EINVAL;
read_lock(&file_systems_lock);
- for (tmp = file_systems; tmp; tmp = tmp->next, index--)
- if (index <= 0 && try_module_get(tmp->owner))
+ for (tmp = file_systems; tmp; tmp = tmp->next, index--) {
+ if (index == 0) {
+ if (try_module_get(tmp->owner))
+ res = 0;
break;
+ }
+ }
read_unlock(&file_systems_lock);
- if (!tmp)
- return -EINVAL;
+ if (res)
+ return res;
/* OK, we got the reference, so we can safely block */
len = strlen(tmp->name) + 1;
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 12ef91d170bb..7faf1af59d5d 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -681,7 +681,6 @@ static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
0, 0, fuse_wait_dax_page(inode));
}
-/* dmap_end == 0 leads to unmapping of whole file */
int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
u64 dmap_end)
{
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 89bffaed421f..82951a535d2d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1121,6 +1121,8 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
else if (err == -EINTR)
fuse_invalidate_attr(inode);
+ if (err == -ENOSYS)
+ err = -EPERM;
return err;
}
@@ -1875,7 +1877,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
if (FUSE_IS_DAX(inode) && is_truncate) {
filemap_invalidate_lock(mapping);
fault_blocked = true;
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err) {
filemap_invalidate_unlock(mapping);
return err;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ceb9f7d23038..3e4c3fcb588b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -241,7 +241,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err)
goto out_inode_unlock;
}
@@ -3023,7 +3023,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
inode_lock(inode);
if (block_faults) {
filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err)
goto out;
}
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index d84dacbdce2c..a6814bea0e0a 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1430,6 +1430,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
unsigned int virtqueue_size;
int err = -EIO;
+ if (!fsc->source)
+ return invalf(fsc, "No source specified");
+
/* This gets a reference on virtio_fs object. This ptr gets installed
* in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
* to drop the reference to this object.
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2c0908a30210..687670075d22 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -853,11 +853,12 @@ static void run_queue(struct gfs2_glock *gl, const int nonblock)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
- struct gfs2_holder *gh = NULL;
+ struct gfs2_holder *gh;
if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
return;
+ /* While a demote is in progress, the GLF_LOCK flag must be set. */
GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
@@ -869,18 +870,22 @@ __acquires(&gl->gl_lockref.lock)
set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
gl->gl_target = gl->gl_demote_state;
+ do_xmote(gl, NULL, gl->gl_target);
+ return;
} else {
if (test_bit(GLF_DEMOTE, &gl->gl_flags))
gfs2_demote_wake(gl);
if (do_promote(gl))
goto out_unlock;
gh = find_first_waiter(gl);
+ if (!gh)
+ goto out_unlock;
gl->gl_target = gh->gh_state;
if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
do_error(gl, 0); /* Fail queued try locks */
+ do_xmote(gl, gh, gl->gl_target);
+ return;
}
- do_xmote(gl, gh, gl->gl_target);
- return;
out_sched:
clear_bit(GLF_LOCK, &gl->gl_flags);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 29085643ad10..1cb5ce63fbf6 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -658,7 +658,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (!IS_ERR(inode)) {
if (S_ISDIR(inode->i_mode)) {
iput(inode);
- inode = ERR_PTR(-EISDIR);
+ inode = NULL;
+ error = -EISDIR;
goto fail_gunlock;
}
d_instantiate(dentry, inode);
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 6add6ebfef89..cb823a8a6ba9 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
else
key_len = tree->max_key_len + 1;
+ if (key_len > sizeof(hfs_btree_key) || key_len < 1) {
+ memset(key, 0, sizeof(hfs_btree_key));
+ pr_err("hfs: Invalid key length: %d\n", key_len);
+ return;
+ }
+
hfs_bnode_read(node, key, off, key_len);
}
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 87974d5e6791..079ea80534f7 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
else
key_len = tree->max_key_len + 2;
+ if (key_len > sizeof(hfsplus_btree_key) || key_len < 1) {
+ memset(key, 0, sizeof(hfsplus_btree_key));
+ pr_err("hfsplus: Invalid key length: %d\n", key_len);
+ return;
+ }
+
hfs_bnode_read(node, key, off, key_len);
}
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 8b39c15c408c..15b2f094d36e 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -60,7 +60,7 @@ struct hostfs_stat {
unsigned int uid;
unsigned int gid;
unsigned long long size;
- struct hostfs_timespec atime, mtime, ctime;
+ struct hostfs_timespec atime, mtime, ctime, btime;
unsigned int blksize;
unsigned long long blocks;
struct {
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index ff201753fd18..44fe76174e12 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -27,6 +27,7 @@ struct hostfs_inode_info {
struct inode vfs_inode;
struct mutex open_mutex;
dev_t dev;
+ struct hostfs_timespec btime;
};
static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
@@ -557,6 +558,7 @@ static int hostfs_inode_set(struct inode *ino, void *data)
}
HOSTFS_I(ino)->dev = dev;
+ HOSTFS_I(ino)->btime = st->btime;
ino->i_ino = st->ino;
ino->i_mode = st->mode;
return hostfs_inode_update(ino, st);
@@ -567,7 +569,10 @@ static int hostfs_inode_test(struct inode *inode, void *data)
const struct hostfs_stat *st = data;
dev_t dev = MKDEV(st->dev.maj, st->dev.min);
- return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev;
+ return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev &&
+ (inode->i_mode & S_IFMT) == (st->mode & S_IFMT) &&
+ HOSTFS_I(inode)->btime.tv_sec == st->btime.tv_sec &&
+ HOSTFS_I(inode)->btime.tv_nsec == st->btime.tv_nsec;
}
static struct inode *hostfs_iget(struct super_block *sb, char *name)
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 97e9c40a9448..3bcd9f35e70b 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -18,39 +18,48 @@
#include "hostfs.h"
#include <utime.h>
-static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
+static void statx_to_hostfs(const struct statx *buf, struct hostfs_stat *p)
{
- p->ino = buf->st_ino;
- p->mode = buf->st_mode;
- p->nlink = buf->st_nlink;
- p->uid = buf->st_uid;
- p->gid = buf->st_gid;
- p->size = buf->st_size;
- p->atime.tv_sec = buf->st_atime;
- p->atime.tv_nsec = 0;
- p->ctime.tv_sec = buf->st_ctime;
- p->ctime.tv_nsec = 0;
- p->mtime.tv_sec = buf->st_mtime;
- p->mtime.tv_nsec = 0;
- p->blksize = buf->st_blksize;
- p->blocks = buf->st_blocks;
- p->rdev.maj = os_major(buf->st_rdev);
- p->rdev.min = os_minor(buf->st_rdev);
- p->dev.maj = os_major(buf->st_dev);
- p->dev.min = os_minor(buf->st_dev);
+ p->ino = buf->stx_ino;
+ p->mode = buf->stx_mode;
+ p->nlink = buf->stx_nlink;
+ p->uid = buf->stx_uid;
+ p->gid = buf->stx_gid;
+ p->size = buf->stx_size;
+ p->atime.tv_sec = buf->stx_atime.tv_sec;
+ p->atime.tv_nsec = buf->stx_atime.tv_nsec;
+ p->ctime.tv_sec = buf->stx_ctime.tv_sec;
+ p->ctime.tv_nsec = buf->stx_ctime.tv_nsec;
+ p->mtime.tv_sec = buf->stx_mtime.tv_sec;
+ p->mtime.tv_nsec = buf->stx_mtime.tv_nsec;
+ if (buf->stx_mask & STATX_BTIME) {
+ p->btime.tv_sec = buf->stx_btime.tv_sec;
+ p->btime.tv_nsec = buf->stx_btime.tv_nsec;
+ } else {
+ memset(&p->btime, 0, sizeof(p->btime));
+ }
+ p->blksize = buf->stx_blksize;
+ p->blocks = buf->stx_blocks;
+ p->rdev.maj = buf->stx_rdev_major;
+ p->rdev.min = buf->stx_rdev_minor;
+ p->dev.maj = buf->stx_dev_major;
+ p->dev.min = buf->stx_dev_minor;
}
int stat_file(const char *path, struct hostfs_stat *p, int fd)
{
- struct stat64 buf;
+ struct statx buf;
+ int flags = AT_SYMLINK_NOFOLLOW;
if (fd >= 0) {
- if (fstat64(fd, &buf) < 0)
- return -errno;
- } else if (lstat64(path, &buf) < 0) {
- return -errno;
+ flags |= AT_EMPTY_PATH;
+ path = "";
}
- stat64_to_hostfs(&buf, p);
+
+ if ((statx(fd, path, flags, STATX_BASIC_STATS | STATX_BTIME, &buf)) < 0)
+ return -errno;
+
+ statx_to_hostfs(&buf, p);
return 0;
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index e7e6701806ad..7ffdf0d037fa 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -224,7 +224,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
}
/* truncate len if we find any trailing uptodate block(s) */
- for ( ; i <= last; i++) {
+ while (++i <= last) {
if (ifs_block_is_uptodate(ifs, i)) {
plen -= (last - i + 1) * block_size;
last = i - 1;
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index eb2f8273e6f1..09df40b612fb 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -147,7 +147,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *file,
de = tmpde;
}
/* Basic sanity check, whether name doesn't exceed dir entry */
- if (de_len < de->name_len[0] +
+ if (de_len < sizeof(struct iso_directory_record) ||
+ de_len < de->name_len[0] +
sizeof(struct iso_directory_record)) {
printk(KERN_NOTICE "iso9660: Corrupted directory entry"
" in block %lu of inode %lu\n", block,
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 35768a63fb1d..421d247fae52 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -180,7 +180,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb,
return NULL;
return isofs_export_iget(sb,
- fh_len > 2 ? ifid->parent_block : 0,
+ fh_len > 3 ? ifid->parent_block : 0,
ifid->parent_offset,
fh_len > 4 ? ifid->parent_generation : 0);
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index dfbb8f73861f..ddde73299d62 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1914,7 +1914,6 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
/* Log is no longer empty */
write_lock(&journal->j_state_lock);
- WARN_ON(!sb->s_sequence);
journal->j_flags &= ~JBD2_FLUSHED;
write_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 421c0d360836..19ec32537483 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -286,21 +286,22 @@ static int fc_do_one_pass(journal_t *journal,
int jbd2_journal_recover(journal_t *journal)
{
int err, err2;
- journal_superblock_t * sb;
-
struct recovery_info info;
errseq_t wb_err;
struct address_space *mapping;
memset(&info, 0, sizeof(info));
- sb = journal->j_superblock;
/*
* The journal superblock's s_start field (the current log head)
* is always zero if, and only if, the journal was cleanly
- * unmounted.
+ * unmounted. We use its in-memory version j_tail here because
+ * jbd2_journal_wipe() could have updated it without updating journal
+ * superblock.
*/
- if (!sb->s_start) {
+ if (!journal->j_tail) {
+ journal_superblock_t *sb = journal->j_superblock;
+
jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index f9009e4f9ffd..0e1019382cf5 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -204,6 +204,10 @@ int dbMount(struct inode *ipbmap)
bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
+ if (!bmp->db_agwidth) {
+ err = -EINVAL;
+ goto err_release_metapage;
+ }
bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG ||
@@ -3403,7 +3407,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
oldl2agsize = bmp->db_agl2size;
bmp->db_agl2size = l2agsize;
- bmp->db_agsize = 1 << l2agsize;
+ bmp->db_agsize = (s64)1 << l2agsize;
/* compute new number of AG */
agno = bmp->db_numag;
@@ -3666,8 +3670,8 @@ void dbFinalizeBmap(struct inode *ipbmap)
* system size is not a multiple of the group size).
*/
inactfree = (inactags && ag_rem) ?
- ((inactags - 1) << bmp->db_agl2size) + ag_rem
- : inactags << bmp->db_agl2size;
+ (((s64)inactags - 1) << bmp->db_agl2size) + ag_rem
+ : ((s64)inactags << bmp->db_agl2size);
/* determine how many free blocks are in the active
* allocation groups plus the average number of free blocks
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 8f85177f284b..93db6eec4465 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -117,7 +117,8 @@ do { \
if (!(RC)) { \
if (((P)->header.nextindex > \
(((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \
- ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) { \
+ ((BN) && (((P)->header.maxslot > DTPAGEMAXSLOT) || \
+ ((P)->header.stblindex >= DTPAGEMAXSLOT)))) { \
BT_PUTPAGE(MP); \
jfs_error((IP)->i_sb, \
"DT_GETPAGE: dtree page corrupt\n"); \
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index b30e4cf2f579..9a6d504228e7 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -102,7 +102,7 @@ int diMount(struct inode *ipimap)
* allocate/initialize the in-memory inode map control structure
*/
/* allocate the in-memory inode map control structure. */
- imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
+ imap = kzalloc(sizeof(struct inomap), GFP_KERNEL);
if (imap == NULL)
return -ENOMEM;
@@ -456,7 +456,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
dp += inum % 8; /* 8 inodes per 4K page */
/* copy on-disk inode to in-memory inode */
- if ((copy_from_dinode(dp, ip)) != 0) {
+ if ((copy_from_dinode(dp, ip) != 0) || (ip->i_nlink == 0)) {
/* handle bad return by returning NULL for ip */
set_nlink(ip, 1); /* Don't want iput() deleting it */
iput(ip);
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 7252941bf165..b3b08c5ae701 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -559,11 +559,16 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
size_check:
if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
- int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr));
-
- printk(KERN_ERR "ea_get: invalid extended attribute\n");
- print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
- ea_buf->xattr, size, 1);
+ if (unlikely(EALIST_SIZE(ea_buf->xattr) > INT_MAX)) {
+ printk(KERN_ERR "ea_get: extended attribute size too large: %u > INT_MAX\n",
+ EALIST_SIZE(ea_buf->xattr));
+ } else {
+ int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr));
+
+ printk(KERN_ERR "ea_get: invalid extended attribute\n");
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
+ ea_buf->xattr, size, 1);
+ }
ea_release(inode, ea_buf);
rc = -EIO;
goto clean_up;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index b068ed32d7b3..f6e2a4523f7e 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1560,8 +1560,9 @@ void kernfs_break_active_protection(struct kernfs_node *kn)
* invoked before finishing the kernfs operation. Note that while this
* function restores the active reference, it doesn't and can't actually
* restore the active protection - @kn may already or be in the process of
- * being removed. Once kernfs_break_active_protection() is invoked, that
- * protection is irreversibly gone for the kernfs operation instance.
+ * being drained and removed. Once kernfs_break_active_protection() is
+ * invoked, that protection is irreversibly gone for the kernfs operation
+ * instance.
*
* While this function may be called at any point after
* kernfs_break_active_protection() is invoked, its most useful location
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 332d08d2fe0d..6b90fea6cca2 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -820,8 +820,9 @@ bool kernfs_should_drain_open_files(struct kernfs_node *kn)
/*
* @kn being deactivated guarantees that @kn->attr.open can't change
* beneath us making the lockless test below safe.
+ * Callers post kernfs_unbreak_active_protection may be counted in
+ * kn->active by now, do not WARN_ON because of them.
*/
- WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
rcu_read_lock();
on = rcu_dereference(kn->attr.open);
diff --git a/fs/namespace.c b/fs/namespace.c
index b4385e2413d5..eab9185e2285 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -633,15 +633,11 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
return 0;
mnt = real_mount(bastard);
mnt_add_count(mnt, 1);
- smp_mb(); // see mntput_no_expire()
+ smp_mb(); // see mntput_no_expire() and do_umount()
if (likely(!read_seqretry(&mount_lock, seq)))
return 0;
- if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
- mnt_add_count(mnt, -1);
- return 1;
- }
lock_mount_hash();
- if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+ if (unlikely(bastard->mnt_flags & (MNT_SYNC_UMOUNT | MNT_DOOMED))) {
mnt_add_count(mnt, -1);
unlock_mount_hash();
return 1;
@@ -1786,6 +1782,7 @@ static int do_umount(struct mount *mnt, int flags)
umount_tree(mnt, UMOUNT_PROPAGATE);
retval = 0;
} else {
+ smp_mb(); // paired with __legitimize_mnt()
shrink_submounts(mnt);
retval = -EBUSY;
if (!propagate_mount_busy(mnt, 2)) {
@@ -1869,6 +1866,7 @@ static void warn_mandlock(void)
static int can_umount(const struct path *path, int flags)
{
struct mount *mnt = real_mount(path->mnt);
+ struct super_block *sb = path->dentry->d_sb;
if (!may_mount())
return -EPERM;
@@ -1878,7 +1876,7 @@ static int can_umount(const struct path *path, int flags)
return -EINVAL;
if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
return -EINVAL;
- if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
+ if (flags & MNT_FORCE && !ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
return -EPERM;
return 0;
}
@@ -2438,56 +2436,62 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath)
struct vfsmount *mnt = path->mnt;
struct dentry *dentry;
struct mountpoint *mp = ERR_PTR(-ENOENT);
+ struct path under = {};
for (;;) {
- struct mount *m;
+ struct mount *m = real_mount(mnt);
if (beneath) {
- m = real_mount(mnt);
+ path_put(&under);
read_seqlock_excl(&mount_lock);
- dentry = dget(m->mnt_mountpoint);
+ under.mnt = mntget(&m->mnt_parent->mnt);
+ under.dentry = dget(m->mnt_mountpoint);
read_sequnlock_excl(&mount_lock);
+ dentry = under.dentry;
} else {
dentry = path->dentry;
}
inode_lock(dentry->d_inode);
- if (unlikely(cant_mount(dentry))) {
- inode_unlock(dentry->d_inode);
- goto out;
- }
-
namespace_lock();
- if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) {
+ if (unlikely(cant_mount(dentry) || !is_mounted(mnt)))
+ break; // not to be mounted on
+
+ if (beneath && unlikely(m->mnt_mountpoint != dentry ||
+ &m->mnt_parent->mnt != under.mnt)) {
namespace_unlock();
inode_unlock(dentry->d_inode);
- goto out;
+ continue; // got moved
}
mnt = lookup_mnt(path);
- if (likely(!mnt))
+ if (unlikely(mnt)) {
+ namespace_unlock();
+ inode_unlock(dentry->d_inode);
+ path_put(path);
+ path->mnt = mnt;
+ path->dentry = dget(mnt->mnt_root);
+ continue; // got overmounted
+ }
+ mp = get_mountpoint(dentry);
+ if (IS_ERR(mp))
break;
-
- namespace_unlock();
- inode_unlock(dentry->d_inode);
- if (beneath)
- dput(dentry);
- path_put(path);
- path->mnt = mnt;
- path->dentry = dget(mnt->mnt_root);
- }
-
- mp = get_mountpoint(dentry);
- if (IS_ERR(mp)) {
- namespace_unlock();
- inode_unlock(dentry->d_inode);
+ if (beneath) {
+ /*
+ * @under duplicates the references that will stay
+ * at least until namespace_unlock(), so the path_put()
+ * below is safe (and OK to do under namespace_lock -
+ * we are not dropping the final references here).
+ */
+ path_put(&under);
+ }
+ return mp;
}
-
-out:
+ namespace_unlock();
+ inode_unlock(dentry->d_inode);
if (beneath)
- dput(dentry);
-
+ path_put(&under);
return mp;
}
@@ -2498,14 +2502,11 @@ static inline struct mountpoint *lock_mount(struct path *path)
static void unlock_mount(struct mountpoint *where)
{
- struct dentry *dentry = where->m_dentry;
-
+ inode_unlock(where->m_dentry->d_inode);
read_seqlock_excl(&mount_lock);
put_mountpoint(where);
read_sequnlock_excl(&mount_lock);
-
namespace_unlock();
- inode_unlock(dentry->d_inode);
}
static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
@@ -2556,6 +2557,10 @@ static int do_change_type(struct path *path, int ms_flags)
return -EINVAL;
namespace_lock();
+ if (!check_mnt(mnt)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
@@ -2994,7 +2999,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
if (IS_MNT_SLAVE(from)) {
struct mount *m = from->mnt_master;
- list_add(&to->mnt_slave, &m->mnt_slave_list);
+ list_add(&to->mnt_slave, &from->mnt_slave);
to->mnt_master = m;
}
@@ -3019,18 +3024,25 @@ out:
* Check if path is overmounted, i.e., if there's a mount on top of
* @path->mnt with @path->dentry as mountpoint.
*
- * Context: This function expects namespace_lock() to be held.
+ * Context: namespace_sem must be held at least shared.
+ * MUST NOT be called under lock_mount_hash() (there one should just
+ * call __lookup_mnt() and check if it returns NULL).
* Return: If path is overmounted true is returned, false if not.
*/
static inline bool path_overmounted(const struct path *path)
{
+ unsigned seq = read_seqbegin(&mount_lock);
+ bool no_child;
+
rcu_read_lock();
- if (unlikely(__lookup_mnt(path->mnt, path->dentry))) {
- rcu_read_unlock();
- return true;
- }
+ no_child = !__lookup_mnt(path->mnt, path->dentry);
rcu_read_unlock();
- return false;
+ if (need_seqretry(&mount_lock, seq)) {
+ read_seqlock_excl(&mount_lock);
+ no_child = !__lookup_mnt(path->mnt, path->dentry);
+ read_sequnlock_excl(&mount_lock);
+ }
+ return unlikely(!no_child);
}
/**
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 7df2503cef6c..2d99f5e7a686 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -2,6 +2,7 @@
config NFS_FS
tristate "NFS client support"
depends on INET && FILE_LOCKING && MULTIUSER
+ select CRC32
select LOCKD
select SUNRPC
select NFS_ACL_SUPPORT if NFS_V3_ACL
@@ -194,7 +195,6 @@ config NFS_USE_KERNEL_DNS
config NFS_DEBUG
bool
depends on NFS_FS && SUNRPC_DEBUG
- select CRC32
default y
config NFS_DISABLE_UDP_SUPPORT
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 62607d52bfa5..aa09f930eeaf 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1080,6 +1080,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
server->namelen = NFS2_MAXNAMLEN;
}
+ /* Linux 'subtree_check' borkenness mandates this setting */
+ server->fh_expire_type = NFS_FH_VOL_RENAME;
if (!(fattr->valid & NFS_ATTR_FATTR)) {
error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 4bf2526a3a18..bbd582d8a7dc 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -297,7 +297,8 @@ nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
if (delegation == NULL)
goto out;
spin_lock(&delegation->lock);
- if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ if (delegation->inode &&
+ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
/* Refcount matched in nfs_end_delegation_return() */
ret = nfs_get_delegation(delegation);
@@ -570,17 +571,6 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
- else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) {
- struct inode *inode;
-
- spin_lock(&delegation->lock);
- inode = delegation->inode;
- if (inode && list_empty(&NFS_I(inode)->open_files))
- ret = true;
- spin_unlock(&delegation->lock);
- }
- if (ret)
- clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) ||
test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
@@ -821,11 +811,25 @@ int nfs4_inode_make_writeable(struct inode *inode)
return nfs4_inode_return_delegation(inode);
}
-static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
- struct nfs_delegation *delegation)
+static void
+nfs_mark_return_if_closed_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
- set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
- set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
+ struct inode *inode;
+
+ if (test_bit(NFS_DELEGATION_RETURN, &delegation->flags) ||
+ test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags))
+ return;
+ spin_lock(&delegation->lock);
+ inode = delegation->inode;
+ if (!inode)
+ goto out;
+ if (list_empty(&NFS_I(inode)->open_files))
+ nfs_mark_return_delegation(server, delegation);
+ else
+ set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+out:
+ spin_unlock(&delegation->lock);
}
static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 39f7549afcf5..389186384235 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2642,6 +2642,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
unblock_revalidate(new_dentry);
}
+static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry,
+ struct dentry *new_dentry)
+{
+ struct nfs_server *server = NFS_SB(old_dentry->d_sb);
+
+ if (old_dentry->d_parent != new_dentry->d_parent)
+ return false;
+ if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE)
+ return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN);
+ return true;
+}
+
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2729,7 +2741,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
}
- if (S_ISREG(old_inode->i_mode))
+ if (S_ISREG(old_inode->i_mode) &&
+ nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry))
nfs_sync_inode(old_inode);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
must_unblock ? nfs_unblock_rename : NULL);
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index acf4b88889dc..d5f1fbfd9a0c 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -75,6 +75,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct page *scratch;
struct list_head dsaddrs;
struct nfs4_pnfs_ds_addr *da;
+ struct net *net = server->nfs_client->cl_net;
/* set up xdr stream */
scratch = alloc_page(gfp_flags);
@@ -158,8 +159,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
mp_count = be32_to_cpup(p); /* multipath count */
for (j = 0; j < mp_count; j++) {
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -169,7 +169,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_free_deviceid;
}
- dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ dsaddr->ds_list[i] = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!dsaddr->ds_list[i])
goto out_err_drain_dsaddrs;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 2b3c5eea1f13..0bc537de1b29 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1255,6 +1255,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
case -ECONNRESET:
case -EHOSTDOWN:
case -EHOSTUNREACH:
+ case -ENETDOWN:
case -ENETUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e028f5a0ef5f..d21c5ecfbf1c 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -49,6 +49,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct nfs4_pnfs_ds_addr *da;
struct nfs4_ff_layout_ds *new_ds = NULL;
struct nfs4_ff_ds_version *ds_versions = NULL;
+ struct net *net = server->nfs_client->cl_net;
u32 mp_count;
u32 version_count;
__be32 *p;
@@ -80,8 +81,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
for (i = 0; i < mp_count; i++) {
/* multipath ds */
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -149,7 +149,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
new_ds->ds_versions = ds_versions;
new_ds->ds_versions_cnt = version_count;
- new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!new_ds->ds)
goto out_err_drain_dsaddrs;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 56bbf59bda3c..06230baaa554 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -74,6 +74,8 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
+ if (unlikely(nfs_current_task_exiting()))
+ return -EINTR;
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index a92b234ae087..c29ad2e1d416 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -859,18 +859,16 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts)
return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
}
-#ifdef CONFIG_CRC32
static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid)
{
return ~crc32_le(0xFFFFFFFF, &stateid->other[0],
NFS4_STATEID_OTHER_SIZE);
}
-#else
-static inline u32 nfs_stateid_hash(nfs4_stateid *stateid)
+
+static inline bool nfs_current_task_exiting(void)
{
- return 0;
+ return (current->flags & PF_EXITING) != 0;
}
-#endif
static inline bool nfs_error_is_fatal(int err)
{
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 4bf208a0a8e9..715753f41fb0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -39,7 +39,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
- } while (!fatal_signal_pending(current));
+ } while (!fatal_signal_pending(current) && !nfs_current_task_exiting());
return res;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4b12e45f5753..1b94a55215e7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -422,6 +422,8 @@ static int nfs4_delay_killable(long *timeout)
{
might_sleep();
+ if (unlikely(nfs_current_task_exiting()))
+ return -EINTR;
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!__fatal_signal_pending(current))
@@ -433,6 +435,8 @@ static int nfs4_delay_interruptible(long *timeout)
{
might_sleep();
+ if (unlikely(nfs_current_task_exiting()))
+ return -EINTR;
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!signal_pending(current))
@@ -1712,7 +1716,8 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
rcu_read_unlock();
trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
- if (!fatal_signal_pending(current)) {
+ if (!fatal_signal_pending(current) &&
+ !nfs_current_task_exiting()) {
if (schedule_timeout(5*HZ) == 0)
status = -EAGAIN;
else
@@ -3494,7 +3499,7 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
write_sequnlock(&state->seqlock);
trace_nfs4_close_stateid_update_wait(state->inode, dst, 0);
- if (fatal_signal_pending(current))
+ if (fatal_signal_pending(current) || nfs_current_task_exiting())
status = -EINTR;
else
if (schedule_timeout(5*HZ) != 0)
@@ -6880,10 +6885,18 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
struct nfs4_unlockdata *p;
struct nfs4_state *state = lsp->ls_state;
struct inode *inode = state->inode;
+ struct nfs_lock_context *l_ctx;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (p == NULL)
return NULL;
+ l_ctx = nfs_get_lock_context(ctx);
+ if (!IS_ERR(l_ctx)) {
+ p->l_ctx = l_ctx;
+ } else {
+ kfree(p);
+ return NULL;
+ }
p->arg.fh = NFS_FH(inode);
p->arg.fl = &p->fl;
p->arg.seqid = seqid;
@@ -6891,7 +6904,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
p->lsp = lsp;
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
- p->l_ctx = nfs_get_lock_context(ctx);
locks_init_lock(&p->fl);
locks_copy_lock(&p->fl, fl);
p->server = NFS_SERVER(inode);
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 351616c61df5..f9c291e2165c 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -148,16 +148,12 @@ static inline void nfs4_copy_sessionid(struct nfs4_sessionid *dst,
memcpy(dst->data, src->data, NFS4_MAX_SESSIONID_LEN);
}
-#ifdef CONFIG_CRC32
/*
* nfs_session_id_hash - calculate the crc32 hash for the session id
* @session - pointer to session
*/
#define nfs_session_id_hash(sess_id) \
(~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data)))
-#else
-#define nfs_session_id_hash(session) (0)
-#endif
#else /* defined(CONFIG_NFS_V4_1) */
static inline int nfs4_init_session(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 794bb4aa588d..9fc71dc090c2 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2741,7 +2741,15 @@ out_error:
pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
" with error %d\n", section_sep, section,
clp->cl_hostname, -status);
- ssleep(1);
+ switch (status) {
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ nfs_mark_client_ready(clp, -EIO);
+ break;
+ default:
+ ssleep(1);
+ break;
+ }
out_drain:
memalloc_nofs_restore(memflags);
nfs4_end_drain_session(clp);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index fe83c681e3fe..73aa5a63afe3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -732,6 +732,14 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return remaining;
}
+static void pnfs_reset_return_info(struct pnfs_layout_hdr *lo)
+{
+ struct pnfs_layout_segment *lseg;
+
+ list_for_each_entry(lseg, &lo->plh_return_segs, pls_list)
+ pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
+}
+
static void
pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
struct list_head *free_me,
@@ -1180,6 +1188,7 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
pnfs_free_returned_lsegs(lo, &freeme, range, seq);
pnfs_set_layout_stateid(lo, stateid, NULL, true);
+ pnfs_reset_return_info(lo);
} else
pnfs_mark_layout_stateid_invalid(lo, &freeme);
out_unlock:
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d886c8226d8f..79996d7dad0f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -59,6 +59,7 @@ struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
char *ds_remotestr; /* comma sep list of addrs */
struct list_head ds_addrs;
+ const struct net *ds_net;
struct nfs_client *ds_clp;
refcount_t ds_count;
unsigned long ds_state;
@@ -405,7 +406,8 @@ int pnfs_generic_commit_pagelist(struct inode *inode,
int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max);
void pnfs_generic_write_commit_done(struct rpc_task *task, void *data);
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
-struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
+struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(const struct net *net,
+ struct list_head *dsaddrs,
gfp_t gfp_flags);
void nfs4_pnfs_v3_ds_connect_unload(void);
int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 88e061bd711b..1b317c44da12 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -651,12 +651,12 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1,
* Lookup DS by addresses. nfs4_ds_cache_lock is held
*/
static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(const struct list_head *dsaddrs)
+_data_server_lookup_locked(const struct net *net, const struct list_head *dsaddrs)
{
struct nfs4_pnfs_ds *ds;
list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
- if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
+ if (ds->ds_net == net && _same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
return ds;
return NULL;
}
@@ -763,7 +763,7 @@ out_err:
* uncached and return cached struct nfs4_pnfs_ds.
*/
struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
char *remotestr;
@@ -781,13 +781,14 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(dsaddrs);
+ tmp_ds = _data_server_lookup_locked(net, dsaddrs);
if (tmp_ds == NULL) {
INIT_LIST_HEAD(&ds->ds_addrs);
list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
refcount_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
+ ds->ds_net = net;
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
dprintk("%s add new data server %s\n", __func__,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e1bcad5906ae..4e72ee57fc8f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1021,6 +1021,16 @@ int nfs_reconfigure(struct fs_context *fc)
sync_filesystem(sb);
/*
+ * The SB_RDONLY flag has been removed from the superblock during
+ * mounts to prevent interference between different filesystems.
+ * Similarly, it is also necessary to ignore the SB_RDONLY flag
+ * during reconfiguration; otherwise, it may also result in the
+ * creation of redundant superblocks when mounting a directory with
+ * different rw and ro flags multiple times.
+ */
+ fc->sb_flags_mask &= ~SB_RDONLY;
+
+ /*
* Userspace mount programs that send binary options generally send
* them populated with default values. We have no way to know which
* ones were explicitly specified. Fall back to legacy behavior and
@@ -1277,8 +1287,17 @@ int nfs_get_tree_common(struct fs_context *fc)
if (IS_ERR(server))
return PTR_ERR(server);
+ /*
+ * When NFS_MOUNT_UNSHARED is not set, NFS forces the sharing of a
+ * superblock among each filesystem that mounts sub-directories
+ * belonging to a single exported root path.
+ * To prevent interference between different filesystems, the
+ * SB_RDONLY flag should be removed from the superblock.
+ */
if (server->flags & NFS_MOUNT_UNSHARED)
compare_super = NULL;
+ else
+ fc->sb_flags &= ~SB_RDONLY;
/* -o noac implies -o sync */
if (server->flags & NFS_MOUNT_NOAC)
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 7b59a40d40c0..784f7c1d003b 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -14,6 +14,7 @@
#include <linux/rcupdate.h>
#include <linux/lockd/lockd.h>
+#include "internal.h"
#include "nfs4_fs.h"
#include "netns.h"
#include "sysfs.h"
@@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt *clnt)
rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
}
+/*
+ * Shut down the nfs_client only once all the superblocks
+ * have been shut down.
+ */
+static void shutdown_nfs_client(struct nfs_client *clp)
+{
+ struct nfs_server *server;
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (!(server->flags & NFS_MOUNT_SHUTDOWN)) {
+ rcu_read_unlock();
+ return;
+ }
+ }
+ rcu_read_unlock();
+ nfs_mark_client_ready(clp, -EIO);
+ shutdown_client(clp->cl_rpcclient);
+}
+
static ssize_t
shutdown_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
@@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
server->flags |= NFS_MOUNT_SHUTDOWN;
shutdown_client(server->client);
- shutdown_client(server->nfs_client->cl_rpcclient);
if (!IS_ERR(server->client_acl))
shutdown_client(server->client_acl);
@@ -267,6 +286,7 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
if (server->nlm_host)
shutdown_client(server->nlm_host->h_rpcclnt);
out:
+ shutdown_nfs_client(server->nfs_client);
return count;
}
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 43b88eaf0673..05c10f70456c 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -4,6 +4,7 @@ config NFSD
depends on INET
depends on FILE_LOCKING
depends on FSNOTIFY
+ select CRC32
select LOCKD
select SUNRPC
select EXPORTFS
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a25cb2ff1b0b..e2875706e6bf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1066,6 +1066,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
return openlockstateid(stid);
}
+/*
+ * As the sc_free callback of deleg, this may be called by nfs4_put_stid
+ * in nfsd_break_one_deleg.
+ * Considering nfsd_break_one_deleg is called with the flc->flc_lock held,
+ * this function mustn't ever sleep.
+ */
static void nfs4_free_deleg(struct nfs4_stid *stid)
{
struct nfs4_delegation *dp = delegstateid(stid);
@@ -4920,6 +4926,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
+ bool queued;
/*
* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
@@ -4928,7 +4935,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
* we know it's safe to take a reference.
*/
refcount_inc(&dp->dl_stid.sc_count);
- WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
+ queued = nfsd4_run_cb(&dp->dl_recall);
+ WARN_ON_ONCE(!queued);
+ if (!queued)
+ refcount_dec(&dp->dl_stid.sc_count);
}
/* Called from break_lease() with flc_lock held. */
@@ -6279,14 +6289,19 @@ deleg_reaper(struct nfsd_net *nn)
spin_lock(&nn->client_lock);
list_for_each_safe(pos, next, &nn->client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
- if (clp->cl_state != NFSD4_ACTIVE ||
- list_empty(&clp->cl_delegations) ||
- atomic_read(&clp->cl_delegs_in_recall) ||
- test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
- (ktime_get_boottime_seconds() -
- clp->cl_ra_time < 5)) {
+
+ if (clp->cl_state != NFSD4_ACTIVE)
+ continue;
+ if (list_empty(&clp->cl_delegations))
+ continue;
+ if (atomic_read(&clp->cl_delegs_in_recall))
+ continue;
+ if (test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags))
+ continue;
+ if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5)
+ continue;
+ if (clp->cl_cb_state != NFSD4_CB_UP)
continue;
- }
list_add(&clp->cl_ra_cblist, &cblist);
/* release in nfsd4_cb_recall_any_release */
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 40426f899e76..f1420d3510d2 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -263,7 +263,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
return true;
}
-#ifdef CONFIG_CRC32
/**
* knfsd_fh_hash - calculate the crc32 hash for the filehandle
* @fh - pointer to filehandle
@@ -275,12 +274,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
{
return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size);
}
-#else
-static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
-{
- return 0;
-}
-#endif
/**
* fh_clear_pre_post_attrs - Reset pre/post attributes
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index dbd27a44632f..5e70a3478afe 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -2094,11 +2094,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree,
ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
if (ret < 0) {
- if (unlikely(ret == -ENOENT))
+ if (unlikely(ret == -ENOENT)) {
nilfs_crit(btree->b_inode->i_sb,
"writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
btree->b_inode->i_ino,
(unsigned long long)key, level);
+ ret = -EINVAL;
+ }
goto out;
}
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 893ab36824cc..2d8dc6b35b54 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
dat = nilfs_bmap_get_dat(bmap);
key = nilfs_bmap_data_get_key(bmap, bh);
ptr = nilfs_direct_get_ptr(bmap, key);
+ if (ptr == NILFS_BMAP_INVALID_PTR)
+ return -EINVAL;
+
if (!buffer_nilfs_volatile(bh)) {
oldreq.pr_entry_nr = ptr;
newreq.pr_entry_nr = ptr;
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 2ecd0303f942..4aea45821611 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -335,6 +335,7 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count,
}
if (extend_init && !is_compressed(ni)) {
+ WARN_ON(ni->i_valid >= pos);
err = ntfs_extend_initialized_size(file, ni, ni->i_valid, pos);
if (err)
goto out;
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 9089c58a005c..191b91ffadbb 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -618,7 +618,7 @@ static bool index_hdr_check(const struct INDEX_HDR *hdr, u32 bytes)
u32 off = le32_to_cpu(hdr->de_off);
if (!IS_ALIGNED(off, 8) || tot > bytes || end > tot ||
- off + sizeof(struct NTFS_DE) > end) {
+ size_add(off, sizeof(struct NTFS_DE)) > end) {
/* incorrect index buffer. */
return false;
}
@@ -736,7 +736,7 @@ fill_table:
if (end > total)
return NULL;
- if (off + sizeof(struct NTFS_DE) > end)
+ if (size_add(off, sizeof(struct NTFS_DE)) > end)
return NULL;
e = Add2Ptr(hdr, off);
@@ -2184,6 +2184,10 @@ static int indx_get_entry_to_replace(struct ntfs_index *indx,
e = hdr_first_de(&n->index->ihdr);
fnd_push(fnd, n, e);
+ if (!e) {
+ err = -EINVAL;
+ goto out;
+ }
if (!de_is_last(e)) {
/*
@@ -2205,6 +2209,10 @@ static int indx_get_entry_to_replace(struct ntfs_index *indx,
n = fnd->nodes[level];
te = hdr_first_de(&n->index->ihdr);
+ if (!te) {
+ err = -EINVAL;
+ goto out;
+ }
/* Copy the candidate entry into the replacement entry buffer. */
re = kmalloc(le16_to_cpu(te->size) + sizeof(u64), GFP_NOFS);
if (!re) {
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 964e27c7b901..c1d1c4a7cf4d 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -717,7 +717,7 @@ static inline struct NTFS_DE *hdr_first_de(const struct INDEX_HDR *hdr)
struct NTFS_DE *e;
u16 esize;
- if (de_off >= used || de_off + sizeof(struct NTFS_DE) > used )
+ if (de_off >= used || size_add(de_off, sizeof(struct NTFS_DE)) > used)
return NULL;
e = Add2Ptr(hdr, de_off);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f0937902f7b4..e6191249169e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1796,6 +1796,14 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
el = root_el;
while (el->l_tree_depth) {
+ if (unlikely(le16_to_cpu(el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH)) {
+ ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+ "Owner %llu has invalid tree depth %u in extent list\n",
+ (unsigned long long)ocfs2_metadata_cache_owner(ci),
+ le16_to_cpu(el->l_tree_depth));
+ ret = -EROFS;
+ goto out;
+ }
if (le16_to_cpu(el->l_next_free_rec) == 0) {
ocfs2_error(ocfs2_metadata_cache_get_super(ci),
"Owner %llu has empty extent list at depth %u\n",
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index cbe3c12ff5f7..b246e5327111 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
struct ocfs2_recovery_map *rm;
mutex_init(&osb->recovery_lock);
- osb->disable_recovery = 0;
+ osb->recovery_state = OCFS2_REC_ENABLED;
osb->recovery_thread_task = NULL;
init_waitqueue_head(&osb->recovery_event);
@@ -190,31 +190,53 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
return 0;
}
-/* we can't grab the goofy sem lock from inside wait_event, so we use
- * memory barriers to make sure that we'll see the null task before
- * being woken up */
static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
{
- mb();
return osb->recovery_thread_task != NULL;
}
-void ocfs2_recovery_exit(struct ocfs2_super *osb)
+static void ocfs2_recovery_disable(struct ocfs2_super *osb,
+ enum ocfs2_recovery_state state)
{
- struct ocfs2_recovery_map *rm;
-
- /* disable any new recovery threads and wait for any currently
- * running ones to exit. Do this before setting the vol_state. */
mutex_lock(&osb->recovery_lock);
- osb->disable_recovery = 1;
+ /*
+ * If recovery thread is not running, we can directly transition to
+ * final state.
+ */
+ if (!ocfs2_recovery_thread_running(osb)) {
+ osb->recovery_state = state + 1;
+ goto out_lock;
+ }
+ osb->recovery_state = state;
+ /* Wait for recovery thread to acknowledge state transition */
+ wait_event_cmd(osb->recovery_event,
+ !ocfs2_recovery_thread_running(osb) ||
+ osb->recovery_state >= state + 1,
+ mutex_unlock(&osb->recovery_lock),
+ mutex_lock(&osb->recovery_lock));
+out_lock:
mutex_unlock(&osb->recovery_lock);
- wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
- /* At this point, we know that no more recovery threads can be
- * launched, so wait for any recovery completion work to
- * complete. */
+ /*
+ * At this point we know that no more recovery work can be queued so
+ * wait for any recovery completion work to complete.
+ */
if (osb->ocfs2_wq)
flush_workqueue(osb->ocfs2_wq);
+}
+
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb)
+{
+ ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE);
+}
+
+void ocfs2_recovery_exit(struct ocfs2_super *osb)
+{
+ struct ocfs2_recovery_map *rm;
+
+ /* disable any new recovery threads and wait for any currently
+ * running ones to exit. Do this before setting the vol_state. */
+ ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE);
/*
* Now that recovery is shut down, and the osb is about to be
@@ -1472,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg)
}
}
restart:
+ if (quota_enabled) {
+ mutex_lock(&osb->recovery_lock);
+ /* Confirm that recovery thread will no longer recover quotas */
+ if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) {
+ osb->recovery_state = OCFS2_REC_QUOTA_DISABLED;
+ wake_up(&osb->recovery_event);
+ }
+ if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED)
+ quota_enabled = 0;
+ mutex_unlock(&osb->recovery_lock);
+ }
+
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
mlog_errno(status);
@@ -1569,27 +1603,29 @@ bail:
ocfs2_free_replay_slots(osb);
osb->recovery_thread_task = NULL;
- mb(); /* sync with ocfs2_recovery_thread_running */
+ if (osb->recovery_state == OCFS2_REC_WANT_DISABLE)
+ osb->recovery_state = OCFS2_REC_DISABLED;
wake_up(&osb->recovery_event);
mutex_unlock(&osb->recovery_lock);
- if (quota_enabled)
- kfree(rm_quota);
+ kfree(rm_quota);
return status;
}
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
{
+ int was_set = -1;
+
mutex_lock(&osb->recovery_lock);
+ if (osb->recovery_state < OCFS2_REC_WANT_DISABLE)
+ was_set = ocfs2_recovery_map_set(osb, node_num);
trace_ocfs2_recovery_thread(node_num, osb->node_num,
- osb->disable_recovery, osb->recovery_thread_task,
- osb->disable_recovery ?
- -1 : ocfs2_recovery_map_set(osb, node_num));
+ osb->recovery_state, osb->recovery_thread_task, was_set);
- if (osb->disable_recovery)
+ if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE)
goto out;
if (osb->recovery_thread_task)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index e3c3a35dc5e0..6397170f302f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb);
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
void ocfs2_free_replay_slots(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 8fe826143d7b..5e3ebbab698a 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -308,6 +308,21 @@ enum ocfs2_journal_trigger_type {
void ocfs2_initialize_journal_triggers(struct super_block *sb,
struct ocfs2_triggers triggers[]);
+enum ocfs2_recovery_state {
+ OCFS2_REC_ENABLED = 0,
+ OCFS2_REC_QUOTA_WANT_DISABLE,
+ /*
+ * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for
+ * ocfs2_recovery_disable_quota() to work.
+ */
+ OCFS2_REC_QUOTA_DISABLED,
+ OCFS2_REC_WANT_DISABLE,
+ /*
+ * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
+ */
+ OCFS2_REC_DISABLED,
+};
+
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
@@ -370,7 +385,7 @@ struct ocfs2_super
struct ocfs2_recovery_map *recovery_map;
struct ocfs2_replay_map *replay_map;
struct task_struct *recovery_thread_task;
- int disable_recovery;
+ enum ocfs2_recovery_state recovery_state;
wait_queue_head_t checkpoint_event;
struct ocfs2_journal *journal;
unsigned long osb_commit_interval;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 4b4fa58cd32f..c7bda48b5fb2 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -453,8 +453,7 @@ out:
/* Sync changes in local quota file into global quota file and
* reinitialize local quota file.
- * The function expects local quota file to be already locked and
- * s_umount locked in shared mode. */
+ * The function expects local quota file to be already locked. */
static int ocfs2_recover_local_quota_file(struct inode *lqinode,
int type,
struct ocfs2_quota_recovery *rec)
@@ -585,7 +584,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
{
unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
LOCAL_GROUP_QUOTA_SYSTEM_INODE };
- struct super_block *sb = osb->sb;
struct ocfs2_local_disk_dqinfo *ldinfo;
struct buffer_head *bh;
handle_t *handle;
@@ -597,7 +595,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for "
"slot %u\n", osb->dev_str, slot_num);
- down_read(&sb->s_umount);
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (list_empty(&(rec->r_list[type])))
continue;
@@ -674,8 +671,7 @@ out_put:
break;
}
out:
- up_read(&sb->s_umount);
- kfree(rec);
+ ocfs2_free_quota_recovery(rec);
return status;
}
@@ -840,8 +836,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
/*
- * s_umount held in exclusive mode protects us against racing with
- * recovery thread...
+ * ocfs2_dismount_volume() has already aborted quota recovery...
*/
if (oinfo->dqi_rec) {
ocfs2_free_quota_recovery(oinfo->dqi_rec);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 84fa585c6513..e585e77cdc88 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1870,6 +1870,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
+ /* Stop quota recovery so that we can disable quotas */
+ ocfs2_recovery_disable_quota(osb);
+
ocfs2_disable_quotas(osb);
/* All dquots should be freed by now */
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 085912268442..dd4dc70e4aaa 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -23,9 +23,9 @@ static int orangefs_writepage_locked(struct page *page,
struct orangefs_write_range *wr = NULL;
struct iov_iter iter;
struct bio_vec bv;
- size_t len, wlen;
+ size_t wlen;
ssize_t ret;
- loff_t off;
+ loff_t len, off;
set_page_writeback(page);
@@ -92,8 +92,7 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow,
struct orangefs_write_range *wrp, wr;
struct iov_iter iter;
ssize_t ret;
- size_t len;
- loff_t off;
+ loff_t len, off;
int i;
len = i_size_read(inode);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 981967e507b3..23b5c5f9c782 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -506,8 +506,6 @@ int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d,
bool ovl_is_metacopy_dentry(struct dentry *dentry);
char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding);
int ovl_ensure_verity_loaded(struct path *path);
-int ovl_get_verity_xattr(struct ovl_fs *ofs, const struct path *path,
- u8 *digest_buf, int *buf_length);
int ovl_validate_verity(struct ovl_fs *ofs,
struct path *metapath,
struct path *datapath);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 2c056d737c27..93ee57bc82ad 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1180,6 +1180,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
return ERR_PTR(-EINVAL);
}
+ if (ctx->nr == ctx->nr_data) {
+ pr_err("at least one non-data lowerdir is required\n");
+ return ERR_PTR(-EINVAL);
+ }
+
err = -EINVAL;
for (i = 0; i < ctx->nr; i++) {
l = &ctx->lower[i];
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 91fe20b7657c..d444155581ca 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -416,7 +416,7 @@ static const struct file_operations proc_pid_cmdline_ops = {
#ifdef CONFIG_KALLSYMS
/*
* Provides a wchan file via kallsyms in a proper one-value-per-file format.
- * Returns the resolved symbol. If that fails, simply return the address.
+ * Returns the resolved symbol to user space.
*/
static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 775ce0bcf08c..c8785d68e870 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -557,10 +557,16 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
return p;
}
-static inline void pde_set_flags(struct proc_dir_entry *pde)
+static void pde_set_flags(struct proc_dir_entry *pde)
{
if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT)
pde->flags |= PROC_ENTRY_PERMANENT;
+ if (pde->proc_ops->proc_read_iter)
+ pde->flags |= PROC_ENTRY_proc_read_iter;
+#ifdef CONFIG_COMPAT
+ if (pde->proc_ops->proc_compat_ioctl)
+ pde->flags |= PROC_ENTRY_proc_compat_ioctl;
+#endif
}
struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
@@ -624,6 +630,7 @@ struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
p->proc_ops = &proc_seq_ops;
p->seq_ops = ops;
p->state_size = state_size;
+ pde_set_flags(p);
return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_seq_private);
@@ -654,6 +661,7 @@ struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
return NULL;
p->proc_ops = &proc_single_ops;
p->single_show = show;
+ pde_set_flags(p);
return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_single_data);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 532dc9d240f7..897c71077a0f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -679,13 +679,13 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
if (S_ISREG(inode->i_mode)) {
inode->i_op = de->proc_iops;
- if (de->proc_ops->proc_read_iter)
+ if (pde_has_proc_read_iter(de))
inode->i_fop = &proc_iter_file_ops;
else
inode->i_fop = &proc_reg_file_ops;
#ifdef CONFIG_COMPAT
- if (de->proc_ops->proc_compat_ioctl) {
- if (de->proc_ops->proc_read_iter)
+ if (pde_has_proc_compat_ioctl(de)) {
+ if (pde_has_proc_read_iter(de))
inode->i_fop = &proc_iter_file_ops_compat;
else
inode->i_fop = &proc_reg_file_ops_compat;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 9a8f32f21ff5..445c74a39a93 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -84,6 +84,20 @@ static inline void pde_make_permanent(struct proc_dir_entry *pde)
pde->flags |= PROC_ENTRY_PERMANENT;
}
+static inline bool pde_has_proc_read_iter(const struct proc_dir_entry *pde)
+{
+ return pde->flags & PROC_ENTRY_proc_read_iter;
+}
+
+static inline bool pde_has_proc_compat_ioctl(const struct proc_dir_entry *pde)
+{
+#ifdef CONFIG_COMPAT
+ return pde->flags & PROC_ENTRY_proc_compat_ioctl;
+#else
+ return false;
+#endif
+}
+
extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 7dbbf3b6d98d..a1c97cd2720a 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -264,7 +264,7 @@ static void parse_options(char *options)
static int pstore_show_options(struct seq_file *m, struct dentry *root)
{
if (kmsg_bytes != CONFIG_PSTORE_DEFAULT_KMSG_BYTES)
- seq_printf(m, ",kmsg_bytes=%lu", kmsg_bytes);
+ seq_printf(m, ",kmsg_bytes=%u", kmsg_bytes);
return 0;
}
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 801d6c0b170c..a0fc51196910 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -6,7 +6,7 @@
#include <linux/time.h>
#include <linux/pstore.h>
-extern unsigned long kmsg_bytes;
+extern unsigned int kmsg_bytes;
#ifdef CONFIG_PSTORE_FTRACE
extern void pstore_register_ftrace(void);
@@ -35,7 +35,7 @@ static inline void pstore_unregister_pmsg(void) {}
extern struct pstore_info *psinfo;
-extern void pstore_set_kmsg_bytes(int);
+extern void pstore_set_kmsg_bytes(unsigned int bytes);
extern void pstore_get_records(int);
extern void pstore_get_backend_records(struct pstore_info *psi,
struct dentry *root, int quiet);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 03425928d2fb..ef62389212b6 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -92,8 +92,8 @@ module_param(compress, charp, 0444);
MODULE_PARM_DESC(compress, "compression to use");
/* How much of the kernel log to snapshot */
-unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
-module_param(kmsg_bytes, ulong, 0444);
+unsigned int kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
+module_param(kmsg_bytes, uint, 0444);
MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)");
static void *compress_workspace;
@@ -107,9 +107,9 @@ static void *compress_workspace;
static char *big_oops_buf;
static size_t max_compressed_size;
-void pstore_set_kmsg_bytes(int bytes)
+void pstore_set_kmsg_bytes(unsigned int bytes)
{
- kmsg_bytes = bytes;
+ WRITE_ONCE(kmsg_bytes, bytes);
}
/* Tag each group of saved records with a sequence number */
@@ -278,6 +278,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
{
struct kmsg_dump_iter iter;
+ unsigned int remaining = READ_ONCE(kmsg_bytes);
unsigned long total = 0;
const char *why;
unsigned int part = 1;
@@ -300,7 +301,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
kmsg_dump_rewind(&iter);
oopscount++;
- while (total < kmsg_bytes) {
+ while (total < remaining) {
char *dst;
size_t dst_size;
int header_size;
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 9c0ef4195b58..749794667295 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -29,7 +29,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
{
struct cached_fid *cfid;
- spin_lock(&cfids->cfid_list_lock);
list_for_each_entry(cfid, &cfids->entries, entry) {
if (!strcmp(cfid->path, path)) {
/*
@@ -38,25 +37,20 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
* being deleted due to a lease break.
*/
if (!cfid->time || !cfid->has_lease) {
- spin_unlock(&cfids->cfid_list_lock);
return NULL;
}
kref_get(&cfid->refcount);
- spin_unlock(&cfids->cfid_list_lock);
return cfid;
}
}
if (lookup_only) {
- spin_unlock(&cfids->cfid_list_lock);
return NULL;
}
if (cfids->num_entries >= max_cached_dirs) {
- spin_unlock(&cfids->cfid_list_lock);
return NULL;
}
cfid = init_cached_dir(path);
if (cfid == NULL) {
- spin_unlock(&cfids->cfid_list_lock);
return NULL;
}
cfid->cfids = cfids;
@@ -74,7 +68,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
*/
cfid->has_lease = true;
- spin_unlock(&cfids->cfid_list_lock);
return cfid;
}
@@ -185,8 +178,10 @@ replay_again:
if (!utf16_path)
return -ENOMEM;
+ spin_lock(&cfids->cfid_list_lock);
cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs);
if (cfid == NULL) {
+ spin_unlock(&cfids->cfid_list_lock);
kfree(utf16_path);
return -ENOENT;
}
@@ -195,7 +190,6 @@ replay_again:
* Otherwise, it is either a new entry or laundromat worker removed it
* from @cfids->entries. Caller will put last reference if the latter.
*/
- spin_lock(&cfids->cfid_list_lock);
if (cfid->has_lease && cfid->time) {
spin_unlock(&cfids->cfid_list_lock);
*ret_cfid = cfid;
diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c
index 1fc1683b15bd..bf32bc22ebd6 100644
--- a/fs/smb/client/cifsacl.c
+++ b/fs/smb/client/cifsacl.c
@@ -778,7 +778,8 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl,
}
/* validate that we do not go past end of acl */
- if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
+ if (end_of_acl < (char *)pdacl + sizeof(struct smb_acl) ||
+ end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
cifs_dbg(VFS, "ACL too small to parse DACL\n");
return;
}
@@ -799,15 +800,34 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl,
if (num_aces > 0) {
umode_t denied_mode = 0;
- if (num_aces > ULONG_MAX / sizeof(struct smb_ace *))
+ if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) /
+ (offsetof(struct smb_ace, sid) +
+ offsetof(struct smb_sid, sub_auth) + sizeof(__le16)))
return;
+
ppace = kmalloc_array(num_aces, sizeof(struct smb_ace *),
GFP_KERNEL);
if (!ppace)
return;
for (i = 0; i < num_aces; ++i) {
+ if (end_of_acl - acl_base < acl_size)
+ break;
+
ppace[i] = (struct smb_ace *) (acl_base + acl_size);
+ acl_base = (char *)ppace[i];
+ acl_size = offsetof(struct smb_ace, sid) +
+ offsetof(struct smb_sid, sub_auth);
+
+ if (end_of_acl - acl_base < acl_size ||
+ ppace[i]->sid.num_subauth == 0 ||
+ ppace[i]->sid.num_subauth > SID_MAX_SUB_AUTHORITIES ||
+ (end_of_acl - acl_base <
+ acl_size + sizeof(__le32) * ppace[i]->sid.num_subauth) ||
+ (le16_to_cpu(ppace[i]->size) <
+ acl_size + sizeof(__le32) * ppace[i]->sid.num_subauth))
+ break;
+
#ifdef CONFIG_CIFS_DEBUG2
dump_ace(ppace[i], end_of_acl);
#endif
@@ -851,7 +871,6 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl,
(void *)ppace[i],
sizeof(struct smb_ace)); */
- acl_base = (char *)ppace[i];
acl_size = le16_to_cpu(ppace[i]->size);
}
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index c46d418c1c0c..ca33f6cd6a80 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -1226,10 +1226,9 @@ typedef struct smb_com_query_information_rsp {
typedef struct smb_com_setattr_req {
struct smb_hdr hdr; /* wct = 8 */
__le16 attr;
- __le16 time_low;
- __le16 time_high;
+ __le32 last_write_time;
__le16 reserved[5]; /* must be zero */
- __u16 ByteCount;
+ __le16 ByteCount;
__u8 BufferFormat; /* 4 = ASCII */
unsigned char fileName[];
} __attribute__((packed)) SETATTR_REQ;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 85b0a30493a6..c6d325666b5c 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -31,6 +31,9 @@ extern void cifs_small_buf_release(void *);
extern void free_rsp_buf(int, void *);
extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *,
unsigned int /* length */);
+extern int smb_send_kvec(struct TCP_Server_Info *server,
+ struct msghdr *msg,
+ size_t *sent);
extern unsigned int _get_xid(void);
extern void _free_xid(unsigned int);
#define get_xid() \
@@ -158,6 +161,8 @@ extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
struct cifsFileInfo **ret_file);
+extern int cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode,
+ struct file *file);
extern unsigned int smbCalcSize(void *buf);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
@@ -393,6 +398,10 @@ extern int CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon);
extern int CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon,
struct kstatfs *FSData);
+extern int SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon,
+ const char *fileName, __le32 attributes, __le64 write_time,
+ const struct nls_table *nls_codepage,
+ struct cifs_sb_info *cifs_sb);
extern int CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon,
const char *fileName, const FILE_BASIC_INFO *data,
const struct nls_table *nls_codepage,
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 769950adb776..c36ab20050c1 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -2738,10 +2738,10 @@ int cifs_query_reparse_point(const unsigned int xid,
io_req->TotalParameterCount = 0;
io_req->TotalDataCount = 0;
- io_req->MaxParameterCount = cpu_to_le32(2);
+ io_req->MaxParameterCount = cpu_to_le32(0);
/* BB find exact data count max from sess structure BB */
io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00);
- io_req->MaxSetupCount = 4;
+ io_req->MaxSetupCount = 1;
io_req->Reserved = 0;
io_req->ParameterOffset = 0;
io_req->DataCount = 0;
@@ -2768,6 +2768,22 @@ int cifs_query_reparse_point(const unsigned int xid,
goto error;
}
+ /* SetupCount must be 1, otherwise offset to ByteCount is incorrect. */
+ if (io_rsp->SetupCount != 1) {
+ rc = -EIO;
+ goto error;
+ }
+
+ /*
+ * ReturnedDataLen is output length of executed IOCTL.
+ * DataCount is output length transferred over network.
+ * Check that we have full FSCTL_GET_REPARSE_POINT buffer.
+ */
+ if (data_count != le16_to_cpu(io_rsp->ReturnedDataLen)) {
+ rc = -EIO;
+ goto error;
+ }
+
end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount;
start = (__u8 *)&io_rsp->hdr.Protocol + data_offset;
if (start >= end) {
@@ -5157,6 +5173,63 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+int
+SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon,
+ const char *fileName, __le32 attributes, __le64 write_time,
+ const struct nls_table *nls_codepage,
+ struct cifs_sb_info *cifs_sb)
+{
+ SETATTR_REQ *pSMB;
+ SETATTR_RSP *pSMBr;
+ struct timespec64 ts;
+ int bytes_returned;
+ int name_len;
+ int rc;
+
+ cifs_dbg(FYI, "In %s path %s\n", __func__, fileName);
+
+retry:
+ rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB,
+ (void **) &pSMBr);
+ if (rc)
+ return rc;
+
+ if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
+ name_len =
+ cifsConvertToUTF16((__le16 *) pSMB->fileName,
+ fileName, PATH_MAX, nls_codepage,
+ cifs_remap(cifs_sb));
+ name_len++; /* trailing null */
+ name_len *= 2;
+ } else {
+ name_len = copy_path_name(pSMB->fileName, fileName);
+ }
+ /* Only few attributes can be set by this command, others are not accepted by Win9x. */
+ pSMB->attr = cpu_to_le16(le32_to_cpu(attributes) &
+ (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | ATTR_ARCHIVE));
+ /* Zero write time value (in both NT and SETATTR formats) means to not change it. */
+ if (le64_to_cpu(write_time) != 0) {
+ ts = cifs_NTtimeToUnix(write_time);
+ pSMB->last_write_time = cpu_to_le32(ts.tv_sec);
+ }
+ pSMB->BufferFormat = 0x04;
+ name_len++; /* account for buffer type byte */
+ inc_rfc1001_len(pSMB, (__u16)name_len);
+ pSMB->ByteCount = cpu_to_le16(name_len);
+
+ rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+ (struct smb_hdr *) pSMBr, &bytes_returned, 0);
+ if (rc)
+ cifs_dbg(FYI, "Send error in %s = %d\n", __func__, rc);
+
+ cifs_buf_release(pSMB);
+
+ if (rc == -EAGAIN)
+ goto retry;
+
+ return rc;
+}
+
/* Some legacy servers such as NT4 require that the file times be set on
an open handle, rather than by pathname - this is awkward due to
potential access conflicts on the open, but it is unavoidable for these
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 198681d14153..3faaee33ad45 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -1003,13 +1003,9 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
msleep(125);
if (cifs_rdma_enabled(server))
smbd_destroy(server);
-
if (server->ssocket) {
sock_release(server->ssocket);
server->ssocket = NULL;
-
- /* Release netns reference for the socket. */
- put_net(cifs_net_ns(server));
}
if (!list_empty(&server->pending_mid_q)) {
@@ -1058,7 +1054,6 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
*/
}
- /* Release netns reference for this server. */
put_net(cifs_net_ns(server));
kfree(server->leaf_fullpath);
kfree(server->hostname);
@@ -1730,8 +1725,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
tcp_ses->ops = ctx->ops;
tcp_ses->vals = ctx->vals;
-
- /* Grab netns reference for this server. */
cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId);
@@ -1863,7 +1856,6 @@ smbd_connected:
out_err_crypto_release:
cifs_crypto_secmech_release(tcp_ses);
- /* Release netns reference for this server. */
put_net(cifs_net_ns(tcp_ses));
out_err:
@@ -1872,10 +1864,8 @@ out_err:
cifs_put_tcp_session(tcp_ses->primary_server, false);
kfree(tcp_ses->hostname);
kfree(tcp_ses->leaf_fullpath);
- if (tcp_ses->ssocket) {
+ if (tcp_ses->ssocket)
sock_release(tcp_ses->ssocket);
- put_net(cifs_net_ns(tcp_ses));
- }
kfree(tcp_ses);
}
return ERR_PTR(rc);
@@ -2487,6 +2477,8 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
return 0;
if (tcon->nodelete != ctx->nodelete)
return 0;
+ if (tcon->posix_extensions != ctx->linux_ext)
+ return 0;
return 1;
}
@@ -3059,8 +3051,10 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
* sessinit is sent but no second negprot
*/
struct rfc1002_session_packet req = {};
- struct smb_hdr *smb_buf = (struct smb_hdr *)&req;
+ struct msghdr msg = {};
+ struct kvec iov = {};
unsigned int len;
+ size_t sent;
req.trailer.session_req.called_len = sizeof(req.trailer.session_req.called_name);
@@ -3089,10 +3083,18 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
* As per rfc1002, @len must be the number of bytes that follows the
* length field of a rfc1002 session request payload.
*/
- len = sizeof(req) - offsetof(struct rfc1002_session_packet, trailer.session_req);
+ len = sizeof(req.trailer.session_req);
+ req.type = RFC1002_SESSION_REQUEST;
+ req.flags = 0;
+ req.length = cpu_to_be16(len);
+ len += offsetof(typeof(req), trailer.session_req);
+ iov.iov_base = &req;
+ iov.iov_len = len;
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, len);
+ rc = smb_send_kvec(server, &msg, &sent);
+ if (rc < 0 || len != sent)
+ return (rc == -EINTR || rc == -EAGAIN) ? rc : -ECONNABORTED;
- smb_buf->smb_buf_length = cpu_to_be32((RFC1002_SESSION_REQUEST << 24) | len);
- rc = smb_send(server, smb_buf, len);
/*
* RFC1001 layer in at least one server requires very short break before
* negprot presumably because not expecting negprot to follow so fast.
@@ -3101,7 +3103,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
*/
usleep_range(1000, 2000);
- return rc;
+ return 0;
}
static int
@@ -3137,20 +3139,20 @@ generic_ip_connect(struct TCP_Server_Info *server)
socket = server->ssocket;
} else {
struct net *net = cifs_net_ns(server);
+ struct sock *sk;
- rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket);
+ rc = __sock_create(net, sfamily, SOCK_STREAM,
+ IPPROTO_TCP, &server->ssocket, 1);
if (rc < 0) {
cifs_server_dbg(VFS, "Error %d creating socket\n", rc);
return rc;
}
- /*
- * Grab netns reference for the socket.
- *
- * It'll be released here, on error, or in clean_demultiplex_info() upon server
- * teardown.
- */
- get_net(net);
+ sk = server->ssocket->sk;
+ __netns_tracker_free(net, &sk->ns_tracker, false);
+ sk->sk_net_refcnt = 1;
+ get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
/* BB other socket options to set KEEPALIVE, NODELAY? */
cifs_dbg(FYI, "Socket created\n");
@@ -3164,10 +3166,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
}
rc = bind_socket(server);
- if (rc < 0) {
- put_net(cifs_net_ns(server));
+ if (rc < 0)
return rc;
- }
/*
* Eventually check for other socket options to change from
@@ -3204,7 +3204,6 @@ generic_ip_connect(struct TCP_Server_Info *server)
if (rc < 0) {
cifs_dbg(FYI, "Error %d connecting to server\n", rc);
trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc);
- put_net(cifs_net_ns(server));
sock_release(socket);
server->ssocket = NULL;
return rc;
@@ -3213,9 +3212,6 @@ generic_ip_connect(struct TCP_Server_Info *server)
if (sport == htons(RFC1001_PORT))
rc = ip_rfc1001_connect(server);
- if (rc < 0)
- put_net(cifs_net_ns(server));
-
return rc;
}
@@ -3960,11 +3956,13 @@ int
cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses,
struct TCP_Server_Info *server)
{
+ bool in_retry = false;
int rc = 0;
if (!server->ops->need_neg || !server->ops->negotiate)
return -ENOSYS;
+retry:
/* only send once per connect */
spin_lock(&server->srv_lock);
if (server->tcpStatus != CifsGood &&
@@ -3984,6 +3982,14 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses,
spin_unlock(&server->srv_lock);
rc = server->ops->negotiate(xid, ses, server);
+ if (rc == -EAGAIN) {
+ /* Allow one retry attempt */
+ if (!in_retry) {
+ in_retry = true;
+ goto retry;
+ }
+ rc = -EHOSTDOWN;
+ }
if (rc == 0) {
spin_lock(&server->srv_lock);
if (server->tcpStatus == CifsInNegotiate)
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index cb75b95efb70..d883ed75022c 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -816,6 +816,11 @@ int cifs_open(struct inode *inode, struct file *file)
} else {
_cifsFileInfo_put(cfile, true, false);
}
+ } else {
+ /* hard link on the defeered close file */
+ rc = cifs_get_hardlink_path(tcon, inode, file);
+ if (rc)
+ cifs_close_deferred_file(CIFS_I(inode));
}
if (server->oplocks)
@@ -1878,6 +1883,29 @@ cifs_move_llist(struct list_head *source, struct list_head *dest)
list_move(li, dest);
}
+int
+cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode,
+ struct file *file)
+{
+ struct cifsFileInfo *open_file = NULL;
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+ int rc = 0;
+
+ spin_lock(&tcon->open_file_lock);
+ spin_lock(&cinode->open_file_lock);
+
+ list_for_each_entry(open_file, &cinode->openFileList, flist) {
+ if (file->f_flags == open_file->f_flags) {
+ rc = -EINVAL;
+ break;
+ }
+ }
+
+ spin_unlock(&cinode->open_file_lock);
+ spin_unlock(&tcon->open_file_lock);
+ return rc;
+}
+
void
cifs_free_llist(struct list_head *llist)
{
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index b90cc918de7a..137d03781d52 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -1249,6 +1249,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
case Opt_rsize:
ctx->rsize = result.uint_32;
ctx->got_rsize = true;
+ ctx->vol_rsize = ctx->rsize;
break;
case Opt_wsize:
ctx->wsize = result.uint_32;
@@ -1264,6 +1265,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->wsize, PAGE_SIZE);
}
}
+ ctx->vol_wsize = ctx->wsize;
break;
case Opt_acregmax:
if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) {
@@ -1299,6 +1301,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->closetimeo = HZ * result.uint_32;
break;
case Opt_echo_interval:
+ if (result.uint_32 < SMB_ECHO_INTERVAL_MIN ||
+ result.uint_32 > SMB_ECHO_INTERVAL_MAX) {
+ cifs_errorf(fc, "echo interval is out of bounds\n");
+ goto cifs_parse_mount_err;
+ }
ctx->echo_interval = result.uint_32;
break;
case Opt_snapshot:
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index bbd2063ab838..d0a2043ea446 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -253,6 +253,9 @@ struct smb3_fs_context {
bool use_client_guid:1;
/* reuse existing guid for multichannel */
u8 client_guid[SMB2_CLIENT_GUID_SIZE];
+ /* User-specified original r/wsize value */
+ unsigned int vol_rsize;
+ unsigned int vol_wsize;
unsigned int bsize;
unsigned int rasize;
unsigned int rsize;
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index b9cf05e0940d..d93ebd58ecae 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1145,6 +1145,16 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
cifs_create_junction_fattr(fattr, sb);
goto out;
}
+ /*
+ * If the reparse point is unsupported by the Linux SMB
+ * client then let it process by the SMB server. So mask
+ * the -EOPNOTSUPP error code. This will allow Linux SMB
+ * client to send SMB OPEN request to server. If server
+ * does not support this reparse point too then server
+ * will return error during open the path.
+ */
+ if (rc == -EOPNOTSUPP)
+ rc = 0;
}
break;
}
diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c
index d86da949a919..007da0a699cb 100644
--- a/fs/smb/client/link.c
+++ b/fs/smb/client/link.c
@@ -257,7 +257,7 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct cifs_io_parms io_parms = {0};
int buf_type = CIFS_NO_BUFFER;
- FILE_ALL_INFO file_info;
+ struct cifs_open_info_data query_data;
oparms = (struct cifs_open_parms) {
.tcon = tcon,
@@ -269,11 +269,11 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
.fid = &fid,
};
- rc = CIFS_open(xid, &oparms, &oplock, &file_info);
+ rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &query_data);
if (rc)
return rc;
- if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
+ if (query_data.fi.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
rc = -ENOENT;
/* it's not a symlink */
goto out;
@@ -312,7 +312,7 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
.fid = &fid,
};
- rc = CIFS_open(xid, &oparms, &oplock, NULL);
+ rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, NULL);
if (rc)
return rc;
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index 75929a0a56f9..e616be8196de 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -733,7 +733,10 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
else
cifs_buf_release(cfile->srch_inf.
ntwrk_buf_start);
+ /* Reset all pointers to the network buffer to prevent stale references */
cfile->srch_inf.ntwrk_buf_start = NULL;
+ cfile->srch_inf.srch_entries_start = NULL;
+ cfile->srch_inf.last_entry = NULL;
}
rc = initiate_cifs_search(xid, file, full_path);
if (rc) {
@@ -756,11 +759,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
rc = server->ops->query_dir_next(xid, tcon, &cfile->fid,
search_flags,
&cfile->srch_inf);
+ if (rc)
+ return -ENOENT;
/* FindFirst/Next set last_entry to NULL on malformed reply */
if (cfile->srch_inf.last_entry)
cifs_save_resume_key(cfile->srch_inf.last_entry, cfile);
- if (rc)
- return -ENOENT;
}
if (index_to_find < cfile->srch_inf.index_of_last_entry) {
/* we found the buffer that contains the entry */
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index bb246ef0458f..b6556fe3dfa1 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -633,8 +633,6 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
const char *full_path,
bool unicode, struct cifs_open_info_data *data)
{
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
-
data->reparse.buf = buf;
/* See MS-FSCC 2.1.2 */
@@ -658,8 +656,6 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
}
return 0;
default:
- cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n",
- le32_to_cpu(buf->ReparseTag));
return -EOPNOTSUPP;
}
}
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index c2a98b273664..f04922eb45d4 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -732,6 +732,22 @@ unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
*pbcc_area = bcc_ptr;
}
+static void
+ascii_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
+{
+ char *bcc_ptr = *pbcc_area;
+
+ strcpy(bcc_ptr, "Linux version ");
+ bcc_ptr += strlen("Linux version ");
+ strcpy(bcc_ptr, init_utsname()->release);
+ bcc_ptr += strlen(init_utsname()->release) + 1;
+
+ strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
+ bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
+
+ *pbcc_area = bcc_ptr;
+}
+
static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses,
const struct nls_table *nls_cp)
{
@@ -756,6 +772,25 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses,
*pbcc_area = bcc_ptr;
}
+static void ascii_domain_string(char **pbcc_area, struct cifs_ses *ses,
+ const struct nls_table *nls_cp)
+{
+ char *bcc_ptr = *pbcc_area;
+ int len;
+
+ /* copy domain */
+ if (ses->domainName != NULL) {
+ len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+ if (WARN_ON_ONCE(len < 0))
+ len = CIFS_MAX_DOMAINNAME_LEN - 1;
+ bcc_ptr += len;
+ } /* else we send a null domain name so server will default to its own domain */
+ *bcc_ptr = 0;
+ bcc_ptr++;
+
+ *pbcc_area = bcc_ptr;
+}
+
static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
const struct nls_table *nls_cp)
{
@@ -801,25 +836,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
*bcc_ptr = 0;
bcc_ptr++; /* account for null termination */
- /* copy domain */
- if (ses->domainName != NULL) {
- len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
- if (WARN_ON_ONCE(len < 0))
- len = CIFS_MAX_DOMAINNAME_LEN - 1;
- bcc_ptr += len;
- } /* else we send a null domain name so server will default to its own domain */
- *bcc_ptr = 0;
- bcc_ptr++;
-
/* BB check for overflow here */
- strcpy(bcc_ptr, "Linux version ");
- bcc_ptr += strlen("Linux version ");
- strcpy(bcc_ptr, init_utsname()->release);
- bcc_ptr += strlen(init_utsname()->release) + 1;
-
- strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
- bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
+ ascii_domain_string(&bcc_ptr, ses, nls_cp);
+ ascii_oslm_strings(&bcc_ptr, nls_cp);
*pbcc_area = bcc_ptr;
}
@@ -1622,7 +1642,7 @@ sess_auth_kerberos(struct sess_data *sess_data)
sess_data->iov[1].iov_len = msg->secblob_len;
pSMB->req.SecurityBlobLength = cpu_to_le16(sess_data->iov[1].iov_len);
- if (ses->capabilities & CAP_UNICODE) {
+ if (pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) {
/* unicode strings must be word aligned */
if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) {
*bcc_ptr = 0;
@@ -1631,8 +1651,8 @@ sess_auth_kerberos(struct sess_data *sess_data)
unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
unicode_domain_string(&bcc_ptr, ses, sess_data->nls_cp);
} else {
- /* BB: is this right? */
- ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
+ ascii_oslm_strings(&bcc_ptr, sess_data->nls_cp);
+ ascii_domain_string(&bcc_ptr, ses, sess_data->nls_cp);
}
sess_data->iov[2].iov_len = (long) bcc_ptr -
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index bc1bac36c1b2..e62d9cc592e0 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -426,13 +426,6 @@ cifs_negotiate(const unsigned int xid,
{
int rc;
rc = CIFSSMBNegotiate(xid, ses, server);
- if (rc == -EAGAIN) {
- /* retry only once on 1st time connection */
- set_credits(server, 1);
- rc = CIFSSMBNegotiate(xid, ses, server);
- if (rc == -EAGAIN)
- rc = -EHOSTDOWN;
- }
return rc;
}
@@ -444,8 +437,8 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
unsigned int wsize;
/* start with specified wsize, or default */
- if (ctx->wsize)
- wsize = ctx->wsize;
+ if (ctx->got_wsize)
+ wsize = ctx->vol_wsize;
else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
wsize = CIFS_DEFAULT_IOSIZE;
else
@@ -497,7 +490,7 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
else
defsize = server->maxBuf - sizeof(READ_RSP);
- rsize = ctx->rsize ? ctx->rsize : defsize;
+ rsize = ctx->got_rsize ? ctx->vol_rsize : defsize;
/*
* no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to
@@ -548,24 +541,104 @@ static int cifs_query_path_info(const unsigned int xid,
const char *full_path,
struct cifs_open_info_data *data)
{
- int rc;
+ int rc = -EOPNOTSUPP;
FILE_ALL_INFO fi = {};
+ struct cifs_search_info search_info = {};
+ bool non_unicode_wildcard = false;
data->symlink = false;
data->adjust_tz = false;
- /* could do find first instead but this returns more info */
- rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, cifs_sb->local_nls,
- cifs_remap(cifs_sb));
/*
- * BB optimize code so we do not make the above call when server claims
- * no NT SMB support and the above call failed at least once - set flag
- * in tcon or mount.
+ * First try CIFSSMBQPathInfo() function which returns more info
+ * (NumberOfLinks) than CIFSFindFirst() fallback function.
+ * Some servers like Win9x do not support SMB_QUERY_FILE_ALL_INFO over
+ * TRANS2_QUERY_PATH_INFORMATION, but supports it with filehandle over
+ * TRANS2_QUERY_FILE_INFORMATION (function CIFSSMBQFileInfo(). But SMB
+ * Open command on non-NT servers works only for files, does not work
+ * for directories. And moreover Win9x SMB server returns bogus data in
+ * SMB_QUERY_FILE_ALL_INFO Attributes field. So for non-NT servers,
+ * do not even use CIFSSMBQPathInfo() or CIFSSMBQFileInfo() function.
*/
- if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) {
+ if (tcon->ses->capabilities & CAP_NT_SMBS)
+ rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */,
+ cifs_sb->local_nls, cifs_remap(cifs_sb));
+
+ /*
+ * Non-UNICODE variant of fallback functions below expands wildcards,
+ * so they cannot be used for querying paths with wildcard characters.
+ */
+ if (rc && !(tcon->ses->capabilities & CAP_UNICODE) && strpbrk(full_path, "*?\"><"))
+ non_unicode_wildcard = true;
+
+ /*
+ * Then fallback to CIFSFindFirst() which works also with non-NT servers
+ * but does not does not provide NumberOfLinks.
+ */
+ if ((rc == -EOPNOTSUPP || rc == -EINVAL) &&
+ !non_unicode_wildcard) {
+ if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find))
+ search_info.info_level = SMB_FIND_FILE_INFO_STANDARD;
+ else
+ search_info.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO;
+ rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb, NULL,
+ CIFS_SEARCH_CLOSE_ALWAYS | CIFS_SEARCH_CLOSE_AT_END,
+ &search_info, false);
+ if (rc == 0) {
+ if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) {
+ FIND_FILE_STANDARD_INFO *di;
+ int offset = tcon->ses->server->timeAdj;
+
+ di = (FIND_FILE_STANDARD_INFO *)search_info.srch_entries_start;
+ fi.CreationTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(
+ di->CreationDate, di->CreationTime, offset)));
+ fi.LastAccessTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(
+ di->LastAccessDate, di->LastAccessTime, offset)));
+ fi.LastWriteTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(
+ di->LastWriteDate, di->LastWriteTime, offset)));
+ fi.ChangeTime = fi.LastWriteTime;
+ fi.Attributes = cpu_to_le32(le16_to_cpu(di->Attributes));
+ fi.AllocationSize = cpu_to_le64(le32_to_cpu(di->AllocationSize));
+ fi.EndOfFile = cpu_to_le64(le32_to_cpu(di->DataSize));
+ } else {
+ FILE_FULL_DIRECTORY_INFO *di;
+
+ di = (FILE_FULL_DIRECTORY_INFO *)search_info.srch_entries_start;
+ fi.CreationTime = di->CreationTime;
+ fi.LastAccessTime = di->LastAccessTime;
+ fi.LastWriteTime = di->LastWriteTime;
+ fi.ChangeTime = di->ChangeTime;
+ fi.Attributes = di->ExtFileAttributes;
+ fi.AllocationSize = di->AllocationSize;
+ fi.EndOfFile = di->EndOfFile;
+ fi.EASize = di->EaSize;
+ }
+ fi.NumberOfLinks = cpu_to_le32(1);
+ fi.DeletePending = 0;
+ fi.Directory = !!(le32_to_cpu(fi.Attributes) & ATTR_DIRECTORY);
+ cifs_buf_release(search_info.ntwrk_buf_start);
+ } else if (!full_path[0]) {
+ /*
+ * CIFSFindFirst() does not work on root path if the
+ * root path was exported on the server from the top
+ * level path (drive letter).
+ */
+ rc = -EOPNOTSUPP;
+ }
+ }
+
+ /*
+ * If everything failed then fallback to the legacy SMB command
+ * SMB_COM_QUERY_INFORMATION which works with all servers, but
+ * provide just few information.
+ */
+ if ((rc == -EOPNOTSUPP || rc == -EINVAL) && !non_unicode_wildcard) {
rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls,
cifs_remap(cifs_sb));
data->adjust_tz = true;
+ } else if ((rc == -EOPNOTSUPP || rc == -EINVAL) && non_unicode_wildcard) {
+ /* Path with non-UNICODE wildcard character cannot exist. */
+ rc = -ENOENT;
}
if (!rc) {
@@ -597,6 +670,42 @@ static int cifs_query_path_info(const unsigned int xid,
CIFSSMBClose(xid, tcon, fid.netfid);
}
+#ifdef CONFIG_CIFS_XATTR
+ /*
+ * For WSL CHR and BLK reparse points it is required to fetch
+ * EA $LXDEV which contains major and minor device numbers.
+ */
+ if (!rc && data->reparse_point) {
+ struct smb2_file_full_ea_info *ea;
+
+ ea = (struct smb2_file_full_ea_info *)data->wsl.eas;
+ rc = CIFSSMBQAllEAs(xid, tcon, full_path, SMB2_WSL_XATTR_DEV,
+ &ea->ea_data[SMB2_WSL_XATTR_NAME_LEN + 1],
+ SMB2_WSL_XATTR_DEV_SIZE, cifs_sb);
+ if (rc == SMB2_WSL_XATTR_DEV_SIZE) {
+ ea->next_entry_offset = cpu_to_le32(0);
+ ea->flags = 0;
+ ea->ea_name_length = SMB2_WSL_XATTR_NAME_LEN;
+ ea->ea_value_length = cpu_to_le16(SMB2_WSL_XATTR_DEV_SIZE);
+ memcpy(&ea->ea_data[0], SMB2_WSL_XATTR_DEV, SMB2_WSL_XATTR_NAME_LEN + 1);
+ data->wsl.eas_len = sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 +
+ SMB2_WSL_XATTR_DEV_SIZE;
+ rc = 0;
+ } else if (rc >= 0) {
+ /* It is an error if EA $LXDEV has wrong size. */
+ rc = -EINVAL;
+ } else {
+ /*
+ * In all other cases ignore error if fetching
+ * of EA $LXDEV failed. It is needed only for
+ * WSL CHR and BLK reparse points and wsl_to_fattr()
+ * handle the case when EA is missing.
+ */
+ rc = 0;
+ }
+ }
+#endif
+
return rc;
}
@@ -626,6 +735,13 @@ static int cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc;
FILE_ALL_INFO fi = {};
+ /*
+ * CIFSSMBQFileInfo() for non-NT servers returns bogus data in
+ * Attributes fields. So do not use this command for non-NT servers.
+ */
+ if (!(tcon->ses->capabilities & CAP_NT_SMBS))
+ return -EOPNOTSUPP;
+
if (cfile->symlink_target) {
data->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL);
if (!data->symlink_target)
@@ -796,6 +912,9 @@ smb_set_file_info(struct inode *inode, const char *full_path,
struct cifs_fid fid;
struct cifs_open_parms oparms;
struct cifsFileInfo *open_file;
+ FILE_BASIC_INFO new_buf;
+ struct cifs_open_info_data query_data;
+ __le64 write_time = buf->LastWriteTime;
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct tcon_link *tlink = NULL;
@@ -803,20 +922,58 @@ smb_set_file_info(struct inode *inode, const char *full_path,
/* if the file is already open for write, just use that fileid */
open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY);
+
if (open_file) {
fid.netfid = open_file->fid.netfid;
netpid = open_file->pid;
tcon = tlink_tcon(open_file->tlink);
- goto set_via_filehandle;
+ } else {
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink)) {
+ rc = PTR_ERR(tlink);
+ tlink = NULL;
+ goto out;
+ }
+ tcon = tlink_tcon(tlink);
}
- tlink = cifs_sb_tlink(cifs_sb);
- if (IS_ERR(tlink)) {
- rc = PTR_ERR(tlink);
- tlink = NULL;
- goto out;
+ /*
+ * Non-NT servers interprets zero time value in SMB_SET_FILE_BASIC_INFO
+ * over TRANS2_SET_FILE_INFORMATION as a valid time value. NT servers
+ * interprets zero time value as do not change existing value on server.
+ * API of ->set_file_info() callback expects that zero time value has
+ * the NT meaning - do not change. Therefore if server is non-NT and
+ * some time values in "buf" are zero, then fetch missing time values.
+ */
+ if (!(tcon->ses->capabilities & CAP_NT_SMBS) &&
+ (!buf->CreationTime || !buf->LastAccessTime ||
+ !buf->LastWriteTime || !buf->ChangeTime)) {
+ rc = cifs_query_path_info(xid, tcon, cifs_sb, full_path, &query_data);
+ if (rc) {
+ if (open_file) {
+ cifsFileInfo_put(open_file);
+ open_file = NULL;
+ }
+ goto out;
+ }
+ /*
+ * Original write_time from buf->LastWriteTime is preserved
+ * as SMBSetInformation() interprets zero as do not change.
+ */
+ new_buf = *buf;
+ buf = &new_buf;
+ if (!buf->CreationTime)
+ buf->CreationTime = query_data.fi.CreationTime;
+ if (!buf->LastAccessTime)
+ buf->LastAccessTime = query_data.fi.LastAccessTime;
+ if (!buf->LastWriteTime)
+ buf->LastWriteTime = query_data.fi.LastWriteTime;
+ if (!buf->ChangeTime)
+ buf->ChangeTime = query_data.fi.ChangeTime;
}
- tcon = tlink_tcon(tlink);
+
+ if (open_file)
+ goto set_via_filehandle;
rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls,
cifs_sb);
@@ -837,8 +994,45 @@ smb_set_file_info(struct inode *inode, const char *full_path,
.fid = &fid,
};
- cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n");
- rc = CIFS_open(xid, &oparms, &oplock, NULL);
+ if (S_ISDIR(inode->i_mode) && !(tcon->ses->capabilities & CAP_NT_SMBS)) {
+ /* Opening directory path is not possible on non-NT servers. */
+ rc = -EOPNOTSUPP;
+ } else {
+ /*
+ * Use cifs_open_file() instead of CIFS_open() as the
+ * cifs_open_file() selects the correct function which
+ * works also on non-NT servers.
+ */
+ rc = cifs_open_file(xid, &oparms, &oplock, NULL);
+ /*
+ * Opening path for writing on non-NT servers is not
+ * possible when the read-only attribute is already set.
+ * Non-NT server in this case returns -EACCES. For those
+ * servers the only possible way how to clear the read-only
+ * bit is via SMB_COM_SETATTR command.
+ */
+ if (rc == -EACCES &&
+ (cinode->cifsAttrs & ATTR_READONLY) &&
+ le32_to_cpu(buf->Attributes) != 0 && /* 0 = do not change attrs */
+ !(le32_to_cpu(buf->Attributes) & ATTR_READONLY) &&
+ !(tcon->ses->capabilities & CAP_NT_SMBS))
+ rc = -EOPNOTSUPP;
+ }
+
+ /* Fallback to SMB_COM_SETATTR command when absolutelty needed. */
+ if (rc == -EOPNOTSUPP) {
+ cifs_dbg(FYI, "calling SetInformation since SetPathInfo for attrs/times not supported by this server\n");
+ rc = SMBSetInformation(xid, tcon, full_path,
+ buf->Attributes != 0 ? buf->Attributes : cpu_to_le32(cinode->cifsAttrs),
+ write_time,
+ cifs_sb->local_nls, cifs_sb);
+ if (rc == 0)
+ cinode->cifsAttrs = le32_to_cpu(buf->Attributes);
+ else
+ rc = -EACCES;
+ goto out;
+ }
+
if (rc != 0) {
if (rc == -EIO)
rc = -EINVAL;
@@ -846,6 +1040,7 @@ smb_set_file_info(struct inode *inode, const char *full_path,
}
netpid = current->tgid;
+ cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for attrs/times not supported by this server\n");
set_via_filehandle:
rc = CIFSSMBSetFileInfo(xid, tcon, buf, fid.netfid, netpid);
@@ -856,6 +1051,21 @@ set_via_filehandle:
CIFSSMBClose(xid, tcon, fid.netfid);
else
cifsFileInfo_put(open_file);
+
+ /*
+ * Setting the read-only bit is not honered on non-NT servers when done
+ * via open-semantics. So for setting it, use SMB_COM_SETATTR command.
+ * This command works only after the file is closed, so use it only when
+ * operation was called without the filehandle.
+ */
+ if (open_file == NULL &&
+ !(tcon->ses->capabilities & CAP_NT_SMBS) &&
+ le32_to_cpu(buf->Attributes) & ATTR_READONLY) {
+ SMBSetInformation(xid, tcon, full_path,
+ buf->Attributes,
+ 0 /* do not change write time */,
+ cifs_sb->local_nls, cifs_sb);
+ }
out:
if (tlink != NULL)
cifs_put_tlink(tlink);
diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c
index db9c807115c6..d7f2835e0b1c 100644
--- a/fs/smb/client/smb2file.c
+++ b/fs/smb/client/smb2file.c
@@ -107,16 +107,25 @@ int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32
int err_buftype = CIFS_NO_BUFFER;
struct cifs_fid *fid = oparms->fid;
struct network_resiliency_req nr_ioctl_req;
+ bool retry_without_read_attributes = false;
smb2_path = cifs_convert_path_to_utf16(oparms->path, oparms->cifs_sb);
if (smb2_path == NULL)
return -ENOMEM;
- oparms->desired_access |= FILE_READ_ATTRIBUTES;
+ if (!(oparms->desired_access & FILE_READ_ATTRIBUTES)) {
+ oparms->desired_access |= FILE_READ_ATTRIBUTES;
+ retry_without_read_attributes = true;
+ }
smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH;
rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, &err_iov,
&err_buftype);
+ if (rc == -EACCES && retry_without_read_attributes) {
+ oparms->desired_access &= ~FILE_READ_ATTRIBUTES;
+ rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, &err_iov,
+ &err_buftype);
+ }
if (rc && data) {
struct smb2_hdr *hdr = err_iov.iov_base;
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index 677ef6f99a5b..fadc5fc274eb 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -816,11 +816,12 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid,
WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative");
spin_unlock(&cifs_tcp_ses_lock);
- if (tcon->ses)
+ if (tcon->ses) {
server = tcon->ses->server;
-
- cifs_server_dbg(FYI, "tid=0x%x: tcon is closing, skipping async close retry of fid %llu %llu\n",
- tcon->tid, persistent_fid, volatile_fid);
+ cifs_server_dbg(FYI,
+ "tid=0x%x: tcon is closing, skipping async close retry of fid %llu %llu\n",
+ tcon->tid, persistent_fid, volatile_fid);
+ }
return 0;
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index b809a616728f..4e3eacbec96d 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -428,12 +428,20 @@ smb2_negotiate(const unsigned int xid,
server->CurrentMid = 0;
spin_unlock(&server->mid_lock);
rc = SMB2_negotiate(xid, ses, server);
- /* BB we probably don't need to retry with modern servers */
- if (rc == -EAGAIN)
- rc = -EHOSTDOWN;
return rc;
}
+static inline unsigned int
+prevent_zero_iosize(unsigned int size, const char *type)
+{
+ if (size == 0) {
+ cifs_dbg(VFS, "SMB: Zero %ssize calculated, using minimum value %u\n",
+ type, CIFS_MIN_DEFAULT_IOSIZE);
+ return CIFS_MIN_DEFAULT_IOSIZE;
+ }
+ return size;
+}
+
static unsigned int
smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
{
@@ -441,12 +449,12 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
unsigned int wsize;
/* start with specified wsize, or default */
- wsize = ctx->wsize ? ctx->wsize : CIFS_DEFAULT_IOSIZE;
+ wsize = ctx->got_wsize ? ctx->vol_wsize : CIFS_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write);
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
- return wsize;
+ return prevent_zero_iosize(wsize, "w");
}
static unsigned int
@@ -456,7 +464,7 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
unsigned int wsize;
/* start with specified wsize, or default */
- wsize = ctx->wsize ? ctx->wsize : SMB3_DEFAULT_IOSIZE;
+ wsize = ctx->got_wsize ? ctx->vol_wsize : SMB3_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (server->rdma) {
@@ -478,7 +486,7 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
- return wsize;
+ return prevent_zero_iosize(wsize, "w");
}
static unsigned int
@@ -488,13 +496,13 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
unsigned int rsize;
/* start with specified rsize, or default */
- rsize = ctx->rsize ? ctx->rsize : CIFS_DEFAULT_IOSIZE;
+ rsize = ctx->got_rsize ? ctx->vol_rsize : CIFS_DEFAULT_IOSIZE;
rsize = min_t(unsigned int, rsize, server->max_read);
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
- return rsize;
+ return prevent_zero_iosize(rsize, "r");
}
static unsigned int
@@ -504,7 +512,7 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
unsigned int rsize;
/* start with specified rsize, or default */
- rsize = ctx->rsize ? ctx->rsize : SMB3_DEFAULT_IOSIZE;
+ rsize = ctx->got_rsize ? ctx->vol_rsize : SMB3_DEFAULT_IOSIZE;
rsize = min_t(unsigned int, rsize, server->max_read);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (server->rdma) {
@@ -527,7 +535,7 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
- return rsize;
+ return prevent_zero_iosize(rsize, "r");
}
/*
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 0af3535e08f3..3e88e8b3c16e 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -2932,6 +2932,7 @@ replay_again:
req->CreateContextsOffset = cpu_to_le32(
sizeof(struct smb2_create_req) +
iov[1].iov_len);
+ le32_add_cpu(&req->CreateContextsLength, iov[n_iov-1].iov_len);
pc_buf = iov[n_iov-1].iov_base;
}
@@ -2978,7 +2979,7 @@ replay_again:
/* Eventually save off posix specific response info and timestaps */
err_free_rsp_buf:
- free_rsp_buf(resp_buftype, rsp);
+ free_rsp_buf(resp_buftype, rsp_iov.iov_base);
kfree(pc_buf);
err_free_req:
cifs_small_buf_release(req);
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index ddf1a3aafee5..2269963e5008 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -178,7 +178,7 @@ delete_mid(struct mid_q_entry *mid)
* Our basic "send data to server" function. Should be called with srv_mutex
* held. The caller is responsible for handling the results.
*/
-static int
+int
smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
size_t *sent)
{
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index c3ee42188d25..1af827ae757e 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -95,6 +95,9 @@
*/
#define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024)
+/* According to MS-SMB2 specification The minimum recommended value is 65536.*/
+#define CIFS_MIN_DEFAULT_IOSIZE (65536)
+
/*
* SMB2 Header Definition
*
diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c
index 58380a986af5..f4b20b80af06 100644
--- a/fs/smb/server/auth.c
+++ b/fs/smb/server/auth.c
@@ -546,7 +546,19 @@ int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
retval = -ENOMEM;
goto out;
}
- sess->user = user;
+
+ if (!sess->user) {
+ /* First successful authentication */
+ sess->user = user;
+ } else {
+ if (!ksmbd_compare_user(sess->user, user)) {
+ ksmbd_debug(AUTH, "different user tried to reuse session\n");
+ retval = -EPERM;
+ ksmbd_free_user(user);
+ goto out;
+ }
+ ksmbd_free_user(user);
+ }
memcpy(sess->sess_key, resp->payload, resp->session_key_len);
memcpy(out_blob, resp->payload + resp->session_key_len,
@@ -1012,9 +1024,9 @@ static int ksmbd_get_encryption_key(struct ksmbd_work *work, __u64 ses_id,
ses_enc_key = enc ? sess->smb3encryptionkey :
sess->smb3decryptionkey;
- if (enc)
- ksmbd_user_session_get(sess);
memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE);
+ if (!enc)
+ ksmbd_user_session_put(sess);
return 0;
}
@@ -1213,7 +1225,7 @@ free_iv:
free_sg:
kfree(sg);
free_req:
- kfree(req);
+ aead_request_free(req);
free_ctx:
ksmbd_release_crypto_ctx(ctx);
return rc;
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 9a134181df61..82dcc86a32c5 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -180,7 +180,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn)
down_write(&sessions_table_lock);
down_write(&conn->session_lock);
xa_for_each(&conn->sessions, id, sess) {
- if (atomic_read(&sess->refcnt) == 0 &&
+ if (atomic_read(&sess->refcnt) <= 1 &&
(sess->state != SMB2_SESSION_VALID ||
time_after(jiffies,
sess->last_active + SMB2_SESSION_TIMEOUT))) {
@@ -229,7 +229,11 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
if (!ksmbd_chann_del(conn, sess) &&
xa_empty(&sess->ksmbd_chann_list)) {
hash_del(&sess->hlist);
- ksmbd_session_destroy(sess);
+ down_write(&conn->session_lock);
+ xa_erase(&conn->sessions, sess->id);
+ up_write(&conn->session_lock);
+ if (atomic_dec_and_test(&sess->refcnt))
+ ksmbd_session_destroy(sess);
}
}
}
@@ -248,13 +252,30 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
if (xa_empty(&sess->ksmbd_chann_list)) {
xa_erase(&conn->sessions, sess->id);
hash_del(&sess->hlist);
- ksmbd_session_destroy(sess);
+ if (atomic_dec_and_test(&sess->refcnt))
+ ksmbd_session_destroy(sess);
}
}
up_write(&conn->session_lock);
up_write(&sessions_table_lock);
}
+bool is_ksmbd_session_in_connection(struct ksmbd_conn *conn,
+ unsigned long long id)
+{
+ struct ksmbd_session *sess;
+
+ down_read(&conn->session_lock);
+ sess = xa_load(&conn->sessions, id);
+ if (sess) {
+ up_read(&conn->session_lock);
+ return true;
+ }
+ up_read(&conn->session_lock);
+
+ return false;
+}
+
struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
unsigned long long id)
{
@@ -308,8 +329,8 @@ void ksmbd_user_session_put(struct ksmbd_session *sess)
if (atomic_read(&sess->refcnt) <= 0)
WARN_ON(1);
- else
- atomic_dec(&sess->refcnt);
+ else if (atomic_dec_and_test(&sess->refcnt))
+ ksmbd_session_destroy(sess);
}
struct preauth_session *ksmbd_preauth_session_alloc(struct ksmbd_conn *conn,
@@ -414,7 +435,7 @@ static struct ksmbd_session *__session_create(int protocol)
xa_init(&sess->rpc_handle_list);
sess->sequence_number = 1;
rwlock_init(&sess->tree_conns_lock);
- atomic_set(&sess->refcnt, 1);
+ atomic_set(&sess->refcnt, 2);
ret = __init_smb2_session(sess);
if (ret)
diff --git a/fs/smb/server/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h
index c1c4b20bd5c6..f21348381d59 100644
--- a/fs/smb/server/mgmt/user_session.h
+++ b/fs/smb/server/mgmt/user_session.h
@@ -87,6 +87,8 @@ void ksmbd_session_destroy(struct ksmbd_session *sess);
struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id);
struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
unsigned long long id);
+bool is_ksmbd_session_in_connection(struct ksmbd_conn *conn,
+ unsigned long long id);
int ksmbd_session_register(struct ksmbd_conn *conn,
struct ksmbd_session *sess);
void ksmbd_sessions_deregister(struct ksmbd_conn *conn);
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 11e82a14a40a..e564432643ea 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -129,14 +129,6 @@ static void free_opinfo(struct oplock_info *opinfo)
kfree(opinfo);
}
-static inline void opinfo_free_rcu(struct rcu_head *rcu_head)
-{
- struct oplock_info *opinfo;
-
- opinfo = container_of(rcu_head, struct oplock_info, rcu_head);
- free_opinfo(opinfo);
-}
-
struct oplock_info *opinfo_get(struct ksmbd_file *fp)
{
struct oplock_info *opinfo;
@@ -154,12 +146,9 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
{
struct oplock_info *opinfo;
- if (list_empty(&ci->m_op_list))
- return NULL;
-
- rcu_read_lock();
- opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info,
- op_entry);
+ down_read(&ci->m_lock);
+ opinfo = list_first_entry_or_null(&ci->m_op_list, struct oplock_info,
+ op_entry);
if (opinfo) {
if (opinfo->conn == NULL ||
!atomic_inc_not_zero(&opinfo->refcount))
@@ -171,8 +160,7 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
}
}
}
-
- rcu_read_unlock();
+ up_read(&ci->m_lock);
return opinfo;
}
@@ -185,7 +173,7 @@ void opinfo_put(struct oplock_info *opinfo)
if (!atomic_dec_and_test(&opinfo->refcount))
return;
- call_rcu(&opinfo->rcu_head, opinfo_free_rcu);
+ free_opinfo(opinfo);
}
static void opinfo_add(struct oplock_info *opinfo)
@@ -193,7 +181,7 @@ static void opinfo_add(struct oplock_info *opinfo)
struct ksmbd_inode *ci = opinfo->o_fp->f_ci;
down_write(&ci->m_lock);
- list_add_rcu(&opinfo->op_entry, &ci->m_op_list);
+ list_add(&opinfo->op_entry, &ci->m_op_list);
up_write(&ci->m_lock);
}
@@ -207,7 +195,7 @@ static void opinfo_del(struct oplock_info *opinfo)
write_unlock(&lease_list_lock);
}
down_write(&ci->m_lock);
- list_del_rcu(&opinfo->op_entry);
+ list_del(&opinfo->op_entry);
up_write(&ci->m_lock);
}
@@ -724,8 +712,8 @@ static int smb2_oplock_break_noti(struct oplock_info *opinfo)
work->conn = conn;
work->sess = opinfo->sess;
+ ksmbd_conn_r_count_inc(conn);
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
- ksmbd_conn_r_count_inc(conn);
INIT_WORK(&work->work, __smb2_oplock_break_noti);
ksmbd_queue_work(work);
@@ -833,8 +821,8 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
work->conn = conn;
work->sess = opinfo->sess;
+ ksmbd_conn_r_count_inc(conn);
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
- ksmbd_conn_r_count_inc(conn);
INIT_WORK(&work->work, __smb2_lease_break_noti);
ksmbd_queue_work(work);
wait_for_break_ack(opinfo);
@@ -1347,8 +1335,8 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
ci = fp->f_ci;
op = opinfo_get(fp);
- rcu_read_lock();
- list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) {
+ down_read(&ci->m_lock);
+ list_for_each_entry(brk_op, &ci->m_op_list, op_entry) {
if (brk_op->conn == NULL)
continue;
@@ -1358,7 +1346,6 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
if (ksmbd_conn_releasing(brk_op->conn))
continue;
- rcu_read_unlock();
if (brk_op->is_lease && (brk_op->o_lease->state &
(~(SMB2_LEASE_READ_CACHING_LE |
SMB2_LEASE_HANDLE_CACHING_LE)))) {
@@ -1388,9 +1375,8 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE, NULL);
next:
opinfo_put(brk_op);
- rcu_read_lock();
}
- rcu_read_unlock();
+ up_read(&ci->m_lock);
if (op)
opinfo_put(op);
@@ -1505,6 +1491,10 @@ struct lease_ctx_info *parse_lease_state(void *open_req)
if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) {
struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
+ if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) <
+ sizeof(struct create_lease_v2) - 4)
+ goto err_out;
+
memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
lreq->req_state = lc->lcontext.LeaseState;
lreq->flags = lc->lcontext.LeaseFlags;
@@ -1517,6 +1507,10 @@ struct lease_ctx_info *parse_lease_state(void *open_req)
} else {
struct create_lease *lc = (struct create_lease *)cc;
+ if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) <
+ sizeof(struct create_lease))
+ goto err_out;
+
memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
lreq->req_state = lc->lcontext.LeaseState;
lreq->flags = lc->lcontext.LeaseFlags;
@@ -1524,6 +1518,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req)
lreq->version = 1;
}
return lreq;
+err_out:
+ kfree(lreq);
+ return NULL;
}
/**
diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h
index 59554b73f60c..0fe931485465 100644
--- a/fs/smb/server/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -78,7 +78,6 @@ struct oplock_info {
struct list_head lease_entry;
wait_queue_head_t oplock_q; /* Other server threads */
wait_queue_head_t oplock_brk; /* oplock breaking wait */
- struct rcu_head rcu_head;
};
struct lease_break_info {
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 58e5cc2b1f3e..9bd817427a34 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -632,6 +632,11 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls)
return name;
}
+ if (*name == '\0') {
+ kfree(name);
+ return ERR_PTR(-EINVAL);
+ }
+
if (*name == '\\') {
pr_err("not allow directory name included leading slash\n");
kfree(name);
@@ -1599,9 +1604,6 @@ static int krb5_authenticate(struct ksmbd_work *work,
if (prev_sess_id && prev_sess_id != sess->id)
destroy_previous_session(conn, sess->user, prev_sess_id);
- if (sess->state == SMB2_SESSION_VALID)
- ksmbd_free_user(sess->user);
-
retval = ksmbd_krb5_authenticate(sess, in_blob, in_len,
out_blob, &out_len);
if (retval) {
@@ -1707,44 +1709,38 @@ int smb2_sess_setup(struct ksmbd_work *work)
if (conn->dialect != sess->dialect) {
rc = -EINVAL;
- ksmbd_user_session_put(sess);
goto out_err;
}
if (!(req->hdr.Flags & SMB2_FLAGS_SIGNED)) {
rc = -EINVAL;
- ksmbd_user_session_put(sess);
goto out_err;
}
if (strncmp(conn->ClientGUID, sess->ClientGUID,
SMB2_CLIENT_GUID_SIZE)) {
rc = -ENOENT;
- ksmbd_user_session_put(sess);
goto out_err;
}
if (sess->state == SMB2_SESSION_IN_PROGRESS) {
rc = -EACCES;
- ksmbd_user_session_put(sess);
goto out_err;
}
if (sess->state == SMB2_SESSION_EXPIRED) {
rc = -EFAULT;
- ksmbd_user_session_put(sess);
goto out_err;
}
- ksmbd_user_session_put(sess);
if (ksmbd_conn_need_reconnect(conn)) {
rc = -EFAULT;
+ ksmbd_user_session_put(sess);
sess = NULL;
goto out_err;
}
- sess = ksmbd_session_lookup(conn, sess_id);
- if (!sess) {
+ if (is_ksmbd_session_in_connection(conn, sess_id)) {
rc = -EACCES;
goto out_err;
}
@@ -1910,6 +1906,8 @@ out_err:
sess->last_active = jiffies;
sess->state = SMB2_SESSION_EXPIRED;
+ ksmbd_user_session_put(sess);
+ work->sess = NULL;
if (try_delay) {
ksmbd_conn_set_need_reconnect(conn);
ssleep(5);
@@ -2235,13 +2233,14 @@ int smb2_session_logoff(struct ksmbd_work *work)
return -ENOENT;
}
- ksmbd_destroy_file_table(&sess->file_table);
down_write(&conn->session_lock);
sess->state = SMB2_SESSION_EXPIRED;
up_write(&conn->session_lock);
- ksmbd_free_user(sess->user);
- sess->user = NULL;
+ if (sess->user) {
+ ksmbd_free_user(sess->user);
+ sess->user = NULL;
+ }
ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_NEGOTIATE);
rsp->StructureSize = cpu_to_le16(4);
@@ -2704,6 +2703,13 @@ static int parse_durable_handle_context(struct ksmbd_work *work,
goto out;
}
+ if (le16_to_cpu(context->DataOffset) +
+ le32_to_cpu(context->DataLength) <
+ sizeof(struct create_durable_reconn_v2_req)) {
+ err = -EINVAL;
+ goto out;
+ }
+
recon_v2 = (struct create_durable_reconn_v2_req *)context;
persistent_id = recon_v2->Fid.PersistentFileId;
dh_info->fp = ksmbd_lookup_durable_fd(persistent_id);
@@ -2737,6 +2743,13 @@ static int parse_durable_handle_context(struct ksmbd_work *work,
goto out;
}
+ if (le16_to_cpu(context->DataOffset) +
+ le32_to_cpu(context->DataLength) <
+ sizeof(struct create_durable_reconn_req)) {
+ err = -EINVAL;
+ goto out;
+ }
+
recon = (struct create_durable_reconn_req *)context;
persistent_id = recon->Data.Fid.PersistentFileId;
dh_info->fp = ksmbd_lookup_durable_fd(persistent_id);
@@ -2762,6 +2775,13 @@ static int parse_durable_handle_context(struct ksmbd_work *work,
goto out;
}
+ if (le16_to_cpu(context->DataOffset) +
+ le32_to_cpu(context->DataLength) <
+ sizeof(struct create_durable_req_v2)) {
+ err = -EINVAL;
+ goto out;
+ }
+
durable_v2_blob =
(struct create_durable_req_v2 *)context;
ksmbd_debug(SMB, "Request for durable v2 open\n");
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c
index da8ed72f335d..b90f893762f4 100644
--- a/fs/smb/server/smbacl.c
+++ b/fs/smb/server/smbacl.c
@@ -270,6 +270,11 @@ static int sid_to_id(struct mnt_idmap *idmap,
return -EIO;
}
+ if (psid->num_subauth == 0) {
+ pr_err("%s: zero subauthorities!\n", __func__);
+ return -EIO;
+ }
+
if (sidtype == SIDOWNER) {
kuid_t uid;
uid_t id;
@@ -398,7 +403,9 @@ static void parse_dacl(struct mnt_idmap *idmap,
if (num_aces <= 0)
return;
- if (num_aces > ULONG_MAX / sizeof(struct smb_ace *))
+ if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) /
+ (offsetof(struct smb_ace, sid) +
+ offsetof(struct smb_sid, sub_auth) + sizeof(__le16)))
return;
ret = init_acl_state(&acl_state, num_aces);
@@ -432,6 +439,7 @@ static void parse_dacl(struct mnt_idmap *idmap,
offsetof(struct smb_sid, sub_auth);
if (end_of_acl - acl_base < acl_size ||
+ ppace[i]->sid.num_subauth == 0 ||
ppace[i]->sid.num_subauth > SID_MAX_SUB_AUTHORITIES ||
(end_of_acl - acl_base <
acl_size + sizeof(__le32) * ppace[i]->sid.num_subauth) ||
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 2d7cd7f42f27..281101fd1f76 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -296,7 +296,11 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
server_conf.signing = req->signing;
server_conf.tcp_port = req->tcp_port;
server_conf.ipc_timeout = req->ipc_timeout * HZ;
- server_conf.deadtime = req->deadtime * SMB_ECHO_INTERVAL;
+ if (check_mul_overflow(req->deadtime, SMB_ECHO_INTERVAL,
+ &server_conf.deadtime)) {
+ ret = -EINVAL;
+ goto out;
+ }
server_conf.share_fake_fscaps = req->share_fake_fscaps;
ksmbd_init_domain(req->sub_auth);
@@ -322,6 +326,7 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
ret |= ksmbd_set_work_group(req->work_group);
ret |= ksmbd_tcp_set_interfaces(KSMBD_STARTUP_CONFIG_INTERFACES(req),
req->ifc_list_sz);
+out:
if (ret) {
pr_err("Server configuration error: %s %s %s\n",
req->netbios_name, req->server_string,
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index d0c19ad9d014..3bbf23827060 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -426,10 +426,15 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n",
*pos, count);
+ if (*pos >= XATTR_SIZE_MAX) {
+ pr_err("stream write position %lld is out of bounds\n", *pos);
+ return -EINVAL;
+ }
+
size = *pos + count;
if (size > XATTR_SIZE_MAX) {
size = XATTR_SIZE_MAX;
- count = (*pos + count) - XATTR_SIZE_MAX;
+ count = XATTR_SIZE_MAX - *pos;
}
v_len = ksmbd_vfs_getcasexattr(idmap,
@@ -496,7 +501,8 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
int err = 0;
if (work->conn->connection_type) {
- if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) {
+ if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE)) ||
+ S_ISDIR(file_inode(fp->filp)->i_mode)) {
pr_err("no right to write(%pD)\n", fp->filp);
err = -EACCES;
goto out;
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 271a23abc82f..002f0864abee 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -644,21 +644,40 @@ __close_file_table_ids(struct ksmbd_file_table *ft,
bool (*skip)(struct ksmbd_tree_connect *tcon,
struct ksmbd_file *fp))
{
- unsigned int id;
- struct ksmbd_file *fp;
- int num = 0;
+ struct ksmbd_file *fp;
+ unsigned int id = 0;
+ int num = 0;
+
+ while (1) {
+ write_lock(&ft->lock);
+ fp = idr_get_next(ft->idr, &id);
+ if (!fp) {
+ write_unlock(&ft->lock);
+ break;
+ }
- idr_for_each_entry(ft->idr, fp, id) {
- if (skip(tcon, fp))
+ if (skip(tcon, fp) ||
+ !atomic_dec_and_test(&fp->refcount)) {
+ id++;
+ write_unlock(&ft->lock);
continue;
+ }
set_close_state_blocked_works(fp);
+ idr_remove(ft->idr, fp->volatile_id);
+ fp->volatile_id = KSMBD_NO_FID;
+ write_unlock(&ft->lock);
+
+ down_write(&fp->f_ci->m_lock);
+ list_del_init(&fp->node);
+ up_write(&fp->f_ci->m_lock);
- if (!atomic_dec_and_test(&fp->refcount))
- continue;
__ksmbd_close_fd(ft, fp);
+
num++;
+ id++;
}
+
return num;
}
diff --git a/fs/splice.c b/fs/splice.c
index d983d375ff11..6f9b06bbb860 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -45,7 +45,7 @@
* here if set to avoid blocking other users of this pipe if splice is
* being done on it.
*/
-static noinline void noinline pipe_clear_nowait(struct file *file)
+static noinline void pipe_clear_nowait(struct file *file)
{
fmode_t fmode = READ_ONCE(file->f_mode);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 22e812808e5c..3a27d4268b3c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -202,6 +202,11 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
+ if (!msblk->devblksize) {
+ errorf(fc, "squashfs: unable to set blocksize\n");
+ return -EINVAL;
+ }
+
msblk->devblksize_log2 = ffz(~msblk->devblksize);
mutex_init(&msblk->meta_index_mutex);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index e98c198f85b9..2f73119c7ec9 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -814,6 +814,7 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
}
map->oflags = UDF_BLK_MAPPED;
map->pblk = udf_get_lb_pblock(inode->i_sb, &eloc, offset);
+ ret = 0;
goto out_free;
}
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 4f33a4a48886..b4071c9cf8c9 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -115,7 +115,7 @@ void udf_truncate_tail_extent(struct inode *inode)
}
/* This inode entry is in-memory only and thus we don't have to mark
* the inode dirty */
- if (ret == 0)
+ if (ret >= 0)
iinfo->i_lenExtents = inode->i_size;
brelse(epos.bh);
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5d3e595f9da9..5ceb1fa8eb11 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -452,32 +452,6 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
goto out;
/*
- * If it's already released don't get it. This avoids to loop
- * in __get_user_pages if userfaultfd_release waits on the
- * caller of handle_userfault to release the mmap_lock.
- */
- if (unlikely(READ_ONCE(ctx->released))) {
- /*
- * Don't return VM_FAULT_SIGBUS in this case, so a non
- * cooperative manager can close the uffd after the
- * last UFFDIO_COPY, without risking to trigger an
- * involuntary SIGBUS if the process was starting the
- * userfaultfd while the userfaultfd was still armed
- * (but after the last UFFDIO_COPY). If the uffd
- * wasn't already closed when the userfault reached
- * this point, that would normally be solved by
- * userfaultfd_must_wait returning 'false'.
- *
- * If we were to return VM_FAULT_SIGBUS here, the non
- * cooperative manager would be instead forced to
- * always call UFFDIO_UNREGISTER before it can safely
- * close the uffd.
- */
- ret = VM_FAULT_NOPAGE;
- goto out;
- }
-
- /*
* Check that we can return VM_FAULT_RETRY.
*
* NOTE: it should become possible to return VM_FAULT_RETRY
@@ -513,6 +487,31 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
goto out;
+ if (unlikely(READ_ONCE(ctx->released))) {
+ /*
+ * If a concurrent release is detected, do not return
+ * VM_FAULT_SIGBUS or VM_FAULT_NOPAGE, but instead always
+ * return VM_FAULT_RETRY with lock released proactively.
+ *
+ * If we were to return VM_FAULT_SIGBUS here, the non
+ * cooperative manager would be instead forced to
+ * always call UFFDIO_UNREGISTER before it can safely
+ * close the uffd, to avoid involuntary SIGBUS triggered.
+ *
+ * If we were to return VM_FAULT_NOPAGE, it would work for
+ * the fault path, in which the lock will be released
+ * later. However for GUP, faultin_page() does nothing
+ * special on NOPAGE, so GUP would spin retrying without
+ * releasing the mmap read lock, causing possible livelock.
+ *
+ * Here only VM_FAULT_RETRY would make sure the mmap lock
+ * be released immediately, so that the thread concurrently
+ * releasing the userfault would always make progress.
+ */
+ release_fault_lock(vmf);
+ goto out;
+ }
+
/* take the reference before dropping the mmap_lock */
userfaultfd_ctx_get(ctx);
diff --git a/fs/xattr.c b/fs/xattr.c
index c20046548f21..5fed22c22a2b 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1291,6 +1291,15 @@ static bool xattr_is_trusted(const char *name)
return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
}
+static bool xattr_is_maclabel(const char *name)
+{
+ const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+
+ return !strncmp(name, XATTR_SECURITY_PREFIX,
+ XATTR_SECURITY_PREFIX_LEN) &&
+ security_ismaclabel(suffix);
+}
+
/**
* simple_xattr_list - list all xattr objects
* @inode: inode from which to get the xattrs
@@ -1323,6 +1332,17 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (err)
return err;
+ err = security_inode_listsecurity(inode, buffer, remaining_size);
+ if (err < 0)
+ return err;
+
+ if (buffer) {
+ if (remaining_size < err)
+ return -ERANGE;
+ buffer += err;
+ }
+ remaining_size -= err;
+
read_lock(&xattrs->lock);
for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
xattr = rb_entry(rbp, struct simple_xattr, rb_node);
@@ -1331,6 +1351,10 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (!trusted && xattr_is_trusted(xattr->name))
continue;
+ /* skip MAC labels; these are provided by LSM above */
+ if (xattr_is_maclabel(xattr->name))
+ continue;
+
err = xattr_list_one(&buffer, &remaining_size, xattr->name);
if (err)
break;