summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/relocation.c | 19
-rw-r--r-- fs/btrfs/tree-log.c | 53
-rw-r--r-- fs/buffer.c | 2
-rw-r--r-- fs/cifs/cifssmb.c | 10
-rw-r--r-- fs/cifs/file.c | 10
-rw-r--r-- fs/cifs/smb2ops.c | 7
-rw-r--r-- fs/cifs/smbdirect.c | 14
-rw-r--r-- fs/efivarfs/super.c | 4
-rw-r--r-- fs/eventpoll.c | 60
-rw-r--r-- fs/ext2/inode.c | 12
-rw-r--r-- fs/ext4/fsmap.c | 23
-rw-r--r-- fs/ext4/indirect.c | 4
-rw-r--r-- fs/ext4/inline.c | 19
-rw-r--r-- fs/ext4/inode.c | 2
-rw-r--r-- fs/ext4/mballoc.c | 33
-rw-r--r-- fs/ext4/orphan.c | 5
-rw-r--r-- fs/ext4/super.c | 2
-rw-r--r-- fs/f2fs/extent_cache.c | 2
-rw-r--r-- fs/f2fs/f2fs.h | 2
-rw-r--r-- fs/f2fs/inode.c | 28
-rw-r--r-- fs/f2fs/node.c | 10
-rw-r--r-- fs/file.c | 90
-rw-r--r-- fs/hfs/bnode.c | 93
-rw-r--r-- fs/hfsplus/bnode.c | 92
-rw-r--r-- fs/hfsplus/extents.c | 3
-rw-r--r-- fs/hfsplus/unicode.c | 7
-rw-r--r-- fs/hfsplus/xattr.c | 6
-rw-r--r-- fs/hugetlbfs/inode.c | 2
-rw-r--r-- fs/isofs/inode.c | 9
-rw-r--r-- fs/jbd2/checkpoint.c | 1
-rw-r--r-- fs/jfs/file.c | 3
-rw-r--r-- fs/jfs/inode.c | 2
-rw-r--r-- fs/jfs/jfs_dmap.c | 10
-rw-r--r-- fs/jfs/jfs_imap.c | 13
-rw-r--r-- fs/ksmbd/smb2pdu.c | 16
-rw-r--r-- fs/ksmbd/smb_common.c | 2
-rw-r--r-- fs/ksmbd/transport_rdma.c | 97
-rw-r--r-- fs/libfs.c | 4
-rw-r--r-- fs/namespace.c | 39
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c | 4
-rw-r--r-- fs/nfs/blocklayout/dev.c | 5
-rw-r--r-- fs/nfs/blocklayout/extent_tree.c | 20
-rw-r--r-- fs/nfs/client.c | 44
-rw-r--r-- fs/nfs/export.c | 11
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c | 26
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6
-rw-r--r-- fs/nfs/internal.h | 10
-rw-r--r-- fs/nfs/nfs4client.c | 15
-rw-r--r-- fs/nfs/nfs4proc.c | 12
-rw-r--r-- fs/nfs/pagelist.c | 86
-rw-r--r-- fs/nfs/pnfs.c | 11
-rw-r--r-- fs/nfs/write.c | 140
-rw-r--r-- fs/nfsd/nfs4state.c | 34
-rw-r--r-- fs/nilfs2/inode.c | 9
-rw-r--r-- fs/ntfs3/dir.c | 3
-rw-r--r-- fs/ntfs3/file.c | 5
-rw-r--r-- fs/ntfs3/inode.c | 31
-rw-r--r-- fs/orangefs/orangefs-debugfs.c | 8
-rw-r--r-- fs/squashfs/super.c | 14
-rw-r--r-- fs/udf/directory.c | 2
-rw-r--r-- fs/udf/super.c | 13
-rw-r--r-- fs/xfs/libxfs/xfs_attr_remote.c | 7
-rw-r--r-- fs/xfs/libxfs/xfs_da_btree.c | 6
63 files changed, 895 insertions, 437 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 887ae4a9c50c..04af8687759d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -749,6 +749,25 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
if (root->root_key.objectid == objectid) {
u64 commit_root_gen;
+ /*
+ * Relocation will wait for cleaner thread, and any half-dropped
+ * subvolume will be fully cleaned up at mount time.
+ * So here we shouldn't hit a subvolume with non-zero drop_progress.
+ *
+ * If this isn't the case, error out since it can make us attempt to
+ * drop references for extents that were already dropped before.
+ */
+ if (unlikely(btrfs_disk_key_objectid(&root->root_item.drop_progress))) {
+ struct btrfs_key cpu_key;
+
+ btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress);
+ btrfs_err(fs_info,
+ "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)",
+ objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset);
+ ret = -EUCLEAN;
+ goto fail;
+ }
+
/* called by btrfs_init_reloc_root */
ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
BTRFS_TREE_RELOC_OBJECTID);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7049a19e07ba..34fedac4e186 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -313,8 +313,7 @@ struct walk_control {
/*
* Ignore any items from the inode currently being processed. Needs
- * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in
- * the LOG_WALK_REPLAY_INODES stage.
+ * to be set every time we find a BTRFS_INODE_ITEM_KEY.
*/
bool ignore_cur_inode;
@@ -2581,23 +2580,30 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
- btrfs_item_key_to_cpu(eb, &key, i);
+ struct btrfs_inode_item *inode_item;
- /* inode keys are done during the first stage */
- if (key.type == BTRFS_INODE_ITEM_KEY &&
- wc->stage == LOG_WALK_REPLAY_INODES) {
- struct btrfs_inode_item *inode_item;
- u32 mode;
+ btrfs_item_key_to_cpu(eb, &key, i);
- inode_item = btrfs_item_ptr(eb, i,
- struct btrfs_inode_item);
+ if (key.type == BTRFS_INODE_ITEM_KEY) {
+ inode_item = btrfs_item_ptr(eb, i, struct btrfs_inode_item);
/*
- * If we have a tmpfile (O_TMPFILE) that got fsync'ed
- * and never got linked before the fsync, skip it, as
- * replaying it is pointless since it would be deleted
- * later. We skip logging tmpfiles, but it's always
- * possible we are replaying a log created with a kernel
- * that used to log tmpfiles.
+ * An inode with no links is either:
+ *
+ * 1) A tmpfile (O_TMPFILE) that got fsync'ed and never
+ * got linked before the fsync, skip it, as replaying
+ * it is pointless since it would be deleted later.
+ * We skip logging tmpfiles, but it's always possible
+ * we are replaying a log created with a kernel that
+ * used to log tmpfiles;
+ *
+ * 2) A non-tmpfile which got its last link deleted
+ * while holding an open fd on it and later got
+ * fsynced through that fd. We always log the
+ * parent inodes when inode->last_unlink_trans is
+ * set to the current transaction, so ignore all the
+ * inode items for this inode. We will delete the
+ * inode when processing the parent directory with
+ * replay_dir_deletes().
*/
if (btrfs_inode_nlink(eb, inode_item) == 0) {
wc->ignore_cur_inode = true;
@@ -2605,8 +2611,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
} else {
wc->ignore_cur_inode = false;
}
- ret = replay_xattr_deletes(wc->trans, root, log,
- path, key.objectid);
+ }
+
+ /* Inode keys are done during the first stage. */
+ if (key.type == BTRFS_INODE_ITEM_KEY &&
+ wc->stage == LOG_WALK_REPLAY_INODES) {
+ u32 mode;
+
+ ret = replay_xattr_deletes(wc->trans, root, log, path, key.objectid);
if (ret)
break;
mode = btrfs_inode_mode(eb, inode_item);
@@ -3988,6 +4000,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_timespec_nsec(&token, &item->ctime,
inode->i_ctime.tv_nsec);
+ btrfs_set_token_timespec_sec(&token, &item->otime,
+ BTRFS_I(inode)->i_otime.tv_sec);
+ btrfs_set_token_timespec_nsec(&token, &item->otime,
+ BTRFS_I(inode)->i_otime.tv_nsec);
+
/*
* We do not need to set the nbytes field, in fact during a fast fsync
* its value may not even be correct, since a fast fsync does not wait
diff --git a/fs/buffer.c b/fs/buffer.c
index 1960e2d43ae2..87fcbb725241 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -156,8 +156,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
*/
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
- __end_buffer_read_notouch(bh, uptodate);
put_bh(bh);
+ __end_buffer_read_notouch(bh, uptodate);
}
EXPORT_SYMBOL(end_buffer_read_sync);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6ca08e473a7e..e6541bd5c63d 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4362,6 +4362,12 @@ findFirstRetry:
pSMB->FileName[name_len] = 0;
pSMB->FileName[name_len+1] = 0;
name_len += 2;
+ } else if (!searchName[0]) {
+ pSMB->FileName[0] = CIFS_DIR_SEP(cifs_sb);
+ pSMB->FileName[1] = 0;
+ pSMB->FileName[2] = 0;
+ pSMB->FileName[3] = 0;
+ name_len = 4;
}
} else {
name_len = copy_path_name(pSMB->FileName, searchName);
@@ -4373,6 +4379,10 @@ findFirstRetry:
pSMB->FileName[name_len] = '*';
pSMB->FileName[name_len+1] = 0;
name_len += 2;
+ } else if (!searchName[0]) {
+ pSMB->FileName[0] = CIFS_DIR_SEP(cifs_sb);
+ pSMB->FileName[1] = 0;
+ name_len = 2;
}
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9e8a69f9421e..10bb1f955188 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -4865,7 +4865,8 @@ void cifs_oplock_break(struct work_struct *work)
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
oplock_break);
struct inode *inode = d_inode(cfile->dentry);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct super_block *sb = inode->i_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
@@ -4875,6 +4876,12 @@ void cifs_oplock_break(struct work_struct *work)
__u64 persistent_fid, volatile_fid;
__u16 net_fid;
+ /*
+ * Hold a reference to the superblock to prevent it and its inodes from
+ * being freed while we are accessing cinode. Otherwise, _cifsFileInfo_put()
+ * may release the last reference to the sb and trigger inode eviction.
+ */
+ cifs_sb_active(sb);
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
TASK_UNINTERRUPTIBLE);
@@ -4947,6 +4954,7 @@ oplock_break_ack:
cifs_put_tlink(tlink);
out:
cifs_done_oplock_break(cinode);
+ cifs_sb_deactive(sb);
}
/*
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index da9305f0b6f5..619905fc694e 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -4552,6 +4552,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
u8 key[SMB3_ENC_DEC_KEY_SIZE];
struct aead_request *req;
u8 *iv;
+ DECLARE_CRYPTO_WAIT(wait);
unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
void *creq;
@@ -4600,7 +4601,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
aead_request_set_crypt(req, sg, sg, crypt_len, iv);
aead_request_set_ad(req, assoc_data_len);
- rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &wait);
+
+ rc = crypto_wait_req(enc ? crypto_aead_encrypt(req)
+ : crypto_aead_decrypt(req), &wait);
if (!rc && enc)
memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index a9a5d27b8d38..48bd879349fb 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -455,7 +455,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
log_rdma_recv(INFO, "wc->status=%d opcode=%d\n",
wc->status, wc->opcode);
- smbd_disconnect_rdma_connection(info);
goto error;
}
@@ -472,8 +471,9 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
info->full_packet_received = true;
info->negotiate_done =
process_negotiation_response(response, wc->byte_len);
+ put_receive_buffer(info, response);
complete(&info->negotiate_completion);
- break;
+ return;
/* SMBD data transfer packet */
case SMBD_TRANSFER_DATA:
@@ -530,14 +530,16 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
return;
-
- default:
- log_rdma_recv(ERR,
- "unexpected response type=%d\n", response->type);
}
+ /*
+ * This is an internal error!
+ */
+ log_rdma_recv(ERR, "unexpected response type=%d\n", response->type);
+ WARN_ON_ONCE(response->type != SMBD_TRANSFER_DATA);
error:
put_receive_buffer(info, response);
+ smbd_disconnect_rdma_connection(info);
}
static struct rdma_cm_id *smbd_create_id(
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 99d002438008..124db520b2bd 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -47,6 +47,10 @@ static int efivarfs_d_compare(const struct dentry *dentry,
{
int guid = len - EFI_VARIABLE_GUID_LEN;
+ /* Parallel lookups may produce a temporary invalid filename */
+ if (guid <= 0)
+ return 1;
+
if (name->len != len)
return 1;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index fd0f95fe611f..721162c2b204 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -216,6 +216,7 @@ struct eventpoll {
/* used to optimize loop detection check */
u64 gen;
struct hlist_head refs;
+ u8 loop_check_depth;
#ifdef CONFIG_NET_RX_BUSY_POLL
/* used to track busy poll napi_id */
@@ -1944,23 +1945,24 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
}
/**
- * ep_loop_check_proc - verify that adding an epoll file inside another
- * epoll structure does not violate the constraints, in
- * terms of closed loops, or too deep chains (which can
- * result in excessive stack usage).
+ * ep_loop_check_proc - verify that adding an epoll file @ep inside another
+ * epoll file does not create closed loops, and
+ * determine the depth of the subtree starting at @ep
*
* @ep: the &struct eventpoll to be currently checked.
* @depth: Current depth of the path being checked.
*
- * Return: %zero if adding the epoll @file inside current epoll
- * structure @ep does not violate the constraints, or %-1 otherwise.
+ * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep.
*/
static int ep_loop_check_proc(struct eventpoll *ep, int depth)
{
- int error = 0;
+ int result = 0;
struct rb_node *rbp;
struct epitem *epi;
+ if (ep->gen == loop_check_gen)
+ return ep->loop_check_depth;
+
mutex_lock_nested(&ep->mtx, depth + 1);
ep->gen = loop_check_gen;
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
@@ -1968,13 +1970,11 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
if (unlikely(is_file_epoll(epi->ffd.file))) {
struct eventpoll *ep_tovisit;
ep_tovisit = epi->ffd.file->private_data;
- if (ep_tovisit->gen == loop_check_gen)
- continue;
if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS)
- error = -1;
+ result = INT_MAX;
else
- error = ep_loop_check_proc(ep_tovisit, depth + 1);
- if (error != 0)
+ result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1);
+ if (result > EP_MAX_NESTS)
break;
} else {
/*
@@ -1988,9 +1988,27 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
list_file(epi->ffd.file);
}
}
+ ep->loop_check_depth = result;
mutex_unlock(&ep->mtx);
- return error;
+ return result;
+}
+
+/**
+ * ep_get_upwards_depth_proc - determine depth of @ep when traversed upwards
+ */
+static int ep_get_upwards_depth_proc(struct eventpoll *ep, int depth)
+{
+ int result = 0;
+ struct epitem *epi;
+
+ if (ep->gen == loop_check_gen)
+ return ep->loop_check_depth;
+ hlist_for_each_entry_rcu(epi, &ep->refs, fllink)
+ result = max(result, ep_get_upwards_depth_proc(epi->ep, depth + 1) + 1);
+ ep->gen = loop_check_gen;
+ ep->loop_check_depth = result;
+ return result;
}
/**
@@ -2006,8 +2024,22 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
*/
static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to)
{
+ int depth, upwards_depth;
+
inserting_into = ep;
- return ep_loop_check_proc(to, 0);
+ /*
+ * Check how deep down we can get from @to, and whether it is possible
+ * to loop up to @ep.
+ */
+ depth = ep_loop_check_proc(to, 0);
+ if (depth > EP_MAX_NESTS)
+ return -1;
+ /* Check how far up we can go from @ep. */
+ rcu_read_lock();
+ upwards_depth = ep_get_upwards_depth_proc(ep, 0);
+ rcu_read_unlock();
+
+ return (depth+1+upwards_depth > EP_MAX_NESTS) ? -1 : 0;
}
static void clear_tfile_check_list(void)
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 333fa62661d5..85b6a76378ee 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -856,9 +856,19 @@ int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
int ret;
+ loff_t i_size;
inode_lock(inode);
- len = min_t(u64, len, i_size_read(inode));
+ i_size = i_size_read(inode);
+ /*
+ * iomap_fiemap() returns EINVAL for 0 length. Make sure we don't trim
+ * length to 0 but still trim the range as much as possible since
+ * ext2_get_blocks() iterates unmapped space block by block which is
+ * slow.
+ */
+ if (i_size == 0)
+ i_size = 1;
+ len = min_t(u64, len, i_size);
ret = iomap_fiemap(inode, fieinfo, start, len, &ext2_iomap_ops);
inode_unlock(inode);
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 53a05b8292f0..1b68586f73f3 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -393,6 +393,14 @@ static unsigned int ext4_getfsmap_find_sb(struct super_block *sb,
/* Reserved GDT blocks */
if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg) {
len = le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+
+ /*
+ * mkfs.ext4 can set s_reserved_gdt_blocks as 0 in some cases,
+ * check for that.
+ */
+ if (!len)
+ return 0;
+
error = ext4_getfsmap_fill(meta_list, fsb, len,
EXT4_FMR_OWN_RESV_GDT);
if (error)
@@ -526,6 +534,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
ext4_group_t end_ag;
ext4_grpblk_t first_cluster;
ext4_grpblk_t last_cluster;
+ struct ext4_fsmap irec;
int error = 0;
bofs = le32_to_cpu(sbi->s_es->s_first_data_block);
@@ -609,10 +618,18 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
goto err;
}
- /* Report any gaps at the end of the bg */
+ /*
+ * The dummy record below will cause ext4_getfsmap_helper() to report
+ * any allocated blocks at the end of the range.
+ */
+ irec.fmr_device = 0;
+ irec.fmr_physical = end_fsb + 1;
+ irec.fmr_length = 0;
+ irec.fmr_owner = EXT4_FMR_OWN_FREE;
+ irec.fmr_flags = 0;
+
info->gfi_last = true;
- error = ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster + 1,
- 0, info);
+ error = ext4_getfsmap_helper(sb, info, &irec);
if (error)
goto err;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index d795ccef0417..f1d514c881a3 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -539,7 +539,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
int indirect_blks;
int blocks_to_boundary = 0;
int depth;
- int count = 0;
+ u64 count = 0;
ext4_fsblk_t first_block = 0;
trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
@@ -588,7 +588,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
count++;
/* Fill in size of a hole we found */
map->m_pblk = 0;
- map->m_len = min_t(unsigned int, map->m_len, count);
+ map->m_len = umin(map->m_len, count);
goto cleanup;
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index a1cc14156ced..d2cdb151a9c1 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -298,7 +298,11 @@ static int ext4_create_inline_data(handle_t *handle,
if (error)
goto out;
- BUG_ON(!is.s.not_found);
+ if (!is.s.not_found) {
+ EXT4_ERROR_INODE(inode, "unexpected inline data xattr");
+ error = -EFSCORRUPTED;
+ goto out;
+ }
error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error) {
@@ -349,7 +353,11 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
if (error)
goto out;
- BUG_ON(is.s.not_found);
+ if (is.s.not_found) {
+ EXT4_ERROR_INODE(inode, "missing inline data xattr");
+ error = -EFSCORRUPTED;
+ goto out;
+ }
len -= EXT4_MIN_INLINE_DATA_SIZE;
value = kzalloc(len, GFP_NOFS);
@@ -1981,7 +1989,12 @@ retry:
if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0)
goto out_error;
- BUG_ON(is.s.not_found);
+ if (is.s.not_found) {
+ EXT4_ERROR_INODE(inode,
+ "missing inline data xattr");
+ err = -EFSCORRUPTED;
+ goto out_error;
+ }
value_len = le32_to_cpu(is.s.here->e_value_size);
value = kmalloc(value_len, GFP_NOFS);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c900c917bf04..aefc138262f7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -148,7 +148,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
*/
int ext4_inode_is_fast_symlink(struct inode *inode)
{
- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+ if (!ext4_has_feature_ea_inode(inode->i_sb)) {
int ea_blocks = EXT4_I(inode)->i_file_acl ?
EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 221f2a7f8e0b..1a8d72c5e327 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1082,33 +1082,28 @@ static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- int i;
+ int new, old = grp->bb_largest_free_order;
- for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--)
- if (grp->bb_counters[i] > 0)
+ for (new = MB_NUM_ORDERS(sb) - 1; new >= 0; new--)
+ if (grp->bb_counters[new] > 0)
break;
+
/* No need to move between order lists? */
- if (!test_opt2(sb, MB_OPTIMIZE_SCAN) ||
- i == grp->bb_largest_free_order) {
- grp->bb_largest_free_order = i;
+ if (new == old)
return;
- }
- if (grp->bb_largest_free_order >= 0) {
- write_lock(&sbi->s_mb_largest_free_orders_locks[
- grp->bb_largest_free_order]);
+ if (old >= 0 && !list_empty(&grp->bb_largest_free_order_node)) {
+ write_lock(&sbi->s_mb_largest_free_orders_locks[old]);
list_del_init(&grp->bb_largest_free_order_node);
- write_unlock(&sbi->s_mb_largest_free_orders_locks[
- grp->bb_largest_free_order]);
+ write_unlock(&sbi->s_mb_largest_free_orders_locks[old]);
}
- grp->bb_largest_free_order = i;
- if (grp->bb_largest_free_order >= 0 && grp->bb_free) {
- write_lock(&sbi->s_mb_largest_free_orders_locks[
- grp->bb_largest_free_order]);
+
+ grp->bb_largest_free_order = new;
+ if (test_opt2(sb, MB_OPTIMIZE_SCAN) && new >= 0 && grp->bb_free) {
+ write_lock(&sbi->s_mb_largest_free_orders_locks[new]);
list_add_tail(&grp->bb_largest_free_order_node,
- &sbi->s_mb_largest_free_orders[grp->bb_largest_free_order]);
- write_unlock(&sbi->s_mb_largest_free_orders_locks[
- grp->bb_largest_free_order]);
+ &sbi->s_mb_largest_free_orders[new]);
+ write_unlock(&sbi->s_mb_largest_free_orders_locks[new]);
}
}
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
index c26c404ac58b..72316515a6b0 100644
--- a/fs/ext4/orphan.c
+++ b/fs/ext4/orphan.c
@@ -590,8 +590,9 @@ int ext4_init_orphan_info(struct super_block *sb)
}
oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
- oi->of_binfo = kmalloc(oi->of_blocks*sizeof(struct ext4_orphan_block),
- GFP_KERNEL);
+ oi->of_binfo = kmalloc_array(oi->of_blocks,
+ sizeof(struct ext4_orphan_block),
+ GFP_KERNEL);
if (!oi->of_binfo) {
ret = -ENOMEM;
goto out_put;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4d270874d04e..3734ed76cac5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4669,6 +4669,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
err = ext4_load_journal(sb, es, parsed_opts.journal_devnum);
if (err)
goto failed_mount3a;
+ if (bdev_read_only(sb->s_bdev))
+ needs_recovery = 0;
} else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
ext4_has_feature_journal_needs_recovery(sb)) {
ext4_msg(sb, KERN_ERR, "required journal recovery "
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 30b8924d1493..5808791efd98 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -365,7 +365,7 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
struct extent_tree *et;
struct extent_node *en;
- struct extent_info ei;
+ struct extent_info ei = {0};
if (!f2fs_may_extent_tree(inode)) {
/* drop largest extent */
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 28db323dd400..5475d017ad1e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1209,7 +1209,7 @@ struct f2fs_bio_info {
#define RDEV(i) (raw_super->devs[i])
struct f2fs_dev_info {
struct block_device *bdev;
- char path[MAX_PATH_LEN];
+ char path[MAX_PATH_LEN + 1];
unsigned int total_segments;
block_t start_blk;
block_t end_blk;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 558f478d037d..7494a68a5319 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -226,6 +226,13 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
+ if (ino_of_node(node_page) == fi->i_xattr_nid) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.",
+ __func__, inode->i_ino, fi->i_xattr_nid);
+ return false;
+ }
+
if (f2fs_sb_has_flexible_inline_xattr(sbi)
&& !f2fs_has_extra_attr(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -826,6 +833,19 @@ retry:
f2fs_update_inode_page(inode);
if (dquot_initialize_needed(inode))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+
+ /*
+ * If both f2fs_truncate() and f2fs_update_inode_page() failed
+ * due to fuzzed corrupted inode, call f2fs_inode_synced() to
+ * avoid triggering later f2fs_bug_on().
+ */
+ if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
+ f2fs_warn(sbi,
+ "f2fs_evict_inode: inode is dirty, ino:%lu",
+ inode->i_ino);
+ f2fs_inode_synced(inode);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ }
}
if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
sb_end_intwrite(inode->i_sb);
@@ -842,8 +862,12 @@ no_delete:
if (likely(!f2fs_cp_error(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
- else
- f2fs_inode_synced(inode);
+
+ /*
+ * anyway, it needs to remove the inode from sbi->inode_list[DIRTY_META]
+ * list to avoid UAF in f2fs_sync_inode_meta() during checkpoint.
+ */
+ f2fs_inode_synced(inode);
/* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */
if (inode->i_ino)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index ae6d65f2ea06..e5a06a65a539 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -798,6 +798,16 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
for (i = 1; i <= level; i++) {
bool done = false;
+ if (nids[i] && nids[i] == dn->inode->i_ino) {
+ err = -EFSCORRUPTED;
+ f2fs_err(sbi,
+ "inode mapping table is corrupted, run fsck to fix it, "
+ "ino:%lu, nid:%u, level:%d, offset:%d",
+ dn->inode->i_ino, nids[i], level, offset[level]);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ goto release_pages;
+ }
+
if (!nids[i] && mode == ALLOC_NODE) {
/* alloc new node */
if (!f2fs_alloc_nid(sbi, &(nids[i]))) {
diff --git a/fs/file.c b/fs/file.c
index 386968003030..8f013aaf351f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -90,18 +90,11 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
* 'unsigned long' in some places, but simply because that is how the Linux
* kernel bitmaps are defined to work: they are not "bits in an array of bytes",
* they are very much "bits in an array of unsigned long".
- *
- * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied
- * by that "1024/sizeof(ptr)" before, we already know there are sufficient
- * clear low bits. Clang seems to realize that, gcc ends up being confused.
- *
- * On a 128-bit machine, the ALIGN() would actually matter. In the meantime,
- * let's consider it documentation (and maybe a test-case for gcc to improve
- * its code generation ;)
*/
-static struct fdtable * alloc_fdtable(unsigned int nr)
+static struct fdtable *alloc_fdtable(unsigned int slots_wanted)
{
struct fdtable *fdt;
+ unsigned int nr;
void *data;
/*
@@ -109,22 +102,47 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
* Allocation steps are keyed to the size of the fdarray, since it
* grows far faster than any of the other dynamic data. We try to fit
* the fdarray into comfortable page-tuned chunks: starting at 1024B
- * and growing in powers of two from there on.
+ * and growing in powers of two from there on. Since we called only
+ * with slots_wanted > BITS_PER_LONG (embedded instance in files->fdtab
+ * already gives BITS_PER_LONG slots), the above boils down to
+ * 1. use the smallest power of two large enough to give us that many
+ * slots.
+ * 2. on 32bit skip 64 and 128 - the minimal capacity we want there is
+ * 256 slots (i.e. 1Kb fd array).
+ * 3. on 64bit don't skip anything, 1Kb fd array means 128 slots there
+ * and we are never going to be asked for 64 or less.
*/
- nr /= (1024 / sizeof(struct file *));
- nr = roundup_pow_of_two(nr + 1);
- nr *= (1024 / sizeof(struct file *));
- nr = ALIGN(nr, BITS_PER_LONG);
+ if (IS_ENABLED(CONFIG_32BIT) && slots_wanted < 256)
+ nr = 256;
+ else
+ nr = roundup_pow_of_two(slots_wanted);
/*
* Note that this can drive nr *below* what we had passed if sysctl_nr_open
- * had been set lower between the check in expand_files() and here. Deal
- * with that in caller, it's cheaper that way.
+ * had been set lower between the check in expand_files() and here.
*
* We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
* bitmaps handling below becomes unpleasant, to put it mildly...
*/
- if (unlikely(nr > sysctl_nr_open))
- nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
+ if (unlikely(nr > sysctl_nr_open)) {
+ nr = round_down(sysctl_nr_open, BITS_PER_LONG);
+ if (nr < slots_wanted)
+ return ERR_PTR(-EMFILE);
+ }
+
+ /*
+ * Check if the allocation size would exceed INT_MAX. kvmalloc_array()
+ * and kvmalloc() will warn if the allocation size is greater than
+ * INT_MAX, as filp_cachep objects are not __GFP_NOWARN.
+ *
+ * This can happen when sysctl_nr_open is set to a very high value and
+ * a process tries to use a file descriptor near that limit. For example,
+ * if sysctl_nr_open is set to 1073741816 (0x3ffffff8) - which is what
+ * systemd typically sets it to - then trying to use a file descriptor
+ * close to that value will require allocating a file descriptor table
+ * that exceeds 8GB in size.
+ */
+ if (unlikely(nr > INT_MAX / sizeof(struct file *)))
+ return ERR_PTR(-EMFILE);
fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
if (!fdt)
@@ -153,7 +171,7 @@ out_arr:
out_fdt:
kfree(fdt);
out:
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
/*
@@ -170,7 +188,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
struct fdtable *new_fdt, *cur_fdt;
spin_unlock(&files->file_lock);
- new_fdt = alloc_fdtable(nr);
+ new_fdt = alloc_fdtable(nr + 1);
/* make sure all fd_install() have seen resize_in_progress
* or have finished their rcu_read_lock_sched() section.
@@ -179,16 +197,8 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
synchronize_rcu();
spin_lock(&files->file_lock);
- if (!new_fdt)
- return -ENOMEM;
- /*
- * extremely unlikely race - sysctl_nr_open decreased between the check in
- * caller and alloc_fdtable(). Cheaper to catch it here...
- */
- if (unlikely(new_fdt->max_fds <= nr)) {
- __free_fdtable(new_fdt);
- return -EMFILE;
- }
+ if (IS_ERR(new_fdt))
+ return PTR_ERR(new_fdt);
cur_fdt = files_fdtable(files);
BUG_ON(nr < cur_fdt->max_fds);
copy_fdtable(new_fdt, cur_fdt);
@@ -302,7 +312,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho
struct file **old_fds, **new_fds;
unsigned int open_files, i;
struct fdtable *old_fdt, *new_fdt;
- int error;
newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
if (!newf)
@@ -334,17 +343,10 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho
if (new_fdt != &newf->fdtab)
__free_fdtable(new_fdt);
- new_fdt = alloc_fdtable(open_files - 1);
- if (!new_fdt) {
- error = -ENOMEM;
- goto out_release;
- }
-
- /* beyond sysctl_nr_open; nothing to do */
- if (unlikely(new_fdt->max_fds < open_files)) {
- __free_fdtable(new_fdt);
- error = -EMFILE;
- goto out_release;
+ new_fdt = alloc_fdtable(open_files);
+ if (IS_ERR(new_fdt)) {
+ kmem_cache_free(files_cachep, newf);
+ return ERR_CAST(new_fdt);
}
/*
@@ -385,10 +387,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho
rcu_assign_pointer(newf->fdt, new_fdt);
return newf;
-
-out_release:
- kmem_cache_free(files_cachep, newf);
- return ERR_PTR(error);
}
static struct fdtable *close_files(struct files_struct * files)
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 2251286cd83f..219e3b8fd6a8 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -15,6 +15,48 @@
#include "btree.h"
+static inline
+bool is_bnode_offset_valid(struct hfs_bnode *node, int off)
+{
+ bool is_valid = off < node->tree->node_size;
+
+ if (!is_valid) {
+ pr_err("requested invalid offset: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off);
+ }
+
+ return is_valid;
+}
+
+static inline
+int check_and_correct_requested_length(struct hfs_bnode *node, int off, int len)
+{
+ unsigned int node_size;
+
+ if (!is_bnode_offset_valid(node, off))
+ return 0;
+
+ node_size = node->tree->node_size;
+
+ if ((off + len) > node_size) {
+ int new_len = (int)node_size - off;
+
+ pr_err("requested length has been corrected: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d, "
+ "requested_len %d, corrected_len %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off, len, new_len);
+
+ return new_len;
+ }
+
+ return len;
+}
+
void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
{
struct page *page;
@@ -23,6 +65,20 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
int bytes_to_read;
void *vaddr;
+ if (!is_bnode_offset_valid(node, off))
+ return;
+
+ if (len == 0) {
+ pr_err("requested zero length: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d, len %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off, len);
+ return;
+ }
+
+ len = check_and_correct_requested_length(node, off, len);
+
off += node->page_offset;
pagenum = off >> PAGE_SHIFT;
off &= ~PAGE_MASK; /* compute page offset for the first page */
@@ -83,6 +139,20 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len)
{
struct page *page;
+ if (!is_bnode_offset_valid(node, off))
+ return;
+
+ if (len == 0) {
+ pr_err("requested zero length: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d, len %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off, len);
+ return;
+ }
+
+ len = check_and_correct_requested_length(node, off, len);
+
off += node->page_offset;
page = node->page[0];
@@ -108,6 +178,20 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len)
{
struct page *page;
+ if (!is_bnode_offset_valid(node, off))
+ return;
+
+ if (len == 0) {
+ pr_err("requested zero length: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d, len %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off, len);
+ return;
+ }
+
+ len = check_and_correct_requested_length(node, off, len);
+
off += node->page_offset;
page = node->page[0];
@@ -124,6 +208,10 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len);
if (!len)
return;
+
+ len = check_and_correct_requested_length(src_node, src, len);
+ len = check_and_correct_requested_length(dst_node, dst, len);
+
src += src_node->page_offset;
dst += dst_node->page_offset;
src_page = src_node->page[0];
@@ -143,6 +231,10 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
hfs_dbg(BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len);
if (!len)
return;
+
+ len = check_and_correct_requested_length(node, src, len);
+ len = check_and_correct_requested_length(node, dst, len);
+
src += node->page_offset;
dst += node->page_offset;
page = node->page[0];
@@ -494,6 +586,7 @@ void hfs_bnode_put(struct hfs_bnode *node)
if (test_bit(HFS_BNODE_DELETED, &node->flags)) {
hfs_bnode_unhash(node);
spin_unlock(&tree->hash_lock);
+ hfs_bnode_clear(node, 0, tree->node_size);
hfs_bmap_free(node);
hfs_bnode_free(node);
return;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index cf6e5de7b9da..c9c38fddf505 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -18,12 +18,68 @@
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
+static inline
+bool is_bnode_offset_valid(struct hfs_bnode *node, int off)
+{
+ bool is_valid = off < node->tree->node_size;
+
+ if (!is_valid) {
+ pr_err("requested invalid offset: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off);
+ }
+
+ return is_valid;
+}
+
+static inline
+int check_and_correct_requested_length(struct hfs_bnode *node, int off, int len)
+{
+ unsigned int node_size;
+
+ if (!is_bnode_offset_valid(node, off))
+ return 0;
+
+ node_size = node->tree->node_size;
+
+ if ((off + len) > node_size) {
+ int new_len = (int)node_size - off;
+
+ pr_err("requested length has been corrected: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d, "
+ "requested_len %d, corrected_len %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off, len, new_len);
+
+ return new_len;
+ }
+
+ return len;
+}
+
/* Copy a specified range of bytes from the raw data of a node */
void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
{
struct page **pagep;
int l;
+ if (!is_bnode_offset_valid(node, off))
+ return;
+
+ if (len == 0) {
+ pr_err("requested zero length: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d, len %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off, len);
+ return;
+ }
+
+ len = check_and_correct_requested_length(node, off, len);
+
off += node->page_offset;
pagep = node->page + (off >> PAGE_SHIFT);
off &= ~PAGE_MASK;
@@ -83,6 +139,20 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len)
struct page **pagep;
int l;
+ if (!is_bnode_offset_valid(node, off))
+ return;
+
+ if (len == 0) {
+ pr_err("requested zero length: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d, len %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off, len);
+ return;
+ }
+
+ len = check_and_correct_requested_length(node, off, len);
+
off += node->page_offset;
pagep = node->page + (off >> PAGE_SHIFT);
off &= ~PAGE_MASK;
@@ -113,6 +183,20 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len)
struct page **pagep;
int l;
+ if (!is_bnode_offset_valid(node, off))
+ return;
+
+ if (len == 0) {
+ pr_err("requested zero length: "
+ "NODE: id %u, type %#x, height %u, "
+ "node_size %u, offset %d, len %d\n",
+ node->this, node->type, node->height,
+ node->tree->node_size, off, len);
+ return;
+ }
+
+ len = check_and_correct_requested_length(node, off, len);
+
off += node->page_offset;
pagep = node->page + (off >> PAGE_SHIFT);
off &= ~PAGE_MASK;
@@ -139,6 +223,10 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len);
if (!len)
return;
+
+ len = check_and_correct_requested_length(src_node, src, len);
+ len = check_and_correct_requested_length(dst_node, dst, len);
+
src += src_node->page_offset;
dst += dst_node->page_offset;
src_page = src_node->page + (src >> PAGE_SHIFT);
@@ -196,6 +284,10 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
hfs_dbg(BNODE_MOD, "movebytes: %u,%u,%u\n", dst, src, len);
if (!len)
return;
+
+ len = check_and_correct_requested_length(node, src, len);
+ len = check_and_correct_requested_length(node, dst, len);
+
src += node->page_offset;
dst += node->page_offset;
if (dst > src) {
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index c95a2f0ed4a7..fad1c250f150 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -342,9 +342,6 @@ static int hfsplus_free_extents(struct super_block *sb,
int i;
int err = 0;
- /* Mapping the allocation file may lock the extent tree */
- WARN_ON(mutex_is_locked(&HFSPLUS_SB(sb)->ext_tree->tree_lock));
-
hfsplus_dump_extent(extent);
for (i = 0; i < 8; extent++, i++) {
count = be32_to_cpu(extent->block_count);
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 73342c925a4b..36b6cf2a3abb 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -132,7 +132,14 @@ int hfsplus_uni2asc(struct super_block *sb,
op = astr;
ip = ustr->unicode;
+
ustrlen = be16_to_cpu(ustr->length);
+ if (ustrlen > HFSPLUS_MAX_STRLEN) {
+ ustrlen = HFSPLUS_MAX_STRLEN;
+ pr_err("invalid length %u has been corrected to %d\n",
+ be16_to_cpu(ustr->length), ustrlen);
+ }
+
len = *len_p;
ce1 = NULL;
compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 71fb2f8e9117..7ad3071debd6 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -172,7 +172,11 @@ check_attr_tree_state_again:
return PTR_ERR(attr_file);
}
- BUG_ON(i_size_read(attr_file) != 0);
+ if (i_size_read(attr_file) != 0) {
+ err = -EIO;
+ pr_err("detected inconsistent attributes file, running fsck.hfsplus is recommended.\n");
+ goto end_attr_file_creation;
+ }
hip = HFSPLUS_I(attr_file);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9b6004bc96de..c8a5d94561ff 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -148,7 +148,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops;
- ret = seal_check_future_write(info->seals, vma);
+ ret = seal_check_write(info->seals, vma);
if (ret)
return ret;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 07252d2a7f5f..e799305b1d68 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1493,9 +1493,16 @@ static int isofs_read_inode(struct inode *inode, int relocated)
inode->i_op = &page_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_data.a_ops = &isofs_symlink_aops;
- } else
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
/* XXX - parse_rock_ridge_inode() had already set i_rdev. */
init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ } else {
+ printk(KERN_DEBUG "ISOFS: Invalid file type 0%04o for inode %lu.\n",
+ inode->i_mode, inode->i_ino);
+ ret = -EIO;
+ goto fail;
+ }
ret = 0;
out:
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index affcde540585..2c1751c637c8 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -297,6 +297,7 @@ restart:
retry:
if (batch_count)
__flush_batch(journal, &batch_count);
+ cond_resched();
spin_lock(&journal->j_list_lock);
goto restart;
}
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 1d732fd223d4..5c28883eee4e 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -44,6 +44,9 @@ static int jfs_open(struct inode *inode, struct file *file)
{
int rc;
+ if (S_ISREG(inode->i_mode) && inode->i_size < 0)
+ return -EIO;
+
if ((rc = dquot_file_open(inode, file)))
return rc;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 072821b50ab9..e132dafa1b6c 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -145,9 +145,9 @@ void jfs_evict_inode(struct inode *inode)
if (!inode->i_nlink && !is_bad_inode(inode)) {
dquot_initialize(inode);
+ truncate_inode_pages_final(&inode->i_data);
if (JFS_IP(inode)->fileset == FILESYSTEM_I) {
struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap;
- truncate_inode_pages_final(&inode->i_data);
if (test_cflag(COMMIT_Freewmap, inode))
jfs_free_zero_link(inode);
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index cfb81bf5881e..f4f4c5ec38c6 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1457,6 +1457,12 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
(1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
+ if (ti < 0 || ti >= le32_to_cpu(dcp->nleafs)) {
+ jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n");
+ release_metapage(mp);
+ return -EIO;
+ }
+
/* dmap control page trees fan-out by 4 and a single allocation
* group may be described by 1 or 2 subtrees within the ag level
* dmap control page, depending upon the ag size. examine the ag's
@@ -1877,8 +1883,10 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
return -EIO;
dp = (struct dmap *) mp->data;
- if (dp->tree.budmin < 0)
+ if (dp->tree.budmin < 0) {
+ release_metapage(mp);
return -EIO;
+ }
/* try to allocate the blocks.
*/
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 9adb29e7862c..1f2e452a7676 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3029,14 +3029,23 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno,
*
* RETURN VALUES:
* 0 - success
- * -ENOMEM - insufficient memory
+ * -EINVAL - unexpected inode type
*/
static int copy_from_dinode(struct dinode * dip, struct inode *ip)
{
struct jfs_inode_info *jfs_ip = JFS_IP(ip);
struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
+ int fileset = le32_to_cpu(dip->di_fileset);
+
+ switch (fileset) {
+ case AGGR_RESERVED_I: case AGGREGATE_I: case BMAP_I:
+ case LOG_I: case BADBLOCK_I: case FILESYSTEM_I:
+ break;
+ default:
+ return -EINVAL;
+ }
- jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
+ jfs_ip->fileset = fileset;
jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
jfs_set_inode_flags(ip);
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 76334a983cd2..3439dbad9389 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -5612,7 +5612,6 @@ static int smb2_create_link(struct ksmbd_work *work,
{
char *link_name = NULL, *target_name = NULL, *pathname = NULL;
struct path path, parent_path;
- bool file_present = false;
int rc;
if (buf_len < (u64)sizeof(struct smb2_file_link_info) +
@@ -5645,11 +5644,8 @@ static int smb2_create_link(struct ksmbd_work *work,
if (rc) {
if (rc != -ENOENT)
goto out;
- } else
- file_present = true;
-
- if (file_info->ReplaceIfExists) {
- if (file_present) {
+ } else {
+ if (file_info->ReplaceIfExists) {
rc = ksmbd_vfs_remove_file(work, &path);
if (rc) {
rc = -EINVAL;
@@ -5657,21 +5653,17 @@ static int smb2_create_link(struct ksmbd_work *work,
link_name);
goto out;
}
- }
- } else {
- if (file_present) {
+ } else {
rc = -EEXIST;
ksmbd_debug(SMB, "link already exists\n");
goto out;
}
+ ksmbd_vfs_kern_path_unlock(&parent_path, &path);
}
-
rc = ksmbd_vfs_link(work, target_name, link_name);
if (rc)
rc = -EINVAL;
out:
- if (file_present)
- ksmbd_vfs_kern_path_unlock(&parent_path, &path);
if (!IS_ERR(link_name))
kfree(link_name);
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
index e90a1e8c1951..0438a634f4c2 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/ksmbd/smb_common.c
@@ -508,7 +508,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname,
p = strrchr(longname, '.');
if (p == longname) { /*name starts with a dot*/
- strscpy(extension, "___", strlen("___"));
+ strscpy(extension, "___", sizeof(extension));
} else {
if (p) {
p++;
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index 91e663d5d5bc..2f0263290584 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -128,9 +128,6 @@ struct smb_direct_transport {
spinlock_t recvmsg_queue_lock;
struct list_head recvmsg_queue;
- spinlock_t empty_recvmsg_queue_lock;
- struct list_head empty_recvmsg_queue;
-
int send_credit_target;
atomic_t send_credits;
spinlock_t lock_new_recv_credits;
@@ -266,40 +263,19 @@ smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
static void put_recvmsg(struct smb_direct_transport *t,
struct smb_direct_recvmsg *recvmsg)
{
- ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
- recvmsg->sge.length, DMA_FROM_DEVICE);
+ if (likely(recvmsg->sge.length != 0)) {
+ ib_dma_unmap_single(t->cm_id->device,
+ recvmsg->sge.addr,
+ recvmsg->sge.length,
+ DMA_FROM_DEVICE);
+ recvmsg->sge.length = 0;
+ }
spin_lock(&t->recvmsg_queue_lock);
list_add(&recvmsg->list, &t->recvmsg_queue);
spin_unlock(&t->recvmsg_queue_lock);
}
-static struct
-smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
-{
- struct smb_direct_recvmsg *recvmsg = NULL;
-
- spin_lock(&t->empty_recvmsg_queue_lock);
- if (!list_empty(&t->empty_recvmsg_queue)) {
- recvmsg = list_first_entry(&t->empty_recvmsg_queue,
- struct smb_direct_recvmsg, list);
- list_del(&recvmsg->list);
- }
- spin_unlock(&t->empty_recvmsg_queue_lock);
- return recvmsg;
-}
-
-static void put_empty_recvmsg(struct smb_direct_transport *t,
- struct smb_direct_recvmsg *recvmsg)
-{
- ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
- recvmsg->sge.length, DMA_FROM_DEVICE);
-
- spin_lock(&t->empty_recvmsg_queue_lock);
- list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
- spin_unlock(&t->empty_recvmsg_queue_lock);
-}
-
static void enqueue_reassembly(struct smb_direct_transport *t,
struct smb_direct_recvmsg *recvmsg,
int data_length)
@@ -384,9 +360,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
spin_lock_init(&t->recvmsg_queue_lock);
INIT_LIST_HEAD(&t->recvmsg_queue);
- spin_lock_init(&t->empty_recvmsg_queue_lock);
- INIT_LIST_HEAD(&t->empty_recvmsg_queue);
-
init_waitqueue_head(&t->wait_send_pending);
atomic_set(&t->send_pending, 0);
@@ -542,13 +515,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
t = recvmsg->transport;
if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
+ put_recvmsg(t, recvmsg);
if (wc->status != IB_WC_WR_FLUSH_ERR) {
pr_err("Recv error. status='%s (%d)' opcode=%d\n",
ib_wc_status_msg(wc->status), wc->status,
wc->opcode);
smb_direct_disconnect_rdma_connection(t);
}
- put_empty_recvmsg(t, recvmsg);
return;
}
@@ -562,7 +535,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
switch (recvmsg->type) {
case SMB_DIRECT_MSG_NEGOTIATE_REQ:
if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
- put_empty_recvmsg(t, recvmsg);
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
return;
}
t->negotiation_requested = true;
@@ -570,7 +544,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
t->status = SMB_DIRECT_CS_CONNECTED;
enqueue_reassembly(t, recvmsg, 0);
wake_up_interruptible(&t->wait_status);
- break;
+ return;
case SMB_DIRECT_MSG_DATA_TRANSFER: {
struct smb_direct_data_transfer *data_transfer =
(struct smb_direct_data_transfer *)recvmsg->packet;
@@ -579,7 +553,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (wc->byte_len <
offsetof(struct smb_direct_data_transfer, padding)) {
- put_empty_recvmsg(t, recvmsg);
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
return;
}
@@ -587,7 +562,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (data_length) {
if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
(u64)data_length) {
- put_empty_recvmsg(t, recvmsg);
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
return;
}
@@ -599,16 +575,11 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
else
t->full_packet_received = true;
- enqueue_reassembly(t, recvmsg, (int)data_length);
- wake_up_interruptible(&t->wait_reassembly_queue);
-
spin_lock(&t->receive_credit_lock);
receive_credits = --(t->recv_credits);
avail_recvmsg_count = t->count_avail_recvmsg;
spin_unlock(&t->receive_credit_lock);
} else {
- put_empty_recvmsg(t, recvmsg);
-
spin_lock(&t->receive_credit_lock);
receive_credits = --(t->recv_credits);
avail_recvmsg_count = ++(t->count_avail_recvmsg);
@@ -630,11 +601,23 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
mod_delayed_work(smb_direct_wq,
&t->post_recv_credits_work, 0);
- break;
+
+ if (data_length) {
+ enqueue_reassembly(t, recvmsg, (int)data_length);
+ wake_up_interruptible(&t->wait_reassembly_queue);
+ } else
+ put_recvmsg(t, recvmsg);
+
+ return;
}
- default:
- break;
}
+
+ /*
+ * This is an internal error!
+ */
+ WARN_ON_ONCE(recvmsg->type != SMB_DIRECT_MSG_DATA_TRANSFER);
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
}
static int smb_direct_post_recv(struct smb_direct_transport *t,
@@ -664,6 +647,7 @@ static int smb_direct_post_recv(struct smb_direct_transport *t,
ib_dma_unmap_single(t->cm_id->device,
recvmsg->sge.addr, recvmsg->sge.length,
DMA_FROM_DEVICE);
+ recvmsg->sge.length = 0;
smb_direct_disconnect_rdma_connection(t);
return ret;
}
@@ -805,7 +789,6 @@ static void smb_direct_post_recv_credits(struct work_struct *work)
struct smb_direct_recvmsg *recvmsg;
int receive_credits, credits = 0;
int ret;
- int use_free = 1;
spin_lock(&t->receive_credit_lock);
receive_credits = t->recv_credits;
@@ -813,18 +796,9 @@ static void smb_direct_post_recv_credits(struct work_struct *work)
if (receive_credits < t->recv_credit_target) {
while (true) {
- if (use_free)
- recvmsg = get_free_recvmsg(t);
- else
- recvmsg = get_empty_recvmsg(t);
- if (!recvmsg) {
- if (use_free) {
- use_free = 0;
- continue;
- } else {
- break;
- }
- }
+ recvmsg = get_free_recvmsg(t);
+ if (!recvmsg)
+ break;
recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
recvmsg->first_segment = false;
@@ -1800,8 +1774,6 @@ static void smb_direct_destroy_pools(struct smb_direct_transport *t)
while ((recvmsg = get_free_recvmsg(t)))
mempool_free(recvmsg, t->recvmsg_mempool);
- while ((recvmsg = get_empty_recvmsg(t)))
- mempool_free(recvmsg, t->recvmsg_mempool);
mempool_destroy(t->recvmsg_mempool);
t->recvmsg_mempool = NULL;
@@ -1857,6 +1829,7 @@ static int smb_direct_create_pools(struct smb_direct_transport *t)
if (!recvmsg)
goto err;
recvmsg->transport = t;
+ recvmsg->sge.length = 0;
list_add(&recvmsg->list, &t->recvmsg_queue);
}
t->count_avail_recvmsg = t->recv_credit_max;
diff --git a/fs/libfs.c b/fs/libfs.c
index 7bb5d90319cc..eaf96297449e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -273,7 +273,7 @@ void simple_recursive_removal(struct dentry *dentry,
struct dentry *victim = NULL, *child;
struct inode *inode = this->d_inode;
- inode_lock(inode);
+ inode_lock_nested(inode, I_MUTEX_CHILD);
if (d_is_dir(this))
inode->i_flags |= S_DEAD;
while ((child = find_next_child(this, victim)) == NULL) {
@@ -285,7 +285,7 @@ void simple_recursive_removal(struct dentry *dentry,
victim = this;
this = this->d_parent;
inode = this->d_inode;
- inode_lock(inode);
+ inode_lock_nested(inode, I_MUTEX_CHILD);
if (simple_positive(victim)) {
d_invalidate(victim); // avoid lost mounts
if (d_is_dir(victim))
diff --git a/fs/namespace.c b/fs/namespace.c
index adb966833a4b..35d63bb3b22d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1975,6 +1975,11 @@ struct vfsmount *clone_private_mount(const struct path *path)
if (!check_mnt(old_mnt))
goto invalid;
+ if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) {
+ up_read(&namespace_sem);
+ return ERR_PTR(-EPERM);
+ }
+
if (has_locked_children(old_mnt, path->dentry))
goto invalid;
@@ -2291,6 +2296,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
return attach_recursive_mnt(mnt, p, mp, false);
}
+static int may_change_propagation(const struct mount *m)
+{
+ struct mnt_namespace *ns = m->mnt_ns;
+
+ // it must be mounted in some namespace
+ if (IS_ERR_OR_NULL(ns)) // is_mounted()
+ return -EINVAL;
+ // and the caller must be admin in userns of that namespace
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+ return 0;
+}
+
/*
* Sanity check the flags to change_mnt_propagation.
*/
@@ -2327,10 +2345,10 @@ static int do_change_type(struct path *path, int ms_flags)
return -EINVAL;
namespace_lock();
- if (!check_mnt(mnt)) {
- err = -EINVAL;
+ err = may_change_propagation(mnt);
+ if (err)
goto out_unlock;
- }
+
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
@@ -2725,18 +2743,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
namespace_lock();
- err = -EINVAL;
- /* To and From must be mounted */
- if (!is_mounted(&from->mnt))
- goto out;
- if (!is_mounted(&to->mnt))
- goto out;
-
- err = -EPERM;
- /* We should be allowed to modify mount namespaces of both mounts */
- if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(from);
+ if (err)
goto out;
- if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ err = may_change_propagation(to);
+ if (err)
goto out;
err = -EINVAL;
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index dc657b12822d..76423557f5b3 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -166,8 +166,8 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
/* limit length to what the device mapping allows */
end = disk_addr + *len;
- if (end >= map->start + map->len)
- *len = map->start + map->len - disk_addr;
+ if (end >= map->disk_offset + map->len)
+ *len = map->disk_offset + map->len - disk_addr;
retry:
if (!bio) {
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 16412d6636e8..4e176d7d704d 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -199,10 +199,11 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev *child;
u64 chunk;
u32 chunk_idx;
+ u64 disk_chunk;
u64 disk_offset;
chunk = div_u64(offset, dev->chunk_size);
- div_u64_rem(chunk, dev->nr_children, &chunk_idx);
+ disk_chunk = div_u64_rem(chunk, dev->nr_children, &chunk_idx);
if (chunk_idx >= dev->nr_children) {
dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
@@ -215,7 +216,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
offset = chunk * dev->chunk_size;
/* disk offset of the stripe */
- disk_offset = div_u64(offset, dev->nr_children);
+ disk_offset = disk_chunk * dev->chunk_size;
child = &dev->children[chunk_idx];
child->map(child, disk_offset, map);
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 8f7cff7a4293..0add0f329816 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -552,6 +552,15 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
return ret;
}
+/**
+ * ext_tree_prepare_commit - encode extents that need to be committed
+ * @arg: layout commit data
+ *
+ * Return values:
+ * %0: Success, all required extents are encoded
+ * %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit
+ * %-ENOMEM: Out of memory, extents not encoded
+ */
int
ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
{
@@ -568,12 +577,12 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
start_p = page_address(arg->layoutupdate_page);
arg->layoutupdate_pages = &arg->layoutupdate_page;
-retry:
- ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
+ ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
- buffer_size = ext_tree_layoutupdate_size(bl, count);
+ buffer_size = NFS_SERVER(arg->inode)->wsize;
count = 0;
arg->layoutupdate_pages =
@@ -588,7 +597,8 @@ retry:
return -ENOMEM;
}
- goto retry;
+ ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
}
*start_p = cpu_to_be32(count);
@@ -608,7 +618,7 @@ retry:
}
dprintk("%s found %zu ranges\n", __func__, count);
- return 0;
+ return ret;
}
void
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 10eef1368114..443b67beec37 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -661,6 +661,44 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
}
EXPORT_SYMBOL_GPL(nfs_init_client);
+static void nfs4_server_set_init_caps(struct nfs_server *server)
+{
+#if IS_ENABLED(CONFIG_NFS_V4)
+ /* Set the basic capabilities */
+ server->caps = server->nfs_client->cl_mvops->init_caps;
+ if (server->flags & NFS_MOUNT_NORDIRPLUS)
+ server->caps &= ~NFS_CAP_READDIRPLUS;
+ if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
+ server->caps &= ~NFS_CAP_READ_PLUS;
+
+ /*
+ * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
+ * authentication.
+ */
+ if (nfs4_disable_idmapping &&
+ server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
+ server->caps |= NFS_CAP_UIDGID_NOMAP;
+#endif
+}
+
+void nfs_server_set_init_caps(struct nfs_server *server)
+{
+ switch (server->nfs_client->rpc_ops->version) {
+ case 2:
+ server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+ break;
+ case 3:
+ server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+ if (!(server->flags & NFS_MOUNT_NORDIRPLUS))
+ server->caps |= NFS_CAP_READDIRPLUS;
+ break;
+ default:
+ nfs4_server_set_init_caps(server);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nfs_server_set_init_caps);
+
/*
* Create a version 2 or 3 client
*/
@@ -699,7 +737,6 @@ static int nfs_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = ctx->flags;
server->options = ctx->options;
- server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
switch (clp->rpc_ops->version) {
case 2:
@@ -735,6 +772,8 @@ static int nfs_init_server(struct nfs_server *server,
if (error < 0)
goto error;
+ nfs_server_set_init_caps(server);
+
/* Preserve the values of mount_server-related mount options */
if (ctx->mount_server.addrlen) {
memcpy(&server->mountd_address, &ctx->mount_server.address,
@@ -884,7 +923,6 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour
target->acregmax = source->acregmax;
target->acdirmin = source->acdirmin;
target->acdirmax = source->acdirmax;
- target->caps = source->caps;
target->options = source->options;
target->auth_info = source->auth_info;
target->port = source->port;
@@ -1095,6 +1133,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
if (error < 0)
goto out_free_server;
+ nfs_server_set_init_caps(server);
+
/* probe the filesystem info for this server filesystem */
error = nfs_probe_fsinfo(server, fh, fattr_fsinfo);
if (error < 0)
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index eafa9d7b0911..6bbe92a4eb0c 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -67,14 +67,21 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
struct nfs4_label *label = NULL;
struct nfs_fattr *fattr = NULL;
struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
- size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+ size_t fh_size = offsetof(struct nfs_fh, data);
const struct nfs_rpc_ops *rpc_ops;
struct dentry *dentry;
struct inode *inode;
- int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+ int len = EMBED_FH_OFF;
u32 *p = fid->raw;
int ret;
+ /* Initial check of bounds */
+ if (fh_len < len + XDR_QUADLEN(fh_size) ||
+ fh_len > XDR_QUADLEN(NFS_MAXFHSIZE))
+ return NULL;
+ /* Calculate embedded filehandle size */
+ fh_size += server_fh->size;
+ len += XDR_QUADLEN(fh_size);
/* NULL translates to ESTALE */
if (fh_len < len || fh_type != len)
return NULL;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 7a568d2de472..14c7de8fd781 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -739,14 +739,14 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
- struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN);
u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ if (IS_ERR(ds))
continue;
if (check_device &&
@@ -754,10 +754,10 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
continue;
*best_idx = idx;
- return ds;
+ break;
}
- return NULL;
+ return ds;
}
static struct nfs4_pnfs_ds *
@@ -933,7 +933,7 @@ retry:
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
- if (!ds) {
+ if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
pnfs_generic_pg_cleanup(pgio);
@@ -1826,6 +1826,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
u32 idx = hdr->pgio_mirror_idx;
int vers;
struct nfs_fh *fh;
+ bool ds_fatal_error = false;
dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
@@ -1833,8 +1834,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@@ -1875,7 +1878,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -1896,11 +1899,14 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
int vers;
struct nfs_fh *fh;
u32 idx = hdr->pgio_mirror_idx;
+ bool ds_fatal_error = false;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@@ -1943,7 +1949,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -1985,7 +1991,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ if (IS_ERR(ds))
goto out_err;
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 4b0cdddce6eb..11777d33a85e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -368,11 +368,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_mirror *mirror,
bool fail_return)
{
- struct nfs4_pnfs_ds *ds = NULL;
+ struct nfs4_pnfs_ds *ds;
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
- int status;
+ int status = -EAGAIN;
if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
goto noconnect;
@@ -410,7 +410,7 @@ noconnect:
ff_layout_send_layouterror(lseg);
if (fail_return || !ff_layout_has_available_ds(lseg))
pnfs_error_mark_layout_for_return(ino, lseg);
- ds = NULL;
+ ds = ERR_PTR(status);
out:
return ds;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index a6d0b64dda36..00066057b141 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -219,6 +219,7 @@ extern struct nfs_client *
nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
struct nfs4_sessionid *, u32);
extern struct nfs_server *nfs_create_server(struct fs_context *);
+extern void nfs_server_set_init_caps(struct nfs_server *);
extern struct nfs_server *nfs4_create_server(struct fs_context *);
extern struct nfs_server *nfs4_create_referral_server(struct fs_context *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
@@ -581,9 +582,12 @@ nfs_write_match_verf(const struct nfs_writeverf *verf,
static inline gfp_t nfs_io_gfp_mask(void)
{
- if (current->flags & PF_WQ_WORKER)
- return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
- return GFP_KERNEL;
+ gfp_t ret = current_gfp_context(GFP_KERNEL);
+
+ /* For workers __GFP_NORETRY only with __GFP_IO or __GFP_FS */
+ if ((current->flags & PF_WQ_WORKER) && ret == GFP_KERNEL)
+ ret |= __GFP_NORETRY | __GFP_NOWARN;
+ return ret;
}
/* unlink.c */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8557b2218aa1..7e4b126e3061 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1084,20 +1084,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
if (error < 0)
goto out;
- /* Set the basic capabilities */
- server->caps |= server->nfs_client->cl_mvops->init_caps;
- if (server->flags & NFS_MOUNT_NORDIRPLUS)
- server->caps &= ~NFS_CAP_READDIRPLUS;
- if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
- server->caps &= ~NFS_CAP_READ_PLUS;
- /*
- * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
- * authentication.
- */
- if (nfs4_disable_idmapping &&
- server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
- server->caps |= NFS_CAP_UIDGID_NOMAP;
-
+ nfs_server_set_init_caps(server);
/* Probe the root fh to retrieve its FSID and filehandle */
error = nfs4_get_rootfh(server, mntfh, auth_probe);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9d4e4146efef..5976a31b09b0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3968,6 +3968,8 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
.interruptible = true,
};
int err;
+
+ nfs_server_set_init_caps(server);
do {
err = nfs4_handle_exception(server,
_nfs4_server_capabilities(server, fhandle),
@@ -10528,7 +10530,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
{
- ssize_t error, error2, error3, error4;
+ ssize_t error, error2, error3, error4 = 0;
size_t left = size;
error = generic_listxattr(dentry, list, left);
@@ -10556,9 +10558,11 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
left -= error3;
}
- error4 = security_inode_listsecurity(d_inode(dentry), list, left);
- if (error4 < 0)
- return error4;
+ if (!nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
+ error4 = security_inode_listsecurity(d_inode(dentry), list, left);
+ if (error4 < 0)
+ return error4;
+ }
error += error2 + error3 + error4;
if (size && error > size)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index fdecf729fa92..d6b18100a4cf 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -168,83 +168,6 @@ nfs_page_group_lock_head(struct nfs_page *req)
}
/*
- * nfs_unroll_locks - unlock all newly locked reqs and wait on @req
- * @head: head request of page group, must be holding head lock
- * @req: request that couldn't lock and needs to wait on the req bit lock
- *
- * This is a helper function for nfs_lock_and_join_requests
- * returns 0 on success, < 0 on error.
- */
-static void
-nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
-{
- struct nfs_page *tmp;
-
- /* relinquish all the locks successfully grabbed this run */
- for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
- if (!kref_read(&tmp->wb_kref))
- continue;
- nfs_unlock_and_release_request(tmp);
- }
-}
-
-/*
- * nfs_page_group_lock_subreq - try to lock a subrequest
- * @head: head request of page group
- * @subreq: request to lock
- *
- * This is a helper function for nfs_lock_and_join_requests which
- * must be called with the head request and page group both locked.
- * On error, it returns with the page group unlocked.
- */
-static int
-nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
-{
- int ret;
-
- if (!kref_get_unless_zero(&subreq->wb_kref))
- return 0;
- while (!nfs_lock_request(subreq)) {
- nfs_page_group_unlock(head);
- ret = nfs_wait_on_request(subreq);
- if (!ret)
- ret = nfs_page_group_lock(head);
- if (ret < 0) {
- nfs_unroll_locks(head, subreq);
- nfs_release_request(subreq);
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * nfs_page_group_lock_subrequests - try to lock the subrequests
- * @head: head request of page group
- *
- * This is a helper function for nfs_lock_and_join_requests which
- * must be called with the head request locked.
- */
-int nfs_page_group_lock_subrequests(struct nfs_page *head)
-{
- struct nfs_page *subreq;
- int ret;
-
- ret = nfs_page_group_lock(head);
- if (ret < 0)
- return ret;
- /* lock each request in the page group */
- for (subreq = head->wb_this_page; subreq != head;
- subreq = subreq->wb_this_page) {
- ret = nfs_page_group_lock_subreq(head, subreq);
- if (ret < 0)
- return ret;
- }
- nfs_page_group_unlock(head);
- return 0;
-}
-
-/*
* nfs_page_set_headlock - set the request PG_HEADLOCK
* @req: request that is to be locked
*
@@ -311,13 +234,14 @@ nfs_page_group_unlock(struct nfs_page *req)
nfs_page_clear_headlock(req);
}
-/*
- * nfs_page_group_sync_on_bit_locked
+/**
+ * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set
+ * @req: request in page group
+ * @bit: PG_* bit that is used to sync page group
*
* must be called with page group lock held
*/
-static bool
-nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
{
struct nfs_page *head = req->wb_head;
struct nfs_page *tmp;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b41c6fced75a..ef273b71f019 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -3216,6 +3216,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos;
int status;
+ bool mark_as_dirty = false;
if (!pnfs_layoutcommit_outstanding(inode))
return 0;
@@ -3267,19 +3268,23 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
if (ld->prepare_layoutcommit) {
status = ld->prepare_layoutcommit(&data->args);
if (status) {
- put_cred(data->cred);
+ if (status != -ENOSPC)
+ put_cred(data->cred);
spin_lock(&inode->i_lock);
set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
if (end_pos > nfsi->layout->plh_lwb)
nfsi->layout->plh_lwb = end_pos;
- goto out_unlock;
+ if (status != -ENOSPC)
+ goto out_unlock;
+ spin_unlock(&inode->i_lock);
+ mark_as_dirty = true;
}
}
status = nfs4_proc_layoutcommit(data, sync);
out:
- if (status)
+ if (status || mark_as_dirty)
mark_inode_dirty_sync(inode);
dprintk("<-- %s status %d\n", __func__, status);
return status;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index bb401d37fe26..9323631f4889 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -155,20 +155,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
}
}
-static int
-nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
+static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
{
- int ret;
-
- if (!test_bit(PG_REMOVE, &req->wb_flags))
- return 0;
- ret = nfs_page_group_lock(req);
- if (ret)
- return ret;
if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
nfs_page_set_inode_ref(req, inode);
- nfs_page_group_unlock(req);
- return 0;
}
static struct nfs_page *
@@ -240,36 +230,6 @@ static struct nfs_page *nfs_page_find_head_request(struct page *page)
return req;
}
-static struct nfs_page *nfs_find_and_lock_page_request(struct page *page)
-{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req, *head;
- int ret;
-
- for (;;) {
- req = nfs_page_find_head_request(page);
- if (!req)
- return req;
- head = nfs_page_group_lock_head(req);
- if (head != req)
- nfs_release_request(req);
- if (IS_ERR(head))
- return head;
- ret = nfs_cancel_remove_inode(head, inode);
- if (ret < 0) {
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
- }
- /* Ensure that nobody removed the request before we locked it */
- if (head == nfs_page_private_request(page))
- break;
- if (PageSwapCache(page))
- break;
- nfs_unlock_and_release_request(head);
- }
- return head;
-}
-
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
@@ -547,6 +507,57 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
}
/*
+ * nfs_unroll_locks - unlock all newly locked reqs and wait on @req
+ * @head: head request of page group, must be holding head lock
+ * @req: request that couldn't lock and needs to wait on the req bit lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests
+ * returns 0 on success, < 0 on error.
+ */
+static void
+nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
+{
+ struct nfs_page *tmp;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
+ if (!kref_read(&tmp->wb_kref))
+ continue;
+ nfs_unlock_and_release_request(tmp);
+ }
+}
+
+/*
+ * nfs_page_group_lock_subreq - try to lock a subrequest
+ * @head: head request of page group
+ * @subreq: request to lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests which
+ * must be called with the head request and page group both locked.
+ * On error, it returns with the page group unlocked.
+ */
+static int
+nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
+{
+ int ret;
+
+ if (!kref_get_unless_zero(&subreq->wb_kref))
+ return 0;
+ while (!nfs_lock_request(subreq)) {
+ nfs_page_group_unlock(head);
+ ret = nfs_wait_on_request(subreq);
+ if (!ret)
+ ret = nfs_page_group_lock(head);
+ if (ret < 0) {
+ nfs_unroll_locks(head, subreq);
+ nfs_release_request(subreq);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
* nfs_lock_and_join_requests - join all subreqs to the head req
* @page: the page used to lookup the "page group" of nfs_page structures
*
@@ -565,7 +576,7 @@ static struct nfs_page *
nfs_lock_and_join_requests(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *head;
+ struct nfs_page *head, *subreq;
struct nfs_commit_info cinfo;
int ret;
@@ -575,20 +586,49 @@ nfs_lock_and_join_requests(struct page *page)
* reference to the whole page group - the group will not be destroyed
* until the head reference is released.
*/
- head = nfs_find_and_lock_page_request(page);
+retry:
+ head = nfs_page_find_head_request(page);
if (IS_ERR_OR_NULL(head))
return head;
- /* lock each request in the page group */
- ret = nfs_page_group_lock_subrequests(head);
- if (ret < 0) {
+ while (!nfs_lock_request(head)) {
+ ret = nfs_wait_on_request(head);
+ if (ret < 0) {
+ nfs_release_request(head);
+ return ERR_PTR(ret);
+ }
+ }
+
+ ret = nfs_page_group_lock(head);
+ if (ret < 0)
+ goto out_unlock;
+
+ /* Ensure that nobody removed the request before we locked it */
+ if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
+ nfs_page_group_unlock(head);
nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
+ goto retry;
}
- nfs_join_page_group(head, &cinfo, inode);
+ nfs_cancel_remove_inode(head, inode);
+ /* lock each request in the page group */
+ for (subreq = head->wb_this_page;
+ subreq != head;
+ subreq = subreq->wb_this_page) {
+ ret = nfs_page_group_lock_subreq(head, subreq);
+ if (ret < 0)
+ goto out_unlock;
+ }
+
+ nfs_page_group_unlock(head);
+
+ nfs_join_page_group(head, &cinfo, inode);
return head;
+
+out_unlock:
+ nfs_unlock_and_release_request(head);
+ return ERR_PTR(ret);
}
static void nfs_write_error(struct nfs_page *req, int error)
@@ -781,7 +821,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *head;
- if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+ nfs_page_group_lock(req);
+ if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) {
head = req->wb_head;
spin_lock(&mapping->private_lock);
@@ -792,6 +833,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
}
spin_unlock(&mapping->private_lock);
}
+ nfs_page_group_unlock(req);
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
nfs_release_request(req);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1d09fb4ff5a5..df982830efc2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4285,10 +4285,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
}
status = nfs_ok;
if (conf) {
- old = unconf;
- unhash_client_locked(old);
- nfsd4_change_callback(conf, &unconf->cl_cb_conn);
- } else {
+ if (get_client_locked(conf) == nfs_ok) {
+ old = unconf;
+ unhash_client_locked(old);
+ nfsd4_change_callback(conf, &unconf->cl_cb_conn);
+ } else {
+ conf = NULL;
+ }
+ }
+
+ if (!conf) {
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = nfserr_clid_inuse;
@@ -4305,10 +4311,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
}
trace_nfsd_clid_replaced(&old->cl_clientid);
}
+ status = get_client_locked(unconf);
+ if (status != nfs_ok) {
+ old = NULL;
+ goto out;
+ }
move_to_confirmed(unconf);
conf = unconf;
}
- get_client_locked(conf);
spin_unlock(&nn->client_lock);
if (conf == unconf)
fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
@@ -5726,6 +5736,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
+ if (dp && nfsd4_is_deleg_cur(open) &&
+ (dp->dl_stid.sc_file != fp)) {
+ /*
+ * RFC8881 section 8.2.4 mandates the server to return
+ * NFS4ERR_BAD_STATEID if the selected table entry does
+ * not match the current filehandle. However returning
+ * NFS4ERR_BAD_STATEID in the OPEN can cause the client
+ * to repeatedly retry the operation with the same
+ * stateid, since the stateid itself is valid. To avoid
+ * this situation NFSD returns NFS4ERR_INVAL instead.
+ */
+ status = nfserr_inval;
+ goto out;
+ }
stp = nfsd4_find_and_lock_existing_open(fp, open);
} else {
open->op_file = NULL;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 424949d86a41..7b9d138c8bf8 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -517,11 +517,18 @@ static int __nilfs_read_inode(struct super_block *sb,
inode->i_op = &nilfs_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
- } else {
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &nilfs_special_inode_operations;
init_special_inode(
inode, inode->i_mode,
huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
+ } else {
+ nilfs_error(sb,
+ "invalid file type bits in mode 0%o for inode %lu",
+ inode->i_mode, ino);
+ err = -EIO;
+ goto failed_unmap;
}
nilfs_ifile_unmap_inode(root->ifile, ino, bh);
brelse(bh);
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index a4ab0164d150..c49e64ebbd0a 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -304,6 +304,9 @@ static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi,
if (sbi->options->nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN))
return true;
+ if (fname->name_len + sizeof(struct NTFS_DE) > le16_to_cpu(e->size))
+ return true;
+
name_len = ntfs_utf16_to_nls(sbi, fname->name, fname->name_len, name,
PATH_MAX);
if (name_len <= 0) {
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 74cf9c51e322..ffb31420085f 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -398,7 +398,10 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
}
if (ni->i_valid < to) {
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ err = -EAGAIN;
+ goto out;
+ }
err = ntfs_extend_initialized_size(file, ni,
ni->i_valid, to);
inode_unlock(inode);
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 7adfa19a2f06..edd7c89ba1a1 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -1118,10 +1118,10 @@ int inode_write_data(struct inode *inode, const void *data, size_t bytes)
* Number of bytes for REPARSE_DATA_BUFFER(IO_REPARSE_TAG_SYMLINK)
* for unicode string of @uni_len length.
*/
-static inline u32 ntfs_reparse_bytes(u32 uni_len)
+static inline u32 ntfs_reparse_bytes(u32 uni_len, bool is_absolute)
{
/* Header + unicode string + decorated unicode string. */
- return sizeof(short) * (2 * uni_len + 4) +
+ return sizeof(short) * (2 * uni_len + (is_absolute ? 4 : 0)) +
offsetof(struct REPARSE_DATA_BUFFER,
SymbolicLinkReparseBuffer.PathBuffer);
}
@@ -1134,8 +1134,11 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
struct REPARSE_DATA_BUFFER *rp;
__le16 *rp_name;
typeof(rp->SymbolicLinkReparseBuffer) *rs;
+ bool is_absolute;
- rp = kzalloc(ntfs_reparse_bytes(2 * size + 2), GFP_NOFS);
+ is_absolute = (strlen(symname) > 1 && symname[1] == ':');
+
+ rp = kzalloc(ntfs_reparse_bytes(2 * size + 2, is_absolute), GFP_NOFS);
if (!rp)
return ERR_PTR(-ENOMEM);
@@ -1150,7 +1153,7 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
goto out;
/* err = the length of unicode name of symlink. */
- *nsize = ntfs_reparse_bytes(err);
+ *nsize = ntfs_reparse_bytes(err, is_absolute);
if (*nsize > sbi->reparse.max_size) {
err = -EFBIG;
@@ -1170,7 +1173,7 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
/* PrintName + SubstituteName. */
rs->SubstituteNameOffset = cpu_to_le16(sizeof(short) * err);
- rs->SubstituteNameLength = cpu_to_le16(sizeof(short) * err + 8);
+ rs->SubstituteNameLength = cpu_to_le16(sizeof(short) * err + (is_absolute ? 8 : 0));
rs->PrintNameLength = rs->SubstituteNameOffset;
/*
@@ -1178,16 +1181,18 @@ ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname,
* parse this path.
* 0-absolute path 1- relative path (SYMLINK_FLAG_RELATIVE).
*/
- rs->Flags = 0;
+ rs->Flags = cpu_to_le32(is_absolute ? 0 : SYMLINK_FLAG_RELATIVE);
- memmove(rp_name + err + 4, rp_name, sizeof(short) * err);
+ memmove(rp_name + err + (is_absolute ? 4 : 0), rp_name, sizeof(short) * err);
- /* Decorate SubstituteName. */
- rp_name += err;
- rp_name[0] = cpu_to_le16('\\');
- rp_name[1] = cpu_to_le16('?');
- rp_name[2] = cpu_to_le16('?');
- rp_name[3] = cpu_to_le16('\\');
+ if (is_absolute) {
+ /* Decorate SubstituteName. */
+ rp_name += err;
+ rp_name[0] = cpu_to_le16('\\');
+ rp_name[1] = cpu_to_le16('?');
+ rp_name[2] = cpu_to_le16('?');
+ rp_name[3] = cpu_to_le16('\\');
+ }
return rp;
out:
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index fa41db088488..cd4bfd92ebd6 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -354,7 +354,7 @@ static ssize_t orangefs_debug_read(struct file *file,
goto out;
mutex_lock(&orangefs_debug_lock);
- sprintf_ret = sprintf(buf, "%s", (char *)file->private_data);
+ sprintf_ret = scnprintf(buf, ORANGEFS_MAX_DEBUG_STRING_LEN, "%s", (char *)file->private_data);
mutex_unlock(&orangefs_debug_lock);
read_ret = simple_read_from_buffer(ubuf, count, ppos, buf, sprintf_ret);
@@ -728,8 +728,8 @@ static void do_k_string(void *k_mask, int index)
if (*mask & s_kmod_keyword_mask_map[index].mask_val) {
if ((strlen(kernel_debug_string) +
- strlen(s_kmod_keyword_mask_map[index].keyword))
- < ORANGEFS_MAX_DEBUG_STRING_LEN - 1) {
+ strlen(s_kmod_keyword_mask_map[index].keyword) + 1)
+ < ORANGEFS_MAX_DEBUG_STRING_LEN) {
strcat(kernel_debug_string,
s_kmod_keyword_mask_map[index].keyword);
strcat(kernel_debug_string, ",");
@@ -756,7 +756,7 @@ static void do_c_string(void *c_mask, int index)
(mask->mask2 & cdm_array[index].mask2)) {
if ((strlen(client_debug_string) +
strlen(cdm_array[index].keyword) + 1)
- < ORANGEFS_MAX_DEBUG_STRING_LEN - 2) {
+ < ORANGEFS_MAX_DEBUG_STRING_LEN) {
strcat(client_debug_string,
cdm_array[index].keyword);
strcat(client_debug_string, ",");
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 5108740f9653..cdfab5d42be8 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -122,10 +122,15 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
unsigned short flags;
unsigned int fragments;
u64 lookup_table_start, xattr_id_table_start, next_table;
- int err;
+ int err, devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
TRACE("Entered squashfs_fill_superblock\n");
+ if (!devblksize) {
+ errorf(fc, "squashfs: unable to set blocksize\n");
+ return -EINVAL;
+ }
+
sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL);
if (sb->s_fs_info == NULL) {
ERROR("Failed to allocate squashfs_sb_info\n");
@@ -135,12 +140,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
- msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
- if (!msblk->devblksize) {
- errorf(fc, "squashfs: unable to set blocksize\n");
- return -EINVAL;
- }
-
+ msblk->devblksize = devblksize;
msblk->devblksize_log2 = ffz(~msblk->devblksize);
mutex_init(&msblk->meta_index_mutex);
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 4f6c7b546bea..2a398f619b7d 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -171,7 +171,7 @@ static struct buffer_head *udf_fiiter_bread_blk(struct udf_fileident_iter *iter)
static int udf_fiiter_advance_blk(struct udf_fileident_iter *iter)
{
iter->loffset++;
- if (iter->loffset < iter->elen >> iter->dir->i_blkbits)
+ if (iter->loffset < DIV_ROUND_UP(iter->elen, 1<<iter->dir->i_blkbits))
return 0;
iter->loffset = 0;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4275d2bc0c36..69e4f00ce791 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1411,7 +1411,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
struct genericPartitionMap *gpm;
uint16_t ident;
struct buffer_head *bh;
- unsigned int table_len;
+ unsigned int table_len, part_map_count;
int ret;
bh = udf_read_tagged(sb, block, block, &ident);
@@ -1432,7 +1432,16 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
"logical volume");
if (ret)
goto out_bh;
- ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
+
+ part_map_count = le32_to_cpu(lvd->numPartitionMaps);
+ if (part_map_count > table_len / sizeof(struct genericPartitionMap1)) {
+ udf_err(sb, "error loading logical volume descriptor: "
+ "Too many partition maps (%u > %u)\n", part_map_count,
+ table_len / (unsigned)sizeof(struct genericPartitionMap1));
+ ret = -EIO;
+ goto out_bh;
+ }
+ ret = udf_sb_alloc_partition_maps(sb, part_map_count);
if (ret)
goto out_bh;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 83b95be9ded8..0ca4ab5ef119 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -418,6 +418,13 @@ xfs_attr_rmtval_get(
dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt,
0, &bp, &xfs_attr3_rmt_buf_ops);
+ /*
+ * ENODATA from disk implies a disk medium failure;
+ * ENODATA for xattrs means attribute not found, so
+ * disambiguate that here.
+ */
+ if (error == -ENODATA)
+ error = -EIO;
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index c062e2c85178..9ac99912e7a5 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2639,6 +2639,12 @@ xfs_da_read_buf(
error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0,
&bp, ops);
+ /*
+ * ENODATA from disk implies a disk medium failure; ENODATA for
+ * xattrs means attribute not found, so disambiguate that here.
+ */
+ if (error == -ENODATA && whichfork == XFS_ATTR_FORK)
+ error = -EIO;
if (error)
goto out_free;