summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/kmem.c10
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c12
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c3
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c10
-rw-r--r--fs/xfs/libxfs/xfs_btree.c21
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c3
-rw-r--r--fs/xfs/libxfs/xfs_format.h10
-rw-r--r--fs/xfs/libxfs/xfs_fs.h10
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c10
-rw-r--r--fs/xfs/libxfs/xfs_sb.c10
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c7
-rw-r--r--fs/xfs/xfs_aops.c23
-rw-r--r--fs/xfs/xfs_bmap_util.c2
-rw-r--r--fs/xfs/xfs_buf.c3
-rw-r--r--fs/xfs/xfs_file.c17
-rw-r--r--fs/xfs/xfs_ioctl.c6
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c47
-rw-r--r--fs/xfs/xfs_log.c71
-rw-r--r--fs/xfs/xfs_log.h1
-rw-r--r--fs/xfs/xfs_log_priv.h51
-rw-r--r--fs/xfs/xfs_log_recover.c12
-rw-r--r--fs/xfs/xfs_message.c7
-rw-r--r--fs/xfs/xfs_pnfs.c5
-rw-r--r--fs/xfs/xfs_trace.h1
30 files changed, 310 insertions, 59 deletions
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index a7a3a63bb360..686ba6fb20dd 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -55,8 +55,9 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
return ptr;
if (!(++retries % 100))
xfs_err(NULL,
- "possible memory allocation deadlock in %s (mode:0x%x)",
- __func__, lflags);
+ "%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
+ current->comm, current->pid,
+ (unsigned int)size, __func__, lflags);
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1);
}
@@ -120,8 +121,9 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags)
return ptr;
if (!(++retries % 100))
xfs_err(NULL,
- "possible memory allocation deadlock in %s (mode:0x%x)",
- __func__, lflags);
+ "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
+ current->comm, current->pid,
+ __func__, lflags);
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1);
}
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9b5da7e3b5c2..e926197e0620 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -482,7 +482,9 @@ xfs_agfl_verify(
be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
return false;
}
- return true;
+
+ return xfs_log_check_lsn(mp,
+ be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
}
static void
@@ -2259,9 +2261,13 @@ xfs_agf_verify(
{
struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
- if (xfs_sb_version_hascrc(&mp->m_sb) &&
- !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
return false;
+ if (!xfs_log_check_lsn(mp,
+ be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
+ return false;
+ }
if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 33df52d97ec7..aa187f7ba2dd 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -41,6 +41,7 @@
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
#include "xfs_dir2.h"
+#include "xfs_log.h"
/*
@@ -266,6 +267,8 @@ xfs_attr3_leaf_verify(
return false;
if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
+ return false;
} else {
if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
return false;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index f38f9bd81557..5ab95ffa4ae9 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -107,7 +107,7 @@ xfs_attr3_rmt_verify(
if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
return false;
if (be32_to_cpu(rmt->rm_offset) +
- be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
+ be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
return false;
if (rmt->rm_owner == 0)
return false;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 5256fe59623b..ab92d10890ed 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -948,14 +948,16 @@ xfs_bmap_local_to_extents(
bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
/*
- * Initialise the block and copy the data
+ * Initialize the block, copy the data and log the remote buffer.
*
- * Note: init_fn must set the buffer log item type correctly!
+ * The callout is responsible for logging because the remote format
+ * might differ from the local format and thus we don't know how much to
+ * log here. Note that init_fn must also set the buffer log item type
+ * correctly.
*/
init_fn(tp, bp, ip, ifp);
- /* account for the change in fork size and log everything */
- xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
+ /* account for the change in fork size */
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
xfs_bmap_local_to_extents_empty(ip, whichfork);
flags |= XFS_ILOG_CORE;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index f7d7ee7a2607..af1bbee5586e 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -32,6 +32,7 @@
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_alloc.h"
+#include "xfs_log.h"
/*
* Cursor allocation zone.
@@ -222,7 +223,7 @@ xfs_btree_check_ptr(
* long-form btree header.
*
* Prior to calculting the CRC, pull the LSN out of the buffer log item and put
- * it into the buffer so recovery knows what the last modifcation was that made
+ * it into the buffer so recovery knows what the last modification was that made
* it to disk.
*/
void
@@ -243,8 +244,14 @@ bool
xfs_btree_lblock_verify_crc(
struct xfs_buf *bp)
{
- if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
+ return false;
return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+ }
return true;
}
@@ -254,7 +261,7 @@ xfs_btree_lblock_verify_crc(
* short-form btree header.
*
* Prior to calculting the CRC, pull the LSN out of the buffer log item and put
- * it into the buffer so recovery knows what the last modifcation was that made
+ * it into the buffer so recovery knows what the last modification was that made
* it to disk.
*/
void
@@ -275,8 +282,14 @@ bool
xfs_btree_sblock_verify_crc(
struct xfs_buf *bp)
{
- if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
+ return false;
return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+ }
return true;
}
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index be43248a5822..e89a0f8f827c 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -39,6 +39,7 @@
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_buf_item.h"
+#include "xfs_log.h"
/*
* xfs_da_btree.c
@@ -150,6 +151,8 @@ xfs_da3_node_verify(
return false;
if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
+ return false;
} else {
if (ichdr.magic != XFS_DA_NODE_MAGIC)
return false;
@@ -322,6 +325,7 @@ xfs_da3_node_create(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
+ memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
ichdr.magic = XFS_DA3_NODE_MAGIC;
hdr3->info.blkno = cpu_to_be64(bp->b_bn);
hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 4778d1dd511a..9c10e2b8cfcb 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -34,6 +34,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
+#include "xfs_log.h"
/*
* Local function prototypes.
@@ -71,6 +72,8 @@ xfs_dir3_block_verify(
return false;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
+ return false;
} else {
if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 824131e71bc5..af71a84f343c 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -31,6 +31,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
+#include "xfs_log.h"
/*
* Check the consistency of the data block.
@@ -224,6 +225,8 @@ xfs_dir3_data_verify(
return false;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
+ return false;
} else {
if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index f300240ebb8d..3923e1f94697 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -33,6 +33,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
+#include "xfs_log.h"
/*
* Local function declarations.
@@ -164,6 +165,8 @@ xfs_dir3_leaf_verify(
return false;
if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
+ return false;
} else {
if (leaf->hdr.info.magic != cpu_to_be16(magic))
return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index cc28e924545b..70b0cb2fd556 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -33,6 +33,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
+#include "xfs_log.h"
/*
* Function declarations.
@@ -97,6 +98,8 @@ xfs_dir3_free_verify(
return false;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
+ return false;
} else {
if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
return false;
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9590a069e556..8568de163004 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -60,6 +60,14 @@ struct xfs_ifork;
#define XFS_SB_VERSION_MOREBITSBIT 0x8000
/*
+ * The size of a single extended attribute on disk is limited by
+ * the size of index values within the attribute entries themselves.
+ * These are be16 fields, so we can only support attribute data
+ * sizes up to 2^16 bytes in length.
+ */
+#define XFS_XATTR_SIZE_MAX (1 << 16)
+
+/*
* Supported feature bit list is just all bits in the versionnum field because
* we've used them all up and understand them all. Except, of course, for the
* shared superblock bit, which nobody knows what it does and so is unsupported.
@@ -1483,7 +1491,7 @@ struct xfs_acl {
*/
#define XFS_ACL_MAX_ENTRIES(mp) \
(xfs_sb_version_hascrc(&mp->m_sb) \
- ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
+ ? (XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
sizeof(struct xfs_acl_entry) \
: 25)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 89689c6a43e2..b2b73a998d42 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -490,6 +490,16 @@ typedef struct xfs_swapext
#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
/*
+ * ioctl limits
+ */
+#ifdef XATTR_LIST_MAX
+# define XFS_XATTR_LIST_MAX XATTR_LIST_MAX
+#else
+# define XFS_XATTR_LIST_MAX 65536
+#endif
+
+
+/*
* ioctl commands that are used by Linux filesystems
*/
#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 54deb2d12ac6..70c1db99f6a7 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -38,6 +38,7 @@
#include "xfs_icreate_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
+#include "xfs_log.h"
/*
@@ -2500,9 +2501,14 @@ xfs_agi_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
- if (xfs_sb_version_hascrc(&mp->m_sb) &&
- !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
+ return false;
+ if (!xfs_log_check_lsn(mp,
+ be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn)))
return false;
+ }
+
/*
* Validate the magic number of the agi block.
*/
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 47425140f343..a0b071d881a0 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -35,6 +35,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
+#include "xfs_log.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -163,6 +164,15 @@ xfs_mount_validate_sb(
"Filesystem can not be safely mounted by this kernel.");
return -EINVAL;
}
+ } else if (xfs_sb_version_hascrc(sbp)) {
+ /*
+ * We can't read verify the sb LSN because the read verifier is
+ * called before the log is allocated and processed. We know the
+ * log is set up before write verifier (!check_version) calls,
+ * so just check it here.
+ */
+ if (!xfs_log_check_lsn(mp, sbp->sb_lsn))
+ return -EFSCORRUPTED;
}
if (xfs_sb_version_has_pquotino(sbp)) {
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 8f8af05b3f13..cb6fd20a4d3d 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -31,6 +31,7 @@
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
+#include "xfs_log.h"
/*
@@ -60,6 +61,7 @@ xfs_symlink_hdr_set(
if (!xfs_sb_version_hascrc(&mp->m_sb))
return 0;
+ memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr));
dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
dsl->sl_offset = cpu_to_be32(offset);
dsl->sl_bytes = cpu_to_be32(size);
@@ -116,6 +118,8 @@ xfs_symlink_verify(
return false;
if (dsl->sl_owner == 0)
return false;
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)))
+ return false;
return true;
}
@@ -183,6 +187,7 @@ xfs_symlink_local_to_remote(
if (!xfs_sb_version_hascrc(&mp->m_sb)) {
bp->b_ops = NULL;
memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
+ xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
return;
}
@@ -198,4 +203,6 @@ xfs_symlink_local_to_remote(
buf = bp->b_addr;
buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
+ xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
+ ifp->if_bytes - 1);
}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 50ab2879b9da..e4fff5898c1c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -172,6 +172,12 @@ xfs_setfilesize_ioend(
current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
+ /* we abort the update if there was an IO error */
+ if (ioend->io_error) {
+ xfs_trans_cancel(tp);
+ return ioend->io_error;
+ }
+
return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}
@@ -212,14 +218,17 @@ xfs_end_io(
ioend->io_error = -EIO;
goto done;
}
- if (ioend->io_error)
- goto done;
/*
* For unwritten extents we need to issue transactions to convert a
* range to normal written extens after the data I/O has finished.
+ * Detecting and handling completion IO errors is done individually
+ * for each case as different cleanup operations need to be performed
+ * on error.
*/
if (ioend->io_type == XFS_IO_UNWRITTEN) {
+ if (ioend->io_error)
+ goto done;
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
} else if (ioend->io_append_trans) {
@@ -1399,12 +1408,12 @@ __xfs_get_blocks(
imap.br_startblock == DELAYSTARTBLOCK))) {
if (direct || xfs_get_extsz_hint(ip)) {
/*
- * Drop the ilock in preparation for starting the block
- * allocation transaction. It will be retaken
- * exclusively inside xfs_iomap_write_direct for the
- * actual allocation.
+ * xfs_iomap_write_direct() expects the shared lock. It
+ * is unlocked on return.
*/
- xfs_iunlock(ip, lockmode);
+ if (lockmode == XFS_ILOCK_EXCL)
+ xfs_ilock_demote(ip, lockmode);
+
error = xfs_iomap_write_direct(ip, offset, size,
&imap, nimaps);
if (error)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 3bf4ad0d19e4..eca325e42261 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1027,7 +1027,7 @@ xfs_alloc_file_space(
xfs_bmap_init(&free_list, &firstfsb);
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
allocatesize_fsb, alloc_type, &firstfsb,
- 0, imapp, &nimaps, &free_list);
+ resblks, imapp, &nimaps, &free_list);
if (error) {
goto error0;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 90815c22b22d..3243cdf97f33 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -354,7 +354,8 @@ retry:
*/
if (!(++retries % 100))
xfs_err(NULL,
- "possible memory allocation deadlock in %s (mode:0x%x)",
+ "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
+ current->comm, current->pid,
__func__, gfp_mask);
XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 088e50923830..2f7b6bd39f61 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -482,6 +482,8 @@ xfs_zero_eof(
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(offset > isize);
+ trace_xfs_zero_eof(ip, isize, offset - isize);
+
/*
* First handle zeroing the block on which isize resides.
*
@@ -574,6 +576,7 @@ xfs_file_aio_write_checks(
struct xfs_inode *ip = XFS_I(inode);
ssize_t error = 0;
size_t count = iov_iter_count(from);
+ bool drained_dio = false;
restart:
error = generic_write_checks(iocb, from);
@@ -611,12 +614,13 @@ restart:
bool zero = false;
spin_unlock(&ip->i_flags_lock);
- if (*iolock == XFS_IOLOCK_SHARED) {
- xfs_rw_iunlock(ip, *iolock);
- *iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
- iov_iter_reexpand(from, count);
-
+ if (!drained_dio) {
+ if (*iolock == XFS_IOLOCK_SHARED) {
+ xfs_rw_iunlock(ip, *iolock);
+ *iolock = XFS_IOLOCK_EXCL;
+ xfs_rw_ilock(ip, *iolock);
+ iov_iter_reexpand(from, count);
+ }
/*
* We now have an IO submission barrier in place, but
* AIO can do EOF updates during IO completion and hence
@@ -626,6 +630,7 @@ restart:
* no-op.
*/
inode_dio_wait(inode);
+ drained_dio = true;
goto restart;
}
error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index b67a130134fb..c88ddcadd656 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -411,7 +411,7 @@ xfs_attrlist_by_handle(
if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
return -EFAULT;
if (al_hreq.buflen < sizeof(struct attrlist) ||
- al_hreq.buflen > XATTR_LIST_MAX)
+ al_hreq.buflen > XFS_XATTR_LIST_MAX)
return -EINVAL;
/*
@@ -455,7 +455,7 @@ xfs_attrmulti_attr_get(
unsigned char *kbuf;
int error = -EFAULT;
- if (*len > XATTR_SIZE_MAX)
+ if (*len > XFS_XATTR_SIZE_MAX)
return -EINVAL;
kbuf = kmem_zalloc_large(*len, KM_SLEEP);
if (!kbuf)
@@ -485,7 +485,7 @@ xfs_attrmulti_attr_set(
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return -EPERM;
- if (len > XATTR_SIZE_MAX)
+ if (len > XFS_XATTR_SIZE_MAX)
return -EINVAL;
kbuf = memdup_user(ubuf, len);
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index b88bdc85dd3d..1a05d8ae327d 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -356,7 +356,7 @@ xfs_compat_attrlist_by_handle(
sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
return -EFAULT;
if (al_hreq.buflen < sizeof(struct attrlist) ||
- al_hreq.buflen > XATTR_LIST_MAX)
+ al_hreq.buflen > XFS_XATTR_LIST_MAX)
return -EINVAL;
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index dca69c6711b2..c3cb5a552c4e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -131,20 +131,29 @@ xfs_iomap_write_direct(
uint qblocks, resblks, resrtextents;
int committed;
int error;
-
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
+ int lockmode;
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
+ lockmode = XFS_ILOCK_SHARED; /* locked by caller */
+
+ ASSERT(xfs_isilocked(ip, lockmode));
offset_fsb = XFS_B_TO_FSBT(mp, offset);
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
if ((offset + count) > XFS_ISIZE(ip)) {
+ /*
+ * Assert that the in-core extent list is present since this can
+ * call xfs_iread_extents() and we only have the ilock shared.
+ * This should be safe because the lock was held around a bmapi
+ * call in the caller and we only need it to access the in-core
+ * list.
+ */
+ ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
+ XFS_IFEXTENTS);
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
if (error)
- return error;
+ goto out_unlock;
} else {
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@@ -174,6 +183,15 @@ xfs_iomap_write_direct(
}
/*
+ * Drop the shared lock acquired by the caller, attach the dquot if
+ * necessary and move on to transaction setup.
+ */
+ xfs_iunlock(ip, lockmode);
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
+ return error;
+
+ /*
* Allocate and setup the transaction
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
@@ -187,7 +205,8 @@ xfs_iomap_write_direct(
return error;
}
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, lockmode);
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
if (error)
@@ -202,8 +221,8 @@ xfs_iomap_write_direct(
xfs_bmap_init(&free_list, &firstfsb);
nimaps = 1;
error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
- XFS_BMAPI_PREALLOC, &firstfsb, 0,
- imap, &nimaps, &free_list);
+ XFS_BMAPI_PREALLOC, &firstfsb, resblks, imap,
+ &nimaps, &free_list);
if (error)
goto out_bmap_cancel;
@@ -229,7 +248,7 @@ xfs_iomap_write_direct(
error = xfs_alert_fsblock_zero(ip, imap);
out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
return error;
out_bmap_cancel:
@@ -750,9 +769,9 @@ xfs_iomap_write_allocate(
* pointer that the caller gave to us.
*/
error = xfs_bmapi_write(tp, ip, map_start_fsb,
- count_fsb, 0,
- &first_block, 1,
- imap, &nimaps, &free_list);
+ count_fsb, 0, &first_block,
+ nres, imap, &nimaps,
+ &free_list);
if (error)
goto trans_cancel;
@@ -866,8 +885,8 @@ xfs_iomap_write_unwritten(
xfs_bmap_init(&free_list, &firstfsb);
nimaps = 1;
error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
- XFS_BMAPI_CONVERT, &firstfsb,
- 1, &imap, &nimaps, &free_list);
+ XFS_BMAPI_CONVERT, &firstfsb, resblks,
+ &imap, &nimaps, &free_list);
if (error)
goto error_on_bmapi_transaction;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 401252360a62..f52c72a1a06f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2422,11 +2422,20 @@ xlog_write(
&partial_copy_len);
xlog_verify_dest_ptr(log, ptr);
- /* copy region */
+ /*
+ * Copy region.
+ *
+ * Unmount records just log an opheader, so can have
+ * empty payloads with no data region to copy. Hence we
+ * only copy the payload if the vector says it has data
+ * to copy.
+ */
ASSERT(copy_len >= 0);
- memcpy(ptr, reg->i_addr + copy_off, copy_len);
- xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
-
+ if (copy_len > 0) {
+ memcpy(ptr, reg->i_addr + copy_off, copy_len);
+ xlog_write_adv_cnt(&ptr, &len, &log_offset,
+ copy_len);
+ }
copy_len += start_rec_copy + sizeof(xlog_op_header_t);
record_cnt++;
data_cnt += contwr ? copy_len : 0;
@@ -3165,11 +3174,19 @@ xlog_state_switch_iclogs(
}
if (log->l_curr_block >= log->l_logBBsize) {
+ /*
+ * Rewind the current block before the cycle is bumped to make
+ * sure that the combined LSN never transiently moves forward
+ * when the log wraps to the next cycle. This is to support the
+ * unlocked sample of these fields from xlog_valid_lsn(). Most
+ * other cases should acquire l_icloglock.
+ */
+ log->l_curr_block -= log->l_logBBsize;
+ ASSERT(log->l_curr_block >= 0);
+ smp_wmb();
log->l_curr_cycle++;
if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
log->l_curr_cycle++;
- log->l_curr_block -= log->l_logBBsize;
- ASSERT(log->l_curr_block >= 0);
}
ASSERT(iclog == log->l_iclog);
log->l_iclog = iclog->ic_next;
@@ -4023,3 +4040,45 @@ xlog_iclogs_empty(
return 1;
}
+/*
+ * Verify that an LSN stamped into a piece of metadata is valid. This is
+ * intended for use in read verifiers on v5 superblocks.
+ */
+bool
+xfs_log_check_lsn(
+ struct xfs_mount *mp,
+ xfs_lsn_t lsn)
+{
+ struct xlog *log = mp->m_log;
+ bool valid;
+
+ /*
+ * norecovery mode skips mount-time log processing and unconditionally
+ * resets the in-core LSN. We can't validate in this mode, but
+ * modifications are not allowed anyways so just return true.
+ */
+ if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+ return true;
+
+ /*
+ * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
+ * handled by recovery and thus safe to ignore here.
+ */
+ if (lsn == NULLCOMMITLSN)
+ return true;
+
+ valid = xlog_valid_lsn(mp->m_log, lsn);
+
+ /* warn the user about what's gone wrong before verifier failure */
+ if (!valid) {
+ spin_lock(&log->l_icloglock);
+ xfs_warn(mp,
+"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
+"Please unmount and run xfs_repair (>= v4.3) to resolve.",
+ CYCLE_LSN(lsn), BLOCK_LSN(lsn),
+ log->l_curr_cycle, log->l_curr_block);
+ spin_unlock(&log->l_icloglock);
+ }
+
+ return valid;
+}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 09d91d3166cd..aa533a7d50f2 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -181,5 +181,6 @@ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
void xfs_log_work_queue(struct xfs_mount *mp);
void xfs_log_worker(struct work_struct *work);
void xfs_log_quiesce(struct xfs_mount *mp);
+bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 950f3f94720c..8daba7491b13 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -560,4 +560,55 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
remove_wait_queue(wq, &wait);
}
+/*
+ * The LSN is valid so long as it is behind the current LSN. If it isn't, this
+ * means that the next log record that includes this metadata could have a
+ * smaller LSN. In turn, this means that the modification in the log would not
+ * replay.
+ */
+static inline bool
+xlog_valid_lsn(
+ struct xlog *log,
+ xfs_lsn_t lsn)
+{
+ int cur_cycle;
+ int cur_block;
+ bool valid = true;
+
+ /*
+ * First, sample the current lsn without locking to avoid added
+ * contention from metadata I/O. The current cycle and block are updated
+ * (in xlog_state_switch_iclogs()) and read here in a particular order
+ * to avoid false negatives (e.g., thinking the metadata LSN is valid
+ * when it is not).
+ *
+ * The current block is always rewound before the cycle is bumped in
+ * xlog_state_switch_iclogs() to ensure the current LSN is never seen in
+ * a transiently forward state. Instead, we can see the LSN in a
+ * transiently behind state if we happen to race with a cycle wrap.
+ */
+ cur_cycle = ACCESS_ONCE(log->l_curr_cycle);
+ smp_rmb();
+ cur_block = ACCESS_ONCE(log->l_curr_block);
+
+ if ((CYCLE_LSN(lsn) > cur_cycle) ||
+ (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) {
+ /*
+ * If the metadata LSN appears invalid, it's possible the check
+ * above raced with a wrap to the next log cycle. Grab the lock
+ * to check for sure.
+ */
+ spin_lock(&log->l_icloglock);
+ cur_cycle = log->l_curr_cycle;
+ cur_block = log->l_curr_block;
+ spin_unlock(&log->l_icloglock);
+
+ if ((CYCLE_LSN(lsn) > cur_cycle) ||
+ (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block))
+ valid = false;
+ }
+
+ return valid;
+}
+
#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 512a0945d52a..f8f1363dc045 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4609,9 +4609,19 @@ xlog_recover(
int error;
/* find the tail of the log */
- if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
+ error = xlog_find_tail(log, &head_blk, &tail_blk);
+ if (error)
return error;
+ /*
+ * The superblock was read before the log was available and thus the LSN
+ * could not be verified. Check the superblock LSN against the current
+ * LSN now that it's known.
+ */
+ if (xfs_sb_version_hascrc(&log->l_mp->m_sb) &&
+ !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn))
+ return -EINVAL;
+
if (tail_blk != head_blk) {
/* There used to be a comment here:
*
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index d8b67547ab34..11792d888e4e 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -17,6 +17,7 @@
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_error.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -43,6 +44,7 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \
{ \
struct va_format vaf; \
va_list args; \
+ int level; \
\
va_start(args, fmt); \
\
@@ -51,6 +53,11 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \
\
__xfs_printk(kern_level, mp, &vaf); \
va_end(args); \
+ \
+ if (!kstrtoint(kern_level, 0, &level) && \
+ level <= LOGLEVEL_ERR && \
+ xfs_error_level >= XFS_ERRLEVEL_HIGH) \
+ xfs_stack_trace(); \
} \
define_xfs_printk_level(xfs_emerg, KERN_EMERG);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index ab4a6066f7ca..dc6221942b85 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -181,6 +181,11 @@ xfs_fs_map_blocks(
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
+ /*
+ * xfs_iomap_write_direct() expects to take ownership of
+ * the shared ilock.
+ */
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
error = xfs_iomap_write_direct(ip, offset, length,
&imap, nimaps);
if (error)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 5ed36b1e04c1..957f5ccdd84f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1312,6 +1312,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
+DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
DECLARE_EVENT_CLASS(xfs_itrunc_class,
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),