summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2013-04-21 14:53:46 -0500
committerBen Myers <bpm@sgi.com>2013-04-21 14:53:46 -0500
commitee1a47ab0e77600fcbdf1c87d461bd8f3f63150d (patch)
tree6340d9f4b8b53c0d18045da1372599645375efce
parenta2050646f655a90400cbb66c3866d2e0137eee0c (diff)
xfs: add support for large btree blocks
Add support for larger btree blocks that contains a CRC32C checksum, a filesystem uuid and block number for detecting filesystem consistency and out of place writes. [dchinner@redhat.com] Also include an owner field to allow reverse mappings to be implemented for improved repairability and a LSN field to so that log recovery can easily determine the last modification that made it to disk for each buffer. [dchinner@redhat.com] Add buffer log format flags to indicate the type of buffer to recovery so that we don't have to do blind magic number tests to determine what the buffer is. [dchinner@redhat.com] Modified to fit into the verifier structure. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Ben Myers <bpm@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
-rw-r--r--fs/xfs/xfs_alloc_btree.c105
-rw-r--r--fs/xfs/xfs_alloc_btree.h12
-rw-r--r--fs/xfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/xfs_bmap.c47
-rw-r--r--fs/xfs/xfs_bmap_btree.c110
-rw-r--r--fs/xfs/xfs_bmap_btree.h19
-rw-r--r--fs/xfs/xfs_btree.c256
-rw-r--r--fs/xfs/xfs_btree.h64
-rw-r--r--fs/xfs/xfs_buf_item.h24
-rw-r--r--fs/xfs/xfs_dinode.h4
-rw-r--r--fs/xfs/xfs_fsops.c23
-rw-r--r--fs/xfs/xfs_ialloc_btree.c87
-rw-r--r--fs/xfs/xfs_ialloc_btree.h9
-rw-r--r--fs/xfs/xfs_inode.c33
-rw-r--r--fs/xfs/xfs_log_recover.c28
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_buf.c29
17 files changed, 645 insertions, 209 deletions
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index b1ddef6b2689..30c4c1434faf 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -33,6 +33,7 @@
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_trace.h"
+#include "xfs_cksum.h"
STATIC struct xfs_btree_cur *
@@ -272,7 +273,7 @@ xfs_allocbt_key_diff(
return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
-static void
+static bool
xfs_allocbt_verify(
struct xfs_buf *bp)
{
@@ -280,66 +281,103 @@ xfs_allocbt_verify(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
unsigned int level;
- int sblock_ok; /* block passes checks */
/*
* magic number and level verification
*
- * During growfs operations, we can't verify the exact level as the
- * perag is not fully initialised and hence not attached to the buffer.
- * In this case, check against the maximum tree depth.
+ * During growfs operations, we can't verify the exact level or owner as
+ * the perag is not fully initialised and hence not attached to the
+ * buffer. In this case, check against the maximum tree depth.
+ *
+ * Similarly, during log recovery we will have a perag structure
+ * attached, but the agf information will not yet have been initialised
+ * from the on disk AGF. Again, we can only check against maximum limits
+ * in this case.
*/
level = be16_to_cpu(block->bb_level);
switch (block->bb_magic) {
+ case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag &&
+ be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+ /* fall through */
case cpu_to_be32(XFS_ABTB_MAGIC):
- if (pag)
- sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi];
- else
- sblock_ok = level < mp->m_ag_maxlevels;
+ if (pag && pag->pagf_init) {
+ if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
+ return false;
+ } else if (level >= mp->m_ag_maxlevels)
+ return false;
break;
+ case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag &&
+ be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+ /* fall through */
case cpu_to_be32(XFS_ABTC_MAGIC):
- if (pag)
- sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi];
- else
- sblock_ok = level < mp->m_ag_maxlevels;
+ if (pag && pag->pagf_init) {
+ if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
+ return false;
+ } else if (level >= mp->m_ag_maxlevels)
+ return false;
break;
default:
- sblock_ok = 0;
- break;
+ return false;
}
/* numrecs verification */
- sblock_ok = sblock_ok &&
- be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0];
+ if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
+ return false;
/* sibling pointer verification */
- sblock_ok = sblock_ok &&
- (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
- be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
- block->bb_u.s.bb_leftsib &&
- (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
- be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
- block->bb_u.s.bb_rightsib;
-
- if (!sblock_ok) {
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
}
static void
xfs_allocbt_read_verify(
struct xfs_buf *bp)
{
- xfs_allocbt_verify(bp);
+ if (!(xfs_btree_sblock_verify_crc(bp) &&
+ xfs_allocbt_verify(bp))) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+ bp->b_target->bt_mount, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ }
}
static void
xfs_allocbt_write_verify(
struct xfs_buf *bp)
{
- xfs_allocbt_verify(bp);
+ if (!xfs_allocbt_verify(bp)) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+ bp->b_target->bt_mount, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ }
+ xfs_btree_sblock_calc_crc(bp);
+
}
const struct xfs_buf_ops xfs_allocbt_buf_ops = {
@@ -444,6 +482,9 @@ xfs_allocbt_init_cursor(
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
return cur;
}
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index 7e89a2b429dd..e3a3f7424192 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -31,8 +31,10 @@ struct xfs_mount;
* by blockcount and blockno. All blocks look the same to make the code
* simpler; if we have time later, we'll make the optimizations.
*/
-#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
-#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
+#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
+#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */
+#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
+#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */
/*
* Data record/key structure
@@ -59,10 +61,10 @@ typedef __be32 xfs_alloc_ptr_t;
/*
* Btree block header size depends on a superblock flag.
- *
- * (not quite yet, but soon)
*/
-#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
+#define XFS_ALLOC_BLOCK_LEN(mp) \
+ (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+ XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
/*
* Record, key, and pointer address macros for btree blocks.
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index f96a734ed1e0..aa4765f15cbe 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -232,7 +232,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
return 0;
return dp->i_d.di_forkoff;
}
- dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
+ dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot);
break;
}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 20efb397a7f4..0531cd3927a9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -439,11 +439,15 @@ xfs_bmap_sanity_check(
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
- be16_to_cpu(block->bb_level) != level ||
+ if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
+ block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
+ return 0;
+
+ if (be16_to_cpu(block->bb_level) != level ||
be16_to_cpu(block->bb_numrecs) == 0 ||
be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
return 0;
+
return 1;
}
@@ -1031,6 +1035,7 @@ xfs_bmap_extents_to_btree(
xfs_extnum_t nextents; /* number of file extents */
xfs_bmbt_ptr_t *pp; /* root block address pointer */
+ mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
@@ -1044,16 +1049,18 @@ xfs_bmap_extents_to_btree(
* Fill in the root.
*/
block = ifp->if_broot;
- block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
- block->bb_level = cpu_to_be16(1);
- block->bb_numrecs = cpu_to_be16(1);
- block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
- block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
+ XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
+ XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
+ else
+ xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
+ XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
+ XFS_BTREE_LONG_PTRS);
/*
* Need a cursor. Can't allocate until bb_level is filled in.
*/
- mp = ip->i_mount;
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_private.b.firstblock = *firstblock;
cur->bc_private.b.flist = flist;
@@ -1102,10 +1109,15 @@ xfs_bmap_extents_to_btree(
*/
abp->b_ops = &xfs_bmbt_buf_ops;
ablock = XFS_BUF_TO_BLOCK(abp);
- ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
- ablock->bb_level = 0;
- ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
- ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xfs_btree_init_block_int(mp, ablock, abp->b_bn,
+ XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
+ XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
+ else
+ xfs_btree_init_block_int(mp, ablock, abp->b_bn,
+ XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
+ XFS_BTREE_LONG_PTRS);
+
arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
for (cnt = i = 0; i < nextents; i++) {
@@ -1155,7 +1167,8 @@ xfs_bmap_local_to_extents(
xfs_extlen_t total, /* total blocks needed by transaction */
int *logflagsp, /* inode logging flags */
int whichfork,
- void (*init_fn)(struct xfs_buf *bp,
+ void (*init_fn)(struct xfs_trans *tp,
+ struct xfs_buf *bp,
struct xfs_inode *ip,
struct xfs_ifork *ifp))
{
@@ -1207,7 +1220,7 @@ xfs_bmap_local_to_extents(
bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
/* initialise the block and copy the data */
- init_fn(bp, ip, ifp);
+ init_fn(tp, bp, ip, ifp);
/* account for the change in fork size and log everything */
xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
@@ -1314,16 +1327,19 @@ xfs_bmap_add_attrfork_extents(
*/
STATIC void
xfs_bmap_local_to_extents_init_fn(
+ struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
struct xfs_ifork *ifp)
{
bp->b_ops = &xfs_bmbt_buf_ops;
memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
+ xfs_trans_buf_set_type(tp, bp, XFS_BLF_BTREE_BUF);
}
STATIC void
xfs_symlink_local_to_remote(
+ struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
struct xfs_ifork *ifp)
@@ -1342,8 +1358,7 @@ xfs_symlink_local_to_remote(
*
* XXX (dgc): investigate whether directory conversion can use the generic
* formatting callout. It should be possible - it's just a very complex
- * formatter. it would also require passing the transaction through to the init
- * function.
+ * formatter.
*/
STATIC int /* error */
xfs_bmap_add_attrfork_local(
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 061b45cbe614..3a86c3fa6de1 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -37,6 +37,7 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
+#include "xfs_cksum.h"
/*
* Determine the extent state.
@@ -59,24 +60,31 @@ xfs_extent_state(
*/
void
xfs_bmdr_to_bmbt(
- struct xfs_mount *mp,
+ struct xfs_inode *ip,
xfs_bmdr_block_t *dblock,
int dblocklen,
struct xfs_btree_block *rblock,
int rblocklen)
{
+ struct xfs_mount *mp = ip->i_mount;
int dmxr;
xfs_bmbt_key_t *fkp;
__be64 *fpp;
xfs_bmbt_key_t *tkp;
__be64 *tpp;
- rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
+ XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
+ XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
+ else
+ xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
+ XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
+ XFS_BTREE_LONG_PTRS);
+
rblock->bb_level = dblock->bb_level;
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
rblock->bb_numrecs = dblock->bb_numrecs;
- rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
- rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
@@ -424,7 +432,13 @@ xfs_bmbt_to_bmdr(
xfs_bmbt_key_t *tkp;
__be64 *tpp;
- ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
+ ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid));
+ ASSERT(rblock->bb_u.l.bb_blkno ==
+ cpu_to_be64(XFS_BUF_DADDR_NULL));
+ } else
+ ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
ASSERT(rblock->bb_level != 0);
@@ -708,59 +722,89 @@ xfs_bmbt_key_diff(
cur->bc_rec.b.br_startoff;
}
-static void
+static int
xfs_bmbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
unsigned int level;
- int lblock_ok; /* block passes checks */
- /* magic number and level verification.
+ switch (block->bb_magic) {
+ case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
+ return false;
+ /*
+ * XXX: need a better way of verifying the owner here. Right now
+ * just make sure there has been one set.
+ */
+ if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
+ return false;
+ /* fall through */
+ case cpu_to_be32(XFS_BMAP_MAGIC):
+ break;
+ default:
+ return false;
+ }
+
+ /*
+ * numrecs and level verification.
*
- * We don't know waht fork we belong to, so just verify that the level
+ * We don't know what fork we belong to, so just verify that the level
* is less than the maximum of the two. Later checks will be more
* precise.
*/
level = be16_to_cpu(block->bb_level);
- lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) &&
- level < max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]);
-
- /* numrecs verification */
- lblock_ok = lblock_ok &&
- be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0];
+ if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
+ return false;
+ if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
+ return false;
/* sibling pointer verification */
- lblock_ok = lblock_ok &&
- block->bb_u.l.bb_leftsib &&
- (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
- XFS_FSB_SANITY_CHECK(mp,
- be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
- block->bb_u.l.bb_rightsib &&
- (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
- XFS_FSB_SANITY_CHECK(mp,
- be64_to_cpu(block->bb_u.l.bb_rightsib)));
-
- if (!lblock_ok) {
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (!block->bb_u.l.bb_leftsib ||
+ (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
+ !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
+ return false;
+ if (!block->bb_u.l.bb_rightsib ||
+ (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
+ !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
+ return false;
+
+ return true;
+
}
static void
xfs_bmbt_read_verify(
struct xfs_buf *bp)
{
- xfs_bmbt_verify(bp);
+ if (!(xfs_btree_lblock_verify_crc(bp) &&
+ xfs_bmbt_verify(bp))) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+ bp->b_target->bt_mount, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ }
+
}
static void
xfs_bmbt_write_verify(
struct xfs_buf *bp)
{
- xfs_bmbt_verify(bp);
+ if (!xfs_bmbt_verify(bp)) {
+ xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+ bp->b_target->bt_mount, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ return;
+ }
+ xfs_btree_lblock_calc_crc(bp);
}
const struct xfs_buf_ops xfs_bmbt_buf_ops = {
@@ -838,6 +882,8 @@ xfs_bmbt_init_cursor(
cur->bc_ops = &xfs_bmbt_ops;
cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
cur->bc_private.b.ip = ip;
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 88469ca08696..70c43d9f72c1 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -18,7 +18,8 @@
#ifndef __XFS_BMAP_BTREE_H__
#define __XFS_BMAP_BTREE_H__
-#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
+#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
+#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */
struct xfs_btree_cur;
struct xfs_btree_block;
@@ -136,10 +137,10 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
/*
* Btree block header size depends on a superblock flag.
- *
- * (not quite yet, but soon)
*/
-#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN
+#define XFS_BMBT_BLOCK_LEN(mp) \
+ (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+ XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)
#define XFS_BMBT_REC_ADDR(mp, block, index) \
((xfs_bmbt_rec_t *) \
@@ -186,12 +187,12 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
-#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
- (int)(XFS_BTREE_LBLOCK_LEN + \
+#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \
+ (int)(XFS_BMBT_BLOCK_LEN(mp) + \
((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
-#define XFS_BMAP_BROOT_SPACE(bb) \
- (XFS_BMAP_BROOT_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs)))
+#define XFS_BMAP_BROOT_SPACE(mp, bb) \
+ (XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs)))
#define XFS_BMDR_SPACE_CALC(nrecs) \
(int)(sizeof(xfs_bmdr_block_t) + \
((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
@@ -204,7 +205,7 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
/*
* Prototypes for xfs_bmap.c to call.
*/
-extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int,
+extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
struct xfs_btree_block *, int);
extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index db010408d701..ec77036f13b5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -30,9 +30,11 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
+#include "xfs_buf_item.h"
#include "xfs_btree.h"
#include "xfs_error.h"
#include "xfs_trace.h"
+#include "xfs_cksum.h"
/*
* Cursor allocation zone.
@@ -42,9 +44,13 @@ kmem_zone_t *xfs_btree_cur_zone;
/*
* Btree magic numbers.
*/
-const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
- XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
+static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
+ { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
+ { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
+ XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
};
+#define xfs_btree_magic(cur) \
+ xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
STATIC int /* error (0 or EFSCORRUPTED) */
@@ -54,30 +60,38 @@ xfs_btree_check_lblock(
int level, /* level of the btree block */
struct xfs_buf *bp) /* buffer for block, if any */
{
- int lblock_ok; /* block passes checks */
+ int lblock_ok = 1; /* block passes checks */
struct xfs_mount *mp; /* file system mount point */
mp = cur->bc_mp;
- lblock_ok =
- be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ lblock_ok = lblock_ok &&
+ uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
+ block->bb_u.l.bb_blkno == cpu_to_be64(
+ bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+ }
+
+ lblock_ok = lblock_ok &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
block->bb_u.l.bb_leftsib &&
(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
- be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
+ be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
block->bb_u.l.bb_rightsib &&
(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
- be64_to_cpu(block->bb_u.l.bb_rightsib)));
+ be64_to_cpu(block->bb_u.l.bb_rightsib)));
+
if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
XFS_ERRTAG_BTREE_CHECK_LBLOCK,
XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
- mp);
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
return XFS_ERROR(EFSCORRUPTED);
}
return 0;
@@ -90,16 +104,26 @@ xfs_btree_check_sblock(
int level, /* level of the btree block */
struct xfs_buf *bp) /* buffer containing block */
{
+ struct xfs_mount *mp; /* file system mount point */
struct xfs_buf *agbp; /* buffer for ag. freespace struct */
struct xfs_agf *agf; /* ag. freespace structure */
xfs_agblock_t agflen; /* native ag. freespace length */
- int sblock_ok; /* block passes checks */
+ int sblock_ok = 1; /* block passes checks */
+ mp = cur->bc_mp;
agbp = cur->bc_private.a.agbp;
agf = XFS_BUF_TO_AGF(agbp);
agflen = be32_to_cpu(agf->agf_length);
- sblock_ok =
- be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ sblock_ok = sblock_ok &&
+ uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
+ block->bb_u.s.bb_blkno == cpu_to_be64(
+ bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+ }
+
+ sblock_ok = sblock_ok &&
+ be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
@@ -109,13 +133,13 @@ xfs_btree_check_sblock(
(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
block->bb_u.s.bb_rightsib;
- if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
+
+ if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
XFS_ERRTAG_BTREE_CHECK_SBLOCK,
XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
- XFS_ERRLEVEL_LOW, cur->bc_mp, block);
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
return XFS_ERROR(EFSCORRUPTED);
}
return 0;
@@ -194,6 +218,72 @@ xfs_btree_check_ptr(
#endif
/*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * long-form btree header.
+ *
+ * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modifcation was that made
+ * it to disk.
+ */
+void
+xfs_btree_lblock_calc_crc(
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return;
+ if (bip)
+ block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_BTREE_LBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_lblock_verify_crc(
+ struct xfs_buf *bp)
+{
+ if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_BTREE_LBLOCK_CRC_OFF);
+ return true;
+}
+
+/*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * short-form btree header.
+ *
+ * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modifcation was that made
+ * it to disk.
+ */
+void
+xfs_btree_sblock_calc_crc(
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return;
+ if (bip)
+ block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_BTREE_SBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_sblock_verify_crc(
+ struct xfs_buf *bp)
+{
+ if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_BTREE_SBLOCK_CRC_OFF);
+ return true;
+}
+
+/*
* Delete the btree cursor.
*/
void
@@ -277,10 +367,8 @@ xfs_btree_dup_cursor(
*ncur = NULL;
return error;
}
- new->bc_bufs[i] = bp;
- ASSERT(!xfs_buf_geterror(bp));
- } else
- new->bc_bufs[i] = NULL;
+ }
+ new->bc_bufs[i] = bp;
}
*ncur = new;
return 0;
@@ -321,9 +409,14 @@ xfs_btree_dup_cursor(
*/
static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
{
- return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
- XFS_BTREE_LBLOCK_LEN :
- XFS_BTREE_SBLOCK_LEN;
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ return XFS_BTREE_LBLOCK_CRC_LEN;
+ return XFS_BTREE_LBLOCK_LEN;
+ }
+ if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ return XFS_BTREE_SBLOCK_CRC_LEN;
+ return XFS_BTREE_SBLOCK_LEN;
}
/*
@@ -863,43 +956,85 @@ xfs_btree_set_sibling(
}
void
+xfs_btree_init_block_int(
+ struct xfs_mount *mp,
+ struct xfs_btree_block *buf,
+ xfs_daddr_t blkno,
+ __u32 magic,
+ __u16 level,
+ __u16 numrecs,
+ __u64 owner,
+ unsigned int flags)
+{
+ buf->bb_magic = cpu_to_be32(magic);
+ buf->bb_level = cpu_to_be16(level);
+ buf->bb_numrecs = cpu_to_be16(numrecs);
+
+ if (flags & XFS_BTREE_LONG_PTRS) {
+ buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+ buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+ if (flags & XFS_BTREE_CRC_BLOCKS) {
+ buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
+ buf->bb_u.l.bb_owner = cpu_to_be64(owner);
+ uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
+ buf->bb_u.l.bb_pad = 0;
+ }
+ } else {
+ /* owner is a 32 bit value on short blocks */
+ __u32 __owner = (__u32)owner;
+
+ buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+ buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+ if (flags & XFS_BTREE_CRC_BLOCKS) {
+ buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
+ buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
+ uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
+ }
+ }
+}
+
+void
xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_buf *bp,
__u32 magic,
__u16 level,
__u16 numrecs,
+ __u64 owner,
unsigned int flags)
{
- struct xfs_btree_block *new = XFS_BUF_TO_BLOCK(bp);
-
- new->bb_magic = cpu_to_be32(magic);
- new->bb_level = cpu_to_be16(level);
- new->bb_numrecs = cpu_to_be16(numrecs);
-
- if (flags & XFS_BTREE_LONG_PTRS) {
- new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
- new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
- } else {
- new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
- new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- }
+ xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+ magic, level, numrecs, owner, flags);
}
STATIC void
xfs_btree_init_block_cur(
struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
int level,
- int numrecs,
- struct xfs_buf *bp)
+ int numrecs)
{
- xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum],
- level, numrecs, cur->bc_flags);
+ __u64 owner;
+
+ /*
+ * we can pull the owner from the cursor right now as the different
+ * owners align directly with the pointer size of the btree. This may
+ * change in future, but is safe for current users of the generic btree
+ * code.
+ */
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ owner = cur->bc_private.b.ip->i_ino;
+ else
+ owner = cur->bc_private.a.agno;
+
+ xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+ xfs_btree_magic(cur), level, numrecs,
+ owner, cur->bc_flags);
}
/*
* Return true if ptr is the last record in the btree and
- * we need to track updateѕ to this record. The decision
+ * we need to track updates to this record. The decision
* will be further refined in the update_lastrec method.
*/
STATIC int
@@ -1147,6 +1282,7 @@ xfs_btree_log_keys(
XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
if (bp) {
+ xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp,
xfs_btree_key_offset(cur, first),
xfs_btree_key_offset(cur, last + 1) - 1);
@@ -1171,6 +1307,7 @@ xfs_btree_log_recs(
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+ xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp,
xfs_btree_rec_offset(cur, first),
xfs_btree_rec_offset(cur, last + 1) - 1);
@@ -1195,6 +1332,7 @@ xfs_btree_log_ptrs(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
int level = xfs_btree_get_level(block);
+ xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp,
xfs_btree_ptr_offset(cur, first, level),
xfs_btree_ptr_offset(cur, last + 1, level) - 1);
@@ -1223,7 +1361,12 @@ xfs_btree_log_block(
offsetof(struct xfs_btree_block, bb_numrecs),
offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
- XFS_BTREE_SBLOCK_LEN
+ offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
+ offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
+ XFS_BTREE_SBLOCK_CRC_LEN
};
static const short loffsets[] = { /* table of offsets (long) */
offsetof(struct xfs_btree_block, bb_magic),
@@ -1231,17 +1374,40 @@ xfs_btree_log_block(
offsetof(struct xfs_btree_block, bb_numrecs),
offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
- XFS_BTREE_LBLOCK_LEN
+ offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
+ offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
+ XFS_BTREE_LBLOCK_CRC_LEN
};
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
if (bp) {
+ int nbits;
+
+ if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+ /*
+ * We don't log the CRC when updating a btree
+ * block but instead recreate it during log
+ * recovery. As the log buffers have checksums
+ * of their own this is safe and avoids logging a crc
+ * update in a lot of places.
+ */
+ if (fields == XFS_BB_ALL_BITS)
+ fields = XFS_BB_ALL_BITS_CRC;
+ nbits = XFS_BB_NUM_BITS_CRC;
+ } else {
+ nbits = XFS_BB_NUM_BITS;
+ }
xfs_btree_offsets(fields,
(cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
loffsets : soffsets,
- XFS_BB_NUM_BITS, &first, &last);
+ nbits, &first, &last);
+ xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp, first, last);
} else {
xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
@@ -2204,7 +2370,7 @@ xfs_btree_split(
goto error0;
/* Fill in the btree header for the new right block. */
- xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp);
+ xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
/*
* Split the entries between the old and the new block evenly.
@@ -2513,7 +2679,7 @@ xfs_btree_new_root(
nptr = 2;
}
/* Fill in the new block's btree header and log it. */
- xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp);
+ xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
!xfs_btree_ptr_is_null(cur, &rptr));
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index f932897194eb..6e6c915673fe 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -42,11 +42,15 @@ extern kmem_zone_t *xfs_btree_cur_zone;
* Generic btree header.
*
* This is a combination of the actual format used on disk for short and long
- * format btrees. The first three fields are shared by both format, but
- * the pointers are different and should be used with care.
+ * format btrees. The first three fields are shared by both format, but the
+ * pointers are different and should be used with care.
*
- * To get the size of the actual short or long form headers please use
- * the size macros below. Never use sizeof(xfs_btree_block).
+ * To get the size of the actual short or long form headers please use the size
+ * macros below. Never use sizeof(xfs_btree_block).
+ *
+ * The blkno, crc, lsn, owner and uuid fields are only available in filesystems
+ * with the crc feature bit, and all accesses to them must be conditional on
+ * that flag.
*/
struct xfs_btree_block {
__be32 bb_magic; /* magic number for block type */
@@ -56,10 +60,23 @@ struct xfs_btree_block {
struct {
__be32 bb_leftsib;
__be32 bb_rightsib;
+
+ __be64 bb_blkno;
+ __be64 bb_lsn;
+ uuid_t bb_uuid;
+ __be32 bb_owner;
+ __le32 bb_crc;
} s; /* short form pointers */
struct {
__be64 bb_leftsib;
__be64 bb_rightsib;
+
+ __be64 bb_blkno;
+ __be64 bb_lsn;
+ uuid_t bb_uuid;
+ __be64 bb_owner;
+ __le32 bb_crc;
+ __be32 bb_pad; /* padding for alignment */
} l; /* long form pointers */
} bb_u; /* rest */
};
@@ -67,6 +84,16 @@ struct xfs_btree_block {
#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
+/* sizes of CRC enabled btree blocks */
+#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40)
+#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48)
+
+
+#define XFS_BTREE_SBLOCK_CRC_OFF \
+ offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
+#define XFS_BTREE_LBLOCK_CRC_OFF \
+ offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
+
/*
* Generic key, ptr and record wrapper structures.
@@ -101,13 +128,11 @@ union xfs_btree_rec {
#define XFS_BB_NUMRECS 0x04
#define XFS_BB_LEFTSIB 0x08
#define XFS_BB_RIGHTSIB 0x10
+#define XFS_BB_BLKNO 0x20
#define XFS_BB_NUM_BITS 5
#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
-
-/*
- * Magic numbers for btree blocks.
- */
-extern const __uint32_t xfs_magics[];
+#define XFS_BB_NUM_BITS_CRC 8
+#define XFS_BB_ALL_BITS_CRC ((1 << XFS_BB_NUM_BITS_CRC) - 1)
/*
* Generic stats interface
@@ -256,6 +281,7 @@ typedef struct xfs_btree_cur
#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
+#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */
#define XFS_BTREE_NOERROR 0
@@ -393,8 +419,20 @@ xfs_btree_init_block(
__u32 magic,
__u16 level,
__u16 numrecs,
+ __u64 owner,
unsigned int flags);
+void
+xfs_btree_init_block_int(
+ struct xfs_mount *mp,
+ struct xfs_btree_block *buf,
+ xfs_daddr_t blkno,
+ __u32 magic,
+ __u16 level,
+ __u16 numrecs,
+ __u64 owner,
+ unsigned int flags);
+
/*
* Common btree core entry points.
*/
@@ -408,6 +446,14 @@ int xfs_btree_delete(struct xfs_btree_cur *, int *);
int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
/*
+ * btree block CRC helpers
+ */
+void xfs_btree_lblock_calc_crc(struct xfs_buf *);
+bool xfs_btree_lblock_verify_crc(struct xfs_buf *);
+void xfs_btree_sblock_calc_crc(struct xfs_buf *);
+bool xfs_btree_sblock_verify_crc(struct xfs_buf *);
+
+/*
* Internal btree helpers also used by xfs_bmap.c.
*/
void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index ee36c88ecfde..101ef8377f1b 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -24,19 +24,33 @@ extern kmem_zone_t *xfs_buf_item_zone;
* This flag indicates that the buffer contains on disk inodes
* and requires special recovery handling.
*/
-#define XFS_BLF_INODE_BUF 0x1
+#define XFS_BLF_INODE_BUF (1<<0)
/*
* This flag indicates that the buffer should not be replayed
* during recovery because its blocks are being freed.
*/
-#define XFS_BLF_CANCEL 0x2
+#define XFS_BLF_CANCEL (1<<1)
+
/*
* This flag indicates that the buffer contains on disk
* user or group dquots and may require special recovery handling.
*/
-#define XFS_BLF_UDQUOT_BUF 0x4
-#define XFS_BLF_PDQUOT_BUF 0x8
-#define XFS_BLF_GDQUOT_BUF 0x10
+#define XFS_BLF_UDQUOT_BUF (1<<2)
+#define XFS_BLF_PDQUOT_BUF (1<<3)
+#define XFS_BLF_GDQUOT_BUF (1<<4)
+
+/*
+ * all buffers now need flags to tell recovery where the magic number
+ * is so that it can verify and calculate the CRCs on the buffer correctly
+ * once the changes have been replayed into the buffer.
+ */
+#define XFS_BLF_BTREE_BUF (1<<5)
+
+#define XFS_BLF_TYPE_MASK \
+ (XFS_BLF_UDQUOT_BUF | \
+ XFS_BLF_PDQUOT_BUF | \
+ XFS_BLF_GDQUOT_BUF | \
+ XFS_BLF_BTREE_BUF)
#define XFS_BLF_CHUNK 128
#define XFS_BLF_SHIFT 7
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 88a3368ef124..6b5bd1745dbe 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -107,8 +107,8 @@ typedef enum xfs_dinode_fmt {
#define XFS_LITINO(mp, version) \
((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode)))
-#define XFS_BROOT_SIZE_ADJ \
- (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t))
+#define XFS_BROOT_SIZE_ADJ(ip) \
+ (XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t))
/*
* Inode data & attribute fork sizes, per inode.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 2866b8c78b7a..6fe286a8e29b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -316,7 +316,13 @@ xfs_growfs_data_private(
goto error0;
}
- xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, 0);
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1,
+ agno, XFS_BTREE_CRC_BLOCKS);
+ else
+ xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1,
+ agno, 0);
+
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
@@ -339,7 +345,13 @@ xfs_growfs_data_private(
goto error0;
}
- xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, 0);
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1,
+ agno, XFS_BTREE_CRC_BLOCKS);
+ else
+ xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1,
+ agno, 0);
+
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
@@ -363,7 +375,12 @@ xfs_growfs_data_private(
goto error0;
}
- xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, 0);
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0,
+ agno, XFS_BTREE_CRC_BLOCKS);
+ else
+ xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0,
+ agno, 0);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index bec344b36507..c82ac8867421 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -34,6 +34,7 @@
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
+#include "xfs_cksum.h"
STATIC int
@@ -182,52 +183,88 @@ xfs_inobt_key_diff(
cur->bc_rec.i.ir_startino;
}
-void
+static int
xfs_inobt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
unsigned int level;
- int sblock_ok; /* block passes checks */
- /* magic number and level verification */
- level = be16_to_cpu(block->bb_level);
- sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) &&
- level < mp->m_in_maxlevels;
+ /*
+ * During growfs operations, we can't verify the exact owner as the
+ * perag is not fully initialised and hence not attached to the buffer.
+ *
+ * Similarly, during log recovery we will have a perag structure
+ * attached, but the agi information will not yet have been initialised
+ * from the on disk AGI. We don't currently use any of this information,
+ * but beware of the landmine (i.e. need to check pag->pagi_init) if we
+ * ever do.
+ */
+ switch (block->bb_magic) {
+ case cpu_to_be32(XFS_IBT_CRC_MAGIC):
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag &&
+ be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+ /* fall through */
+ case cpu_to_be32(XFS_IBT_MAGIC):
+ break;
+ default:
+ return 0;
+ }
- /* numrecs verification */
- sblock_ok = sblock_ok &&
- be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0];
+ /* numrecs and level verification */
+ level = be16_to_cpu(block->bb_level);
+ if (level >= mp->m_in_maxlevels)
+ return false;
+ if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
+ return false;
/* sibling pointer verification */
- sblock_ok = sblock_ok &&
- (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
- be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
- block->bb_u.s.bb_leftsib &&
- (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
- be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
- block->bb_u.s.bb_rightsib;
-
- if (!sblock_ok) {
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
}
static void
xfs_inobt_read_verify(
struct xfs_buf *bp)
{
- xfs_inobt_verify(bp);
+ if (!(xfs_btree_sblock_verify_crc(bp) &&
+ xfs_inobt_verify(bp))) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+ bp->b_target->bt_mount, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ }
}
static void
xfs_inobt_write_verify(
struct xfs_buf *bp)
{
- xfs_inobt_verify(bp);
+ if (!xfs_inobt_verify(bp)) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+ bp->b_target->bt_mount, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ }
+ xfs_btree_sblock_calc_crc(bp);
+
}
const struct xfs_buf_ops xfs_inobt_buf_ops = {
@@ -301,6 +338,8 @@ xfs_inobt_init_cursor(
cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_inobt_ops;
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 25c0239a8eab..3ac36b7642e9 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -29,7 +29,8 @@ struct xfs_mount;
/*
* There is a btree for the inode map per allocation group.
*/
-#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
+#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
+#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
typedef __uint64_t xfs_inofree_t;
#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
@@ -76,10 +77,10 @@ typedef __be32 xfs_inobt_ptr_t;
/*
* Btree block header size depends on a superblock flag.
- *
- * (not quite yet, but soon)
*/
-#define XFS_INOBT_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
+#define XFS_INOBT_BLOCK_LEN(mp) \
+ (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
+ XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
/*
* Record, key, and pointer address macros for btree blocks.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4f201656d2d9..202ce37e66cb 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -786,6 +786,7 @@ xfs_iformat_btree(
xfs_dinode_t *dip,
int whichfork)
{
+ struct xfs_mount *mp = ip->i_mount;
xfs_bmdr_block_t *dfp;
xfs_ifork_t *ifp;
/* REFERENCED */
@@ -794,7 +795,7 @@ xfs_iformat_btree(
ifp = XFS_IFORK_PTR(ip, whichfork);
dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
- size = XFS_BMAP_BROOT_SPACE(dfp);
+ size = XFS_BMAP_BROOT_SPACE(mp, dfp);
nrecs = be16_to_cpu(dfp->bb_numrecs);
/*
@@ -805,14 +806,14 @@ xfs_iformat_btree(
* blocks.
*/
if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
- XFS_IFORK_MAXEXT(ip, whichfork) ||
+ XFS_IFORK_MAXEXT(ip, whichfork) ||
XFS_BMDR_SPACE_CALC(nrecs) >
- XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
+ XFS_DFORK_SIZE(dip, mp, whichfork) ||
XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
- xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
- (unsigned long long) ip->i_ino);
+ xfs_warn(mp, "corrupt inode %Lu (btree).",
+ (unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
- ip->i_mount, dip);
+ mp, dip);
return XFS_ERROR(EFSCORRUPTED);
}
@@ -823,8 +824,7 @@ xfs_iformat_btree(
* Copy and convert from the on-disk structure
* to the in-memory structure.
*/
- xfs_bmdr_to_bmbt(ip->i_mount, dfp,
- XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
+ xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
ifp->if_broot, size);
ifp->if_flags &= ~XFS_IFEXTENTS;
ifp->if_flags |= XFS_IFBROOT;
@@ -2037,7 +2037,7 @@ xfs_iroot_realloc(
* allocate it now and get out.
*/
if (ifp->if_broot_bytes == 0) {
- new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
+ new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
ifp->if_broot_bytes = (int)new_size;
return;
@@ -2051,9 +2051,9 @@ xfs_iroot_realloc(
*/
cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
new_max = cur_max + rec_diff;
- new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
+ new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
- (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
+ XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
KM_SLEEP | KM_NOFS);
op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
ifp->if_broot_bytes);
@@ -2061,7 +2061,7 @@ xfs_iroot_realloc(
(int)new_size);
ifp->if_broot_bytes = (int)new_size;
ASSERT(ifp->if_broot_bytes <=
- XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
+ XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
return;
}
@@ -2076,7 +2076,7 @@ xfs_iroot_realloc(
new_max = cur_max + rec_diff;
ASSERT(new_max >= 0);
if (new_max > 0)
- new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
+ new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
else
new_size = 0;
if (new_size > 0) {
@@ -2084,7 +2084,8 @@ xfs_iroot_realloc(
/*
* First copy over the btree block header.
*/
- memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
+ memcpy(new_broot, ifp->if_broot,
+ XFS_BMBT_BLOCK_LEN(ip->i_mount));
} else {
new_broot = NULL;
ifp->if_flags &= ~XFS_IFBROOT;
@@ -2114,7 +2115,7 @@ xfs_iroot_realloc(
ifp->if_broot = new_broot;
ifp->if_broot_bytes = (int)new_size;
ASSERT(ifp->if_broot_bytes <=
- XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
+ XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
return;
}
@@ -2427,7 +2428,7 @@ xfs_iflush_fork(
ASSERT(ifp->if_broot != NULL);
ASSERT(ifp->if_broot_bytes <=
(XFS_IFORK_SIZE(ip, whichfork) +
- XFS_BROOT_SIZE_ADJ));
+ XFS_BROOT_SIZE_ADJ(ip)));
xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
(xfs_bmdr_block_t *)cp,
XFS_DFORK_SIZE(dip, mp, whichfork));
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 3ca3380c3afe..3762ce2e99fc 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -29,6 +29,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
@@ -1928,6 +1929,33 @@ xlog_recover_do_reg_buffer(
/* Shouldn't be any more regions */
ASSERT(i == item->ri_total);
+
+ switch (buf_f->blf_flags & XFS_BLF_TYPE_MASK) {
+ case XFS_BLF_BTREE_BUF:
+ switch (be32_to_cpu(*(__be32 *)bp->b_addr)) {
+ case XFS_ABTB_CRC_MAGIC:
+ case XFS_ABTC_CRC_MAGIC:
+ case XFS_ABTB_MAGIC:
+ case XFS_ABTC_MAGIC:
+ bp->b_ops = &xfs_allocbt_buf_ops;
+ break;
+ case XFS_IBT_CRC_MAGIC:
+ case XFS_IBT_MAGIC:
+ bp->b_ops = &xfs_inobt_buf_ops;
+ break;
+ case XFS_BMAP_CRC_MAGIC:
+ case XFS_BMAP_MAGIC:
+ bp->b_ops = &xfs_bmbt_buf_ops;
+ break;
+ default:
+ xfs_warn(mp, "Bad btree block magic!");
+ ASSERT(0);
+ break;
+ }
+ break;
+ default:
+ break;
+ }
}
/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index cd29f6171021..1b04fe59c603 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -505,6 +505,8 @@ void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
+void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
+ uint);
void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 3edf5dbee001..f950edd0d537 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -659,6 +659,7 @@ xfs_trans_binval(
ASSERT(XFS_BUF_ISSTALE(bp));
ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_TYPE_MASK));
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
@@ -671,6 +672,7 @@ xfs_trans_binval(
bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
+ bip->__bli_format.blf_flags &= ~XFS_BLF_TYPE_MASK;
for (i = 0; i < bip->bli_format_count; i++) {
memset(bip->bli_formats[i].blf_data_map, 0,
(bip->bli_formats[i].blf_map_size * sizeof(uint)));
@@ -751,6 +753,26 @@ xfs_trans_inode_alloc_buf(
bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
}
+/*
+ * Set the type of the buffer for log recovery so that it can correctly identify
+ * and hence attach the correct buffer ops to the buffer after replay.
+ */
+void
+xfs_trans_buf_set_type(
+ struct xfs_trans *tp,
+ struct xfs_buf *bp,
+ uint type)
+{
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+ ASSERT((type & XFS_BLF_TYPE_MASK) != 0);
+
+ bip->__bli_format.blf_flags &= ~XFS_BLF_TYPE_MASK;
+ bip->__bli_format.blf_flags |= type;
+}
/*
* Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
@@ -769,14 +791,9 @@ xfs_trans_dquot_buf(
xfs_buf_t *bp,
uint type)
{
- xfs_buf_log_item_t *bip = bp->b_fspriv;
-
- ASSERT(bp->b_transp == tp);
- ASSERT(bip != NULL);
ASSERT(type == XFS_BLF_UDQUOT_BUF ||
type == XFS_BLF_PDQUOT_BUF ||
type == XFS_BLF_GDQUOT_BUF);
- ASSERT(atomic_read(&bip->bli_refcount) > 0);
- bip->__bli_format.blf_flags |= type;
+ xfs_trans_buf_set_type(tp, bp, type);
}