summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c105
1 files changed, 57 insertions, 48 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7f7be5f98f52..bb262c25c8de 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -896,7 +896,6 @@ xfs_dinode_to_disk(
to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
- to->di_flushiter = cpu_to_be16(from->di_flushiter);
to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
@@ -924,6 +923,9 @@ xfs_dinode_to_disk(
to->di_lsn = cpu_to_be64(from->di_lsn);
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
+ to->di_flushiter = 0;
+ } else {
+ to->di_flushiter = cpu_to_be16(from->di_flushiter);
}
}
@@ -1028,6 +1030,15 @@ xfs_dinode_calc_crc(
/*
* Read the disk inode attributes into the in-core inode structure.
+ *
+ * For version 5 superblocks, if we are initialising a new inode and we are not
+ * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
+ * inode core with a random generation number. If we are keeping inodes around,
+ * we need to read the inode cluster to get the existing generation number off
+ * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
+ * format) then log recovery is dependent on the di_flushiter field being
+ * initialised from the current on-disk value and hence we must also read the
+ * inode off disk.
*/
int
xfs_iread(
@@ -1047,6 +1058,23 @@ xfs_iread(
if (error)
return error;
+ /* shortcut IO on inode allocation if possible */
+ if ((iget_flags & XFS_IGET_CREATE) &&
+ xfs_sb_version_hascrc(&mp->m_sb) &&
+ !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+ /* initialise the on-disk inode core */
+ memset(&ip->i_d, 0, sizeof(ip->i_d));
+ ip->i_d.di_magic = XFS_DINODE_MAGIC;
+ ip->i_d.di_gen = prandom_u32();
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ ip->i_d.di_version = 3;
+ ip->i_d.di_ino = ip->i_ino;
+ uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
+ } else
+ ip->i_d.di_version = 2;
+ return 0;
+ }
+
/*
* Get pointers to the on-disk inode and the buffer containing it.
*/
@@ -1133,17 +1161,16 @@ xfs_iread(
xfs_buf_set_ref(bp, XFS_INO_REF);
/*
- * Use xfs_trans_brelse() to release the buffer containing the
- * on-disk inode, because it was acquired with xfs_trans_read_buf()
- * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal
+ * Use xfs_trans_brelse() to release the buffer containing the on-disk
+ * inode, because it was acquired with xfs_trans_read_buf() in
+ * xfs_imap_to_bp() above. If tp is NULL, this is just a normal
* brelse(). If we're within a transaction, then xfs_trans_brelse()
* will only release the buffer if it is not dirty within the
* transaction. It will be OK to release the buffer in this case,
- * because inodes on disk are never destroyed and we will be
- * locking the new in-core inode before putting it in the hash
- * table where other processes can find it. Thus we don't have
- * to worry about the inode being changed just because we released
- * the buffer.
+ * because inodes on disk are never destroyed and we will be locking the
+ * new in-core inode before putting it in the cache where other
+ * processes can find it. Thus we don't have to worry about the inode
+ * being changed just because we released the buffer.
*/
out_brelse:
xfs_trans_brelse(tp, bp);
@@ -2028,8 +2055,6 @@ xfs_ifree(
int error;
int delete;
xfs_ino_t first_ino;
- xfs_dinode_t *dip;
- xfs_buf_t *ibp;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_d.di_nlink == 0);
@@ -2042,14 +2067,13 @@ xfs_ifree(
* Pull the on-disk inode from the AGI unlinked list.
*/
error = xfs_iunlink_remove(tp, ip);
- if (error != 0) {
+ if (error)
return error;
- }
error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
- if (error != 0) {
+ if (error)
return error;
- }
+
ip->i_d.di_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0;
@@ -2061,31 +2085,10 @@ xfs_ifree(
* by reincarnations of this inode.
*/
ip->i_d.di_gen++;
-
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp,
- 0, 0);
- if (error)
- return error;
-
- /*
- * Clear the on-disk di_mode. This is to prevent xfs_bulkstat
- * from picking up this inode when it is reclaimed (its incore state
- * initialzed but not flushed to disk yet). The in-core di_mode is
- * already cleared and a corresponding transaction logged.
- * The hack here just synchronizes the in-core to on-disk
- * di_mode value in advance before the actual inode sync to disk.
- * This is OK because the inode is already unlinked and would never
- * change its di_mode again for this inode generation.
- * This is a temporary hack that would require a proper fix
- * in the future.
- */
- dip->di_mode = 0;
-
- if (delete) {
+ if (delete)
error = xfs_ifree_cluster(ip, tp, first_ino);
- }
return error;
}
@@ -2160,8 +2163,8 @@ xfs_iroot_realloc(
np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
(int)new_size);
ifp->if_broot_bytes = (int)new_size;
- ASSERT(ifp->if_broot_bytes <=
- XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
+ ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+ XFS_IFORK_SIZE(ip, whichfork));
memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
return;
}
@@ -2214,8 +2217,9 @@ xfs_iroot_realloc(
kmem_free(ifp->if_broot);
ifp->if_broot = new_broot;
ifp->if_broot_bytes = (int)new_size;
- ASSERT(ifp->if_broot_bytes <=
- XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
+ if (ifp->if_broot)
+ ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+ XFS_IFORK_SIZE(ip, whichfork));
return;
}
@@ -2526,9 +2530,8 @@ xfs_iflush_fork(
if ((iip->ili_fields & brootflag[whichfork]) &&
(ifp->if_broot_bytes > 0)) {
ASSERT(ifp->if_broot != NULL);
- ASSERT(ifp->if_broot_bytes <=
- (XFS_IFORK_SIZE(ip, whichfork) +
- XFS_BROOT_SIZE_ADJ(ip)));
+ ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+ XFS_IFORK_SIZE(ip, whichfork));
xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
(xfs_bmdr_block_t *)cp,
XFS_DFORK_SIZE(dip, mp, whichfork));
@@ -2886,12 +2889,18 @@ xfs_iflush_int(
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
goto corrupt_out;
}
+
/*
- * bump the flush iteration count, used to detect flushes which
- * postdate a log record during recovery. This is redundant as we now
- * log every change and hence this can't happen. Still, it doesn't hurt.
+ * Inode item log recovery for v1/v2 inodes are dependent on the
+ * di_flushiter count for correct sequencing. We bump the flush
+ * iteration count so we can detect flushes which postdate a log record
+ * during recovery. This is redundant as we now log every change and
+ * hence this can't happen but we need to still do it to ensure
+ * backwards compatibility with old kernels that predate logging all
+ * inode changes.
*/
- ip->i_d.di_flushiter++;
+ if (ip->i_d.di_version < 3)
+ ip->i_d.di_flushiter++;
/*
* Copy the dirty parts of the inode into the on-disk