summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/xfs_iget.c49
-rw-r--r--fs/xfs/xfs_inode.c268
-rw-r--r--fs/xfs/xfs_inode.h16
-rw-r--r--fs/xfs/xfs_vfsops.c5
4 files changed, 156 insertions, 182 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8e09b71f4104..e657c5128460 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -78,7 +78,6 @@ xfs_iget_core(
xfs_inode_t *ip;
xfs_inode_t *iq;
int error;
- xfs_icluster_t *icl, *new_icl = NULL;
unsigned long first_index, mask;
xfs_perag_t *pag;
xfs_agino_t agino;
@@ -229,11 +228,9 @@ finish_inode:
}
/*
- * This is a bit messy - we preallocate everything we _might_
- * need before we pick up the ici lock. That way we don't have to
- * juggle locks and go all the way back to the start.
+ * Preload the radix tree so we can insert safely under the
+ * write spinlock.
*/
- new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
if (radix_tree_preload(GFP_KERNEL)) {
xfs_idestroy(ip);
delay(1);
@@ -242,17 +239,6 @@ finish_inode:
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
first_index = agino & mask;
write_lock(&pag->pag_ici_lock);
-
- /*
- * Find the cluster if it exists
- */
- icl = NULL;
- if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
- first_index, 1)) {
- if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
- icl = iq->i_cluster;
- }
-
/*
* insert the new inode
*/
@@ -267,30 +253,13 @@ finish_inode:
}
/*
- * These values _must_ be set before releasing ihlock!
+ * These values _must_ be set before releasing the radix tree lock!
*/
ip->i_udquot = ip->i_gdquot = NULL;
xfs_iflags_set(ip, XFS_INEW);
- ASSERT(ip->i_cluster == NULL);
-
- if (!icl) {
- spin_lock_init(&new_icl->icl_lock);
- INIT_HLIST_HEAD(&new_icl->icl_inodes);
- icl = new_icl;
- new_icl = NULL;
- } else {
- ASSERT(!hlist_empty(&icl->icl_inodes));
- }
- spin_lock(&icl->icl_lock);
- hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
- ip->i_cluster = icl;
- spin_unlock(&icl->icl_lock);
-
write_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
- if (new_icl)
- kmem_zone_free(xfs_icluster_zone, new_icl);
/*
* Link ip to its mount and thread it on the mount's inode list.
@@ -529,18 +498,6 @@ xfs_iextract(
xfs_put_perag(mp, pag);
/*
- * Remove from cluster list
- */
- mp = ip->i_mount;
- spin_lock(&ip->i_cluster->icl_lock);
- hlist_del(&ip->i_cnode);
- spin_unlock(&ip->i_cluster->icl_lock);
-
- /* was last inode in cluster? */
- if (hlist_empty(&ip->i_cluster->icl_inodes))
- kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
-
- /*
* Remove from mount's inode list.
*/
XFS_MOUNT_ILOCK(mp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3c3e9e3c1da8..040c0e41729b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,7 +55,6 @@
kmem_zone_t *xfs_ifork_zone;
kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_icluster_zone;
/*
* Used in xfs_itruncate(). This is the maximum number of extents
@@ -2994,6 +2993,153 @@ xfs_iflush_fork(
return 0;
}
+STATIC int
+xfs_iflush_cluster(
+ xfs_inode_t *ip,
+ xfs_buf_t *bp)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
+ unsigned long first_index, mask;
+ int ilist_size;
+ xfs_inode_t **ilist;
+ xfs_inode_t *iq;
+ xfs_inode_log_item_t *iip;
+ int nr_found;
+ int clcount = 0;
+ int bufwasdelwri;
+ int i;
+
+ ASSERT(pag->pagi_inodeok);
+ ASSERT(pag->pag_ici_init);
+
+ ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+ ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
+ if (!ilist)
+ return 0;
+
+ mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
+ read_lock(&pag->pag_ici_lock);
+ /* really need a gang lookup range call here */
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+ first_index,
+ XFS_INODE_CLUSTER_SIZE(mp));
+ if (nr_found == 0)
+ goto out_free;
+
+ for (i = 0; i < nr_found; i++) {
+ iq = ilist[i];
+ if (iq == ip)
+ continue;
+ /* if the inode lies outside this cluster, we're done. */
+ if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
+ break;
+ /*
+ * Do an un-protected check to see if the inode is dirty and
+ * is a candidate for flushing. These checks will be repeated
+ * later after the appropriate locks are acquired.
+ */
+ iip = iq->i_itemp;
+ if ((iq->i_update_core == 0) &&
+ ((iip == NULL) ||
+ !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
+ xfs_ipincount(iq) == 0) {
+ continue;
+ }
+
+ /*
+ * Try to get locks. If any are unavailable or it is pinned,
+ * then this inode cannot be flushed and is skipped.
+ */
+
+ if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+ continue;
+ if (!xfs_iflock_nowait(iq)) {
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ continue;
+ }
+ if (xfs_ipincount(iq)) {
+ xfs_ifunlock(iq);
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ continue;
+ }
+
+ /*
+ * arriving here means that this inode can be flushed. First
+ * re-check that it's dirty before flushing.
+ */
+ iip = iq->i_itemp;
+ if ((iq->i_update_core != 0) || ((iip != NULL) &&
+ (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+ int error;
+ error = xfs_iflush_int(iq, bp);
+ if (error) {
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ goto cluster_corrupt_out;
+ }
+ clcount++;
+ } else {
+ xfs_ifunlock(iq);
+ }
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ }
+
+ if (clcount) {
+ XFS_STATS_INC(xs_icluster_flushcnt);
+ XFS_STATS_ADD(xs_icluster_flushinode, clcount);
+ }
+
+out_free:
+ read_unlock(&pag->pag_ici_lock);
+ kmem_free(ilist, ilist_size);
+ return 0;
+
+
+cluster_corrupt_out:
+ /*
+ * Corruption detected in the clustering loop. Invalidate the
+ * inode buffer and shut down the filesystem.
+ */
+ read_unlock(&pag->pag_ici_lock);
+ /*
+ * Clean up the buffer. If it was B_DELWRI, just release it --
+ * brelse can handle it with no problems. If not, shut down the
+ * filesystem before releasing the buffer.
+ */
+ bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+ if (bufwasdelwri)
+ xfs_buf_relse(bp);
+
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+
+ if (!bufwasdelwri) {
+ /*
+ * Just like incore_relse: if we have b_iodone functions,
+ * mark the buffer as an error and call them. Otherwise
+ * mark it as stale and brelse.
+ */
+ if (XFS_BUF_IODONE_FUNC(bp)) {
+ XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+ XFS_BUF_UNDONE(bp);
+ XFS_BUF_STALE(bp);
+ XFS_BUF_SHUT(bp);
+ XFS_BUF_ERROR(bp,EIO);
+ xfs_biodone(bp);
+ } else {
+ XFS_BUF_STALE(bp);
+ xfs_buf_relse(bp);
+ }
+ }
+
+ /*
+ * Unlocks the flush lock
+ */
+ xfs_iflush_abort(iq);
+ kmem_free(ilist, ilist_size);
+ return XFS_ERROR(EFSCORRUPTED);
+}
+
/*
* xfs_iflush() will write a modified inode's changes out to the
* inode's on disk home. The caller must have the inode lock held
@@ -3013,13 +3159,8 @@ xfs_iflush(
xfs_dinode_t *dip;
xfs_mount_t *mp;
int error;
- /* REFERENCED */
- xfs_inode_t *iq;
- int clcount; /* count of inodes clustered */
- int bufwasdelwri;
- struct hlist_node *entry;
- enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
+ enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
XFS_STATS_INC(xs_iflush_count);
@@ -3138,9 +3279,8 @@ xfs_iflush(
* First flush out the inode that xfs_iflush was called with.
*/
error = xfs_iflush_int(ip, bp);
- if (error) {
+ if (error)
goto corrupt_out;
- }
/*
* If the buffer is pinned then push on the log now so we won't
@@ -3153,70 +3293,9 @@ xfs_iflush(
* inode clustering:
* see if other inodes can be gathered into this write
*/
- spin_lock(&ip->i_cluster->icl_lock);
- ip->i_cluster->icl_buf = bp;
-
- clcount = 0;
- hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
- if (iq == ip)
- continue;
-
- /*
- * Do an un-protected check to see if the inode is dirty and
- * is a candidate for flushing. These checks will be repeated
- * later after the appropriate locks are acquired.
- */
- iip = iq->i_itemp;
- if ((iq->i_update_core == 0) &&
- ((iip == NULL) ||
- !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
- xfs_ipincount(iq) == 0) {
- continue;
- }
-
- /*
- * Try to get locks. If any are unavailable,
- * then this inode cannot be flushed and is skipped.
- */
-
- /* get inode locks (just i_lock) */
- if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
- /* get inode flush lock */
- if (xfs_iflock_nowait(iq)) {
- /* check if pinned */
- if (xfs_ipincount(iq) == 0) {
- /* arriving here means that
- * this inode can be flushed.
- * first re-check that it's
- * dirty
- */
- iip = iq->i_itemp;
- if ((iq->i_update_core != 0)||
- ((iip != NULL) &&
- (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
- clcount++;
- error = xfs_iflush_int(iq, bp);
- if (error) {
- xfs_iunlock(iq,
- XFS_ILOCK_SHARED);
- goto cluster_corrupt_out;
- }
- } else {
- xfs_ifunlock(iq);
- }
- } else {
- xfs_ifunlock(iq);
- }
- }
- xfs_iunlock(iq, XFS_ILOCK_SHARED);
- }
- }
- spin_unlock(&ip->i_cluster->icl_lock);
-
- if (clcount) {
- XFS_STATS_INC(xs_icluster_flushcnt);
- XFS_STATS_ADD(xs_icluster_flushinode, clcount);
- }
+ error = xfs_iflush_cluster(ip, bp);
+ if (error)
+ goto cluster_corrupt_out;
if (flags & INT_DELWRI) {
xfs_bdwrite(mp, bp);
@@ -3230,52 +3309,11 @@ xfs_iflush(
corrupt_out:
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- xfs_iflush_abort(ip);
- /*
- * Unlocks the flush lock
- */
- return XFS_ERROR(EFSCORRUPTED);
-
cluster_corrupt_out:
- /* Corruption detected in the clustering loop. Invalidate the
- * inode buffer and shut down the filesystem.
- */
- spin_unlock(&ip->i_cluster->icl_lock);
-
- /*
- * Clean up the buffer. If it was B_DELWRI, just release it --
- * brelse can handle it with no problems. If not, shut down the
- * filesystem before releasing the buffer.
- */
- if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
- xfs_buf_relse(bp);
- }
-
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
- if(!bufwasdelwri) {
- /*
- * Just like incore_relse: if we have b_iodone functions,
- * mark the buffer as an error and call them. Otherwise
- * mark it as stale and brelse.
- */
- if (XFS_BUF_IODONE_FUNC(bp)) {
- XFS_BUF_CLR_BDSTRAT_FUNC(bp);
- XFS_BUF_UNDONE(bp);
- XFS_BUF_STALE(bp);
- XFS_BUF_SHUT(bp);
- XFS_BUF_ERROR(bp,EIO);
- xfs_biodone(bp);
- } else {
- XFS_BUF_STALE(bp);
- xfs_buf_relse(bp);
- }
- }
-
- xfs_iflush_abort(iq);
/*
* Unlocks the flush lock
*/
+ xfs_iflush_abort(ip);
return XFS_ERROR(EFSCORRUPTED);
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c3bfffca9214..93c37697a72c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -133,19 +133,6 @@ typedef struct dm_attrs_s {
} dm_attrs_t;
/*
- * This is the xfs inode cluster structure. This structure is used by
- * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
- * the same time.
- */
-typedef struct xfs_icluster {
- struct hlist_head icl_inodes; /* list of inodes on cluster */
- xfs_daddr_t icl_blkno; /* starting block number of
- * the cluster */
- struct xfs_buf *icl_buf; /* the inode buffer */
- spinlock_t icl_lock; /* inode list lock */
-} xfs_icluster_t;
-
-/*
* This is the xfs in-core inode structure.
* Most of the on-disk inode is embedded in the i_d field.
*
@@ -248,8 +235,6 @@ typedef struct xfs_inode {
unsigned int i_delayed_blks; /* count of delay alloc blks */
xfs_icdinode_t i_d; /* most of ondisk inode */
- xfs_icluster_t *i_cluster; /* cluster list header */
- struct hlist_node i_cnode; /* cluster link node */
xfs_fsize_t i_size; /* in-memory size */
xfs_fsize_t i_new_size; /* size when write completes */
@@ -594,7 +579,6 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#define xfs_inobp_check(mp, bp)
#endif /* DEBUG */
-extern struct kmem_zone *xfs_icluster_zone;
extern struct kmem_zone *xfs_ifork_zone;
extern struct kmem_zone *xfs_inode_zone;
extern struct kmem_zone *xfs_ili_zone;
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 79bdfb3d6081..3ec27bf8531c 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -112,9 +112,6 @@ xfs_init(void)
xfs_ili_zone =
kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
KM_ZONE_SPREAD, NULL);
- xfs_icluster_zone =
- kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
- KM_ZONE_SPREAD, NULL);
/*
* Allocate global trace buffers.
@@ -152,7 +149,6 @@ xfs_cleanup(void)
extern kmem_zone_t *xfs_inode_zone;
extern kmem_zone_t *xfs_efd_zone;
extern kmem_zone_t *xfs_efi_zone;
- extern kmem_zone_t *xfs_icluster_zone;
xfs_cleanup_procfs();
xfs_sysctl_unregister();
@@ -187,7 +183,6 @@ xfs_cleanup(void)
kmem_zone_destroy(xfs_efi_zone);
kmem_zone_destroy(xfs_ifork_zone);
kmem_zone_destroy(xfs_ili_zone);
- kmem_zone_destroy(xfs_icluster_zone);
}
/*