From 40ac218f52aa5cac7dc8082f28b61c8b2b29373c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 2 Aug 2011 13:17:27 +0100
Subject: GFS2: Fix inode allocation error path

If we have got far enough through the inode allocation code
path that an inode has already been allocated, then we must
call iput to dispose of it, if an error occurs during a
later part of the process. This will always be the final iput
since there will be no other references to the inode.

Unlike when the inode has been unlinked, its block state will
be GFS2_BLKST_INODE rather than GFS2_BLKST_UNLINKED so we need
to skip the test in ->evict_inode() for this one case in order
to ensure that it will be deallocated correctly. This patch adds
a new flag in order to ensure that this will happen correctly.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs/gfs2/inode.c')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 900cf986aadc..044efe273b97 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -736,10 +736,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 
 fail_gunlock2:
 	gfs2_glock_dq_uninit(ghs + 1);
-	if (inode && !IS_ERR(inode))
-		iput(inode);
 fail_gunlock:
 	gfs2_glock_dq_uninit(ghs);
+	if (inode && !IS_ERR(inode)) {
+		set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags);
+		iput(inode);
+	}
 fail:
 	if (bh)
 		brelse(bh);
-- 
cgit v1.2.3


From ab9bbda0204dfd0e5342562d9979d1241b14ea5f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 15 Aug 2011 14:20:36 +0100
Subject: GFS2: Use ->dirty_inode()

The aim of this patch is to use the newly enhanced ->dirty_inode()
super block operation to deal with atime updates, rather than
piggy backing that code into ->write_inode() as is currently
done.

The net result is a simplification of the code in various places
and a reduction of the number of gfs2_dinode_out() calls since
this is now implied by ->dirty_inode().

Some of the mark_inode_dirty() calls have been moved under glocks
in order to take advantage of then being able to avoid locking in
->dirty_inode() when we already have suitable locks.

One consequence is that generic_write_end() now correctly deals
with file size updates, so that we do not need a separate check
for that afterwards. This also, indirectly, means that fdatasync
should work correctly on GFS2 - the current code always syncs the
metadata whether it needs to or not.

Has survived testing with postmark (with and without atime) and
also fsx.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 43 +++++++++++++------------------------------
 1 file changed, 13 insertions(+), 30 deletions(-)

(limited to 'fs/gfs2/inode.c')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 044efe273b97..a0b53d3bd8fa 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -729,8 +729,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		gfs2_inplace_release(dip);
 	gfs2_quota_unlock(dip);
 	gfs2_alloc_put(dip);
-	gfs2_glock_dq_uninit_m(2, ghs);
 	mark_inode_dirty(inode);
+	gfs2_glock_dq_uninit_m(2, ghs);
 	d_instantiate(dentry, inode);
 	return 0;
 
@@ -926,8 +926,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	inc_nlink(&ip->i_inode);
 	ip->i_inode.i_ctime = CURRENT_TIME;
-	gfs2_dinode_out(ip, dibh->b_data);
-	mark_inode_dirty(&ip->i_inode);
+	ihold(inode);
+	d_instantiate(dentry, inode);
+	mark_inode_dirty(inode);
 
 out_brelse:
 	brelse(dibh);
@@ -949,11 +950,6 @@ out_child:
 out_parent:
 	gfs2_holder_uninit(ghs);
 	gfs2_holder_uninit(ghs + 1);
-	if (!error) {
-		ihold(inode);
-		d_instantiate(dentry, inode);
-		mark_inode_dirty(inode);
-	}
 	return error;
 }
 
@@ -1026,8 +1022,6 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip,
 		clear_nlink(inode);
 	else
 		drop_nlink(inode);
-	gfs2_trans_add_bh(ip->i_gl, bh, 1);
-	gfs2_dinode_out(ip, bh->b_data);
 	mark_inode_dirty(inode);
 	if (inode->i_nlink == 0)
 		gfs2_unlink_di(inode);
@@ -1565,21 +1559,10 @@ int gfs2_permission(struct inode *inode, int mask)
 	return error;
 }
 
-static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
+static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
 {
-	struct inode *inode = &ip->i_inode;
-	struct buffer_head *dibh;
-	int error;
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		return error;
-
 	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
-	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(ip, dibh->b_data);
-	brelse(dibh);
 	return 0;
 }
 
@@ -1591,19 +1574,19 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
  * Returns: errno
  */
 
-int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
+int gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
 {
 	int error;
 
 	if (current->journal_info)
-		return __gfs2_setattr_simple(ip, attr);
+		return __gfs2_setattr_simple(inode, attr);
 
-	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0);
+	error = gfs2_trans_begin(GFS2_SB(inode), RES_DINODE, 0);
 	if (error)
 		return error;
 
-	error = __gfs2_setattr_simple(ip, attr);
-	gfs2_trans_end(GFS2_SB(&ip->i_inode));
+	error = __gfs2_setattr_simple(inode, attr);
+	gfs2_trans_end(GFS2_SB(inode));
 	return error;
 }
 
@@ -1641,7 +1624,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	if (error)
 		goto out_gunlock_q;
 
-	error = gfs2_setattr_simple(ip, attr);
+	error = gfs2_setattr_simple(inode, attr);
 	if (error)
 		goto out_end_trans;
 
@@ -1697,12 +1680,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
 		error = gfs2_acl_chmod(ip, attr);
 	else
-		error = gfs2_setattr_simple(ip, attr);
+		error = gfs2_setattr_simple(inode, attr);
 
 out:
-	gfs2_glock_dq_uninit(&i_gh);
 	if (!error)
 		mark_inode_dirty(inode);
+	gfs2_glock_dq_uninit(&i_gh);
 	return error;
 }
 
-- 
cgit v1.2.3


From 9a63edd12ba3c18351f00d6b77a6b2f49f2b8eb6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 18 Aug 2011 14:35:53 +0100
Subject: GFS2: Clean up gfs2_create

If we pass through knowledge of whether the creation is intended to be
exclusive or not, then we can deal with that in gfs2_create_inode
and remove one set of locking. Also this removes the loop in
gfs2_create and simplifies the code a bit.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 36 ++++++++++++++----------------------
 1 file changed, 14 insertions(+), 22 deletions(-)

(limited to 'fs/gfs2/inode.c')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a0b53d3bd8fa..2af69056a9fd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -663,7 +663,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
 
 static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 			     unsigned int mode, dev_t dev, const char *symname,
-			     unsigned int size)
+			     unsigned int size, int excl)
 {
 	const struct qstr *name = &dentry->d_name;
 	struct gfs2_holder ghs[2];
@@ -683,6 +683,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		goto fail;
 
 	error = create_ok(dip, name, mode);
+	if ((error == -EEXIST) && S_ISREG(mode) && !excl) {
+		inode = gfs2_lookupi(dir, &dentry->d_name, 0);
+		gfs2_glock_dq_uninit(ghs);
+		d_instantiate(dentry, inode);
+		return IS_ERR(inode) ? PTR_ERR(inode) : 0;
+	}
 	if (error)
 		goto fail_gunlock;
 
@@ -760,24 +766,10 @@ fail:
 static int gfs2_create(struct inode *dir, struct dentry *dentry,
 		       int mode, struct nameidata *nd)
 {
-	struct inode *inode;
-	int ret;
-
-	for (;;) {
-		ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
-		if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL)))
-			return ret;
-
-		inode = gfs2_lookupi(dir, &dentry->d_name, 0);
-		if (inode) {
-			if (!IS_ERR(inode))
-				break;
-			return PTR_ERR(inode);
-		}
-	}
-
-	d_instantiate(dentry, inode);
-	return 0;
+	int excl = 0;
+	if (nd && (nd->flags & LOOKUP_EXCL))
+		excl = 1;
+	return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl);
 }
 
 /**
@@ -1135,7 +1127,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
 		return -ENAMETOOLONG;
 
-	return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size);
+	return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size, 0);
 }
 
 /**
@@ -1149,7 +1141,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 
 static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
-	return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
+	return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0);
 }
 
 /**
@@ -1164,7 +1156,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
 		      dev_t dev)
 {
-	return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
+	return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0);
 }
 
 /*
-- 
cgit v1.2.3


From 8339ee543ece6e2dcc1bbd97d5350163c198cf00 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 31 Aug 2011 16:38:29 +0100
Subject: GFS2: Make resource groups "append only" during life of fs

Since we have ruled out supporting online filesystem shrink,
it is possible to make the resource group list append only
during the life of a super block. This gives several benefits:

Firstly, we only need to read new rindex elements as they are added
rather than needing to reread the whole rindex file each time one
element is added.

Secondly, the rindex glock can be held for much shorter periods of
time, and is completely removed from the fast path for allocations.
The lock is taken in shared mode only when updating the resource
groups when the first allocation occurs, and after a grow has
taken place.

Thirdly, this results in a reduction in code size, and everything
gets a lot simpler to understand in this area.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'fs/gfs2/inode.c')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2af69056a9fd..29703dd97dc9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1041,13 +1041,8 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 	struct buffer_head *bh;
 	struct gfs2_holder ghs[3];
 	struct gfs2_rgrpd *rgd;
-	struct gfs2_holder ri_gh;
 	int error;
 
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		return error;
-
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
 
@@ -1104,7 +1099,6 @@ out_child:
 	gfs2_glock_dq(ghs);
 out_parent:
 	gfs2_holder_uninit(ghs);
-	gfs2_glock_dq_uninit(&ri_gh);
 	return error;
 }
 
@@ -1222,7 +1216,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
 	struct gfs2_inode *nip = NULL;
 	struct gfs2_sbd *sdp = GFS2_SB(odir);
-	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
+	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
 	struct gfs2_rgrpd *nrgd;
 	unsigned int num_gh;
 	int dir_rename = 0;
@@ -1236,10 +1230,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			return 0;
 	}
 
-	error = gfs2_rindex_hold(sdp, &ri_gh);
-	if (error)
-		return error;
-
 	if (odip != ndip) {
 		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
 					   0, &r_gh);
@@ -1376,7 +1366,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 
 		al->al_requested = sdp->sd_max_dirres;
 
-		error = gfs2_inplace_reserve_ri(ndip);
+		error = gfs2_inplace_reserve(ndip);
 		if (error)
 			goto out_gunlock_q;
 
@@ -1447,7 +1437,6 @@ out_gunlock_r:
 	if (r_gh.gh_gl)
 		gfs2_glock_dq_uninit(&r_gh);
 out:
-	gfs2_glock_dq_uninit(&ri_gh);
 	return error;
 }
 
-- 
cgit v1.2.3


From 54335b1fca27b84baa75b1f45985d98262003837 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 1 Sep 2011 13:31:59 +0100
Subject: GFS2: Cache the most recently used resource group in the inode

This means that after the initial allocation for any inode, the
last used resource group is cached in the inode for future use.
This drastically reduces the number of lookups of resource
groups in the common case, and this the contention on that
data structure.

The allocation algorithm is the same as previously, except that we
always check to see if the goal block is within the cached rgrp
first before going to the rbtree to look one up.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs/gfs2/inode.c')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 29703dd97dc9..55b3bbaf2f25 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -583,7 +583,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 			goto fail_quota_locks;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 al->al_rgd->rd_length +
+					 dip->i_rgd->rd_length +
 					 2 * RES_DINODE +
 					 RES_STATFS + RES_QUOTA, 0);
 		if (error)
@@ -613,8 +613,7 @@ fail_end_trans:
 	gfs2_trans_end(sdp);
 
 fail_ipreserv:
-	if (dip->i_alloc->al_rgd)
-		gfs2_inplace_release(dip);
+	gfs2_inplace_release(dip);
 
 fail_quota_locks:
 	gfs2_quota_unlock(dip);
@@ -731,8 +730,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		brelse(bh);
 
 	gfs2_trans_end(sdp);
-	if (dip->i_alloc->al_rgd)
-		gfs2_inplace_release(dip);
+	gfs2_inplace_release(dip);
 	gfs2_quota_unlock(dip);
 	gfs2_alloc_put(dip);
 	mark_inode_dirty(inode);
@@ -896,7 +894,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 			goto out_gunlock_q;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 gfs2_rg_blocks(al) +
+					 gfs2_rg_blocks(dip) +
 					 2 * RES_DINODE + RES_STATFS +
 					 RES_QUOTA, 0);
 		if (error)
@@ -1371,7 +1369,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			goto out_gunlock_q;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 gfs2_rg_blocks(al) +
+					 gfs2_rg_blocks(ndip) +
 					 4 * RES_DINODE + 4 * RES_LEAF +
 					 RES_STATFS + RES_QUOTA + 4, 0);
 		if (error)
-- 
cgit v1.2.3