GFS2: Add test for resource group congestion status

This patch uses information gathered by the recent glock statistics patch in order to derrive a boolean verdict on the congestion status of a resource group. This is then used when making decisions on which resource group to choose during block allocation. The aim is to avoid resource groups which are heavily contended by other nodes, while still ensuring locality of access wherever possible. Once a reservation has been made in a particular resource group we continue to use that resource group until a new reservation is required. This should help to ensure that we do not change resource groups too often. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
author: Steven Whitehouse <swhiteho@redhat.com> 2012-10-31 09:58:42 +0000
committer: Steven Whitehouse <swhiteho@redhat.com> 2012-11-07 13:32:21 +0000
commit: bcd97c06308cbfa8b46e11762ea116300cdce772 (patch)
tree: 1659874b34998404ae05eb1c861e730df04d97e8 /fs/gfs2
parent: 06dfc30641370094ed522bf5949b2a326fe2741b (diff)
1 files changed, 90 insertions, 4 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 669b89b95ccc..bdf3e644baae 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1681,6 +1681,88 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 	return;
 }
 
+/**
+ * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
+ * @rgd: The rgrp in question
+ * @loops: An indication of how picky we can be (0=very, 1=less so)
+ *
+ * This function uses the recently added glock statistics in order to
+ * figure out whether a parciular resource group is suffering from
+ * contention from multiple nodes. This is done purely on the basis
+ * of timings, since this is the only data we have to work with and
+ * our aim here is to reject a resource group which is highly contended
+ * but (very important) not to do this too often in order to ensure that
+ * we do not land up introducing fragmentation by changing resource
+ * groups when not actually required.
+ *
+ * The calculation is fairly simple, we want to know whether the SRTTB
+ * (i.e. smoothed round trip time for blocking operations) to acquire
+ * the lock for this rgrp's glock is significantly greater than the
+ * time taken for resource groups on average. We introduce a margin in
+ * the form of the variable @var which is computed as the sum of the two
+ * respective variences, and multiplied by a factor depending on @loops
+ * and whether we have a lot of data to base the decision on. This is
+ * then tested against the square difference of the means in order to
+ * decide whether the result is statistically significant or not.
+ *
+ * Returns: A boolean verdict on the congestion status
+ */
+
+static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
+{
+	const struct gfs2_glock *gl = rgd->rd_gl;
+	const struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_lkstats *st;
+	s64 r_dcount, l_dcount;
+	s64 r_srttb, l_srttb;
+	s64 srttb_diff;
+	s64 sqr_diff;
+	s64 var;
+
+	preempt_disable();
+	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
+	r_srttb = st->stats[GFS2_LKS_SRTTB];
+	r_dcount = st->stats[GFS2_LKS_DCOUNT];
+	var = st->stats[GFS2_LKS_SRTTVARB] +
+	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
+	preempt_enable();
+
+	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
+	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
+
+	if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
+		return false;
+
+	srttb_diff = r_srttb - l_srttb;
+	sqr_diff = srttb_diff * srttb_diff;
+
+	var *= 2;
+	if (l_dcount < 8 || r_dcount < 8)
+		var *= 2;
+	if (loops == 1)
+		var *= 2;
+
+	return ((srttb_diff < 0) && (sqr_diff > var));
+}
+
+/**
+ * gfs2_rgrp_used_recently
+ * @rs: The block reservation with the rgrp to test
+ * @msecs: The time limit in milliseconds
+ *
+ * Returns: True if the rgrp glock has been used within the time limit
+ */
+static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
+				    u64 msecs)
+{
+	u64 tdiff;
+
+	tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
+                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
+
+	return tdiff > (msecs * 1000 * 1000);
+}
+
 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
 {
 	struct gfs2_rgrpd *rgd = *pos;
@@ -1707,7 +1789,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *begin = NULL;
 	struct gfs2_blkreserv *rs = ip->i_res;
-	int error = 0, rg_locked, flags = LM_FLAG_TRY;
+	int error = 0, rg_locked, flags = 0;
 	u64 last_unlinked = NO_BLOCK;
 	int loops = 0;
 
@@ -1731,13 +1813,18 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 
 		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
 			rg_locked = 0;
+			if (!gfs2_rs_active(rs) && (loops < 2) &&
+			     gfs2_rgrp_used_recently(rs, 1000) &&
+			     gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+				goto next_rgrp;
 			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
 						   LM_ST_EXCLUSIVE, flags,
 						   &rs->rs_rgd_gh);
-			if (error == GLR_TRYFAILED)
-				goto next_rgrp;
 			if (unlikely(error))
 				return error;
+			if (!gfs2_rs_active(rs) && (loops < 2) &&
+			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+				goto skip_rgrp;
 			if (sdp->sd_args.ar_rgrplvb) {
 				error = update_rgrp_lvb(rs->rs_rbm.rgd);
 				if (unlikely(error)) {
@@ -1789,7 +1876,6 @@ next_rgrp:
 		 * then this checks for some less likely conditions before
 		 * trying again.
 		 */
-		flags &= ~LM_FLAG_TRY;
 		loops++;
 		/* Check that fs hasn't grown if writing to rindex */
 		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
author	Steven Whitehouse <swhiteho@redhat.com>	2012-10-31 09:58:42 +0000
committer	Steven Whitehouse <swhiteho@redhat.com>	2012-11-07 13:32:21 +0000
commit	bcd97c06308cbfa8b46e11762ea116300cdce772 (patch)
tree	1659874b34998404ae05eb1c861e730df04d97e8 /fs/gfs2
parent	06dfc30641370094ed522bf5949b2a326fe2741b (diff)