From 89918647a445fddfe223b097e29f775dcfa81eab Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 1 Jun 2007 15:19:33 +0100
Subject: [GFS2] Make the log reserved blocks depend on block size

The number of blocks which we reserve in the log at the start of each
transaction needs to depends upon the block size since the overhead is
related to the number of "pointers" which can be fitted into a single
block.

This relates to Red Hat bz #240435

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs/gfs2/log.c')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 291415ddfe51..586923d24e6e 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -262,7 +262,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
  * @sdp: The GFS2 superblock
  * @blks: The number of blocks to reserve
  *
- * Note that we never give out the last 6 blocks of the journal. Thats
+ * Note that we never give out the last few blocks of the journal. Thats
  * due to the fact that there is are a small number of header blocks
  * associated with each log flush. The exact number can't be known until
  * flush time, so we ensure that we have just enough free blocks at all
@@ -274,6 +274,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
 	unsigned int try = 0;
+	unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
 
 	if (gfs2_assert_warn(sdp, blks) ||
 	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
@@ -281,7 +282,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 
 	mutex_lock(&sdp->sd_log_reserve_mutex);
 	gfs2_log_lock(sdp);
-	while(sdp->sd_log_blks_free <= (blks + 6)) {
+	while(sdp->sd_log_blks_free <= (blks + reserved_blks)) {
 		gfs2_log_unlock(sdp);
 		gfs2_ail1_empty(sdp, 0);
 		gfs2_log_flush(sdp, NULL);
-- 
cgit v1.2.3


From ddf4b426aababdae4cb96326d7aeb9d119f42c50 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Fri, 1 Jun 2007 14:21:38 -0500
Subject: [GFS2] fix jdata issues

This is a patch for the first three issues of RHBZ #238162

The first issue is that when you allocate a new page for a file, it will not
start off uptodate. This makes sense, since you haven't written anything to that
part of the file yet.  Unfortunately, gfs2_pin() checks to make sure that the
buffers are uptodate.  The solution to this is to mark the buffers uptodate in
gfs2_commit_write(), after they have been zeroed out and have the data written
into them.  I'm pretty confident with this fix, although it's not completely
obvious that there is no problem with marking the buffers uptodate here.

The second issue is simply that you can try to pin a data buffer that is already
on the incore log, and thus, already pinned. This patch checks to see if this
buffer is already on the log, and exits databuf_lo_add() if it is, just like
buf_lo_add() does.

The third issue is that gfs2_log_flush() doesn't do it's block accounting
correctly.  Both metadata and journaled data are logged, but gfs2_log_flush()
only compares the number of metadata blocks with the number of blocks to commit
to the ondisk journal.  This patch also counts the journaled data blocks.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/gfs2/log.c')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 586923d24e6e..1fb846fc545e 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -566,7 +566,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	INIT_LIST_HEAD(&ai->ai_ail1_list);
 	INIT_LIST_HEAD(&ai->ai_ail2_list);
 
-	gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
+	gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf + sdp->sd_log_num_jdata == sdp->sd_log_commited_buf);
 	gfs2_assert_withdraw(sdp,
 			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
 
-- 
cgit v1.2.3


From 8fb68595d508fd30ec90939572484b263600376c Mon Sep 17 00:00:00 2001
From: Robert Peterson <rpeterso@redhat.com>
Date: Tue, 12 Jun 2007 11:24:36 -0500
Subject: [GFS2] Journaled file write/unstuff bug

This patch is for bugzilla bug 283162, which uncovered a number of
bugs pertaining to writing to files that have the journaled bit on.
These bugs happen most often when writing to the meta_fs because
the files are always journaled.  So operations like gfs2_grow were
particularly vulnerable, although many of the problems could be
recreated with normal files after setting the journaled bit on.
The problems fixed are:

-GFS2 wasn't ever writing unstuffed journaled data blocks to their
 in-place location on disk. Now it does.

-If you unmounted too quickly after doing IO to a journaled file,
 GFS2 was crashing because you would discard a buffer whose bufdata
 was still on the active items list.  GFS2 now deals with this
 gracefully.

-GFS2 was losing track of the bufdata for journaled data blocks,
 and it wasn't getting freed, causing an error when you tried to
 unmount the module.  GFS2 now frees all the bufdata structures.

-There was a memory corruption occurring because GFS2 wrote
 twice as many log entries for journaled buffers.

-It was occasionally trying to write journal headers in buffers
 that weren't currently mapped.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs/gfs2/log.c')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 1fb846fc545e..fbdc0dc9923e 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -83,6 +83,11 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 
 			gfs2_assert(sdp, bd->bd_ail == ai);
 
+			if (!bh){
+				list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                                continue;
+                        }
+
 			if (!buffer_busy(bh)) {
 				if (!buffer_uptodate(bh)) {
 					gfs2_log_unlock(sdp);
@@ -125,6 +130,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
 					 bd_ail_st_list) {
 		bh = bd->bd_bh;
 
+		if (!bh){
+			list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+			continue;
+		}
+
 		gfs2_assert(sdp, bd->bd_ail == ai);
 
 		if (buffer_busy(bh)) {
@@ -227,7 +237,10 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 		list_del(&bd->bd_ail_st_list);
 		list_del(&bd->bd_ail_gl_list);
 		atomic_dec(&bd->bd_gl->gl_ail_count);
-		brelse(bd->bd_bh);
+		if (bd->bd_bh)
+			brelse(bd->bd_bh);
+		else
+			kmem_cache_free(gfs2_bufdata_cachep, bd);
 	}
 }
 
-- 
cgit v1.2.3


From 2332c4435bb733b5cd4f612ee57532bd8fde4c1c Mon Sep 17 00:00:00 2001
From: Robert Peterson <rpeterso@redhat.com>
Date: Mon, 18 Jun 2007 14:50:20 -0500
Subject: [GFS2] assertion failure after writing to journaled file, umount

This patch passes all my nasty tests that were causing the code to
fail under one circumstance or another.  Here is a complete summary
of all changes from today's git tree, in order of appearance:

1. There are now separate variables for metadata buffer accounting.
2. Variable sd_log_num_hdrs is no longer needed, since the header
   accounting is taken care of by the reserve/refund sequence.
3. Fixed a tiny grammatical problem in a comment.
4. Added a new function "calc_reserved" to calculate the reserved
   log space.  This isn't entirely necessary, but it has two benefits:
   First, it simplifies the gfs2_log_refund function greatly.
   Second, it allows for easier debugging because I could sprinkle the
   code with calls to this function to make sure the accounting is
   proper (by adding asserts and printks) at strategic point of the code.
5. In log_pull_tail there apparently was a kludge to fix up the
   accounting based on a "pull" parameter.  The buffer accounting is
   now done properly, so the kludge was removed.
6. File sync operations were making a call to gfs2_log_flush that
   writes another journal header.  Since that header was unplanned
   for (reserved) by the reserve/refund sequence, the free space had
   to be decremented so that when log_pull_tail gets called, the free
   space is be adjusted properly.  (Did I hear you call that a kludge?
   well, maybe, but a lot more justifiable than the one I removed).
7. In the gfs2_log_shutdown code, it optionally syncs the log by
   specifying the PULL parameter to log_write_header.  I'm not sure
   this is necessary anymore.  It just seems to me there could be
   cases where shutdown is called while there are outstanding log
   buffers.
8. In the (data)buf_lo_before_commit functions, I changed some offset
   values from being calculated on the fly to being constants.	That
   simplified some code and we might as well let the compiler do the
   calculation once rather than redoing those cycles at run time.
9. This version has my rewritten databuf_lo_add function.
   This version is much more like its predecessor, buf_lo_add, which
   makes it easier to understand.  Again, this might not be necessary,
   but it seems as if this one works as well as the previous one,
   maybe even better, so I decided to leave it in.
10. In databuf_lo_before_commit, a previous data corruption problem
   was caused by going off the end of the buffer.  The proper solution
   is to have the proper limit in place, rather than stopping earlier.
   (Thus my previous attempt to fix it is wrong).
   If you don't wrap the buffer, you're stopping too early and that
   causes more log buffer accounting problems.
11. In lops.h there are two new (previously mentioned) constants for
   figuring out the data offset for the journal buffers.
12. There are also two new functions, buf_limit and databuf_limit to
   calculate how many entries will fit in the buffer.
13. In function gfs2_meta_wipe, it needs to distinguish between pinned
   metadata buffers and journaled data buffers for proper journal buffer
   accounting.	It can't use the JDATA gfs2_inode flag because it's
   sometimes passed the "real" inode and sometimes the "metadata
   inode" and the inode flags will be random bits in a metadata
   gfs2_inode.	It needs to base its decision on which was passed in.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c | 100 ++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 76 insertions(+), 24 deletions(-)

(limited to 'fs/gfs2/log.c')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index fbdc0dc9923e..8fcfb784f906 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -276,7 +276,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
  * @blks: The number of blocks to reserve
  *
  * Note that we never give out the last few blocks of the journal. Thats
- * due to the fact that there is are a small number of header blocks
+ * due to the fact that there is a small number of header blocks
  * associated with each log flush. The exact number can't be known until
  * flush time, so we ensure that we have just enough free blocks at all
  * times to avoid running out during a log flush.
@@ -371,6 +371,58 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
 	return dist;
 }
 
+/**
+ * calc_reserved - Calculate the number of blocks to reserve when
+ *                 refunding a transaction's unused buffers.
+ * @sdp: The GFS2 superblock
+ *
+ * This is complex.  We need to reserve room for all our currently used
+ * metadata buffers (e.g. normal file I/O rewriting file time stamps) and 
+ * all our journaled data buffers for journaled files (e.g. files in the 
+ * meta_fs like rindex, or files for which chattr +j was done.)
+ * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
+ * will count it as free space (sd_log_blks_free) and corruption will follow.
+ *
+ * We can have metadata bufs and jdata bufs in the same journal.  So each
+ * type gets its own log header, for which we need to reserve a block.
+ * In fact, each type has the potential for needing more than one header 
+ * in cases where we have more buffers than will fit on a journal page.
+ * Metadata journal entries take up half the space of journaled buffer entries.
+ * Thus, metadata entries have buf_limit (502) and journaled buffers have
+ * databuf_limit (251) before they cause a wrap around.
+ *
+ * Also, we need to reserve blocks for revoke journal entries and one for an
+ * overall header for the lot.
+ *
+ * Returns: the number of blocks reserved
+ */
+static unsigned int calc_reserved(struct gfs2_sbd *sdp)
+{
+	unsigned int reserved = 0;
+	unsigned int mbuf_limit, metabufhdrs_needed;
+	unsigned int dbuf_limit, databufhdrs_needed;
+	unsigned int revokes = 0;
+
+	mbuf_limit = buf_limit(sdp);
+	metabufhdrs_needed = (sdp->sd_log_commited_buf +
+			      (mbuf_limit - 1)) / mbuf_limit;
+	dbuf_limit = databuf_limit(sdp);
+	databufhdrs_needed = (sdp->sd_log_commited_databuf +
+			      (dbuf_limit - 1)) / dbuf_limit;
+
+	if (sdp->sd_log_commited_revoke)
+		revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
+					  sizeof(u64));
+
+	reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
+		sdp->sd_log_commited_databuf + databufhdrs_needed +
+		revokes;
+	/* One for the overall header */
+	if (reserved)
+		reserved++;
+	return reserved;
+}
+
 static unsigned int current_tail(struct gfs2_sbd *sdp)
 {
 	struct gfs2_ail *ai;
@@ -461,14 +513,14 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
 	return bh;
 }
 
-static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
+static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 {
 	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
 
 	ail2_empty(sdp, new_tail);
 
 	gfs2_log_lock(sdp);
-	sdp->sd_log_blks_free += dist - (pull ? 1 : 0);
+	sdp->sd_log_blks_free += dist;
 	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
 	gfs2_log_unlock(sdp);
 
@@ -518,7 +570,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	brelse(bh);
 
 	if (sdp->sd_log_tail != tail)
-		log_pull_tail(sdp, tail, pull);
+		log_pull_tail(sdp, tail);
 	else
 		gfs2_assert_withdraw(sdp, !pull);
 
@@ -579,7 +631,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	INIT_LIST_HEAD(&ai->ai_ail1_list);
 	INIT_LIST_HEAD(&ai->ai_ail2_list);
 
-	gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf + sdp->sd_log_num_jdata == sdp->sd_log_commited_buf);
+	gfs2_assert_withdraw(sdp,
+			     sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
+			     sdp->sd_log_commited_buf +
+			     sdp->sd_log_commited_databuf);
 	gfs2_assert_withdraw(sdp,
 			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
 
@@ -590,16 +645,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	lops_before_commit(sdp);
 	if (!list_empty(&sdp->sd_log_flush_list))
 		log_flush_commit(sdp);
-	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
+	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
+		gfs2_log_lock(sdp);
+		sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */
+		gfs2_log_unlock(sdp);
 		log_write_header(sdp, 0, PULL);
+	}
 	lops_after_commit(sdp, ai);
 
 	gfs2_log_lock(sdp);
 	sdp->sd_log_head = sdp->sd_log_flush_head;
-	sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
 	sdp->sd_log_blks_reserved = 0;
 	sdp->sd_log_commited_buf = 0;
-	sdp->sd_log_num_hdrs = 0;
+	sdp->sd_log_commited_databuf = 0;
 	sdp->sd_log_commited_revoke = 0;
 
 	if (!list_empty(&ai->ai_ail1_list)) {
@@ -616,32 +674,26 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 
 static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 {
-	unsigned int reserved = 0;
+	unsigned int reserved;
 	unsigned int old;
 
 	gfs2_log_lock(sdp);
 
 	sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
-	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
+	sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
+		tr->tr_num_databuf_rm;
+	gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) ||
+			     (((int)sdp->sd_log_commited_databuf) >= 0));
 	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
 	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
-
-	if (sdp->sd_log_commited_buf)
-		reserved += sdp->sd_log_commited_buf;
-	if (sdp->sd_log_commited_revoke)
-		reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
-					    sizeof(u64));
-	if (reserved)
-		reserved++;
-
+	reserved = calc_reserved(sdp);
 	old = sdp->sd_log_blks_free;
 	sdp->sd_log_blks_free += tr->tr_reserved -
 				 (reserved - sdp->sd_log_blks_reserved);
 
 	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
-	gfs2_assert_withdraw(sdp,
-			     sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
-			     sdp->sd_log_num_hdrs);
+	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
+			     sdp->sd_jdesc->jd_blocks);
 
 	sdp->sd_log_blks_reserved = reserved;
 
@@ -687,13 +739,13 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
 	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
-	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
 	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
 
 	sdp->sd_log_flush_head = sdp->sd_log_head;
 	sdp->sd_log_flush_wrapped = 0;
 
-	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
+	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
+			 (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
 
 	gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
 	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
-- 
cgit v1.2.3


From a0a24741cac414aba5918e9939afafa70c37f952 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 9 Jul 2007 15:43:07 +0100
Subject: [GFS2] Small fixes to logging code

This reverts part of an earlier patch which tried to reclaim
gfs2_bufdata structures too early and resulted in a "use after free"
case (this bit from me). Also a change to not write out log headers
unless we really need to (in the case of flushing nothing we don't need
a header) from Bob.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/log.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

(limited to 'fs/gfs2/log.c')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 8fcfb784f906..f49a12e24086 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -237,10 +237,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 		list_del(&bd->bd_ail_st_list);
 		list_del(&bd->bd_ail_gl_list);
 		atomic_dec(&bd->bd_gl->gl_ail_count);
-		if (bd->bd_bh)
-			brelse(bd->bd_bh);
-		else
-			kmem_cache_free(gfs2_bufdata_cachep, bd);
+		brelse(bd->bd_bh);
 	}
 }
 
@@ -583,6 +580,7 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
 	struct list_head *head = &sdp->sd_log_flush_list;
 	struct gfs2_log_buf *lb;
 	struct buffer_head *bh;
+	int flushcount = 0;
 
 	while (!list_empty(head)) {
 		lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
@@ -599,9 +597,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
 		} else
 			brelse(bh);
 		kfree(lb);
+		flushcount++;
 	}
 
-	log_write_header(sdp, 0, 0);
+	/* If nothing was journaled, the header is unplanned and unwanted. */
+	if (flushcount) {
+		log_write_header(sdp, 0, 0);
+	} else {
+		unsigned int tail;
+		tail = current_tail(sdp);
+
+		gfs2_ail1_empty(sdp, 0);
+		if (sdp->sd_log_tail != tail)
+			log_pull_tail(sdp, tail);
+	}
 }
 
 /**
-- 
cgit v1.2.3