Diffstat (limited to 'fs/bcachefs/journal.c')
-rw-r--r--	fs/bcachefs/journal.c	371
1 file changed, 166 insertions, 205 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index cf4729b7a083..91d0e5d443ed 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -17,23 +17,14 @@
 #include "super-io.h"
 #include "trace.h"
 
-static bool journal_entry_is_open(struct journal *j)
+static bool __journal_entry_is_open(union journal_res_state state)
 {
-	return j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
+	return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
 }
 
-void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
+static bool journal_entry_is_open(struct journal *j)
 {
-	struct journal_buf *w = journal_prev_buf(j);
-
-	atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
-
-	if (!need_write_just_set &&
-	    test_bit(JOURNAL_NEED_WRITE, &j->flags))
-		bch2_time_stats_update(j->delay_time,
-				       j->need_write_time);
-
-	closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
+	return __journal_entry_is_open(j->reservations);
 }
 
 static void journal_pin_new_entry(struct journal *j, int count)
@@ -77,39 +68,76 @@ static inline bool journal_entry_empty(struct jset *j)
 	return true;
 }
 
-static enum {
-	JOURNAL_ENTRY_ERROR,
-	JOURNAL_ENTRY_INUSE,
-	JOURNAL_ENTRY_CLOSED,
-	JOURNAL_UNLOCKED,
-} journal_buf_switch(struct journal *j, bool need_write_just_set)
+void bch2_journal_halt(struct journal *j)
+{
+	union journal_res_state old, new;
+	u64 v = atomic64_read(&j->reservations.counter);
+
+	do {
+		old.v = new.v = v;
+		if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
+			return;
+
+		new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
+	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
+				       old.v, new.v)) != old.v);
+
+	journal_wake(j);
+	closure_wake_up(&journal_cur_buf(j)->wait);
+	closure_wake_up(&journal_prev_buf(j)->wait);
+}
+
+/* journal entry close/open: */
+
+void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
+{
+	struct journal_buf *w = journal_prev_buf(j);
+
+	atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
+
+	if (!need_write_just_set &&
+	    test_bit(JOURNAL_NEED_WRITE, &j->flags))
+		bch2_time_stats_update(j->delay_time,
+				       j->need_write_time);
+
+	clear_bit(JOURNAL_NEED_WRITE, &j->flags);
+
+	closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
+}
+
+/*
+ * Returns true if journal entry is now closed:
+ */
+static bool __journal_entry_close(struct journal *j)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct journal_buf *buf = journal_cur_buf(j);
 	union journal_res_state old, new;
 	u64 v = atomic64_read(&j->reservations.counter);
+	bool set_need_write = false;
+	unsigned sectors;
 
 	lockdep_assert_held(&j->lock);
 
 	do {
 		old.v = new.v = v;
 		if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL)
-			return JOURNAL_ENTRY_CLOSED;
+			return true;
 
 		if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) {
 			/* this entry will never be written: */
 			closure_wake_up(&buf->wait);
-			return JOURNAL_ENTRY_ERROR;
+			return true;
 		}
 
-		if (new.prev_buf_unwritten)
-			return JOURNAL_ENTRY_INUSE;
+		if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
+			set_bit(JOURNAL_NEED_WRITE, &j->flags);
+			j->need_write_time = local_clock();
+			set_need_write = true;
+		}
 
-		/*
-		 * avoid race between setting buf->data->u64s and
-		 * journal_res_put starting write:
-		 */
-		journal_state_inc(&new);
+		if (new.prev_buf_unwritten)
+			return false;
 
 		new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
 		new.idx++;
@@ -119,15 +147,12 @@
 	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
 				       old.v, new.v)) != old.v);
 
-	clear_bit(JOURNAL_NEED_WRITE, &j->flags);
-
 	buf->data->u64s	= cpu_to_le32(old.cur_entry_offset);
 
-	j->prev_buf_sectors =
-		vstruct_blocks_plus(buf->data, c->block_bits,
-				    buf->u64s_reserved) *
-		c->opts.block_size;
-	BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
+	sectors = vstruct_blocks_plus(buf->data, c->block_bits,
+				      buf->u64s_reserved) << c->block_bits;
+	BUG_ON(sectors > buf->sectors);
+	buf->sectors = sectors;
 
 	bkey_extent_init(&buf->key);
@@ -163,32 +188,22 @@ static enum {
 	bch2_journal_buf_init(j);
 
 	cancel_delayed_work(&j->write_work);
-	spin_unlock(&j->lock);
 
 	/* ugh - might be called from __journal_res_get() under wait_event() */
 	__set_current_state(TASK_RUNNING);
-	bch2_journal_buf_put(j, old.idx, need_write_just_set);
-
-	return JOURNAL_UNLOCKED;
+	bch2_journal_buf_put(j, old.idx, set_need_write);
+	return true;
 }
 
-void bch2_journal_halt(struct journal *j)
+static bool journal_entry_close(struct journal *j)
 {
-	union journal_res_state old, new;
-	u64 v = atomic64_read(&j->reservations.counter);
-
-	do {
-		old.v = new.v = v;
-		if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
-			return;
+	bool ret;
 
-		new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
-	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
-				       old.v, new.v)) != old.v);
+	spin_lock(&j->lock);
+	ret = __journal_entry_close(j);
+	spin_unlock(&j->lock);
 
-	journal_wake(j);
-	closure_wake_up(&journal_cur_buf(j)->wait);
-	closure_wake_up(&journal_prev_buf(j)->wait);
+	return ret;
 }
 
 /*
@@ -196,17 +211,16 @@ void bch2_journal_halt(struct journal *j)
  * journal reservation - journal entry is open means journal is dirty:
  *
  * returns:
- * 1:		success
- * 0:		journal currently full (must wait)
- * -EROFS:	insufficient rw devices
- * -EIO:	journal error
+ * 0:		success
+ * -ENOSPC:	journal currently full, must invoke reclaim
+ * -EAGAIN:	journal blocked, must wait
+ * -EROFS:	insufficient rw devices or journal error
  */
 static int journal_entry_open(struct journal *j)
 {
 	struct journal_buf *buf = journal_cur_buf(j);
 	union journal_res_state old, new;
-	ssize_t u64s;
-	int sectors;
+	int u64s, ret;
 	u64 v;
 
 	lockdep_assert_held(&j->lock);
@@ -216,29 +230,22 @@ static int journal_entry_open(struct journal *j)
 		return -EAGAIN;
 
 	if (!fifo_free(&j->pin))
-		return 0;
+		return -ENOSPC;
 
-	sectors = bch2_journal_entry_sectors(j);
-	if (sectors <= 0)
-		return sectors;
+	ret = bch2_journal_space_available(j);
+	if (ret)
+		return ret;
 
-	buf->disk_sectors	= sectors;
 	buf->u64s_reserved	= j->entry_u64s_reserved;
+	buf->disk_sectors	= j->cur_entry_sectors;
+	buf->sectors		= min(buf->disk_sectors, buf->buf_size >> 9);
 
-	sectors = min_t(unsigned, sectors, buf->size >> 9);
-	j->cur_buf_sectors	= sectors;
-
-	u64s = (sectors << 9) / sizeof(u64);
-
-	/* Subtract the journal header */
-	u64s -= sizeof(struct jset) / sizeof(u64);
-	u64s -= buf->u64s_reserved;
-	u64s  = max_t(ssize_t, 0L, u64s);
-
-	BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
+	u64s = (int) (buf->sectors << 9) / sizeof(u64) -
+		journal_entry_overhead(j);
+	u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
 
 	if (u64s <= le32_to_cpu(buf->data->u64s))
-		return 0;
+		return -ENOSPC;
 
 	/*
	 * Must be set before marking the journal entry as open:
	 */
@@ -250,10 +257,11 @@ static int journal_entry_open(struct journal *j)
 		old.v = new.v = v;
 
 		if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
-			return -EIO;
+			return -EROFS;
 
 		/* Handle any already added entries */
 		new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
+
+		journal_state_inc(&new);
 	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
 				       old.v,
 				       new.v)) != old.v);
@@ -266,48 +274,16 @@ static int journal_entry_open(struct journal *j)
 			 &j->write_work,
 			 msecs_to_jiffies(j->write_delay_ms));
 	journal_wake(j);
-	return 1;
-}
-
-static bool __journal_entry_close(struct journal *j)
-{
-	bool set_need_write;
-
-	if (!journal_entry_is_open(j)) {
-		spin_unlock(&j->lock);
-		return true;
-	}
-
-	set_need_write = !test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags);
-	if (set_need_write)
-		j->need_write_time = local_clock();
-
-	switch (journal_buf_switch(j, set_need_write)) {
-	case JOURNAL_ENTRY_INUSE:
-		spin_unlock(&j->lock);
-		return false;
-	default:
-		spin_unlock(&j->lock);
-		fallthrough;
-	case JOURNAL_UNLOCKED:
-		return false;
-	}
-}
-
-static bool journal_entry_close(struct journal *j)
-{
-	spin_lock(&j->lock);
-	return __journal_entry_close(j);
+	return 0;
 }
 
 static bool journal_quiesced(struct journal *j)
 {
-	bool ret;
+	union journal_res_state state = READ_ONCE(j->reservations);
+	bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state);
 
-	spin_lock(&j->lock);
-	ret = !j->reservations.prev_buf_unwritten &&
-	      !journal_entry_is_open(j);
-	__journal_entry_close(j);
+	if (!ret)
+		journal_entry_close(j);
 
 	return ret;
 }
@@ -357,7 +333,11 @@ retry:
 	if (journal_res_get_fast(j, res, flags))
 		return 0;
 
+	if (bch2_journal_error(j))
+		return -EROFS;
+
 	spin_lock(&j->lock);
+
 	/*
 	 * Recheck after taking the lock, so we don't race with another thread
 	 * that just did journal_entry_open() and call journal_entry_close()
@@ -375,56 +355,42 @@ retry:
 	 */
 	buf = journal_cur_buf(j);
 	if (journal_entry_is_open(j) &&
-	    buf->size >> 9 < buf->disk_sectors &&
-	    buf->size < JOURNAL_ENTRY_SIZE_MAX)
-		j->buf_size_want = max(j->buf_size_want, buf->size << 1);
+	    buf->buf_size >> 9 < buf->disk_sectors &&
+	    buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
+		j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
 
-	/*
-	 * Close the current journal entry if necessary, then try to start a new
-	 * one:
-	 */
-	switch (journal_buf_switch(j, false)) {
-	case JOURNAL_ENTRY_ERROR:
-		spin_unlock(&j->lock);
-		return -EROFS;
-	case JOURNAL_ENTRY_INUSE:
+	if (journal_entry_is_open(j) &&
+	    !__journal_entry_close(j)) {
 		/*
-		 * The current journal entry is still open, but we failed to get
-		 * a journal reservation because there's not enough space in it,
-		 * and we can't close it and start another because we haven't
-		 * finished writing out the previous entry:
+		 * We failed to get a reservation on the current open journal
		 * entry because it's full, and we can't close it because
		 * there's still a previous one in flight:
 		 */
-		spin_unlock(&j->lock);
 		trace_journal_entry_full(c);
-		goto blocked;
-	case JOURNAL_ENTRY_CLOSED:
-		break;
-	case JOURNAL_UNLOCKED:
-		goto retry;
+		ret = -EAGAIN;
+	} else {
+		ret = journal_entry_open(j);
 	}
 
-	/* We now have a new, closed journal buf - see if we can open it: */
-	ret = journal_entry_open(j);
+	if ((ret == -EAGAIN || ret == -ENOSPC) &&
+	    !j->res_get_blocked_start)
+		j->res_get_blocked_start = local_clock() ?: 1;
+
 	spin_unlock(&j->lock);
 
-	if (ret < 0)
-		return ret;
-	if (ret)
+	if (!ret)
 		goto retry;
+	if (ret == -ENOSPC) {
+		/*
		 * Journal is full - can't rely on reclaim from work item due to
		 * freezing:
		 */
+		trace_journal_full(c);
+		bch2_journal_reclaim_work(&j->reclaim_work.work);
+		ret = -EAGAIN;
+	}
 
-	/* Journal's full, we have to wait */
-
-	/*
-	 * Direct reclaim - can't rely on reclaim from work item
-	 * due to freezing..
-	 */
-	bch2_journal_reclaim_work(&j->reclaim_work.work);
-
-	trace_journal_full(c);
-blocked:
-	if (!j->res_get_blocked_start)
-		j->res_get_blocked_start = local_clock() ?: 1;
-	return -EAGAIN;
+	return ret;
 }
 
 /*
@@ -461,7 +427,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
 	j->entry_u64s_reserved += d;
 
 	if (d <= 0)
-		goto out_unlock;
+		goto out;
 
 	j->cur_entry_u64s -= d;
 	smp_mb();
@@ -474,15 +440,12 @@ void bch2_journal_entry_res_resize(struct journal *j,
 		 * Not enough room in current journal entry, have to flush it:
 		 */
 		__journal_entry_close(j);
-		goto out;
+	} else {
+		journal_cur_buf(j)->u64s_reserved += d;
 	}
-
-	journal_cur_buf(j)->u64s_reserved += d;
-out_unlock:
-	spin_unlock(&j->lock);
 out:
+	spin_unlock(&j->lock);
 	res->u64s += d;
-	return;
 }
 
 /* journal flushing: */
@@ -512,47 +475,47 @@ int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	int ret;
-retry:
+
 	spin_lock(&j->lock);
 
-	if (seq < journal_cur_seq(j) ||
+	/*
	 * Can't try to open more than one sequence number ahead:
	 */
+	BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j));
+
+	if (journal_cur_seq(j) > seq ||
 	    journal_entry_is_open(j)) {
 		spin_unlock(&j->lock);
 		return 0;
 	}
 
-	if (journal_cur_seq(j) < seq) {
-		switch (journal_buf_switch(j, false)) {
-		case JOURNAL_ENTRY_ERROR:
-			spin_unlock(&j->lock);
-			return -EROFS;
-		case JOURNAL_ENTRY_INUSE:
-			/* haven't finished writing out the previous one: */
-			trace_journal_entry_full(c);
-			goto blocked;
-		case JOURNAL_ENTRY_CLOSED:
-			break;
-		case JOURNAL_UNLOCKED:
-			goto retry;
-		}
-	}
-
-	BUG_ON(journal_cur_seq(j) < seq);
+	if (journal_cur_seq(j) < seq &&
+	    !__journal_entry_close(j)) {
+		/* haven't finished writing out the previous one: */
+		trace_journal_entry_full(c);
+		ret = -EAGAIN;
+	} else {
+		BUG_ON(journal_cur_seq(j) != seq);
 
-	ret = journal_entry_open(j);
-	if (ret) {
-		spin_unlock(&j->lock);
-		return ret < 0 ? ret : 0;
+		ret = journal_entry_open(j);
 	}
-blocked:
-	if (!j->res_get_blocked_start)
+
+	if ((ret == -EAGAIN || ret == -ENOSPC) &&
+	    !j->res_get_blocked_start)
 		j->res_get_blocked_start = local_clock() ?: 1;
 
-	closure_wait(&j->async_wait, cl);
+	if (ret == -EAGAIN || ret == -ENOSPC)
+		closure_wait(&j->async_wait, cl);
+
 	spin_unlock(&j->lock);
-	bch2_journal_reclaim_work(&j->reclaim_work.work);
-	return -EAGAIN;
+
+	if (ret == -ENOSPC) {
+		trace_journal_full(c);
+		bch2_journal_reclaim_work(&j->reclaim_work.work);
+		ret = -EAGAIN;
+	}
+
+	return ret;
 }
 
 static int journal_seq_error(struct journal *j, u64 seq)
@@ -635,8 +598,7 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq,
 
 	if (seq == journal_cur_seq(j))
 		__journal_entry_close(j);
-	else
-		spin_unlock(&j->lock);
+	spin_unlock(&j->lock);
 }
 
 static int journal_seq_flushed(struct journal *j, u64 seq)
@@ -648,8 +610,7 @@ static int journal_seq_flushed(struct journal *j, u64 seq)
 
 	if (seq == journal_cur_seq(j))
 		__journal_entry_close(j);
-	else
-		spin_unlock(&j->lock);
+	spin_unlock(&j->lock);
 
 	return ret;
 }
@@ -783,7 +744,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 		goto err;
 
 	journal_buckets = bch2_sb_resize_journal(&ca->disk_sb,
-					nr + sizeof(*journal_buckets) / sizeof(u64));
+				nr + sizeof(*journal_buckets) / sizeof(u64));
 	if (!journal_buckets)
 		goto err;
@@ -846,9 +807,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 		ja->nr++;
 
 		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
-					  ca->mi.bucket_size,
-					  gc_phase(GC_PHASE_SB),
-					  0);
+				ca->mi.bucket_size,
+				gc_phase(GC_PHASE_SB),
+				0);
 
 		if (c) {
 			spin_unlock(&c->journal.lock);
@@ -899,7 +860,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 	 */
 
 	if (bch2_disk_reservation_get(c, &disk_res,
-				      bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
+			bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
 		mutex_unlock(&c->sb_lock);
 		return -ENOSPC;
 	}
@@ -996,7 +957,7 @@ void bch2_fs_journal_start(struct journal *j)
 	journal_pin_new_entry(j, 0);
 
 	/*
-	 * journal_buf_switch() only inits the next journal entry when it
+	 * __journal_entry_close() only inits the next journal entry when it
 	 * closes an open journal entry - the very first journal entry gets
 	 * initialized here:
 	 */
@@ -1063,8 +1024,8 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 
 void bch2_fs_journal_exit(struct journal *j)
 {
-	kvpfree(j->buf[1].data, j->buf[1].size);
-	kvpfree(j->buf[0].data, j->buf[0].size);
+	kvpfree(j->buf[1].data, j->buf[1].buf_size);
+	kvpfree(j->buf[0].data, j->buf[0].buf_size);
 	free_fifo(&j->pin);
 }
@@ -1088,8 +1049,8 @@ int bch2_fs_journal_init(struct journal *j)
 
 	lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
 
-	j->buf[0].size		= JOURNAL_ENTRY_SIZE_MIN;
-	j->buf[1].size		= JOURNAL_ENTRY_SIZE_MIN;
+	j->buf[0].buf_size	= JOURNAL_ENTRY_SIZE_MIN;
+	j->buf[1].buf_size	= JOURNAL_ENTRY_SIZE_MIN;
 	j->write_delay_ms	= 1000;
 	j->reclaim_delay_ms	= 100;
@@ -1102,8 +1063,8 @@ int bch2_fs_journal_init(struct journal *j)
 		 { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
 
 	if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
-	    !(j->buf[0].data = kvpmalloc(j->buf[0].size, GFP_KERNEL)) ||
-	    !(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL))) {
+	    !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) ||
+	    !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) {
 		ret = -ENOMEM;
 		goto out;
 	}
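
A note on the concurrency pattern this patch leans on throughout: every journal state transition (halting, closing the current entry, opening a new one) is expressed as a read/modify/compare-exchange loop over the packed journal_res_state word, so the transition stays atomic against concurrent reservation holders without taking a lock. Below is a minimal, self-contained sketch of that loop using userspace C11 atomics instead of the kernel's atomic64_cmpxchg(); the union layout and the ENTRY_ERROR_VAL sentinel are illustrative stand-ins, not bcachefs's actual definitions.

	/* cc -std=c11 halt_sketch.c */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative stand-in for the packed journal_res_state word. */
	union res_state {
		struct {
			uint32_t cur_entry_offset;	/* offset in u64s, or a sentinel */
			uint32_t idx;			/* current buffer index */
		};
		uint64_t v;
	};

	/* Hypothetical sentinel standing in for JOURNAL_ENTRY_ERROR_VAL: */
	#define ENTRY_ERROR_VAL 0xffffffffu

	/*
	 * Transition to the error state the way bch2_journal_halt() does:
	 * recompute the new state from the old and retry the compare-exchange
	 * until it wins, bailing out if another thread already halted.
	 */
	static bool journal_halt(_Atomic uint64_t *counter)
	{
		union res_state old, new;
		uint64_t v = atomic_load(counter);

		do {
			old.v = new.v = v;
			if (old.cur_entry_offset == ENTRY_ERROR_VAL)
				return false;	/* someone beat us to it */

			new.cur_entry_offset = ENTRY_ERROR_VAL;
			/* on failure, v is reloaded with the current value */
		} while (!atomic_compare_exchange_weak(counter, &v, new.v));

		return true;
	}

	int main(void)
	{
		union res_state s = { .cur_entry_offset = 42, .idx = 0 };
		_Atomic uint64_t counter;

		atomic_init(&counter, s.v);
		printf("halted: %d\n", journal_halt(&counter));	/* halted: 1 */
		printf("halted: %d\n", journal_halt(&counter));	/* halted: 0 */
		return 0;
	}

Because the whole state fits in one 64-bit word, a losing thread simply recomputes its transition from the freshly observed value; that is why __journal_entry_close() can set sentinels and bump the buffer index in a single step.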

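The other change worth highlighting is the calling convention: journal_entry_open() now reports status with conventional errnos (0 success, -ENOSPC journal full, -EAGAIN blocked, -EROFS error) instead of the old four-value journal_buf_switch() enum mixed with positive returns. Here is a rough sketch of how a caller dispatches on that contract, loosely mirroring the reworked __journal_res_get(); every name and the free-space bookkeeping are hypothetical stand-ins for illustration only.

	/* cc -std=c11 resget_sketch.c */
	#include <errno.h>
	#include <stdio.h>

	/* Hypothetical stand-in for struct journal. */
	struct journal {
		int free_u64s;	/* space left in the current entry */
	};

	/*
	 * Mirrors journal_entry_open()'s new contract: 0 on success, -ENOSPC
	 * when the journal is full, -EAGAIN when blocked on the previous
	 * write, -EROFS on journal error.
	 */
	static int entry_open(struct journal *j)
	{
		return j->free_u64s > 0 ? 0 : -ENOSPC;
	}

	/* Stand-in for direct reclaim via bch2_journal_reclaim_work(). */
	static void direct_reclaim(struct journal *j)
	{
		j->free_u64s = 512;
	}

	/* Dispatch shape of the reworked __journal_res_get(): */
	static int res_get(struct journal *j)
	{
		int ret = entry_open(j);

		switch (ret) {
		case 0:
			return 0;	/* entry open; reservation can proceed */
		case -ENOSPC:
			/*
			 * Journal full: kick reclaim directly (can't rely on
			 * the work item due to freezing), then tell the
			 * caller to wait and retry:
			 */
			direct_reclaim(j);
			return -EAGAIN;
		case -EAGAIN:
			return -EAGAIN;	/* previous entry still being written */
		default:
			return -EROFS;
		}
	}

	int main(void)
	{
		struct journal j = { .free_u64s = 0 };

		printf("first:  %d\n", res_get(&j));	/* -EAGAIN; reclaim kicked */
		printf("second: %d\n", res_get(&j));	/* 0; space now available */
		return 0;
	}

Folding the full and blocked cases into -ENOSPC/-EAGAIN is what lets the patch delete the blocked: label and the goto-based retry in both __journal_res_get() and bch2_journal_open_seq_async().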