Diffstat (limited to 'fs/bcachefs/journal.c')
-rw-r--r--	fs/bcachefs/journal.c	371
1 file changed, 166 insertions, 205 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index cf4729b7a083..91d0e5d443ed 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -17,23 +17,14 @@
 #include "super-io.h"
 #include "trace.h"
 
-static bool journal_entry_is_open(struct journal *j)
+static bool __journal_entry_is_open(union journal_res_state state)
 {
-	return j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
+	return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
 }
 
-void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
+static bool journal_entry_is_open(struct journal *j)
 {
-	struct journal_buf *w = journal_prev_buf(j);
-
-	atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
-
-	if (!need_write_just_set &&
-	    test_bit(JOURNAL_NEED_WRITE, &j->flags))
-		bch2_time_stats_update(j->delay_time,
-				       j->need_write_time);
-
-	closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
+	return __journal_entry_is_open(j->reservations);
 }
 
 static void journal_pin_new_entry(struct journal *j, int count)
@@ -77,39 +68,76 @@ static inline bool journal_entry_empty(struct jset *j)
 	return true;
 }
 
-static enum {
-	JOURNAL_ENTRY_ERROR,
-	JOURNAL_ENTRY_INUSE,
-	JOURNAL_ENTRY_CLOSED,
-	JOURNAL_UNLOCKED,
-} journal_buf_switch(struct journal *j, bool need_write_just_set)
+void bch2_journal_halt(struct journal *j)
+{
+	union journal_res_state old, new;
+	u64 v = atomic64_read(&j->reservations.counter);
+
+	do {
+		old.v = new.v = v;
+		if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
+			return;
+
+		new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
+	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
+				       old.v, new.v)) != old.v);
+
+	journal_wake(j);
+	closure_wake_up(&journal_cur_buf(j)->wait);
+	closure_wake_up(&journal_prev_buf(j)->wait);
+}
+
+/* journal entry close/open: */
+
+void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
+{
+	struct journal_buf *w = journal_prev_buf(j);
+
+	atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
+
+	if (!need_write_just_set &&
+	    test_bit(JOURNAL_NEED_WRITE, &j->flags))
+		bch2_time_stats_update(j->delay_time,
+				       j->need_write_time);
+
+	clear_bit(JOURNAL_NEED_WRITE, &j->flags);
+
+	closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
+}
+
+/*
+ * Returns true if journal entry is now closed:
+ */
+static bool __journal_entry_close(struct journal *j)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct journal_buf *buf = journal_cur_buf(j);
 	union journal_res_state old, new;
 	u64 v = atomic64_read(&j->reservations.counter);
+	bool set_need_write = false;
+	unsigned sectors;
 
 	lockdep_assert_held(&j->lock);
 
 	do {
 		old.v = new.v = v;
 		if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL)
-			return JOURNAL_ENTRY_CLOSED;
+			return true;
 
 		if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) {
 			/* this entry will never be written: */
 			closure_wake_up(&buf->wait);
-			return JOURNAL_ENTRY_ERROR;
+			return true;
 		}
 
-		if (new.prev_buf_unwritten)
-			return JOURNAL_ENTRY_INUSE;
+		if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
+			set_bit(JOURNAL_NEED_WRITE, &j->flags);
+			j->need_write_time = local_clock();
+			set_need_write = true;
+		}
 
-		/*
-		 * avoid race between setting buf->data->u64s and
-		 * journal_res_put starting write:
-		 */
-		journal_state_inc(&new);
+		if (new.prev_buf_unwritten)
+			return false;
 
 		new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
 		new.idx++;
@@ -119,15 +147,12 @@
 	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
 				       old.v, new.v)) != old.v);
 
-	clear_bit(JOURNAL_NEED_WRITE, &j->flags);
-
 	buf->data->u64s	= cpu_to_le32(old.cur_entry_offset);
 
-	j->prev_buf_sectors =
-		vstruct_blocks_plus(buf->data, c->block_bits,
-				    buf->u64s_reserved) *
-		c->opts.block_size;
-	BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
+	sectors = vstruct_blocks_plus(buf->data, c->block_bits,
+				      buf->u64s_reserved) << c->block_bits;
+	BUG_ON(sectors > buf->sectors);
+	buf->sectors = sectors;
 
 	bkey_extent_init(&buf->key);
@@ -163,32 +188,22 @@ static enum {
 	bch2_journal_buf_init(j);
 
 	cancel_delayed_work(&j->write_work);
-	spin_unlock(&j->lock);
 
 	/* ugh - might be called from __journal_res_get() under wait_event() */
 	__set_current_state(TASK_RUNNING);
-	bch2_journal_buf_put(j, old.idx, need_write_just_set);
-
-	return JOURNAL_UNLOCKED;
+	bch2_journal_buf_put(j, old.idx, set_need_write);
+	return true;
 }
 
-void bch2_journal_halt(struct journal *j)
+static bool journal_entry_close(struct journal *j)
 {
-	union journal_res_state old, new;
-	u64 v = atomic64_read(&j->reservations.counter);
-
-	do {
-		old.v = new.v = v;
-		if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
-			return;
+	bool ret;
 
-		new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
-	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
-				       old.v, new.v)) != old.v);
+	spin_lock(&j->lock);
+	ret = __journal_entry_close(j);
+	spin_unlock(&j->lock);
 
-	journal_wake(j);
-	closure_wake_up(&journal_cur_buf(j)->wait);
-	closure_wake_up(&journal_prev_buf(j)->wait);
+	return ret;
 }
 
 /*
@@ -196,17 +211,16 @@ void bch2_journal_halt(struct journal *j)
  * journal reservation - journal entry is open means journal is dirty:
  *
  * returns:
- * 1:		success
- * 0:		journal currently full (must wait)
- * -EROFS:	insufficient rw devices
- * -EIO:	journal error
+ * 0:		success
+ * -ENOSPC:	journal currently full, must invoke reclaim
+ * -EAGAIN:	journal blocked, must wait
+ * -EROFS:	insufficient rw devices or journal error
  */
 static int journal_entry_open(struct journal *j)
 {
 	struct journal_buf *buf = journal_cur_buf(j);
 	union journal_res_state old, new;
-	ssize_t u64s;
-	int sectors;
+	int u64s, ret;
 	u64 v;
 
 	lockdep_assert_held(&j->lock);
@@ -216,29 +230,22 @@ static int journal_entry_open(struct journal *j)
 		return -EAGAIN;
 
 	if (!fifo_free(&j->pin))
-		return 0;
+		return -ENOSPC;
 
-	sectors = bch2_journal_entry_sectors(j);
-	if (sectors <= 0)
-		return sectors;
+	ret = bch2_journal_space_available(j);
+	if (ret)
+		return ret;
 
-	buf->disk_sectors	= sectors;
 	buf->u64s_reserved	= j->entry_u64s_reserved;
+	buf->disk_sectors	= j->cur_entry_sectors;
+	buf->sectors		= min(buf->disk_sectors, buf->buf_size >> 9);
 
-	sectors = min_t(unsigned, sectors, buf->size >> 9);
-	j->cur_buf_sectors	= sectors;
-
-	u64s = (sectors << 9) / sizeof(u64);
-
-	/* Subtract the journal header */
-	u64s -= sizeof(struct jset) / sizeof(u64);
-	u64s -= buf->u64s_reserved;
-	u64s  = max_t(ssize_t, 0L, u64s);
-
-	BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
+	u64s = (int) (buf->sectors << 9) / sizeof(u64) -
+		journal_entry_overhead(j);
+	u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
 
 	if (u64s <= le32_to_cpu(buf->data->u64s))
-		return 0;
+		return -ENOSPC;
 
 	/*
	 * Must be set before marking the journal entry as open:
	 */
@@ -250,10 +257,11 @@ static int journal_entry_open(struct journal *j)
 		old.v = new.v = v;
 
 		if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
-			return -EIO;
+			return -EROFS;
 
 		/* Handle any already added entries */
 		new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
+
+		journal_state_inc(&new);
 	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
 				       old.v,
 				       new.v)) != old.v);
@@ -266,48 +274,16 @@ static int journal_entry_open(struct journal *j)
 			 &j->write_work,
 			 msecs_to_jiffies(j->write_delay_ms));
 	journal_wake(j);
-	return 1;
-}
-
-static bool __journal_entry_close(struct journal *j)
-{
-	bool set_need_write;
-
-	if (!journal_entry_is_open(j)) {
-		spin_unlock(&j->lock);
-		return true;
-	}
-
-	set_need_write = !test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags);
-	if (set_need_write)
-		j->need_write_time = local_clock();
-
-	switch (journal_buf_switch(j, set_need_write)) {
-	case JOURNAL_ENTRY_INUSE:
-		spin_unlock(&j->lock);
-		return false;
-	default:
-		spin_unlock(&j->lock);
-		fallthrough;
-	case JOURNAL_UNLOCKED:
-		return false;
-	}
-}
-
-static bool journal_entry_close(struct journal *j)
-{
-	spin_lock(&j->lock);
-	return __journal_entry_close(j);
+	return 0;
 }
 
 static bool journal_quiesced(struct journal *j)
 {
-	bool ret;
+	union journal_res_state state = READ_ONCE(j->reservations);
+	bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state);
 
-	spin_lock(&j->lock);
-	ret = !j->reservations.prev_buf_unwritten &&
-	      !journal_entry_is_open(j);
-	__journal_entry_close(j);
+	if (!ret)
+		journal_entry_close(j);
 
 	return ret;
 }
@@ -357,7 +333,11 @@ retry:
 	if (journal_res_get_fast(j, res, flags))
 		return 0;
 
+	if (bch2_journal_error(j))
+		return -EROFS;
+
 	spin_lock(&j->lock);
+
 	/*
 	 * Recheck after taking the lock, so we don't race with another thread
 	 * that just did journal_entry_open() and call journal_entry_close()
@@ -375,56 +355,42 @@ retry:
 	 */
 	buf = journal_cur_buf(j);
 	if (journal_entry_is_open(j) &&
-	    buf->size >> 9 < buf->disk_sectors &&
-	    buf->size < JOURNAL_ENTRY_SIZE_MAX)
-		j->buf_size_want = max(j->buf_size_want, buf->size << 1);
+	    buf->buf_size >> 9 < buf->disk_sectors &&
+	    buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
+		j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
 
-	/*
-	 * Close the current journal entry if necessary, then try to start a new
-	 * one:
-	 */
-	switch (journal_buf_switch(j, false)) {
-	case JOURNAL_ENTRY_ERROR:
-		spin_unlock(&j->lock);
-		return -EROFS;
-	case JOURNAL_ENTRY_INUSE:
+	if (journal_entry_is_open(j) &&
+	    !__journal_entry_close(j)) {
 		/*
-		 * The current journal entry is still open, but we failed to get
-		 * a journal reservation because there's not enough space in it,
-		 * and we can't close it and start another because we haven't
-		 * finished writing out the previous entry:
+		 * We failed to get a reservation on the current open journal
		 * entry because it's full, and we can't close it because
		 * there's still a previous one in flight:
 		 */
-		spin_unlock(&j->lock);
 		trace_journal_entry_full(c);
-		goto blocked;
-	case JOURNAL_ENTRY_CLOSED:
-		break;
-	case JOURNAL_UNLOCKED:
-		goto retry;
+		ret = -EAGAIN;
+	} else {
+		ret = journal_entry_open(j);
 	}
 
-	/* We now have a new, closed journal buf - see if we can open it: */
-	ret = journal_entry_open(j);
+	if ((ret == -EAGAIN || ret == -ENOSPC) &&
+	    !j->res_get_blocked_start)
+		j->res_get_blocked_start = local_clock() ?: 1;
+
 	spin_unlock(&j->lock);
 
-	if (ret < 0)
-		return ret;
-	if (ret)
+	if (!ret)
 		goto retry;
+	if (ret == -ENOSPC) {
+		/*
		 * Journal is full - can't rely on reclaim from work item due to
		 * freezing:
		 */
+		trace_journal_full(c);
+		bch2_journal_reclaim_work(&j->reclaim_work.work);
+		ret = -EAGAIN;
+	}
 
-	/* Journal's full, we have to wait */
-
-	/*
-	 * Direct reclaim - can't rely on reclaim from work item
-	 * due to freezing..
-	 */
-	bch2_journal_reclaim_work(&j->reclaim_work.work);
-
-	trace_journal_full(c);
-blocked:
-	if (!j->res_get_blocked_start)
-		j->res_get_blocked_start = local_clock() ?: 1;
-	return -EAGAIN;
+	return ret;
 }
 
 /*
@@ -461,7 +427,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
 	j->entry_u64s_reserved += d;
 
 	if (d <= 0)
-		goto out_unlock;
+		goto out;
 
 	j->cur_entry_u64s -= d;
 	smp_mb();
@@ -474,15 +440,12 @@ void bch2_journal_entry_res_resize(struct journal *j,
 		 * Not enough room in current journal entry, have to flush it:
 		 */
 		__journal_entry_close(j);
-		goto out;
+	} else {
+		journal_cur_buf(j)->u64s_reserved += d;
 	}
-
-	journal_cur_buf(j)->u64s_reserved += d;
-out_unlock:
-	spin_unlock(&j->lock);
 out:
+	spin_unlock(&j->lock);
 	res->u64s += d;
-	return;
 }
 
 /* journal flushing: */
@@ -512,47 +475,47 @@ int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	int ret;
-retry:
+
 	spin_lock(&j->lock);
 
-	if (seq < journal_cur_seq(j) ||
+	/*
	 * Can't try to open more than one sequence number ahead:
	 */
+	BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j));
+
+	if (journal_cur_seq(j) > seq ||
 	    journal_entry_is_open(j)) {
 		spin_unlock(&j->lock);
 		return 0;
 	}
 
-	if (journal_cur_seq(j) < seq) {
-		switch (journal_buf_switch(j, false)) {
-		case JOURNAL_ENTRY_ERROR:
-			spin_unlock(&j->lock);
-			return -EROFS;
-		case JOURNAL_ENTRY_INUSE:
-			/* haven't finished writing out the previous one: */
-			trace_journal_entry_full(c);
-			goto blocked;
-		case JOURNAL_ENTRY_CLOSED:
-			break;
-		case JOURNAL_UNLOCKED:
-			goto retry;
-		}
-	}
-
-	BUG_ON(journal_cur_seq(j) < seq);
+	if (journal_cur_seq(j) < seq &&
+	    !__journal_entry_close(j)) {
+		/* haven't finished writing out the previous one: */
+		trace_journal_entry_full(c);
+		ret = -EAGAIN;
+	} else {
+		BUG_ON(journal_cur_seq(j) != seq);
 
-	ret = journal_entry_open(j);
-	if (ret) {
-		spin_unlock(&j->lock);
-		return ret < 0 ? ret : 0;
+		ret = journal_entry_open(j);
 	}
-blocked:
-	if (!j->res_get_blocked_start)
+
+	if ((ret == -EAGAIN || ret == -ENOSPC) &&
+	    !j->res_get_blocked_start)
 		j->res_get_blocked_start = local_clock() ?: 1;
 
-	closure_wait(&j->async_wait, cl);
+	if (ret == -EAGAIN || ret == -ENOSPC)
+		closure_wait(&j->async_wait, cl);
+
 	spin_unlock(&j->lock);
-	bch2_journal_reclaim_work(&j->reclaim_work.work);
-	return -EAGAIN;
+
+	if (ret == -ENOSPC) {
+		trace_journal_full(c);
+		bch2_journal_reclaim_work(&j->reclaim_work.work);
+		ret = -EAGAIN;
+	}
+
+	return ret;
 }
 
 static int journal_seq_error(struct journal *j, u64 seq)
@@ -635,8 +598,7 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq,
 
 	if (seq == journal_cur_seq(j))
 		__journal_entry_close(j);
-	else
-		spin_unlock(&j->lock);
+	spin_unlock(&j->lock);
 }
 
 static int journal_seq_flushed(struct journal *j, u64 seq)
@@ -648,8 +610,7 @@ static int journal_seq_flushed(struct journal *j, u64 seq)
 
 	if (seq == journal_cur_seq(j))
 		__journal_entry_close(j);
-	else
-		spin_unlock(&j->lock);
+	spin_unlock(&j->lock);
 
 	return ret;
 }
@@ -783,7 +744,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 		goto err;
 
 	journal_buckets = bch2_sb_resize_journal(&ca->disk_sb,
-					nr + sizeof(*journal_buckets) / sizeof(u64));
+				nr + sizeof(*journal_buckets) / sizeof(u64));
 	if (!journal_buckets)
 		goto err;
@@ -846,9 +807,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 		ja->nr++;
 
 		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
-					  ca->mi.bucket_size,
-					  gc_phase(GC_PHASE_SB),
-					  0);
+				ca->mi.bucket_size,
+				gc_phase(GC_PHASE_SB),
+				0);
 
 		if (c) {
 			spin_unlock(&c->journal.lock);
@@ -899,7 +860,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 	 */
 
 	if (bch2_disk_reservation_get(c, &disk_res,
-				      bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
+			bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
 		mutex_unlock(&c->sb_lock);
 		return -ENOSPC;
 	}
@@ -996,7 +957,7 @@ void bch2_fs_journal_start(struct journal *j)
 	journal_pin_new_entry(j, 0);
 
 	/*
-	 * journal_buf_switch() only inits the next journal entry when it
+	 * __journal_entry_close() only inits the next journal entry when it
 	 * closes an open journal entry - the very first journal entry gets
 	 * initialized here:
 	 */
@@ -1063,8 +1024,8 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 
 void bch2_fs_journal_exit(struct journal *j)
 {
-	kvpfree(j->buf[1].data, j->buf[1].size);
-	kvpfree(j->buf[0].data, j->buf[0].size);
+	kvpfree(j->buf[1].data, j->buf[1].buf_size);
+	kvpfree(j->buf[0].data, j->buf[0].buf_size);
 	free_fifo(&j->pin);
 }
@@ -1088,8 +1049,8 @@ int bch2_fs_journal_init(struct journal *j)
 
 	lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
 
-	j->buf[0].size		= JOURNAL_ENTRY_SIZE_MIN;
-	j->buf[1].size		= JOURNAL_ENTRY_SIZE_MIN;
+	j->buf[0].buf_size	= JOURNAL_ENTRY_SIZE_MIN;
+	j->buf[1].buf_size	= JOURNAL_ENTRY_SIZE_MIN;
 	j->write_delay_ms	= 1000;
 	j->reclaim_delay_ms	= 100;
@@ -1102,8 +1063,8 @@ int bch2_fs_journal_init(struct journal *j)
 		 { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
 
 	if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
-	    !(j->buf[0].data = kvpmalloc(j->buf[0].size, GFP_KERNEL)) ||
-	    !(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL))) {
+	    !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) ||
+	    !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) {
 		ret = -ENOMEM;
 		goto out;
 	}
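
A note on the concurrency pattern this patch leans on throughout: every journal state transition (halting, closing the current entry, opening a new one) is expressed as a read/modify/compare-exchange loop over the packed journal_res_state word, so the transition stays atomic against concurrent reservation holders without taking a lock. Below is a minimal, self-contained sketch of that loop using userspace C11 atomics instead of the kernel's atomic64_cmpxchg(); the union layout and the ENTRY_ERROR_VAL sentinel are illustrative stand-ins, not bcachefs's actual definitions.

	/* cc -std=c11 halt_sketch.c */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative stand-in for the packed journal_res_state word. */
	union res_state {
		struct {
			uint32_t cur_entry_offset;	/* offset in u64s, or a sentinel */
			uint32_t idx;			/* current buffer index */
		};
		uint64_t v;
	};

	/* Hypothetical sentinel standing in for JOURNAL_ENTRY_ERROR_VAL: */
	#define ENTRY_ERROR_VAL 0xffffffffu

	/*
	 * Transition to the error state the way bch2_journal_halt() does:
	 * recompute the new state from the old and retry the compare-exchange
	 * until it wins, bailing out if another thread already halted.
	 */
	static bool journal_halt(_Atomic uint64_t *counter)
	{
		union res_state old, new;
		uint64_t v = atomic_load(counter);

		do {
			old.v = new.v = v;
			if (old.cur_entry_offset == ENTRY_ERROR_VAL)
				return false;	/* someone beat us to it */

			new.cur_entry_offset = ENTRY_ERROR_VAL;
			/* on failure, v is reloaded with the current value */
		} while (!atomic_compare_exchange_weak(counter, &v, new.v));

		return true;
	}

	int main(void)
	{
		union res_state s = { .cur_entry_offset = 42, .idx = 0 };
		_Atomic uint64_t counter;

		atomic_init(&counter, s.v);
		printf("halted: %d\n", journal_halt(&counter));	/* halted: 1 */
		printf("halted: %d\n", journal_halt(&counter));	/* halted: 0 */
		return 0;
	}

Because the whole state fits in one 64-bit word, a losing thread simply recomputes its transition from the freshly observed value; that is why __journal_entry_close() can set sentinels and bump the buffer index in a single step.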

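The other change worth highlighting is the calling convention: journal_entry_open() now reports status with conventional errnos (0 success, -ENOSPC journal full, -EAGAIN blocked, -EROFS error) instead of the old four-value journal_buf_switch() enum mixed with positive returns. Here is a rough sketch of how a caller dispatches on that contract, loosely mirroring the reworked __journal_res_get(); every name and the free-space bookkeeping are hypothetical stand-ins for illustration only.

	/* cc -std=c11 resget_sketch.c */
	#include <errno.h>
	#include <stdio.h>

	/* Hypothetical stand-in for struct journal. */
	struct journal {
		int free_u64s;	/* space left in the current entry */
	};

	/*
	 * Mirrors journal_entry_open()'s new contract: 0 on success, -ENOSPC
	 * when the journal is full, -EAGAIN when blocked on the previous
	 * write, -EROFS on journal error.
	 */
	static int entry_open(struct journal *j)
	{
		return j->free_u64s > 0 ? 0 : -ENOSPC;
	}

	/* Stand-in for direct reclaim via bch2_journal_reclaim_work(). */
	static void direct_reclaim(struct journal *j)
	{
		j->free_u64s = 512;
	}

	/* Dispatch shape of the reworked __journal_res_get(): */
	static int res_get(struct journal *j)
	{
		int ret = entry_open(j);

		switch (ret) {
		case 0:
			return 0;	/* entry open; reservation can proceed */
		case -ENOSPC:
			/*
			 * Journal full: kick reclaim directly (can't rely on
			 * the work item due to freezing), then tell the
			 * caller to wait and retry:
			 */
			direct_reclaim(j);
			return -EAGAIN;
		case -EAGAIN:
			return -EAGAIN;	/* previous entry still being written */
		default:
			return -EROFS;
		}
	}

	int main(void)
	{
		struct journal j = { .free_u64s = 0 };

		printf("first:  %d\n", res_get(&j));	/* -EAGAIN; reclaim kicked */
		printf("second: %d\n", res_get(&j));	/* 0; space now available */
		return 0;
	}

Folding the full and blocked cases into -ENOSPC/-EAGAIN is what lets the patch delete the blocked: label and the goto-based retry in both __journal_res_get() and bch2_journal_open_seq_async().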