diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-18 17:03:42 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-18 17:03:42 -0700 |
| commit | 51ed42a8a135511f6d6f75b56e85e6586a06a93c (patch) | |
| tree | 7cc371694ac7d6b8649a000c482a51cc3fee28e4 /fs/jbd2/journal.c | |
| parent | dddebdece62ead1ac1112e6df375f56a1cb45f84 (diff) | |
| parent | f9ca51596bbfd0f9c386dd1c613c394c78d9e5e6 (diff) | |
Merge tag 'ext4_for_linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"Many cleanups and bug fixes in ext4, especially for the fast commit
feature.
Also some performance improvements; in particular, improving IOPS and
throughput on fast devices running Async Direct I/O by up to 20% by
optimizing jbd2_transaction_committed()"
* tag 'ext4_for_linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
ext4: make sure the first directory block is not a hole
ext4: check dot and dotdot of dx_root before making dir indexed
ext4: sanity check for NULL pointer after ext4_force_shutdown
jbd2: increase maximum transaction size
jbd2: drop pointless shrinker batch initialization
jbd2: avoid infinite transaction commit loop
jbd2: precompute number of transaction descriptor blocks
jbd2: make jbd2_journal_get_max_txn_bufs() internal
jbd2: avoid mount failed when commit block is partial submitted
ext4: avoid writing unitialized memory to disk in EA inodes
ext4: don't track ranges in fast_commit if inode has inlined data
ext4: fix possible tid_t sequence overflows
ext4: use ext4_update_inode_fsync_trans() helper in inode creation
ext4: add missing MODULE_DESCRIPTION()
jbd2: add missing MODULE_DESCRIPTION()
ext4: use memtostr_pad() for s_volume_name
jbd2: speed up jbd2_transaction_committed()
ext4: make ext4_da_map_blocks() buffer_head unaware
ext4: make ext4_insert_delayed_block() insert multi-blocks
ext4: factor out a helper to check the cluster allocation state
...
Diffstat (limited to 'fs/jbd2/journal.c')
| -rw-r--r-- | fs/jbd2/journal.c | 123 |
1 files changed, 67 insertions, 56 deletions
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0d2825c131cc..1ebf2393bfb7 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -220,19 +220,12 @@ loop: * so we don't sleep */ DEFINE_WAIT(wait); - int should_sleep = 1; prepare_to_wait(&journal->j_wait_commit, &wait, TASK_INTERRUPTIBLE); - if (journal->j_commit_sequence != journal->j_commit_request) - should_sleep = 0; transaction = journal->j_running_transaction; - if (transaction && time_after_eq(jiffies, - transaction->t_expires)) - should_sleep = 0; - if (journal->j_flags & JBD2_UNMOUNT) - should_sleep = 0; - if (should_sleep) { + if (transaction == NULL || + time_before(jiffies, transaction->t_expires)) { write_unlock(&journal->j_state_lock); schedule(); write_lock(&journal->j_state_lock); @@ -316,11 +309,8 @@ static void journal_kill_thread(journal_t *journal) * * Return value: * <0: Error - * >=0: Finished OK - * - * On success: - * Bit 0 set == escape performed on the data - * Bit 1 set == buffer copy-out performed (kfree the data after IO) + * =0: Finished OK without escape + * =1: Finished OK with escape */ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, @@ -328,7 +318,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct buffer_head **bh_out, sector_t blocknr) { - int need_copy_out = 0; int done_copy_out = 0; int do_escape = 0; char *mapped_data; @@ -355,7 +344,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, atomic_set(&new_bh->b_count, 1); spin_lock(&jh_in->b_state_lock); -repeat: /* * If a new transaction has already done a buffer copy-out, then * we use that version of the data for the commit. @@ -365,8 +353,8 @@ repeat: new_folio = virt_to_folio(jh_in->b_frozen_data); new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data); } else { - new_folio = jh2bh(jh_in)->b_folio; - new_offset = offset_in_folio(new_folio, jh2bh(jh_in)->b_data); + new_folio = bh_in->b_folio; + new_offset = offset_in_folio(new_folio, bh_in->b_data); } mapped_data = kmap_local_folio(new_folio, new_offset); @@ -383,54 +371,52 @@ repeat: /* * Check for escaping */ - if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) { - need_copy_out = 1; + if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) do_escape = 1; - } kunmap_local(mapped_data); /* * Do we need to do a data copy? */ - if (need_copy_out && !done_copy_out) { + if (do_escape && !done_copy_out) { char *tmp; spin_unlock(&jh_in->b_state_lock); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); if (!tmp) { brelse(new_bh); + free_buffer_head(new_bh); return -ENOMEM; } spin_lock(&jh_in->b_state_lock); if (jh_in->b_frozen_data) { jbd2_free(tmp, bh_in->b_size); - goto repeat; + goto copy_done; } jh_in->b_frozen_data = tmp; memcpy_from_folio(tmp, new_folio, new_offset, bh_in->b_size); - - new_folio = virt_to_folio(tmp); - new_offset = offset_in_folio(new_folio, tmp); - done_copy_out = 1; - /* * This isn't strictly necessary, as we're using frozen * data for the escaping, but it keeps consistency with * b_frozen_data usage. */ jh_in->b_frozen_triggers = jh_in->b_triggers; + +copy_done: + new_folio = virt_to_folio(jh_in->b_frozen_data); + new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data); + done_copy_out = 1; } /* * Did we need to do an escaping? Now we've done all the * copying, we can finally do so. + * b_frozen_data is from jbd2_alloc() which always provides an + * address from the direct kernels mapping. */ - if (do_escape) { - mapped_data = kmap_local_folio(new_folio, new_offset); - *((unsigned int *)mapped_data) = 0; - kunmap_local(mapped_data); - } + if (do_escape) + *((unsigned int *)jh_in->b_frozen_data) = 0; folio_set_bh(new_bh, new_folio, new_offset); new_bh->b_size = bh_in->b_size; @@ -454,7 +440,7 @@ repeat: set_buffer_shadow(bh_in); spin_unlock(&jh_in->b_state_lock); - return do_escape | (done_copy_out << 1); + return do_escape; } /* @@ -789,17 +775,7 @@ EXPORT_SYMBOL(jbd2_fc_end_commit_fallback); /* Return 1 when transaction with given tid has already committed. */ int jbd2_transaction_committed(journal_t *journal, tid_t tid) { - int ret = 1; - - read_lock(&journal->j_state_lock); - if (journal->j_running_transaction && - journal->j_running_transaction->t_tid == tid) - ret = 0; - if (journal->j_committing_transaction && - journal->j_committing_transaction->t_tid == tid) - ret = 0; - read_unlock(&journal->j_state_lock); - return ret; + return tid_geq(READ_ONCE(journal->j_commit_sequence), tid); } EXPORT_SYMBOL(jbd2_transaction_committed); @@ -1451,6 +1427,48 @@ static int journal_revoke_records_per_block(journal_t *journal) return space / record_size; } +static int jbd2_journal_get_max_txn_bufs(journal_t *journal) +{ + return (journal->j_total_len - journal->j_fc_wbufsize) / 3; +} + +/* + * Base amount of descriptor blocks we reserve for each transaction. + */ +static int jbd2_descriptor_blocks_per_trans(journal_t *journal) +{ + int tag_space = journal->j_blocksize - sizeof(journal_header_t); + int tags_per_block; + + /* Subtract UUID */ + tag_space -= 16; + if (jbd2_journal_has_csum_v2or3(journal)) + tag_space -= sizeof(struct jbd2_journal_block_tail); + /* Commit code leaves a slack space of 16 bytes at the end of block */ + tags_per_block = (tag_space - 16) / journal_tag_bytes(journal); + /* + * Revoke descriptors are accounted separately so we need to reserve + * space for commit block and normal transaction descriptor blocks. + */ + return 1 + DIV_ROUND_UP(jbd2_journal_get_max_txn_bufs(journal), + tags_per_block); +} + +/* + * Initialize number of blocks each transaction reserves for its bookkeeping + * and maximum number of blocks a transaction can use. This needs to be called + * after the journal size and the fastcommit area size are initialized. + */ +static void jbd2_journal_init_transaction_limits(journal_t *journal) +{ + journal->j_revoke_records_per_block = + journal_revoke_records_per_block(journal); + journal->j_transaction_overhead_buffers = + jbd2_descriptor_blocks_per_trans(journal); + journal->j_max_transaction_buffers = + jbd2_journal_get_max_txn_bufs(journal); +} + /* * Load the on-disk journal superblock and read the key fields into the * journal_t. @@ -1492,8 +1510,8 @@ static int journal_load_superblock(journal_t *journal) if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); - journal->j_revoke_records_per_block = - journal_revoke_records_per_block(journal); + /* After journal features are set, we can compute transaction limits */ + jbd2_journal_init_transaction_limits(journal); if (jbd2_has_feature_fast_commit(journal)) { journal->j_fc_last = be32_to_cpu(sb->s_maxlen); @@ -1599,7 +1617,6 @@ static journal_t *journal_init_common(struct block_device *bdev, journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan; journal->j_shrinker->count_objects = jbd2_journal_shrink_count; - journal->j_shrinker->batch = journal->j_max_transaction_buffers; journal->j_shrinker->private_data = journal; shrinker_register(journal->j_shrinker); @@ -1743,8 +1760,6 @@ static int journal_reset(journal_t *journal) journal->j_commit_sequence = journal->j_transaction_sequence - 1; journal->j_commit_request = journal->j_commit_sequence; - journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal); - /* * Now that journal recovery is done, turn fast commits off here. This * way, if fast commit was enabled before the crash but if now FS has @@ -2285,8 +2300,6 @@ jbd2_journal_initialize_fast_commit(journal_t *journal) journal->j_fc_first = journal->j_last + 1; journal->j_fc_off = 0; journal->j_free = journal->j_last - journal->j_first; - journal->j_max_transaction_buffers = - jbd2_journal_get_max_txn_bufs(journal); return 0; } @@ -2374,8 +2387,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat, sb->s_feature_ro_compat |= cpu_to_be32(ro); sb->s_feature_incompat |= cpu_to_be32(incompat); unlock_buffer(journal->j_sb_buffer); - journal->j_revoke_records_per_block = - journal_revoke_records_per_block(journal); + jbd2_journal_init_transaction_limits(journal); return 1; #undef COMPAT_FEATURE_ON @@ -2406,8 +2418,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, sb->s_feature_compat &= ~cpu_to_be32(compat); sb->s_feature_ro_compat &= ~cpu_to_be32(ro); sb->s_feature_incompat &= ~cpu_to_be32(incompat); - journal->j_revoke_records_per_block = - journal_revoke_records_per_block(journal); + jbd2_journal_init_transaction_limits(journal); } EXPORT_SYMBOL(jbd2_journal_clear_features); |
