diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-07 13:03:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-07 13:03:53 -0700 |
commit | 09dc942c2a767e2d298f1cc9294bc19c7d7208c5 (patch) | |
tree | d310c118467c90c264e953bdc320ae08394c662a /fs/jbd2 | |
parent | 90e0c225968f0878e090c7ff3f88323973476cee (diff) | |
parent | 6c7a120ac6c62316ab1fc78dfc0a7b13f3bfcbff (diff) |
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
ext4: Adding error check after calling ext4_mb_regular_allocator()
ext4: Fix dirtying of journalled buffers in data=journal mode
ext4: re-inline ext4_rec_len_(to|from)_disk functions
jbd2: Remove t_handle_lock from start_this_handle()
jbd2: Change j_state_lock to be a rwlock_t
jbd2: Use atomic variables to avoid taking t_handle_lock in jbd2_journal_stop
ext4: Add mount options in superblock
ext4: force block allocation on quota_off
ext4: fix freeze deadlock under IO
ext4: drop inode from orphan list if ext4_delete_inode() fails
ext4: check to make make sure bd_dev is set before dereferencing it
jbd2: Make barrier messages less scary
ext4: don't print scary messages for allocation failures post-abort
ext4: fix EFBIG edge case when writing to large non-extent file
ext4: fix ext4_get_blocks references
ext4: Always journal quota file modifications
ext4: Fix potential memory leak in ext4_fill_super
ext4: Don't error out the fs if the user tries to make a file too big
ext4: allocate stripe-multiple IOs on stripe boundaries
ext4: move aio completion after unwritten extent conversion
...
Fix up conflicts in fs/ext4/inode.c as per Ted.
Fix up xfs conflicts as per earlier xfs merge.
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/checkpoint.c | 18 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 50 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 121 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 10 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 233 |
5 files changed, 236 insertions, 196 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 076d1cc44f95..1c23a0f4e8a3 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh) void __jbd2_log_wait_for_space(journal_t *journal) { int nblocks, space_left; - assert_spin_locked(&journal->j_state_lock); + /* assert_spin_locked(&journal->j_state_lock); */ nblocks = jbd_space_needed(journal); while (__jbd2_log_space_left(journal) < nblocks) { if (journal->j_flags & JBD2_ABORT) return; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); mutex_lock(&journal->j_checkpoint_mutex); /* @@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) * filesystem, so abort the journal and leave a stack * trace for forensic evidence. */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); space_left = __jbd2_log_space_left(journal); @@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) if (journal->j_committing_transaction) tid = journal->j_committing_transaction->t_tid; spin_unlock(&journal->j_list_lock); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); if (chkpt) { jbd2_log_do_checkpoint(journal); } else if (jbd2_cleanup_journal_tail(journal) == 0) { @@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) WARN_ON(1); jbd2_journal_abort(journal, 0); } - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); } else { spin_unlock(&journal->j_list_lock); } @@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * next transaction ID we will write, and where it will * start. */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); transaction = journal->j_checkpoint_transactions; if (transaction) { @@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) /* If the oldest pinned transaction is at the tail of the log already then there's not much we can do right now. */ if (journal->j_tail_sequence == first_tid) { - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return 1; } @@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) journal->j_free += freed; journal->j_tail_sequence = first_tid; journal->j_tail = blocknr; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); /* * If there is an external journal, we need to make sure that @@ -775,7 +775,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); J_ASSERT(transaction->t_checkpoint_io_list == NULL); - J_ASSERT(transaction->t_updates == 0); + J_ASSERT(atomic_read(&transaction->t_updates) == 0); J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 75716d3d2be0..f52e5e8049f1 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -150,11 +150,11 @@ static int journal_submit_commit_record(journal_t *journal, */ if (ret == -EOPNOTSUPP && barrier_done) { printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", journal->j_devname); - spin_lock(&journal->j_state_lock); + "JBD2: Disabling barriers on %s, " + "not supported by device\n", journal->j_devname); + write_lock(&journal->j_state_lock); journal->j_flags &= ~JBD2_BARRIER; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); /* And try again, without the barrier */ lock_buffer(bh); @@ -180,11 +180,11 @@ retry: wait_on_buffer(bh); if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { printk(KERN_WARNING - "JBD2: wait_on_commit_record: sync failed on %s - " - "disabling barriers\n", journal->j_devname); - spin_lock(&journal->j_state_lock); + "JBD2: %s: disabling barries on %s - not supported " + "by device\n", __func__, journal->j_devname); + write_lock(&journal->j_state_lock); journal->j_flags &= ~JBD2_BARRIER; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); lock_buffer(bh); clear_buffer_dirty(bh); @@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd_debug(1, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; /* @@ -417,23 +417,23 @@ void jbd2_journal_commit_transaction(journal_t *journal) stats.run.rs_locked); spin_lock(&commit_transaction->t_handle_lock); - while (commit_transaction->t_updates) { + while (atomic_read(&commit_transaction->t_updates)) { DEFINE_WAIT(wait); prepare_to_wait(&journal->j_wait_updates, &wait, TASK_UNINTERRUPTIBLE); - if (commit_transaction->t_updates) { + if (atomic_read(&commit_transaction->t_updates)) { spin_unlock(&commit_transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); schedule(); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); spin_lock(&commit_transaction->t_handle_lock); } finish_wait(&journal->j_wait_updates, &wait); } spin_unlock(&commit_transaction->t_handle_lock); - J_ASSERT (commit_transaction->t_outstanding_credits <= + J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <= journal->j_max_transaction_buffers); /* @@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) start_time = ktime_get(); commit_transaction->t_log_start = journal->j_head; wake_up(&journal->j_wait_transaction_locked); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); jbd_debug (3, "JBD: commit phase 2\n"); @@ -519,19 +519,20 @@ void jbd2_journal_commit_transaction(journal_t *journal) * transaction! Now comes the tricky part: we need to write out * metadata. Loop over the transaction's entire buffer list: */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); commit_transaction->t_state = T_COMMIT; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); trace_jbd2_commit_logging(journal, commit_transaction); stats.run.rs_logging = jiffies; stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing, stats.run.rs_logging); - stats.run.rs_blocks = commit_transaction->t_outstanding_credits; + stats.run.rs_blocks = + atomic_read(&commit_transaction->t_outstanding_credits); stats.run.rs_blocks_logged = 0; J_ASSERT(commit_transaction->t_nr_buffers <= - commit_transaction->t_outstanding_credits); + atomic_read(&commit_transaction->t_outstanding_credits)); err = 0; descriptor = NULL; @@ -616,7 +617,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) * the free space in the log, but this counter is changed * by jbd2_journal_next_log_block() also. */ - commit_transaction->t_outstanding_credits--; + atomic_dec(&commit_transaction->t_outstanding_credits); /* Bump b_count to prevent truncate from stumbling over the shadowed buffer! @@@ This can go if we ever get @@ -977,7 +978,7 @@ restart_loop: * __jbd2_journal_drop_transaction(). Otherwise we could race with * other checkpointing code processing the transaction... */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); /* * Now recheck if some buffers did not get attached to the transaction @@ -985,7 +986,7 @@ restart_loop: */ if (commit_transaction->t_forget) { spin_unlock(&journal->j_list_lock); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); goto restart_loop; } @@ -1003,7 +1004,8 @@ restart_loop: * File the transaction statistics */ stats.ts_tid = commit_transaction->t_tid; - stats.run.rs_handle_count = commit_transaction->t_handle_count; + stats.run.rs_handle_count = + atomic_read(&commit_transaction->t_handle_count); trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, commit_transaction->t_tid, &stats.run); @@ -1037,7 +1039,7 @@ restart_loop: journal->j_average_commit_time*3) / 4; else journal->j_average_commit_time = commit_time; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); if (commit_transaction->t_checkpoint_list == NULL && commit_transaction->t_checkpoint_io_list == NULL) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 036880895bfc..ad5866aaf0f9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -41,6 +41,7 @@ #include <linux/hash.h> #include <linux/log2.h> #include <linux/vmalloc.h> +#include <linux/backing-dev.h> #define CREATE_TRACE_POINTS #include <trace/events/jbd2.h> @@ -48,8 +49,6 @@ #include <asm/uaccess.h> #include <asm/page.h> -EXPORT_SYMBOL(jbd2_journal_start); -EXPORT_SYMBOL(jbd2_journal_restart); EXPORT_SYMBOL(jbd2_journal_extend); EXPORT_SYMBOL(jbd2_journal_stop); EXPORT_SYMBOL(jbd2_journal_lock_updates); @@ -143,7 +142,7 @@ static int kjournald2(void *arg) /* * And now, wait forever for commit wakeup events. */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); loop: if (journal->j_flags & JBD2_UNMOUNT) @@ -154,10 +153,10 @@ loop: if (journal->j_commit_sequence != journal->j_commit_request) { jbd_debug(1, "OK, requests differ\n"); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); jbd2_journal_commit_transaction(journal); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); goto loop; } @@ -169,9 +168,9 @@ loop: * be already stopped. */ jbd_debug(1, "Now suspending kjournald2\n"); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); refrigerator(); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); } else { /* * We assume on resume that commits are already there, @@ -191,9 +190,9 @@ loop: if (journal->j_flags & JBD2_UNMOUNT) should_sleep = 0; if (should_sleep) { - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); schedule(); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); } finish_wait(&journal->j_wait_commit, &wait); } @@ -211,7 +210,7 @@ loop: goto loop; end_loop: - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); @@ -234,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal) static void journal_kill_thread(journal_t *journal) { - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); journal->j_flags |= JBD2_UNMOUNT; while (journal->j_task) { wake_up(&journal->j_wait_commit); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); wait_event(journal->j_wait_done_commit, journal->j_task == NULL); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); } - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } /* @@ -310,7 +309,17 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, */ J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); - new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); +retry_alloc: + new_bh = alloc_buffer_head(GFP_NOFS); + if (!new_bh) { + /* + * Failure is not an option, but __GFP_NOFAIL is going + * away; so we retry ourselves here. + */ + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry_alloc; + } + /* keep subsequent assertions sane */ new_bh->b_state = 0; init_buffer(new_bh, NULL, NULL); @@ -442,7 +451,7 @@ int __jbd2_log_space_left(journal_t *journal) { int left = journal->j_free; - assert_spin_locked(&journal->j_state_lock); + /* assert_spin_locked(&journal->j_state_lock); */ /* * Be pessimistic here about the number of those free blocks which @@ -487,9 +496,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid) { int ret; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); ret = __jbd2_log_start_commit(journal, tid); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return ret; } @@ -508,7 +517,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal) transaction_t *transaction = NULL; tid_t tid; - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { transaction = journal->j_running_transaction; __jbd2_log_start_commit(journal, transaction->t_tid); @@ -516,12 +525,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal) transaction = journal->j_committing_transaction; if (!transaction) { - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); return 0; /* Nothing to retry */ } tid = transaction->t_tid; - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); jbd2_log_wait_commit(journal, tid); return 1; } @@ -535,7 +544,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) { int ret = 0; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); if (journal->j_running_transaction) { tid_t tid = journal->j_running_transaction->t_tid; @@ -554,7 +563,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) *ptid = journal->j_committing_transaction->t_tid; ret = 1; } - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return ret; } @@ -566,26 +575,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) { int err = 0; + read_lock(&journal->j_state_lock); #ifdef CONFIG_JBD2_DEBUG - spin_lock(&journal->j_state_lock); if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_EMERG "%s: error: j_commit_request=%d, tid=%d\n", __func__, journal->j_commit_request, tid); } - spin_unlock(&journal->j_state_lock); #endif - spin_lock(&journal->j_state_lock); while (tid_gt(tid, journal->j_commit_sequence)) { jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", tid, journal->j_commit_sequence); wake_up(&journal->j_wait_commit); - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); wait_event(journal->j_wait_done_commit, !tid_gt(tid, journal->j_commit_sequence)); - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); } - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); if (unlikely(is_journal_aborted(journal))) { printk(KERN_EMERG "journal commit I/O error\n"); @@ -602,7 +609,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) { unsigned long blocknr; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); J_ASSERT(journal->j_free > 1); blocknr = journal->j_head; @@ -610,7 +617,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) journal->j_free--; if (journal->j_head == journal->j_last) journal->j_head = journal->j_first; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return jbd2_journal_bmap(journal, blocknr, retp); } @@ -830,7 +837,7 @@ static journal_t * journal_init_common (void) mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_list_lock); - spin_lock_init(&journal->j_state_lock); + rwlock_init(&journal->j_state_lock); journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); journal->j_min_batch_time = 0; @@ -1096,14 +1103,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); sb->s_start = cpu_to_be32(journal->j_tail); sb->s_errno = cpu_to_be32(journal->j_errno); - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); BUFFER_TRACE(bh, "marking dirty"); mark_buffer_dirty(bh); @@ -1124,12 +1131,12 @@ out: * any future commit will have to be careful to update the * superblock again to re-record the true start of the log. */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); if (sb->s_start) journal->j_flags &= ~JBD2_FLUSHED; else journal->j_flags |= JBD2_FLUSHED; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } /* @@ -1391,13 +1398,9 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { - journal_superblock_t *sb; - if (!compat && !ro && !incompat) return 1; - sb = journal->j_superblock; - /* We can support any known requested features iff the * superblock is in version 2. Otherwise we fail to support any * extended sb features. */ @@ -1545,7 +1548,7 @@ int jbd2_journal_flush(journal_t *journal) transaction_t *transaction = NULL; unsigned long old_tail; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); /* Force everything buffered to the log... */ if (journal->j_running_transaction) { @@ -1558,10 +1561,10 @@ int jbd2_journal_flush(journal_t *journal) if (transaction) { tid_t tid = transaction->t_tid; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); jbd2_log_wait_commit(journal, tid); } else { - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } /* ...and flush everything in the log out to disk. */ @@ -1585,12 +1588,12 @@ int jbd2_journal_flush(journal_t *journal) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); old_tail = journal->j_tail; journal->j_tail = 0; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); jbd2_journal_update_superblock(journal, 1); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); journal->j_tail = old_tail; J_ASSERT(!journal->j_running_transaction); @@ -1598,7 +1601,7 @@ int jbd2_journal_flush(journal_t *journal) J_ASSERT(!journal->j_checkpoint_transactions); J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return 0; } @@ -1617,7 +1620,6 @@ int jbd2_journal_flush(journal_t *journal) int jbd2_journal_wipe(journal_t *journal, int write) { - journal_superblock_t *sb; int err = 0; J_ASSERT (!(journal->j_flags & JBD2_LOADED)); @@ -1626,8 +1628,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) if (err) return err; - sb = journal->j_superblock; - if (!journal->j_tail) goto no_recovery; @@ -1665,12 +1665,12 @@ void __jbd2_journal_abort_hard(journal_t *journal) printk(KERN_ERR "Aborting journal on device %s.\n", journal->j_devname); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); journal->j_flags |= JBD2_ABORT; transaction = journal->j_running_transaction; if (transaction) __jbd2_log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } /* Soft abort: record the abort error status in the journal superblock, @@ -1755,12 +1755,12 @@ int jbd2_journal_errno(journal_t *journal) { int err; - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); if (journal->j_flags & JBD2_ABORT) err = -EROFS; else err = journal->j_errno; - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); return err; } @@ -1775,12 +1775,12 @@ int jbd2_journal_clear_err(journal_t *journal) { int err = 0; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); if (journal->j_flags & JBD2_ABORT) err = -EROFS; else journal->j_errno = 0; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return err; } @@ -1793,10 +1793,10 @@ int jbd2_journal_clear_err(journal_t *journal) */ void jbd2_journal_ack_err(journal_t *journal) { - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); if (journal->j_errno) journal->j_flags |= JBD2_ACK_ERR; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } int jbd2_journal_blocks_per_page(struct inode *inode) @@ -2201,8 +2201,6 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode) { - int writeout = 0; - if (!journal) return; restart: @@ -2219,9 +2217,6 @@ restart: goto restart; } - /* Do we need to wait for data writeback? */ - if (journal->j_committing_transaction == jinode->i_transaction) - writeout = 1; if (jinode->i_transaction) { list_del(&jinode->i_list); jinode->i_transaction = NULL; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 049281b7cb89..2bc4d5f116f1 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -285,12 +285,10 @@ int jbd2_journal_recover(journal_t *journal) int jbd2_journal_skip_recovery(journal_t *journal) { int err; - journal_superblock_t * sb; struct recovery_info info; memset (&info, 0, sizeof(info)); - sb = journal->j_superblock; err = do_one_pass(journal, &info, PASS_SCAN); @@ -299,7 +297,8 @@ int jbd2_journal_skip_recovery(journal_t *journal) ++journal->j_transaction_sequence; } else { #ifdef CONFIG_JBD2_DEBUG - int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); + int dropped = info.end_transaction - + be32_to_cpu(journal->j_superblock->s_sequence); #endif jbd_debug(1, "JBD: ignoring %d transaction%s from the journal.\n", @@ -365,11 +364,6 @@ static int do_one_pass(journal_t *journal, int tag_bytes = journal_tag_bytes(journal); __u32 crc32_sum = ~0; /* Transactional Checksums */ - /* Precompute the maximum metadata descriptors in a descriptor block */ - int MAX_BLOCKS_PER_DESC; - MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) - / tag_bytes); - /* * First thing is to establish what we expect to find in the log * (in terms of transaction IDs), and where (in terms of log diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index b8e0806681bb..d95cc9d0401d 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -26,6 +26,8 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/hrtimer.h> +#include <linux/backing-dev.h> +#include <linux/module.h> static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); @@ -53,6 +55,9 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) transaction->t_tid = journal->j_transaction_sequence++; transaction->t_expires = jiffies + journal->j_commit_interval; spin_lock_init(&transaction->t_handle_lock); + atomic_set(&transaction->t_updates, 0); + atomic_set(&transaction->t_outstanding_credits, 0); + atomic_set(&transaction->t_handle_count, 0); INIT_LIST_HEAD(&transaction->t_inode_list); INIT_LIST_HEAD(&transaction->t_private_list); @@ -83,65 +88,75 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) * transaction's buffer credits. */ -static int start_this_handle(journal_t *journal, handle_t *handle) +static int start_this_handle(journal_t *journal, handle_t *handle, + int gfp_mask) { transaction_t *transaction; int needed; int nblocks = handle->h_buffer_credits; transaction_t *new_transaction = NULL; - int ret = 0; unsigned long ts = jiffies; if (nblocks > journal->j_max_transaction_buffers) { printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", current->comm, nblocks, journal->j_max_transaction_buffers); - ret = -ENOSPC; - goto out; + return -ENOSPC; } alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = kzalloc(sizeof(*new_transaction), - GFP_NOFS|__GFP_NOFAIL); + new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask); if (!new_transaction) { - ret = -ENOMEM; - goto out; + /* + * If __GFP_FS is not present, then we may be + * being called from inside the fs writeback + * layer, so we MUST NOT fail. Since + * __GFP_NOFAIL is going away, we will arrange + * to retry the allocation ourselves. + */ + if ((gfp_mask & __GFP_FS) == 0) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto alloc_transaction; + } + return -ENOMEM; } } jbd_debug(3, "New handle %p going live.\n", handle); -repeat: - /* * We need to hold j_state_lock until t_updates has been incremented, * for proper journal barrier handling */ - spin_lock(&journal->j_state_lock); -repeat_locked: +repeat: + read_lock(&journal->j_state_lock); if (is_journal_aborted(journal) || (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { - spin_unlock(&journal->j_state_lock); - ret = -EROFS; - goto out; + read_unlock(&journal->j_state_lock); + kfree(new_transaction); + return -EROFS; } /* Wait on the journal's transaction barrier if necessary */ if (journal->j_barrier_count) { - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); wait_event(journal->j_wait_transaction_locked, journal->j_barrier_count == 0); goto repeat; } if (!journal->j_running_transaction) { - if (!new_transaction) { - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); + if (!new_transaction) goto alloc_transaction; + write_lock(&journal->j_state_lock); + if (!journal->j_running_transaction) { + jbd2_get_transaction(journal, new_transaction); + new_transaction = NULL; } - jbd2_get_transaction(journal, new_transaction); - new_transaction = NULL; + write_unlock(&journal->j_state_lock); + goto repeat; } transaction = journal->j_running_transaction; @@ -155,7 +170,7 @@ repeat_locked: prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); goto repeat; @@ -166,8 +181,8 @@ repeat_locked: * buffers requested by this operation, we need to stall pending a log * checkpoint to free some more log space. */ - spin_lock(&transaction->t_handle_lock); - needed = transaction->t_outstanding_credits + nblocks; + needed = atomic_add_return(nblocks, + &transaction->t_outstanding_credits); if (needed > journal->j_max_transaction_buffers) { /* @@ -178,11 +193,11 @@ repeat_locked: DEFINE_WAIT(wait); jbd_debug(2, "Handle %p starting new commit...\n", handle); - spin_unlock(&transaction->t_handle_lock); + atomic_sub(nblocks, &transaction->t_outstanding_credits); prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); __jbd2_log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); goto repeat; @@ -215,35 +230,48 @@ repeat_locked: */ if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); - spin_unlock(&transaction->t_handle_lock); - __jbd2_log_wait_for_space(journal); - goto repeat_locked; + atomic_sub(nblocks, &transaction->t_outstanding_credits); + read_unlock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); + if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) + __jbd2_log_wait_for_space(journal); + write_unlock(&journal->j_state_lock); + goto repeat; } /* OK, account for the buffers that this operation expects to - * use and add the handle to the running transaction. */ - - if (time_after(transaction->t_start, ts)) { + * use and add the handle to the running transaction. + * + * In order for t_max_wait to be reliable, it must be + * protected by a lock. But doing so will mean that + * start_this_handle() can not be run in parallel on SMP + * systems, which limits our scalability. So we only enable + * it when debugging is enabled. We may want to use a + * separate flag, eventually, so we can enable this + * independently of debugging. + */ +#ifdef CONFIG_JBD2_DEBUG + if (jbd2_journal_enable_debug && + time_after(transaction->t_start, ts)) { ts = jbd2_time_diff(ts, transaction->t_start); + spin_lock(&transaction->t_handle_lock); if (ts > transaction->t_max_wait) transaction->t_max_wait = ts; + spin_unlock(&transaction->t_handle_lock); } - +#endif handle->h_transaction = transaction; - transaction->t_outstanding_credits += nblocks; - transaction->t_updates++; - transaction->t_handle_count++; + atomic_inc(&transaction->t_updates); + atomic_inc(&transaction->t_handle_count); jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", - handle, nblocks, transaction->t_outstanding_credits, + handle, nblocks, + atomic_read(&transaction->t_outstanding_credits), __jbd2_log_space_left(journal)); - spin_unlock(&transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); lock_map_acquire(&handle->h_lockdep_map); -out: - if (unlikely(new_transaction)) /* It's usually NULL */ - kfree(new_transaction); - return ret; + kfree(new_transaction); + return 0; } static struct lock_class_key jbd2_handle_key; @@ -278,7 +306,7 @@ static handle_t *new_handle(int nblocks) * * Return a pointer to a newly allocated handle, or NULL on failure */ -handle_t *jbd2_journal_start(journal_t *journal, int nblocks) +handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) { handle_t *handle = journal_current_handle(); int err; @@ -298,7 +326,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) current->journal_info = handle; - err = start_this_handle(journal, handle); + err = start_this_handle(journal, handle, gfp_mask); if (err < 0) { jbd2_free_handle(handle); current->journal_info = NULL; @@ -308,6 +336,15 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) out: return handle; } +EXPORT_SYMBOL(jbd2__journal_start); + + +handle_t *jbd2_journal_start(journal_t *journal, int nblocks) +{ + return jbd2__journal_start(journal, nblocks, GFP_NOFS); +} +EXPORT_SYMBOL(jbd2_journal_start); + /** * int jbd2_journal_extend() - extend buffer credits. @@ -342,7 +379,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) result = 1; - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); /* Don't extend a locked-down transaction! */ if (handle->h_transaction->t_state != T_RUNNING) { @@ -352,7 +389,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) } spin_lock(&transaction->t_handle_lock); - wanted = transaction->t_outstanding_credits + nblocks; + wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks; if (wanted > journal->j_max_transaction_buffers) { jbd_debug(3, "denied handle %p %d blocks: " @@ -367,14 +404,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) } handle->h_buffer_credits += nblocks; - transaction->t_outstanding_credits += nblocks; + atomic_add(nblocks, &transaction->t_outstanding_credits); result = 0; jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); unlock: spin_unlock(&transaction->t_handle_lock); error_out: - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); out: return result; } @@ -394,8 +431,7 @@ out: * transaction capabable of guaranteeing the requested number of * credits. */ - -int jbd2_journal_restart(handle_t *handle, int nblocks) +int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; @@ -410,29 +446,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) * First unlink the handle from its current transaction, and start the * commit on that. */ - J_ASSERT(transaction->t_updates > 0); + J_ASSERT(atomic_read(&transaction->t_updates) > 0); J_ASSERT(journal_current_handle() == handle); - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); spin_lock(&transaction->t_handle_lock); - transaction->t_outstanding_credits -= handle->h_buffer_credits; - transaction->t_updates--; - - if (!transaction->t_updates) + atomic_sub(handle->h_buffer_credits, + &transaction->t_outstanding_credits); + if (atomic_dec_and_test(&transaction->t_updates)) wake_up(&journal->j_wait_updates); spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); __jbd2_log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); lock_map_release(&handle->h_lockdep_map); handle->h_buffer_credits = nblocks; - ret = start_this_handle(journal, handle); + ret = start_this_handle(journal, handle, gfp_mask); return ret; } +EXPORT_SYMBOL(jbd2__journal_restart); +int jbd2_journal_restart(handle_t *handle, int nblocks) +{ + return jbd2__journal_restart(handle, nblocks, GFP_NOFS); +} +EXPORT_SYMBOL(jbd2_journal_restart); + /** * void jbd2_journal_lock_updates () - establish a transaction barrier. * @journal: Journal to establish a barrier on. @@ -447,7 +489,7 @@ void jbd2_journal_lock_updates(journal_t *journal) { DEFINE_WAIT(wait); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); ++journal->j_barrier_count; /* Wait until there are no running updates */ @@ -458,19 +500,19 @@ void jbd2_journal_lock_updates(journal_t *journal) break; spin_lock(&transaction->t_handle_lock); - if (!transaction->t_updates) { + if (!atomic_read(&transaction->t_updates)) { spin_unlock(&transaction->t_handle_lock); break; } prepare_to_wait(&journal->j_wait_updates, &wait, TASK_UNINTERRUPTIBLE); spin_unlock(&transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); schedule(); finish_wait(&journal->j_wait_updates, &wait); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); } - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); /* * We have now established a barrier against other normal updates, but @@ -494,9 +536,9 @@ void jbd2_journal_unlock_updates (journal_t *journal) J_ASSERT(journal->j_barrier_count != 0); mutex_unlock(&journal->j_barrier); - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); --journal->j_barrier_count; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_transaction_locked); } @@ -1238,7 +1280,8 @@ int jbd2_journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - int err; + int err, wait_for_commit = 0; + tid_t tid; pid_t pid; J_ASSERT(journal_current_handle() == handle); @@ -1246,7 +1289,7 @@ int jbd2_journal_stop(handle_t *handle) if (is_handle_aborted(handle)) err = -EIO; else { - J_ASSERT(transaction->t_updates > 0); + J_ASSERT(atomic_read(&transaction->t_updates) > 0); err = 0; } @@ -1291,9 +1334,9 @@ int jbd2_journal_stop(handle_t *handle) journal->j_last_sync_writer = pid; - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); commit_time = journal->j_average_commit_time; - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); trans_time = ktime_to_ns(ktime_sub(ktime_get(), transaction->t_start_time)); @@ -1314,14 +1357,8 @@ int jbd2_journal_stop(handle_t *handle) if (handle->h_sync) transaction->t_synchronous_commit = 1; current->journal_info = NULL; - spin_lock(&transaction->t_handle_lock); - transaction->t_outstanding_credits -= handle->h_buffer_credits; - transaction->t_updates--; - if (!transaction->t_updates) { - wake_up(&journal->j_wait_updates); - if (journal->j_barrier_count) - wake_up(&journal->j_wait_transaction_locked); - } + atomic_sub(handle->h_buffer_credits, + &transaction->t_outstanding_credits); /* * If the handle is marked SYNC, we need to set another commit @@ -1330,15 +1367,13 @@ int jbd2_journal_stop(handle_t *handle) * transaction is too old now. */ if (handle->h_sync || - transaction->t_outstanding_credits > - journal->j_max_transaction_buffers || - time_after_eq(jiffies, transaction->t_expires)) { + (atomic_read(&transaction->t_outstanding_credits) > + journal->j_max_transaction_buffers) || + time_after_eq(jiffies, transaction->t_expires)) { /* Do this even for aborted journals: an abort still * completes the commit thread, it just doesn't write * anything to disk. */ - tid_t tid = transaction->t_tid; - spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "transaction too old, requesting commit for " "handle %p\n", handle); /* This is non-blocking */ @@ -1349,11 +1384,25 @@ int jbd2_journal_stop(handle_t *handle) * to wait for the commit to complete. */ if (handle->h_sync && !(current->flags & PF_MEMALLOC)) - err = jbd2_log_wait_commit(journal, tid); - } else { - spin_unlock(&transaction->t_handle_lock); + wait_for_commit = 1; } + /* + * Once we drop t_updates, if it goes to zero the transaction + * could start commiting on us and eventually disappear. So + * once we do this, we must not dereference transaction + * pointer again. + */ + tid = transaction->t_tid; + if (atomic_dec_and_test(&transaction->t_updates)) { + wake_up(&journal->j_wait_updates); + if (journal->j_barrier_count) + wake_up(&journal->j_wait_transaction_locked); + } + + if (wait_for_commit) + err = jbd2_log_wait_commit(journal, tid); + lock_map_release(&handle->h_lockdep_map); jbd2_free_handle(handle); @@ -1719,7 +1768,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) goto zap_buffer_unlocked; /* OK, we have data buffer in journaled mode */ - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); @@ -1772,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return ret; } else { /* There is no currently-running transaction. So the @@ -1786,7 +1835,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return ret; } else { /* The orphan record's transaction has @@ -1810,7 +1859,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); return 0; } else { /* Good, the buffer belongs to the running transaction. @@ -1829,7 +1878,7 @@ zap_buffer: zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); zap_buffer_unlocked: clear_buffer_dirty(bh); J_ASSERT_BH(bh, !buffer_jbddirty(bh)); @@ -2136,9 +2185,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal, /* Locks are here just to force reading of recent values, it is * enough that the transaction was not committing before we started * a transaction adding the inode to orphan list */ - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); commit_trans = journal->j_committing_transaction; - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); inode_trans = jinode->i_transaction; spin_unlock(&journal->j_list_lock); |