From 06a279d636734da32bb62dd2f7b0ade666f65d7c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 17 Jan 2009 18:41:37 -0500 Subject: ext4: only use i_size_high for regular files Directories are not allowed to be bigger than 2GB, so don't use i_size_high for anything other than regular files. E2fsck should complain about these inodes, but the simplest thing to do for the kernel is to only use i_size_high for regular files. This prevents an intentially corrupted filesystem from causing the kernel to burn a huge amount of CPU and issuing error messages such as: EXT4-fs warning (device loop0): ext4_block_to_path: block 135090028 > max Thanks to David Maciejak from Fortinet's FortiGuard Global Security Research Team for reporting this issue. http://bugzilla.kernel.org/show_bug.cgi?id=12375 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a6444cee0c7e..49484ba801c9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -360,9 +360,9 @@ static int ext4_block_to_path(struct inode *inode, final = ptrs; } else { ext4_warning(inode->i_sb, "ext4_block_to_path", - "block %lu > max", + "block %lu > max in inode %lu", i_block + direct_blocks + - indirect_blocks + double_blocks); + indirect_blocks + double_blocks, inode->i_ino); } if (boundary) *boundary = final - 1 - (i_block & (ptrs - 1)); -- cgit v1.2.3 From e7f07968c16bdd9480001c0a9de013ba56889cf9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 20 Jan 2009 09:50:19 -0500 Subject: ext4: Fix ext4_free_blocks() w/o a journal when files have indirect blocks When trying to unlink a file with indirect blocks on a filesystem without a journal, the "circular indirect block" sanity test was getting falsely triggered. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 49484ba801c9..b4386dafeb0c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3622,7 +3622,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, * block pointed to itself, it would have been detached when * the block was cleared. Check for this instead of OOPSing. */ - if (bh2jh(this_bh)) + if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) ext4_handle_dirty_metadata(handle, inode, this_bh); else ext4_error(inode->i_sb, __func__, -- cgit v1.2.3 From b9ec63f78b425c0e16cc95605b5d4ff2dc228b97 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 30 Jan 2009 00:00:24 -0500 Subject: ext4: Remove bogus BUG() check in ext4_bmap() The code to support journal-less ext4 operation added a BUG to ext4_bmap() which fired if there was no journal and the EXT4_STATE_JDATA bit was set in the i_state field. This caused running the filefrag program (which uses the FIMBAP ioctl) to trigger a BUG(). The EXT4_STATE_JDATA bit is only used for ext4_bmap(), and it's harmless for the bit to be set. We could add a check in __ext4_journalled_writepage() and ext4_journalled_write_end() to only set the EXT4_STATE_JDATA bit if the journal is present, but that adds an extra test and jump instruction. It's easier to simply remove the BUG check. http://bugzilla.kernel.org/show_bug.cgi?id=12568 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b4386dafeb0c..03ba20be1329 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2821,9 +2821,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) filemap_write_and_wait(mapping); } - BUG_ON(!EXT4_JOURNAL(inode) && - EXT4_I(inode)->i_state & EXT4_STATE_JDATA); - if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { /* * This is a REALLY heavyweight approach, but the use of -- cgit v1.2.3 From 7f5aa215088b817add9c71914b83650bdd49f8a9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Feb 2009 11:15:34 -0500 Subject: jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate() If we race with commit code setting i_transaction to NULL, we could possibly dereference it. Proper locking requires the journal pointer (to access journal->j_list_lock), which we don't have. So we have to change the prototype of the function so that filesystem passes us the journal pointer. Also add a more detailed comment about why the function jbd2_journal_begin_ordered_truncate() does what it does and how it should be used. Thanks to Dan Carpenter for pointing to the suspitious code. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Acked-by: Joel Becker CC: linux-ext4@vger.kernel.org CC: ocfs2-devel@oss.oracle.com CC: mfasheh@suse.de CC: Dan Carpenter --- fs/ext4/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 03ba20be1329..658c4a7f2578 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -47,8 +47,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { - return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, - new_size); + return jbd2_journal_begin_ordered_truncate( + EXT4_SB(inode->i_sb)->s_journal, + &EXT4_I(inode)->jinode, + new_size); } static void ext4_invalidatepage(struct page *page, unsigned long offset); -- cgit v1.2.3 From 2acf2c261b823d9d9ed954f348b97620297a36b5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 14 Feb 2009 10:42:58 -0500 Subject: ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages With delayed allocation we lock the page in write_cache_pages() and try to build an in memory extent of contiguous blocks. This is needed so that we can get large contiguous blocks request. If range_cyclic mode is enabled, write_cache_pages() will loop back to the 0 index if no I/O has been done yet, and try to start writing from the beginning of the range. That causes an attempt to take the page lock of lower index page while holding the page lock of higher index page, which can cause a dead lock with another writeback thread. The solution is to implement the range_cyclic behavior in ext4_da_writepages() instead. http://bugzilla.kernel.org/show_bug.cgi?id=12579 Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 658c4a7f2578..cbd2ca99d113 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2439,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping, int no_nrwrite_index_update; int pages_written = 0; long pages_skipped; + int range_cyclic, cycled = 1, io_done = 0; int needed_blocks, ret = 0, nr_to_writebump = 0; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); @@ -2490,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping, if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - if (wbc->range_cyclic) + range_cyclic = wbc->range_cyclic; + if (wbc->range_cyclic) { index = mapping->writeback_index; - else + if (index) + cycled = 0; + wbc->range_start = index << PAGE_CACHE_SHIFT; + wbc->range_end = LLONG_MAX; + wbc->range_cyclic = 0; + } else index = wbc->range_start >> PAGE_CACHE_SHIFT; mpd.wbc = wbc; @@ -2506,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping, wbc->no_nrwrite_index_update = 1; pages_skipped = wbc->pages_skipped; +retry: while (!ret && wbc->nr_to_write > 0) { /* @@ -2548,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping, pages_written += mpd.pages_written; wbc->pages_skipped = pages_skipped; ret = 0; + io_done = 1; } else if (wbc->nr_to_write) /* * There is no more writeout needed @@ -2556,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping, */ break; } + if (!io_done && !cycled) { + cycled = 1; + index = 0; + wbc->range_start = index << PAGE_CACHE_SHIFT; + wbc->range_end = mapping->writeback_index - 1; + goto retry; + } if (pages_skipped != wbc->pages_skipped) printk(KERN_EMERG "This should not happen leaving %s " "with nr_to_write = %ld ret = %d\n", @@ -2563,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping, /* Update index */ index += pages_written; + wbc->range_cyclic = range_cyclic; if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) /* * set the writeback_index so that range_cyclic -- cgit v1.2.3 From ebd3610b110bbb18ea6f9f2aeed1e1068c537227 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 22 Feb 2009 21:09:59 -0500 Subject: ext4: Fix deadlock in ext4_write_begin() and ext4_da_write_begin() Functions ext4_write_begin() and ext4_da_write_begin() call grab_cache_page_write_begin() without AOP_FLAG_NOFS. Thus it can happen that page reclaim is triggered in that function and it recurses back into the filesystem (or some other filesystem). But this can lead to various problems as a transaction is already started at that point. Add the necessary flag. http://bugzilla.kernel.org/show_bug.cgi?id=11688 Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cbd2ca99d113..51cdd13e1c31 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1368,6 +1368,10 @@ retry: goto out; } + /* We cannot recurse into the filesystem as the transaction is already + * started */ + flags |= AOP_FLAG_NOFS; + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { ext4_journal_stop(handle); @@ -1377,7 +1381,7 @@ retry: *pagep = page; ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_get_block); + ext4_get_block); if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), @@ -2667,6 +2671,9 @@ retry: ret = PTR_ERR(handle); goto out; } + /* We cannot recurse into the filesystem as the transaction is already + * started */ + flags |= AOP_FLAG_NOFS; page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { -- cgit v1.2.3 From 8f64b32eb73fbfe9f38c4123121b63ee409278a7 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 26 Feb 2009 00:57:35 -0500 Subject: ext4: don't call jbd2_journal_force_commit_nested without journal Running without a journal, I oopsed when I ran out of space, because we called jbd2_journal_force_commit_nested() from ext4_should_retry_alloc() without a journal. This should take care of it, I think. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 51cdd13e1c31..c7fed5b18745 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2544,7 +2544,7 @@ retry: ext4_journal_stop(handle); - if (mpd.retval == -ENOSPC) { + if ((mpd.retval == -ENOSPC) && sbi->s_journal) { /* commit the transaction which would * free blocks released in the transaction * and try again -- cgit v1.2.3