From 0b9af6daefb3de24ba6c32df234cca448ec76b18 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 20 Feb 2012 17:53:00 -0500 Subject: ext4: ignore EXT4_INODE_JOURNAL_DATA flag with delalloc commit 3d2b158262826e8b75bbbfb7b97010838dd92ac7 upstream. Ext4 does not support data journalling with delayed allocation enabled. We even do not allow to mount the file system with delayed allocation and data journalling enabled, however it can be set via FS_IOC_SETFLAGS so we can hit the inode with EXT4_INODE_JOURNAL_DATA set even on file system mounted with delayed allocation (default) and that's where problem arises. The easies way to reproduce this problem is with the following set of commands: mkfs.ext4 /dev/sdd mount /dev/sdd /mnt/test1 dd if=/dev/zero of=/mnt/test1/file bs=1M count=4 chattr +j /mnt/test1/file dd if=/dev/zero of=/mnt/test1/file bs=1M count=4 conv=notrunc chattr -j /mnt/test1/file Additionally it can be reproduced quite reliably with xfstests 272 and 269. In fact the above reproducer is a part of test 272. To fix this we should ignore the EXT4_INODE_JOURNAL_DATA inode flag if the file system is mounted with delayed allocation. This can be easily done by fixing ext4_should_*_data() functions do ignore data journal flag when delalloc is set (suggested by Ted). We also have to set the appropriate address space operations for the inode (again, ignoring data journal flag if delalloc enabled). Additionally this commit introduces ext4_inode_journal_mode() function because ext4_should_*_data() has already had a lot of common code and this change is putting it all into one function so it is easier to read. Successfully tested with xfstests in following configurations: delalloc + data=ordered delalloc + data=writeback data=journal nodelalloc + data=ordered nodelalloc + data=writeback nodelalloc + data=journal Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 92655fd89657..0b401f8c3281 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2480,13 +2480,14 @@ static int ext4_da_write_end(struct file *file, int write_mode = (int)(unsigned long)fsdata; if (write_mode == FALL_BACK_TO_NONDELALLOC) { - if (ext4_should_order_data(inode)) { + switch (ext4_inode_journal_mode(inode)) { + case EXT4_INODE_ORDERED_DATA_MODE: return ext4_ordered_write_end(file, mapping, pos, len, copied, page, fsdata); - } else if (ext4_should_writeback_data(inode)) { + case EXT4_INODE_WRITEBACK_DATA_MODE: return ext4_writeback_write_end(file, mapping, pos, len, copied, page, fsdata); - } else { + default: BUG(); } } @@ -3084,18 +3085,25 @@ static const struct address_space_operations ext4_da_aops = { void ext4_set_aops(struct inode *inode) { - if (ext4_should_order_data(inode) && - test_opt(inode->i_sb, DELALLOC)) - inode->i_mapping->a_ops = &ext4_da_aops; - else if (ext4_should_order_data(inode)) - inode->i_mapping->a_ops = &ext4_ordered_aops; - else if (ext4_should_writeback_data(inode) && - test_opt(inode->i_sb, DELALLOC)) - inode->i_mapping->a_ops = &ext4_da_aops; - else if (ext4_should_writeback_data(inode)) - inode->i_mapping->a_ops = &ext4_writeback_aops; - else + switch (ext4_inode_journal_mode(inode)) { + case EXT4_INODE_ORDERED_DATA_MODE: + if (test_opt(inode->i_sb, DELALLOC)) + inode->i_mapping->a_ops = &ext4_da_aops; + else + inode->i_mapping->a_ops = &ext4_ordered_aops; + break; + case EXT4_INODE_WRITEBACK_DATA_MODE: + if (test_opt(inode->i_sb, DELALLOC)) + inode->i_mapping->a_ops = &ext4_da_aops; + else + inode->i_mapping->a_ops = &ext4_writeback_aops; + break; + case EXT4_INODE_JOURNAL_DATA_MODE: inode->i_mapping->a_ops = &ext4_journalled_aops; + break; + default: + BUG(); + } } -- cgit v1.2.3 From 8608fb78b2cbf9eb8794e592bf43a8b1884c5a85 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 20 Feb 2012 17:59:24 -0500 Subject: ext4: fix race between unwritten extent conversion and truncate commit 266991b13890049ee1a6bb95b9817f06339ee3d7 upstream. The following comment in ext4_end_io_dio caught my attention: /* XXX: probably should move into the real I/O completion handler */ inode_dio_done(inode); The truncate code takes i_mutex, then calls inode_dio_wait. Because the ext4 code path above will end up dropping the mutex before it is reacquired by the worker thread that does the extent conversion, it seems to me that the truncate can happen out of order. Jan Kara mentioned that this might result in error messages in the system logs, but that should be the extent of the "damage." The fix is pretty straight-forward: don't call inode_dio_done until the extent conversion is complete. Reviewed-by: Jan Kara Signed-off-by: Jeff Moyer Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0b401f8c3281..3ce761373241 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2794,9 +2794,6 @@ out: /* queue the work to convert unwritten extents to written */ queue_work(wq, &io_end->work); - - /* XXX: probably should move into the real I/O completion handler */ - inode_dio_done(inode); } static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) @@ -2920,9 +2917,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, iocb->private = NULL; EXT4_I(inode)->cur_aio_dio = NULL; if (!is_sync_kiocb(iocb)) { - iocb->private = ext4_init_io_end(inode, GFP_NOFS); - if (!iocb->private) + ext4_io_end_t *io_end = + ext4_init_io_end(inode, GFP_NOFS); + if (!io_end) return -ENOMEM; + io_end->flag |= EXT4_IO_END_DIRECT; + iocb->private = io_end; /* * we save the io structure for current async * direct IO, so that later ext4_map_blocks() -- cgit v1.2.3