From 9df93939b735dd273e49cbee290b9f4738500ef4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Jan 2010 21:58:48 +0100 Subject: ext3: Use bitops to read/modify EXT3_I(inode)->i_state At several places we modify EXT3_I(inode)->i_state without holding i_mutex (ext3_release_file, ext3_bmap, ext3_journalled_writepage, ext3_do_update_inode, ...). These modifications are racy and we can lose updates to i_state. So convert handling of i_state to use bitops which are atomic. Signed-off-by: Jan Kara --- fs/ext3/inode.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 455e6e6e5cb9..44b53386ab8b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1378,7 +1378,7 @@ static int ext3_journalled_write_end(struct file *file, */ if (pos + len > inode->i_size && ext3_can_truncate(inode)) ext3_orphan_add(handle, inode); - EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + ext3_set_inode_state(inode, EXT3_STATE_JDATA); if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; ret2 = ext3_mark_inode_dirty(handle, inode); @@ -1417,7 +1417,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) journal_t *journal; int err; - if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { + if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) { /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: @@ -1436,7 +1436,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) * everything they get. */ - EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA; + ext3_clear_inode_state(inode, EXT3_STATE_JDATA); journal = EXT3_JOURNAL(inode); journal_lock_updates(journal); err = journal_flush(journal); @@ -1670,7 +1670,7 @@ static int ext3_journalled_writepage(struct page *page, PAGE_CACHE_SIZE, NULL, write_end_fn); if (ret == 0) ret = err; - EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + ext3_set_inode_state(inode, EXT3_STATE_JDATA); unlock_page(page); } else { /* @@ -2402,7 +2402,7 @@ void ext3_truncate(struct inode *inode) goto out_notrans; if (inode->i_size == 0 && ext3_should_writeback_data(inode)) - ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; + ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); /* * We have to lock the EOF page here, because lock_page() nests @@ -2721,7 +2721,7 @@ int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) { /* We have all inode data except xattrs in memory here. */ return __ext3_get_inode_loc(inode, iloc, - !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)); + !ext3_test_inode_state(inode, EXT3_STATE_XATTR)); } void ext3_set_inode_flags(struct inode *inode) @@ -2893,7 +2893,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC)) - ei->i_state |= EXT3_STATE_XATTR; + ext3_set_inode_state(inode, EXT3_STATE_XATTR); } } else ei->i_extra_isize = 0; @@ -2955,7 +2955,7 @@ again: /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ - if (ei->i_state & EXT3_STATE_NEW) + if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); ext3_get_inode_flags(ei); @@ -3052,7 +3052,7 @@ again: rc = ext3_journal_dirty_metadata(handle, bh); if (!err) err = rc; - ei->i_state &= ~EXT3_STATE_NEW; + ext3_clear_inode_state(inode, EXT3_STATE_NEW); atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); out_brelse: -- cgit v1.2.3 From 7eb4969e04060dcf3fbd46af9c21b1059b853068 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 1 Mar 2010 14:02:37 +0100 Subject: ext3: Truncate allocated blocks if direct IO write fails to update i_size We have to truncate blocks allocated to file during direct IO when we fail to update i_size properly. Signed-off-by: Jan Kara --- fs/ext3/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 44b53386ab8b..c0ff9d6ffde6 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1785,8 +1785,9 @@ retry: handle = ext3_journal_start(inode, 2); if (IS_ERR(handle)) { /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ + * but cannot extend i_size. Truncate allocated blocks + * and pretend the write failed... */ + ext3_truncate(inode); ret = PTR_ERR(handle); goto out; } -- cgit v1.2.3 From 49792c806d0bfd53afc789dcdf50dc9bed2c5b83 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 2 Mar 2010 15:51:02 +0300 Subject: ext3: add writepage sanity checks - There is theoretical possibility to perform writepage on RO superblock. Add explicit check for what case. - Page must being locked before writepage. Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext3/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index c0ff9d6ffde6..eda9121d7d57 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1528,6 +1528,7 @@ static int ext3_ordered_writepage(struct page *page, int err; J_ASSERT(PageLocked(page)); + WARN_ON_ONCE(IS_RDONLY(inode)); /* * We give up here if we're reentered, because it might be for a @@ -1600,6 +1601,9 @@ static int ext3_writeback_writepage(struct page *page, int ret = 0; int err; + J_ASSERT(PageLocked(page)); + WARN_ON_ONCE(IS_RDONLY(inode)); + if (ext3_journal_current_handle()) goto out_fail; @@ -1642,6 +1646,9 @@ static int ext3_journalled_writepage(struct page *page, int ret = 0; int err; + J_ASSERT(PageLocked(page)); + WARN_ON_ONCE(IS_RDONLY(inode)); + if (ext3_journal_current_handle()) goto no_write; -- cgit v1.2.3 From 5dd4056db84387975140ff2568eaa0406f07985e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:00 -0500 Subject: dquot: cleanup space allocation / freeing routines Get rid of the alloc_space, free_space, reserve_space, claim_space and release_rsv dquot operations - they are always called from the filesystem and if a filesystem really needs their own (which none currently does) it can just call into it's own routine directly. Move shared logic into the common __dquot_alloc_space, dquot_claim_space_nodirty and __dquot_free_space low-level methods, and rationalize the wrappers around it to move as much as possible code into the common block for CONFIG_QUOTA vs not. Also rename all these helpers to be named dquot_* instead of vfs_dq_*. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index eda9121d7d57..20f02d69365c 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3336,7 +3336,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * i_size has been changed by generic_commit_write() and we thus need * to include the updated inode in the current transaction. * - * Also, vfs_dq_alloc_space() will always dirty the inode when blocks + * Also, dquot_alloc_space() will always dirty the inode when blocks * are allocated to the file. * * If the inode is marked synchronous, we don't honour that here - doing -- cgit v1.2.3 From b43fa8284d7790d9cca32c9c55e24f29be2fa33b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:03 -0500 Subject: dquot: cleanup dquot transfer routine Get rid of the transfer dquot operation - it is now always called from the filesystem and if a filesystem really needs it's own (which none currently does) it can just call into it's own routine directly. Rename the now static low-level dquot_transfer helper to __dquot_transfer and vfs_dq_transfer to dquot_transfer to have a consistent namespace, and make the new dquot_transfer return a normal negative errno value which all callers expect. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 20f02d69365c..14d40a4dd6f0 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3160,7 +3160,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) error = PTR_ERR(handle); goto err_out; } - error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + error = dquot_transfer(inode, attr); if (error) { ext3_journal_stop(handle); return error; -- cgit v1.2.3 From 907f4554e2521cb28b0009d17167760650a9561c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:06 -0500 Subject: dquot: move dquot initialization responsibility into the filesystem Currently various places in the VFS call vfs_dq_init directly. This means we tie the quota code into the VFS. Get rid of that and make the filesystem responsible for the initialization. For most metadata operations this is a straight forward move into the methods, but for truncate and open it's a bit more complicated. For truncate we currently only call vfs_dq_init for the sys_truncate case because open already takes care of it for ftruncate and open(O_TRUNC) - the new code causes an additional vfs_dq_init for those which is harmless. For open the initialization is moved from do_filp_open into the open method, which means it happens slightly earlier now, and only for regular files. The latter is fine because we don't need to initialize it for operations on special files, and we already do it as part of the namespace operations for directories. Add a dquot_file_open helper that filesystems that support generic quotas can use to fill in ->open. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext3/inode.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 14d40a4dd6f0..d7962b0c57b3 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -196,6 +196,9 @@ void ext3_delete_inode (struct inode * inode) { handle_t *handle; + if (!is_bad_inode(inode)) + vfs_dq_init(inode); + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) @@ -3148,6 +3151,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + if (ia_valid & ATTR_SIZE) + vfs_dq_init(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; -- cgit v1.2.3 From 871a293155a24554e153538d36e3a80fa169aefb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:07 -0500 Subject: dquot: cleanup dquot initialize routine Get rid of the initialize dquot operation - it is now always called from the filesystem and if a filesystem really needs it's own (which none currently does) it can just call into it's own routine directly. Rename the now static low-level dquot_initialize helper to __dquot_initialize and vfs_dq_init to dquot_initialize to have a consistent namespace. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext3/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index d7962b0c57b3..ffbbc65e3f68 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -197,7 +197,7 @@ void ext3_delete_inode (struct inode * inode) handle_t *handle; if (!is_bad_inode(inode)) - vfs_dq_init(inode); + dquot_initialize(inode); truncate_inode_pages(&inode->i_data, 0); @@ -3152,7 +3152,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) return error; if (ia_valid & ATTR_SIZE) - vfs_dq_init(inode); + dquot_initialize(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; @@ -3250,7 +3250,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode) ret = 2 * (bpp + indirects) + 2; #ifdef CONFIG_QUOTA - /* We know that structure was already allocated during vfs_dq_init so + /* We know that structure was already allocated during dquot_initialize so * we will be updating only the data blocks + inodes */ ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); #endif -- cgit v1.2.3 From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:37 +0100 Subject: pass writeback_control to ->write_inode This gives the filesystem more information about the writeback that is happening. Trond requested this for the NFS unstable write handling, and other filesystems might benefit from this too by beeing able to distinguish between the different callers in more detail. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/ext3/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 455e6e6e5cb9..7aca55fcc976 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3096,7 +3096,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. */ -int ext3_write_inode(struct inode *inode, int wait) +int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) { if (current->flags & PF_MEMALLOC) return 0; @@ -3107,7 +3107,7 @@ int ext3_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; return ext3_force_commit(inode->i_sb); -- cgit v1.2.3 From de329820e920cd9cfbc2127cad26a37026260cce Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Mar 2010 14:30:19 -0700 Subject: ext3: fix broken handling of EXT3_STATE_NEW In commit 9df93939b735 ("ext3: Use bitops to read/modify EXT3_I(inode)->i_state") ext3 changed its internal 'i_state' variable to use bitops for its state handling. However, unline the same ext4 change, it didn't actually change the name of the field when it changed the semantics of it. As a result, an old use of 'i_state' remained in fs/ext3/ialloc.c that initialized the field to EXT3_STATE_NEW. And that does not work _at_all_ when we're now working with individually named bits rather than values that get masked. So the code tried to mark the state to be new, but in actual fact set the field to EXT3_STATE_JDATA. Which makes no sense at all, and screws up all the code that checks whether the inode was newly allocated. In particular, it made the xattr code unhappy, and caused various random behavior, like apparently https://bugzilla.redhat.com/show_bug.cgi?id=577911 So fix the initialization, and rename the field to match ext4 so that we don't have this happen again. Cc: James Morris Cc: Stephen Smalley Cc: Daniel J Walsh Cc: Eric Paris Cc: Jan Kara Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext3/inode.c') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 7f920b7263a4..ea33bdf0a300 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2811,7 +2811,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; - ei->i_state = 0; + ei->i_state_flags = 0; ei->i_dir_start_lookup = 0; ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); /* We now have enough fields to check if the inode was active or not. -- cgit v1.2.3