From 9df93939b735dd273e49cbee290b9f4738500ef4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 6 Jan 2010 21:58:48 +0100
Subject: ext3: Use bitops to read/modify EXT3_I(inode)->i_state

At several places we modify EXT3_I(inode)->i_state without holding i_mutex
(ext3_release_file, ext3_bmap, ext3_journalled_writepage, ext3_do_update_inode,
...). These modifications are racy and we can lose updates to i_state. So
convert handling of i_state to use bitops which are atomic.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 455e6e6e5cb9..44b53386ab8b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1378,7 +1378,7 @@ static int ext3_journalled_write_end(struct file *file,
 	 */
 	if (pos + len > inode->i_size && ext3_can_truncate(inode))
 		ext3_orphan_add(handle, inode);
-	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
+	ext3_set_inode_state(inode, EXT3_STATE_JDATA);
 	if (inode->i_size > EXT3_I(inode)->i_disksize) {
 		EXT3_I(inode)->i_disksize = inode->i_size;
 		ret2 = ext3_mark_inode_dirty(handle, inode);
@@ -1417,7 +1417,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
 	journal_t *journal;
 	int err;
 
-	if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
+	if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) {
 		/*
 		 * This is a REALLY heavyweight approach, but the use of
 		 * bmap on dirty files is expected to be extremely rare:
@@ -1436,7 +1436,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
 		 * everything they get.
 		 */
 
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA;
+		ext3_clear_inode_state(inode, EXT3_STATE_JDATA);
 		journal = EXT3_JOURNAL(inode);
 		journal_lock_updates(journal);
 		err = journal_flush(journal);
@@ -1670,7 +1670,7 @@ static int ext3_journalled_writepage(struct page *page,
 				PAGE_CACHE_SIZE, NULL, write_end_fn);
 		if (ret == 0)
 			ret = err;
-		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
+		ext3_set_inode_state(inode, EXT3_STATE_JDATA);
 		unlock_page(page);
 	} else {
 		/*
@@ -2402,7 +2402,7 @@ void ext3_truncate(struct inode *inode)
 		goto out_notrans;
 
 	if (inode->i_size == 0 && ext3_should_writeback_data(inode))
-		ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE;
+		ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
 
 	/*
 	 * We have to lock the EOF page here, because lock_page() nests
@@ -2721,7 +2721,7 @@ int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
 {
 	/* We have all inode data except xattrs in memory here. */
 	return __ext3_get_inode_loc(inode, iloc,
-		!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR));
+		!ext3_test_inode_state(inode, EXT3_STATE_XATTR));
 }
 
 void ext3_set_inode_flags(struct inode *inode)
@@ -2893,7 +2893,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 					EXT3_GOOD_OLD_INODE_SIZE +
 					ei->i_extra_isize;
 			if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
-				 ei->i_state |= EXT3_STATE_XATTR;
+				 ext3_set_inode_state(inode, EXT3_STATE_XATTR);
 		}
 	} else
 		ei->i_extra_isize = 0;
@@ -2955,7 +2955,7 @@ again:
 
 	/* For fields not not tracking in the in-memory inode,
 	 * initialise them to zero for new inodes. */
-	if (ei->i_state & EXT3_STATE_NEW)
+	if (ext3_test_inode_state(inode, EXT3_STATE_NEW))
 		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
 
 	ext3_get_inode_flags(ei);
@@ -3052,7 +3052,7 @@ again:
 	rc = ext3_journal_dirty_metadata(handle, bh);
 	if (!err)
 		err = rc;
-	ei->i_state &= ~EXT3_STATE_NEW;
+	ext3_clear_inode_state(inode, EXT3_STATE_NEW);
 
 	atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
 out_brelse:
-- 
cgit v1.2.3


From 7eb4969e04060dcf3fbd46af9c21b1059b853068 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 1 Mar 2010 14:02:37 +0100
Subject: ext3: Truncate allocated blocks if direct IO write fails to update
 i_size

We have to truncate blocks allocated to file during direct IO when we
fail to update i_size properly.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 44b53386ab8b..c0ff9d6ffde6 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1785,8 +1785,9 @@ retry:
 		handle = ext3_journal_start(inode, 2);
 		if (IS_ERR(handle)) {
 			/* This is really bad luck. We've written the data
-			 * but cannot extend i_size. Bail out and pretend
-			 * the write failed... */
+			 * but cannot extend i_size. Truncate allocated blocks
+			 * and pretend the write failed... */
+			ext3_truncate(inode);
 			ret = PTR_ERR(handle);
 			goto out;
 		}
-- 
cgit v1.2.3


From 49792c806d0bfd53afc789dcdf50dc9bed2c5b83 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Tue, 2 Mar 2010 15:51:02 +0300
Subject: ext3: add writepage sanity checks

- There is theoretical possibility to perform writepage on
   RO superblock. Add explicit check for what case.
- Page must being locked before writepage.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c0ff9d6ffde6..eda9121d7d57 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1528,6 +1528,7 @@ static int ext3_ordered_writepage(struct page *page,
 	int err;
 
 	J_ASSERT(PageLocked(page));
+	WARN_ON_ONCE(IS_RDONLY(inode));
 
 	/*
 	 * We give up here if we're reentered, because it might be for a
@@ -1600,6 +1601,9 @@ static int ext3_writeback_writepage(struct page *page,
 	int ret = 0;
 	int err;
 
+	J_ASSERT(PageLocked(page));
+	WARN_ON_ONCE(IS_RDONLY(inode));
+
 	if (ext3_journal_current_handle())
 		goto out_fail;
 
@@ -1642,6 +1646,9 @@ static int ext3_journalled_writepage(struct page *page,
 	int ret = 0;
 	int err;
 
+	J_ASSERT(PageLocked(page));
+	WARN_ON_ONCE(IS_RDONLY(inode));
+
 	if (ext3_journal_current_handle())
 		goto no_write;
 
-- 
cgit v1.2.3


From 5dd4056db84387975140ff2568eaa0406f07985e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:00 -0500
Subject: dquot: cleanup space allocation / freeing routines

Get rid of the alloc_space, free_space, reserve_space, claim_space and
release_rsv dquot operations - they are always called from the filesystem
and if a filesystem really needs their own (which none currently does)
it can just call into it's own routine directly.

Move shared logic into the common __dquot_alloc_space,
dquot_claim_space_nodirty and __dquot_free_space low-level methods,
and rationalize the wrappers around it to move as much as possible
code into the common block for CONFIG_QUOTA vs not.  Also rename
all these helpers to be named dquot_* instead of vfs_dq_*.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index eda9121d7d57..20f02d69365c 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3336,7 +3336,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
  * i_size has been changed by generic_commit_write() and we thus need
  * to include the updated inode in the current transaction.
  *
- * Also, vfs_dq_alloc_space() will always dirty the inode when blocks
+ * Also, dquot_alloc_space() will always dirty the inode when blocks
  * are allocated to the file.
  *
  * If the inode is marked synchronous, we don't honour that here - doing
-- 
cgit v1.2.3


From b43fa8284d7790d9cca32c9c55e24f29be2fa33b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:03 -0500
Subject: dquot: cleanup dquot transfer routine

Get rid of the transfer dquot operation - it is now always called from
the filesystem and if a filesystem really needs it's own (which none
currently does) it can just call into it's own routine directly.

Rename the now static low-level dquot_transfer helper to __dquot_transfer
and vfs_dq_transfer to dquot_transfer to have a consistent namespace,
and make the new dquot_transfer return a normal negative errno value
which all callers expect.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 20f02d69365c..14d40a4dd6f0 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3160,7 +3160,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
-		error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		error = dquot_transfer(inode, attr);
 		if (error) {
 			ext3_journal_stop(handle);
 			return error;
-- 
cgit v1.2.3


From 907f4554e2521cb28b0009d17167760650a9561c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:06 -0500
Subject: dquot: move dquot initialization responsibility into the filesystem

Currently various places in the VFS call vfs_dq_init directly.  This means
we tie the quota code into the VFS.  Get rid of that and make the
filesystem responsible for the initialization.   For most metadata operations
this is a straight forward move into the methods, but for truncate and
open it's a bit more complicated.

For truncate we currently only call vfs_dq_init for the sys_truncate case
because open already takes care of it for ftruncate and open(O_TRUNC) - the
new code causes an additional vfs_dq_init for those which is harmless.

For open the initialization is moved from do_filp_open into the open method,
which means it happens slightly earlier now, and only for regular files.
The latter is fine because we don't need to initialize it for operations
on special files, and we already do it as part of the namespace operations
for directories.

Add a dquot_file_open helper that filesystems that support generic quotas
can use to fill in ->open.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 14d40a4dd6f0..d7962b0c57b3 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -196,6 +196,9 @@ void ext3_delete_inode (struct inode * inode)
 {
 	handle_t *handle;
 
+	if (!is_bad_inode(inode))
+		vfs_dq_init(inode);
+
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (is_bad_inode(inode))
@@ -3148,6 +3151,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 	if (error)
 		return error;
 
+	if (ia_valid & ATTR_SIZE)
+		vfs_dq_init(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		handle_t *handle;
-- 
cgit v1.2.3


From 871a293155a24554e153538d36e3a80fa169aefb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:07 -0500
Subject: dquot: cleanup dquot initialize routine

Get rid of the initialize dquot operation - it is now always called from
the filesystem and if a filesystem really needs it's own (which none
currently does) it can just call into it's own routine directly.

Rename the now static low-level dquot_initialize helper to __dquot_initialize
and vfs_dq_init to dquot_initialize to have a consistent namespace.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index d7962b0c57b3..ffbbc65e3f68 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -197,7 +197,7 @@ void ext3_delete_inode (struct inode * inode)
 	handle_t *handle;
 
 	if (!is_bad_inode(inode))
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 	truncate_inode_pages(&inode->i_data, 0);
 
@@ -3152,7 +3152,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 		return error;
 
 	if (ia_valid & ATTR_SIZE)
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		handle_t *handle;
@@ -3250,7 +3250,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 		ret = 2 * (bpp + indirects) + 2;
 
 #ifdef CONFIG_QUOTA
-	/* We know that structure was already allocated during vfs_dq_init so
+	/* We know that structure was already allocated during dquot_initialize so
 	 * we will be updating only the data blocks + inodes */
 	ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 #endif
-- 
cgit v1.2.3


From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 5 Mar 2010 09:21:37 +0100
Subject: pass writeback_control to ->write_inode

This gives the filesystem more information about the writeback that
is happening.  Trond requested this for the NFS unstable write handling,
and other filesystems might benefit from this too by beeing able to
distinguish between the different callers in more detail.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext3/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 455e6e6e5cb9..7aca55fcc976 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3096,7 +3096,7 @@ out_brelse:
  * `stuff()' is running, and the new i_size will be lost.  Plus the inode
  * will no longer be on the superblock's dirty inode list.
  */
-int ext3_write_inode(struct inode *inode, int wait)
+int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	if (current->flags & PF_MEMALLOC)
 		return 0;
@@ -3107,7 +3107,7 @@ int ext3_write_inode(struct inode *inode, int wait)
 		return -EIO;
 	}
 
-	if (!wait)
+	if (wbc->sync_mode != WB_SYNC_ALL)
 		return 0;
 
 	return ext3_force_commit(inode->i_sb);
-- 
cgit v1.2.3


From de329820e920cd9cfbc2127cad26a37026260cce Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 29 Mar 2010 14:30:19 -0700
Subject: ext3: fix broken handling of EXT3_STATE_NEW

In commit 9df93939b735 ("ext3: Use bitops to read/modify
EXT3_I(inode)->i_state") ext3 changed its internal 'i_state' variable to
use bitops for its state handling.  However, unline the same ext4
change, it didn't actually change the name of the field when it changed
the semantics of it.

As a result, an old use of 'i_state' remained in fs/ext3/ialloc.c that
initialized the field to EXT3_STATE_NEW.  And that does not work
_at_all_ when we're now working with individually named bits rather than
values that get masked.  So the code tried to mark the state to be new,
but in actual fact set the field to EXT3_STATE_JDATA.  Which makes no
sense at all, and screws up all the code that checks whether the inode
was newly allocated.

In particular, it made the xattr code unhappy, and caused various random
behavior, like apparently

	https://bugzilla.redhat.com/show_bug.cgi?id=577911

So fix the initialization, and rename the field to match ext4 so that we
don't have this happen again.

Cc: James Morris <jmorris@namei.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Daniel J Walsh <dwalsh@redhat.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 7f920b7263a4..ea33bdf0a300 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2811,7 +2811,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 	inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
 	inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 
-	ei->i_state = 0;
+	ei->i_state_flags = 0;
 	ei->i_dir_start_lookup = 0;
 	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
 	/* We now have enough fields to check if the inode was active or not.
-- 
cgit v1.2.3