From eb8abb927ae2fd1730e24ea94cd9527f3c086292 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 2 Nov 2010 09:34:50 -0400 Subject: ext4: Remove useless spinlock in ext4_getattr() Linus noted, and complained to me, that while doing lots of "git diff"s of kernel sources, these spinlocks were responsible for 27% of the spinlock cost on his two-processor system as reported by perf. Git was doing lots of parallel stats, and this was putting a lot of pressure on ext4_getattr(). A spinlock to protect a single memory-to-memory copy is pointless, so remove it. Signed-off-by: "Theodore Ts'o" Signed-off-by: Linus Torvalds --- fs/ext4/inode.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 191616470466..4d78342f3bf0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5410,9 +5410,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, * will return the blocks that include the delayed allocation * blocks for this file. 
*/ - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; return 0; -- cgit v1.2.3 From 7ff9c073dd4d7200399076554f7ab9b876f196f6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 8 Nov 2010 13:51:33 -0500 Subject: ext4: Add new ext4 inode tracepoints Add ext4_evict_inode, ext4_drop_inode, ext4_mark_inode_dirty, and ext4_begin_ordered_truncate() Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 191616470466..846e1e9db434 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -53,6 +53,7 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { + trace_ext4_begin_ordered_truncate(inode, new_size); return jbd2_journal_begin_ordered_truncate( EXT4_SB(inode->i_sb)->s_journal, &EXT4_I(inode)->jinode, @@ -178,6 +179,7 @@ void ext4_evict_inode(struct inode *inode) handle_t *handle; int err; + trace_ext4_evict_inode(inode); if (inode->i_nlink) { truncate_inode_pages(&inode->i_data, 0); goto no_delete; @@ -5649,6 +5651,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) int err, ret; might_sleep(); + trace_ext4_mark_inode_dirty(inode, _RET_IP_); err = ext4_reserve_inode_write(handle, inode, &iloc); if (ext4_handle_valid(handle) && EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && -- cgit v1.2.3 From 1449032be17abb69116dbc393f67ceb8bd034f92 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 14 Dec 2010 15:27:50 -0500 Subject: ext4: Turn off multiple page-io submission by default Jon Nelson has found a test case which causes postgresql to fail with the error: psql:t.sql:4: ERROR: invalid page header in block 38269 of relation base/16384/16581 Under memory pressure, it looks like part of a file can end up 
getting replaced by zeros. Until we can figure out the cause, we'll roll back the change and use block_write_full_page() instead of ext4_bio_write_page(). The new, more efficient writing function can be used via the mount option mblk_io_submit, so we can test and fix the new page I/O code. To reproduce the problem, install postgres 8.4 or 9.0, and pin enough memory such that the system is just at the edge of triggering writeback before running the following sql script: begin; create temporary table foo as select x as a, ARRAY[x] as b FROM generate_series(1, 10000000 ) AS x; create index foo_a_idx on foo (a); create index foo_b_idx on foo USING GIN (b); rollback; If the temporary table is created on a hard drive partition which is encrypted using dm_crypt, then under memory pressure, approximately 30-40% of the time, pgsql will issue the above failure. This patch should fix this problem, and the problem will come back if the file system is mounted with the mblk_io_submit mount option. Reported-by: Jon Nelson Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bdbe69902207..e659597b690b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2125,9 +2125,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, */ if (unlikely(journal_data && PageChecked(page))) err = __ext4_journalled_writepage(page, len); - else + else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) err = ext4_bio_write_page(&io_submit, page, len, mpd->wbc); + else + err = block_write_full_page(page, + noalloc_get_block_write, mpd->wbc); if (!err) mpd->pages_written++; -- cgit v1.2.3