diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_super.c')
| -rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 342 |
1 files changed, 183 insertions, 159 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 18a4b8e11df2..25ea2408118f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -15,6 +15,7 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + #include "xfs.h" #include "xfs_bit.h" #include "xfs_log.h" @@ -52,11 +53,11 @@ #include "xfs_trans_priv.h" #include "xfs_filestream.h" #include "xfs_da_btree.h" -#include "xfs_dir2_trace.h" #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" #include "xfs_inode_item.h" #include "xfs_sync.h" +#include "xfs_trace.h" #include <linux/namei.h> #include <linux/init.h> @@ -876,12 +877,11 @@ xfsaild( { struct xfs_ail *ailp = data; xfs_lsn_t last_pushed_lsn = 0; - long tout = 0; + long tout = 0; /* milliseconds */ while (!kthread_should_stop()) { - if (tout) - schedule_timeout_interruptible(msecs_to_jiffies(tout)); - tout = 1000; + schedule_timeout_interruptible(tout ? + msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); /* swsusp */ try_to_freeze(); @@ -930,13 +930,37 @@ xfs_fs_alloc_inode( */ STATIC void xfs_fs_destroy_inode( - struct inode *inode) + struct inode *inode) { - xfs_inode_t *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(inode); + + xfs_itrace_entry(ip); XFS_STATS_INC(vn_reclaim); - if (xfs_reclaim(ip)) - panic("%s: cannot reclaim 0x%p\n", __func__, inode); + + /* bad inode, get out here ASAP */ + if (is_bad_inode(inode)) + goto out_reclaim; + + xfs_ioend_wait(ip); + + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + + /* + * We should never get here with one of the reclaim flags already set. + */ + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); + + /* + * We always use background reclaim here because even if the + * inode is clean, it still may be under IO and hence we have + * to take the flush lock. The background reclaim path handles + * this more efficiently than we can here, so simply let background + * reclaim tear down all inodes. + */ +out_reclaim: + xfs_inode_set_reclaim_tag(ip); } /* @@ -973,7 +997,6 @@ xfs_fs_inode_init_once( mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, "xfsino", ip->i_ino); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); } /* @@ -998,12 +1021,45 @@ xfs_fs_dirty_inode( XFS_I(inode)->i_update_core = 1; } -/* - * Attempt to flush the inode, this will actually fail - * if the inode is pinned, but we dirty the inode again - * at the point when it is unpinned after a log write, - * since this is when the inode itself becomes flushable. - */ +STATIC int +xfs_log_inode( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + + if (error) { + xfs_trans_cancel(tp, 0); + /* we need to return with the lock hold shared */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out of the + * way during trans_reserve which would flush the inode. But there's + * no guarantee that the inode buffer has actually gone out yet (it's + * delwri). Plus the buffer could be pinned anyway if it's part of + * an inode in another recent transaction. So we play it safe and + * fire off the transaction anyway. + */ + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + + return error; +} + STATIC int xfs_fs_write_inode( struct inode *inode, @@ -1011,7 +1067,7 @@ xfs_fs_write_inode( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - int error = 0; + int error = EAGAIN; xfs_itrace_entry(ip); @@ -1022,35 +1078,55 @@ xfs_fs_write_inode( error = xfs_wait_on_pages(ip, 0, -1); if (error) goto out; - } - /* - * Bypass inodes which have already been cleaned by - * the inode flush clustering code inside xfs_iflush - */ - if (xfs_inode_clean(ip)) - goto out; - - /* - * We make this non-blocking if the inode is contended, return - * EAGAIN to indicate to the caller that they did not succeed. - * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by xfs_sync. - */ - if (sync) { + /* + * Make sure the inode has hit stable storage. By using the + * log and the fsync transactions we reduce the IOs we have + * to do here from two (log and inode) to just the log. + * + * Note: We still need to do a delwri write of the inode after + * this to flush it to the backing buffer so that bulkstat + * works properly if this is the first time the inode has been + * written. Because we hold the ilock atomically over the + * transaction commit and the inode flush we are guaranteed + * that the inode is not pinned when it returns. If the flush + * lock is already held, then the inode has already been + * flushed once and we don't need to flush it again. Hence + * the code will only flush the inode if it isn't already + * being flushed. + */ xfs_ilock(ip, XFS_ILOCK_SHARED); - xfs_iflock(ip); - - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + if (ip->i_update_core) { + error = xfs_log_inode(ip); + if (error) + goto out_unlock; + } } else { - error = EAGAIN; + /* + * We make this non-blocking if the inode is contended, return + * EAGAIN to indicate to the caller that they did not succeed. + * This prevents the flush path from blocking on inodes inside + * another operation right now, they get caught later by xfs_sync. + */ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) goto out; - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; + } + + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; - error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK); + /* + * Now we have the flush lock and the inode is not pinned, we can check + * if the inode is really clean as we know that there are no pending + * transaction completions, it is not waiting on the delayed write + * queue and there is no IO in progress. + */ + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; } + error = xfs_iflush(ip, 0); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -1075,6 +1151,20 @@ xfs_fs_clear_inode( XFS_STATS_INC(vn_remove); XFS_STATS_DEC(vn_active); + /* + * The iolock is used by the file system to coordinate reads, + * writes, and block truncates. Up to this point the lock + * protected concurrent accesses by users of the inode. But + * from here forward we're doing some final processing of the + * inode because we're done with it, and although we reuse the + * iolock for protection it is really a distinct lock class + * (in the lockdep sense) from before. To keep lockdep happy + * (and basically indicate what we are doing), we explicitly + * re-init the iolock here. + */ + ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); + xfs_inactive(ip); } @@ -1092,8 +1182,6 @@ xfs_fs_put_super( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); - struct xfs_inode *rip = mp->m_rootip; - int unmount_event_flags = 0; xfs_syncd_stop(mp); @@ -1109,20 +1197,7 @@ xfs_fs_put_super( xfs_sync_attr(mp, 0); } -#ifdef HAVE_DMAPI - if (mp->m_flags & XFS_MOUNT_DMAPI) { - unmount_event_flags = - (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? - 0 : DM_FLAGS_UNWANTED; - /* - * Ignore error from dmapi here, first unmount is not allowed - * to fail anyway, and second we wouldn't want to fail a - * unmount because of dmapi. - */ - XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL, - NULL, NULL, 0, 0, unmount_event_flags); - } -#endif + XFS_SEND_PREUNMOUNT(mp); /* * Blow away any referenced inode in the filestreams cache. @@ -1133,10 +1208,7 @@ xfs_fs_put_super( XFS_bflush(mp->m_ddev_targp); - if (mp->m_flags & XFS_MOUNT_DMAPI) { - XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, - unmount_event_flags); - } + XFS_SEND_UNMOUNT(mp); xfs_unmountfs(mp); xfs_freesb(mp); @@ -1237,6 +1309,29 @@ xfs_fs_statfs( return 0; } +STATIC void +xfs_save_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks = 0; + + mp->m_resblks_save = mp->m_resblks; + xfs_reserve_blocks(mp, &resblks, NULL); +} + +STATIC void +xfs_restore_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks; + + if (mp->m_resblks_save) { + resblks = mp->m_resblks_save; + mp->m_resblks_save = 0; + } else + resblks = xfs_default_resblks(mp); + + xfs_reserve_blocks(mp, &resblks, NULL); +} + STATIC int xfs_fs_remount( struct super_block *sb, @@ -1316,11 +1411,27 @@ xfs_fs_remount( } mp->m_update_flags = 0; } + + /* + * Fill out the reserve pool if it is empty. Use the stashed + * value if it is non-zero, otherwise go with the default. + */ + xfs_restore_resvblks(mp); } /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* + * After we have synced the data but before we sync the + * metadata, we need to free up the reserve block pool so that + * the used block count in the superblock on disk is correct at + * the end of the remount. Stash the current reserve pool size + * so that if we get remounted rw, we can return it to the same + * size. + */ + xfs_quiesce_data(mp); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } @@ -1339,11 +1450,22 @@ xfs_fs_freeze( { struct xfs_mount *mp = XFS_M(sb); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); return -xfs_fs_log_dummy(mp); } STATIC int +xfs_fs_unfreeze( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_restore_resvblks(mp); + return 0; +} + +STATIC int xfs_fs_show_options( struct seq_file *m, struct vfsmount *mnt) @@ -1504,8 +1626,6 @@ xfs_fs_fill_super( goto fail_vnrele; kfree(mtpt); - - xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); return 0; out_filestream_unmount: @@ -1567,6 +1687,7 @@ static const struct super_operations xfs_super_operations = { .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, + .unfreeze_fs = xfs_fs_unfreeze, .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, .show_options = xfs_fs_show_options, @@ -1581,94 +1702,6 @@ static struct file_system_type xfs_fs_type = { }; STATIC int __init -xfs_alloc_trace_bufs(void) -{ -#ifdef XFS_ALLOC_TRACE - xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_alloc_trace_buf) - goto out; -#endif -#ifdef XFS_BMAP_TRACE - xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_bmap_trace_buf) - goto out_free_alloc_trace; -#endif -#ifdef XFS_BTREE_TRACE - xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE, - KM_MAYFAIL); - if (!xfs_allocbt_trace_buf) - goto out_free_bmap_trace; - - xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_inobt_trace_buf) - goto out_free_allocbt_trace; - - xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_bmbt_trace_buf) - goto out_free_inobt_trace; -#endif -#ifdef XFS_ATTR_TRACE - xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL); - if (!xfs_attr_trace_buf) - goto out_free_bmbt_trace; -#endif -#ifdef XFS_DIR2_TRACE - xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL); - if (!xfs_dir2_trace_buf) - goto out_free_attr_trace; -#endif - - return 0; - -#ifdef XFS_DIR2_TRACE - out_free_attr_trace: -#endif -#ifdef XFS_ATTR_TRACE - ktrace_free(xfs_attr_trace_buf); - out_free_bmbt_trace: -#endif -#ifdef XFS_BTREE_TRACE - ktrace_free(xfs_bmbt_trace_buf); - out_free_inobt_trace: - ktrace_free(xfs_inobt_trace_buf); - out_free_allocbt_trace: - ktrace_free(xfs_allocbt_trace_buf); - out_free_bmap_trace: -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(xfs_bmap_trace_buf); - out_free_alloc_trace: -#endif -#ifdef XFS_ALLOC_TRACE - ktrace_free(xfs_alloc_trace_buf); - out: -#endif - return -ENOMEM; -} - -STATIC void -xfs_free_trace_bufs(void) -{ -#ifdef XFS_DIR2_TRACE - ktrace_free(xfs_dir2_trace_buf); -#endif -#ifdef XFS_ATTR_TRACE - ktrace_free(xfs_attr_trace_buf); -#endif -#ifdef XFS_BTREE_TRACE - ktrace_free(xfs_bmbt_trace_buf); - ktrace_free(xfs_inobt_trace_buf); - ktrace_free(xfs_allocbt_trace_buf); -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(xfs_bmap_trace_buf); -#endif -#ifdef XFS_ALLOC_TRACE - ktrace_free(xfs_alloc_trace_buf); -#endif -} - -STATIC int __init xfs_init_zones(void) { @@ -1809,7 +1842,6 @@ init_xfs_fs(void) printk(KERN_INFO XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"); - ktrace_init(64); xfs_ioend_init(); xfs_dir_startup(); @@ -1817,13 +1849,9 @@ init_xfs_fs(void) if (error) goto out; - error = xfs_alloc_trace_bufs(); - if (error) - goto out_destroy_zones; - error = xfs_mru_cache_init(); if (error) - goto out_free_trace_buffers; + goto out_destroy_zones; error = xfs_filestream_init(); if (error) @@ -1858,8 +1886,6 @@ init_xfs_fs(void) xfs_filestream_uninit(); out_mru_cache_uninit: xfs_mru_cache_uninit(); - out_free_trace_buffers: - xfs_free_trace_bufs(); out_destroy_zones: xfs_destroy_zones(); out: @@ -1876,9 +1902,7 @@ exit_xfs_fs(void) xfs_buf_terminate(); xfs_filestream_uninit(); xfs_mru_cache_uninit(); - xfs_free_trace_bufs(); xfs_destroy_zones(); - ktrace_uninit(); } module_init(init_xfs_fs); |
