From 5d77c0dc0c05c2c65aee16149fae06831a118730 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 19 Nov 2009 15:52:00 +0000 Subject: xfs: make several more functions static Just minor housekeeping, a lot more functions can be trivially made static; others could if we reordered things a bit... Signed-off-by: Eric Sandeen Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 1f5e4bb5e970..0f90bfe2815f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -351,7 +351,7 @@ xfs_commit_dummy_trans( return error; } -int +STATIC int xfs_sync_fsdata( struct xfs_mount *mp, int flags) -- cgit v1.2.3 From 5017e97d52628fb8ae56e434e86ac2e72ddaac2b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:40 +0000 Subject: xfs: rename xfs_get_perag xfs_get_perag is really getting the perag that an inode belongs to based on it's inode number. Convert the use of this function to just get the perag from a provided ag number. Use this new function to obtain the per-ag structure when traversing the per AG inode trees for sync and reclaim. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 0f90bfe2815f..cc964faf12e9 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -90,14 +90,13 @@ xfs_inode_ag_lookup( STATIC int xfs_inode_ag_walk( struct xfs_mount *mp, - xfs_agnumber_t ag, + struct xfs_perag *pag, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags, int tag, int exclusive) { - struct xfs_perag *pag = &mp->m_perag[ag]; uint32_t first_index; int last_error = 0; int skipped; @@ -141,8 +140,6 @@ restart: delay(1); goto restart; } - - xfs_put_perag(mp, pag); return last_error; } @@ -160,10 +157,16 @@ xfs_inode_ag_iterator( xfs_agnumber_t ag; for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - if (!mp->m_perag[ag].pag_ici_init) + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, ag); + if (!pag->pag_ici_init) { + xfs_perag_put(pag); continue; - error = xfs_inode_ag_walk(mp, ag, execute, flags, tag, + } + error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, exclusive); + xfs_perag_put(pag); if (error) { last_error = error; if (error == EFSCORRUPTED) @@ -690,16 +693,17 @@ void xfs_inode_set_reclaim_tag( xfs_inode_t *ip) { - xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); read_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); __xfs_inode_set_reclaim_tag(pag, ip); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); } void -- cgit v1.2.3 From b657fc82a3ca6d7ad16a59e81765f0fb0e86cdbb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:47 +0000 Subject: xfs: Kill filestreams cache flush The filestreams cache flush is not needed in the sync code as it does not affect data writeback, and it is now not used by the growfs code, either, so kill it. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index cc964faf12e9..b58f8412dfe2 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -451,9 +451,6 @@ xfs_quiesce_data( xfs_sync_data(mp, SYNC_WAIT); xfs_qm_sync(mp, SYNC_WAIT); - /* drop inode references pinned by filestreams */ - xfs_filestream_flush(mp); - /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, SYNC_WAIT); -- cgit v1.2.3 From 0cadda1c5f194f98a05d252ff4385d86d2ed0862 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jan 2010 09:56:44 +0000 Subject: xfs: remove duplicate buffer flags Currently we define aliases for the buffer flags in various namespaces, which only adds confusion. Remove all but the XBF_ flags to clean this up a bit. Note that we still abuse XFS_B_ASYNC/XBF_ASYNC for some non-buffer uses, but I'll clean that up later. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b58f8412dfe2..58c24be72c65 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -234,7 +234,7 @@ xfs_sync_inode_data( } error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? - 0 : XFS_B_ASYNC, FI_NONE); + 0 : XBF_ASYNC, FI_NONE); xfs_iunlock(ip, XFS_IOLOCK_SHARED); out_wait: @@ -370,7 +370,7 @@ xfs_sync_fsdata( if (flags & SYNC_TRYLOCK) { ASSERT(!(flags & SYNC_WAIT)); - bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + bp = xfs_getsb(mp, XBF_TRYLOCK); if (!bp) goto out; -- cgit v1.2.3 From a14a348bff2f99471a28e5928eb6801224c053d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jan 2010 09:56:46 +0000 Subject: xfs: cleanup up xfs_log_force calling conventions Remove the XFS_LOG_FORCE argument which was always set, and the XFS_LOG_URGE define, which was never used. Split xfs_log_force into a two helpers - xfs_log_force which forces the whole log, and xfs_log_force_lsn which forces up to the specified LSN. The underlying implementations already were entirely separate, as were the users. Also re-indent the new _xfs_log_force/_xfs_log_force which previously had a weird coding style. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 58c24be72c65..c9b863eacab7 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -296,10 +296,7 @@ xfs_sync_data( if (error) return XFS_ERROR(error); - xfs_log_force(mp, 0, - (flags & SYNC_WAIT) ? - XFS_LOG_FORCE | XFS_LOG_SYNC : - XFS_LOG_FORCE); + xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); return 0; } @@ -325,10 +322,6 @@ xfs_commit_dummy_trans( struct xfs_inode *ip = mp->m_rootip; struct xfs_trans *tp; int error; - int log_flags = XFS_LOG_FORCE; - - if (flags & SYNC_WAIT) - log_flags |= XFS_LOG_SYNC; /* * Put a dummy transaction in the log to tell recovery @@ -350,7 +343,7 @@ xfs_commit_dummy_trans( xfs_iunlock(ip, XFS_ILOCK_EXCL); /* the log force ensures this transaction is pushed to disk */ - xfs_log_force(mp, 0, log_flags); + xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); return error; } @@ -390,7 +383,7 @@ xfs_sync_fsdata( * become pinned in between there and here. */ if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, 0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); } @@ -575,7 +568,7 @@ xfs_flush_inodes( igrab(inode); xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); wait_for_completion(&completion); - xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_log_force(ip->i_mount, XFS_LOG_SYNC); } /* @@ -591,7 +584,7 @@ xfs_sync_worker( int error; if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); -- cgit v1.2.3 From 777df5afdb26c71634edd60582be620ff94e87a0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 6 Feb 2010 12:37:26 +1100 Subject: xfs: Make inode reclaim states explicit A.K.A.: don't rely on xfs_iflush() return value in reclaim We have gradually been moving checks out of the reclaim code because they are duplicated in xfs_iflush(). We've had a history of problems in this area, and many of them stem from the overloading of the return values from xfs_iflush() and interaction with inode flush locking to determine if the inode is safe to reclaim. With the desire to move to delayed write flushing of inodes and non-blocking inode tree reclaim walks, the overloading of the return value of xfs_iflush makes it very difficult to determine the correct thing to do next. This patch explicitly re-adds the checks to the inode reclaim code, removing the reliance on the return value of xfs_iflush() to determine what to do next. It also means that we can clearly document all the inode states that reclaim must handle and hence we can easily see that we handled all the necessary cases. This also removes the need for the xfs_inode_clean() check in xfs_iflush() as all callers now check this first (safely). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 81 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 19 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index c9b863eacab7..525260c7617f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -706,12 +706,43 @@ __xfs_inode_clear_reclaim_tag( XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); } +/* + * Inodes in different states need to be treated differently, and the return + * value of xfs_iflush is not sufficient to get this right. The following table + * lists the inode states and the reclaim actions necessary for non-blocking + * reclaim: + * + * + * inode state iflush ret required action + * --------------- ---------- --------------- + * bad - reclaim + * shutdown EIO unpin and reclaim + * clean, unpinned 0 reclaim + * stale, unpinned 0 reclaim + * clean, pinned(*) 0 unpin and reclaim + * stale, pinned 0 unpin and reclaim + * dirty, async 0 block on flush lock, reclaim + * dirty, sync flush 0 block on flush lock, reclaim + * + * (*) dgc: I don't think the clean, pinned state is possible but it gets + * handled anyway given the order of checks implemented. + * + * Hence the order of actions after gaining the locks should be: + * bad => reclaim + * shutdown => unpin and reclaim + * pinned => unpin + * stale => reclaim + * clean => reclaim + * dirty => flush, wait and reclaim + */ STATIC int xfs_reclaim_inode( struct xfs_inode *ip, struct xfs_perag *pag, int sync_mode) { + int error; + /* * The radix tree lock here protects a thread in xfs_iget from racing * with us starting reclaim on the inode. Once we have the @@ -729,30 +760,42 @@ xfs_reclaim_inode( spin_unlock(&ip->i_flags_lock); write_unlock(&pag->pag_ici_lock); - /* - * If the inode is still dirty, then flush it out. If the inode - * is not in the AIL, then it will be OK to flush it delwri as - * long as xfs_iflush() does not keep any references to the inode. - * We leave that decision up to xfs_iflush() since it has the - * knowledge of whether it's OK to simply do a delwri flush of - * the inode or whether we need to wait until the inode is - * pulled from the AIL. - * We get the flush lock regardless, though, just to make sure - * we don't free it while it is being flushed. - */ xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); - /* - * In the case of a forced shutdown we rely on xfs_iflush() to - * wait for the inode to be unpinned before returning an error. - */ - if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { - /* synchronize with xfs_iflush_done */ - xfs_iflock(ip); - xfs_ifunlock(ip); + if (is_bad_inode(VFS_I(ip))) + goto reclaim; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_iunpin_wait(ip); + goto reclaim; + } + if (xfs_ipincount(ip)) + xfs_iunpin_wait(ip); + if (xfs_iflags_test(ip, XFS_ISTALE)) + goto reclaim; + if (xfs_inode_clean(ip)) + goto reclaim; + + /* Now we have an inode that needs flushing */ + error = xfs_iflush(ip, sync_mode); + if (!error) { + switch(sync_mode) { + case XFS_IFLUSH_DELWRI_ELSE_ASYNC: + case XFS_IFLUSH_DELWRI: + case XFS_IFLUSH_ASYNC: + case XFS_IFLUSH_DELWRI_ELSE_SYNC: + case XFS_IFLUSH_SYNC: + /* IO issued, synchronise with IO completion */ + xfs_iflock(ip); + break; + default: + ASSERT(0); + break; + } } +reclaim: + xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_ireclaim(ip); return 0; -- cgit v1.2.3 From c854363e80b49dd04a4de18ebc379eb8c8806674 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 6 Feb 2010 12:39:36 +1100 Subject: xfs: Use delayed write for inodes rather than async V2 We currently do background inode flush asynchronously, resulting in inodes being written in whatever order the background writeback issues them. Not only that, there are also blocking and non-blocking asynchronous inode flushes, depending on where the flush comes from. This patch completely removes asynchronous inode writeback. It removes all the strange writeback modes and replaces them with either a synchronous flush or a non-blocking delayed write flush. That is, inode flushes will only issue IO directly if they are synchronous, and background flushing may do nothing if the operation would block (e.g. on a pinned inode or buffer lock). Delayed write flushes will now result in the inode buffer sitting in the delwri queue of the buffer cache to be flushed by either an AIL push or by the xfsbufd timing out the buffer. This will allow accumulation of dirty inode buffers in memory and allow optimisation of inode cluster writeback at the xfsbufd level where we have much greater queue depths than the block layer elevators. We will also get adjacent inode cluster buffer IO merging for free when a later patch in the series allows sorting of the delayed write buffers before dispatch. This effectively means that any inode that is written back by background writeback will be seen as flush locked during AIL pushing, and will result in the buffers being pushed from there. This writeback path is currently non-optimal, but the next patch in the series will fix that problem. A side effect of this delayed write mechanism is that background inode reclaim will no longer directly flush inodes, nor can it wait on the flush lock. The result is that inode reclaim must leave the inode in the reclaimable state until it is clean. Hence attempts to reclaim a dirty inode in the background will simply skip the inode until it is clean and this allows other mechanisms (i.e. xfsbufd) to do more optimal writeback of the dirty buffers. As a result, the inode reclaim code has been rewritten so that it no longer relies on the ambiguous return values of xfs_iflush() to determine whether it is safe to reclaim an inode. Portions of this patch are derived from patches by Christoph Hellwig. Version 2: - cleanup reclaim code as suggested by Christoph - log background reclaim inode flush errors - just pass sync flags to xfs_iflush Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 105 ++++++++++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 29 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 525260c7617f..a9f6d20aff41 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -270,8 +270,7 @@ xfs_sync_inode_attr( goto out_unlock; } - error = xfs_iflush(ip, (flags & SYNC_WAIT) ? - XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI); + error = xfs_iflush(ip, flags); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -460,16 +459,18 @@ xfs_quiesce_fs( { int count = 0, pincount; + xfs_reclaim_inodes(mp, 0); xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop * will flush most meta data but that will generate more meta data * (typically directory updates). Which then must be flushed and - * logged before we can write the unmount record. + * logged before we can write the unmount record. We also so sync + * reclaim of inodes to catch any that the above delwri flush skipped. */ do { + xfs_reclaim_inodes(mp, SYNC_WAIT); xfs_sync_attr(mp, SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { @@ -585,7 +586,7 @@ xfs_sync_worker( if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_log_force(mp, 0); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); @@ -719,21 +720,42 @@ __xfs_inode_clear_reclaim_tag( * shutdown EIO unpin and reclaim * clean, unpinned 0 reclaim * stale, unpinned 0 reclaim - * clean, pinned(*) 0 unpin and reclaim - * stale, pinned 0 unpin and reclaim - * dirty, async 0 block on flush lock, reclaim - * dirty, sync flush 0 block on flush lock, reclaim + * clean, pinned(*) 0 requeue + * stale, pinned EAGAIN requeue + * dirty, delwri ok 0 requeue + * dirty, delwri blocked EAGAIN requeue + * dirty, sync flush 0 reclaim * * (*) dgc: I don't think the clean, pinned state is possible but it gets * handled anyway given the order of checks implemented. * + * As can be seen from the table, the return value of xfs_iflush() is not + * sufficient to correctly decide the reclaim action here. The checks in + * xfs_iflush() might look like duplicates, but they are not. + * + * Also, because we get the flush lock first, we know that any inode that has + * been flushed delwri has had the flush completed by the time we check that + * the inode is clean. The clean inode check needs to be done before flushing + * the inode delwri otherwise we would loop forever requeuing clean inodes as + * we cannot tell apart a successful delwri flush and a clean inode from the + * return value of xfs_iflush(). + * + * Note that because the inode is flushed delayed write by background + * writeback, the flush lock may already be held here and waiting on it can + * result in very long latencies. Hence for sync reclaims, where we wait on the + * flush lock, the caller should push out delayed write inodes first before + * trying to reclaim them to minimise the amount of time spent waiting. For + * background relaim, we just requeue the inode for the next pass. + * * Hence the order of actions after gaining the locks should be: * bad => reclaim * shutdown => unpin and reclaim - * pinned => unpin + * pinned, delwri => requeue + * pinned, sync => unpin * stale => reclaim * clean => reclaim - * dirty => flush, wait and reclaim + * dirty, delwri => flush and requeue + * dirty, sync => flush, wait and reclaim */ STATIC int xfs_reclaim_inode( @@ -741,7 +763,7 @@ xfs_reclaim_inode( struct xfs_perag *pag, int sync_mode) { - int error; + int error = 0; /* * The radix tree lock here protects a thread in xfs_iget from racing @@ -761,7 +783,11 @@ xfs_reclaim_inode( write_unlock(&pag->pag_ici_lock); xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); + if (!xfs_iflock_nowait(ip)) { + if (!(sync_mode & SYNC_WAIT)) + goto out; + xfs_iflock(ip); + } if (is_bad_inode(VFS_I(ip))) goto reclaim; @@ -769,8 +795,13 @@ xfs_reclaim_inode( xfs_iunpin_wait(ip); goto reclaim; } - if (xfs_ipincount(ip)) + if (xfs_ipincount(ip)) { + if (!(sync_mode & SYNC_WAIT)) { + xfs_ifunlock(ip); + goto out; + } xfs_iunpin_wait(ip); + } if (xfs_iflags_test(ip, XFS_ISTALE)) goto reclaim; if (xfs_inode_clean(ip)) @@ -778,27 +809,43 @@ xfs_reclaim_inode( /* Now we have an inode that needs flushing */ error = xfs_iflush(ip, sync_mode); - if (!error) { - switch(sync_mode) { - case XFS_IFLUSH_DELWRI_ELSE_ASYNC: - case XFS_IFLUSH_DELWRI: - case XFS_IFLUSH_ASYNC: - case XFS_IFLUSH_DELWRI_ELSE_SYNC: - case XFS_IFLUSH_SYNC: - /* IO issued, synchronise with IO completion */ - xfs_iflock(ip); - break; - default: - ASSERT(0); - break; - } + if (sync_mode & SYNC_WAIT) { + xfs_iflock(ip); + goto reclaim; } + /* + * When we have to flush an inode but don't have SYNC_WAIT set, we + * flush the inode out using a delwri buffer and wait for the next + * call into reclaim to find it in a clean state instead of waiting for + * it now. We also don't return errors here - if the error is transient + * then the next reclaim pass will flush the inode, and if the error + * is permanent then the next sync reclaim will relcaim the inode and + * pass on the error. + */ + if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "inode 0x%llx background reclaim flush failed with %d", + (long long)ip->i_ino, error); + } +out: + xfs_iflags_clear(ip, XFS_IRECLAIM); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * We could return EAGAIN here to make reclaim rescan the inode tree in + * a short while. However, this just burns CPU time scanning the tree + * waiting for IO to complete and xfssyncd never goes back to the idle + * state. Instead, return 0 to let the next scheduled background reclaim + * attempt to reclaim the inode again. + */ + return 0; + reclaim: xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_ireclaim(ip); - return 0; + return error; + } int -- cgit v1.2.3