From 78c931b8be75456562b55ed4e27878f1519e1367 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 28 Nov 2014 13:59:58 +1100 Subject: xfs: replace global xfslogd wq with per-mount wq The xfslogd workqueue is a global, single-job workqueue for buffer ioend processing. This means we allow for a single work item at a time for all possible XFS mounts on a system. fsstress testing in loopback XFS over XFS configurations has reproduced xfslogd deadlocks due to the single threaded nature of the queue and dependencies introduced between the separate XFS instances by online discard (-o discard). Discard over a loopback device converts the discard request to a hole punch (fallocate) on the underlying file. Online discard requests are issued synchronously and from xfslogd context in XFS, hence the xfslogd workqueue is blocked in the upper fs waiting on a hole punch request to be servied in the lower fs. If the lower fs issues I/O that depends on xfslogd to complete, both filesystems end up hung indefinitely. This is reproduced reliabily by generic/013 on XFS->loop->XFS test devices with the '-o discard' mount option. Further, docker implementations appear to use this kind of configuration for container instance filesystems by default (container fs->dm-> loop->base fs) and therefore are subject to this deadlock when running on XFS. Replace the global xfslogd workqueue with a per-mount variant. This guarantees each mount access to a single worker and prevents deadlocks due to inter-fs dependencies introduced by discard. Since the queue is only responsible for buffer iodone processing at this point in time, rename xfslogd to xfs-buf. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 24b4ebea0d4d..c06d790a3000 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -44,8 +44,6 @@ static kmem_zone_t *xfs_buf_zone; -static struct workqueue_struct *xfslogd_workqueue; - #ifdef XFS_BUF_LOCK_TRACKING # define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) # define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) @@ -1053,7 +1051,7 @@ xfs_buf_ioend_async( struct xfs_buf *bp) { INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work); - queue_work(xfslogd_workqueue, &bp->b_iodone_work); + queue_work(bp->b_target->bt_mount->m_buf_workqueue, &bp->b_iodone_work); } void @@ -1882,15 +1880,8 @@ xfs_buf_init(void) if (!xfs_buf_zone) goto out; - xfslogd_workqueue = alloc_workqueue("xfslogd", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 1); - if (!xfslogd_workqueue) - goto out_free_buf_zone; - return 0; - out_free_buf_zone: - kmem_zone_destroy(xfs_buf_zone); out: return -ENOMEM; } @@ -1898,6 +1889,5 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { - destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); } -- cgit v1.2.3 From db52d09ecbf85c54e263a9d1ebfb615a9b2b3ba6 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 28 Nov 2014 14:03:55 +1100 Subject: xfs: catch invalid negative blknos in _xfs_buf_find() Here blkno is a daddr_t, which is a __s64; it's possible to hold a value which is negative, and thus pass the (blkno >= eofs) test. Then we try to do a xfs_perag_get() for a ridiculous agno via xfs_daddr_to_agno(), and bad things happen when that fails, and returns a null pag which is dereferenced shortly thereafter. Found via a user-supplied fuzzed image... Signed-off-by: Eric Sandeen Reviewed-by: Mark Tinguely Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c06d790a3000..d083889535a2 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -461,7 +461,7 @@ _xfs_buf_find( * have to check that the buffer falls within the filesystem bounds. */ eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); - if (blkno >= eofs) { + if (blkno < 0 || blkno >= eofs) { /* * XXX (dgc): we should really be returning -EFSCORRUPTED here, * but none of the higher level infrastructure supports -- cgit v1.2.3 From 4fb6e8ade2c70ef1a13f358963b3298fd8b72bcc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2014 14:25:04 +1100 Subject: xfs: merge xfs_ag.h into xfs_format.h More on-disk format consolidation. A few declarations that weren't on-disk format related move into better suitable spots. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 24b4ebea0d4d..b0a594e6cbb8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -34,10 +34,10 @@ #include #include +#include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sb.h" -#include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_trace.h" #include "xfs_log.h" -- cgit v1.2.3 From b29c70f59870dad0945b0e0b3fe3758ad528e268 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 4 Dec 2014 09:43:17 +1100 Subject: xfs: split metadata and log buffer completion to separate workqueues XFS traditionally sends all buffer I/O completion work to a single workqueue. This includes metadata buffer completion and log buffer completion. The log buffer completion requires a high priority queue to prevent stalls due to log forces getting stuck behind other queued work. Rather than continue to prioritize all buffer I/O completion due to the needs of log completion, split log buffer completion off to m_log_workqueue and move the high priority flag from m_buf_workqueue to m_log_workqueue. Add a b_ioend_wq wq pointer to xfs_buf to allow completion workqueue customization on a per-buffer basis. Initialize b_ioend_wq to m_buf_workqueue by default in the generic buffer I/O submission path. Finally, override the default wq with the high priority m_log_workqueue in the log buffer I/O submission path. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs/xfs/xfs_buf.c') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d083889535a2..945bea924e48 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1041,7 +1041,7 @@ xfs_buf_ioend_work( struct work_struct *work) { struct xfs_buf *bp = - container_of(work, xfs_buf_t, b_iodone_work); + container_of(work, xfs_buf_t, b_ioend_work); xfs_buf_ioend(bp); } @@ -1050,8 +1050,8 @@ void xfs_buf_ioend_async( struct xfs_buf *bp) { - INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work); - queue_work(bp->b_target->bt_mount->m_buf_workqueue, &bp->b_iodone_work); + INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); + queue_work(bp->b_ioend_wq, &bp->b_ioend_work); } void @@ -1220,6 +1220,13 @@ _xfs_buf_ioapply( */ bp->b_error = 0; + /* + * Initialize the I/O completion workqueue if we haven't yet or the + * submitter has not opted to specify a custom one. + */ + if (!bp->b_ioend_wq) + bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue; + if (bp->b_flags & XBF_WRITE) { if (bp->b_flags & XBF_SYNCIO) rw = WRITE_SYNC; -- cgit v1.2.3