From e70b73f84f474cc594a39bd8ff083974e6d69aea Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 23 Apr 2012 15:58:49 +1000 Subject: xfs: clean up buffer get/read call API The xfs_buf_get/read API is not consistent in the units it uses, and does not use appropriate or consistent units/types for the variables. Convert the API to use disk addresses and block counts for all buffer get and read calls. Use consistent naming for all the functions and their declarations, and convert the internal functions to use disk addresses and block counts to avoid need to convert them from one type to another and back again. Fix all the callers to use disk addresses and block counts. In many cases, this removes an additional conversion from the function call as the callers already have a block count. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 64981d7e7375..445c224742b8 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1966,7 +1966,7 @@ xfs_zero_remaining_bytes( bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp, - mp->m_sb.sb_blocksize, XBF_DONT_BLOCK); + BTOBB(mp->m_sb.sb_blocksize), XBF_DONT_BLOCK); if (!bp) return XFS_ERROR(ENOMEM); -- cgit v1.2.3 From a8acad70731e7d0585f25f33f8a009176f001f70 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 23 Apr 2012 15:58:54 +1000 Subject: xfs: kill XBF_LOCK Buffers are always returned locked from the lookup routines. Hence we don't need to tell the lookup routines to return locked buffers, on to try and lock them. Remove XBF_LOCK from all the callers and from internal buffer cache usage. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 445c224742b8..8f99c7781f39 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -82,7 +82,7 @@ xfs_readlink_bmap( byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), - XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); + XBF_MAPPED | XBF_DONT_BLOCK); if (!bp) return XFS_ERROR(ENOMEM); error = bp->b_error; -- cgit v1.2.3 From aa5c158ec97bd4014f47a2bc0150fb6b20e6c48b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 23 Apr 2012 15:58:56 +1000 Subject: xfs: kill XBF_DONTBLOCK Just about all callers of xfs_buf_read() and xfs_buf_get() use XBF_DONTBLOCK. This is used to make memory allocation use GFP_NOFS rather than GFP_KERNEL to avoid recursion through memory reclaim back into the filesystem. All the blocking get calls in growfs occur inside a transaction, even though they are no part of the transaction, so all allocation will be GFP_NOFS due to the task flag PF_TRANS being set. The blocking read calls occur during log recovery, so they will probably be unaffected by converting to GFP_NOFS allocations. Hence make XBF_DONTBLOCK behaviour always occur for buffers and kill the flag. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8f99c7781f39..6c187450f1c8 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -82,7 +82,7 @@ xfs_readlink_bmap( byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), - XBF_MAPPED | XBF_DONT_BLOCK); + XBF_MAPPED); if (!bp) return XFS_ERROR(ENOMEM); error = bp->b_error; @@ -1966,7 +1966,7 @@ xfs_zero_remaining_bytes( bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp, - BTOBB(mp->m_sb.sb_blocksize), XBF_DONT_BLOCK); + BTOBB(mp->m_sb.sb_blocksize), 0); if (!bp) return XFS_ERROR(ENOMEM); -- cgit v1.2.3 From bc4010ecb8f4d4316e1a63a879a2715e49d113ad Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 23 Apr 2012 15:58:57 +1000 Subject: xfs: use iolock on XFS_IOC_ALLOCSP calls fsstress has a particular effective way of stopping debug XFS kernels. We keep seeing assert failures due finding delayed allocation extents where there should be none. This shows up when extracting extent maps and we are holding all the locks we should be to prevent races, so this really makes no sense to see these errors. After checking that fsstress does not use mmap, it occurred to me that fsstress uses something that no sane application uses - the XFS_IOC_ALLOCSP ioctl interfaces for preallocation. These interfaces do allocation of blocks beyond EOF without using preallocation, and then call setattr to extend and zero the allocated blocks. THe problem here is this is a buffered write, and hence the allocation is a delayed allocation. Unlike the buffered IO path, the allocation and zeroing are not serialised using the IOLOCK. Hence the ALLOCSP operation can race with operations holding the iolock to prevent buffered IO operations from occurring. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 6c187450f1c8..9b6c94e7546e 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2315,17 +2315,33 @@ xfs_change_file_space( case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP: case XFS_IOC_FREESP64: + /* + * These operations actually do IO when extending the file, but + * the allocation is done seperately to the zeroing that is + * done. This set of operations need to be serialised against + * other IO operations, such as truncate and buffered IO. We + * need to take the IOLOCK here to serialise the allocation and + * zeroing IO to prevent other IOLOCK holders (e.g. getbmap, + * truncate, direct IO) from racing against the transient + * allocated but not written state we can have here. + */ + xfs_ilock(ip, XFS_IOLOCK_EXCL); if (startoffset > fsize) { error = xfs_alloc_file_space(ip, fsize, - startoffset - fsize, 0, attr_flags); - if (error) + startoffset - fsize, 0, + attr_flags | XFS_ATTR_NOLOCK); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_EXCL); break; + } } iattr.ia_valid = ATTR_SIZE; iattr.ia_size = startoffset; - error = xfs_setattr_size(ip, &iattr, attr_flags); + error = xfs_setattr_size(ip, &iattr, + attr_flags | XFS_ATTR_NOLOCK); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); if (error) return error; -- cgit v1.2.3 From 60a34607b26b60d6b5c5c928ede7fc84b0f06b85 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 23 Apr 2012 15:58:58 +1000 Subject: xfs: move xfsagino_t to xfs_types.h Untangle the header file includes a bit by moving the definition of xfs_agino_t to xfs_types.h. This removes the dependency that xfs_ag.h has on xfs_inum.h, meaning we don't need to include xfs_inum.h everywhere we include xfs_ag.h. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 9b6c94e7546e..9e8266981f47 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -21,7 +21,6 @@ #include "xfs_types.h" #include "xfs_bit.h" #include "xfs_log.h" -#include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -- cgit v1.2.3 From 2a0ec1d9ed7f3aa7974fccfbb612fadda2e10bad Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 23 Apr 2012 15:59:02 +1000 Subject: xfs: move xfs_get_extsz_hint() and kill xfs_rw.h The only thing left in xfs_rw.h is a function prototype for an inode function. Move that to xfs_inode.h, and kill xfs_rw.h. Also move the function implementing the prototype from xfs_rw.c to xfs_inode.c so we only have one function left in xfs_rw.c Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 9e8266981f47..5b3f5b13a008 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -38,7 +38,6 @@ #include "xfs_bmap.h" #include "xfs_acl.h" #include "xfs_attr.h" -#include "xfs_rw.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_utils.h" -- cgit v1.2.3 From 611c99468c7aa1a5c2bb6d46e7b5d8e53eecfefd Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 23 Apr 2012 15:59:07 +1000 Subject: xfs: make XBF_MAPPED the default behaviour Rather than specifying XBF_MAPPED for almost all buffers, introduce XBF_UNMAPPED for the couple of users that use unmapped buffers. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 5b3f5b13a008..82b000f8ad2f 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -79,8 +79,7 @@ xfs_readlink_bmap( d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); - bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), - XBF_MAPPED); + bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); if (!bp) return XFS_ERROR(ENOMEM); error = bp->b_error; -- cgit v1.2.3 From ea562ed6e7df5acd9392d993882c39e855099165 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 8 May 2012 20:48:53 +1000 Subject: xfs: fix delalloc quota accounting on failure xfstest 270 was causing quota reservations way beyond what was sane (ten to hundreds of TB) for a 4GB filesystem. There's a sign problem in the error handling path of xfs_bmapi_reserve_delalloc() because xfs_trans_unreserve_quota_nblks() simple negates the value passed - which doesn't work for an unsigned variable. This causes reservations of close to 2^32 block instead of removing a reservation of a handful of blocks. Fix the same problem in the other xfs_trans_unreserve_quota_nblks() callers where unsigned integer variables are used, too. Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen Signed-off-by: Ben Myers --- fs/xfs/xfs_vnodeops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/xfs_vnodeops.c') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 82b000f8ad2f..b6a82d817a82 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1916,7 +1916,7 @@ xfs_alloc_file_space( error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); - xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); + xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); -- cgit v1.2.3