diff options
Diffstat (limited to 'fs/xfs')
52 files changed, 1320 insertions, 1050 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index d3ff78354638..49e3e7e5e3dc 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -1,150 +1 @@ -# -# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it would be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# -# Further, this software is distributed without any warranty that it is -# free of the rightful claim of any third person regarding infringement -# or the like. Any license provided herein, whether implied or -# otherwise, applies only to this software file. Patent licenses, if -# any, provided herein do not apply to combinations of this program with -# other software, or any other product whatsoever. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write the Free Software Foundation, Inc., 59 -# Temple Place - Suite 330, Boston MA 02111-1307, USA. 
-# -# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, -# Mountain View, CA 94043, or: -# -# http://www.sgi.com -# -# For further information regarding this notice, see: -# -# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ -# - -EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char - -ifeq ($(CONFIG_XFS_DEBUG),y) - EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG - EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING -endif -ifeq ($(CONFIG_XFS_TRACE),y) - EXTRA_CFLAGS += -DXFS_ALLOC_TRACE - EXTRA_CFLAGS += -DXFS_ATTR_TRACE - EXTRA_CFLAGS += -DXFS_BLI_TRACE - EXTRA_CFLAGS += -DXFS_BMAP_TRACE - EXTRA_CFLAGS += -DXFS_BMBT_TRACE - EXTRA_CFLAGS += -DXFS_DIR_TRACE - EXTRA_CFLAGS += -DXFS_DIR2_TRACE - EXTRA_CFLAGS += -DXFS_DQUOT_TRACE - EXTRA_CFLAGS += -DXFS_ILOCK_TRACE - EXTRA_CFLAGS += -DXFS_LOG_TRACE - EXTRA_CFLAGS += -DXFS_RW_TRACE - EXTRA_CFLAGS += -DPAGEBUF_TRACE - EXTRA_CFLAGS += -DXFS_VNODE_TRACE -endif - -obj-$(CONFIG_XFS_FS) += xfs.o - -xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ - xfs_dquot.o \ - xfs_dquot_item.o \ - xfs_trans_dquot.o \ - xfs_qm_syscalls.o \ - xfs_qm_bhv.o \ - xfs_qm.o) -ifeq ($(CONFIG_XFS_QUOTA),y) -xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o -endif - -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o -xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o -xfs-$(CONFIG_PROC_FS) += linux-2.6/xfs_stats.o -xfs-$(CONFIG_SYSCTL) += linux-2.6/xfs_sysctl.o -xfs-$(CONFIG_COMPAT) += linux-2.6/xfs_ioctl32.o -xfs-$(CONFIG_XFS_EXPORT) += linux-2.6/xfs_export.o - - -xfs-y += xfs_alloc.o \ - xfs_alloc_btree.o \ - xfs_attr.o \ - xfs_attr_leaf.o \ - xfs_behavior.o \ - xfs_bit.o \ - xfs_bmap.o \ - xfs_bmap_btree.o \ - xfs_btree.o \ - xfs_buf_item.o \ - xfs_da_btree.o \ - xfs_dir.o \ - xfs_dir2.o \ - xfs_dir2_block.o \ - xfs_dir2_data.o \ - xfs_dir2_leaf.o \ - xfs_dir2_node.o \ - xfs_dir2_sf.o \ - xfs_dir_leaf.o \ - xfs_error.o \ - xfs_extfree_item.o \ - xfs_fsops.o \ - xfs_ialloc.o \ - xfs_ialloc_btree.o \ - xfs_iget.o \ - xfs_inode.o \ - xfs_inode_item.o \ - 
xfs_iocore.o \ - xfs_iomap.o \ - xfs_itable.o \ - xfs_dfrag.o \ - xfs_log.o \ - xfs_log_recover.o \ - xfs_macros.o \ - xfs_mount.o \ - xfs_rename.o \ - xfs_trans.o \ - xfs_trans_ail.o \ - xfs_trans_buf.o \ - xfs_trans_extfree.o \ - xfs_trans_inode.o \ - xfs_trans_item.o \ - xfs_utils.o \ - xfs_vfsops.o \ - xfs_vnodeops.o \ - xfs_rw.o \ - xfs_dmops.o \ - xfs_qmops.o - -xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o - -# Objects in linux-2.6/ -xfs-y += $(addprefix linux-2.6/, \ - kmem.o \ - xfs_aops.o \ - xfs_buf.o \ - xfs_file.o \ - xfs_fs_subr.o \ - xfs_globals.o \ - xfs_ioctl.o \ - xfs_iops.o \ - xfs_lrw.o \ - xfs_super.o \ - xfs_vfs.o \ - xfs_vnode.o) - -# Objects in support/ -xfs-y += $(addprefix support/, \ - debug.o \ - move.o \ - qsort.o \ - uuid.o) - -xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o - +include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL) diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 new file mode 100644 index 000000000000..fbfcbe5a7cda --- /dev/null +++ b/fs/xfs/Makefile-linux-2.6 @@ -0,0 +1,141 @@ +# +# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char + +XFS_LINUX := linux-2.6 + +ifeq ($(CONFIG_XFS_DEBUG),y) + EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG + EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING +endif +ifeq ($(CONFIG_XFS_TRACE),y) + EXTRA_CFLAGS += -DXFS_ALLOC_TRACE + EXTRA_CFLAGS += -DXFS_ATTR_TRACE + EXTRA_CFLAGS += -DXFS_BLI_TRACE + EXTRA_CFLAGS += -DXFS_BMAP_TRACE + EXTRA_CFLAGS += -DXFS_BMBT_TRACE + EXTRA_CFLAGS += -DXFS_DIR_TRACE + EXTRA_CFLAGS += -DXFS_DIR2_TRACE + EXTRA_CFLAGS += -DXFS_DQUOT_TRACE + EXTRA_CFLAGS += -DXFS_ILOCK_TRACE + EXTRA_CFLAGS += -DXFS_LOG_TRACE + EXTRA_CFLAGS += -DXFS_RW_TRACE + EXTRA_CFLAGS += -DPAGEBUF_TRACE + EXTRA_CFLAGS += -DXFS_VNODE_TRACE +endif + +obj-$(CONFIG_XFS_FS) += xfs.o +obj-$(CONFIG_XFS_QUOTA) += quota/ + +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o +xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o +xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o +xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o +xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o +xfs-$(CONFIG_XFS_EXPORT) += $(XFS_LINUX)/xfs_export.o + + +xfs-y += xfs_alloc.o \ + xfs_alloc_btree.o \ + xfs_attr.o \ + xfs_attr_leaf.o \ + xfs_behavior.o \ + xfs_bit.o \ + xfs_bmap.o \ + xfs_bmap_btree.o \ + xfs_btree.o \ + xfs_buf_item.o \ + xfs_da_btree.o \ + xfs_dir.o \ + xfs_dir2.o \ + xfs_dir2_block.o \ + xfs_dir2_data.o \ + xfs_dir2_leaf.o \ + xfs_dir2_node.o \ + xfs_dir2_sf.o \ + xfs_dir_leaf.o \ + xfs_error.o \ + xfs_extfree_item.o \ + xfs_fsops.o \ + xfs_ialloc.o \ + xfs_ialloc_btree.o \ + xfs_iget.o \ + 
xfs_inode.o \ + xfs_inode_item.o \ + xfs_iocore.o \ + xfs_iomap.o \ + xfs_itable.o \ + xfs_dfrag.o \ + xfs_log.o \ + xfs_log_recover.o \ + xfs_macros.o \ + xfs_mount.o \ + xfs_rename.o \ + xfs_trans.o \ + xfs_trans_ail.o \ + xfs_trans_buf.o \ + xfs_trans_extfree.o \ + xfs_trans_inode.o \ + xfs_trans_item.o \ + xfs_utils.o \ + xfs_vfsops.o \ + xfs_vnodeops.o \ + xfs_rw.o \ + xfs_dmops.o \ + xfs_qmops.o + +xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o + +# Objects in linux/ +xfs-y += $(addprefix $(XFS_LINUX)/, \ + kmem.o \ + xfs_aops.o \ + xfs_buf.o \ + xfs_file.o \ + xfs_fs_subr.o \ + xfs_globals.o \ + xfs_ioctl.o \ + xfs_iops.o \ + xfs_lrw.o \ + xfs_super.o \ + xfs_vfs.o \ + xfs_vnode.o) + +# Objects in support/ +xfs-y += $(addprefix support/, \ + debug.o \ + move.o \ + uuid.o) + +xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o + diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 364ea8c386b1..4b184559f231 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -45,11 +45,11 @@ void * -kmem_alloc(size_t size, int flags) +kmem_alloc(size_t size, unsigned int __nocast flags) { - int retries = 0; - int lflags = kmem_flags_convert(flags); - void *ptr; + int retries = 0; + unsigned int lflags = kmem_flags_convert(flags); + void *ptr; do { if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) @@ -67,7 +67,7 @@ kmem_alloc(size_t size, int flags) } void * -kmem_zalloc(size_t size, int flags) +kmem_zalloc(size_t size, unsigned int __nocast flags) { void *ptr; @@ -89,7 +89,8 @@ kmem_free(void *ptr, size_t size) } void * -kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) +kmem_realloc(void *ptr, size_t newsize, size_t oldsize, + unsigned int __nocast flags) { void *new; @@ -104,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) } void * -kmem_zone_alloc(kmem_zone_t *zone, int flags) +kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) { - int retries = 0; - int lflags = 
kmem_flags_convert(flags); - void *ptr; + int retries = 0; + unsigned int lflags = kmem_flags_convert(flags); + void *ptr; do { ptr = kmem_cache_alloc(zone, lflags); @@ -123,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, int flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, int flags) +kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) { void *ptr; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 1397b669b059..109fcf27e256 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -39,10 +39,10 @@ /* * memory management routines */ -#define KM_SLEEP 0x0001 -#define KM_NOSLEEP 0x0002 -#define KM_NOFS 0x0004 -#define KM_MAYFAIL 0x0008 +#define KM_SLEEP 0x0001u +#define KM_NOSLEEP 0x0002u +#define KM_NOFS 0x0004u +#define KM_MAYFAIL 0x0008u #define kmem_zone kmem_cache_s #define kmem_zone_t kmem_cache_t @@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t; *(NSTATEP) = *(OSTATEP); \ } while (0) -static __inline unsigned int kmem_flags_convert(int flags) +static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags) { - int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ + unsigned int lflags = __GFP_NOWARN; /* we'll report problems, if need be */ #ifdef DEBUG if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) { @@ -125,12 +125,13 @@ kmem_zone_destroy(kmem_zone_t *zone) BUG(); } -extern void *kmem_zone_zalloc(kmem_zone_t *, int); -extern void *kmem_zone_alloc(kmem_zone_t *, int); +extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_alloc(size_t, int); -extern void *kmem_realloc(void *, size_t, size_t, int); -extern void *kmem_zalloc(size_t, int); +extern void *kmem_alloc(size_t, unsigned int __nocast); +extern void *kmem_realloc(void *, size_t, size_t, + unsigned int __nocast); +extern void *kmem_zalloc(size_t, unsigned int __nocast); extern void kmem_free(void *, size_t); 
typedef struct shrinker *kmem_shaker_t; diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h index bcf60a0b8df0..0039504069a5 100644 --- a/fs/xfs/linux-2.6/spin.h +++ b/fs/xfs/linux-2.6/spin.h @@ -45,6 +45,9 @@ typedef spinlock_t lock_t; #define SPLDECL(s) unsigned long s +#ifndef DEFINE_SPINLOCK +#define DEFINE_SPINLOCK(s) spinlock_t s = SPIN_LOCK_UNLOCKED +#endif #define spinlock_init(lock, name) spin_lock_init(lock) #define spinlock_destroy(lock) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index a3a4b5aaf5d9..c6c077978fe3 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -104,66 +104,114 @@ xfs_page_trace( #define xfs_page_trace(tag, inode, page, mask) #endif -void -linvfs_unwritten_done( - struct buffer_head *bh, - int uptodate) +/* + * Schedule IO completion handling on a xfsdatad if this was + * the final hold on this ioend. + */ +STATIC void +xfs_finish_ioend( + xfs_ioend_t *ioend) { - xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; + if (atomic_dec_and_test(&ioend->io_remaining)) + queue_work(xfsdatad_workqueue, &ioend->io_work); +} - ASSERT(buffer_unwritten(bh)); - bh->b_end_io = NULL; - clear_buffer_unwritten(bh); - if (!uptodate) - pagebuf_ioerror(pb, EIO); - if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { - pagebuf_iodone(pb, 1, 1); - } - end_buffer_async_write(bh, uptodate); +STATIC void +xfs_destroy_ioend( + xfs_ioend_t *ioend) +{ + vn_iowake(ioend->io_vnode); + mempool_free(ioend, xfs_ioend_pool); } /* * Issue transactions to convert a buffer range from unwritten - * to written extents (buffered IO). + * to written extents. 
*/ STATIC void -linvfs_unwritten_convert( - xfs_buf_t *bp) +xfs_end_bio_unwritten( + void *data) { - vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); - int error; + xfs_ioend_t *ioend = data; + vnode_t *vp = ioend->io_vnode; + xfs_off_t offset = ioend->io_offset; + size_t size = ioend->io_size; + struct buffer_head *bh, *next; + int error; + + if (ioend->io_uptodate) + VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); + + /* ioend->io_buffer_head is only non-NULL for buffered I/O */ + for (bh = ioend->io_buffer_head; bh; bh = next) { + next = bh->b_private; + + bh->b_end_io = NULL; + clear_buffer_unwritten(bh); + end_buffer_async_write(bh, ioend->io_uptodate); + } - BUG_ON(atomic_read(&bp->pb_hold) < 1); - VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp), - BMAPI_UNWRITTEN, NULL, NULL, error); - XFS_BUF_SET_FSPRIVATE(bp, NULL); - XFS_BUF_CLR_IODONE_FUNC(bp); - XFS_BUF_UNDATAIO(bp); - iput(LINVFS_GET_IP(vp)); - pagebuf_iodone(bp, 0, 0); + xfs_destroy_ioend(ioend); } /* - * Issue transactions to convert a buffer range from unwritten - * to written extents (direct IO). + * Allocate and initialise an IO completion structure. + * We need to track unwritten extent write completion here initially. + * We'll need to extend this for updating the ondisk inode size later + * (vs. incore size). 
*/ -STATIC void -linvfs_unwritten_convert_direct( - struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private) +STATIC xfs_ioend_t * +xfs_alloc_ioend( + struct inode *inode) { - struct inode *inode = iocb->ki_filp->f_dentry->d_inode; - ASSERT(!private || inode == (struct inode *)private); + xfs_ioend_t *ioend; - /* private indicates an unwritten extent lay beneath this IO */ - if (private && size > 0) { - vnode_t *vp = LINVFS_GET_VP(inode); - int error; + ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); - VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); - } + /* + * Set the count to 1 initially, which will prevent an I/O + * completion callback from happening before we have started + * all the I/O from calling the completion routine too early. + */ + atomic_set(&ioend->io_remaining, 1); + ioend->io_uptodate = 1; /* cleared if any I/O fails */ + ioend->io_vnode = LINVFS_GET_VP(inode); + ioend->io_buffer_head = NULL; + atomic_inc(&ioend->io_vnode->v_iocount); + ioend->io_offset = 0; + ioend->io_size = 0; + + INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); + + return ioend; +} + +void +linvfs_unwritten_done( + struct buffer_head *bh, + int uptodate) +{ + xfs_ioend_t *ioend = bh->b_private; + static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED; + unsigned long flags; + + ASSERT(buffer_unwritten(bh)); + bh->b_end_io = NULL; + + if (!uptodate) + ioend->io_uptodate = 0; + + /* + * Deep magic here. We reuse b_private in the buffer_heads to build + * a chain for completing the I/O from user context after we've issued + * a transaction to convert the unwritten extent. 
+ */ + spin_lock_irqsave(&unwritten_done_lock, flags); + bh->b_private = ioend->io_buffer_head; + ioend->io_buffer_head = bh; + spin_unlock_irqrestore(&unwritten_done_lock, flags); + + xfs_finish_ioend(ioend); } STATIC int @@ -255,7 +303,7 @@ xfs_probe_unwritten_page( struct address_space *mapping, pgoff_t index, xfs_iomap_t *iomapp, - xfs_buf_t *pb, + xfs_ioend_t *ioend, unsigned long max_offset, unsigned long *fsbs, unsigned int bbits) @@ -283,7 +331,7 @@ xfs_probe_unwritten_page( break; xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); set_buffer_unwritten_io(bh); - bh->b_private = pb; + bh->b_private = ioend; p_offset += bh->b_size; (*fsbs)++; } while ((bh = bh->b_this_page) != head); @@ -434,34 +482,15 @@ xfs_map_unwritten( { struct buffer_head *bh = curr; xfs_iomap_t *tmp; - xfs_buf_t *pb; - loff_t offset, size; + xfs_ioend_t *ioend; + loff_t offset; unsigned long nblocks = 0; offset = start_page->index; offset <<= PAGE_CACHE_SHIFT; offset += p_offset; - /* get an "empty" pagebuf to manage IO completion - * Proper values will be set before returning */ - pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0); - if (!pb) - return -EAGAIN; - - /* Take a reference to the inode to prevent it from - * being reclaimed while we have outstanding unwritten - * extent IO on it. - */ - if ((igrab(inode)) != inode) { - pagebuf_free(pb); - return -EAGAIN; - } - - /* Set the count to 1 initially, this will stop an I/O - * completion callout which happens before we have started - * all the I/O from calling pagebuf_iodone too early. 
- */ - atomic_set(&pb->pb_io_remaining, 1); + ioend = xfs_alloc_ioend(inode); /* First map forwards in the page consecutive buffers * covering this unwritten extent @@ -474,12 +503,12 @@ xfs_map_unwritten( break; xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); set_buffer_unwritten_io(bh); - bh->b_private = pb; + bh->b_private = ioend; p_offset += bh->b_size; nblocks++; } while ((bh = bh->b_this_page) != head); - atomic_add(nblocks, &pb->pb_io_remaining); + atomic_add(nblocks, &ioend->io_remaining); /* If we reached the end of the page, map forwards in any * following pages which are also covered by this extent. @@ -496,13 +525,13 @@ xfs_map_unwritten( tloff = min(tlast, tloff); for (tindex = start_page->index + 1; tindex < tloff; tindex++) { page = xfs_probe_unwritten_page(mapping, - tindex, iomapp, pb, + tindex, iomapp, ioend, PAGE_CACHE_SIZE, &bs, bbits); if (!page) break; nblocks += bs; - atomic_add(bs, &pb->pb_io_remaining); - xfs_convert_page(inode, page, iomapp, wbc, pb, + atomic_add(bs, &ioend->io_remaining); + xfs_convert_page(inode, page, iomapp, wbc, ioend, startio, all_bh); /* stop if converting the next page might add * enough blocks that the corresponding byte @@ -514,12 +543,12 @@ xfs_map_unwritten( if (tindex == tlast && (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { page = xfs_probe_unwritten_page(mapping, - tindex, iomapp, pb, + tindex, iomapp, ioend, pg_offset, &bs, bbits); if (page) { nblocks += bs; - atomic_add(bs, &pb->pb_io_remaining); - xfs_convert_page(inode, page, iomapp, wbc, pb, + atomic_add(bs, &ioend->io_remaining); + xfs_convert_page(inode, page, iomapp, wbc, ioend, startio, all_bh); if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) goto enough; @@ -528,21 +557,9 @@ xfs_map_unwritten( } enough: - size = nblocks; /* NB: using 64bit number here */ - size <<= block_bits; /* convert fsb's to byte range */ - - XFS_BUF_DATAIO(pb); - XFS_BUF_ASYNC(pb); - XFS_BUF_SET_SIZE(pb, size); - XFS_BUF_SET_COUNT(pb, 
size); - XFS_BUF_SET_OFFSET(pb, offset); - XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)); - XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert); - - if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { - pagebuf_iodone(pb, 1, 1); - } - + ioend->io_size = (xfs_off_t)nblocks << block_bits; + ioend->io_offset = offset; + xfs_finish_ioend(ioend); return 0; } @@ -787,7 +804,7 @@ xfs_page_state_convert( continue; if (!iomp) { err = xfs_map_blocks(inode, offset, len, &iomap, - BMAPI_READ|BMAPI_IGNSTATE); + BMAPI_WRITE|BMAPI_IGNSTATE); if (err) { goto error; } @@ -1028,6 +1045,44 @@ linvfs_get_blocks_direct( create, 1, BMAPI_WRITE|BMAPI_DIRECT); } +STATIC void +linvfs_end_io_direct( + struct kiocb *iocb, + loff_t offset, + ssize_t size, + void *private) +{ + xfs_ioend_t *ioend = iocb->private; + + /* + * Non-NULL private data means we need to issue a transaction to + * convert a range from unwritten to written extents. This needs + * to happen from process context but aio+dio I/O completion + * happens from irq context so we need to defer it to a workqueue. + * This is not necessary for synchronous direct I/O, but we do + * it anyway to keep the code uniform and simpler. + * + * The core direct I/O code might be changed to always call the + * completion handler in the future, in which case all this can + * go away. + */ + if (private && size > 0) { + ioend->io_offset = offset; + ioend->io_size = size; + xfs_finish_ioend(ioend); + } else { + ASSERT(size >= 0); + xfs_destroy_ioend(ioend); + } + + /* + * blockdev_direct_IO can return an error even after the I/O + * completion handler was called. Thus we need to protect + * against double-freeing. 
+ */ + iocb->private = NULL; +} + STATIC ssize_t linvfs_direct_IO( int rw, @@ -1042,16 +1097,23 @@ linvfs_direct_IO( xfs_iomap_t iomap; int maps = 1; int error; + ssize_t ret; VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); if (error) return -error; - return blockdev_direct_IO_own_locking(rw, iocb, inode, + iocb->private = xfs_alloc_ioend(inode); + + ret = blockdev_direct_IO_own_locking(rw, iocb, inode, iomap.iomap_target->pbr_bdev, iov, offset, nr_segs, linvfs_get_blocks_direct, - linvfs_unwritten_convert_direct); + linvfs_end_io_direct); + + if (unlikely(ret <= 0 && iocb->private)) + xfs_destroy_ioend(iocb->private); + return ret; } @@ -1202,6 +1264,16 @@ out_unlock: return error; } +STATIC int +linvfs_invalidate_page( + struct page *page, + unsigned long offset) +{ + xfs_page_trace(XFS_INVALIDPAGE_ENTER, + page->mapping->host, page, offset); + return block_invalidatepage(page, offset); +} + /* * Called to move a page into cleanable state - and from there * to be released. Possibly the page is already clean. We always @@ -1279,6 +1351,7 @@ struct address_space_operations linvfs_aops = { .writepage = linvfs_writepage, .sync_page = block_sync_page, .releasepage = linvfs_release_page, + .invalidatepage = linvfs_invalidate_page, .prepare_write = linvfs_prepare_write, .commit_write = generic_commit_write, .bmap = linvfs_bmap, diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h new file mode 100644 index 000000000000..2fa62974a04d --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_AOPS_H__ +#define __XFS_AOPS_H__ + +extern struct workqueue_struct *xfsdatad_workqueue; +extern mempool_t *xfs_ioend_pool; + +typedef void (*xfs_ioend_func_t)(void *); + +typedef struct xfs_ioend { + unsigned int io_uptodate; /* I/O status register */ + atomic_t io_remaining; /* hold count */ + struct vnode *io_vnode; /* file being written to */ + struct buffer_head *io_buffer_head;/* buffer linked list head */ + size_t io_size; /* size of the extent */ + xfs_off_t io_offset; /* offset in the file */ + struct work_struct io_work; /* xfsdatad work queue */ +} xfs_ioend_t; + +#endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index df0cba239dd5..655bf4a78afe 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -54,6 +54,7 @@ #include <linux/percpu.h> #include <linux/blkdev.h> #include <linux/hash.h> +#include <linux/kthread.h> #include "xfs_linux.h" @@ -67,7 +68,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int); STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); STATIC struct workqueue_struct *xfslogd_workqueue; -STATIC struct workqueue_struct *xfsdatad_workqueue; +struct workqueue_struct *xfsdatad_workqueue; /* * Pagebuf debugging @@ -590,8 +591,10 @@ found: PB_SET_OWNER(pb); } - if (pb->pb_flags & PBF_STALE) + if (pb->pb_flags & PBF_STALE) { + ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0); pb->pb_flags &= PBF_MAPPED; + } PB_TRACE(pb, "got_lock", 0); XFS_STATS_INC(pb_get_locked); return (pb); @@ -700,25 +703,6 @@ xfs_buf_read_flags( } /* - * Create a skeletal pagebuf (no pages associated with it). - */ -xfs_buf_t * -pagebuf_lookup( - xfs_buftarg_t *target, - loff_t ioff, - size_t isize, - page_buf_flags_t flags) -{ - xfs_buf_t *pb; - - pb = pagebuf_allocate(flags); - if (pb) { - _pagebuf_initialize(pb, target, ioff, isize, flags); - } - return pb; -} - -/* * If we are not low on memory then do the readahead in a deadlock * safe manner. 
*/ @@ -913,22 +897,23 @@ pagebuf_rele( do_free = 0; } - if (pb->pb_flags & PBF_DELWRI) { - pb->pb_flags |= PBF_ASYNC; - atomic_inc(&pb->pb_hold); - pagebuf_delwri_queue(pb, 0); - do_free = 0; - } else if (pb->pb_flags & PBF_FS_MANAGED) { + if (pb->pb_flags & PBF_FS_MANAGED) { do_free = 0; } if (do_free) { + ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0); list_del_init(&pb->pb_hash_list); spin_unlock(&hash->bh_lock); pagebuf_free(pb); } else { spin_unlock(&hash->bh_lock); } + } else { + /* + * Catch reference count leaks + */ + ASSERT(atomic_read(&pb->pb_hold) >= 0); } } @@ -1006,13 +991,24 @@ pagebuf_lock( * pagebuf_unlock * * pagebuf_unlock releases the lock on the buffer object created by - * pagebuf_lock or pagebuf_cond_lock (not any - * pinning of underlying pages created by pagebuf_pin). + * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages + * created by pagebuf_pin). + * + * If the buffer is marked delwri but is not queued, do so before we + * unlock the buffer as we need to set flags correctly. We also need to + * take a reference for the delwri queue because the unlocker is going to + * drop theirs and they don't know we just queued it. 
*/ void pagebuf_unlock( /* unlock buffer */ xfs_buf_t *pb) /* buffer to unlock */ { + if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) { + atomic_inc(&pb->pb_hold); + pb->pb_flags |= PBF_ASYNC; + pagebuf_delwri_queue(pb, 0); + } + PB_CLEAR_OWNER(pb); up(&pb->pb_sema); PB_TRACE(pb, "unlock", 0); @@ -1249,8 +1245,8 @@ bio_end_io_pagebuf( int error) { xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; - unsigned int i, blocksize = pb->pb_target->pbr_bsize; - struct bio_vec *bvec = bio->bi_io_vec; + unsigned int blocksize = pb->pb_target->pbr_bsize; + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; if (bio->bi_size) return 1; @@ -1258,10 +1254,12 @@ bio_end_io_pagebuf( if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) pb->pb_error = EIO; - for (i = 0; i < bio->bi_vcnt; i++, bvec++) { + do { struct page *page = bvec->bv_page; - if (pb->pb_error) { + if (unlikely(pb->pb_error)) { + if (pb->pb_flags & PBF_READ) + ClearPageUptodate(page); SetPageError(page); } else if (blocksize == PAGE_CACHE_SIZE) { SetPageUptodate(page); @@ -1270,10 +1268,13 @@ bio_end_io_pagebuf( set_page_region(page, bvec->bv_offset, bvec->bv_len); } + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + if (_pagebuf_iolocked(pb)) { unlock_page(page); } - } + } while (bvec >= bio->bi_io_vec); _pagebuf_iodone(pb, 1); bio_put(bio); @@ -1511,6 +1512,11 @@ again: ASSERT(btp == bp->pb_target); if (!(bp->pb_flags & PBF_FS_MANAGED)) { spin_unlock(&hash->bh_lock); + /* + * Catch superblock reference count leaks + * immediately + */ + BUG_ON(bp->pb_bn == 0); delay(100); goto again; } @@ -1686,17 +1692,20 @@ pagebuf_delwri_queue( int unlock) { PB_TRACE(pb, "delwri_q", (long)unlock); - ASSERT(pb->pb_flags & PBF_DELWRI); + ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == + (PBF_DELWRI|PBF_ASYNC)); spin_lock(&pbd_delwrite_lock); /* If already in the queue, dequeue and place at tail */ if (!list_empty(&pb->pb_list)) { + ASSERT(pb->pb_flags & _PBF_DELWRI_Q); if (unlock) { 
atomic_dec(&pb->pb_hold); } list_del(&pb->pb_list); } + pb->pb_flags |= _PBF_DELWRI_Q; list_add_tail(&pb->pb_list, &pbd_delwrite_queue); pb->pb_queuetime = jiffies; spin_unlock(&pbd_delwrite_lock); @@ -1713,10 +1722,11 @@ pagebuf_delwri_dequeue( spin_lock(&pbd_delwrite_lock); if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { + ASSERT(pb->pb_flags & _PBF_DELWRI_Q); list_del_init(&pb->pb_list); dequeued = 1; } - pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); spin_unlock(&pbd_delwrite_lock); if (dequeued) @@ -1733,9 +1743,7 @@ pagebuf_runall_queues( } /* Defines for pagebuf daemon */ -STATIC DECLARE_COMPLETION(xfsbufd_done); STATIC struct task_struct *xfsbufd_task; -STATIC int xfsbufd_active; STATIC int xfsbufd_force_flush; STATIC int xfsbufd_force_sleep; @@ -1761,14 +1769,8 @@ xfsbufd( xfs_buftarg_t *target; xfs_buf_t *pb, *n; - /* Set up the thread */ - daemonize("xfsbufd"); current->flags |= PF_MEMALLOC; - xfsbufd_task = current; - xfsbufd_active = 1; - barrier(); - INIT_LIST_HEAD(&tmp); do { if (unlikely(freezing(current))) { @@ -1795,7 +1797,7 @@ xfsbufd( break; } - pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); pb->pb_flags |= PBF_WRITE; list_move(&pb->pb_list, &tmp); } @@ -1816,9 +1818,9 @@ xfsbufd( purge_addresses(); xfsbufd_force_flush = 0; - } while (xfsbufd_active); + } while (!kthread_should_stop()); - complete_and_exit(&xfsbufd_done, 0); + return 0; } /* @@ -1845,15 +1847,13 @@ xfs_flush_buftarg( if (pb->pb_target != target) continue; - ASSERT(pb->pb_flags & PBF_DELWRI); + ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)); PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); if (pagebuf_ispin(pb)) { pincount++; continue; } - pb->pb_flags &= ~PBF_DELWRI; - pb->pb_flags |= PBF_WRITE; list_move(&pb->pb_list, &tmp); } spin_unlock(&pbd_delwrite_lock); @@ -1862,12 +1862,14 @@ xfs_flush_buftarg( * Dropped the delayed write list lock, now walk the temporary list */ list_for_each_entry_safe(pb, n, 
&tmp, pb_list) { + pagebuf_lock(pb); + pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); + pb->pb_flags |= PBF_WRITE; if (wait) pb->pb_flags &= ~PBF_ASYNC; else list_del_init(&pb->pb_list); - pagebuf_lock(pb); pagebuf_iostrategy(pb); } @@ -1901,9 +1903,11 @@ xfs_buf_daemons_start(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES); - if (error < 0) + xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd"); + if (IS_ERR(xfsbufd_task)) { + error = PTR_ERR(xfsbufd_task); goto out_destroy_xfsdatad_workqueue; + } return 0; out_destroy_xfsdatad_workqueue: @@ -1920,10 +1924,7 @@ xfs_buf_daemons_start(void) STATIC void xfs_buf_daemons_stop(void) { - xfsbufd_active = 0; - barrier(); - wait_for_completion(&xfsbufd_done); - + kthread_stop(xfsbufd_task); destroy_workqueue(xfslogd_workqueue); destroy_workqueue(xfsdatad_workqueue); } diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 3f8f69a66aea..67c19f799232 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -89,6 +89,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */ _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ + _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ } page_buf_flags_t; #define PBF_UPDATE (PBF_READ | PBF_WRITE) @@ -206,13 +207,6 @@ extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */ #define xfs_buf_read(target, blkno, len, flags) \ xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) -extern xfs_buf_t *pagebuf_lookup( - xfs_buftarg_t *, - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ - /* PBF_FORCEIO, */ - extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ /* no memory or disk address */ size_t len, @@ -344,8 +338,6 @@ extern void 
pagebuf_trace( - - /* These are just for xfs_syncsub... it sets an internal variable * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t */ @@ -452,7 +444,7 @@ extern void pagebuf_trace( #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) -extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) +static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) { if (bp->pb_flags & PBF_MAPPED) return XFS_BUF_PTR(bp) + offset; diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f1ce4323f56e..3881622bcf08 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -311,6 +311,31 @@ linvfs_fsync( #define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) +#ifdef CONFIG_XFS_DMAPI + +STATIC struct page * +linvfs_filemap_nopage( + struct vm_area_struct *area, + unsigned long address, + int *type) +{ + struct inode *inode = area->vm_file->f_dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(inode); + xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); + int error; + + ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); + + error = XFS_SEND_MMAP(mp, area, 0); + if (error) + return NULL; + + return filemap_nopage(area, address, type); +} + +#endif /* CONFIG_XFS_DMAPI */ + + STATIC int linvfs_readdir( struct file *filp, @@ -390,14 +415,6 @@ done: return -error; } -#ifdef CONFIG_XFS_DMAPI -STATIC void -linvfs_mmap_close( - struct vm_area_struct *vma) -{ - xfs_dm_mm_put(vma); -} -#endif /* CONFIG_XFS_DMAPI */ STATIC int linvfs_file_mmap( @@ -411,16 +428,11 @@ linvfs_file_mmap( vma->vm_ops = &linvfs_file_vm_ops; - if (vp->v_vfsp->vfs_flag & VFS_DMI) { - xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); - - error = -XFS_SEND_MMAP(mp, vma, 0); - if (error) - return error; #ifdef CONFIG_XFS_DMAPI + if (vp->v_vfsp->vfs_flag & VFS_DMI) { vma->vm_ops = &linvfs_dmapi_file_vm_ops; -#endif } +#endif /* CONFIG_XFS_DMAPI */ VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); if (!error) @@ -474,6 +486,7 @@ 
linvfs_ioctl_invis( return error; } +#ifdef CONFIG_XFS_DMAPI #ifdef HAVE_VMOP_MPROTECT STATIC int linvfs_mprotect( @@ -494,6 +507,7 @@ linvfs_mprotect( return error; } #endif /* HAVE_VMOP_MPROTECT */ +#endif /* CONFIG_XFS_DMAPI */ #ifdef HAVE_FOP_OPEN_EXEC /* If the user is attempting to execute a file that is offline then @@ -528,49 +542,10 @@ open_exec_out: } #endif /* HAVE_FOP_OPEN_EXEC */ -/* - * Temporary workaround to the AIO direct IO write problem. - * This code can go and we can revert to do_sync_write once - * the writepage(s) rework is merged. - */ -STATIC ssize_t -linvfs_write( - struct file *filp, - const char __user *buf, - size_t len, - loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - kiocb.ki_pos = *ppos; - ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos); - *ppos = kiocb.ki_pos; - return ret; -} -STATIC ssize_t -linvfs_write_invis( - struct file *filp, - const char __user *buf, - size_t len, - loff_t *ppos) -{ - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, filp); - kiocb.ki_pos = *ppos; - ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos); - *ppos = kiocb.ki_pos; - return ret; -} - - struct file_operations linvfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .write = linvfs_write, + .write = do_sync_write, .readv = linvfs_readv, .writev = linvfs_writev, .aio_read = linvfs_aio_read, @@ -592,7 +567,7 @@ struct file_operations linvfs_file_operations = { struct file_operations linvfs_invis_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .write = linvfs_write_invis, + .write = do_sync_write, .readv = linvfs_readv_invis, .writev = linvfs_writev_invis, .aio_read = linvfs_aio_read_invis, @@ -626,8 +601,7 @@ static struct vm_operations_struct linvfs_file_vm_ops = { #ifdef CONFIG_XFS_DMAPI static struct vm_operations_struct linvfs_dmapi_file_vm_ops = { - .close = linvfs_mmap_close, - .nopage = filemap_nopage, + .nopage = 
linvfs_filemap_nopage, .populate = filemap_populate, #ifdef HAVE_VMOP_MPROTECT .mprotect = linvfs_mprotect, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 05a447e51cc0..6a3326bcd8d0 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -141,13 +141,19 @@ xfs_find_handle( return -XFS_ERROR(EINVAL); } - /* we need the vnode */ - vp = LINVFS_GET_VP(inode); - if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + break; + default: iput(inode); return -XFS_ERROR(EBADF); } + /* we need the vnode */ + vp = LINVFS_GET_VP(inode); + /* now we can grab the fsid */ memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); hsize = sizeof(xfs_fsid_t); @@ -386,7 +392,7 @@ xfs_readlink_by_handle( return -error; /* Restrict this handle operation to symlinks only. */ - if (vp->v_type != VLNK) { + if (!S_ISLNK(inode->i_mode)) { VN_RELE(vp); return -XFS_ERROR(EINVAL); } @@ -982,10 +988,10 @@ xfs_ioc_space( if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) return -XFS_ERROR(EPERM); - if (!(filp->f_flags & FMODE_WRITE)) + if (!(filp->f_mode & FMODE_WRITE)) return -XFS_ERROR(EBADF); - if (vp->v_type != VREG) + if (!VN_ISREG(vp)) return -XFS_ERROR(EINVAL); if (copy_from_user(&bf, arg, sizeof(bf))) diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0f8f1384eb36..4636b7f86f1f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -47,8 +47,52 @@ #include "xfs_vnode.h" #include "xfs_dfrag.h" +#define _NATIVE_IOC(cmd, type) \ + _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) + #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) #define BROKEN_X86_ALIGNMENT +/* on ia32 l_start is on a 32-bit boundary */ +typedef struct xfs_flock64_32 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + 
__s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +} xfs_flock64_32_t; + +#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32) +#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32) +#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32) +#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32) +#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32) +#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32) +#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32) +#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32) + +/* just account for different alignment */ +STATIC unsigned long +xfs_ioctl32_flock( + unsigned long arg) +{ + xfs_flock64_32_t __user *p32 = (void __user *)arg; + xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); + + if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || + copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || + copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || + copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || + copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || + copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || + copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) + return -EFAULT; + + return (unsigned long)p; +} + #else typedef struct xfs_fsop_bulkreq32 { @@ -103,7 +147,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) /* not handled case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: - case XFS_IOC_PATH_TO_HANDLE: case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_OPEN_BY_HANDLE: case XFS_IOC_FSSETDM_BY_HANDLE: @@ -124,8 +167,21 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) case XFS_IOC_ERROR_CLEARALL: break; -#ifndef BROKEN_X86_ALIGNMENT - /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */ +#ifdef BROKEN_X86_ALIGNMENT + /* xfs_flock_t has wrong u32 vs u64 
alignment */ + case XFS_IOC_ALLOCSP_32: + case XFS_IOC_FREESP_32: + case XFS_IOC_ALLOCSP64_32: + case XFS_IOC_FREESP64_32: + case XFS_IOC_RESVSP_32: + case XFS_IOC_UNRESVSP_32: + case XFS_IOC_RESVSP64_32: + case XFS_IOC_UNRESVSP64_32: + arg = xfs_ioctl32_flock(arg); + cmd = _NATIVE_IOC(cmd, struct xfs_flock64); + break; + +#else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: @@ -134,6 +190,9 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP64: + break; + + /* xfs_bstat_t still has wrong u32 vs u64 alignment */ case XFS_IOC_SWAPEXT: break; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index f252605514eb..77708a8c9f87 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -140,7 +140,6 @@ linvfs_mknod( memset(&va, 0, sizeof(va)); va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; - va.va_type = IFTOVT(mode); va.va_mode = mode; switch (mode & S_IFMT) { @@ -308,14 +307,13 @@ linvfs_symlink( cvp = NULL; memset(&va, 0, sizeof(va)); - va.va_type = VLNK; - va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO; + va.va_mode = S_IFLNK | + (irix_symlink_mode ? 
0777 & ~current->fs->umask : S_IRWXUGO); va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; error = 0; VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); if (!error && cvp) { - ASSERT(cvp->v_type == VLNK); ip = LINVFS_GET_IP(cvp); d_instantiate(dentry, ip); validate_fields(dir); @@ -425,9 +423,14 @@ linvfs_follow_link( return NULL; } -static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +STATIC void +linvfs_put_link( + struct dentry *dentry, + struct nameidata *nd, + void *p) { - char *s = nd_get_link(nd); + char *s = nd_get_link(nd); + if (!IS_ERR(s)) kfree(s); } diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 42dc5e4662ed..68c5d885ed9c 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -64,7 +64,6 @@ #include <sema.h> #include <time.h> -#include <support/qsort.h> #include <support/ktrace.h> #include <support/debug.h> #include <support/move.h> @@ -104,6 +103,7 @@ #include <xfs_stats.h> #include <xfs_sysctl.h> #include <xfs_iops.h> +#include <xfs_aops.h> #include <xfs_super.h> #include <xfs_globals.h> #include <xfs_fs_subr.h> @@ -254,11 +254,18 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) #define MAX(a,b) (max(a,b)) #define howmany(x, y) (((x)+((y)-1))/(y)) #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) +#define qsort(a,n,s,fn) sort(a,n,s,fn,NULL) +/* + * Various platform dependent calls that don't fit anywhere else + */ #define xfs_stack_trace() dump_stack() - #define xfs_itruncate_data(ip, off) \ (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) +#define xfs_statvfs_fsid(statp, mp) \ + ({ u64 id = huge_encode_dev((mp)->m_dev); \ + __kernel_fsid_t *fsid = &(statp)->f_fsid; \ + (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); }) /* Move the kernel do_div definition off to one side */ @@ -371,6 +378,4 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) return(x * y); } -#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL) - 
#endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index acab58c48043..3b5fabe8dae9 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -660,9 +660,6 @@ xfs_write( (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if (ioflags & IO_ISAIO) - return XFS_ERROR(-ENOSYS); - if ((pos & target->pbr_smask) || (count & target->pbr_smask)) return XFS_ERROR(-EINVAL); diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index f197a720e394..6294dcdb797c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -70,9 +70,10 @@ struct xfs_iomap; #define XFS_SENDFILE_ENTER 21 #define XFS_WRITEPAGE_ENTER 22 #define XFS_RELEASEPAGE_ENTER 23 -#define XFS_IOMAP_ALLOC_ENTER 24 -#define XFS_IOMAP_ALLOC_MAP 25 -#define XFS_IOMAP_UNWRITTEN 26 +#define XFS_INVALIDPAGE_ENTER 24 +#define XFS_IOMAP_ALLOC_ENTER 25 +#define XFS_IOMAP_ALLOC_MAP 26 +#define XFS_IOMAP_UNWRITTEN 27 extern void xfs_rw_enter_trace(int, struct xfs_iocore *, void *, size_t, loff_t, int); extern void xfs_inval_cached_trace(struct xfs_iocore *, diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index f6dd7de25927..0da87bfc9999 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -70,11 +70,15 @@ #include <linux/namei.h> #include <linux/init.h> #include <linux/mount.h> +#include <linux/mempool.h> #include <linux/writeback.h> +#include <linux/kthread.h> STATIC struct quotactl_ops linvfs_qops; STATIC struct super_operations linvfs_sops; -STATIC kmem_zone_t *linvfs_inode_zone; +STATIC kmem_zone_t *xfs_vnode_zone; +STATIC kmem_zone_t *xfs_ioend_zone; +mempool_t *xfs_ioend_pool; STATIC struct xfs_mount_args * xfs_args_allocate( @@ -138,24 +142,25 @@ STATIC __inline__ void xfs_set_inodeops( struct inode *inode) { - vnode_t *vp = LINVFS_GET_VP(inode); - - if (vp->v_type == VNON) { - vn_mark_bad(vp); - } else if (S_ISREG(inode->i_mode)) { + switch 
(inode->i_mode & S_IFMT) { + case S_IFREG: inode->i_op = &linvfs_file_inode_operations; inode->i_fop = &linvfs_file_operations; inode->i_mapping->a_ops = &linvfs_aops; - } else if (S_ISDIR(inode->i_mode)) { + break; + case S_IFDIR: inode->i_op = &linvfs_dir_inode_operations; inode->i_fop = &linvfs_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { + break; + case S_IFLNK: inode->i_op = &linvfs_symlink_inode_operations; if (inode->i_blocks) inode->i_mapping->a_ops = &linvfs_aops; - } else { + break; + default: inode->i_op = &linvfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); + break; } } @@ -167,16 +172,23 @@ xfs_revalidate_inode( { struct inode *inode = LINVFS_GET_IP(vp); - inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); + inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; inode->i_uid = ip->i_d.di_uid; inode->i_gid = ip->i_d.di_gid; - if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + inode->i_rdev = + MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: inode->i_rdev = 0; - } else { - xfs_dev_t dev = ip->i_df.if_u2.if_rdev; - inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); + break; } + inode->i_blksize = PAGE_CACHE_SIZE; inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); @@ -231,7 +243,6 @@ xfs_initialize_vnode( * finish our work. 
*/ if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { - vp->v_type = IFTOVT(ip->i_d.di_mode); xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); xfs_set_inodeops(inode); @@ -274,8 +285,7 @@ linvfs_alloc_inode( { vnode_t *vp; - vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, - kmem_flags_convert(KM_SLEEP)); + vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP)); if (!vp) return NULL; return LINVFS_GET_IP(vp); @@ -285,11 +295,11 @@ STATIC void linvfs_destroy_inode( struct inode *inode) { - kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode)); + kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode)); } STATIC void -init_once( +linvfs_inode_init_once( void *data, kmem_cache_t *cachep, unsigned long flags) @@ -302,21 +312,41 @@ init_once( } STATIC int -init_inodecache( void ) +linvfs_init_zones(void) { - linvfs_inode_zone = kmem_cache_create("linvfs_icache", + xfs_vnode_zone = kmem_cache_create("xfs_vnode", sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, - init_once, NULL); - if (linvfs_inode_zone == NULL) - return -ENOMEM; + linvfs_inode_init_once, NULL); + if (!xfs_vnode_zone) + goto out; + + xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); + if (!xfs_ioend_zone) + goto out_destroy_vnode_zone; + + xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE, + mempool_alloc_slab, mempool_free_slab, + xfs_ioend_zone); + if (!xfs_ioend_pool) + goto out_free_ioend_zone; + return 0; + + + out_free_ioend_zone: + kmem_zone_destroy(xfs_ioend_zone); + out_destroy_vnode_zone: + kmem_zone_destroy(xfs_vnode_zone); + out: + return -ENOMEM; } STATIC void -destroy_inodecache( void ) +linvfs_destroy_zones(void) { - if (kmem_cache_destroy(linvfs_inode_zone)) - printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); + mempool_destroy(xfs_ioend_pool); + kmem_zone_destroy(xfs_vnode_zone); + kmem_zone_destroy(xfs_ioend_zone); } /* @@ -354,17 +384,38 @@ linvfs_clear_inode( struct inode *inode) { vnode_t *vp = LINVFS_GET_VP(inode); + int error, 
cache; - if (vp) { - vn_rele(vp); - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); - /* - * Do all our cleanup, and remove this vnode. - */ - vn_remove(vp); + vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address); + + XFS_STATS_INC(vn_rele); + XFS_STATS_INC(vn_remove); + XFS_STATS_INC(vn_reclaim); + XFS_STATS_DEC(vn_active); + + /* + * This can happen because xfs_iget_core calls xfs_idestroy if we + * find an inode with di_mode == 0 but without IGET_CREATE set. + */ + if (vp->v_fbhv) + VOP_INACTIVE(vp, NULL, cache); + + VN_LOCK(vp); + vp->v_flag &= ~VMODIFIED; + VN_UNLOCK(vp, 0); + + if (vp->v_fbhv) { + VOP_RECLAIM(vp, error); + if (error) + panic("vn_purge: cannot reclaim"); } -} + ASSERT(vp->v_fbhv == NULL); + +#ifdef XFS_VNODE_TRACE + ktrace_free(vp->v_trace); +#endif +} /* * Enqueue a work item to be picked up by the vfs xfssyncd thread. @@ -466,25 +517,16 @@ xfssyncd( { long timeleft; vfs_t *vfsp = (vfs_t *) arg; - struct list_head tmp; struct vfs_sync_work *work, *n; + LIST_HEAD (tmp); - daemonize("xfssyncd"); - - vfsp->vfs_sync_work.w_vfs = vfsp; - vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; - vfsp->vfs_sync_task = current; - wmb(); - wake_up(&vfsp->vfs_wait_sync_task); - - INIT_LIST_HEAD(&tmp); timeleft = (xfs_syncd_centisecs * HZ) / 100; for (;;) { set_current_state(TASK_INTERRUPTIBLE); timeleft = schedule_timeout(timeleft); /* swsusp */ try_to_freeze(); - if (vfsp->vfs_flag & VFS_UMOUNT) + if (kthread_should_stop()) break; spin_lock(&vfsp->vfs_sync_lock); @@ -513,10 +555,6 @@ xfssyncd( } } - vfsp->vfs_sync_task = NULL; - wmb(); - wake_up(&vfsp->vfs_wait_sync_task); - return 0; } @@ -524,13 +562,11 @@ STATIC int linvfs_start_syncd( vfs_t *vfsp) { - int pid; - - pid = kernel_thread(xfssyncd, (void *) vfsp, - CLONE_VM | CLONE_FS | CLONE_FILES); - if (pid < 0) - return -pid; - wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task); + vfsp->vfs_sync_work.w_syncer = vfs_sync_worker; + vfsp->vfs_sync_work.w_vfs = vfsp; + 
vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd"); + if (IS_ERR(vfsp->vfs_sync_task)) + return -PTR_ERR(vfsp->vfs_sync_task); return 0; } @@ -538,11 +574,7 @@ STATIC void linvfs_stop_syncd( vfs_t *vfsp) { - vfsp->vfs_flag |= VFS_UMOUNT; - wmb(); - - wake_up_process(vfsp->vfs_sync_task); - wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task); + kthread_stop(vfsp->vfs_sync_task); } STATIC void @@ -866,9 +898,9 @@ init_xfs_fs( void ) ktrace_init(64); - error = init_inodecache(); + error = linvfs_init_zones(); if (error < 0) - goto undo_inodecache; + goto undo_zones; error = pagebuf_init(); if (error < 0) @@ -889,9 +921,9 @@ undo_register: pagebuf_terminate(); undo_pagebuf: - destroy_inodecache(); + linvfs_destroy_zones(); -undo_inodecache: +undo_zones: return error; } @@ -903,7 +935,7 @@ exit_xfs_fs( void ) unregister_filesystem(&xfs_fs_type); xfs_cleanup(); pagebuf_terminate(); - destroy_inodecache(); + linvfs_destroy_zones(); ktrace_uninit(); } diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c index 669c61644959..34cc902ec119 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.c +++ b/fs/xfs/linux-2.6/xfs_vfs.c @@ -251,7 +251,6 @@ vfs_allocate( void ) bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); INIT_LIST_HEAD(&vfsp->vfs_sync_list); spin_lock_init(&vfsp->vfs_sync_lock); - init_waitqueue_head(&vfsp->vfs_wait_sync_task); init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); return vfsp; } diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 7ee1f714e9ba..f0ab574fb47a 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -65,7 +65,6 @@ typedef struct vfs { spinlock_t vfs_sync_lock; /* work item list lock */ int vfs_sync_seq; /* sync thread generation no. 
*/ wait_queue_head_t vfs_wait_single_sync_task; - wait_queue_head_t vfs_wait_sync_task; } vfs_t; #define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ @@ -96,7 +95,6 @@ typedef enum { #define VFS_RDONLY 0x0001 /* read-only vfs */ #define VFS_GRPID 0x0002 /* group-ID assigned from directory */ #define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ -#define VFS_UMOUNT 0x0008 /* unmount in progress */ #define VFS_END 0x0008 /* max flag */ #define SYNC_ATTR 0x0001 /* sync attributes */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 250cad54e892..268f45bf6a9a 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -42,93 +42,33 @@ DEFINE_SPINLOCK(vnumber_lock); */ #define NVSYNC 37 #define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) -sv_t vsync[NVSYNC]; - -/* - * Translate stat(2) file types to vnode types and vice versa. - * Aware of numeric order of S_IFMT and vnode type values. - */ -enum vtype iftovt_tab[] = { - VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, - VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON -}; - -u_short vttoif_tab[] = { - 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK -}; +STATIC wait_queue_head_t vsync[NVSYNC]; void vn_init(void) { - register sv_t *svp; - register int i; + int i; - for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) - init_sv(svp, SV_DEFAULT, "vsy", i); + for (i = 0; i < NVSYNC; i++) + init_waitqueue_head(&vsync[i]); } -/* - * Clean a vnode of filesystem-specific data and prepare it for reuse. - */ -STATIC int -vn_reclaim( +void +vn_iowait( struct vnode *vp) { - int error; + wait_queue_head_t *wq = vptosync(vp); - XFS_STATS_INC(vn_reclaim); - vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address); - - /* - * Only make the VOP_RECLAIM call if there are behaviors - * to call. 
- */ - if (vp->v_fbhv) { - VOP_RECLAIM(vp, error); - if (error) - return -error; - } - ASSERT(vp->v_fbhv == NULL); - - VN_LOCK(vp); - vp->v_flag &= (VRECLM|VWAIT); - VN_UNLOCK(vp, 0); - - vp->v_type = VNON; - vp->v_fbhv = NULL; - -#ifdef XFS_VNODE_TRACE - ktrace_free(vp->v_trace); - vp->v_trace = NULL; -#endif - - return 0; -} - -STATIC void -vn_wakeup( - struct vnode *vp) -{ - VN_LOCK(vp); - if (vp->v_flag & VWAIT) - sv_broadcast(vptosync(vp)); - vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED); - VN_UNLOCK(vp, 0); + wait_event(*wq, (atomic_read(&vp->v_iocount) == 0)); } -int -vn_wait( +void +vn_iowake( struct vnode *vp) { - VN_LOCK(vp); - if (vp->v_flag & (VINACT | VRECLM)) { - vp->v_flag |= VWAIT; - sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); - return 1; - } - VN_UNLOCK(vp, 0); - return 0; + if (atomic_dec_and_test(&vp->v_iocount)) + wake_up(vptosync(vp)); } struct vnode * @@ -154,6 +94,8 @@ vn_initialize( /* Initialize the first behavior and the behavior chain head. */ vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); + atomic_set(&vp->v_iocount, 0); + #ifdef XFS_VNODE_TRACE vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); #endif /* XFS_VNODE_TRACE */ @@ -163,30 +105,6 @@ vn_initialize( } /* - * Get a reference on a vnode. - */ -vnode_t * -vn_get( - struct vnode *vp, - vmap_t *vmap) -{ - struct inode *inode; - - XFS_STATS_INC(vn_get); - inode = LINVFS_GET_IP(vp); - if (inode->i_state & I_FREEING) - return NULL; - - inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino); - if (!inode) /* Inode not present */ - return NULL; - - vn_trace_exit(vp, "vn_get", (inst_t *)__return_address); - - return vp; -} - -/* * Revalidate the Linux inode from the vattr. * Note: i_size _not_ updated; we must hold the inode * semaphore when doing that - callers responsibility. 
@@ -198,7 +116,7 @@ vn_revalidate_core( { struct inode *inode = LINVFS_GET_IP(vp); - inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode; + inode->i_mode = vap->va_mode; inode->i_nlink = vap->va_nlink; inode->i_uid = vap->va_uid; inode->i_gid = vap->va_gid; @@ -247,71 +165,6 @@ vn_revalidate( } /* - * purge a vnode from the cache - * At this point the vnode is guaranteed to have no references (vn_count == 0) - * The caller has to make sure that there are no ways someone could - * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock). - */ -void -vn_purge( - struct vnode *vp, - vmap_t *vmap) -{ - vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); - -again: - /* - * Check whether vp has already been reclaimed since our caller - * sampled its version while holding a filesystem cache lock that - * its VOP_RECLAIM function acquires. - */ - VN_LOCK(vp); - if (vp->v_number != vmap->v_number) { - VN_UNLOCK(vp, 0); - return; - } - - /* - * If vp is being reclaimed or inactivated, wait until it is inert, - * then proceed. Can't assume that vnode is actually reclaimed - * just because the reclaimed flag is asserted -- a vn_alloc - * reclaim can fail. - */ - if (vp->v_flag & (VINACT | VRECLM)) { - ASSERT(vn_count(vp) == 0); - vp->v_flag |= VWAIT; - sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); - goto again; - } - - /* - * Another process could have raced in and gotten this vnode... - */ - if (vn_count(vp) > 0) { - VN_UNLOCK(vp, 0); - return; - } - - XFS_STATS_DEC(vn_active); - vp->v_flag |= VRECLM; - VN_UNLOCK(vp, 0); - - /* - * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells - * vp's filesystem to flush and invalidate all cached resources. - * When vn_reclaim returns, vp should have no private data, - * either in a system cache or attached to v_data. - */ - if (vn_reclaim(vp) != 0) - panic("vn_purge: cannot reclaim"); - - /* - * Wakeup anyone waiting for vp to be reclaimed. 
- */ - vn_wakeup(vp); -} - -/* * Add a reference to a referenced vnode. */ struct vnode * @@ -330,80 +183,6 @@ vn_hold( return vp; } -/* - * Call VOP_INACTIVE on last reference. - */ -void -vn_rele( - struct vnode *vp) -{ - int vcnt; - int cache; - - XFS_STATS_INC(vn_rele); - - VN_LOCK(vp); - - vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address); - vcnt = vn_count(vp); - - /* - * Since we always get called from put_inode we know - * that i_count won't be decremented after we - * return. - */ - if (!vcnt) { - /* - * As soon as we turn this on, noone can find us in vn_get - * until we turn off VINACT or VRECLM - */ - vp->v_flag |= VINACT; - VN_UNLOCK(vp, 0); - - /* - * Do not make the VOP_INACTIVE call if there - * are no behaviors attached to the vnode to call. - */ - if (vp->v_fbhv) - VOP_INACTIVE(vp, NULL, cache); - - VN_LOCK(vp); - if (vp->v_flag & VWAIT) - sv_broadcast(vptosync(vp)); - - vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED); - } - - VN_UNLOCK(vp, 0); - - vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address); -} - -/* - * Finish the removal of a vnode. - */ -void -vn_remove( - struct vnode *vp) -{ - vmap_t vmap; - - /* Make sure we don't do this to the same vnode twice */ - if (!(vp->v_fbhv)) - return; - - XFS_STATS_INC(vn_remove); - vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address); - - /* - * After the following purge the vnode - * will no longer exist. - */ - VMAP(vp, vmap); - vn_purge(vp, &vmap); -} - - #ifdef XFS_VNODE_TRACE #define KTRACE_ENTER(vp, vk, s, line, ra) \ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index a6e57c647be4..35f306cebb87 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -65,10 +65,6 @@ struct vattr; struct xfs_iomap; struct attrlist_cursor_kern; -/* - * Vnode types. VNON means no type. - */ -enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK }; typedef xfs_ino_t vnumber_t; typedef struct dentry vname_t; @@ -77,15 +73,14 @@ typedef bhv_head_t vn_bhv_head_t; /* * MP locking protocols: * v_flag, v_vfsp VN_LOCK/VN_UNLOCK - * v_type read-only or fs-dependent */ typedef struct vnode { __u32 v_flag; /* vnode flags (see below) */ - enum vtype v_type; /* vnode type */ struct vfs *v_vfsp; /* ptr to containing VFS */ vnumber_t v_number; /* in-core vnode number */ vn_bhv_head_t v_bh; /* behavior head */ spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ + atomic_t v_iocount; /* outstanding I/O count */ #ifdef XFS_VNODE_TRACE struct ktrace *v_trace; /* trace header structure */ #endif @@ -93,6 +88,12 @@ typedef struct vnode { /* inode MUST be last */ } vnode_t; +#define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode) +#define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode) +#define VN_ISDIR(vp) S_ISDIR((vp)->v_inode.i_mode) +#define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode) +#define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode) + #define v_fbhv v_bh.bh_first /* first behavior */ #define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ @@ -133,22 +134,8 @@ typedef enum { #define LINVFS_GET_IP(vp) (&(vp)->v_inode) /* - * Convert between vnode types and inode formats (since POSIX.1 - * defines mode word of stat structure in terms of inode formats). - */ -extern enum vtype iftovt_tab[]; -extern u_short vttoif_tab[]; -#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) -#define VTTOIF(indx) (vttoif_tab[(int)(indx)]) -#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) - - -/* * Vnode flags. 
*/ -#define VINACT 0x1 /* vnode is being inactivated */ -#define VRECLM 0x2 /* vnode is being reclaimed */ -#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */ #define VMODIFIED 0x8 /* XFS inode state possibly differs */ /* to the Linux inode state. */ @@ -408,7 +395,6 @@ typedef struct vnodeops { */ typedef struct vattr { int va_mask; /* bit-mask of attributes present */ - enum vtype va_type; /* vnode type (for create) */ mode_t va_mode; /* file access mode and type */ xfs_nlink_t va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ @@ -498,27 +484,12 @@ typedef struct vattr { * Check whether mandatory file locking is enabled. */ #define MANDLOCK(vp, mode) \ - ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) + (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) extern void vn_init(void); -extern int vn_wait(struct vnode *); extern vnode_t *vn_initialize(struct inode *); /* - * Acquiring and invalidating vnodes: - * - * if (vn_get(vp, version, 0)) - * ...; - * vn_purge(vp, version); - * - * vn_get and vn_purge must be called with vmap_t arguments, sampled - * while a lock that the vnode's VOP_RECLAIM function acquires is - * held, to ensure that the vnode sampled with the lock held isn't - * recycled (VOP_RECLAIMed) or deallocated between the release of the lock - * and the subsequent vn_get or vn_purge. - */ - -/* * vnode_map structures _must_ match vn_epoch and vnode structure sizes. 
*/ typedef struct vnode_map { @@ -531,11 +502,11 @@ typedef struct vnode_map { (vmap).v_number = (vp)->v_number, \ (vmap).v_ino = (vp)->v_inode.i_ino; } -extern void vn_purge(struct vnode *, vmap_t *); -extern vnode_t *vn_get(struct vnode *, vmap_t *); extern int vn_revalidate(struct vnode *); extern void vn_revalidate_core(struct vnode *, vattr_t *); -extern void vn_remove(struct vnode *); + +extern void vn_iowait(struct vnode *vp); +extern void vn_iowake(struct vnode *vp); static inline int vn_count(struct vnode *vp) { @@ -546,7 +517,6 @@ static inline int vn_count(struct vnode *vp) * Vnode reference counting functions (and macros for compatibility). */ extern vnode_t *vn_hold(struct vnode *); -extern void vn_rele(struct vnode *); #if defined(XFS_VNODE_TRACE) #define VN_HOLD(vp) \ @@ -560,6 +530,12 @@ extern void vn_rele(struct vnode *); #define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) #endif +static inline struct vnode *vn_grab(struct vnode *vp) +{ + struct inode *inode = igrab(LINVFS_GET_IP(vp)); + return inode ? LINVFS_GET_VP(inode) : NULL; +} + /* * Vname handling macros. */ diff --git a/fs/xfs/quota/Makefile b/fs/xfs/quota/Makefile new file mode 100644 index 000000000000..7a4f725b2824 --- /dev/null +++ b/fs/xfs/quota/Makefile @@ -0,0 +1 @@ +include $(TOPDIR)/fs/xfs/quota/Makefile-linux-$(VERSION).$(PATCHLEVEL) diff --git a/fs/xfs/quota/Makefile-linux-2.6 b/fs/xfs/quota/Makefile-linux-2.6 new file mode 100644 index 000000000000..8b7b676718b9 --- /dev/null +++ b/fs/xfs/quota/Makefile-linux-2.6 @@ -0,0 +1,53 @@ +# +# Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +EXTRA_CFLAGS += -I $(TOPDIR)/fs/xfs -I $(TOPDIR)/fs/xfs/linux-2.6 + +ifeq ($(CONFIG_XFS_DEBUG),y) + EXTRA_CFLAGS += -g -DDEBUG + #EXTRA_CFLAGS += -DQUOTADEBUG +endif +ifeq ($(CONFIG_XFS_TRACE),y) + EXTRA_CFLAGS += -DXFS_DQUOT_TRACE + EXTRA_CFLAGS += -DXFS_VNODE_TRACE +endif + +obj-$(CONFIG_XFS_QUOTA) += xfs_quota.o + +xfs_quota-y += xfs_dquot.o \ + xfs_dquot_item.o \ + xfs_trans_dquot.o \ + xfs_qm_syscalls.o \ + xfs_qm_bhv.o \ + xfs_qm.o + +xfs_quota-$(CONFIG_PROC_FS) += xfs_qm_stats.o diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 46ce1e3ce1d6..e2e8d35fa4d0 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -421,7 +421,7 @@ xfs_qm_init_dquot_blk( */ STATIC int xfs_qm_dqalloc( - xfs_trans_t *tp, + xfs_trans_t **tpp, xfs_mount_t *mp, xfs_dquot_t *dqp, xfs_inode_t *quotip, @@ -433,6 +433,7 @@ xfs_qm_dqalloc( xfs_bmbt_irec_t map; int nmaps, error, committed; xfs_buf_t *bp; + xfs_trans_t *tp = *tpp; ASSERT(tp != NULL); xfs_dqtrace_entry(dqp, "DQALLOC"); @@ -492,10 +493,32 @@ xfs_qm_dqalloc( xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), 
dqp->dq_flags & XFS_DQ_ALLTYPES, bp); - if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) { + /* + * xfs_bmap_finish() may commit the current transaction and + * start a second transaction if the freelist is not empty. + * + * Since we still want to modify this buffer, we need to + * ensure that the buffer is not released on commit of + * the first transaction and ensure the buffer is added to the + * second transaction. + * + * If there is only one transaction then don't stop the buffer + * from being released when it commits later on. + */ + + xfs_trans_bhold(tp, bp); + + if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) { goto error1; } + if (committed) { + tp = *tpp; + xfs_trans_bjoin(tp, bp); + } else { + xfs_trans_bhold_release(tp, bp); + } + *O_bpp = bp; return 0; @@ -514,7 +537,7 @@ xfs_qm_dqalloc( */ STATIC int xfs_qm_dqtobp( - xfs_trans_t *tp, + xfs_trans_t **tpp, xfs_dquot_t *dqp, xfs_disk_dquot_t **O_ddpp, xfs_buf_t **O_bpp, @@ -528,6 +551,7 @@ xfs_qm_dqtobp( xfs_disk_dquot_t *ddq; xfs_dqid_t id; boolean_t newdquot; + xfs_trans_t *tp = (tpp ? *tpp : NULL); mp = dqp->q_mount; id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT); @@ -579,9 +603,10 @@ xfs_qm_dqtobp( return (ENOENT); ASSERT(tp); - if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip, + if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, dqp->q_fileoffset, &bp))) return (error); + tp = *tpp; newdquot = B_TRUE; } else { /* @@ -645,7 +670,7 @@ xfs_qm_dqtobp( /* ARGSUSED */ STATIC int xfs_qm_dqread( - xfs_trans_t *tp, + xfs_trans_t **tpp, xfs_dqid_t id, xfs_dquot_t *dqp, /* dquot to get filled in */ uint flags) @@ -653,15 +678,19 @@ xfs_qm_dqread( xfs_disk_dquot_t *ddqp; xfs_buf_t *bp; int error; + xfs_trans_t *tp; + + ASSERT(tpp); /* * get a pointer to the on-disk dquot and the buffer containing it * dqp already knows its own type (GROUP/USER). 
*/ xfs_dqtrace_entry(dqp, "DQREAD"); - if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) { + if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { return (error); } + tp = *tpp; /* copy everything from disk dquot to the incore dquot */ memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); @@ -740,7 +769,7 @@ xfs_qm_idtodq( * Read it from disk; xfs_dqread() takes care of * all the necessary initialization of dquot's fields (locks, etc) */ - if ((error = xfs_qm_dqread(tp, id, dqp, flags))) { + if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { /* * This can happen if quotas got turned off (ESRCH), * or if the dquot didn't exist on disk and we ask to diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 39175103c8e0..8ebc87176c78 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -113,20 +113,6 @@ typedef struct xfs_dquot { #define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) -/* - * Quota Accounting/Enforcement flags - */ -#define XFS_ALL_QUOTA_ACCT \ - (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) -#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) -#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) - -#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) -#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) -#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) -#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) -#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) - #ifdef DEBUG static inline int XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c 
index f5271b7b1e84..e74eaa7dd1bc 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -509,6 +509,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); log_vector->i_len = sizeof(xfs_qoff_logitem_t); + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); qf->qql_format.qf_size = 1; } diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index f665ca8f9e96..efde16e0a913 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -365,16 +365,6 @@ xfs_qm_mount_quotas( int error = 0; uint sbf; - /* - * If a file system had quotas running earlier, but decided to - * mount without -o uquota/pquota/gquota options, revoke the - * quotachecked license, and bail out. - */ - if (! XFS_IS_QUOTA_ON(mp) && - (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) { - mp->m_qflags = 0; - goto write_changes; - } /* * If quotas on realtime volumes is not supported, we disable @@ -388,11 +378,8 @@ xfs_qm_mount_quotas( goto write_changes; } -#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) - cmn_err(CE_NOTE, "Attempting to turn on disk quotas."); -#endif - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + /* * Allocate the quotainfo structure inside the mount struct, and * create quotainode(s), and change/rev superblock if necessary. @@ -410,19 +397,14 @@ xfs_qm_mount_quotas( */ if (XFS_QM_NEED_QUOTACHECK(mp) && !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { -#ifdef DEBUG - cmn_err(CE_NOTE, "Doing a quotacheck. Please wait."); -#endif if ((error = xfs_qm_quotacheck(mp))) { /* Quotacheck has failed and quotas have * been disabled. 
*/ return XFS_ERROR(error); } -#ifdef DEBUG - cmn_err(CE_NOTE, "Done quotacheck."); -#endif } + write_changes: /* * We actually don't have to acquire the SB_LOCK at all. @@ -2010,7 +1992,7 @@ xfs_qm_quotacheck( ASSERT(mp->m_quotainfo != NULL); ASSERT(xfs_Gqm != NULL); xfs_qm_destroy_quotainfo(mp); - xfs_mount_reset_sbqflags(mp); + (void)xfs_mount_reset_sbqflags(mp); } else { cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); } diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index b03eecf3b6cb..0b00b3c67015 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -184,8 +184,6 @@ typedef struct xfs_dquot_acct { #define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) -extern void xfs_mount_reset_sbqflags(xfs_mount_t *); - extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); extern int xfs_qm_mount_quotas(xfs_mount_t *, int); extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index dc3c37a1e158..8890a18a99d8 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -229,48 +229,6 @@ xfs_qm_syncall( return error; } -/* - * Clear the quotaflags in memory and in the superblock. - */ -void -xfs_mount_reset_sbqflags( - xfs_mount_t *mp) -{ - xfs_trans_t *tp; - unsigned long s; - - mp->m_qflags = 0; - /* - * It is OK to look at sb_qflags here in mount path, - * without SB_LOCK. 
- */ - if (mp->m_sb.sb_qflags == 0) - return; - s = XFS_SB_LOCK(mp); - mp->m_sb.sb_qflags = 0; - XFS_SB_UNLOCK(mp, s); - - /* - * if the fs is readonly, let the incore superblock run - * with quotas off but don't flush the update out to disk - */ - if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) - return; -#ifdef QUOTADEBUG - xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); -#endif - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, - XFS_DEFAULT_LOG_COUNT)) { - xfs_trans_cancel(tp, 0); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_mount_reset_sbqflags: Superblock update failed!"); - return; - } - xfs_mod_sb(tp, XFS_SB_QFLAGS); - xfs_trans_commit(tp, 0, NULL); -} - STATIC int xfs_qm_newmount( xfs_mount_t *mp, diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 68e98962dbef..15e02e8a9d4f 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1053,7 +1053,6 @@ xfs_qm_dqrele_all_inodes( struct xfs_mount *mp, uint flags) { - vmap_t vmap; xfs_inode_t *ip, *topino; uint ireclaims; vnode_t *vp; @@ -1061,8 +1060,8 @@ xfs_qm_dqrele_all_inodes( ASSERT(mp->m_quotainfo); -again: XFS_MOUNT_ILOCK(mp); +again: ip = mp->m_inodes; if (ip == NULL) { XFS_MOUNT_IUNLOCK(mp); @@ -1090,18 +1089,14 @@ again: } vnode_refd = B_FALSE; if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - /* - * Sample vp mapping while holding the mplock, lest - * we come across a non-existent vnode. - */ - VMAP(vp, vmap); ireclaims = mp->m_ireclaims; topino = mp->m_inodes; - XFS_MOUNT_IUNLOCK(mp); + vp = vn_grab(vp); + if (!vp) + goto again; + XFS_MOUNT_IUNLOCK(mp); /* XXX restart limit ? */ - if ( ! 
(vp = vn_get(vp, &vmap))) - goto again; xfs_ilock(ip, XFS_ILOCK_EXCL); vnode_refd = B_TRUE; } else { @@ -1137,7 +1132,6 @@ again: */ if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { /* XXX use a sentinel */ - XFS_MOUNT_IUNLOCK(mp); goto again; } ip = ip->i_mnext; diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 4ed7b6928cd7..4e1a5ec22fa3 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -31,6 +31,7 @@ */ #include "debug.h" +#include "spin.h" #include <asm/page.h> #include <linux/sched.h> diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 8d01dce8c532..92fd1d67f878 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -85,7 +85,7 @@ xfs_acl_vhasacl_default( { int error; - if (vp->v_type != VDIR) + if (!VN_ISDIR(vp)) return 0; xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); return (error == 0); @@ -389,7 +389,7 @@ xfs_acl_allow_set( if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) return EPERM; - if (kind == _ACL_TYPE_DEFAULT && vp->v_type != VDIR) + if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp)) return ENOTDIR; if (vp->v_vfsp->vfs_flag & VFS_RDONLY) return EROFS; @@ -750,7 +750,7 @@ xfs_acl_inherit( * If the new file is a directory, its default ACL is a copy of * the containing directory's default ACL. 
*/ - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); if (!error && !basicperms) xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 6f5d283888aa..3e76def1283d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4754,10 +4754,20 @@ xfs_bmapi( error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, -(alen), rsvd); - if (!error) + if (!error) { error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, -(indlen), rsvd); + if (error && rt) { + xfs_mod_incore_sb(ip->i_mount, + XFS_SBS_FREXTENTS, + extsz, rsvd); + } else if (error) { + xfs_mod_incore_sb(ip->i_mount, + XFS_SBS_FDBLOCKS, + alen, rsvd); + } + } if (error) { if (XFS_IS_QUOTA_ON(ip->i_mount)) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 30b8285ad476..a264657acfd9 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -274,6 +274,7 @@ xfs_buf_item_format( ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); vecp->i_addr = (xfs_caddr_t)&bip->bli_format; vecp->i_len = base_size; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); vecp++; nvecs = 1; @@ -320,12 +321,14 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); nvecs++; break; } else if (next_bit != last_bit + 1) { buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); nvecs++; vecp++; first_bit = next_bit; @@ -337,6 +340,7 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); /* You would think we need to bump the nvecs here too, but we do not * this number is used by recovery, and it gets confused by the 
boundary * split here diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 55c17adaaa37..19e872856f6b 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index db7cbd1bc857..cc7d1494a45d 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -107,6 +107,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip, log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); log_vector->i_len = size; + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT); ASSERT(size >= sizeof(xfs_efi_log_format_t)); } @@ -426,6 +427,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp, log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); log_vector->i_len = size; + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT); ASSERT(size >= sizeof(xfs_efd_log_format_t)); } diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index d3da00045f26..0d9ae8fb4138 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -30,6 +30,8 @@ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ */ +#include <linux/delay.h> + #include "xfs.h" #include "xfs_macros.h" @@ -505,17 +507,15 @@ xfs_iget( vnode_t *vp = NULL; int error; -retry: XFS_STATS_INC(xs_ig_attempts); +retry: if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { bhv_desc_t *bdp; xfs_inode_t *ip; - int newnode; vp = LINVFS_GET_VP(inode); if (inode->i_state & I_NEW) { -inode_allocate: vn_initialize(inode); error = xfs_iget_core(vp, mp, tp, ino, flags, lock_flags, ipp, bno); @@ -526,32 +526,25 @@ inode_allocate: iput(inode); } } else { - /* These are true if the inode is in inactive or - * reclaim. 
The linux inode is about to go away, - * wait for that path to finish, and try again. + /* + * If the inode is not fully constructed due to + * filehandle mistmatches wait for the inode to go + * away and try again. + * + * iget_locked will call __wait_on_freeing_inode + * to wait for the inode to go away. */ - if (vp->v_flag & (VINACT | VRECLM)) { - vn_wait(vp); + if (is_bad_inode(inode) || + ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), + &xfs_vnodeops)) == NULL)) { iput(inode); + delay(1); goto retry; } - if (is_bad_inode(inode)) { - iput(inode); - return EIO; - } - - bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); - if (bdp == NULL) { - XFS_STATS_INC(xs_ig_dup); - goto inode_allocate; - } ip = XFS_BHVTOI(bdp); if (lock_flags != 0) xfs_ilock(ip, lock_flags); - newnode = (ip->i_d.di_mode == 0); - if (newnode) - xfs_iocore_inode_reinit(ip); XFS_STATS_INC(xs_ig_found); *ipp = ip; error = 0; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 34bdf5909687..db43308aae93 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1128,7 +1128,6 @@ xfs_ialloc( ASSERT(ip != NULL); vp = XFS_ITOV(ip); - vp->v_type = IFTOVT(mode); ip->i_d.di_mode = (__uint16_t)mode; ip->i_d.di_onlink = 0; ip->i_d.di_nlink = nlink; @@ -1250,7 +1249,7 @@ xfs_ialloc( */ xfs_trans_log_inode(tp, ip, flags); - /* now that we have a v_type we can set Linux inode ops (& unlock) */ + /* now that we have an i_mode we can set Linux inode ops (& unlock) */ VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); *ipp = ip; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0eed30f5cb19..276ec70eb7f9 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -248,6 +248,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); vecp++; nvecs = 1; @@ -292,6 +293,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&ip->i_d; vecp->i_len = 
sizeof(xfs_dinode_core_t); + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); vecp++; nvecs++; iip->ili_format.ilf_fields |= XFS_ILOG_CORE; @@ -349,6 +351,7 @@ xfs_inode_item_format( vecp->i_addr = (char *)(ip->i_df.if_u1.if_extents); vecp->i_len = ip->i_df.if_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); } else #endif { @@ -367,6 +370,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)ext_buffer; vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_DATA_FORK); + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; @@ -384,6 +388,7 @@ xfs_inode_item_format( ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; @@ -409,6 +414,7 @@ xfs_inode_item_format( ASSERT((ip->i_df.if_real_bytes == 0) || (ip->i_df.if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; @@ -486,6 +492,7 @@ xfs_inode_item_format( vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_ATTR_FORK); #endif + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; @@ -500,6 +507,7 @@ xfs_inode_item_format( ASSERT(ip->i_afp->if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; @@ -523,6 +531,7 @@ xfs_inode_item_format( ASSERT((ip->i_afp->if_real_bytes == 0) || (ip->i_afp->if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; + XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; diff --git a/fs/xfs/xfs_iomap.c 
b/fs/xfs/xfs_iomap.c index 2edd6769e5d3..d0f5be63cddb 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -226,13 +226,12 @@ xfs_iomap( xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count); lockmode = XFS_LCK_MAP_SHARED(mp, io); bmapi_flags = XFS_BMAPI_ENTIRE; - if (flags & BMAPI_IGNSTATE) - bmapi_flags |= XFS_BMAPI_IGSTATE; break; case BMAPI_WRITE: xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count); lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; - bmapi_flags = 0; + if (flags & BMAPI_IGNSTATE) + bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; XFS_ILOCK(mp, io, lockmode); break; case BMAPI_ALLOCATE: @@ -391,9 +390,9 @@ xfs_iomap_write_direct( xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; xfs_bmap_free_t free_list; int aeof; - xfs_filblks_t datablocks, qblocks, resblks; + xfs_filblks_t qblocks, resblks; int committed; - int numrtextents; + int resrtextents; /* * Make sure that the dquots are there. This doesn't hold @@ -434,14 +433,14 @@ xfs_iomap_write_direct( if (!(extsz = ip->i_d.di_extsize)) extsz = mp->m_sb.sb_rextsize; - numrtextents = qblocks = (count_fsb + extsz - 1); - do_div(numrtextents, mp->m_sb.sb_rextsize); + resrtextents = qblocks = (count_fsb + extsz - 1); + do_div(resrtextents, mp->m_sb.sb_rextsize); + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; - datablocks = 0; } else { - datablocks = qblocks = count_fsb; + resrtextents = 0; + resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb); quota_flag = XFS_QMOPT_RES_REGBLKS; - numrtextents = 0; } /* @@ -449,9 +448,8 @@ xfs_iomap_write_direct( */ xfs_iunlock(ip, XFS_ILOCK_EXCL); tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), numrtextents, + XFS_WRITE_LOG_RES(mp), resrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1cd2ac163877..54a6f1142403 100644 --- 
a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -159,11 +159,15 @@ xfs_buftarg_t *xlog_target; void xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) { - if (! log->l_grant_trace) { - log->l_grant_trace = ktrace_alloc(1024, KM_NOSLEEP); - if (! log->l_grant_trace) + unsigned long cnts; + + if (!log->l_grant_trace) { + log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); + if (!log->l_grant_trace) return; } + /* ticket counts are 1 byte each */ + cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; ktrace_enter(log->l_grant_trace, (void *)tic, @@ -178,10 +182,10 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)), (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)), (void *)string, - (void *)((unsigned long)13), - (void *)((unsigned long)14), - (void *)((unsigned long)15), - (void *)((unsigned long)16)); + (void *)((unsigned long)tic->t_trans_type), + (void *)cnts, + (void *)((unsigned long)tic->t_curr_res), + (void *)((unsigned long)tic->t_unit_res)); } void @@ -274,9 +278,11 @@ xfs_log_done(xfs_mount_t *mp, * Release ticket if not permanent reservation or a specifc * request has been made to release a permanent reservation. 
*/ + xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); xlog_ungrant_log_space(log, ticket); xlog_state_put_ticket(log, ticket); } else { + xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); xlog_regrant_reserve_log_space(log, ticket); } @@ -399,7 +405,8 @@ xfs_log_reserve(xfs_mount_t *mp, int cnt, xfs_log_ticket_t *ticket, __uint8_t client, - uint flags) + uint flags, + uint t_type) { xlog_t *log = mp->m_log; xlog_ticket_t *internal_ticket; @@ -421,13 +428,19 @@ xfs_log_reserve(xfs_mount_t *mp, if (*ticket != NULL) { ASSERT(flags & XFS_LOG_PERM_RESERV); internal_ticket = (xlog_ticket_t *)*ticket; + xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)"); xlog_grant_push_ail(mp, internal_ticket->t_unit_res); retval = xlog_regrant_write_log_space(log, internal_ticket); } else { /* may sleep if need to allocate more tickets */ internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, client, flags); + internal_ticket->t_trans_type = t_type; *ticket = internal_ticket; + xlog_trace_loggrant(log, internal_ticket, + (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ? + "xfs_log_reserve: create new ticket (permanent trans)" : + "xfs_log_reserve: create new ticket"); xlog_grant_push_ail(mp, (internal_ticket->t_unit_res * internal_ticket->t_cnt)); @@ -601,8 +614,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (! 
(XLOG_FORCED_SHUTDOWN(log))) { reg[0].i_addr = (void*)&magic; reg[0].i_len = sizeof(magic); + XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); - error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0); + error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0); if (!error) { /* remove inited flag */ ((xlog_ticket_t *)tic)->t_flags = 0; @@ -1272,6 +1286,7 @@ xlog_commit_record(xfs_mount_t *mp, reg[0].i_addr = NULL; reg[0].i_len = 0; + XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); ASSERT_ALWAYS(iclog); if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, @@ -1605,6 +1620,117 @@ xlog_state_finish_copy(xlog_t *log, /* + * print out info relating to regions written which consume + * the reservation + */ +#if defined(XFS_LOG_RES_DEBUG) +STATIC void +xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) +{ + uint i; + uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); + + /* match with XLOG_REG_TYPE_* in xfs_log.h */ + static char *res_type_str[XLOG_REG_TYPE_MAX] = { + "bformat", + "bchunk", + "efi_format", + "efd_format", + "iformat", + "icore", + "iext", + "ibroot", + "ilocal", + "iattr_ext", + "iattr_broot", + "iattr_local", + "qformat", + "dquot", + "quotaoff", + "LR header", + "unmount", + "commit", + "trans header" + }; + static char *trans_type_str[XFS_TRANS_TYPE_MAX] = { + "SETATTR_NOT_SIZE", + "SETATTR_SIZE", + "INACTIVE", + "CREATE", + "CREATE_TRUNC", + "TRUNCATE_FILE", + "REMOVE", + "LINK", + "RENAME", + "MKDIR", + "RMDIR", + "SYMLINK", + "SET_DMATTRS", + "GROWFS", + "STRAT_WRITE", + "DIOSTRAT", + "WRITE_SYNC", + "WRITEID", + "ADDAFORK", + "ATTRINVAL", + "ATRUNCATE", + "ATTR_SET", + "ATTR_RM", + "ATTR_FLAG", + "CLEAR_AGI_BUCKET", + "QM_SBCHANGE", + "DUMMY1", + "DUMMY2", + "QM_QUOTAOFF", + "QM_DQALLOC", + "QM_SETQLIM", + "QM_DQCLUSTER", + "QM_QINOCREATE", + "QM_QUOTAOFF_END", + "SB_UNIT", + "FSYNC_TS", + "GROWFSRT_ALLOC", + "GROWFSRT_ZERO", + "GROWFSRT_FREE", + "SWAPEXT" + }; + + xfs_fs_cmn_err(CE_WARN, mp, + "xfs_log_write: 
reservation summary:\n" + " trans type = %s (%u)\n" + " unit res = %d bytes\n" + " current res = %d bytes\n" + " total reg = %u bytes (o/flow = %u bytes)\n" + " ophdrs = %u (ophdr space = %u bytes)\n" + " ophdr + reg = %u bytes\n" + " num regions = %u\n", + ((ticket->t_trans_type <= 0 || + ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? + "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), + ticket->t_trans_type, + ticket->t_unit_res, + ticket->t_curr_res, + ticket->t_res_arr_sum, ticket->t_res_o_flow, + ticket->t_res_num_ophdrs, ophdr_spc, + ticket->t_res_arr_sum + + ticket->t_res_o_flow + ophdr_spc, + ticket->t_res_num); + + for (i = 0; i < ticket->t_res_num; i++) { + uint r_type = ticket->t_res_arr[i].r_type; + cmn_err(CE_WARN, + "region[%u]: %s - %u bytes\n", + i, + ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? + "bad-rtype" : res_type_str[r_type-1]), + ticket->t_res_arr[i].r_len); + } +} +#else +#define xlog_print_tic_res(mp, ticket) +#endif + +/* * Write some region out to in-core log * * This will be called when writing externally provided regions or when @@ -1677,16 +1803,21 @@ xlog_write(xfs_mount_t * mp, * xlog_op_header_t and may need to be double word aligned. */ len = 0; - if (ticket->t_flags & XLOG_TIC_INITED) /* acct for start rec of xact */ + if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */ len += sizeof(xlog_op_header_t); + XLOG_TIC_ADD_OPHDR(ticket); + } for (index = 0; index < nentries; index++) { len += sizeof(xlog_op_header_t); /* each region gets >= 1 */ + XLOG_TIC_ADD_OPHDR(ticket); len += reg[index].i_len; + XLOG_TIC_ADD_REGION(ticket, reg[index].i_len, reg[index].i_type); } contwr = *start_lsn = 0; if (ticket->t_curr_res < len) { + xlog_print_tic_res(mp, ticket); #ifdef DEBUG xlog_panic( "xfs_log_write: reservation ran out. 
Need to up reservation"); @@ -1790,6 +1921,7 @@ xlog_write(xfs_mount_t * mp, len += sizeof(xlog_op_header_t); /* from splitting of region */ /* account for new log op header */ ticket->t_curr_res -= sizeof(xlog_op_header_t); + XLOG_TIC_ADD_OPHDR(ticket); } xlog_verify_dest_ptr(log, ptr); @@ -2282,6 +2414,9 @@ restart: */ if (log_offset == 0) { ticket->t_curr_res -= log->l_iclog_hsize; + XLOG_TIC_ADD_REGION(ticket, + log->l_iclog_hsize, + XLOG_REG_TYPE_LRHEADER); INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle); ASSIGN_LSN(head->h_lsn, log); ASSERT(log->l_curr_block >= 0); @@ -2468,6 +2603,7 @@ xlog_regrant_write_log_space(xlog_t *log, #endif tic->t_curr_res = tic->t_unit_res; + XLOG_TIC_RESET_RES(tic); if (tic->t_cnt > 0) return (0); @@ -2608,6 +2744,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r'); ticket->t_curr_res = ticket->t_unit_res; + XLOG_TIC_RESET_RES(ticket); xlog_trace_loggrant(log, ticket, "xlog_regrant_reserve_log_space: sub current res"); xlog_verify_grant_head(log, 1); @@ -2624,6 +2761,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, xlog_verify_grant_head(log, 0); GRANT_UNLOCK(log, s); ticket->t_curr_res = ticket->t_unit_res; + XLOG_TIC_RESET_RES(ticket); } /* xlog_regrant_reserve_log_space */ @@ -3179,29 +3317,57 @@ xlog_ticket_get(xlog_t *log, * and their unit amount is the total amount of space required. * * The following lines of code account for non-transaction data - * which occupy space in the on-disk log. + * which occupy space in the on-disk log. + * + * Normal form of a transaction is: + * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph> + * and then there are LR hdrs, split-recs and roundoff at end of syncs. + * + * We need to account for all the leadup data and trailer data + * around the transaction data. + * And then we need to account for the worst case in terms of using + * more space. 
+ * The worst case will happen if: + * - the placement of the transaction happens to be such that the + * roundoff is at its maximum + * - the transaction data is synced before the commit record is synced + * i.e. <transaction-data><roundoff> | <commit-rec><roundoff> + * Therefore the commit record is in its own Log Record. + * This can happen as the commit record is called with its + * own region to xlog_write(). + * This then means that in the worst case, roundoff can happen for + * the commit-rec as well. + * The commit-rec is smaller than padding in this scenario and so it is + * not added separately. */ + /* for trans header */ + unit_bytes += sizeof(xlog_op_header_t); + unit_bytes += sizeof(xfs_trans_header_t); + /* for start-rec */ - unit_bytes += sizeof(xlog_op_header_t); + unit_bytes += sizeof(xlog_op_header_t); + + /* for LR headers */ + num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); + unit_bytes += log->l_iclog_hsize * num_headers; + + /* for commit-rec LR header - note: padding will subsume the ophdr */ + unit_bytes += log->l_iclog_hsize; + + /* for split-recs - ophdrs added when data split over LRs */ + unit_bytes += sizeof(xlog_op_header_t) * num_headers; - /* for padding */ + /* for roundoff padding for transaction data and one for commit record */ if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) && - log->l_mp->m_sb.sb_logsunit > 1) { + log->l_mp->m_sb.sb_logsunit > 1) { /* log su roundoff */ - unit_bytes += log->l_mp->m_sb.sb_logsunit; + unit_bytes += 2*log->l_mp->m_sb.sb_logsunit; } else { /* BB roundoff */ - unit_bytes += BBSIZE; + unit_bytes += 2*BBSIZE; } - /* for commit-rec */ - unit_bytes += sizeof(xlog_op_header_t); - - /* for LR headers */ - num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); - unit_bytes += log->l_iclog_hsize * num_headers; - tic->t_unit_res = unit_bytes; tic->t_curr_res = unit_bytes; tic->t_cnt = cnt; @@ -3209,10 +3375,13 @@ xlog_ticket_get(xlog_t *log, tic->t_tid = 
(xlog_tid_t)((__psint_t)tic & 0xffffffff); tic->t_clientid = client; tic->t_flags = XLOG_TIC_INITED; + tic->t_trans_type = 0; if (xflags & XFS_LOG_PERM_RESERV) tic->t_flags |= XLOG_TIC_PERM_RESERV; sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); + XLOG_TIC_RESET_RES(tic); + return tic; } /* xlog_ticket_get */ diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 0db122ddda3f..18961119fc65 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -114,9 +114,44 @@ xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) #define XFS_VOLUME 0x2 #define XFS_LOG 0xaa + +/* Region types for iovec's i_type */ +#if defined(XFS_LOG_RES_DEBUG) +#define XLOG_REG_TYPE_BFORMAT 1 +#define XLOG_REG_TYPE_BCHUNK 2 +#define XLOG_REG_TYPE_EFI_FORMAT 3 +#define XLOG_REG_TYPE_EFD_FORMAT 4 +#define XLOG_REG_TYPE_IFORMAT 5 +#define XLOG_REG_TYPE_ICORE 6 +#define XLOG_REG_TYPE_IEXT 7 +#define XLOG_REG_TYPE_IBROOT 8 +#define XLOG_REG_TYPE_ILOCAL 9 +#define XLOG_REG_TYPE_IATTR_EXT 10 +#define XLOG_REG_TYPE_IATTR_BROOT 11 +#define XLOG_REG_TYPE_IATTR_LOCAL 12 +#define XLOG_REG_TYPE_QFORMAT 13 +#define XLOG_REG_TYPE_DQUOT 14 +#define XLOG_REG_TYPE_QUOTAOFF 15 +#define XLOG_REG_TYPE_LRHEADER 16 +#define XLOG_REG_TYPE_UNMOUNT 17 +#define XLOG_REG_TYPE_COMMIT 18 +#define XLOG_REG_TYPE_TRANSHDR 19 +#define XLOG_REG_TYPE_MAX 19 +#endif + +#if defined(XFS_LOG_RES_DEBUG) +#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t)) +#else +#define XLOG_VEC_SET_TYPE(vecp, t) +#endif + + typedef struct xfs_log_iovec { xfs_caddr_t i_addr; /* beginning address of region */ int i_len; /* length in bytes of region */ +#if defined(XFS_LOG_RES_DEBUG) + uint i_type; /* type of region */ +#endif } xfs_log_iovec_t; typedef void* xfs_log_ticket_t; @@ -159,7 +194,8 @@ int xfs_log_reserve(struct xfs_mount *mp, int count, xfs_log_ticket_t *ticket, __uint8_t clientid, - uint flags); + uint flags, + uint t_type); int xfs_log_write(struct xfs_mount *mp, xfs_log_iovec_t region[], int nentries, diff --git a/fs/xfs/xfs_log_priv.h 
b/fs/xfs/xfs_log_priv.h index 1a1d452f15f9..eb7fdc6ebc32 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -335,18 +335,66 @@ typedef __uint32_t xlog_tid_t; #define XLOG_COVER_OPS 5 + +/* Ticket reservation region accounting */ +#if defined(XFS_LOG_RES_DEBUG) +#define XLOG_TIC_LEN_MAX 15 +#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \ + (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0) +#define XLOG_TIC_ADD_OPHDR(t) ((t)->t_res_num_ophdrs++) +#define XLOG_TIC_ADD_REGION(t, len, type) \ + do { \ + if ((t)->t_res_num == XLOG_TIC_LEN_MAX) { \ + /* add to overflow and start again */ \ + (t)->t_res_o_flow += (t)->t_res_arr_sum; \ + (t)->t_res_num = 0; \ + (t)->t_res_arr_sum = 0; \ + } \ + (t)->t_res_arr[(t)->t_res_num].r_len = (len); \ + (t)->t_res_arr[(t)->t_res_num].r_type = (type); \ + (t)->t_res_arr_sum += (len); \ + (t)->t_res_num++; \ + } while (0) + +/* + * Reservation region + * As would be stored in xfs_log_iovec but without the i_addr which + * we don't care about. 
+ */ +typedef struct xlog_res { + uint r_len; + uint r_type; +} xlog_res_t; +#else +#define XLOG_TIC_RESET_RES(t) +#define XLOG_TIC_ADD_OPHDR(t) +#define XLOG_TIC_ADD_REGION(t, len, type) +#endif + + typedef struct xlog_ticket { - sv_t t_sema; /* sleep on this semaphore :20 */ - struct xlog_ticket *t_next; /* : 4 */ - struct xlog_ticket *t_prev; /* : 4 */ - xlog_tid_t t_tid; /* transaction identifier : 4 */ - int t_curr_res; /* current reservation in bytes : 4 */ - int t_unit_res; /* unit reservation in bytes : 4 */ - __uint8_t t_ocnt; /* original count : 1 */ - __uint8_t t_cnt; /* current count : 1 */ - __uint8_t t_clientid; /* who does this belong to; : 1 */ - __uint8_t t_flags; /* properties of reservation : 1 */ + sv_t t_sema; /* sleep on this semaphore : 20 */ + struct xlog_ticket *t_next; /* :4|8 */ + struct xlog_ticket *t_prev; /* :4|8 */ + xlog_tid_t t_tid; /* transaction identifier : 4 */ + int t_curr_res; /* current reservation in bytes : 4 */ + int t_unit_res; /* unit reservation in bytes : 4 */ + char t_ocnt; /* original count : 1 */ + char t_cnt; /* current count : 1 */ + char t_clientid; /* who does this belong to; : 1 */ + char t_flags; /* properties of reservation : 1 */ + uint t_trans_type; /* transaction type : 4 */ + +#if defined (XFS_LOG_RES_DEBUG) + /* reservation array fields */ + uint t_res_num; /* num in array : 4 */ + xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : X */ + uint t_res_num_ophdrs; /* num op hdrs : 4 */ + uint t_res_arr_sum; /* array sum : 4 */ + uint t_res_o_flow; /* sum overflow : 4 */ +#endif } xlog_ticket_t; + #endif diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0aac28ddb81c..14faabaabf29 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1387,7 +1387,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0); + ptr = kmem_realloc(old_ptr, 
len+old_len, old_len, 0u); memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index 4f40c92863d5..a6cd6324e946 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -42,7 +42,8 @@ #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" - +#include "xfs_quota.h" +#include "xfs_error.h" STATIC struct xfs_dquot * xfs_dqvopchown_default( @@ -54,8 +55,79 @@ xfs_dqvopchown_default( return NULL; } +/* + * Clear the quotaflags in memory and in the superblock. + */ +int +xfs_mount_reset_sbqflags(xfs_mount_t *mp) +{ + int error; + xfs_trans_t *tp; + unsigned long s; + + mp->m_qflags = 0; + /* + * It is OK to look at sb_qflags here in mount path, + * without SB_LOCK. 
+ */ + if (mp->m_sb.sb_qflags == 0) + return 0; + s = XFS_SB_LOCK(mp); + mp->m_sb.sb_qflags = 0; + XFS_SB_UNLOCK(mp, s); + + /* + * if the fs is readonly, let the incore superblock run + * with quotas off but don't flush the update out to disk + */ + if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) + return 0; +#ifdef QUOTADEBUG + xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); +#endif + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, + XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + xfs_fs_cmn_err(CE_ALERT, mp, + "xfs_mount_reset_sbqflags: Superblock update failed!"); + return error; + } + xfs_mod_sb(tp, XFS_SB_QFLAGS); + error = xfs_trans_commit(tp, 0, NULL); + return error; +} + +STATIC int +xfs_noquota_init( + xfs_mount_t *mp, + uint *needquotamount, + uint *quotaflags) +{ + int error = 0; + + *quotaflags = 0; + *needquotamount = B_FALSE; + + ASSERT(!XFS_IS_QUOTA_ON(mp)); + + /* + * If a file system had quotas running earlier, but decided to + * mount without -o uquota/pquota/gquota options, revoke the + * quotachecked license. + */ + if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { + cmn_err(CE_NOTE, + "XFS resetting qflags for filesystem %s", + mp->m_fsname); + + error = xfs_mount_reset_sbqflags(mp); + } + return error; +} + xfs_qmops_t xfs_qmcore_stub = { - .xfs_qminit = (xfs_qminit_t) fs_noerr, + .xfs_qminit = (xfs_qminit_t) xfs_noquota_init, .xfs_qmdone = (xfs_qmdone_t) fs_noerr, .xfs_qmmount = (xfs_qmmount_t) fs_noerr, .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr, diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 7134576ae7fa..32cb79752d5d 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -160,6 +160,20 @@ typedef struct xfs_qoff_logformat { #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ /* + * Quota Accounting/Enforcement flags + */ +#define XFS_ALL_QUOTA_ACCT \ + (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) +#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) +#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) + +#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) +#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD) +#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) +#define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) +#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) + +/* * Incore only flags for quotaoff - these bits get cleared when quota(s) * are in the process of getting turned off. These flags are in m_qflags but * never in sb_qflags. 
@@ -362,6 +376,7 @@ typedef struct xfs_dqtrxops { f | XFS_QMOPT_RES_REGBLKS) extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); +extern int xfs_mount_reset_sbqflags(struct xfs_mount *); extern struct bhv_vfsops xfs_qmops; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 06dfca531f79..92efe272b83d 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -276,7 +276,7 @@ xfs_trans_reserve( error = xfs_log_reserve(tp->t_mountp, logspace, logcount, &tp->t_ticket, - XFS_TRANSACTION, log_flags); + XFS_TRANSACTION, log_flags, tp->t_type); if (error) { goto undo_blocks; } @@ -1032,6 +1032,7 @@ xfs_trans_fill_vecs( tp->t_header.th_num_items = nitems; log_vector->i_addr = (xfs_caddr_t)&tp->t_header; log_vector->i_len = sizeof(xfs_trans_header_t); + XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index ec541d66fa2a..a263aec8b3a6 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -112,6 +112,7 @@ typedef struct xfs_trans_header { #define XFS_TRANS_GROWFSRT_ZERO 38 #define XFS_TRANS_GROWFSRT_FREE 39 #define XFS_TRANS_SWAPEXT 40 +#define XFS_TRANS_TYPE_MAX 40 /* new transaction types need to be reflected in xfs_logprint(8) */ @@ -998,6 +999,7 @@ struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 7bc5eab4c2c1..2a71b4f91bfa 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -379,8 +379,8 @@ xfs_trans_delete_ail( else { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 
"xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL"); - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); AIL_UNLOCK(mp, s); + xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); } } } diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 144da7a85466..e733293dd7f4 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -714,6 +714,29 @@ xfs_trans_bhold(xfs_trans_t *tp, } /* + * Cancel the previous buffer hold request made on this buffer + * for this transaction. + */ +void +xfs_trans_bhold_release(xfs_trans_t *tp, + xfs_buf_t *bp) +{ + xfs_buf_log_item_t *bip; + + ASSERT(XFS_BUF_ISBUSY(bp)); + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); + ASSERT(atomic_read(&bip->bli_refcount) > 0); + ASSERT(bip->bli_flags & XFS_BLI_HOLD); + bip->bli_flags &= ~XFS_BLI_HOLD; + xfs_buf_item_trace("BHOLD RELEASE", bip); +} + +/* * This is called to mark bytes first through last inclusive of the given * buffer as needing to be logged when the transaction is committed. * The buffer must already be associated with the given transaction. 
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 42bcc0215203..f1a904e23ade 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -795,7 +795,6 @@ xfs_statvfs( xfs_mount_t *mp; xfs_sb_t *sbp; unsigned long s; - u64 id; mp = XFS_BHVTOM(bdp); sbp = &(mp->m_sb); @@ -823,9 +822,7 @@ xfs_statvfs( statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); XFS_SB_UNLOCK(mp, s); - id = huge_encode_dev(mp->m_dev); - statp->f_fsid.val[0] = (u32)id; - statp->f_fsid.val[1] = (u32)(id >> 32); + xfs_statvfs_fsid(statp, mp); statp->f_namelen = MAXNAMELEN - 1; return 0; @@ -906,7 +903,6 @@ xfs_sync_inodes( xfs_inode_t *ip_next; xfs_buf_t *bp; vnode_t *vp = NULL; - vmap_t vmap; int error; int last_error; uint64_t fflag; @@ -1101,48 +1097,21 @@ xfs_sync_inodes( * lock in xfs_ireclaim() after the inode is pulled from * the mount list will sleep until we release it here. * This keeps the vnode from being freed while we reference - * it. It is also cheaper and simpler than actually doing - * a vn_get() for every inode we touch here. + * it. */ if (xfs_ilock_nowait(ip, lock_flags) == 0) { - if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { ip = ip->i_mnext; continue; } - /* - * We need to unlock the inode list lock in order - * to lock the inode. Insert a marker record into - * the inode list to remember our position, dropping - * the lock is now done inside the IPOINTER_INSERT - * macro. - * - * We also use the inode list lock to protect us - * in taking a snapshot of the vnode version number - * for use in calling vn_get(). - */ - VMAP(vp, vmap); - IPOINTER_INSERT(ip, mp); - - vp = vn_get(vp, &vmap); + vp = vn_grab(vp); if (vp == NULL) { - /* - * The vnode was reclaimed once we let go - * of the inode list lock. Skip to the - * next list entry. Remove the marker. 
- */ - - XFS_MOUNT_ILOCK(mp); - - mount_locked = B_TRUE; - vnode_refed = B_FALSE; - - IPOINTER_REMOVE(ip, mp); - + ip = ip->i_mnext; continue; } + IPOINTER_INSERT(ip, mp); xfs_ilock(ip, lock_flags); ASSERT(vp == XFS_ITOV(ip)); @@ -1533,7 +1502,10 @@ xfs_syncsub( * eventually kicked out of the cache. */ if (flags & SYNC_REFCACHE) { - xfs_refcache_purge_some(mp); + if (flags & SYNC_WAIT) + xfs_refcache_purge_mp(mp); + else + xfs_refcache_purge_some(mp); } /* @@ -1649,6 +1621,10 @@ xfs_vget( #define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ #define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ #define MNTOPT_MTPT "mtpt" /* filesystem mount point */ +#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ +#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ +#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ +#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ #define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ #define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */ #define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ @@ -1769,6 +1745,12 @@ xfs_parseargs( } args->flags |= XFSMNT_IHASHSIZE; args->ihashsize = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_GRPID) || + !strcmp(this_char, MNTOPT_BSDGROUPS)) { + vfsp->vfs_flag |= VFS_GRPID; + } else if (!strcmp(this_char, MNTOPT_NOGRPID) || + !strcmp(this_char, MNTOPT_SYSVGROUPS)) { + vfsp->vfs_flag &= ~VFS_GRPID; } else if (!strcmp(this_char, MNTOPT_WSYNC)) { args->flags |= XFSMNT_WSYNC; } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { @@ -1890,6 +1872,7 @@ xfs_showargs( }; struct proc_xfs_info *xfs_infop; struct xfs_mount *mp = XFS_BHVTOM(bhv); + struct vfs *vfsp = XFS_MTOVFS(mp); for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { if (mp->m_flags & xfs_infop->flag) @@ -1926,7 +1909,10 @@ xfs_showargs( if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) 
seq_printf(m, "," MNTOPT_64BITINODE); - + + if (vfsp->vfs_flag & VFS_GRPID) + seq_printf(m, "," MNTOPT_GRPID); + return 0; } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1377c868f3f4..58bfe629b933 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -104,7 +104,7 @@ xfs_open( * If it's a directory with any blocks, read-ahead block 0 * as we're almost certain to have the next operation be a read there. */ - if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) { + if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { mode = xfs_ilock_map_shared(ip); if (ip->i_d.di_nextents > 0) (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); @@ -163,18 +163,21 @@ xfs_getattr( /* * Copy from in-core inode. */ - vap->va_type = vp->v_type; - vap->va_mode = ip->i_d.di_mode & MODEMASK; + vap->va_mode = ip->i_d.di_mode; vap->va_uid = ip->i_d.di_uid; vap->va_gid = ip->i_d.di_gid; vap->va_projid = ip->i_d.di_projid; /* * Check vnode type block/char vs. everything else. - * Do it with bitmask because that's faster than looking - * for multiple values individually. */ - if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { + switch (ip->i_d.di_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + vap->va_rdev = ip->i_df.if_u2.if_rdev; + vap->va_blocksize = BLKDEV_IOSIZE; + break; + default: vap->va_rdev = 0; if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { @@ -224,9 +227,7 @@ xfs_getattr( (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); } - } else { - vap->va_rdev = ip->i_df.if_u2.if_rdev; - vap->va_blocksize = BLKDEV_IOSIZE; + break; } vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; @@ -468,7 +469,7 @@ xfs_setattr( m |= S_ISGID; #if 0 /* Linux allows this, Irix doesn't. 
*/ - if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR) + if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) m |= S_ISVTX; #endif if (m && !capable(CAP_FSETID)) @@ -546,10 +547,10 @@ xfs_setattr( goto error_return; } - if (vp->v_type == VDIR) { + if (VN_ISDIR(vp)) { code = XFS_ERROR(EISDIR); goto error_return; - } else if (vp->v_type != VREG) { + } else if (!VN_ISREG(vp)) { code = XFS_ERROR(EINVAL); goto error_return; } @@ -1567,7 +1568,7 @@ xfs_release( vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); - if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) { + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { return 0; } @@ -1895,7 +1896,7 @@ xfs_create( dp = XFS_BHVTOI(dir_bdp); mp = dp->i_mount; - dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); + dm_di_mode = vap->va_mode; namelen = VNAMELEN(dentry); if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { @@ -1973,8 +1974,7 @@ xfs_create( (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) goto error_return; rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; - error = xfs_dir_ialloc(&tp, dp, - MAKEIMODE(vap->va_type,vap->va_mode), 1, + error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, rdev, credp, prid, resblks > 0, &ip, &committed); if (error) { @@ -2620,7 +2620,7 @@ xfs_link( vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); target_namelen = VNAMELEN(dentry); - if (src_vp->v_type == VDIR) + if (VN_ISDIR(src_vp)) return XFS_ERROR(EPERM); src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); @@ -2805,7 +2805,7 @@ xfs_mkdir( tp = NULL; dp_joined_to_trans = B_FALSE; - dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); + dm_di_mode = vap->va_mode; if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, @@ -2879,8 +2879,7 @@ xfs_mkdir( /* * create the directory inode. 
*/ - error = xfs_dir_ialloc(&tp, dp, - MAKEIMODE(vap->va_type,vap->va_mode), 2, + error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2, 0, credp, prid, resblks > 0, &cdp, NULL); if (error) { @@ -3650,7 +3649,7 @@ xfs_rwlock( vnode_t *vp; vp = BHV_TO_VNODE(bdp); - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) return 1; ip = XFS_BHVTOI(bdp); if (locktype == VRWLOCK_WRITE) { @@ -3681,7 +3680,7 @@ xfs_rwunlock( vnode_t *vp; vp = BHV_TO_VNODE(bdp); - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) return; ip = XFS_BHVTOI(bdp); if (locktype == VRWLOCK_WRITE) { @@ -3847,51 +3846,10 @@ xfs_reclaim( return 0; } - if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { - if (ip->i_d.di_size > 0) { - /* - * Flush and invalidate any data left around that is - * a part of this file. - * - * Get the inode's i/o lock so that buffers are pushed - * out while holding the proper lock. We can't hold - * the inode lock here since flushing out buffers may - * cause us to try to get the lock in xfs_strategy(). - * - * We don't have to call remapf() here, because there - * cannot be any mapped file references to this vnode - * since it is being reclaimed. - */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - /* - * If we hit an IO error, we need to make sure that the - * buffer and page caches of file data for - * the file are tossed away. We don't want to use - * VOP_FLUSHINVAL_PAGES here because we don't want dirty - * pages to stay attached to the vnode, but be - * marked P_BAD. pdflush/vnode_pagebad - * hates that. - */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE); - } else { - VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); - } + vn_iowait(vp); - ASSERT(VN_CACHED(vp) == 0); - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || - ip->i_delayed_blks == 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - } else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - /* - * di_size field may not be quite accurate if we're - * shutting down. 
- */ - VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); - ASSERT(VN_CACHED(vp) == 0); - } - } + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + ASSERT(VN_CACHED(vp) == 0); /* If we have nothing to flush with this inode then complete the * teardown now, otherwise break the link between the xfs inode @@ -4567,7 +4525,7 @@ xfs_change_file_space( /* * must be a regular file and have write permission */ - if (vp->v_type != VREG) + if (!VN_ISREG(vp)) return XFS_ERROR(EINVAL); xfs_ilock(ip, XFS_ILOCK_SHARED); |