diff options
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r-- | fs/xfs/linux-2.6/kmem.c | 56 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/kmem.h | 21 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_acl.c | 14 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 320 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 52 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_fs_subr.c | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl.c | 21 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl.h | 12 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl32.c | 4 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_iops.c | 4 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_lrw.c | 62 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_lrw.h | 3 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 183 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 329 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.h | 3 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.h | 1218 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_xattr.c | 27 |
17 files changed, 1397 insertions, 934 deletions
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 2d3f90afe5f1..bc7405585def 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -16,7 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include <linux/mm.h> -#include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/swap.h> #include <linux/blkdev.h> @@ -24,8 +23,25 @@ #include "time.h" #include "kmem.h" -#define MAX_VMALLOCS 6 -#define MAX_SLAB_SIZE 0x20000 +/* + * Greedy allocation. May fail and may return vmalloced memory. + * + * Must be freed using kmem_free_large. + */ +void * +kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) +{ + void *ptr; + size_t kmsize = maxsize; + + while (!(ptr = kmem_zalloc_large(kmsize))) { + if ((kmsize >>= 1) <= minsize) + kmsize = minsize; + } + if (ptr) + *size = kmsize; + return ptr; +} void * kmem_alloc(size_t size, unsigned int __nocast flags) @@ -34,19 +50,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags) gfp_t lflags = kmem_flags_convert(flags); void *ptr; -#ifdef DEBUG - if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { - printk(KERN_WARNING "Large %s attempt, size=%ld\n", - __func__, (long)size); - dump_stack(); - } -#endif - do { - if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) - ptr = kmalloc(size, lflags); - else - ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = kmalloc(size, lflags); if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) return ptr; if (!(++retries % 100)) @@ -68,27 +73,6 @@ kmem_zalloc(size_t size, unsigned int __nocast flags) return ptr; } -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize, - unsigned int __nocast flags) -{ - void *ptr; - size_t kmsize = maxsize; - unsigned int kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP; - - while (!(ptr = kmem_zalloc(kmsize, kmflags))) { - if ((kmsize <= minsize) && (flags & KM_NOSLEEP)) - break; - if ((kmsize >>= 1) <= minsize) { - kmsize = minsize; - kmflags = flags; - } - } - if (ptr) - *size = kmsize; - return ptr; -} - void kmem_free(const void *ptr) { diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 179cbd630f69..f7c8f7a9ea6d 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/vmalloc.h> /* * General memory allocation interfaces @@ -30,7 +31,6 @@ #define KM_NOSLEEP 0x0002u #define KM_NOFS 0x0004u #define KM_MAYFAIL 0x0008u -#define KM_LARGE 0x0010u /* * We use a special process flag to avoid recursive callbacks into @@ -42,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags) { gfp_t lflags; - BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE)); + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); if (flags & KM_NOSLEEP) { lflags = GFP_ATOMIC | __GFP_NOWARN; @@ -56,10 +56,25 @@ kmem_flags_convert(unsigned int __nocast flags) extern void *kmem_alloc(size_t, unsigned int __nocast); extern void *kmem_zalloc(size_t, unsigned int __nocast); -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast); extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); extern void kmem_free(const void *); +static inline void *kmem_zalloc_large(size_t size) +{ + void *ptr; + + ptr = vmalloc(size); + if (ptr) + memset(ptr, 0, size); + return ptr; +} +static inline void kmem_free_large(void *ptr) +{ + vfree(ptr); +} + +extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); + /* * Zone interfaces */ diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index 2512125dfa7c..bf85bbe4a9ae 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -106,7 +106,7 @@ xfs_get_acl(struct inode *inode, int type) struct posix_acl *acl; struct xfs_acl *xfs_acl; int len = sizeof(struct xfs_acl); - char *ea_name; + unsigned char *ea_name; int error; acl = get_cached_acl(inode, type); @@ -133,7 +133,8 @@ xfs_get_acl(struct inode *inode, int type) if (!xfs_acl) return ERR_PTR(-ENOMEM); - error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT); + error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, + &len, ATTR_ROOT); if (error) { /* * If the attribute doesn't exist make sure we have a negative @@ -162,7 +163,7 @@ STATIC int xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) { struct xfs_inode *ip = XFS_I(inode); - char *ea_name; + unsigned char *ea_name; int error; if (S_ISLNK(inode->i_mode)) @@ -194,7 +195,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) (sizeof(struct xfs_acl_entry) * (XFS_ACL_MAX_ENTRIES - acl->a_count)); - error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl, + error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, len, ATTR_ROOT); kfree(xfs_acl); @@ -251,8 +252,9 @@ xfs_set_mode(struct inode *inode, mode_t mode) if (mode != inode->i_mode) { struct iattr iattr; - iattr.ia_valid = ATTR_MODE; + iattr.ia_valid = ATTR_MODE | ATTR_CTIME; iattr.ia_mode = mode; + iattr.ia_ctime = current_fs_time(inode->i_sb); error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); } @@ -261,7 +263,7 @@ xfs_set_mode(struct inode *inode, mode_t mode) } static int -xfs_acl_exists(struct inode *inode, char *name) +xfs_acl_exists(struct inode *inode, unsigned char *name) { int len = sizeof(struct xfs_acl); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 77b8be81c769..6f76ba85f193 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -33,6 +33,7 @@ #include <linux/migrate.h> #include <linux/backing-dev.h> #include <linux/freezer.h> +#include <linux/list_sort.h> #include "xfs_sb.h" #include "xfs_inum.h" @@ -76,6 +77,27 @@ struct workqueue_struct *xfsconvertd_workqueue; #define xfs_buf_deallocate(bp) \ kmem_zone_free(xfs_buf_zone, (bp)); +static inline int +xfs_buf_is_vmapped( + struct xfs_buf *bp) +{ + /* + * Return true if the buffer is vmapped. + * + * The XBF_MAPPED flag is set if the buffer should be mapped, but the + * code is clever enough to know it doesn't have to map a single page, + * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. + */ + return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; +} + +static inline int +xfs_buf_vmap_len( + struct xfs_buf *bp) +{ + return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; +} + /* * Page Region interfaces. * @@ -314,7 +336,7 @@ xfs_buf_free( if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { uint i; - if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) + if (xfs_buf_is_vmapped(bp)) free_address(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { @@ -1051,22 +1073,30 @@ xfs_buf_ioerror( } int -xfs_bawrite( - void *mp, +xfs_bwrite( + struct xfs_mount *mp, struct xfs_buf *bp) { - trace_xfs_buf_bawrite(bp, _RET_IP_); + int iowait = (bp->b_flags & XBF_ASYNC) == 0; + int error = 0; - ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); + bp->b_strat = xfs_bdstrat_cb; + bp->b_mount = mp; + bp->b_flags |= XBF_WRITE; + if (!iowait) + bp->b_flags |= _XBF_RUN_QUEUES; xfs_buf_delwri_dequeue(bp); + xfs_buf_iostrategy(bp); - bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); - bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); + if (iowait) { + error = xfs_buf_iowait(bp); + if (error) + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + xfs_buf_relse(bp); + } - bp->b_mount = mp; - bp->b_strat = xfs_bdstrat_cb; - return xfs_bdstrat_cb(bp); + return error; } void @@ -1085,6 +1115,126 @@ xfs_bdwrite( xfs_buf_delwri_queue(bp, 1); } +/* + * Called when we want to stop a buffer from getting written or read. + * We attach the EIO error, muck with its flags, and call biodone + * so that the proper iodone callbacks get called. + */ +STATIC int +xfs_bioerror( + xfs_buf_t *bp) +{ +#ifdef XFSERRORDEBUG + ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); +#endif + + /* + * No need to wait until the buffer is unpinned, we aren't flushing it. + */ + XFS_BUF_ERROR(bp, EIO); + + /* + * We're calling biodone, so delete XBF_DONE flag. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_UNDONE(bp); + XFS_BUF_STALE(bp); + + XFS_BUF_CLR_BDSTRAT_FUNC(bp); + xfs_biodone(bp); + + return EIO; +} + +/* + * Same as xfs_bioerror, except that we are releasing the buffer + * here ourselves, and avoiding the biodone call. + * This is meant for userdata errors; metadata bufs come with + * iodone functions attached, so that we can track down errors. + */ +STATIC int +xfs_bioerror_relse( + struct xfs_buf *bp) +{ + int64_t fl = XFS_BUF_BFLAGS(bp); + /* + * No need to wait until the buffer is unpinned. + * We aren't flushing it. + * + * chunkhold expects B_DONE to be set, whether + * we actually finish the I/O or not. We don't want to + * change that interface. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_DONE(bp); + XFS_BUF_STALE(bp); + XFS_BUF_CLR_IODONE_FUNC(bp); + XFS_BUF_CLR_BDSTRAT_FUNC(bp); + if (!(fl & XBF_ASYNC)) { + /* + * Mark b_error and B_ERROR _both_. + * Lot's of chunkcache code assumes that. + * There's no reason to mark error for + * ASYNC buffers. + */ + XFS_BUF_ERROR(bp, EIO); + XFS_BUF_FINISH_IOWAIT(bp); + } else { + xfs_buf_relse(bp); + } + + return EIO; +} + + +/* + * All xfs metadata buffers except log state machine buffers + * get this attached as their b_bdstrat callback function. + * This is so that we can catch a buffer + * after prematurely unpinning it to forcibly shutdown the filesystem. + */ +int +xfs_bdstrat_cb( + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + /* + * Metadata write that didn't get logged but + * written delayed anyway. These aren't associated + * with a transaction, and can be ignored. + */ + if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) + return xfs_bioerror_relse(bp); + else + return xfs_bioerror(bp); + } + + xfs_buf_iorequest(bp); + return 0; +} + +/* + * Wrapper around bdstrat so that we can stop data from going to disk in case + * we are shutting down the filesystem. Typically user data goes thru this + * path; one of the exceptions is the superblock. + */ +void +xfsbdstrat( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(mp)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + xfs_bioerror_relse(bp); + return; + } + + xfs_buf_iorequest(bp); +} + STATIC void _xfs_buf_ioend( xfs_buf_t *bp, @@ -1107,6 +1257,9 @@ xfs_buf_bio_end_io( xfs_buf_ioerror(bp, -error); + if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); + do { struct page *page = bvec->bv_page; @@ -1216,6 +1369,10 @@ next_chunk: submit_io: if (likely(bio->bi_size)) { + if (xfs_buf_is_vmapped(bp)) { + flush_kernel_vmap_range(bp->b_addr, + xfs_buf_vmap_len(bp)); + } submit_bio(rw, bio); if (size) goto next_chunk; @@ -1296,7 +1453,7 @@ xfs_buf_iomove( xfs_buf_t *bp, /* buffer to process */ size_t boff, /* starting buffer offset */ size_t bsize, /* length to copy */ - caddr_t data, /* data address */ + void *data, /* data address */ xfs_buf_rw_t mode) /* read/write/zero flag */ { size_t bend, cpoff, csize; @@ -1378,8 +1535,8 @@ xfs_alloc_bufhash( btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; - btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * - sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); + btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * + sizeof(xfs_bufhash_t)); for (i = 0; i < (1 << btp->bt_hashshift); i++) { spin_lock_init(&btp->bt_hash[i].bh_lock); INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); @@ -1390,7 +1547,7 @@ STATIC void xfs_free_bufhash( xfs_buftarg_t *btp) { - kmem_free(btp->bt_hash); + kmem_free_large(btp->bt_hash); btp->bt_hash = NULL; } @@ -1595,6 +1752,11 @@ xfs_buf_delwri_queue( list_del(&bp->b_list); } + if (list_empty(dwq)) { + /* start xfsbufd as it is about to have something to do */ + wake_up_process(bp->b_target->bt_task); + } + bp->b_flags |= _XBF_DELWRI_Q; list_add_tail(&bp->b_list, dwq); bp->b_queuetime = jiffies; @@ -1626,6 +1788,35 @@ xfs_buf_delwri_dequeue( trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); } +/* + * If a delwri buffer needs to be pushed before it has aged out, then promote + * it to the head of the delwri queue so that it will be flushed on the next + * xfsbufd run. We do this by resetting the queuetime of the buffer to be older + * than the age currently needed to flush the buffer. Hence the next time the + * xfsbufd sees it is guaranteed to be considered old enough to flush. + */ +void +xfs_buf_delwri_promote( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; + + ASSERT(bp->b_flags & XBF_DELWRI); + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + + /* + * Check the buffer age before locking the delayed write queue as we + * don't need to promote buffers that are already past the flush age. + */ + if (bp->b_queuetime < jiffies - age) + return; + bp->b_queuetime = jiffies - age; + spin_lock(&btp->bt_delwrite_lock); + list_move(&bp->b_list, &btp->bt_delwrite_queue); + spin_unlock(&btp->bt_delwrite_lock); +} + STATIC void xfs_buf_runall_queues( struct workqueue_struct *queue) @@ -1644,6 +1835,8 @@ xfsbufd_wakeup( list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) continue; + if (list_empty(&btp->bt_delwrite_queue)) + continue; set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); wake_up_process(btp->bt_task); } @@ -1694,20 +1887,53 @@ xfs_buf_delwri_split( } +/* + * Compare function is more complex than it needs to be because + * the return value is only 32 bits and we are doing comparisons + * on 64 bit values + */ +static int +xfs_buf_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); + struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); + xfs_daddr_t diff; + + diff = ap->b_bn - bp->b_bn; + if (diff < 0) + return -1; + if (diff > 0) + return 1; + return 0; +} + +void +xfs_buf_delwri_sort( + xfs_buftarg_t *target, + struct list_head *list) +{ + list_sort(NULL, list, xfs_buf_cmp); +} + STATIC int xfsbufd( void *data) { - struct list_head tmp; - xfs_buftarg_t *target = (xfs_buftarg_t *)data; - int count; - xfs_buf_t *bp; + xfs_buftarg_t *target = (xfs_buftarg_t *)data; current->flags |= PF_MEMALLOC; set_freezable(); do { + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); + long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); + int count = 0; + struct list_head tmp; + if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); refrigerator(); @@ -1715,17 +1941,16 @@ xfsbufd( clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); } - schedule_timeout_interruptible( - xfs_buf_timer_centisecs * msecs_to_jiffies(10)); + /* sleep for a long time if there is nothing to do. */ + if (list_empty(&target->bt_delwrite_queue)) + tout = MAX_SCHEDULE_TIMEOUT; + schedule_timeout_interruptible(tout); - xfs_buf_delwri_split(target, &tmp, - xfs_buf_age_centisecs * msecs_to_jiffies(10)); - - count = 0; + xfs_buf_delwri_split(target, &tmp, age); + list_sort(NULL, &tmp, xfs_buf_cmp); while (!list_empty(&tmp)) { - bp = list_entry(tmp.next, xfs_buf_t, b_list); - ASSERT(target == bp->b_target); - + struct xfs_buf *bp; + bp = list_first_entry(&tmp, struct xfs_buf, b_list); list_del_init(&bp->b_list); xfs_buf_iostrategy(bp); count++; @@ -1751,42 +1976,45 @@ xfs_flush_buftarg( xfs_buftarg_t *target, int wait) { - struct list_head tmp; - xfs_buf_t *bp, *n; + xfs_buf_t *bp; int pincount = 0; + LIST_HEAD(tmp_list); + LIST_HEAD(wait_list); xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfslogd_workqueue); set_bit(XBT_FORCE_FLUSH, &target->bt_flags); - pincount = xfs_buf_delwri_split(target, &tmp, 0); + pincount = xfs_buf_delwri_split(target, &tmp_list, 0); /* - * Dropped the delayed write list lock, now walk the temporary list + * Dropped the delayed write list lock, now walk the temporary list. + * All I/O is issued async and then if we need to wait for completion + * we do that after issuing all the IO. */ - list_for_each_entry_safe(bp, n, &tmp, b_list) { + list_sort(NULL, &tmp_list, xfs_buf_cmp); + while (!list_empty(&tmp_list)) { + bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); ASSERT(target == bp->b_target); - if (wait) + list_del_init(&bp->b_list); + if (wait) { bp->b_flags &= ~XBF_ASYNC; - else - list_del_init(&bp->b_list); - + list_add(&bp->b_list, &wait_list); + } xfs_buf_iostrategy(bp); } - if (wait) + if (wait) { + /* Expedite and wait for IO to complete. */ blk_run_address_space(target->bt_mapping); + while (!list_empty(&wait_list)) { + bp = list_first_entry(&wait_list, struct xfs_buf, b_list); - /* - * Remaining list items must be flushed before returning - */ - while (!list_empty(&tmp)) { - bp = list_entry(tmp.next, xfs_buf_t, b_list); - - list_del_init(&bp->b_list); - xfs_iowait(bp); - xfs_buf_relse(bp); + list_del_init(&bp->b_list); + xfs_iowait(bp); + xfs_buf_relse(bp); + } } return pincount; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a34c7b54822d..386e7361e50e 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -232,13 +232,17 @@ extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ -extern int xfs_bawrite(void *mp, xfs_buf_t *bp); +extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); + +extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); + extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern int xfs_buf_iorequest(xfs_buf_t *); extern int xfs_buf_iowait(xfs_buf_t *); -extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, +extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); static inline int xfs_buf_iostrategy(xfs_buf_t *bp) @@ -261,6 +265,7 @@ extern int xfs_buf_ispin(xfs_buf_t *); /* Delayed Write Buffer Routines */ extern void xfs_buf_delwri_dequeue(xfs_buf_t *); +extern void xfs_buf_delwri_promote(xfs_buf_t *); /* Buffer Daemon Setup Routines */ extern int xfs_buf_init(void); @@ -270,33 +275,19 @@ extern void xfs_buf_terminate(void); ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) -#define XFS_B_ASYNC XBF_ASYNC -#define XFS_B_DELWRI XBF_DELWRI -#define XFS_B_READ XBF_READ -#define XFS_B_WRITE XBF_WRITE -#define XFS_B_STALE XBF_STALE - -#define XFS_BUF_TRYLOCK XBF_TRYLOCK -#define XFS_INCORE_TRYLOCK XBF_TRYLOCK -#define XFS_BUF_LOCK XBF_LOCK -#define XFS_BUF_MAPPED XBF_MAPPED - -#define BUF_BUSY XBF_DONT_BLOCK - #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) -#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) -#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) -#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) +#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) +#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) +#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) #define XFS_BUF_SUPER_STALE(bp) do { \ XFS_BUF_STALE(bp); \ xfs_buf_delwri_dequeue(bp); \ XFS_BUF_DONE(bp); \ } while (0) -#define XFS_BUF_MANAGE XBF_FS_MANAGED #define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) @@ -385,31 +376,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) #define xfs_biomove(bp, off, len, data, rw) \ xfs_buf_iomove((bp), (off), (len), (data), \ - ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) + ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ) #define xfs_biozero(bp, off, len) \ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) - -static inline int XFS_bwrite(xfs_buf_t *bp) -{ - int iowait = (bp->b_flags & XBF_ASYNC) == 0; - int error = 0; - - if (!iowait) - bp->b_flags |= _XBF_RUN_QUEUES; - - xfs_buf_delwri_dequeue(bp); - xfs_buf_iostrategy(bp); - if (iowait) { - error = xfs_buf_iowait(bp); - xfs_buf_relse(bp); - } - return error; -} - -#define XFS_bdstrat(bp) xfs_buf_iorequest(bp) - #define xfs_iowait(bp) xfs_buf_iowait(bp) #define xfs_baread(target, rablkno, ralen) \ @@ -424,6 +395,7 @@ extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); + #ifdef CONFIG_KDB_MODULES extern struct list_head *xfs_get_buftarg_list(void); #endif diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 7501b85fd860..b6918d76bc7b 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -79,7 +79,7 @@ xfs_flush_pages( xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = -filemap_fdatawrite(mapping); } - if (flags & XFS_B_ASYNC) + if (flags & XBF_ASYNC) return ret; ret2 = xfs_wait_on_pages(ip, first, last); if (!ret) diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index a034cf624437..4ea1ee18aded 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -447,12 +447,12 @@ xfs_attrlist_by_handle( int xfs_attrmulti_attr_get( struct inode *inode, - char *name, - char __user *ubuf, + unsigned char *name, + unsigned char __user *ubuf, __uint32_t *len, __uint32_t flags) { - char *kbuf; + unsigned char *kbuf; int error = EFAULT; if (*len > XATTR_SIZE_MAX) @@ -476,12 +476,12 @@ xfs_attrmulti_attr_get( int xfs_attrmulti_attr_set( struct inode *inode, - char *name, - const char __user *ubuf, + unsigned char *name, + const unsigned char __user *ubuf, __uint32_t len, __uint32_t flags) { - char *kbuf; + unsigned char *kbuf; int error = EFAULT; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -501,7 +501,7 @@ xfs_attrmulti_attr_set( int xfs_attrmulti_attr_remove( struct inode *inode, - char *name, + unsigned char *name, __uint32_t flags) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -519,7 +519,7 @@ xfs_attrmulti_by_handle( xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - char *attr_name; + unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); @@ -547,7 +547,7 @@ xfs_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user(attr_name, + ops[i].am_error = strncpy_from_user((char *)attr_name, ops[i].am_attrname, MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) error = -ERANGE; @@ -1431,6 +1431,9 @@ xfs_file_ioctl( if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (mp->m_flags & XFS_MOUNT_RDONLY) + return -XFS_ERROR(EROFS); + if (copy_from_user(&inout, arg, sizeof(inout))) return -XFS_ERROR(EFAULT); diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index 7bd7c6afc1eb..d56173b34a2a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -45,23 +45,23 @@ xfs_readlink_by_handle( extern int xfs_attrmulti_attr_get( struct inode *inode, - char *name, - char __user *ubuf, + unsigned char *name, + unsigned char __user *ubuf, __uint32_t *len, __uint32_t flags); extern int - xfs_attrmulti_attr_set( +xfs_attrmulti_attr_set( struct inode *inode, - char *name, - const char __user *ubuf, + unsigned char *name, + const unsigned char __user *ubuf, __uint32_t len, __uint32_t flags); extern int xfs_attrmulti_attr_remove( struct inode *inode, - char *name, + unsigned char *name, __uint32_t flags); extern struct dentry * diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index be1527b1670c..0bf6d61f0528 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -411,7 +411,7 @@ xfs_compat_attrmulti_by_handle( compat_xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - char *attr_name; + unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); @@ -440,7 +440,7 @@ xfs_compat_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user(attr_name, + ops[i].am_error = strncpy_from_user((char *)attr_name, compat_ptr(ops[i].am_attrname), MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 225946012d0b..e8566bbf0f00 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -140,10 +140,10 @@ xfs_init_security( struct xfs_inode *ip = XFS_I(inode); size_t length; void *value; - char *name; + unsigned char *name; int error; - error = security_inode_init_security(inode, dir, &name, + error = security_inode_init_security(inode, dir, (char **)&name, &value, &length); if (error) { if (error == -EOPNOTSUPP) diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 0d32457abef1..eac6f80d786d 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -630,18 +630,9 @@ start: * by root. This keeps people from modifying setuid and * setgid binaries. */ - - if (((xip->i_d.di_mode & S_ISUID) || - ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == - (S_ISGID | S_IXGRP))) && - !capable(CAP_FSETID)) { - error = xfs_write_clear_setuid(xip); - if (likely(!error)) - error = -file_remove_suid(file); - if (unlikely(error)) { - goto out_unlock_internal; - } - } + error = -file_remove_suid(file); + if (unlikely(error)) + goto out_unlock_internal; /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -784,53 +775,6 @@ write_retry: } /* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. - * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int -xfs_bdstrat_cb(struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - /* - * Metadata write that didn't get logged but - * written delayed anyway. These aren't associated - * with a transaction, and can be ignored. - */ - if (XFS_BUF_IODONE_FUNC(bp) == NULL && - (XFS_BUF_ISREAD(bp)) == 0) - return (xfs_bioerror_relse(bp)); - else - return (xfs_bioerror(bp)); - } - - xfs_buf_iorequest(bp); - return 0; -} - -/* - * Wrapper around bdstrat so that we can stop data from going to disk in case - * we are shutting down the filesystem. Typically user data goes thru this - * path; one of the exceptions is the superblock. - */ -void -xfsbdstrat( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - ASSERT(mp); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_iorequest(bp); - return; - } - - trace_xfs_bdstrat_shut(bp, _RET_IP_); - xfs_bioerror_relse(bp); -} - -/* * If the underlying (data/log/rt) device is readonly, there are some * operations that cannot proceed. */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index d1f7789c7ffb..342ae8c0d011 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -22,9 +22,6 @@ struct xfs_mount; struct xfs_inode; struct xfs_buf; -/* errors from xfsbdstrat() must be extracted from the buffer */ -extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 09783cc444ac..25ea2408118f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -877,12 +877,11 @@ xfsaild( { struct xfs_ail *ailp = data; xfs_lsn_t last_pushed_lsn = 0; - long tout = 0; + long tout = 0; /* milliseconds */ while (!kthread_should_stop()) { - if (tout) - schedule_timeout_interruptible(msecs_to_jiffies(tout)); - tout = 1000; + schedule_timeout_interruptible(tout ? + msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); /* swsusp */ try_to_freeze(); @@ -954,16 +953,14 @@ xfs_fs_destroy_inode( ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); /* - * If we have nothing to flush with this inode then complete the - * teardown now, otherwise delay the flush operation. + * We always use background reclaim here because even if the + * inode is clean, it still may be under IO and hence we have + * to take the flush lock. The background reclaim path handles + * this more efficiently than we can here, so simply let background + * reclaim tear down all inodes. */ - if (!xfs_inode_clean(ip)) { - xfs_inode_set_reclaim_tag(ip); - return; - } - out_reclaim: - xfs_ireclaim(ip); + xfs_inode_set_reclaim_tag(ip); } /* @@ -1024,12 +1021,45 @@ xfs_fs_dirty_inode( XFS_I(inode)->i_update_core = 1; } -/* - * Attempt to flush the inode, this will actually fail - * if the inode is pinned, but we dirty the inode again - * at the point when it is unpinned after a log write, - * since this is when the inode itself becomes flushable. - */ +STATIC int +xfs_log_inode( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + + if (error) { + xfs_trans_cancel(tp, 0); + /* we need to return with the lock hold shared */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out of the + * way during trans_reserve which would flush the inode. But there's + * no guarantee that the inode buffer has actually gone out yet (it's + * delwri). Plus the buffer could be pinned anyway if it's part of + * an inode in another recent transaction. So we play it safe and + * fire off the transaction anyway. + */ + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + + return error; +} + STATIC int xfs_fs_write_inode( struct inode *inode, @@ -1037,7 +1067,7 @@ xfs_fs_write_inode( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - int error = 0; + int error = EAGAIN; xfs_itrace_entry(ip); @@ -1048,35 +1078,55 @@ xfs_fs_write_inode( error = xfs_wait_on_pages(ip, 0, -1); if (error) goto out; - } - /* - * Bypass inodes which have already been cleaned by - * the inode flush clustering code inside xfs_iflush - */ - if (xfs_inode_clean(ip)) - goto out; - - /* - * We make this non-blocking if the inode is contended, return - * EAGAIN to indicate to the caller that they did not succeed. - * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by xfs_sync. - */ - if (sync) { + /* + * Make sure the inode has hit stable storage. By using the + * log and the fsync transactions we reduce the IOs we have + * to do here from two (log and inode) to just the log. + * + * Note: We still need to do a delwri write of the inode after + * this to flush it to the backing buffer so that bulkstat + * works properly if this is the first time the inode has been + * written. Because we hold the ilock atomically over the + * transaction commit and the inode flush we are guaranteed + * that the inode is not pinned when it returns. If the flush + * lock is already held, then the inode has already been + * flushed once and we don't need to flush it again. Hence + * the code will only flush the inode if it isn't already + * being flushed. + */ xfs_ilock(ip, XFS_ILOCK_SHARED); - xfs_iflock(ip); - - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + if (ip->i_update_core) { + error = xfs_log_inode(ip); + if (error) + goto out_unlock; + } } else { - error = EAGAIN; + /* + * We make this non-blocking if the inode is contended, return + * EAGAIN to indicate to the caller that they did not succeed. + * This prevents the flush path from blocking on inodes inside + * another operation right now, they get caught later by xfs_sync. + */ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) goto out; - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; + } + + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; - error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK); + /* + * Now we have the flush lock and the inode is not pinned, we can check + * if the inode is really clean as we know that there are no pending + * transaction completions, it is not waiting on the delayed write + * queue and there is no IO in progress. + */ + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; } + error = xfs_iflush(ip, 0); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -1259,6 +1309,29 @@ xfs_fs_statfs( return 0; } +STATIC void +xfs_save_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks = 0; + + mp->m_resblks_save = mp->m_resblks; + xfs_reserve_blocks(mp, &resblks, NULL); +} + +STATIC void +xfs_restore_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks; + + if (mp->m_resblks_save) { + resblks = mp->m_resblks_save; + mp->m_resblks_save = 0; + } else + resblks = xfs_default_resblks(mp); + + xfs_reserve_blocks(mp, &resblks, NULL); +} + STATIC int xfs_fs_remount( struct super_block *sb, @@ -1338,11 +1411,27 @@ xfs_fs_remount( } mp->m_update_flags = 0; } + + /* + * Fill out the reserve pool if it is empty. Use the stashed + * value if it is non-zero, otherwise go with the default. + */ + xfs_restore_resvblks(mp); } /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* + * After we have synced the data but before we sync the + * metadata, we need to free up the reserve block pool so that + * the used block count in the superblock on disk is correct at + * the end of the remount. Stash the current reserve pool size + * so that if we get remounted rw, we can return it to the same + * size. + */ + xfs_quiesce_data(mp); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } @@ -1361,11 +1450,22 @@ xfs_fs_freeze( { struct xfs_mount *mp = XFS_M(sb); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); return -xfs_fs_log_dummy(mp); } STATIC int +xfs_fs_unfreeze( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_restore_resvblks(mp); + return 0; +} + +STATIC int xfs_fs_show_options( struct seq_file *m, struct vfsmount *mnt) @@ -1587,6 +1687,7 @@ static const struct super_operations xfs_super_operations = { .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, + .unfreeze_fs = xfs_fs_unfreeze, .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, .show_options = xfs_fs_show_options, diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 6fed97a8cd3e..a9f6d20aff41 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -65,7 +65,6 @@ xfs_inode_ag_lookup( * as the tree is sparse and a gang lookup walks to find * the number of objects requested. */ - read_lock(&pag->pag_ici_lock); if (tag == XFS_ICI_NO_TAG) { nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void **)&ip, *first_index, 1); @@ -74,7 +73,7 @@ xfs_inode_ag_lookup( (void **)&ip, *first_index, 1, tag); } if (!nr_found) - goto unlock; + return NULL; /* * Update the index for the next lookup. Catch overflows @@ -84,25 +83,20 @@ xfs_inode_ag_lookup( */ *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - goto unlock; - + return NULL; return ip; - -unlock: - read_unlock(&pag->pag_ici_lock); - return NULL; } STATIC int xfs_inode_ag_walk( struct xfs_mount *mp, - xfs_agnumber_t ag, + struct xfs_perag *pag, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags, - int tag) + int tag, + int exclusive) { - struct xfs_perag *pag = &mp->m_perag[ag]; uint32_t first_index; int last_error = 0; int skipped; @@ -114,10 +108,20 @@ restart: int error = 0; xfs_inode_t *ip; + if (exclusive) + write_lock(&pag->pag_ici_lock); + else + read_lock(&pag->pag_ici_lock); ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); - if (!ip) + if (!ip) { + if (exclusive) + write_unlock(&pag->pag_ici_lock); + else + read_unlock(&pag->pag_ici_lock); break; + } + /* execute releases pag->pag_ici_lock */ error = execute(ip, pag, flags); if (error == EAGAIN) { skipped++; @@ -125,9 +129,8 @@ restart: } if (error) last_error = error; - /* - * bail out if the filesystem is corrupted. - */ + + /* bail out if the filesystem is corrupted. */ if (error == EFSCORRUPTED) break; @@ -137,8 +140,6 @@ restart: delay(1); goto restart; } - - xfs_put_perag(mp, pag); return last_error; } @@ -148,16 +149,24 @@ xfs_inode_ag_iterator( int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags, - int tag) + int tag, + int exclusive) { int error = 0; int last_error = 0; xfs_agnumber_t ag; for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - if (!mp->m_perag[ag].pag_ici_init) + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, ag); + if (!pag->pag_ici_init) { + xfs_perag_put(pag); continue; - error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); + } + error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, + exclusive); + xfs_perag_put(pag); if (error) { last_error = error; if (error == EFSCORRUPTED) @@ -174,30 +183,31 @@ xfs_sync_inode_valid( struct xfs_perag *pag) { struct inode *inode = VFS_I(ip); + int error = EFSCORRUPTED; /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - read_unlock(&pag->pag_ici_lock); - return EFSCORRUPTED; - } + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + goto out_unlock; - /* - * If we can't get a reference on the inode, it must be in reclaim. - * Leave it for the reclaim code to flush. Also avoid inodes that - * haven't been fully initialised. - */ - if (!igrab(inode)) { - read_unlock(&pag->pag_ici_lock); - return ENOENT; - } - read_unlock(&pag->pag_ici_lock); + /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ + error = ENOENT; + if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + goto out_unlock; + + /* If we can't grab the inode, it must on it's way to reclaim. */ + if (!igrab(inode)) + goto out_unlock; - if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { + if (is_bad_inode(inode)) { IRELE(ip); - return ENOENT; + goto out_unlock; } - return 0; + /* inode is valid */ + error = 0; +out_unlock: + read_unlock(&pag->pag_ici_lock); + return error; } STATIC int @@ -224,7 +234,7 @@ xfs_sync_inode_data( } error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? - 0 : XFS_B_ASYNC, FI_NONE); + 0 : XBF_ASYNC, FI_NONE); xfs_iunlock(ip, XFS_IOLOCK_SHARED); out_wait: @@ -260,8 +270,7 @@ xfs_sync_inode_attr( goto out_unlock; } - error = xfs_iflush(ip, (flags & SYNC_WAIT) ? - XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI); + error = xfs_iflush(ip, flags); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -282,14 +291,11 @@ xfs_sync_data( ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG); + XFS_ICI_NO_TAG, 0); if (error) return XFS_ERROR(error); - xfs_log_force(mp, 0, - (flags & SYNC_WAIT) ? - XFS_LOG_FORCE | XFS_LOG_SYNC : - XFS_LOG_FORCE); + xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); return 0; } @@ -304,7 +310,7 @@ xfs_sync_attr( ASSERT((flags & ~SYNC_WAIT) == 0); return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG); + XFS_ICI_NO_TAG, 0); } STATIC int @@ -315,10 +321,6 @@ xfs_commit_dummy_trans( struct xfs_inode *ip = mp->m_rootip; struct xfs_trans *tp; int error; - int log_flags = XFS_LOG_FORCE; - - if (flags & SYNC_WAIT) - log_flags |= XFS_LOG_SYNC; /* * Put a dummy transaction in the log to tell recovery @@ -340,11 +342,11 @@ xfs_commit_dummy_trans( xfs_iunlock(ip, XFS_ILOCK_EXCL); /* the log force ensures this transaction is pushed to disk */ - xfs_log_force(mp, 0, log_flags); + xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); return error; } -int +STATIC int xfs_sync_fsdata( struct xfs_mount *mp, int flags) @@ -360,7 +362,7 @@ xfs_sync_fsdata( if (flags & SYNC_TRYLOCK) { ASSERT(!(flags & SYNC_WAIT)); - bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + bp = xfs_getsb(mp, XBF_TRYLOCK); if (!bp) goto out; @@ -380,7 +382,7 @@ xfs_sync_fsdata( * become pinned in between there and here. */ if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, 0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); } @@ -441,9 +443,6 @@ xfs_quiesce_data( xfs_sync_data(mp, SYNC_WAIT); xfs_qm_sync(mp, SYNC_WAIT); - /* drop inode references pinned by filestreams */ - xfs_filestream_flush(mp); - /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, SYNC_WAIT); @@ -460,16 +459,18 @@ xfs_quiesce_fs( { int count = 0, pincount; + xfs_reclaim_inodes(mp, 0); xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop * will flush most meta data but that will generate more meta data * (typically directory updates). Which then must be flushed and - * logged before we can write the unmount record. + * logged before we can write the unmount record. We also so sync + * reclaim of inodes to catch any that the above delwri flush skipped. */ do { + xfs_reclaim_inodes(mp, SYNC_WAIT); xfs_sync_attr(mp, SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { @@ -568,7 +569,7 @@ xfs_flush_inodes( igrab(inode); xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); wait_for_completion(&completion); - xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_log_force(ip->i_mount, XFS_LOG_SYNC); } /* @@ -584,8 +585,8 @@ xfs_sync_worker( int error; if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_log_force(mp, 0); + xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); @@ -664,60 +665,6 @@ xfs_syncd_stop( kthread_stop(mp->m_sync_task); } -STATIC int -xfs_reclaim_inode( - xfs_inode_t *ip, - int sync_mode) -{ - xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - - /* The hash lock here protects a thread in xfs_iget_core from - * racing with us on linking the inode back with a vnode. - * Once we have the XFS_IRECLAIM flag set it will not touch - * us. - */ - write_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - if (__xfs_iflags_test(ip, XFS_IRECLAIM) || - !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - return -EAGAIN; - } - __xfs_iflags_set(ip, XFS_IRECLAIM); - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - xfs_put_perag(ip->i_mount, pag); - - /* - * If the inode is still dirty, then flush it out. If the inode - * is not in the AIL, then it will be OK to flush it delwri as - * long as xfs_iflush() does not keep any references to the inode. - * We leave that decision up to xfs_iflush() since it has the - * knowledge of whether it's OK to simply do a delwri flush of - * the inode or whether we need to wait until the inode is - * pulled from the AIL. - * We get the flush lock regardless, though, just to make sure - * we don't free it while it is being flushed. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); - - /* - * In the case of a forced shutdown we rely on xfs_iflush() to - * wait for the inode to be unpinned before returning an error. - */ - if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { - /* synchronize with xfs_iflush_done */ - xfs_iflock(ip); - xfs_ifunlock(ip); - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_ireclaim(ip); - return 0; -} - void __xfs_inode_set_reclaim_tag( struct xfs_perag *pag, @@ -737,16 +684,17 @@ void xfs_inode_set_reclaim_tag( xfs_inode_t *ip) { - xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); read_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); __xfs_inode_set_reclaim_tag(pag, ip); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); } void @@ -759,20 +707,145 @@ __xfs_inode_clear_reclaim_tag( XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); } +/* + * Inodes in different states need to be treated differently, and the return + * value of xfs_iflush is not sufficient to get this right. The following table + * lists the inode states and the reclaim actions necessary for non-blocking + * reclaim: + * + * + * inode state iflush ret required action + * --------------- ---------- --------------- + * bad - reclaim + * shutdown EIO unpin and reclaim + * clean, unpinned 0 reclaim + * stale, unpinned 0 reclaim + * clean, pinned(*) 0 requeue + * stale, pinned EAGAIN requeue + * dirty, delwri ok 0 requeue + * dirty, delwri blocked EAGAIN requeue + * dirty, sync flush 0 reclaim + * + * (*) dgc: I don't think the clean, pinned state is possible but it gets + * handled anyway given the order of checks implemented. + * + * As can be seen from the table, the return value of xfs_iflush() is not + * sufficient to correctly decide the reclaim action here. The checks in + * xfs_iflush() might look like duplicates, but they are not. + * + * Also, because we get the flush lock first, we know that any inode that has + * been flushed delwri has had the flush completed by the time we check that + * the inode is clean. The clean inode check needs to be done before flushing + * the inode delwri otherwise we would loop forever requeuing clean inodes as + * we cannot tell apart a successful delwri flush and a clean inode from the + * return value of xfs_iflush(). + * + * Note that because the inode is flushed delayed write by background + * writeback, the flush lock may already be held here and waiting on it can + * result in very long latencies. Hence for sync reclaims, where we wait on the + * flush lock, the caller should push out delayed write inodes first before + * trying to reclaim them to minimise the amount of time spent waiting. For + * background relaim, we just requeue the inode for the next pass. + * + * Hence the order of actions after gaining the locks should be: + * bad => reclaim + * shutdown => unpin and reclaim + * pinned, delwri => requeue + * pinned, sync => unpin + * stale => reclaim + * clean => reclaim + * dirty, delwri => flush and requeue + * dirty, sync => flush, wait and reclaim + */ STATIC int -xfs_reclaim_inode_now( +xfs_reclaim_inode( struct xfs_inode *ip, struct xfs_perag *pag, - int flags) + int sync_mode) { - /* ignore if already under reclaim */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - read_unlock(&pag->pag_ici_lock); + int error = 0; + + /* + * The radix tree lock here protects a thread in xfs_iget from racing + * with us starting reclaim on the inode. Once we have the + * XFS_IRECLAIM flag set it will not touch us. + */ + spin_lock(&ip->i_flags_lock); + ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { + /* ignore as it is already under reclaim */ + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); return 0; } - read_unlock(&pag->pag_ici_lock); + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (!xfs_iflock_nowait(ip)) { + if (!(sync_mode & SYNC_WAIT)) + goto out; + xfs_iflock(ip); + } + + if (is_bad_inode(VFS_I(ip))) + goto reclaim; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_iunpin_wait(ip); + goto reclaim; + } + if (xfs_ipincount(ip)) { + if (!(sync_mode & SYNC_WAIT)) { + xfs_ifunlock(ip); + goto out; + } + xfs_iunpin_wait(ip); + } + if (xfs_iflags_test(ip, XFS_ISTALE)) + goto reclaim; + if (xfs_inode_clean(ip)) + goto reclaim; + + /* Now we have an inode that needs flushing */ + error = xfs_iflush(ip, sync_mode); + if (sync_mode & SYNC_WAIT) { + xfs_iflock(ip); + goto reclaim; + } + + /* + * When we have to flush an inode but don't have SYNC_WAIT set, we + * flush the inode out using a delwri buffer and wait for the next + * call into reclaim to find it in a clean state instead of waiting for + * it now. We also don't return errors here - if the error is transient + * then the next reclaim pass will flush the inode, and if the error + * is permanent then the next sync reclaim will relcaim the inode and + * pass on the error. + */ + if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "inode 0x%llx background reclaim flush failed with %d", + (long long)ip->i_ino, error); + } +out: + xfs_iflags_clear(ip, XFS_IRECLAIM); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * We could return EAGAIN here to make reclaim rescan the inode tree in + * a short while. However, this just burns CPU time scanning the tree + * waiting for IO to complete and xfssyncd never goes back to the idle + * state. Instead, return 0 to let the next scheduled background reclaim + * attempt to reclaim the inode again. + */ + return 0; + +reclaim: + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_ireclaim(ip); + return error; - return xfs_reclaim_inode(ip, flags); } int @@ -780,6 +853,6 @@ xfs_reclaim_inodes( xfs_mount_t *mp, int mode) { - return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, - XFS_ICI_RECLAIM_TAG); + return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, + XFS_ICI_RECLAIM_TAG, 1); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index a500b4d91835..d480c346cabb 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -37,7 +37,6 @@ void xfs_syncd_stop(struct xfs_mount *mp); int xfs_sync_attr(struct xfs_mount *mp, int flags); int xfs_sync_data(struct xfs_mount *mp, int flags); -int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); @@ -54,6 +53,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag); + int flags, int tag, int write_lock); #endif diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index c40834bdee58..a4574dcf5065 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -33,51 +33,82 @@ struct xfs_dquot; struct xlog_ticket; struct log; -#define DEFINE_ATTR_LIST_EVENT(name) \ +DECLARE_EVENT_CLASS(xfs_attr_list_class, + TP_PROTO(struct xfs_attr_list_context *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u32, hashval) + __field(u32, blkno) + __field(u32, offset) + __field(void *, alist) + __field(int, bufsize) + __field(int, count) + __field(int, firstu) + __field(int, dupcnt) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; + __entry->ino = ctx->dp->i_ino; + __entry->hashval = ctx->cursor->hashval; + __entry->blkno = ctx->cursor->blkno; + __entry->offset = ctx->cursor->offset; + __entry->alist = ctx->alist; + __entry->bufsize = ctx->bufsize; + __entry->count = ctx->count; + __entry->firstu = ctx->firstu; + __entry->flags = ctx->flags; + ), + TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " + "alist 0x%p size %u count %u firstu %u flags %d %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->hashval, + __entry->blkno, + __entry->offset, + __entry->dupcnt, + __entry->alist, + __entry->bufsize, + __entry->count, + __entry->firstu, + __entry->flags, + __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) + ) +) + +#define DEFINE_PERAG_REF_EVENT(name) \ TRACE_EVENT(name, \ - TP_PROTO(struct xfs_attr_list_context *ctx), \ - TP_ARGS(ctx), \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agno, refcount, caller_ip), \ TP_STRUCT__entry( \ __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(u32, hashval) \ - __field(u32, blkno) \ - __field(u32, offset) \ - __field(void *, alist) \ - __field(int, bufsize) \ - __field(int, count) \ - __field(int, firstu) \ - __field(int, dupcnt) \ - __field(int, flags) \ + __field(xfs_agnumber_t, agno) \ + __field(int, refcount) \ + __field(unsigned long, caller_ip) \ ), \ TP_fast_assign( \ - __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; \ - __entry->ino = ctx->dp->i_ino; \ - __entry->hashval = ctx->cursor->hashval; \ - __entry->blkno = ctx->cursor->blkno; \ - __entry->offset = ctx->cursor->offset; \ - __entry->alist = ctx->alist; \ - __entry->bufsize = ctx->bufsize; \ - __entry->count = ctx->count; \ - __entry->firstu = ctx->firstu; \ - __entry->flags = ctx->flags; \ + __entry->dev = mp->m_super->s_dev; \ + __entry->agno = agno; \ + __entry->refcount = refcount; \ + __entry->caller_ip = caller_ip; \ ), \ - TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " \ - "alist 0x%p size %u count %u firstu %u flags %d %s", \ + TP_printk("dev %d:%d agno %u refcount %d caller %pf", \ MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->hashval, \ - __entry->blkno, \ - __entry->offset, \ - __entry->dupcnt, \ - __entry->alist, \ - __entry->bufsize, \ - __entry->count, \ - __entry->firstu, \ - __entry->flags, \ - __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) \ - ) \ -) + __entry->agno, \ + __entry->refcount, \ + (char *)__entry->caller_ip) \ +); + +DEFINE_PERAG_REF_EVENT(xfs_perag_get) +DEFINE_PERAG_REF_EVENT(xfs_perag_put) + +#define DEFINE_ATTR_LIST_EVENT(name) \ +DEFINE_EVENT(xfs_attr_list_class, name, \ + TP_PROTO(struct xfs_attr_list_context *ctx), \ + TP_ARGS(ctx)) DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); @@ -178,91 +209,99 @@ TRACE_EVENT(xfs_iext_insert, (char *)__entry->caller_ip) ); +DECLARE_EVENT_CLASS(xfs_bmap_class, + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, + unsigned long caller_ip), + TP_ARGS(ip, idx, state, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_extnum_t, idx) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + __field(xfs_exntst_t, state) + __field(int, bmap_state) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? + ip->i_afp : &ip->i_df; + struct xfs_bmbt_irec r; + + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->idx = idx; + __entry->startoff = r.br_startoff; + __entry->startblock = r.br_startblock; + __entry->blockcount = r.br_blockcount; + __entry->state = r.br_state; + __entry->bmap_state = state; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " + "offset %lld block %s count %lld flag %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), + (long)__entry->idx, + __entry->startoff, + xfs_fmtfsblock(__entry->startblock), + __entry->blockcount, + __entry->state, + (char *)__entry->caller_ip) +) + #define DEFINE_BMAP_EVENT(name) \ -TRACE_EVENT(name, \ +DEFINE_EVENT(xfs_bmap_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ unsigned long caller_ip), \ - TP_ARGS(ip, idx, state, caller_ip), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(xfs_extnum_t, idx) \ - __field(xfs_fileoff_t, startoff) \ - __field(xfs_fsblock_t, startblock) \ - __field(xfs_filblks_t, blockcount) \ - __field(xfs_exntst_t, state) \ - __field(int, bmap_state) \ - __field(unsigned long, caller_ip) \ - ), \ - TP_fast_assign( \ - struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? \ - ip->i_afp : &ip->i_df; \ - struct xfs_bmbt_irec r; \ - \ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->idx = idx; \ - __entry->startoff = r.br_startoff; \ - __entry->startblock = r.br_startblock; \ - __entry->blockcount = r.br_blockcount; \ - __entry->state = r.br_state; \ - __entry->bmap_state = state; \ - __entry->caller_ip = caller_ip; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " \ - "offset %lld block %s count %lld flag %d caller %pf", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), \ - (long)__entry->idx, \ - __entry->startoff, \ - xfs_fmtfsblock(__entry->startblock), \ - __entry->blockcount, \ - __entry->state, \ - (char *)__entry->caller_ip) \ -) - + TP_ARGS(ip, idx, state, caller_ip)) DEFINE_BMAP_EVENT(xfs_iext_remove); DEFINE_BMAP_EVENT(xfs_bmap_pre_update); DEFINE_BMAP_EVENT(xfs_bmap_post_update); DEFINE_BMAP_EVENT(xfs_extlist); -#define DEFINE_BUF_EVENT(tname) \ -TRACE_EVENT(tname, \ - TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ - TP_ARGS(bp, caller_ip), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_daddr_t, bno) \ - __field(size_t, buffer_length) \ - __field(int, hold) \ - __field(int, pincount) \ - __field(unsigned, lockval) \ - __field(unsigned, flags) \ - __field(unsigned long, caller_ip) \ - ), \ - TP_fast_assign( \ - __entry->dev = bp->b_target->bt_dev; \ - __entry->bno = bp->b_bn; \ - __entry->buffer_length = bp->b_buffer_length; \ - __entry->hold = atomic_read(&bp->b_hold); \ - __entry->pincount = atomic_read(&bp->b_pin_count); \ - __entry->lockval = xfs_buf_lock_value(bp); \ - __entry->flags = bp->b_flags; \ - __entry->caller_ip = caller_ip; \ - ), \ - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ - "lock %d flags %s caller %pf", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - (unsigned long long)__entry->bno, \ - __entry->buffer_length, \ - __entry->hold, \ - __entry->pincount, \ - __entry->lockval, \ - __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ - (void *)__entry->caller_ip) \ +DECLARE_EVENT_CLASS(xfs_buf_class, + TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), + TP_ARGS(bp, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(unsigned, flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = xfs_buf_lock_value(bp); + __entry->flags = bp->b_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) ) + +#define DEFINE_BUF_EVENT(name) \ +DEFINE_EVENT(xfs_buf_class, name, \ + TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ + TP_ARGS(bp, caller_ip)) DEFINE_BUF_EVENT(xfs_buf_init); DEFINE_BUF_EVENT(xfs_buf_free); DEFINE_BUF_EVENT(xfs_buf_hold); @@ -299,41 +338,45 @@ DEFINE_BUF_EVENT(xfs_reset_dqcounts); DEFINE_BUF_EVENT(xfs_inode_item_push); /* pass flags explicitly */ -#define DEFINE_BUF_FLAGS_EVENT(tname) \ -TRACE_EVENT(tname, \ - TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ - TP_ARGS(bp, flags, caller_ip), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_daddr_t, bno) \ - __field(size_t, buffer_length) \ - __field(int, hold) \ - __field(int, pincount) \ - __field(unsigned, lockval) \ - __field(unsigned, flags) \ - __field(unsigned long, caller_ip) \ - ), \ - TP_fast_assign( \ - __entry->dev = bp->b_target->bt_dev; \ - __entry->bno = bp->b_bn; \ - __entry->buffer_length = bp->b_buffer_length; \ - __entry->flags = flags; \ - __entry->hold = atomic_read(&bp->b_hold); \ - __entry->pincount = atomic_read(&bp->b_pin_count); \ - __entry->lockval = xfs_buf_lock_value(bp); \ - __entry->caller_ip = caller_ip; \ - ), \ - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ - "lock %d flags %s caller %pf", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - (unsigned long long)__entry->bno, \ - __entry->buffer_length, \ - __entry->hold, \ - __entry->pincount, \ - __entry->lockval, \ - __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ - (void *)__entry->caller_ip) \ +DECLARE_EVENT_CLASS(xfs_buf_flags_class, + TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), + TP_ARGS(bp, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(unsigned, flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->flags = flags; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = xfs_buf_lock_value(bp); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) ) + +#define DEFINE_BUF_FLAGS_EVENT(name) \ +DEFINE_EVENT(xfs_buf_flags_class, name, \ + TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ + TP_ARGS(bp, flags, caller_ip)) DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); @@ -376,55 +419,58 @@ TRACE_EVENT(xfs_buf_ioerror, (void *)__entry->caller_ip) ); -#define DEFINE_BUF_ITEM_EVENT(tname) \ -TRACE_EVENT(tname, \ - TP_PROTO(struct xfs_buf_log_item *bip), \ - TP_ARGS(bip), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_daddr_t, buf_bno) \ - __field(size_t, buf_len) \ - __field(int, buf_hold) \ - __field(int, buf_pincount) \ - __field(int, buf_lockval) \ - __field(unsigned, buf_flags) \ - __field(unsigned, bli_recur) \ - __field(int, bli_refcount) \ - __field(unsigned, bli_flags) \ - __field(void *, li_desc) \ - __field(unsigned, li_flags) \ - ), \ - TP_fast_assign( \ - __entry->dev = bip->bli_buf->b_target->bt_dev; \ - __entry->bli_flags = bip->bli_flags; \ - __entry->bli_recur = bip->bli_recur; \ - __entry->bli_refcount = atomic_read(&bip->bli_refcount); \ - __entry->buf_bno = bip->bli_buf->b_bn; \ - __entry->buf_len = bip->bli_buf->b_buffer_length; \ - __entry->buf_flags = bip->bli_buf->b_flags; \ - __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); \ - __entry->buf_pincount = \ - atomic_read(&bip->bli_buf->b_pin_count); \ - __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); \ - __entry->li_desc = bip->bli_item.li_desc; \ - __entry->li_flags = bip->bli_item.li_flags; \ - ), \ - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ - "lock %d flags %s recur %d refcount %d bliflags %s " \ - "lidesc 0x%p liflags %s", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - (unsigned long long)__entry->buf_bno, \ - __entry->buf_len, \ - __entry->buf_hold, \ - __entry->buf_pincount, \ - __entry->buf_lockval, \ - __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), \ - __entry->bli_recur, \ - __entry->bli_refcount, \ - __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), \ - __entry->li_desc, \ - __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) \ +DECLARE_EVENT_CLASS(xfs_buf_item_class, + TP_PROTO(struct xfs_buf_log_item *bip), + TP_ARGS(bip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, buf_bno) + __field(size_t, buf_len) + __field(int, buf_hold) + __field(int, buf_pincount) + __field(int, buf_lockval) + __field(unsigned, buf_flags) + __field(unsigned, bli_recur) + __field(int, bli_refcount) + __field(unsigned, bli_flags) + __field(void *, li_desc) + __field(unsigned, li_flags) + ), + TP_fast_assign( + __entry->dev = bip->bli_buf->b_target->bt_dev; + __entry->bli_flags = bip->bli_flags; + __entry->bli_recur = bip->bli_recur; + __entry->bli_refcount = atomic_read(&bip->bli_refcount); + __entry->buf_bno = bip->bli_buf->b_bn; + __entry->buf_len = bip->bli_buf->b_buffer_length; + __entry->buf_flags = bip->bli_buf->b_flags; + __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); + __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); + __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); + __entry->li_desc = bip->bli_item.li_desc; + __entry->li_flags = bip->bli_item.li_flags; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s recur %d refcount %d bliflags %s " + "lidesc 0x%p liflags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->buf_bno, + __entry->buf_len, + __entry->buf_hold, + __entry->buf_pincount, + __entry->buf_lockval, + __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), + __entry->bli_recur, + __entry->bli_refcount, + __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), + __entry->li_desc, + __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) ) + +#define DEFINE_BUF_ITEM_EVENT(name) \ +DEFINE_EVENT(xfs_buf_item_class, name, \ + TP_PROTO(struct xfs_buf_log_item *bip), \ + TP_ARGS(bip)) DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); @@ -437,6 +483,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); @@ -450,78 +497,90 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); +DECLARE_EVENT_CLASS(xfs_lock_class, + TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, + unsigned long caller_ip), + TP_ARGS(ip, lock_flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, lock_flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->lock_flags = lock_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), + (void *)__entry->caller_ip) +) + #define DEFINE_LOCK_EVENT(name) \ -TRACE_EVENT(name, \ +DEFINE_EVENT(xfs_lock_class, name, \ TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ unsigned long caller_ip), \ - TP_ARGS(ip, lock_flags, caller_ip), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(int, lock_flags) \ - __field(unsigned long, caller_ip) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->lock_flags = lock_flags; \ - __entry->caller_ip = caller_ip; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), \ - (void *)__entry->caller_ip) \ -) - + TP_ARGS(ip, lock_flags, caller_ip)) DEFINE_LOCK_EVENT(xfs_ilock); DEFINE_LOCK_EVENT(xfs_ilock_nowait); DEFINE_LOCK_EVENT(xfs_ilock_demote); DEFINE_LOCK_EVENT(xfs_iunlock); +DECLARE_EVENT_CLASS(xfs_iget_class, + TP_PROTO(struct xfs_inode *ip), + TP_ARGS(ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino) +) + #define DEFINE_IGET_EVENT(name) \ -TRACE_EVENT(name, \ +DEFINE_EVENT(xfs_iget_class, name, \ TP_PROTO(struct xfs_inode *ip), \ - TP_ARGS(ip), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino) \ -) + TP_ARGS(ip)) DEFINE_IGET_EVENT(xfs_iget_skip); DEFINE_IGET_EVENT(xfs_iget_reclaim); DEFINE_IGET_EVENT(xfs_iget_found); DEFINE_IGET_EVENT(xfs_iget_alloc); +DECLARE_EVENT_CLASS(xfs_inode_class, + TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), + TP_ARGS(ip, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, count) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->count = atomic_read(&VFS_I(ip)->i_count); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->count, + (char *)__entry->caller_ip) +) + #define DEFINE_INODE_EVENT(name) \ -TRACE_EVENT(name, \ +DEFINE_EVENT(xfs_inode_class, name, \ TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ - TP_ARGS(ip, caller_ip), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(int, count) \ - __field(unsigned long, caller_ip) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->count = atomic_read(&VFS_I(ip)->i_count); \ - __entry->caller_ip = caller_ip; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->count, \ - (char *)__entry->caller_ip) \ -) + TP_ARGS(ip, caller_ip)) DEFINE_INODE_EVENT(xfs_ihold); DEFINE_INODE_EVENT(xfs_irele); /* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ @@ -529,55 +588,59 @@ DEFINE_INODE_EVENT(xfs_inode); #define xfs_itrace_entry(ip) \ trace_xfs_inode(ip, _THIS_IP_) -#define DEFINE_DQUOT_EVENT(tname) \ -TRACE_EVENT(tname, \ - TP_PROTO(struct xfs_dquot *dqp), \ - TP_ARGS(dqp), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(__be32, id) \ - __field(unsigned, flags) \ - __field(unsigned, nrefs) \ - __field(unsigned long long, res_bcount) \ - __field(unsigned long long, bcount) \ - __field(unsigned long long, icount) \ - __field(unsigned long long, blk_hardlimit) \ - __field(unsigned long long, blk_softlimit) \ - __field(unsigned long long, ino_hardlimit) \ - __field(unsigned long long, ino_softlimit) \ - ), \ - TP_fast_assign( \ - __entry->dev = dqp->q_mount->m_super->s_dev; \ - __entry->id = dqp->q_core.d_id; \ - __entry->flags = dqp->dq_flags; \ - __entry->nrefs = dqp->q_nrefs; \ - __entry->res_bcount = dqp->q_res_bcount; \ - __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); \ - __entry->icount = be64_to_cpu(dqp->q_core.d_icount); \ - __entry->blk_hardlimit = \ - be64_to_cpu(dqp->q_core.d_blk_hardlimit); \ - __entry->blk_softlimit = \ - be64_to_cpu(dqp->q_core.d_blk_softlimit); \ - __entry->ino_hardlimit = \ - be64_to_cpu(dqp->q_core.d_ino_hardlimit); \ - __entry->ino_softlimit = \ - be64_to_cpu(dqp->q_core.d_ino_softlimit); \ +DECLARE_EVENT_CLASS(xfs_dquot_class, + TP_PROTO(struct xfs_dquot *dqp), + TP_ARGS(dqp), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(__be32, id) + __field(unsigned, flags) + __field(unsigned, nrefs) + __field(unsigned long long, res_bcount) + __field(unsigned long long, bcount) + __field(unsigned long long, icount) + __field(unsigned long long, blk_hardlimit) + __field(unsigned long long, blk_softlimit) + __field(unsigned long long, ino_hardlimit) + __field(unsigned long long, ino_softlimit) ), \ - TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " \ - "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] " \ - "icnt 0x%llx [hard 0x%llx | soft 0x%llx]", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - be32_to_cpu(__entry->id), \ - __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), \ - __entry->nrefs, \ - __entry->res_bcount, \ - __entry->bcount, \ - __entry->blk_hardlimit, \ - __entry->blk_softlimit, \ - __entry->icount, \ - __entry->ino_hardlimit, \ - __entry->ino_softlimit) \ + TP_fast_assign( + __entry->dev = dqp->q_mount->m_super->s_dev; + __entry->id = dqp->q_core.d_id; + __entry->flags = dqp->dq_flags; + __entry->nrefs = dqp->q_nrefs; + __entry->res_bcount = dqp->q_res_bcount; + __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); + __entry->icount = be64_to_cpu(dqp->q_core.d_icount); + __entry->blk_hardlimit = + be64_to_cpu(dqp->q_core.d_blk_hardlimit); + __entry->blk_softlimit = + be64_to_cpu(dqp->q_core.d_blk_softlimit); + __entry->ino_hardlimit = + be64_to_cpu(dqp->q_core.d_ino_hardlimit); + __entry->ino_softlimit = + be64_to_cpu(dqp->q_core.d_ino_softlimit); + ), + TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " + "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] " + "icnt 0x%llx [hard 0x%llx | soft 0x%llx]", + MAJOR(__entry->dev), MINOR(__entry->dev), + be32_to_cpu(__entry->id), + __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), + __entry->nrefs, + __entry->res_bcount, + __entry->bcount, + __entry->blk_hardlimit, + __entry->blk_softlimit, + __entry->icount, + __entry->ino_hardlimit, + __entry->ino_softlimit) ) + +#define DEFINE_DQUOT_EVENT(name) \ +DEFINE_EVENT(xfs_dquot_class, name, \ + TP_PROTO(struct xfs_dquot *dqp), \ + TP_ARGS(dqp)) DEFINE_DQUOT_EVENT(xfs_dqadjust); DEFINE_DQUOT_EVENT(xfs_dqshake_dirty); DEFINE_DQUOT_EVENT(xfs_dqshake_unlink); @@ -610,72 +673,75 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_done); DEFINE_IGET_EVENT(xfs_dquot_dqalloc); DEFINE_IGET_EVENT(xfs_dquot_dqdetach); +DECLARE_EVENT_CLASS(xfs_loggrant_class, + TP_PROTO(struct log *log, struct xlog_ticket *tic), + TP_ARGS(log, tic), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned, trans_type) + __field(char, ocnt) + __field(char, cnt) + __field(int, curr_res) + __field(int, unit_res) + __field(unsigned int, flags) + __field(void *, reserve_headq) + __field(void *, write_headq) + __field(int, grant_reserve_cycle) + __field(int, grant_reserve_bytes) + __field(int, grant_write_cycle) + __field(int, grant_write_bytes) + __field(int, curr_cycle) + __field(int, curr_block) + __field(xfs_lsn_t, tail_lsn) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->trans_type = tic->t_trans_type; + __entry->ocnt = tic->t_ocnt; + __entry->cnt = tic->t_cnt; + __entry->curr_res = tic->t_curr_res; + __entry->unit_res = tic->t_unit_res; + __entry->flags = tic->t_flags; + __entry->reserve_headq = log->l_reserve_headq; + __entry->write_headq = log->l_write_headq; + __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; + __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; + __entry->grant_write_cycle = log->l_grant_write_cycle; + __entry->grant_write_bytes = log->l_grant_write_bytes; + __entry->curr_cycle = log->l_curr_cycle; + __entry->curr_block = log->l_curr_block; + __entry->tail_lsn = log->l_tail_lsn; + ), + TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " + "t_unit_res %u t_flags %s reserve_headq 0x%p " + "write_headq 0x%p grant_reserve_cycle %d " + "grant_reserve_bytes %d grant_write_cycle %d " + "grant_write_bytes %d curr_cycle %d curr_block %d " + "tail_cycle %d tail_block %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), + __entry->ocnt, + __entry->cnt, + __entry->curr_res, + __entry->unit_res, + __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), + __entry->reserve_headq, + __entry->write_headq, + __entry->grant_reserve_cycle, + __entry->grant_reserve_bytes, + __entry->grant_write_cycle, + __entry->grant_write_bytes, + __entry->curr_cycle, + __entry->curr_block, + CYCLE_LSN(__entry->tail_lsn), + BLOCK_LSN(__entry->tail_lsn) + ) +) -#define DEFINE_LOGGRANT_EVENT(tname) \ -TRACE_EVENT(tname, \ +#define DEFINE_LOGGRANT_EVENT(name) \ +DEFINE_EVENT(xfs_loggrant_class, name, \ TP_PROTO(struct log *log, struct xlog_ticket *tic), \ - TP_ARGS(log, tic), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(unsigned, trans_type) \ - __field(char, ocnt) \ - __field(char, cnt) \ - __field(int, curr_res) \ - __field(int, unit_res) \ - __field(unsigned int, flags) \ - __field(void *, reserve_headq) \ - __field(void *, write_headq) \ - __field(int, grant_reserve_cycle) \ - __field(int, grant_reserve_bytes) \ - __field(int, grant_write_cycle) \ - __field(int, grant_write_bytes) \ - __field(int, curr_cycle) \ - __field(int, curr_block) \ - __field(xfs_lsn_t, tail_lsn) \ - ), \ - TP_fast_assign( \ - __entry->dev = log->l_mp->m_super->s_dev; \ - __entry->trans_type = tic->t_trans_type; \ - __entry->ocnt = tic->t_ocnt; \ - __entry->cnt = tic->t_cnt; \ - __entry->curr_res = tic->t_curr_res; \ - __entry->unit_res = tic->t_unit_res; \ - __entry->flags = tic->t_flags; \ - __entry->reserve_headq = log->l_reserve_headq; \ - __entry->write_headq = log->l_write_headq; \ - __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; \ - __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; \ - __entry->grant_write_cycle = log->l_grant_write_cycle; \ - __entry->grant_write_bytes = log->l_grant_write_bytes; \ - __entry->curr_cycle = log->l_curr_cycle; \ - __entry->curr_block = log->l_curr_block; \ - __entry->tail_lsn = log->l_tail_lsn; \ - ), \ - TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " \ - "t_unit_res %u t_flags %s reserve_headq 0x%p " \ - "write_headq 0x%p grant_reserve_cycle %d " \ - "grant_reserve_bytes %d grant_write_cycle %d " \ - "grant_write_bytes %d curr_cycle %d curr_block %d " \ - "tail_cycle %d tail_block %d", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), \ - __entry->ocnt, \ - __entry->cnt, \ - __entry->curr_res, \ - __entry->unit_res, \ - __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), \ - __entry->reserve_headq, \ - __entry->write_headq, \ - __entry->grant_reserve_cycle, \ - __entry->grant_reserve_bytes, \ - __entry->grant_write_cycle, \ - __entry->grant_write_bytes, \ - __entry->curr_cycle, \ - __entry->curr_block, \ - CYCLE_LSN(__entry->tail_lsn), \ - BLOCK_LSN(__entry->tail_lsn) \ - ) \ -) + TP_ARGS(log, tic)) DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); DEFINE_LOGGRANT_EVENT(xfs_log_reserve); @@ -815,7 +881,7 @@ TRACE_EVENT(name, \ ), \ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ "offset 0x%llx count %zd flags %s " \ - "startoff 0x%llx startblock 0x%llx blockcount 0x%llx", \ + "startoff 0x%llx startblock %s blockcount 0x%llx", \ MAJOR(__entry->dev), MINOR(__entry->dev), \ __entry->ino, \ __entry->size, \ @@ -824,7 +890,7 @@ TRACE_EVENT(name, \ __entry->count, \ __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ __entry->startoff, \ - __entry->startblock, \ + xfs_fmtfsblock(__entry->startblock), \ __entry->blockcount) \ ) DEFINE_IOMAP_EVENT(xfs_iomap_enter); @@ -897,28 +963,32 @@ TRACE_EVENT(xfs_itruncate_start, __entry->toss_finish) ); +DECLARE_EVENT_CLASS(xfs_itrunc_class, + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), + TP_ARGS(ip, new_size), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = new_size; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size) +) + #define DEFINE_ITRUNC_EVENT(name) \ -TRACE_EVENT(name, \ +DEFINE_EVENT(xfs_itrunc_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ - TP_ARGS(ip, new_size), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(xfs_fsize_t, size) \ - __field(xfs_fsize_t, new_size) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(ip)->i_sb->s_dev; \ - __entry->ino = ip->i_ino; \ - __entry->size = ip->i_d.di_size; \ - __entry->new_size = new_size; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->size, \ - __entry->new_size) \ -) + TP_ARGS(ip, new_size)) DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); @@ -1037,28 +1107,28 @@ TRACE_EVENT(xfs_alloc_unbusy, TRACE_EVENT(xfs_alloc_busysearch, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, - xfs_extlen_t len, int found), - TP_ARGS(mp, agno, agbno, len, found), + xfs_extlen_t len, xfs_lsn_t lsn), + TP_ARGS(mp, agno, agbno, len, lsn), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) - __field(int, found) + __field(xfs_lsn_t, lsn) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->agbno = agbno; __entry->len = len; - __entry->found = found; + __entry->lsn = lsn; ), - TP_printk("dev %d:%d agno %u agbno %u len %u %s", + TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len, - __print_symbolic(__entry->found, XFS_BUSY_STATES)) + __entry->lsn) ); TRACE_EVENT(xfs_agf, @@ -1152,77 +1222,80 @@ TRACE_EVENT(xfs_free_extent, ); -#define DEFINE_ALLOC_EVENT(name) \ -TRACE_EVENT(name, \ - TP_PROTO(struct xfs_alloc_arg *args), \ - TP_ARGS(args), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_agnumber_t, agno) \ - __field(xfs_agblock_t, agbno) \ - __field(xfs_extlen_t, minlen) \ - __field(xfs_extlen_t, maxlen) \ - __field(xfs_extlen_t, mod) \ - __field(xfs_extlen_t, prod) \ - __field(xfs_extlen_t, minleft) \ - __field(xfs_extlen_t, total) \ - __field(xfs_extlen_t, alignment) \ - __field(xfs_extlen_t, minalignslop) \ - __field(xfs_extlen_t, len) \ - __field(short, type) \ - __field(short, otype) \ - __field(char, wasdel) \ - __field(char, wasfromfl) \ - __field(char, isfl) \ - __field(char, userdata) \ - __field(xfs_fsblock_t, firstblock) \ - ), \ - TP_fast_assign( \ - __entry->dev = args->mp->m_super->s_dev; \ - __entry->agno = args->agno; \ - __entry->agbno = args->agbno; \ - __entry->minlen = args->minlen; \ - __entry->maxlen = args->maxlen; \ - __entry->mod = args->mod; \ - __entry->prod = args->prod; \ - __entry->minleft = args->minleft; \ - __entry->total = args->total; \ - __entry->alignment = args->alignment; \ - __entry->minalignslop = args->minalignslop; \ - __entry->len = args->len; \ - __entry->type = args->type; \ - __entry->otype = args->otype; \ - __entry->wasdel = args->wasdel; \ - __entry->wasfromfl = args->wasfromfl; \ - __entry->isfl = args->isfl; \ - __entry->userdata = args->userdata; \ - __entry->firstblock = args->firstblock; \ - ), \ - TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " \ - "prod %u minleft %u total %u alignment %u minalignslop %u " \ - "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " \ - "userdata %d firstblock 0x%llx", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->agno, \ - __entry->agbno, \ - __entry->minlen, \ - __entry->maxlen, \ - __entry->mod, \ - __entry->prod, \ - __entry->minleft, \ - __entry->total, \ - __entry->alignment, \ - __entry->minalignslop, \ - __entry->len, \ - __print_symbolic(__entry->type, XFS_ALLOC_TYPES), \ - __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), \ - __entry->wasdel, \ - __entry->wasfromfl, \ - __entry->isfl, \ - __entry->userdata, \ - __entry->firstblock) \ +DECLARE_EVENT_CLASS(xfs_alloc_class, + TP_PROTO(struct xfs_alloc_arg *args), + TP_ARGS(args), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, minlen) + __field(xfs_extlen_t, maxlen) + __field(xfs_extlen_t, mod) + __field(xfs_extlen_t, prod) + __field(xfs_extlen_t, minleft) + __field(xfs_extlen_t, total) + __field(xfs_extlen_t, alignment) + __field(xfs_extlen_t, minalignslop) + __field(xfs_extlen_t, len) + __field(short, type) + __field(short, otype) + __field(char, wasdel) + __field(char, wasfromfl) + __field(char, isfl) + __field(char, userdata) + __field(xfs_fsblock_t, firstblock) + ), + TP_fast_assign( + __entry->dev = args->mp->m_super->s_dev; + __entry->agno = args->agno; + __entry->agbno = args->agbno; + __entry->minlen = args->minlen; + __entry->maxlen = args->maxlen; + __entry->mod = args->mod; + __entry->prod = args->prod; + __entry->minleft = args->minleft; + __entry->total = args->total; + __entry->alignment = args->alignment; + __entry->minalignslop = args->minalignslop; + __entry->len = args->len; + __entry->type = args->type; + __entry->otype = args->otype; + __entry->wasdel = args->wasdel; + __entry->wasfromfl = args->wasfromfl; + __entry->isfl = args->isfl; + __entry->userdata = args->userdata; + __entry->firstblock = args->firstblock; + ), + TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " + "prod %u minleft %u total %u alignment %u minalignslop %u " + "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " + "userdata %d firstblock 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->minlen, + __entry->maxlen, + __entry->mod, + __entry->prod, + __entry->minleft, + __entry->total, + __entry->alignment, + __entry->minalignslop, + __entry->len, + __print_symbolic(__entry->type, XFS_ALLOC_TYPES), + __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), + __entry->wasdel, + __entry->wasfromfl, + __entry->isfl, + __entry->userdata, + __entry->firstblock) ) +#define DEFINE_ALLOC_EVENT(name) \ +DEFINE_EVENT(xfs_alloc_class, name, \ + TP_PROTO(struct xfs_alloc_arg *args), \ + TP_ARGS(args)) DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); @@ -1245,92 +1318,100 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); -#define DEFINE_DIR2_TRACE(tname) \ -TRACE_EVENT(tname, \ +DECLARE_EVENT_CLASS(xfs_dir2_class, + TP_PROTO(struct xfs_da_args *args), + TP_ARGS(args), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __dynamic_array(char, name, args->namelen) + __field(int, namelen) + __field(xfs_dahash_t, hashval) + __field(xfs_ino_t, inumber) + __field(int, op_flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + if (args->namelen) + memcpy(__get_str(name), args->name, args->namelen); + __entry->namelen = args->namelen; + __entry->hashval = args->hashval; + __entry->inumber = args->inumber; + __entry->op_flags = args->op_flags; + ), + TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " + "inumber 0x%llx op_flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->namelen, + __entry->namelen ? __get_str(name) : NULL, + __entry->namelen, + __entry->hashval, + __entry->inumber, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) +) + +#define DEFINE_DIR2_EVENT(name) \ +DEFINE_EVENT(xfs_dir2_class, name, \ TP_PROTO(struct xfs_da_args *args), \ - TP_ARGS(args), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __dynamic_array(char, name, args->namelen) \ - __field(int, namelen) \ - __field(xfs_dahash_t, hashval) \ - __field(xfs_ino_t, inumber) \ - __field(int, op_flags) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \ - __entry->ino = args->dp->i_ino; \ - if (args->namelen) \ - memcpy(__get_str(name), args->name, args->namelen); \ - __entry->namelen = args->namelen; \ - __entry->hashval = args->hashval; \ - __entry->inumber = args->inumber; \ - __entry->op_flags = args->op_flags; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " \ - "inumber 0x%llx op_flags %s", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __entry->namelen, \ - __entry->namelen ? __get_str(name) : NULL, \ - __entry->namelen, \ - __entry->hashval, \ - __entry->inumber, \ - __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) \ + TP_ARGS(args)) +DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); +DEFINE_DIR2_EVENT(xfs_dir2_sf_create); +DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_sf_replace); +DEFINE_DIR2_EVENT(xfs_dir2_sf_removename); +DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4); +DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8); +DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block); +DEFINE_DIR2_EVENT(xfs_dir2_block_addname); +DEFINE_DIR2_EVENT(xfs_dir2_block_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_block_replace); +DEFINE_DIR2_EVENT(xfs_dir2_block_removename); +DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf); +DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node); +DEFINE_DIR2_EVENT(xfs_dir2_node_addname); +DEFINE_DIR2_EVENT(xfs_dir2_node_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_node_replace); +DEFINE_DIR2_EVENT(xfs_dir2_node_removename); +DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); + +DECLARE_EVENT_CLASS(xfs_dir2_space_class, + TP_PROTO(struct xfs_da_args *args, int idx), + TP_ARGS(args, idx), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, op_flags) + __field(int, idx) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + __entry->op_flags = args->op_flags; + __entry->idx = idx; + ), + TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), + __entry->idx) ) -DEFINE_DIR2_TRACE(xfs_dir2_sf_addname); -DEFINE_DIR2_TRACE(xfs_dir2_sf_create); -DEFINE_DIR2_TRACE(xfs_dir2_sf_lookup); -DEFINE_DIR2_TRACE(xfs_dir2_sf_replace); -DEFINE_DIR2_TRACE(xfs_dir2_sf_removename); -DEFINE_DIR2_TRACE(xfs_dir2_sf_toino4); -DEFINE_DIR2_TRACE(xfs_dir2_sf_toino8); -DEFINE_DIR2_TRACE(xfs_dir2_sf_to_block); -DEFINE_DIR2_TRACE(xfs_dir2_block_addname); -DEFINE_DIR2_TRACE(xfs_dir2_block_lookup); -DEFINE_DIR2_TRACE(xfs_dir2_block_replace); -DEFINE_DIR2_TRACE(xfs_dir2_block_removename); -DEFINE_DIR2_TRACE(xfs_dir2_block_to_sf); -DEFINE_DIR2_TRACE(xfs_dir2_block_to_leaf); -DEFINE_DIR2_TRACE(xfs_dir2_leaf_addname); -DEFINE_DIR2_TRACE(xfs_dir2_leaf_lookup); -DEFINE_DIR2_TRACE(xfs_dir2_leaf_replace); -DEFINE_DIR2_TRACE(xfs_dir2_leaf_removename); -DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_block); -DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_node); -DEFINE_DIR2_TRACE(xfs_dir2_node_addname); -DEFINE_DIR2_TRACE(xfs_dir2_node_lookup); -DEFINE_DIR2_TRACE(xfs_dir2_node_replace); -DEFINE_DIR2_TRACE(xfs_dir2_node_removename); -DEFINE_DIR2_TRACE(xfs_dir2_node_to_leaf); - -#define DEFINE_DIR2_SPACE_TRACE(tname) \ -TRACE_EVENT(tname, \ + +#define DEFINE_DIR2_SPACE_EVENT(name) \ +DEFINE_EVENT(xfs_dir2_space_class, name, \ TP_PROTO(struct xfs_da_args *args, int idx), \ - TP_ARGS(args, idx), \ - TP_STRUCT__entry( \ - __field(dev_t, dev) \ - __field(xfs_ino_t, ino) \ - __field(int, op_flags) \ - __field(int, idx) \ - ), \ - TP_fast_assign( \ - __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \ - __entry->ino = args->dp->i_ino; \ - __entry->op_flags = args->op_flags; \ - __entry->idx = idx; \ - ), \ - TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", \ - MAJOR(__entry->dev), MINOR(__entry->dev), \ - __entry->ino, \ - __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), \ - __entry->idx) \ -) -DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_add); -DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_remove); -DEFINE_DIR2_SPACE_TRACE(xfs_dir2_grow_inode); -DEFINE_DIR2_SPACE_TRACE(xfs_dir2_shrink_inode); + TP_ARGS(args, idx)) +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode); TRACE_EVENT(xfs_dir2_leafn_moveents, TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), @@ -1361,6 +1442,59 @@ TRACE_EVENT(xfs_dir2_leafn_moveents, __entry->count) ); +#define XFS_SWAPEXT_INODES \ + { 0, "target" }, \ + { 1, "temp" } + +#define XFS_INODE_FORMAT_STR \ + { 0, "invalid" }, \ + { 1, "local" }, \ + { 2, "extent" }, \ + { 3, "btree" } + +DECLARE_EVENT_CLASS(xfs_swap_extent_class, + TP_PROTO(struct xfs_inode *ip, int which), + TP_ARGS(ip, which), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, which) + __field(xfs_ino_t, ino) + __field(int, format) + __field(int, nex) + __field(int, max_nex) + __field(int, broot_size) + __field(int, fork_off) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->which = which; + __entry->ino = ip->i_ino; + __entry->format = ip->i_d.di_format; + __entry->nex = ip->i_d.di_nextents; + __entry->max_nex = ip->i_df.if_ext_max; + __entry->broot_size = ip->i_df.if_broot_bytes; + __entry->fork_off = XFS_IFORK_BOFF(ip); + ), + TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " + "Max in-fork extents %d, broot size %d, fork offset %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), + __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), + __entry->nex, + __entry->max_nex, + __entry->broot_size, + __entry->fork_off) +) + +#define DEFINE_SWAPEXT_EVENT(name) \ +DEFINE_EVENT(xfs_swap_extent_class, name, \ + TP_PROTO(struct xfs_inode *ip, int which), \ + TP_ARGS(ip, which)) + +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c index 0b1878857fc3..fa01b9daba6b 100644 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ b/fs/xfs/linux-2.6/xfs_xattr.c @@ -45,7 +45,7 @@ xfs_xattr_get(struct dentry *dentry, const char *name, value = NULL; } - error = -xfs_attr_get(ip, name, value, &asize, xflags); + error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); if (error) return error; return asize; @@ -67,8 +67,9 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, xflags |= ATTR_REPLACE; if (!value) - return -xfs_attr_remove(ip, name, xflags); - return -xfs_attr_set(ip, name, (void *)value, size, xflags); + return -xfs_attr_remove(ip, (unsigned char *)name, xflags); + return -xfs_attr_set(ip, (unsigned char *)name, + (void *)value, size, xflags); } static struct xattr_handler xfs_xattr_user_handler = { @@ -124,8 +125,13 @@ static const char *xfs_xattr_prefix(int flags) } static int -xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, - char *name, int namelen, int valuelen, char *value) +xfs_xattr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) { unsigned int prefix_len = xfs_xattr_prefix_len(flags); char *offset; @@ -148,7 +154,7 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, offset = (char *)context->alist + context->count; strncpy(offset, xfs_xattr_prefix(flags), prefix_len); offset += prefix_len; - strncpy(offset, name, namelen); /* real name */ + strncpy(offset, (char *)name, namelen); /* real name */ offset += namelen; *offset = '\0'; context->count += prefix_len + namelen + 1; @@ -156,8 +162,13 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, } static int -xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags, - char *name, int namelen, int valuelen, char *value) +xfs_xattr_put_listent_sizes( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) { context->count += xfs_xattr_prefix_len(flags) + namelen + 1; return 0; |