diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-09 12:29:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-09 12:29:12 -0700 |
commit | da89bd213fe719ec3552abbeb8be12d0cc0337ca (patch) | |
tree | 41d07c62c3e4c963504a72bce8043acdd4aa142d /fs/xfs | |
parent | be0c5d8c0bb0023e11f5c6d38e90f7b0f24edb64 (diff) | |
parent | 83e782e1a1cc0159888e58e14dfc8f3289663338 (diff) |
Merge tag 'for-linus-v3.11-rc1' of git://oss.sgi.com/xfs/xfs
Pull xfs update from Ben Myers:
"This includes several bugfixes, part of the work for project quotas
and group quotas to be used together, performance improvements for
inode creation/deletion, buffer readahead, and bulkstat,
implementation of the inode change count, an inode create transaction,
and the removal of a bunch of dead code.
There are also some duplicate commits that you already have from the
3.10-rc series.
- part of the work to allow project quotas and group quotas to be
used together
- inode change count
- inode create transaction
- block queue plugging in buffer readahead and bulkstat
- ordered log vector support
- removal of dead code in and around xfs_sync_inode_grab,
xfs_ialloc_get_rec, XFS_MOUNT_RETERR, XFS_ALLOCFREE_LOG_RES,
XFS_DIROP_LOG_RES, xfs_chash, ctl_table, and
xfs_growfs_data_private
- don't keep silent if sunit/swidth can not be changed via mount
- fix a leak of remote symlink blocks into the filesystem when xattrs
are used on symlinks
- fix for fiemap to return FIEMAP_EXTENT_UNKOWN flag on delay extents
- part of a fix for xfs_fsr
- disable speculative preallocation with small files
- performance improvements for inode creates and deletes"
* tag 'for-linus-v3.11-rc1' of git://oss.sgi.com/xfs/xfs: (61 commits)
xfs: Remove incore use of XFS_OQUOTA_ENFD and XFS_OQUOTA_CHKD
xfs: Change xfs_dquot_acct to be a 2-dimensional array
xfs: Code cleanup and removal of some typedef usage
xfs: Replace macro XFS_DQ_TO_QIP with a function
xfs: Replace macro XFS_DQUOT_TREE with a function
xfs: Define a new function xfs_is_quota_inode()
xfs: implement inode change count
xfs: Use inode create transaction
xfs: Inode create item recovery
xfs: Inode create transaction reservations
xfs: Inode create log items
xfs: Introduce an ordered buffer item
xfs: Introduce ordered log vector support
xfs: xfs_ifree doesn't need to modify the inode buffer
xfs: don't do IO when creating an new inode
xfs: don't use speculative prealloc for small files
xfs: plug directory buffer readahead
xfs: add pluging for bulkstat readahead
xfs: Remove dead function prototype xfs_sync_inode_grab()
xfs: Remove the left function variable from xfs_ialloc_get_rec()
...
Diffstat (limited to 'fs/xfs')
43 files changed, 1180 insertions, 477 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 6313b69b6644..4a4508023a3c 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -71,6 +71,7 @@ xfs-y += xfs_alloc.o \ xfs_dir2_sf.o \ xfs_ialloc.o \ xfs_ialloc_btree.o \ + xfs_icreate_item.o \ xfs_inode.o \ xfs_log_recover.o \ xfs_mount.o \ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 5673bcfda2f0..71596e57283a 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -175,6 +175,7 @@ xfs_alloc_compute_diff( xfs_agblock_t wantbno, /* target starting block */ xfs_extlen_t wantlen, /* target length */ xfs_extlen_t alignment, /* target alignment */ + char userdata, /* are we allocating data? */ xfs_agblock_t freebno, /* freespace's starting block */ xfs_extlen_t freelen, /* freespace's length */ xfs_agblock_t *newbnop) /* result: best start block from free */ @@ -189,7 +190,14 @@ xfs_alloc_compute_diff( ASSERT(freelen >= wantlen); freeend = freebno + freelen; wantend = wantbno + wantlen; - if (freebno >= wantbno) { + /* + * We want to allocate from the start of a free extent if it is past + * the desired block or if we are allocating user data and the free + * extent is before desired block. The second case is there to allow + * for contiguous allocation from the remaining free space if the file + * grows in the short term. + */ + if (freebno >= wantbno || (userdata && freeend < wantend)) { if ((newbno1 = roundup(freebno, alignment)) >= freeend) newbno1 = NULLAGBLOCK; } else if (freeend >= wantend && alignment > 1) { @@ -805,7 +813,8 @@ xfs_alloc_find_best_extent( xfs_alloc_fix_len(args); sdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, *sbnoa, + args->alignment, + args->userdata, *sbnoa, *slena, &new); /* @@ -976,7 +985,8 @@ restart: if (args->len < blen) continue; ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, ltbnoa, ltlena, <new); + args->alignment, args->userdata, ltbnoa, + ltlena, <new); if (ltnew != NULLAGBLOCK && (args->len > blen || ltdiff < bdiff)) { bdiff = ltdiff; @@ -1128,7 +1138,8 @@ restart: args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, ltbnoa, ltlena, <new); + args->alignment, args->userdata, ltbnoa, + ltlena, <new); error = xfs_alloc_find_best_extent(args, &bno_cur_lt, &bno_cur_gt, @@ -1144,7 +1155,8 @@ restart: args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); xfs_alloc_fix_len(args); gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, gtbnoa, gtlena, >new); + args->alignment, args->userdata, gtbnoa, + gtlena, >new); error = xfs_alloc_find_best_extent(args, &bno_cur_gt, &bno_cur_lt, @@ -1203,7 +1215,7 @@ restart: } rlen = args->len; (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, - ltbnoa, ltlena, <new); + args->userdata, ltbnoa, ltlena, <new); ASSERT(ltnew >= ltbno); ASSERT(ltnew + rlen <= ltbnoa + ltlena); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 70c43d9f72c1..1b726d626941 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -196,6 +196,8 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; #define XFS_BMDR_SPACE_CALC(nrecs) \ (int)(sizeof(xfs_bmdr_block_t) + \ ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) +#define XFS_BMAP_BMDR_SPACE(bb) \ + (XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) /* * Maximum number of bmap btree levels. diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 4ec431777048..bfc4e0c26fd3 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -140,6 +140,16 @@ xfs_buf_item_size( ASSERT(bip->bli_flags & XFS_BLI_LOGGED); + if (bip->bli_flags & XFS_BLI_ORDERED) { + /* + * The buffer has been logged just to order it. + * It is not being included in the transaction + * commit, so no vectors are used at all. + */ + trace_xfs_buf_item_size_ordered(bip); + return XFS_LOG_VEC_ORDERED; + } + /* * the vector count is based on the number of buffer vectors we have * dirty bits in. This will only be greater than one when we have a @@ -212,6 +222,7 @@ xfs_buf_item_format_segment( goto out; } + /* * Fill in an iovec for each set of contiguous chunks. */ @@ -299,18 +310,36 @@ xfs_buf_item_format( /* * If it is an inode buffer, transfer the in-memory state to the - * format flags and clear the in-memory state. We do not transfer + * format flags and clear the in-memory state. + * + * For buffer based inode allocation, we do not transfer * this state if the inode buffer allocation has not yet been committed * to the log as setting the XFS_BLI_INODE_BUF flag will prevent * correct replay of the inode allocation. + * + * For icreate item based inode allocation, the buffers aren't written + * to the journal during allocation, and hence we should always tag the + * buffer as an inode buffer so that the correct unlinked list replay + * occurs during recovery. */ if (bip->bli_flags & XFS_BLI_INODE_BUF) { - if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && + if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) || + !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && xfs_log_item_in_current_chkpt(lip))) bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; bip->bli_flags &= ~XFS_BLI_INODE_BUF; } + if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) == + XFS_BLI_ORDERED) { + /* + * The buffer has been logged just to order it. It is not being + * included in the transaction commit, so don't format it. + */ + trace_xfs_buf_item_format_ordered(bip); + return; + } + for (i = 0; i < bip->bli_format_count; i++) { vecp = xfs_buf_item_format_segment(bip, vecp, offset, &bip->bli_formats[i]); @@ -340,6 +369,7 @@ xfs_buf_item_pin( ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || + (bip->bli_flags & XFS_BLI_ORDERED) || (bip->bli_flags & XFS_BLI_STALE)); trace_xfs_buf_item_pin(bip); @@ -512,8 +542,9 @@ xfs_buf_item_unlock( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - int aborted, clean, i; - uint hold; + bool clean; + bool aborted; + int flags; /* Clear the buffer's association with this transaction. */ bp->b_transp = NULL; @@ -524,23 +555,21 @@ xfs_buf_item_unlock( * (cancelled) buffers at unpin time, but we'll never go through the * pin/unpin cycle if we abort inside commit. */ - aborted = (lip->li_flags & XFS_LI_ABORTED) != 0; - + aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false; /* - * Before possibly freeing the buf item, determine if we should - * release the buffer at the end of this routine. + * Before possibly freeing the buf item, copy the per-transaction state + * so we can reference it safely later after clearing it from the + * buffer log item. */ - hold = bip->bli_flags & XFS_BLI_HOLD; - - /* Clear the per transaction state. */ - bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD); + flags = bip->bli_flags; + bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); /* * If the buf item is marked stale, then don't do anything. We'll * unlock the buffer and free the buf item when the buffer is unpinned * for the last time. */ - if (bip->bli_flags & XFS_BLI_STALE) { + if (flags & XFS_BLI_STALE) { trace_xfs_buf_item_unlock_stale(bip); ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); if (!aborted) { @@ -557,13 +586,19 @@ xfs_buf_item_unlock( * be the only reference to the buf item, so we free it anyway * regardless of whether it is dirty or not. A dirty abort implies a * shutdown, anyway. + * + * Ordered buffers are dirty but may have no recorded changes, so ensure + * we only release clean items here. */ - clean = 1; - for (i = 0; i < bip->bli_format_count; i++) { - if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, - bip->bli_formats[i].blf_map_size)) { - clean = 0; - break; + clean = (flags & XFS_BLI_DIRTY) ? false : true; + if (clean) { + int i; + for (i = 0; i < bip->bli_format_count; i++) { + if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, + bip->bli_formats[i].blf_map_size)) { + clean = false; + break; + } } } if (clean) @@ -576,7 +611,7 @@ xfs_buf_item_unlock( } else atomic_dec(&bip->bli_refcount); - if (!hold) + if (!(flags & XFS_BLI_HOLD)) xfs_buf_relse(bp); } @@ -842,12 +877,6 @@ xfs_buf_item_log( struct xfs_buf *bp = bip->bli_buf; /* - * Mark the item as having some dirty data for - * quick reference in xfs_buf_item_dirty. - */ - bip->bli_flags |= XFS_BLI_DIRTY; - - /* * walk each buffer segment and mark them dirty appropriately. */ start = 0; @@ -873,7 +902,7 @@ xfs_buf_item_log( /* - * Return 1 if the buffer has some data that has been logged (at any + * Return 1 if the buffer has been logged or ordered in a transaction (at any * point, not just the current transaction) and 0 if not. */ uint @@ -907,11 +936,11 @@ void xfs_buf_item_relse( xfs_buf_t *bp) { - xfs_buf_log_item_t *bip; + xfs_buf_log_item_t *bip = bp->b_fspriv; trace_xfs_buf_item_relse(bp, _RET_IP_); + ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); - bip = bp->b_fspriv; bp->b_fspriv = bip->bli_item.li_bio_list; if (bp->b_fspriv == NULL) bp->b_iodone = NULL; diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 2573d2a75fc8..0f1c247dc680 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -120,6 +120,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf) #define XFS_BLI_INODE_ALLOC_BUF 0x10 #define XFS_BLI_STALE_INODE 0x20 #define XFS_BLI_INODE_BUF 0x40 +#define XFS_BLI_ORDERED 0x80 #define XFS_BLI_FLAGS \ { XFS_BLI_HOLD, "HOLD" }, \ @@ -128,7 +129,8 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf) { XFS_BLI_LOGGED, "LOGGED" }, \ { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ - { XFS_BLI_INODE_BUF, "INODE_BUF" } + { XFS_BLI_INODE_BUF, "INODE_BUF" }, \ + { XFS_BLI_ORDERED, "ORDERED" } #ifdef __KERNEL__ diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index c407e1ccff43..e36445ceaf80 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -24,6 +24,9 @@ #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" @@ -182,7 +185,7 @@ xfs_swap_extents_check_format( */ if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { if (XFS_IFORK_BOFF(ip) && - tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) + XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) return EINVAL; if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) @@ -192,9 +195,8 @@ xfs_swap_extents_check_format( /* Reciprocal target->temp btree format checks */ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { if (XFS_IFORK_BOFF(tip) && - ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) + XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) return EINVAL; - if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) return EINVAL; diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index e0cc1243a8aa..2aed25cae04d 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -1108,6 +1108,7 @@ xfs_dir2_leaf_readbuf( struct xfs_mount *mp = dp->i_mount; struct xfs_buf *bp = *bpp; struct xfs_bmbt_irec *map = mip->map; + struct blk_plug plug; int error = 0; int length; int i; @@ -1236,6 +1237,7 @@ xfs_dir2_leaf_readbuf( /* * Do we need more readahead? */ + blk_start_plug(&plug); for (mip->ra_index = mip->ra_offset = i = 0; mip->ra_want > mip->ra_current && i < mip->map_blocks; i += mp->m_dirblkfsbs) { @@ -1287,6 +1289,7 @@ xfs_dir2_leaf_readbuf( } } } + blk_finish_plug(&plug); out: *bpp = bp; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 044e97a33c8d..f01012de06d0 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -570,13 +570,13 @@ xfs_qm_dqtobp( xfs_buf_t **O_bpp, uint flags) { - xfs_bmbt_irec_t map; - int nmaps = 1, error; - xfs_buf_t *bp; - xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); - xfs_mount_t *mp = dqp->q_mount; - xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); - xfs_trans_t *tp = (tpp ? *tpp : NULL); + struct xfs_bmbt_irec map; + int nmaps = 1, error; + struct xfs_buf *bp; + struct xfs_inode *quotip = xfs_dq_to_quota_inode(dqp); + struct xfs_mount *mp = dqp->q_mount; + xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); + struct xfs_trans *tp = (tpp ? *tpp : NULL); dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; @@ -804,7 +804,7 @@ xfs_qm_dqget( xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ { struct xfs_quotainfo *qi = mp->m_quotainfo; - struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); + struct radix_tree_root *tree = xfs_dquot_tree(qi, type); struct xfs_dquot *dqp; int error; diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 4f0ebfc43cc9..b596626249b8 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -143,10 +143,6 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) #define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) -#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) -#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ - XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ - XFS_DQ_TO_QINF(dqp)->qi_gquotaip) extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, uint, struct xfs_dquot **); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3c3644ea825b..614eb0cc3608 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -176,7 +176,7 @@ xfs_growfs_data_private( if (!bp) return EIO; if (bp->b_error) { - int error = bp->b_error; + error = bp->b_error; xfs_buf_relse(bp); return error; } diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c8f5ae1debf2..7a0c17d7ec09 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -38,6 +38,7 @@ #include "xfs_bmap.h" #include "xfs_cksum.h" #include "xfs_buf_item.h" +#include "xfs_icreate_item.h" /* @@ -150,12 +151,16 @@ xfs_check_agi_freecount( #endif /* - * Initialise a new set of inodes. + * Initialise a new set of inodes. When called without a transaction context + * (e.g. from recovery) we initiate a delayed write of the inode buffers rather + * than logging them (which in a transaction context puts them into the AIL + * for writeback rather than the xfsbufd queue). */ -STATIC int +int xfs_ialloc_inode_init( struct xfs_mount *mp, struct xfs_trans *tp, + struct list_head *buffer_list, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agblock_t length, @@ -208,6 +213,18 @@ xfs_ialloc_inode_init( version = 3; ino = XFS_AGINO_TO_INO(mp, agno, XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); + + /* + * log the initialisation that is about to take place as an + * logical operation. This means the transaction does not + * need to log the physical changes to the inode buffers as log + * recovery will know what initialisation is actually needed. + * Hence we only need to log the buffers as "ordered" buffers so + * they track in the AIL as if they were physically logged. + */ + if (tp) + xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), + mp->m_sb.sb_inodesize, length, gen); } else if (xfs_sb_version_hasnlink(&mp->m_sb)) version = 2; else @@ -223,13 +240,8 @@ xfs_ialloc_inode_init( XBF_UNMAPPED); if (!fbuf) return ENOMEM; - /* - * Initialize all inodes in this buffer and then log them. - * - * XXX: It would be much better if we had just one transaction - * to log a whole cluster of inodes instead of all the - * individual transactions causing a lot of log traffic. - */ + + /* Initialize the inode buffers and log them appropriately. */ fbuf->b_ops = &xfs_inode_buf_ops; xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); for (i = 0; i < ninodes; i++) { @@ -247,18 +259,39 @@ xfs_ialloc_inode_init( ino++; uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); xfs_dinode_calc_crc(mp, free); - } else { + } else if (tp) { /* just log the inode core */ xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); } } - if (version == 3) { - /* need to log the entire buffer */ - xfs_trans_log_buf(tp, fbuf, 0, - BBTOB(fbuf->b_length) - 1); + + if (tp) { + /* + * Mark the buffer as an inode allocation buffer so it + * sticks in AIL at the point of this allocation + * transaction. This ensures the they are on disk before + * the tail of the log can be moved past this + * transaction (i.e. by preventing relogging from moving + * it forward in the log). + */ + xfs_trans_inode_alloc_buf(tp, fbuf); + if (version == 3) { + /* + * Mark the buffer as ordered so that they are + * not physically logged in the transaction but + * still tracked in the AIL as part of the + * transaction and pin the log appropriately. + */ + xfs_trans_ordered_buf(tp, fbuf); + xfs_trans_log_buf(tp, fbuf, 0, + BBTOB(fbuf->b_length) - 1); + } + } else { + fbuf->b_flags |= XBF_DONE; + xfs_buf_delwri_queue(fbuf, buffer_list); + xfs_buf_relse(fbuf); } - xfs_trans_inode_alloc_buf(tp, fbuf); } return 0; } @@ -303,7 +336,7 @@ xfs_ialloc_ag_alloc( * First try to allocate inodes contiguous with the last-allocated * chunk of inodes. If the filesystem is striped, this will fill * an entire stripe unit with inodes. - */ + */ agi = XFS_BUF_TO_AGI(agbp); newino = be32_to_cpu(agi->agi_newino); agno = be32_to_cpu(agi->agi_seqno); @@ -402,7 +435,7 @@ xfs_ialloc_ag_alloc( * rather than a linear progression to prevent the next generation * number from being easily guessable. */ - error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, + error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, args.len, prandom_u32()); if (error) @@ -615,8 +648,7 @@ xfs_ialloc_get_rec( struct xfs_btree_cur *cur, xfs_agino_t agino, xfs_inobt_rec_incore_t *rec, - int *done, - int left) + int *done) { int error; int i; @@ -724,12 +756,12 @@ xfs_dialloc_ag( pag->pagl_leftrec != NULLAGINO && pag->pagl_rightrec != NULLAGINO) { error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, - &trec, &doneleft, 1); + &trec, &doneleft); if (error) goto error1; error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, - &rec, &doneright, 0); + &rec, &doneright); if (error) goto error1; } else { diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index c8da3df271e6..68c07320f096 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -150,6 +150,14 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_inobt_rec_incore_t *rec, int *stat); +/* + * Inode chunk initialisation routine + */ +int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, + struct list_head *buffer_list, + xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_agblock_t length, unsigned int gen); + extern const struct xfs_buf_ops xfs_agi_buf_ops; #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 96e344e3e927..9560dc1f15a9 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -335,7 +335,8 @@ xfs_iget_cache_miss( iflags = XFS_INEW; if (flags & XFS_IGET_DONTCACHE) iflags |= XFS_IDONTCACHE; - ip->i_udquot = ip->i_gdquot = NULL; + ip->i_udquot = NULL; + ip->i_gdquot = NULL; xfs_iflags_set(ip, iflags); /* insert the new inode */ diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index e0f138c70a2f..a01afbb3909a 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -40,7 +40,6 @@ void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); void xfs_eofblocks_worker(struct work_struct *); -int xfs_sync_inode_grab(struct xfs_inode *ip); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags, void *args), diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c new file mode 100644 index 000000000000..7716a4e7375e --- /dev/null +++ b/fs/xfs/xfs_icreate_item.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2008-2010, 2013 Dave Chinner + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_mount.h" +#include "xfs_trans_priv.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_inode_item.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_error.h" +#include "xfs_icreate_item.h" + +kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ + +static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_icreate_item, ic_item); +} + +/* + * This returns the number of iovecs needed to log the given inode item. + * + * We only need one iovec for the icreate log structure. + */ +STATIC uint +xfs_icreate_item_size( + struct xfs_log_item *lip) +{ + return 1; +} + +/* + * This is called to fill in the vector of log iovecs for the + * given inode create log item. + */ +STATIC void +xfs_icreate_item_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *log_vector) +{ + struct xfs_icreate_item *icp = ICR_ITEM(lip); + + log_vector->i_addr = (xfs_caddr_t)&icp->ic_format; + log_vector->i_len = sizeof(struct xfs_icreate_log); + log_vector->i_type = XLOG_REG_TYPE_ICREATE; +} + + +/* Pinning has no meaning for the create item, so just return. */ +STATIC void +xfs_icreate_item_pin( + struct xfs_log_item *lip) +{ +} + + +/* pinning has no meaning for the create item, so just return. */ +STATIC void +xfs_icreate_item_unpin( + struct xfs_log_item *lip, + int remove) +{ +} + +STATIC void +xfs_icreate_item_unlock( + struct xfs_log_item *lip) +{ + struct xfs_icreate_item *icp = ICR_ITEM(lip); + + if (icp->ic_item.li_flags & XFS_LI_ABORTED) + kmem_zone_free(xfs_icreate_zone, icp); + return; +} + +/* + * Because we have ordered buffers being tracked in the AIL for the inode + * creation, we don't need the create item after this. Hence we can free + * the log item and return -1 to tell the caller we're done with the item. + */ +STATIC xfs_lsn_t +xfs_icreate_item_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ + struct xfs_icreate_item *icp = ICR_ITEM(lip); + + kmem_zone_free(xfs_icreate_zone, icp); + return (xfs_lsn_t)-1; +} + +/* item can never get into the AIL */ +STATIC uint +xfs_icreate_item_push( + struct xfs_log_item *lip, + struct list_head *buffer_list) +{ + ASSERT(0); + return XFS_ITEM_SUCCESS; +} + +/* Ordered buffers do the dependency tracking here, so this does nothing. */ +STATIC void +xfs_icreate_item_committing( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ +} + +/* + * This is the ops vector shared by all buf log items. + */ +static struct xfs_item_ops xfs_icreate_item_ops = { + .iop_size = xfs_icreate_item_size, + .iop_format = xfs_icreate_item_format, + .iop_pin = xfs_icreate_item_pin, + .iop_unpin = xfs_icreate_item_unpin, + .iop_push = xfs_icreate_item_push, + .iop_unlock = xfs_icreate_item_unlock, + .iop_committed = xfs_icreate_item_committed, + .iop_committing = xfs_icreate_item_committing, +}; + + +/* + * Initialize the inode log item for a newly allocated (in-core) inode. + * + * Inode extents can only reside within an AG. Hence specify the starting + * block for the inode chunk by offset within an AG as well as the + * length of the allocated extent. + * + * This joins the item to the transaction and marks it dirty so + * that we don't need a separate call to do this, nor does the + * caller need to know anything about the icreate item. + */ +void +xfs_icreate_log( + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + unsigned int count, + unsigned int inode_size, + xfs_agblock_t length, + unsigned int generation) +{ + struct xfs_icreate_item *icp; + + icp = kmem_zone_zalloc(xfs_icreate_zone, KM_SLEEP); + + xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, + &xfs_icreate_item_ops); + + icp->ic_format.icl_type = XFS_LI_ICREATE; + icp->ic_format.icl_size = 1; /* single vector */ + icp->ic_format.icl_ag = cpu_to_be32(agno); + icp->ic_format.icl_agbno = cpu_to_be32(agbno); + icp->ic_format.icl_count = cpu_to_be32(count); + icp->ic_format.icl_isize = cpu_to_be32(inode_size); + icp->ic_format.icl_length = cpu_to_be32(length); + icp->ic_format.icl_gen = cpu_to_be32(generation); + + xfs_trans_add_item(tp, &icp->ic_item); + tp->t_flags |= XFS_TRANS_DIRTY; + icp->ic_item.li_desc->lid_flags |= XFS_LID_DIRTY; +} diff --git a/fs/xfs/xfs_icreate_item.h b/fs/xfs/xfs_icreate_item.h new file mode 100644 index 000000000000..88ba8aa0bc41 --- /dev/null +++ b/fs/xfs/xfs_icreate_item.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2008-2010, Dave Chinner + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef XFS_ICREATE_ITEM_H +#define XFS_ICREATE_ITEM_H 1 + +/* + * on disk log item structure + * + * Log recovery assumes the first two entries are the type and size and they fit + * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so + * decoding can be done correctly. + */ +struct xfs_icreate_log { + __uint16_t icl_type; /* type of log format structure */ + __uint16_t icl_size; /* size of log format structure */ + __be32 icl_ag; /* ag being allocated in */ + __be32 icl_agbno; /* start block of inode range */ + __be32 icl_count; /* number of inodes to initialise */ + __be32 icl_isize; /* size of inodes */ + __be32 icl_length; /* length of extent to initialise */ + __be32 icl_gen; /* inode generation number to use */ +}; + +/* in memory log item structure */ +struct xfs_icreate_item { + struct xfs_log_item ic_item; + struct xfs_icreate_log ic_format; +}; + +extern kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ + +void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t agno, + xfs_agblock_t agbno, unsigned int count, + unsigned int inode_size, xfs_agblock_t length, + unsigned int generation); + +#endif /* XFS_ICREATE_ITEM_H */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7f7be5f98f52..9ecfe1e559fc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1028,6 +1028,11 @@ xfs_dinode_calc_crc( /* * Read the disk inode attributes into the in-core inode structure. + * + * If we are initialising a new inode and we are not utilising the + * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core + * with a random generation number. If we are keeping inodes around, we need to + * read the inode cluster to get the existing generation number off disk. */ int xfs_iread( @@ -1047,6 +1052,22 @@ xfs_iread( if (error) return error; + /* shortcut IO on inode allocation if possible */ + if ((iget_flags & XFS_IGET_CREATE) && + !(mp->m_flags & XFS_MOUNT_IKEEP)) { + /* initialise the on-disk inode core */ + memset(&ip->i_d, 0, sizeof(ip->i_d)); + ip->i_d.di_magic = XFS_DINODE_MAGIC; + ip->i_d.di_gen = prandom_u32(); + if (xfs_sb_version_hascrc(&mp->m_sb)) { + ip->i_d.di_version = 3; + ip->i_d.di_ino = ip->i_ino; + uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); + } else + ip->i_d.di_version = 2; + return 0; + } + /* * Get pointers to the on-disk inode and the buffer containing it. */ @@ -1133,17 +1154,16 @@ xfs_iread( xfs_buf_set_ref(bp, XFS_INO_REF); /* - * Use xfs_trans_brelse() to release the buffer containing the - * on-disk inode, because it was acquired with xfs_trans_read_buf() - * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal + * Use xfs_trans_brelse() to release the buffer containing the on-disk + * inode, because it was acquired with xfs_trans_read_buf() in + * xfs_imap_to_bp() above. If tp is NULL, this is just a normal * brelse(). If we're within a transaction, then xfs_trans_brelse() * will only release the buffer if it is not dirty within the * transaction. It will be OK to release the buffer in this case, - * because inodes on disk are never destroyed and we will be - * locking the new in-core inode before putting it in the hash - * table where other processes can find it. Thus we don't have - * to worry about the inode being changed just because we released - * the buffer. + * because inodes on disk are never destroyed and we will be locking the + * new in-core inode before putting it in the cache where other + * processes can find it. Thus we don't have to worry about the inode + * being changed just because we released the buffer. */ out_brelse: xfs_trans_brelse(tp, bp); @@ -2028,8 +2048,6 @@ xfs_ifree( int error; int delete; xfs_ino_t first_ino; - xfs_dinode_t *dip; - xfs_buf_t *ibp; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(ip->i_d.di_nlink == 0); @@ -2042,14 +2060,13 @@ xfs_ifree( * Pull the on-disk inode from the AGI unlinked list. */ error = xfs_iunlink_remove(tp, ip); - if (error != 0) { + if (error) return error; - } error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); - if (error != 0) { + if (error) return error; - } + ip->i_d.di_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; @@ -2061,31 +2078,10 @@ xfs_ifree( * by reincarnations of this inode. */ ip->i_d.di_gen++; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp, - 0, 0); - if (error) - return error; - - /* - * Clear the on-disk di_mode. This is to prevent xfs_bulkstat - * from picking up this inode when it is reclaimed (its incore state - * initialzed but not flushed to disk yet). The in-core di_mode is - * already cleared and a corresponding transaction logged. - * The hack here just synchronizes the in-core to on-disk - * di_mode value in advance before the actual inode sync to disk. - * This is OK because the inode is already unlinked and would never - * change its di_mode again for this inode generation. - * This is a temporary hack that would require a proper fix - * in the future. - */ - dip->di_mode = 0; - - if (delete) { + if (delete) error = xfs_ifree_cluster(ip, tp, first_ino); - } return error; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 8f8aaee7f379..6a7096422295 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -284,6 +284,15 @@ xfs_iomap_eof_want_preallocate( return 0; /* + * If the file is smaller than the minimum prealloc and we are using + * dynamic preallocation, don't do any preallocation at all as it is + * likely this is the only write to the file that is going to be done. + */ + if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && + XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)) + return 0; + + /* * If there are any real blocks past eof, then don't * do any speculative allocation. */ @@ -345,6 +354,10 @@ xfs_iomap_eof_prealloc_initial_size( if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) return 0; + /* If the file is small, then use the minimum prealloc */ + if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign)) + return 0; + /* * As we write multiple pages, the offset will always align to the * start of a page and hence point to a hole at EOF. i.e. if the size is diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ca9ecaa81112..c69bbc493cb0 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -987,7 +987,8 @@ xfs_fiemap_format( if (bmv->bmv_oflags & BMV_OF_PREALLOC) fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { - fiemap_flags |= FIEMAP_EXTENT_DELALLOC; + fiemap_flags |= (FIEMAP_EXTENT_DELALLOC | + FIEMAP_EXTENT_UNKNOWN); physical = 0; /* no block yet */ } if (bmv->bmv_oflags & BMV_OF_LAST) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2ea7d402188d..bc92c5306a17 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -43,7 +43,7 @@ xfs_internal_inum( { return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || (xfs_sb_version_hasquota(&mp->m_sb) && - (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); + xfs_is_quota_inode(&mp->m_sb, ino))); } /* @@ -383,11 +383,13 @@ xfs_bulkstat( * Also start read-ahead now for this chunk. */ if (r.ir_freecount < XFS_INODES_PER_CHUNK) { + struct blk_plug plug; /* * Loop over all clusters in the next chunk. * Do a readahead if there are any allocated * inodes in that cluster. */ + blk_start_plug(&plug); agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; @@ -399,6 +401,7 @@ xfs_bulkstat( agbno, nbcluster, &xfs_inode_buf_ops); } + blk_finish_plug(&plug); irbp->ir_startino = r.ir_startino; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b345a7c85153..d852a2b3e1fd 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1963,6 +1963,10 @@ xlog_write_calc_vec_length( headers++; for (lv = log_vector; lv; lv = lv->lv_next) { + /* we don't write ordered log vectors */ + if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) + continue; + headers += lv->lv_niovecs; for (i = 0; i < lv->lv_niovecs; i++) { @@ -2216,7 +2220,7 @@ xlog_write( index = 0; lv = log_vector; vecp = lv->lv_iovecp; - while (lv && index < lv->lv_niovecs) { + while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { void *ptr; int log_offset; @@ -2236,13 +2240,22 @@ xlog_write( * This loop writes out as many regions as can fit in the amount * of space which was allocated by xlog_state_get_iclog_space(). */ - while (lv && index < lv->lv_niovecs) { - struct xfs_log_iovec *reg = &vecp[index]; + while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { + struct xfs_log_iovec *reg; struct xlog_op_header *ophdr; int start_rec_copy; int copy_len; int copy_off; + bool ordered = false; + + /* ordered log vectors have no regions to write */ + if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { + ASSERT(lv->lv_niovecs == 0); + ordered = true; + goto next_lv; + } + reg = &vecp[index]; ASSERT(reg->i_len % sizeof(__int32_t) == 0); ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); @@ -2302,12 +2315,13 @@ xlog_write( break; if (++index == lv->lv_niovecs) { +next_lv: lv = lv->lv_next; index = 0; if (lv) vecp = lv->lv_iovecp; } - if (record_cnt == 0) { + if (record_cnt == 0 && ordered == false) { if (!lv) return 0; break; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 5caee96059df..fb630e496c12 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -88,7 +88,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) #define XLOG_REG_TYPE_UNMOUNT 17 #define XLOG_REG_TYPE_COMMIT 18 #define XLOG_REG_TYPE_TRANSHDR 19 -#define XLOG_REG_TYPE_MAX 19 +#define XLOG_REG_TYPE_ICREATE 20 +#define XLOG_REG_TYPE_MAX 20 typedef struct xfs_log_iovec { void *i_addr; /* beginning address of region */ @@ -105,6 +106,8 @@ struct xfs_log_vec { int lv_buf_len; /* size of formatted buffer */ }; +#define XFS_LOG_VEC_ORDERED (-1) + /* * Structure used to pass callback function and the function's argument * to the log manager. diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index d0833b54e55d..02b9cf3f8252 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -127,6 +127,7 @@ xlog_cil_prepare_log_vecs( int index; int len = 0; uint niovecs; + bool ordered = false; /* Skip items which aren't dirty in this transaction. */ if (!(lidp->lid_flags & XFS_LID_DIRTY)) @@ -137,14 +138,30 @@ xlog_cil_prepare_log_vecs( if (!niovecs) continue; + /* + * Ordered items need to be tracked but we do not wish to write + * them. We need a logvec to track the object, but we do not + * need an iovec or buffer to be allocated for copying data. + */ + if (niovecs == XFS_LOG_VEC_ORDERED) { + ordered = true; + niovecs = 0; + } + new_lv = kmem_zalloc(sizeof(*new_lv) + niovecs * sizeof(struct xfs_log_iovec), KM_SLEEP|KM_NOFS); + new_lv->lv_item = lidp->lid_item; + new_lv->lv_niovecs = niovecs; + if (ordered) { + /* track as an ordered logvec */ + new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED; + goto next; + } + /* The allocated iovec region lies beyond the log vector. */ new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; - new_lv->lv_niovecs = niovecs; - new_lv->lv_item = lidp->lid_item; /* build the vector array and calculate it's length */ IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); @@ -165,6 +182,7 @@ xlog_cil_prepare_log_vecs( } ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); +next: if (!ret_lv) ret_lv = new_lv; else @@ -191,8 +209,18 @@ xfs_cil_prepare_item( if (old) { /* existing lv on log item, space used is a delta */ - ASSERT(!list_empty(&lv->lv_item->li_cil)); - ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); + ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) || + old->lv_buf_len == XFS_LOG_VEC_ORDERED); + + /* + * If the new item is ordered, keep the old one that is already + * tracking dirty or ordered regions + */ + if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { + ASSERT(!lv->lv_buf); + kmem_free(lv); + return; + } *len += lv->lv_buf_len - old->lv_buf_len; *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; @@ -201,10 +229,11 @@ xfs_cil_prepare_item( } else { /* new lv, must pin the log item */ ASSERT(!lv->lv_item->li_lv); - ASSERT(list_empty(&lv->lv_item->li_cil)); - *len += lv->lv_buf_len; - *diff_iovecs += lv->lv_niovecs; + if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { + *len += lv->lv_buf_len; + *diff_iovecs += lv->lv_niovecs; + } IOP_PIN(lv->lv_item); } @@ -259,18 +288,24 @@ xlog_cil_insert_items( * We can do this safely because the context can't checkpoint until we * are done so it doesn't matter exactly how we update the CIL. */ - for (lv = log_vector; lv; lv = lv->lv_next) - xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); - - /* account for space used by new iovec headers */ - len += diff_iovecs * sizeof(xlog_op_header_t); - spin_lock(&cil->xc_cil_lock); + for (lv = log_vector; lv; ) { + struct xfs_log_vec *next = lv->lv_next; - /* move the items to the tail of the CIL */ - for (lv = log_vector; lv; lv = lv->lv_next) + ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil)); + lv->lv_next = NULL; + + /* + * xfs_cil_prepare_item() may free the lv, so move the item on + * the CIL first. + */ list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); + xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); + lv = next; + } + /* account for space used by new iovec headers */ + len += diff_iovecs * sizeof(xlog_op_header_t); ctx->nvecs += diff_iovecs; /* @@ -381,9 +416,7 @@ xlog_cil_push( struct xfs_cil_ctx *new_ctx; struct xlog_in_core *commit_iclog; struct xlog_ticket *tic; - int num_lv; int num_iovecs; - int len; int error = 0; struct xfs_trans_header thdr; struct xfs_log_iovec lhdr; @@ -428,12 +461,9 @@ xlog_cil_push( * side which is currently locked out by the flush lock. */ lv = NULL; - num_lv = 0; num_iovecs = 0; - len = 0; while (!list_empty(&cil->xc_cil)) { struct xfs_log_item *item; - int i; item = list_first_entry(&cil->xc_cil, struct xfs_log_item, li_cil); @@ -444,11 +474,7 @@ xlog_cil_push( lv->lv_next = item->li_lv; lv = item->li_lv; item->li_lv = NULL; - - num_lv++; num_iovecs += lv->lv_niovecs; - for (i = 0; i < lv->lv_niovecs; i++) - len += lv->lv_iovecp[i].i_len; } /* @@ -701,6 +727,7 @@ xfs_log_commit_cil( if (commit_lsn) *commit_lsn = log->l_cilp->xc_ctx->sequence; + /* xlog_cil_insert_items() destroys log_vector list */ xlog_cil_insert_items(log, log_vector, tp->t_ticket); /* check we didn't blow the reservation */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7cf5e4eafe28..6fcc910a50b9 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -45,6 +45,7 @@ #include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_icreate_item.h" /* Need all the magic numbers and buffer ops structures from these headers */ #include "xfs_symlink.h" @@ -1617,7 +1618,10 @@ xlog_recover_add_to_trans( * form the cancelled buffer table. Hence they have tobe done last. * * 3. Inode allocation buffers must be replayed before inode items that - * read the buffer and replay changes into it. + * read the buffer and replay changes into it. For filesystems using the + * ICREATE transactions, this means XFS_LI_ICREATE objects need to get + * treated the same as inode allocation buffers as they create and + * initialise the buffers directly. * * 4. Inode unlink buffers must be replayed after inode items are replayed. * This ensures that inodes are completely flushed to the inode buffer @@ -1632,10 +1636,17 @@ xlog_recover_add_to_trans( * from all the other buffers and move them to last. * * Hence, 4 lists, in order from head to tail: - * - buffer_list for all buffers except cancelled/inode unlink buffers - * - item_list for all non-buffer items - * - inode_buffer_list for inode unlink buffers - * - cancel_list for the cancelled buffers + * - buffer_list for all buffers except cancelled/inode unlink buffers + * - item_list for all non-buffer items + * - inode_buffer_list for inode unlink buffers + * - cancel_list for the cancelled buffers + * + * Note that we add objects to the tail of the lists so that first-to-last + * ordering is preserved within the lists. Adding objects to the head of the + * list means when we traverse from the head we walk them in last-to-first + * order. For cancelled buffers and inode unlink buffers this doesn't matter, + * but for all other items there may be specific ordering that we need to + * preserve. */ STATIC int xlog_recover_reorder_trans( @@ -1655,6 +1666,9 @@ xlog_recover_reorder_trans( xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; switch (ITEM_TYPE(item)) { + case XFS_LI_ICREATE: + list_move_tail(&item->ri_list, &buffer_list); + break; case XFS_LI_BUF: if (buf_f->blf_flags & XFS_BLF_CANCEL) { trace_xfs_log_recover_item_reorder_head(log, @@ -2982,6 +2996,93 @@ xlog_recover_efd_pass2( } /* + * This routine is called when an inode create format structure is found in a + * committed transaction in the log. It's purpose is to initialise the inodes + * being allocated on disk. This requires us to get inode cluster buffers that + * match the range to be intialised, stamped with inode templates and written + * by delayed write so that subsequent modifications will hit the cached buffer + * and only need writing out at the end of recovery. + */ +STATIC int +xlog_recover_do_icreate_pass2( + struct xlog *log, + struct list_head *buffer_list, + xlog_recover_item_t *item) +{ + struct xfs_mount *mp = log->l_mp; + struct xfs_icreate_log *icl; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + unsigned int count; + unsigned int isize; + xfs_agblock_t length; + + icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; + if (icl->icl_type != XFS_LI_ICREATE) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); + return EINVAL; + } + + if (icl->icl_size != 1) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); + return EINVAL; + } + + agno = be32_to_cpu(icl->icl_ag); + if (agno >= mp->m_sb.sb_agcount) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); + return EINVAL; + } + agbno = be32_to_cpu(icl->icl_agbno); + if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); + return EINVAL; + } + isize = be32_to_cpu(icl->icl_isize); + if (isize != mp->m_sb.sb_inodesize) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); + return EINVAL; + } + count = be32_to_cpu(icl->icl_count); + if (!count) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); + return EINVAL; + } + length = be32_to_cpu(icl->icl_length); + if (!length || length >= mp->m_sb.sb_agblocks) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); + return EINVAL; + } + + /* existing allocation is fixed value */ + ASSERT(count == XFS_IALLOC_INODES(mp)); + ASSERT(length == XFS_IALLOC_BLOCKS(mp)); + if (count != XFS_IALLOC_INODES(mp) || + length != XFS_IALLOC_BLOCKS(mp)) { + xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); + return EINVAL; + } + + /* + * Inode buffers can be freed. Do not replay the inode initialisation as + * we could be overwriting something written after this inode buffer was + * cancelled. + * + * XXX: we need to iterate all buffers and only init those that are not + * cancelled. I think that a more fine grained factoring of + * xfs_ialloc_inode_init may be appropriate here to enable this to be + * done easily. + */ + if (xlog_check_buffer_cancelled(log, + XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) + return 0; + + xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length, + be32_to_cpu(icl->icl_gen)); + return 0; +} + +/* * Free up any resources allocated by the transaction * * Remember that EFIs, EFDs, and IUNLINKs are handled later. @@ -3023,6 +3124,7 @@ xlog_recover_commit_pass1( case XFS_LI_EFI: case XFS_LI_EFD: case XFS_LI_DQUOT: + case XFS_LI_ICREATE: /* nothing to do in pass 1 */ return 0; default: @@ -3053,6 +3155,8 @@ xlog_recover_commit_pass2( return xlog_recover_efd_pass2(log, item); case XFS_LI_DQUOT: return xlog_recover_dquot_pass2(log, buffer_list, item); + case XFS_LI_ICREATE: + return xlog_recover_do_icreate_pass2(log, buffer_list, item); case XFS_LI_QUOTAOFF: /* nothing to do in pass2 */ return 0; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e8e310c05097..2b0ba3581656 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -336,6 +336,14 @@ xfs_mount_validate_sb( return XFS_ERROR(EWRONGFS); } + if ((sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) && + (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | + XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))) { + xfs_notice(mp, +"Super block has XFS_OQUOTA bits along with XFS_PQUOTA and/or XFS_GQUOTA bits.\n"); + return XFS_ERROR(EFSCORRUPTED); + } + /* * Version 5 superblock feature mask validation. Reject combinations the * kernel cannot support up front before checking anything else. For @@ -561,6 +569,18 @@ out_unwind: return error; } +static void +xfs_sb_quota_from_disk(struct xfs_sb *sbp) +{ + if (sbp->sb_qflags & XFS_OQUOTA_ENFD) + sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? + XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD; + if (sbp->sb_qflags & XFS_OQUOTA_CHKD) + sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? + XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; + sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); +} + void xfs_sb_from_disk( struct xfs_sb *to, @@ -622,6 +642,35 @@ xfs_sb_from_disk( to->sb_lsn = be64_to_cpu(from->sb_lsn); } +static inline void +xfs_sb_quota_to_disk( + xfs_dsb_t *to, + xfs_sb_t *from, + __int64_t *fields) +{ + __uint16_t qflags = from->sb_qflags; + + if (*fields & XFS_SB_QFLAGS) { + /* + * The in-core version of sb_qflags do not have + * XFS_OQUOTA_* flags, whereas the on-disk version + * does. So, convert incore XFS_{PG}QUOTA_* flags + * to on-disk XFS_OQUOTA_* flags. + */ + qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | + XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); + + if (from->sb_qflags & + (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) + qflags |= XFS_OQUOTA_ENFD; + if (from->sb_qflags & + (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) + qflags |= XFS_OQUOTA_CHKD; + to->sb_qflags = cpu_to_be16(qflags); + *fields &= ~XFS_SB_QFLAGS; + } +} + /* * Copy in core superblock to ondisk one. * @@ -643,6 +692,7 @@ xfs_sb_to_disk( if (!fields) return; + xfs_sb_quota_to_disk(to, from, &fields); while (fields) { f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); first = xfs_sb_info[f].offset; @@ -835,6 +885,7 @@ reread: */ xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); + xfs_sb_quota_from_disk(&mp->m_sb); /* * We must be able to do sector-sized and sector-aligned IO. */ @@ -987,42 +1038,27 @@ xfs_update_alignment(xfs_mount_t *mp) */ if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || (BBTOB(mp->m_swidth) & mp->m_blockmask)) { - if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check failed: " - "(sunit/swidth vs. blocksize)"); - return XFS_ERROR(EINVAL); - } - mp->m_dalign = mp->m_swidth = 0; + xfs_warn(mp, + "alignment check failed: sunit/swidth vs. blocksize(%d)", + sbp->sb_blocksize); + return XFS_ERROR(EINVAL); } else { /* * Convert the stripe unit and width to FSBs. */ mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { - if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check failed: " - "(sunit/swidth vs. ag size)"); - return XFS_ERROR(EINVAL); - } xfs_warn(mp, - "stripe alignment turned off: sunit(%d)/swidth(%d) " - "incompatible with agsize(%d)", - mp->m_dalign, mp->m_swidth, - sbp->sb_agblocks); - - mp->m_dalign = 0; - mp->m_swidth = 0; + "alignment check failed: sunit/swidth vs. agsize(%d)", + sbp->sb_agblocks); + return XFS_ERROR(EINVAL); } else if (mp->m_dalign) { mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); } else { - if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_warn(mp, "alignment check failed: " - "sunit(%d) less than bsize(%d)", - mp->m_dalign, - mp->m_blockmask +1); - return XFS_ERROR(EINVAL); - } - mp->m_swidth = 0; + xfs_warn(mp, + "alignment check failed: sunit(%d) less than bsize(%d)", + mp->m_dalign, sbp->sb_blocksize); + return XFS_ERROR(EINVAL); } } @@ -1039,6 +1075,10 @@ xfs_update_alignment(xfs_mount_t *mp) sbp->sb_width = mp->m_swidth; mp->m_update_flags |= XFS_SB_WIDTH; } + } else { + xfs_warn(mp, + "cannot change alignment: superblock does not support data alignment"); + return XFS_ERROR(EINVAL); } } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && xfs_sb_version_hasdalign(&mp->m_sb)) { diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b004cecdfb04..4e374d4a9189 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -192,8 +192,6 @@ typedef struct xfs_mount { xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ uint m_chsize; /* size of next field */ - struct xfs_chash *m_chash; /* fs private inode per-cluster - * hash table */ atomic_t m_active_trans; /* number trans frozen */ #ifdef HAVE_PERCPU_SB xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ @@ -229,8 +227,6 @@ typedef struct xfs_mount { operations, typically for disk errors in metadata */ #define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ -#define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to - user */ #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment allocations */ #define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b75c9bb6e71e..7a3e007b49f4 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -70,7 +70,7 @@ xfs_qm_dquot_walk( void *data) { struct xfs_quotainfo *qi = mp->m_quotainfo; - struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); + struct radix_tree_root *tree = xfs_dquot_tree(qi, type); uint32_t next_index; int last_error = 0; int skipped; @@ -189,7 +189,7 @@ xfs_qm_dqpurge( xfs_dqfunlock(dqp); xfs_dqunlock(dqp); - radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), + radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags), be32_to_cpu(dqp->q_core.d_id)); qi->qi_dquots--; @@ -299,8 +299,10 @@ xfs_qm_mount_quotas( */ if (!XFS_IS_UQUOTA_ON(mp)) mp->m_qflags &= ~XFS_UQUOTA_CHKD; - if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) - mp->m_qflags &= ~XFS_OQUOTA_CHKD; + if (!XFS_IS_GQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_GQUOTA_CHKD; + if (!XFS_IS_PQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_PQUOTA_CHKD; write_changes: /* @@ -489,8 +491,7 @@ xfs_qm_need_dqattach( return false; if (!XFS_NOT_DQATTACHED(mp, ip)) return false; - if (ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino) + if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return false; return true; } @@ -606,8 +607,7 @@ xfs_qm_dqdetach( trace_xfs_dquot_dqdetach(ip); - ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); - ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); + ASSERT(!xfs_is_quota_inode(&ip->i_mount->m_sb, ip->i_ino)); if (ip->i_udquot) { xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; @@ -1152,7 +1152,7 @@ xfs_qm_dqusage_adjust( * rootino must have its resources accounted for, not so with the quota * inodes. */ - if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { + if (xfs_is_quota_inode(&mp->m_sb, ino)) { *res = BULKSTAT_RV_NOTHING; return XFS_ERROR(EINVAL); } @@ -1262,19 +1262,20 @@ int xfs_qm_quotacheck( xfs_mount_t *mp) { - int done, count, error, error2; - xfs_ino_t lastino; - size_t structsz; - xfs_inode_t *uip, *gip; - uint flags; - LIST_HEAD (buffer_list); + int done, count, error, error2; + xfs_ino_t lastino; + size_t structsz; + uint flags; + LIST_HEAD (buffer_list); + struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip; + struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip; count = INT_MAX; structsz = 1; lastino = 0; flags = 0; - ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); + ASSERT(uip || gip); ASSERT(XFS_IS_QUOTA_RUNNING(mp)); xfs_notice(mp, "Quotacheck needed: Please wait."); @@ -1284,7 +1285,6 @@ xfs_qm_quotacheck( * their counters to zero. We need a clean slate. * We don't log our changes till later. */ - uip = mp->m_quotainfo->qi_uquotaip; if (uip) { error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, &buffer_list); @@ -1293,14 +1293,14 @@ xfs_qm_quotacheck( flags |= XFS_UQUOTA_CHKD; } - gip = mp->m_quotainfo->qi_gquotaip; if (gip) { error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, &buffer_list); if (error) goto error_return; - flags |= XFS_OQUOTA_CHKD; + flags |= XFS_IS_GQUOTA_ON(mp) ? + XFS_GQUOTA_CHKD : XFS_PQUOTA_CHKD; } do { @@ -1395,15 +1395,13 @@ STATIC int xfs_qm_init_quotainos( xfs_mount_t *mp) { - xfs_inode_t *uip, *gip; - int error; - __int64_t sbflags; - uint flags; + struct xfs_inode *uip = NULL; + struct xfs_inode *gip = NULL; + int error; + __int64_t sbflags = 0; + uint flags = 0; ASSERT(mp->m_quotainfo); - uip = gip = NULL; - sbflags = 0; - flags = 0; /* * Get the uquota and gquota inodes @@ -1412,19 +1410,18 @@ xfs_qm_init_quotainos( if (XFS_IS_UQUOTA_ON(mp) && mp->m_sb.sb_uquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_uquotino > 0); - if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip))) + error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, + 0, 0, &uip); + if (error) return XFS_ERROR(error); } if (XFS_IS_OQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_gquotino > 0); - if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip))) { - if (uip) - IRELE(uip); - return XFS_ERROR(error); - } + error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, + 0, 0, &gip); + if (error) + goto error_rele; } } else { flags |= XFS_QMOPT_SBVERSION; @@ -1439,10 +1436,11 @@ xfs_qm_init_quotainos( * temporarily switch to read-write to do this. */ if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { - if ((error = xfs_qm_qino_alloc(mp, &uip, + error = xfs_qm_qino_alloc(mp, &uip, sbflags | XFS_SB_UQUOTINO, - flags | XFS_QMOPT_UQUOTA))) - return XFS_ERROR(error); + flags | XFS_QMOPT_UQUOTA); + if (error) + goto error_rele; flags &= ~XFS_QMOPT_SBVERSION; } @@ -1451,18 +1449,21 @@ xfs_qm_init_quotainos( XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); error = xfs_qm_qino_alloc(mp, &gip, sbflags | XFS_SB_GQUOTINO, flags); - if (error) { - if (uip) - IRELE(uip); - - return XFS_ERROR(error); - } + if (error) + goto error_rele; } mp->m_quotainfo->qi_uquotaip = uip; mp->m_quotainfo->qi_gquotaip = gip; return 0; + +error_rele: + if (uip) + IRELE(uip); + if (gip) + IRELE(gip); + return XFS_ERROR(error); } STATIC void @@ -1473,7 +1474,7 @@ xfs_qm_dqfree_one( struct xfs_quotainfo *qi = mp->m_quotainfo; mutex_lock(&qi->qi_tree_lock); - radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), + radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags), be32_to_cpu(dqp->q_core.d_id)); qi->qi_dquots--; @@ -1659,7 +1660,8 @@ xfs_qm_vop_dqalloc( struct xfs_dquot **O_gdqpp) { struct xfs_mount *mp = ip->i_mount; - struct xfs_dquot *uq, *gq; + struct xfs_dquot *uq = NULL; + struct xfs_dquot *gq = NULL; int error; uint lockflags; @@ -1684,7 +1686,6 @@ xfs_qm_vop_dqalloc( } } - uq = gq = NULL; if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { if (ip->i_d.di_uid != uid) { /* @@ -1697,11 +1698,12 @@ xfs_qm_vop_dqalloc( * holding ilock. */ xfs_iunlock(ip, lockflags); - if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, + error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, XFS_DQ_USER, XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, - &uq))) { + &uq); + if (error) { ASSERT(error != ENOENT); return error; } @@ -1723,15 +1725,14 @@ xfs_qm_vop_dqalloc( if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { if (ip->i_d.di_gid != gid) { xfs_iunlock(ip, lockflags); - if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, + error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, XFS_DQ_GROUP, XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, - &gq))) { - if (uq) - xfs_qm_dqrele(uq); + &gq); + if (error) { ASSERT(error != ENOENT); - return error; + goto error_rele; } xfs_dqunlock(gq); lockflags = XFS_ILOCK_SHARED; @@ -1743,15 +1744,14 @@ xfs_qm_vop_dqalloc( } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { if (xfs_get_projid(ip) != prid) { xfs_iunlock(ip, lockflags); - if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, + error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, XFS_DQ_PROJ, XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, - &gq))) { - if (uq) - xfs_qm_dqrele(uq); + &gq); + if (error) { ASSERT(error != ENOENT); - return (error); + goto error_rele; } xfs_dqunlock(gq); lockflags = XFS_ILOCK_SHARED; @@ -1774,6 +1774,11 @@ xfs_qm_vop_dqalloc( else if (gq) xfs_qm_dqrele(gq); return 0; + +error_rele: + if (uq) + xfs_qm_dqrele(uq); + return error; } /* @@ -1821,29 +1826,31 @@ xfs_qm_vop_chown( */ int xfs_qm_vop_chown_reserve( - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dquot_t *udqp, - xfs_dquot_t *gdqp, - uint flags) + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_dquot *udqp, + struct xfs_dquot *gdqp, + uint flags) { - xfs_mount_t *mp = ip->i_mount; - uint delblks, blkflags, prjflags = 0; - xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; - int error; + struct xfs_mount *mp = ip->i_mount; + uint delblks, blkflags, prjflags = 0; + struct xfs_dquot *udq_unres = NULL; + struct xfs_dquot *gdq_unres = NULL; + struct xfs_dquot *udq_delblks = NULL; + struct xfs_dquot *gdq_delblks = NULL; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(XFS_IS_QUOTA_RUNNING(mp)); delblks = ip->i_delayed_blks; - delblksudq = delblksgdq = unresudq = unresgdq = NULL; blkflags = XFS_IS_REALTIME_INODE(ip) ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; if (XFS_IS_UQUOTA_ON(mp) && udqp && ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { - delblksudq = udqp; + udq_delblks = udqp; /* * If there are delayed allocation blocks, then we have to * unreserve those from the old dquot, and add them to the @@ -1851,7 +1858,7 @@ xfs_qm_vop_chown_reserve( */ if (delblks) { ASSERT(ip->i_udquot); - unresudq = ip->i_udquot; + udq_unres = ip->i_udquot; } } if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { @@ -1862,18 +1869,19 @@ xfs_qm_vop_chown_reserve( if (prjflags || (XFS_IS_GQUOTA_ON(ip->i_mount) && ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { - delblksgdq = gdqp; + gdq_delblks = gdqp; if (delblks) { ASSERT(ip->i_gdquot); - unresgdq = ip->i_gdquot; + gdq_unres = ip->i_gdquot; } } } - if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, - delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, - flags | blkflags | prjflags))) - return (error); + error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, + udq_delblks, gdq_delblks, ip->i_d.di_nblocks, 1, + flags | blkflags | prjflags); + if (error) + return error; /* * Do the delayed blks reservations/unreservations now. Since, these @@ -1885,14 +1893,15 @@ xfs_qm_vop_chown_reserve( /* * Do the reservations first. Unreservation can't fail. */ - ASSERT(delblksudq || delblksgdq); - ASSERT(unresudq || unresgdq); - if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, - flags | blkflags | prjflags))) - return (error); + ASSERT(udq_delblks || gdq_delblks); + ASSERT(udq_unres || gdq_unres); + error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, + udq_delblks, gdq_delblks, (xfs_qcnt_t)delblks, 0, + flags | blkflags | prjflags); + if (error) + return error; xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, + udq_unres, gdq_unres, -((xfs_qcnt_t)delblks), 0, blkflags); } diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 5d16a6e6900f..bdb4f8b95207 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -69,30 +69,62 @@ typedef struct xfs_quotainfo { struct shrinker qi_shrinker; } xfs_quotainfo_t; -#define XFS_DQUOT_TREE(qi, type) \ - ((type & XFS_DQ_USER) ? \ - &((qi)->qi_uquota_tree) : \ - &((qi)->qi_gquota_tree)) +static inline struct radix_tree_root * +xfs_dquot_tree( + struct xfs_quotainfo *qi, + int type) +{ + switch (type) { + case XFS_DQ_USER: + return &qi->qi_uquota_tree; + case XFS_DQ_GROUP: + case XFS_DQ_PROJ: + return &qi->qi_gquota_tree; + default: + ASSERT(0); + } + return NULL; +} +static inline struct xfs_inode * +xfs_dq_to_quota_inode(struct xfs_dquot *dqp) +{ + switch (dqp->dq_flags & XFS_DQ_ALLTYPES) { + case XFS_DQ_USER: + return dqp->q_mount->m_quotainfo->qi_uquotaip; + case XFS_DQ_GROUP: + case XFS_DQ_PROJ: + return dqp->q_mount->m_quotainfo->qi_gquotaip; + default: + ASSERT(0); + } + return NULL; +} extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks); -extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); -extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, - xfs_dquot_t *, xfs_dquot_t *, long, long, uint); -extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); -extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); +extern void xfs_trans_mod_dquot(struct xfs_trans *, + struct xfs_dquot *, uint, long); +extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, + struct xfs_mount *, struct xfs_dquot *, + struct xfs_dquot *, long, long, uint); +extern void xfs_trans_dqjoin(struct xfs_trans *, struct xfs_dquot *); +extern void xfs_trans_log_dquot(struct xfs_trans *, struct xfs_dquot *); /* * We keep the usr and grp dquots separately so that locking will be easier * to do at commit time. All transactions that we know of at this point * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. */ +enum { + XFS_QM_TRANS_USR = 0, + XFS_QM_TRANS_GRP, + XFS_QM_TRANS_DQTYPES +}; #define XFS_QM_TRANS_MAXDQS 2 -typedef struct xfs_dquot_acct { - xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; - xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; -} xfs_dquot_acct_t; +struct xfs_dquot_acct { + struct xfs_dqtrx dqs[XFS_QM_TRANS_DQTYPES][XFS_QM_TRANS_MAXDQS]; +}; /* * Users are allowed to have a usage exceeding their softlimit for @@ -106,22 +138,23 @@ typedef struct xfs_dquot_acct { #define XFS_QM_IWARNLIMIT 5 #define XFS_QM_RTBWARNLIMIT 5 -extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); -extern int xfs_qm_quotacheck(xfs_mount_t *); -extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); +extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); +extern int xfs_qm_quotacheck(struct xfs_mount *); +extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t); /* dquot stuff */ -extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint); -extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); +extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); +extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint); /* quota ops */ -extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); -extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, - fs_disk_quota_t *); +extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); +extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, + uint, struct fs_disk_quota *); extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, - fs_disk_quota_t *); -extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); -extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); -extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); + struct fs_disk_quota *); +extern int xfs_qm_scall_getqstat(struct xfs_mount *, + struct fs_quota_stat *); +extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); +extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); #endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 6cdf6ffc36a1..a08801ae24e2 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -117,11 +117,11 @@ xfs_qm_scall_quotaoff( } if (flags & XFS_GQUOTA_ACCT) { dqtype |= XFS_QMOPT_GQUOTA; - flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD); inactivate_flags |= XFS_GQUOTA_ACTIVE; } else if (flags & XFS_PQUOTA_ACCT) { dqtype |= XFS_QMOPT_PQUOTA; - flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + flags |= (XFS_PQUOTA_CHKD | XFS_PQUOTA_ENFD); inactivate_flags |= XFS_PQUOTA_ACTIVE; } @@ -335,14 +335,14 @@ xfs_qm_scall_quotaon( * quota acct on ondisk without m_qflags' knowing. */ if (((flags & XFS_UQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && - (flags & XFS_UQUOTA_ENFD)) - || + (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && + (flags & XFS_UQUOTA_ENFD)) || + ((flags & XFS_GQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && + (flags & XFS_GQUOTA_ENFD)) || ((flags & XFS_PQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && - (flags & XFS_GQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && - (flags & XFS_OQUOTA_ENFD))) { + (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && + (flags & XFS_PQUOTA_ENFD))) { xfs_debug(mp, "%s: Can't enforce without acct, flags=%x sbflags=%x\n", __func__, flags, mp->m_sb.sb_qflags); @@ -407,11 +407,11 @@ xfs_qm_scall_getqstat( struct fs_quota_stat *out) { struct xfs_quotainfo *q = mp->m_quotainfo; - struct xfs_inode *uip, *gip; - bool tempuqip, tempgqip; + struct xfs_inode *uip = NULL; + struct xfs_inode *gip = NULL; + bool tempuqip = false; + bool tempgqip = false; - uip = gip = NULL; - tempuqip = tempgqip = false; memset(out, 0, sizeof(fs_quota_stat_t)); out->qs_version = FS_QSTAT_VERSION; @@ -776,9 +776,12 @@ xfs_qm_scall_getquota( * gets turned off. No need to confuse the user level code, * so return zeroes in that case. */ - if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) || - (!XFS_IS_OQUOTA_ENFORCED(mp) && - (dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { + if ((!XFS_IS_UQUOTA_ENFORCED(mp) && + dqp->q_core.d_flags == XFS_DQ_USER) || + (!XFS_IS_GQUOTA_ENFORCED(mp) && + dqp->q_core.d_flags == XFS_DQ_GROUP) || + (!XFS_IS_PQUOTA_ENFORCED(mp) && + dqp->q_core.d_flags == XFS_DQ_PROJ)) { dst->d_btimer = 0; dst->d_itimer = 0; dst->d_rtbtimer = 0; @@ -786,8 +789,8 @@ xfs_qm_scall_getquota( #ifdef DEBUG if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || - (XFS_IS_OQUOTA_ENFORCED(mp) && - (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && + (XFS_IS_GQUOTA_ENFORCED(mp) && dst->d_flags == FS_GROUP_QUOTA) || + (XFS_IS_PQUOTA_ENFORCED(mp) && dst->d_flags == FS_PROJ_QUOTA)) && dst->d_id != 0) { if ((dst->d_bcount > dst->d_blk_softlimit) && (dst->d_blk_softlimit > 0)) { @@ -833,16 +836,16 @@ xfs_qm_export_flags( uflags = 0; if (flags & XFS_UQUOTA_ACCT) uflags |= FS_QUOTA_UDQ_ACCT; - if (flags & XFS_PQUOTA_ACCT) - uflags |= FS_QUOTA_PDQ_ACCT; if (flags & XFS_GQUOTA_ACCT) uflags |= FS_QUOTA_GDQ_ACCT; + if (flags & XFS_PQUOTA_ACCT) + uflags |= FS_QUOTA_PDQ_ACCT; if (flags & XFS_UQUOTA_ENFD) uflags |= FS_QUOTA_UDQ_ENFD; - if (flags & (XFS_OQUOTA_ENFD)) { - uflags |= (flags & XFS_GQUOTA_ACCT) ? - FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; - } + if (flags & XFS_GQUOTA_ENFD) + uflags |= FS_QUOTA_GDQ_ENFD; + if (flags & XFS_PQUOTA_ENFD) + uflags |= FS_QUOTA_PDQ_ENFD; return (uflags); } diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index c38068f26c55..c3483bab9cde 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -161,30 +161,42 @@ typedef struct xfs_qoff_logformat { #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ /* + * Conversion to and from the combined OQUOTA flag (if necessary) + * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk() + */ +#define XFS_GQUOTA_ENFD 0x0080 /* group quota limits enforced */ +#define XFS_GQUOTA_CHKD 0x0100 /* quotacheck run on group quotas */ +#define XFS_PQUOTA_ENFD 0x0200 /* project quota limits enforced */ +#define XFS_PQUOTA_CHKD 0x0400 /* quotacheck run on project quotas */ + +/* * Quota Accounting/Enforcement flags */ #define XFS_ALL_QUOTA_ACCT \ (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) -#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) -#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) +#define XFS_ALL_QUOTA_ENFD \ + (XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD) +#define XFS_ALL_QUOTA_CHKD \ + (XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD) #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) -#define XFS_IS_OQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_OQUOTA_ENFD) +#define XFS_IS_GQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_GQUOTA_ENFD) +#define XFS_IS_PQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_PQUOTA_ENFD) /* * Incore only flags for quotaoff - these bits get cleared when quota(s) * are in the process of getting turned off. These flags are in m_qflags but * never in sb_qflags. */ -#define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ -#define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ -#define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ +#define XFS_UQUOTA_ACTIVE 0x1000 /* uquotas are being turned off */ +#define XFS_GQUOTA_ACTIVE 0x2000 /* gquotas are being turned off */ +#define XFS_PQUOTA_ACTIVE 0x4000 /* pquotas are being turned off */ #define XFS_ALL_QUOTA_ACTIVE \ - (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) + (XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE) /* * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees @@ -268,24 +280,23 @@ typedef struct xfs_qoff_logformat { ((XFS_IS_UQUOTA_ON(mp) && \ (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ (XFS_IS_GQUOTA_ON(mp) && \ - ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ - (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \ + (mp->m_sb.sb_qflags & XFS_GQUOTA_CHKD) == 0) || \ (XFS_IS_PQUOTA_ON(mp) && \ - ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ - (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT)))) + (mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) == 0)) #define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ - XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ - XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) + XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ + XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD) #define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ - XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ - XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) + XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ + XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD) #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ - XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ - XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ - XFS_GQUOTA_ACCT) + XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ + XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\ + XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\ + XFS_PQUOTA_CHKD) /* diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 71926d630527..20e30f93b0c7 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -75,8 +75,10 @@ xfs_fs_set_xstate( flags |= XFS_GQUOTA_ACCT; if (uflags & FS_QUOTA_UDQ_ENFD) flags |= XFS_UQUOTA_ENFD; - if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) - flags |= XFS_OQUOTA_ENFD; + if (uflags & FS_QUOTA_GDQ_ENFD) + flags |= XFS_GQUOTA_ENFD; + if (uflags & FS_QUOTA_PDQ_ENFD) + flags |= XFS_PQUOTA_ENFD; switch (op) { case Q_XQUOTAON: diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 2de58a85833c..78f9e70b80c7 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -618,6 +618,12 @@ xfs_sb_has_incompat_log_feature( return (sbp->sb_features_log_incompat & feature) != 0; } +static inline bool +xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) +{ + return (ino == sbp->sb_uquotino || ino == sbp->sb_gquotino); +} + /* * end of superblock version macros */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3033ba5e9762..1d68ffcdeaa7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -51,6 +51,7 @@ #include "xfs_inode_item.h" #include "xfs_icache.h" #include "xfs_trace.h" +#include "xfs_icreate_item.h" #include <linux/namei.h> #include <linux/init.h> @@ -359,17 +360,17 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_PQUOTA) || !strcmp(this_char, MNTOPT_PRJQUOTA)) { mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_OQUOTA_ENFD); + XFS_PQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_OQUOTA_ENFD; + mp->m_qflags &= ~XFS_PQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_GQUOTA) || !strcmp(this_char, MNTOPT_GRPQUOTA)) { mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_OQUOTA_ENFD); + XFS_GQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_OQUOTA_ENFD; + mp->m_qflags &= ~XFS_GQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { xfs_warn(mp, "delaylog is the default now, option is deprecated."); @@ -439,20 +440,15 @@ xfs_parseargs( } done: - if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { + if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) { /* * At this point the superblock has not been read * in, therefore we do not know the block size. * Before the mount call ends we will convert * these to FSBs. */ - if (dsunit) { - mp->m_dalign = dsunit; - mp->m_flags |= XFS_MOUNT_RETERR; - } - - if (dswidth) - mp->m_swidth = dswidth; + mp->m_dalign = dsunit; + mp->m_swidth = dswidth; } if (mp->m_logbufs != -1 && @@ -563,12 +559,12 @@ xfs_showargs( /* Either project or group quotas can be active, not both */ if (mp->m_qflags & XFS_PQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) + if (mp->m_qflags & XFS_PQUOTA_ENFD) seq_puts(m, "," MNTOPT_PRJQUOTA); else seq_puts(m, "," MNTOPT_PQUOTANOENF); } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) + if (mp->m_qflags & XFS_GQUOTA_ENFD) seq_puts(m, "," MNTOPT_GRPQUOTA); else seq_puts(m, "," MNTOPT_GQUOTANOENF); @@ -1136,8 +1132,8 @@ xfs_fs_statfs( spin_unlock(&mp->m_sb_lock); if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == - (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == + (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) xfs_qm_statvfs(ip, statp); return 0; } @@ -1481,6 +1477,10 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); + /* version 5 superblocks support inode version counters. */ + if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) + sb->s_flags |= MS_I_VERSION; + error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; @@ -1655,9 +1655,15 @@ xfs_init_zones(void) KM_ZONE_SPREAD, NULL); if (!xfs_ili_zone) goto out_destroy_inode_zone; + xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item), + "xfs_icr"); + if (!xfs_icreate_zone) + goto out_destroy_ili_zone; return 0; + out_destroy_ili_zone: + kmem_zone_destroy(xfs_ili_zone); out_destroy_inode_zone: kmem_zone_destroy(xfs_inode_zone); out_destroy_efi_zone: @@ -1696,6 +1702,7 @@ xfs_destroy_zones(void) * destroy caches. */ rcu_barrier(); + kmem_zone_destroy(xfs_icreate_zone); kmem_zone_destroy(xfs_ili_zone); kmem_zone_destroy(xfs_inode_zone); kmem_zone_destroy(xfs_efi_zone); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 195a403e1522..e830fb56e27f 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -358,7 +358,8 @@ xfs_symlink( int n; xfs_buf_t *bp; prid_t prid; - struct xfs_dquot *udqp, *gdqp; + struct xfs_dquot *udqp = NULL; + struct xfs_dquot *gdqp = NULL; uint resblks; *ipp = NULL; @@ -585,7 +586,7 @@ xfs_symlink( /* * Free a symlink that has blocks associated with it. */ -int +STATIC int xfs_inactive_symlink_rmt( xfs_inode_t *ip, xfs_trans_t **tpp) @@ -606,7 +607,7 @@ xfs_inactive_symlink_rmt( tp = *tpp; mp = ip->i_mount; - ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip)); + ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS); /* * We're freeing a symlink that has some * blocks allocated to it. Free the @@ -720,3 +721,47 @@ xfs_inactive_symlink_rmt( error0: return error; } + +/* + * xfs_inactive_symlink - free a symlink + */ +int +xfs_inactive_symlink( + struct xfs_inode *ip, + struct xfs_trans **tp) +{ + struct xfs_mount *mp = ip->i_mount; + int pathlen; + + trace_xfs_inactive_symlink(ip); + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + /* + * Zero length symlinks _can_ exist. + */ + pathlen = (int)ip->i_d.di_size; + if (!pathlen) + return 0; + + if (pathlen < 0 || pathlen > MAXPATHLEN) { + xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)", + __func__, (unsigned long long)ip->i_ino, pathlen); + ASSERT(0); + return XFS_ERROR(EFSCORRUPTED); + } + + if (ip->i_df.if_flags & XFS_IFINLINE) { + if (ip->i_df.if_bytes > 0) + xfs_idata_realloc(ip, -(ip->i_df.if_bytes), + XFS_DATA_FORK); + ASSERT(ip->i_df.if_bytes == 0); + return 0; + } + + /* remove the remote symlink */ + return xfs_inactive_symlink_rmt(ip, tp); +} diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index b39398d2097c..374394880c01 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -60,7 +60,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, umode_t mode, struct xfs_inode **ipp); int xfs_readlink(struct xfs_inode *ip, char *link); -int xfs_inactive_symlink_rmt(struct xfs_inode *ip, struct xfs_trans **tpp); +int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp); #endif /* __KERNEL__ */ #endif /* __XFS_SYMLINK_H */ diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 2801b5ce6cdb..1743b9f8e23d 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -25,11 +25,11 @@ static struct ctl_table_header *xfs_table_header; #ifdef CONFIG_PROC_FS STATIC int xfs_stats_clear_proc_handler( - ctl_table *ctl, - int write, - void __user *buffer, - size_t *lenp, - loff_t *ppos) + struct ctl_table *ctl, + int write, + void __user *buffer, + size_t *lenp, + loff_t *ppos) { int c, ret, *valp = ctl->data; __uint32_t vn_active; @@ -55,11 +55,11 @@ xfs_stats_clear_proc_handler( STATIC int xfs_panic_mask_proc_handler( - ctl_table *ctl, - int write, - void __user *buffer, - size_t *lenp, - loff_t *ppos) + struct ctl_table *ctl, + int write, + void __user *buffer, + size_t *lenp, + loff_t *ppos) { int ret, *valp = ctl->data; @@ -74,7 +74,7 @@ xfs_panic_mask_proc_handler( } #endif /* CONFIG_PROC_FS */ -static ctl_table xfs_table[] = { +static struct ctl_table xfs_table[] = { { .procname = "irix_sgid_inherit", .data = &xfs_params.sgid_inherit.val, @@ -227,7 +227,7 @@ static ctl_table xfs_table[] = { {} }; -static ctl_table xfs_dir_table[] = { +static struct ctl_table xfs_dir_table[] = { { .procname = "xfs", .mode = 0555, @@ -236,7 +236,7 @@ static ctl_table xfs_dir_table[] = { {} }; -static ctl_table xfs_root_table[] = { +static struct ctl_table xfs_root_table[] = { { .procname = "fs", .mode = 0555, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index a04701de6bbd..47910e638c18 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -486,9 +486,12 @@ DEFINE_EVENT(xfs_buf_item_class, name, \ TP_PROTO(struct xfs_buf_log_item *bip), \ TP_ARGS(bip)) DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); @@ -508,6 +511,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); +DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); DECLARE_EVENT_CLASS(xfs_lock_class, TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, @@ -571,6 +575,7 @@ DEFINE_INODE_EVENT(xfs_iget_miss); DEFINE_INODE_EVENT(xfs_getattr); DEFINE_INODE_EVENT(xfs_setattr); DEFINE_INODE_EVENT(xfs_readlink); +DEFINE_INODE_EVENT(xfs_inactive_symlink); DEFINE_INODE_EVENT(xfs_alloc_file_space); DEFINE_INODE_EVENT(xfs_free_file_space); DEFINE_INODE_EVENT(xfs_readdir); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 2fd7c1ff1d21..35a229981354 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -234,71 +234,93 @@ xfs_calc_remove_reservation( } /* - * For symlink we can modify: + * For create, break it in to the two cases that the transaction + * covers. We start with the modify case - allocation done by modification + * of the state of existing inodes - and the allocation case. + */ + +/* + * For create we can modify: * the parent directory inode: inode size * the new inode: inode size - * the inode btree entry: 1 block + * the inode btree entry: block size + * the superblock for the nlink flag: sector size * the directory btree: (max depth + v2) * dir block size * the directory inode's bmap btree: (max depth + v2) * block size - * the blocks for the symlink: 1 kB - * Or in the first xact we allocate some inodes giving: + */ +STATIC uint +xfs_calc_create_resv_modify( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + (uint)XFS_FSB_TO_B(mp, 1) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); +} + +/* + * For create we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize + * the superblock for the nlink flag: sector size * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + * the allocation btrees: 2 trees * (max depth - 1) * block size */ STATIC uint -xfs_calc_symlink_reservation( +xfs_calc_create_resv_alloc( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + mp->m_sb.sb_sectsize + + xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +STATIC uint +__xfs_calc_create_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + - xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(1, 1024)), - (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(mp->m_in_maxlevels, - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)))); + MAX(xfs_calc_create_resv_alloc(mp), + xfs_calc_create_resv_modify(mp)); } /* - * For create we can modify: - * the parent directory inode: inode size - * the new inode: inode size - * the inode btree entry: block size - * the superblock for the nlink flag: sector size - * the directory btree: (max depth + v2) * dir block size - * the directory inode's bmap btree: (max depth + v2) * block size - * Or in the first xact we allocate some inodes giving: + * For icreate we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size */ STATIC uint -xfs_calc_create_reservation( +xfs_calc_icreate_resv_alloc( struct xfs_mount *mp) { + return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + mp->m_sb.sb_sectsize + + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +STATIC uint +xfs_calc_icreate_reservation(xfs_mount_t *mp) +{ return XFS_DQUOT_LOGRES(mp) + - MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - (uint)XFS_FSB_TO_B(mp, 1) + - xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), - XFS_FSB_TO_B(mp, 1))), - (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - mp->m_sb.sb_sectsize + - xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(mp->m_in_maxlevels, - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), - XFS_FSB_TO_B(mp, 1)))); + MAX(xfs_calc_icreate_resv_alloc(mp), + xfs_calc_create_resv_modify(mp)); +} + +STATIC uint +xfs_calc_create_reservation( + struct xfs_mount *mp) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return xfs_calc_icreate_reservation(mp); + return __xfs_calc_create_reservation(mp); + } /* @@ -311,6 +333,20 @@ xfs_calc_mkdir_reservation( return xfs_calc_create_reservation(mp); } + +/* + * Making a new symplink is the same as creating a new file, but + * with the added blocks for remote symlink data which can be up to 1kB in + * length (MAXPATHLEN). + */ +STATIC uint +xfs_calc_symlink_reservation( + struct xfs_mount *mp) +{ + return xfs_calc_create_reservation(mp) + + xfs_calc_buf_res(1, MAXPATHLEN); +} + /* * In freeing an inode we can modify: * the inode being freed: inode size diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index a44dba5b2cdb..2b4946393e30 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -48,6 +48,7 @@ typedef struct xfs_trans_header { #define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */ #define XFS_LI_DQUOT 0x123d #define XFS_LI_QUOTAOFF 0x123e +#define XFS_LI_ICREATE 0x123f #define XFS_LI_TYPE_DESC \ { XFS_LI_EFI, "XFS_LI_EFI" }, \ @@ -107,7 +108,8 @@ typedef struct xfs_trans_header { #define XFS_TRANS_SWAPEXT 40 #define XFS_TRANS_SB_COUNT 41 #define XFS_TRANS_CHECKPOINT 42 -#define XFS_TRANS_TYPE_MAX 42 +#define XFS_TRANS_ICREATE 43 +#define XFS_TRANS_TYPE_MAX 43 /* new transaction types need to be reflected in xfs_logprint(8) */ #define XFS_TRANS_TYPES \ @@ -210,23 +212,18 @@ struct xfs_log_item_desc { /* * Per-extent log reservation for the allocation btree changes * involved in freeing or allocating an extent. - * 2 trees * (2 blocks/level * max depth - 1) * block size + * 2 trees * (2 blocks/level * max depth - 1) */ -#define XFS_ALLOCFREE_LOG_RES(mp,nx) \ - ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1))) #define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) /* * Per-directory log reservation for any directory change. - * dir blocks: (1 btree block per level + data block + free block) * dblock size - * bmap btree: (levels + 2) * max depth * block size + * dir blocks: (1 btree block per level + data block + free block) + * bmap btree: (levels + 2) * max depth * v2 directory blocks can be fragmented below the dirblksize down to the fsb * size, so account for that in the DAENTER macros. */ -#define XFS_DIROP_LOG_RES(mp) \ - (XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \ - (XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1))) #define XFS_DIROP_LOG_COUNT(mp) \ (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) @@ -503,6 +500,7 @@ void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 73a5fa457e16..aa5a04b844d6 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -397,7 +397,6 @@ shutdown_abort: return XFS_ERROR(EIO); } - /* * Release the buffer bp which was previously acquired with one of the * xfs_trans_... buffer allocation routines if the buffer has not @@ -603,8 +602,14 @@ xfs_trans_log_buf(xfs_trans_t *tp, tp->t_flags |= XFS_TRANS_DIRTY; bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; - bip->bli_flags |= XFS_BLI_LOGGED; - xfs_buf_item_log(bip, first, last); + + /* + * If we have an ordered buffer we are not logging any dirty range but + * it still needs to be marked dirty and that it has been logged. + */ + bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED; + if (!(bip->bli_flags & XFS_BLI_ORDERED)) + xfs_buf_item_log(bip, first, last); } @@ -757,6 +762,29 @@ xfs_trans_inode_alloc_buf( } /* + * Mark the buffer as ordered for this transaction. This means + * that the contents of the buffer are not recorded in the transaction + * but it is tracked in the AIL as though it was. This allows us + * to record logical changes in transactions rather than the physical + * changes we make to the buffer without changing writeback ordering + * constraints of metadata buffers. + */ +void +xfs_trans_ordered_buf( + struct xfs_trans *tp, + struct xfs_buf *bp) +{ + struct xfs_buf_log_item *bip = bp->b_fspriv; + + ASSERT(bp->b_transp == tp); + ASSERT(bip != NULL); + ASSERT(atomic_read(&bip->bli_refcount) > 0); + + bip->bli_flags |= XFS_BLI_ORDERED; + trace_xfs_buf_item_ordered(bip); +} + +/* * Set the type of the buffer for log recovery so that it can correctly identify * and hence attach the correct buffer ops to the buffer after replay. */ diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index fec75d023703..3ba64d540168 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -103,8 +103,6 @@ xfs_trans_dup_dqinfo( return; xfs_trans_alloc_dqinfo(ntp); - oqa = otp->t_dqinfo->dqa_usrdquots; - nqa = ntp->t_dqinfo->dqa_usrdquots; /* * Because the quota blk reservation is carried forward, @@ -113,7 +111,9 @@ xfs_trans_dup_dqinfo( if(otp->t_flags & XFS_TRANS_DQ_DIRTY) ntp->t_flags |= XFS_TRANS_DQ_DIRTY; - for (j = 0; j < 2; j++) { + for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { + oqa = otp->t_dqinfo->dqs[j]; + nqa = ntp->t_dqinfo->dqs[j]; for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { if (oqa[i].qt_dquot == NULL) break; @@ -138,8 +138,6 @@ xfs_trans_dup_dqinfo( oq->qt_ino_res = oq->qt_ino_res_used; } - oqa = otp->t_dqinfo->dqa_grpdquots; - nqa = ntp->t_dqinfo->dqa_grpdquots; } } @@ -157,8 +155,7 @@ xfs_trans_mod_dquot_byino( if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp) || - ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino) + xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return; if (tp->t_dqinfo == NULL) @@ -170,16 +167,18 @@ xfs_trans_mod_dquot_byino( (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); } -STATIC xfs_dqtrx_t * +STATIC struct xfs_dqtrx * xfs_trans_get_dqtrx( - xfs_trans_t *tp, - xfs_dquot_t *dqp) + struct xfs_trans *tp, + struct xfs_dquot *dqp) { - int i; - xfs_dqtrx_t *qa; + int i; + struct xfs_dqtrx *qa; - qa = XFS_QM_ISUDQ(dqp) ? - tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots; + if (XFS_QM_ISUDQ(dqp)) + qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_USR]; + else + qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_GRP]; for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { if (qa[i].qt_dquot == NULL || @@ -339,12 +338,10 @@ xfs_trans_apply_dquot_deltas( return; ASSERT(tp->t_dqinfo); - qa = tp->t_dqinfo->dqa_usrdquots; - for (j = 0; j < 2; j++) { - if (qa[0].qt_dquot == NULL) { - qa = tp->t_dqinfo->dqa_grpdquots; + for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { + qa = tp->t_dqinfo->dqs[j]; + if (qa[0].qt_dquot == NULL) continue; - } /* * Lock all of the dquots and join them to the transaction. @@ -495,10 +492,6 @@ xfs_trans_apply_dquot_deltas( ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); } - /* - * Do the group quotas next - */ - qa = tp->t_dqinfo->dqa_grpdquots; } } @@ -521,9 +514,9 @@ xfs_trans_unreserve_and_mod_dquots( if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) return; - qa = tp->t_dqinfo->dqa_usrdquots; + for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { + qa = tp->t_dqinfo->dqs[j]; - for (j = 0; j < 2; j++) { for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { qtrx = &qa[i]; /* @@ -565,7 +558,6 @@ xfs_trans_unreserve_and_mod_dquots( xfs_dqunlock(dqp); } - qa = tp->t_dqinfo->dqa_grpdquots; } } @@ -640,8 +632,8 @@ xfs_trans_dqresv( if ((flags & XFS_QMOPT_FORCE_RES) == 0 && dqp->q_core.d_id && ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || - (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && - (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { + (XFS_IS_GQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISGDQ(dqp)) || + (XFS_IS_PQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISPDQ(dqp)))) { if (nblks > 0) { /* * dquot is locked already. See if we'd go over the @@ -748,15 +740,15 @@ error_return: */ int xfs_trans_reserve_quota_bydquots( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_dquot_t *udqp, - xfs_dquot_t *gdqp, - long nblks, - long ninos, - uint flags) + struct xfs_trans *tp, + struct xfs_mount *mp, + struct xfs_dquot *udqp, + struct xfs_dquot *gdqp, + long nblks, + long ninos, + uint flags) { - int resvd = 0, error; + int error; if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; @@ -771,28 +763,24 @@ xfs_trans_reserve_quota_bydquots( (flags & ~XFS_QMOPT_ENOSPC)); if (error) return error; - resvd = 1; } if (gdqp) { error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); - if (error) { - /* - * can't do it, so backout previous reservation - */ - if (resvd) { - flags |= XFS_QMOPT_FORCE_RES; - xfs_trans_dqresv(tp, mp, udqp, - -nblks, -ninos, flags); - } - return error; - } + if (error) + goto unwind_usr; } /* * Didn't change anything critical, so, no need to log */ return 0; + +unwind_usr: + flags |= XFS_QMOPT_FORCE_RES; + if (udqp) + xfs_trans_dqresv(tp, mp, udqp, -nblks, -ninos, flags); + return error; } @@ -816,8 +804,7 @@ xfs_trans_reserve_quota_nblks( if (XFS_IS_PQUOTA_ON(mp)) flags |= XFS_QMOPT_ENOSPC; - ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); - ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); + ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index ac6d567704db..53dfe46f3680 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -112,6 +112,17 @@ xfs_trans_log_inode( ASSERT(ip->i_itemp != NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + /* + * First time we log the inode in a transaction, bump the inode change + * counter if it is configured for this to occur. + */ + if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && + IS_I_VERSION(VFS_I(ip))) { + inode_inc_iversion(VFS_I(ip)); + ip->i_d.di_changecount = VFS_I(ip)->i_version; + flags |= XFS_ILOG_CORE; + } + tp->t_flags |= XFS_TRANS_DIRTY; ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0176bb21f09a..42c0ef288aeb 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -322,18 +322,9 @@ xfs_inactive( xfs_trans_ijoin(tp, ip, 0); if (S_ISLNK(ip->i_d.di_mode)) { - /* - * Zero length symlinks _can_ exist. - */ - if (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) { - error = xfs_inactive_symlink_rmt(ip, &tp); - if (error) - goto out_cancel; - } else if (ip->i_df.if_bytes > 0) { - xfs_idata_realloc(ip, -(ip->i_df.if_bytes), - XFS_DATA_FORK); - ASSERT(ip->i_df.if_bytes == 0); - } + error = xfs_inactive_symlink(ip, &tp); + if (error) + goto out_cancel; } else if (truncate) { ip->i_d.di_size = 0; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |