summaryrefslogtreecommitdiff
path: root/fs/xfs/quota
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/quota
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/xfs/quota')
-rw-r--r--fs/xfs/quota/xfs_dquot.c1648
-rw-r--r--fs/xfs/quota/xfs_dquot.h224
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c715
-rw-r--r--fs/xfs/quota/xfs_dquot_item.h66
-rw-r--r--fs/xfs/quota/xfs_qm.c2848
-rw-r--r--fs/xfs/quota/xfs_qm.h236
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c410
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c149
-rw-r--r--fs/xfs/quota/xfs_qm_stats.h68
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c1458
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h192
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c941
12 files changed, 8955 insertions, 0 deletions
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
new file mode 100644
index 000000000000..740d20d33187
--- /dev/null
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -0,0 +1,1648 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_trans_priv.h"
+
+#include "xfs_qm.h"
+
+
+/*
+ LOCK ORDER
+
+ inode lock (ilock)
+ dquot hash-chain lock (hashlock)
+ xqm dquot freelist lock (freelistlock
+ mount's dquot list lock (mplistlock)
+ user dquot lock - lock ordering among dquots is based on the uid or gid
+ group dquot lock - similar to udquots. Between the two dquots, the udquot
+ has to be locked first.
+ pin lock - the dquot lock must be held to take this lock.
+ flush lock - ditto.
+*/
+
+STATIC void xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
+
+#ifdef DEBUG
+xfs_buftarg_t *xfs_dqerror_target;
+int xfs_do_dqerror;
+int xfs_dqreq_num;
+int xfs_dqerror_mod = 33;
+#endif
+
+/*
+ * Allocate and initialize a dquot. We don't always allocate fresh memory;
+ * we try to reclaim a free dquot if the number of incore dquots are above
+ * a threshold.
+ * The only field inside the core that gets initialized at this point
+ * is the d_id field. The idea is to fill in the entire q_core
+ * when we read in the on disk dquot.
+ */
+xfs_dquot_t *
+xfs_qm_dqinit(
+ xfs_mount_t *mp,
+ xfs_dqid_t id,
+ uint type)
+{
+ xfs_dquot_t *dqp;
+ boolean_t brandnewdquot;
+
+ brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
+ dqp->dq_flags = type;
+ INT_SET(dqp->q_core.d_id, ARCH_CONVERT, id);
+ dqp->q_mount = mp;
+
+ /*
+ * No need to re-initialize these if this is a reclaimed dquot.
+ */
+ if (brandnewdquot) {
+ dqp->dq_flnext = dqp->dq_flprev = dqp;
+ mutex_init(&dqp->q_qlock, MUTEX_DEFAULT, "xdq");
+ initnsema(&dqp->q_flock, 1, "fdq");
+ sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
+
+#ifdef XFS_DQUOT_TRACE
+ dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP);
+ xfs_dqtrace_entry(dqp, "DQINIT");
+#endif
+ } else {
+ /*
+ * Only the q_core portion was zeroed in dqreclaim_one().
+ * So, we need to reset others.
+ */
+ dqp->q_nrefs = 0;
+ dqp->q_blkno = 0;
+ dqp->MPL_NEXT = dqp->HL_NEXT = NULL;
+ dqp->HL_PREVP = dqp->MPL_PREVP = NULL;
+ dqp->q_bufoffset = 0;
+ dqp->q_fileoffset = 0;
+ dqp->q_transp = NULL;
+ dqp->q_gdquot = NULL;
+ dqp->q_res_bcount = 0;
+ dqp->q_res_icount = 0;
+ dqp->q_res_rtbcount = 0;
+ dqp->q_pincount = 0;
+ dqp->q_hash = NULL;
+ ASSERT(dqp->dq_flnext == dqp->dq_flprev);
+
+#ifdef XFS_DQUOT_TRACE
+ ASSERT(dqp->q_trace);
+ xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT");
+#endif
+ }
+
+ /*
+ * log item gets initialized later
+ */
+ return (dqp);
+}
+
+/*
+ * This is called to free all the memory associated with a dquot
+ */
+void
+xfs_qm_dqdestroy(
+ xfs_dquot_t *dqp)
+{
+ ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
+
+ mutex_destroy(&dqp->q_qlock);
+ freesema(&dqp->q_flock);
+ sv_destroy(&dqp->q_pinwait);
+
+#ifdef XFS_DQUOT_TRACE
+ if (dqp->q_trace)
+ ktrace_free(dqp->q_trace);
+ dqp->q_trace = NULL;
+#endif
+ kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
+ atomic_dec(&xfs_Gqm->qm_totaldquots);
+}
+
+/*
+ * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
+ */
+STATIC void
+xfs_qm_dqinit_core(
+ xfs_dqid_t id,
+ uint type,
+ xfs_dqblk_t *d)
+{
+ /*
+ * Caller has zero'd the entire dquot 'chunk' already.
+ */
+ INT_SET(d->dd_diskdq.d_magic, ARCH_CONVERT, XFS_DQUOT_MAGIC);
+ INT_SET(d->dd_diskdq.d_version, ARCH_CONVERT, XFS_DQUOT_VERSION);
+ INT_SET(d->dd_diskdq.d_id, ARCH_CONVERT, id);
+ INT_SET(d->dd_diskdq.d_flags, ARCH_CONVERT, type);
+}
+
+
+#ifdef XFS_DQUOT_TRACE
+/*
+ * Dquot tracing for debugging.
+ */
+/* ARGSUSED */
+void
+__xfs_dqtrace_entry(
+ xfs_dquot_t *dqp,
+ char *func,
+ void *retaddr,
+ xfs_inode_t *ip)
+{
+ xfs_dquot_t *udqp = NULL;
+ xfs_ino_t ino = 0;
+
+ ASSERT(dqp->q_trace);
+ if (ip) {
+ ino = ip->i_ino;
+ udqp = ip->i_udquot;
+ }
+ ktrace_enter(dqp->q_trace,
+ (void *)(__psint_t)DQUOT_KTRACE_ENTRY,
+ (void *)func,
+ (void *)(__psint_t)dqp->q_nrefs,
+ (void *)(__psint_t)dqp->dq_flags,
+ (void *)(__psint_t)dqp->q_res_bcount,
+ (void *)(__psint_t)INT_GET(dqp->q_core.d_bcount,
+ ARCH_CONVERT),
+ (void *)(__psint_t)INT_GET(dqp->q_core.d_icount,
+ ARCH_CONVERT),
+ (void *)(__psint_t)INT_GET(dqp->q_core.d_blk_hardlimit,
+ ARCH_CONVERT),
+ (void *)(__psint_t)INT_GET(dqp->q_core.d_blk_softlimit,
+ ARCH_CONVERT),
+ (void *)(__psint_t)INT_GET(dqp->q_core.d_ino_hardlimit,
+ ARCH_CONVERT),
+ (void *)(__psint_t)INT_GET(dqp->q_core.d_ino_softlimit,
+ ARCH_CONVERT),
+ (void *)(__psint_t)INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
+ (void *)(__psint_t)current_pid(),
+ (void *)(__psint_t)ino,
+ (void *)(__psint_t)retaddr,
+ (void *)(__psint_t)udqp);
+ return;
+}
+#endif
+
+
+/*
+ * If default limits are in force, push them into the dquot now.
+ * We overwrite the dquot limits only if they are zero and this
+ * is not the root dquot.
+ */
+void
+xfs_qm_adjust_dqlimits(
+ xfs_mount_t *mp,
+ xfs_disk_dquot_t *d)
+{
+ xfs_quotainfo_t *q = mp->m_quotainfo;
+
+ ASSERT(d->d_id);
+
+ if (q->qi_bsoftlimit && !d->d_blk_softlimit)
+ INT_SET(d->d_blk_softlimit, ARCH_CONVERT, q->qi_bsoftlimit);
+ if (q->qi_bhardlimit && !d->d_blk_hardlimit)
+ INT_SET(d->d_blk_hardlimit, ARCH_CONVERT, q->qi_bhardlimit);
+ if (q->qi_isoftlimit && !d->d_ino_softlimit)
+ INT_SET(d->d_ino_softlimit, ARCH_CONVERT, q->qi_isoftlimit);
+ if (q->qi_ihardlimit && !d->d_ino_hardlimit)
+ INT_SET(d->d_ino_hardlimit, ARCH_CONVERT, q->qi_ihardlimit);
+ if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
+ INT_SET(d->d_rtb_softlimit, ARCH_CONVERT, q->qi_rtbsoftlimit);
+ if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
+ INT_SET(d->d_rtb_hardlimit, ARCH_CONVERT, q->qi_rtbhardlimit);
+}
+
+/*
+ * Check the limits and timers of a dquot and start or reset timers
+ * if necessary.
+ * This gets called even when quota enforcement is OFF, which makes our
+ * life a little less complicated. (We just don't reject any quota
+ * reservations in that case, when enforcement is off).
+ * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
+ * enforcement's off.
+ * In contrast, warnings are a little different in that they don't
+ * 'automatically' get started when limits get exceeded.
+ */
+void
+xfs_qm_adjust_dqtimers(
+ xfs_mount_t *mp,
+ xfs_disk_dquot_t *d)
+{
+ ASSERT(d->d_id);
+
+#ifdef QUOTADEBUG
+ if (INT_GET(d->d_blk_hardlimit, ARCH_CONVERT))
+ ASSERT(INT_GET(d->d_blk_softlimit, ARCH_CONVERT) <=
+ INT_GET(d->d_blk_hardlimit, ARCH_CONVERT));
+ if (INT_GET(d->d_ino_hardlimit, ARCH_CONVERT))
+ ASSERT(INT_GET(d->d_ino_softlimit, ARCH_CONVERT) <=
+ INT_GET(d->d_ino_hardlimit, ARCH_CONVERT));
+ if (INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT))
+ ASSERT(INT_GET(d->d_rtb_softlimit, ARCH_CONVERT) <=
+ INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT));
+#endif
+ if (!d->d_btimer) {
+ if ((INT_GET(d->d_blk_softlimit, ARCH_CONVERT) &&
+ (INT_GET(d->d_bcount, ARCH_CONVERT) >=
+ INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) ||
+ (INT_GET(d->d_blk_hardlimit, ARCH_CONVERT) &&
+ (INT_GET(d->d_bcount, ARCH_CONVERT) >=
+ INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
+ INT_SET(d->d_btimer, ARCH_CONVERT,
+ get_seconds() + XFS_QI_BTIMELIMIT(mp));
+ }
+ } else {
+ if ((!d->d_blk_softlimit ||
+ (INT_GET(d->d_bcount, ARCH_CONVERT) <
+ INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) &&
+ (!d->d_blk_hardlimit ||
+ (INT_GET(d->d_bcount, ARCH_CONVERT) <
+ INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
+ d->d_btimer = 0;
+ }
+ }
+
+ if (!d->d_itimer) {
+ if ((INT_GET(d->d_ino_softlimit, ARCH_CONVERT) &&
+ (INT_GET(d->d_icount, ARCH_CONVERT) >=
+ INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) ||
+ (INT_GET(d->d_ino_hardlimit, ARCH_CONVERT) &&
+ (INT_GET(d->d_icount, ARCH_CONVERT) >=
+ INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
+ INT_SET(d->d_itimer, ARCH_CONVERT,
+ get_seconds() + XFS_QI_ITIMELIMIT(mp));
+ }
+ } else {
+ if ((!d->d_ino_softlimit ||
+ (INT_GET(d->d_icount, ARCH_CONVERT) <
+ INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) &&
+ (!d->d_ino_hardlimit ||
+ (INT_GET(d->d_icount, ARCH_CONVERT) <
+ INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
+ d->d_itimer = 0;
+ }
+ }
+
+ if (!d->d_rtbtimer) {
+ if ((INT_GET(d->d_rtb_softlimit, ARCH_CONVERT) &&
+ (INT_GET(d->d_rtbcount, ARCH_CONVERT) >=
+ INT_GET(d->d_rtb_softlimit, ARCH_CONVERT))) ||
+ (INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT) &&
+ (INT_GET(d->d_rtbcount, ARCH_CONVERT) >=
+ INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT)))) {
+ INT_SET(d->d_rtbtimer, ARCH_CONVERT,
+ get_seconds() + XFS_QI_RTBTIMELIMIT(mp));
+ }
+ } else {
+ if ((!d->d_rtb_softlimit ||
+ (INT_GET(d->d_rtbcount, ARCH_CONVERT) <
+ INT_GET(d->d_rtb_softlimit, ARCH_CONVERT))) &&
+ (!d->d_rtb_hardlimit ||
+ (INT_GET(d->d_rtbcount, ARCH_CONVERT) <
+ INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT)))) {
+ d->d_rtbtimer = 0;
+ }
+ }
+}
+
+/*
+ * Increment or reset warnings of a given dquot.
+ */
+int
+xfs_qm_dqwarn(
+ xfs_disk_dquot_t *d,
+ uint flags)
+{
+ int warned;
+
+ /*
+ * root's limits are not real limits.
+ */
+ if (!d->d_id)
+ return (0);
+
+ warned = 0;
+ if (INT_GET(d->d_blk_softlimit, ARCH_CONVERT) &&
+ (INT_GET(d->d_bcount, ARCH_CONVERT) >=
+ INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
+ if (flags & XFS_QMOPT_DOWARN) {
+ INT_MOD(d->d_bwarns, ARCH_CONVERT, +1);
+ warned++;
+ }
+ } else {
+ if (!d->d_blk_softlimit ||
+ (INT_GET(d->d_bcount, ARCH_CONVERT) <
+ INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
+ d->d_bwarns = 0;
+ }
+ }
+
+ if (INT_GET(d->d_ino_softlimit, ARCH_CONVERT) > 0 &&
+ (INT_GET(d->d_icount, ARCH_CONVERT) >=
+ INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
+ if (flags & XFS_QMOPT_DOWARN) {
+ INT_MOD(d->d_iwarns, ARCH_CONVERT, +1);
+ warned++;
+ }
+ } else {
+ if (!d->d_ino_softlimit ||
+ (INT_GET(d->d_icount, ARCH_CONVERT) <
+ INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
+ d->d_iwarns = 0;
+ }
+ }
+#ifdef QUOTADEBUG
+ if (INT_GET(d->d_iwarns, ARCH_CONVERT))
+ cmn_err(CE_DEBUG,
+ "--------@@Inode warnings running : %Lu >= %Lu",
+ INT_GET(d->d_icount, ARCH_CONVERT),
+ INT_GET(d->d_ino_softlimit, ARCH_CONVERT));
+ if (INT_GET(d->d_bwarns, ARCH_CONVERT))
+ cmn_err(CE_DEBUG,
+ "--------@@Blks warnings running : %Lu >= %Lu",
+ INT_GET(d->d_bcount, ARCH_CONVERT),
+ INT_GET(d->d_blk_softlimit, ARCH_CONVERT));
+#endif
+ return (warned);
+}
+
+
+/*
+ * initialize a buffer full of dquots and log the whole thing
+ */
+STATIC void
+xfs_qm_init_dquot_blk(
+ xfs_trans_t *tp,
+ xfs_mount_t *mp,
+ xfs_dqid_t id,
+ uint type,
+ xfs_buf_t *bp)
+{
+ xfs_dqblk_t *d;
+ int curid, i;
+
+ ASSERT(tp);
+ ASSERT(XFS_BUF_ISBUSY(bp));
+ ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+
+ d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
+
+ /*
+ * ID of the first dquot in the block - id's are zero based.
+ */
+ curid = id - (id % XFS_QM_DQPERBLK(mp));
+ ASSERT(curid >= 0);
+ memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
+ for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
+ xfs_qm_dqinit_core(curid, type, d);
+ xfs_trans_dquot_buf(tp, bp,
+ type & XFS_DQ_USER ?
+ XFS_BLI_UDQUOT_BUF :
+ XFS_BLI_GDQUOT_BUF);
+ xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1);
+}
+
+
+
+/*
+ * Allocate a block and fill it with dquots.
+ * This is called when the bmapi finds a hole.
+ */
+STATIC int
+xfs_qm_dqalloc(
+ xfs_trans_t *tp,
+ xfs_mount_t *mp,
+ xfs_dquot_t *dqp,
+ xfs_inode_t *quotip,
+ xfs_fileoff_t offset_fsb,
+ xfs_buf_t **O_bpp)
+{
+ xfs_fsblock_t firstblock;
+ xfs_bmap_free_t flist;
+ xfs_bmbt_irec_t map;
+ int nmaps, error, committed;
+ xfs_buf_t *bp;
+
+ ASSERT(tp != NULL);
+ xfs_dqtrace_entry(dqp, "DQALLOC");
+
+ /*
+ * Initialize the bmap freelist prior to calling bmapi code.
+ */
+ XFS_BMAP_INIT(&flist, &firstblock);
+ xfs_ilock(quotip, XFS_ILOCK_EXCL);
+ /*
+ * Return if this type of quotas is turned off while we didn't
+ * have an inode lock
+ */
+ if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+ xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+ return (ESRCH);
+ }
+
+ /*
+ * xfs_trans_commit normally decrements the vnode ref count
+ * when it unlocks the inode. Since we want to keep the quota
+ * inode around, we bump the vnode ref count now.
+ */
+ VN_HOLD(XFS_ITOV(quotip));
+
+ xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
+ nmaps = 1;
+ if ((error = xfs_bmapi(tp, quotip,
+ offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
+ XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
+ &firstblock,
+ XFS_QM_DQALLOC_SPACE_RES(mp),
+ &map, &nmaps, &flist))) {
+ goto error0;
+ }
+ ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
+ ASSERT(nmaps == 1);
+ ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+ (map.br_startblock != HOLESTARTBLOCK));
+
+ /*
+ * Keep track of the blkno to save a lookup later
+ */
+ dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+ /* now we can just get the buffer (there's nothing to read yet) */
+ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+ dqp->q_blkno,
+ XFS_QI_DQCHUNKLEN(mp),
+ 0);
+ if (!bp || (error = XFS_BUF_GETERROR(bp)))
+ goto error1;
+ /*
+ * Make a chunk of dquots out of this buffer and log
+ * the entire thing.
+ */
+ xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
+ dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
+ bp);
+
+ if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) {
+ goto error1;
+ }
+
+ *O_bpp = bp;
+ return 0;
+
+ error1:
+ xfs_bmap_cancel(&flist);
+ error0:
+ xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+
+ return (error);
+}
+
+/*
+ * Maps a dquot to the buffer containing its on-disk version.
+ * This returns a ptr to the buffer containing the on-disk dquot
+ * in the bpp param, and a ptr to the on-disk dquot within that buffer
+ */
+STATIC int
+xfs_qm_dqtobp(
+ xfs_trans_t *tp,
+ xfs_dquot_t *dqp,
+ xfs_disk_dquot_t **O_ddpp,
+ xfs_buf_t **O_bpp,
+ uint flags)
+{
+ xfs_bmbt_irec_t map;
+ int nmaps, error;
+ xfs_buf_t *bp;
+ xfs_inode_t *quotip;
+ xfs_mount_t *mp;
+ xfs_disk_dquot_t *ddq;
+ xfs_dqid_t id;
+ boolean_t newdquot;
+
+ mp = dqp->q_mount;
+ id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
+ nmaps = 1;
+ newdquot = B_FALSE;
+
+ /*
+ * If we don't know where the dquot lives, find out.
+ */
+ if (dqp->q_blkno == (xfs_daddr_t) 0) {
+ /* We use the id as an index */
+ dqp->q_fileoffset = (xfs_fileoff_t) ((uint)id /
+ XFS_QM_DQPERBLK(mp));
+ nmaps = 1;
+ quotip = XFS_DQ_TO_QIP(dqp);
+ xfs_ilock(quotip, XFS_ILOCK_SHARED);
+ /*
+ * Return if this type of quotas is turned off while we didn't
+ * have an inode lock
+ */
+ if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+ xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+ return (ESRCH);
+ }
+ /*
+ * Find the block map; no allocations yet
+ */
+ error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
+ XFS_DQUOT_CLUSTER_SIZE_FSB,
+ XFS_BMAPI_METADATA,
+ NULL, 0, &map, &nmaps, NULL);
+
+ xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+ if (error)
+ return (error);
+ ASSERT(nmaps == 1);
+ ASSERT(map.br_blockcount == 1);
+
+ /*
+ * offset of dquot in the (fixed sized) dquot chunk.
+ */
+ dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) *
+ sizeof(xfs_dqblk_t);
+ if (map.br_startblock == HOLESTARTBLOCK) {
+ /*
+ * We don't allocate unless we're asked to
+ */
+ if (!(flags & XFS_QMOPT_DQALLOC))
+ return (ENOENT);
+
+ ASSERT(tp);
+ if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip,
+ dqp->q_fileoffset, &bp)))
+ return (error);
+ newdquot = B_TRUE;
+ } else {
+ /*
+ * store the blkno etc so that we don't have to do the
+ * mapping all the time
+ */
+ dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+ }
+ }
+ ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
+ ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
+
+ /*
+ * Read in the buffer, unless we've just done the allocation
+ * (in which case we already have the buf).
+ */
+ if (! newdquot) {
+ xfs_dqtrace_entry(dqp, "DQTOBP READBUF");
+ if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+ dqp->q_blkno,
+ XFS_QI_DQCHUNKLEN(mp),
+ 0, &bp))) {
+ return (error);
+ }
+ if (error || !bp)
+ return XFS_ERROR(error);
+ }
+ ASSERT(XFS_BUF_ISBUSY(bp));
+ ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+
+ /*
+ * calculate the location of the dquot inside the buffer.
+ */
+ ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset);
+
+ /*
+ * A simple sanity check in case we got a corrupted dquot...
+ */
+ if (xfs_qm_dqcheck(ddq, id,
+ dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
+ flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
+ "dqtobp")) {
+ if (!(flags & XFS_QMOPT_DQREPAIR)) {
+ xfs_trans_brelse(tp, bp);
+ return XFS_ERROR(EIO);
+ }
+ XFS_BUF_BUSY(bp); /* We dirtied this */
+ }
+
+ *O_bpp = bp;
+ *O_ddpp = ddq;
+
+ return (0);
+}
+
+
+/*
+ * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
+ * and release the buffer immediately.
+ *
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqread(
+ xfs_trans_t *tp,
+ xfs_dqid_t id,
+ xfs_dquot_t *dqp, /* dquot to get filled in */
+ uint flags)
+{
+ xfs_disk_dquot_t *ddqp;
+ xfs_buf_t *bp;
+ int error;
+
+ /*
+ * get a pointer to the on-disk dquot and the buffer containing it
+ * dqp already knows its own type (GROUP/USER).
+ */
+ xfs_dqtrace_entry(dqp, "DQREAD");
+ if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) {
+ return (error);
+ }
+
+ /* copy everything from disk dquot to the incore dquot */
+ memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
+ ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
+ xfs_qm_dquot_logitem_init(dqp);
+
+ /*
+ * Reservation counters are defined as reservation plus current usage
+ * to avoid having to add everytime.
+ */
+ dqp->q_res_bcount = INT_GET(ddqp->d_bcount, ARCH_CONVERT);
+ dqp->q_res_icount = INT_GET(ddqp->d_icount, ARCH_CONVERT);
+ dqp->q_res_rtbcount = INT_GET(ddqp->d_rtbcount, ARCH_CONVERT);
+
+ /* Mark the buf so that this will stay incore a little longer */
+ XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
+
+ /*
+ * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
+ * So we need to release with xfs_trans_brelse().
+ * The strategy here is identical to that of inodes; we lock
+ * the dquot in xfs_qm_dqget() before making it accessible to
+ * others. This is because dquots, like inodes, need a good level of
+ * concurrency, and we don't want to take locks on the entire buffers
+ * for dquot accesses.
+ * Note also that the dquot buffer may even be dirty at this point, if
+ * this particular dquot was repaired. We still aren't afraid to
+ * brelse it because we have the changes incore.
+ */
+ ASSERT(XFS_BUF_ISBUSY(bp));
+ ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ xfs_trans_brelse(tp, bp);
+
+ return (error);
+}
+
+
+/*
+ * allocate an incore dquot from the kernel heap,
+ * and fill its core with quota information kept on disk.
+ * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
+ * if it wasn't already allocated.
+ */
+STATIC int
+xfs_qm_idtodq(
+ xfs_mount_t *mp,
+ xfs_dqid_t id, /* gid or uid, depending on type */
+ uint type, /* UDQUOT or GDQUOT */
+ uint flags, /* DQALLOC, DQREPAIR */
+ xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */
+{
+ xfs_dquot_t *dqp;
+ int error;
+ xfs_trans_t *tp;
+ int cancelflags=0;
+
+ dqp = xfs_qm_dqinit(mp, id, type);
+ tp = NULL;
+ if (flags & XFS_QMOPT_DQALLOC) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+ if ((error = xfs_trans_reserve(tp,
+ XFS_QM_DQALLOC_SPACE_RES(mp),
+ XFS_WRITE_LOG_RES(mp) +
+ BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 +
+ 128,
+ 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_WRITE_LOG_COUNT))) {
+ cancelflags = 0;
+ goto error0;
+ }
+ cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+ }
+
+ /*
+ * Read it from disk; xfs_dqread() takes care of
+ * all the necessary initialization of dquot's fields (locks, etc)
+ */
+ if ((error = xfs_qm_dqread(tp, id, dqp, flags))) {
+ /*
+ * This can happen if quotas got turned off (ESRCH),
+ * or if the dquot didn't exist on disk and we ask to
+ * allocate (ENOENT).
+ */
+ xfs_dqtrace_entry(dqp, "DQREAD FAIL");
+ cancelflags |= XFS_TRANS_ABORT;
+ goto error0;
+ }
+ if (tp) {
+ if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
+ NULL)))
+ goto error1;
+ }
+
+ *O_dqpp = dqp;
+ return (0);
+
+ error0:
+ ASSERT(error);
+ if (tp)
+ xfs_trans_cancel(tp, cancelflags);
+ error1:
+ xfs_qm_dqdestroy(dqp);
+ *O_dqpp = NULL;
+ return (error);
+}
+
+/*
+ * Lookup a dquot in the incore dquot hashtable. We keep two separate
+ * hashtables for user and group dquots; and, these are global tables
+ * inside the XQM, not per-filesystem tables.
+ * The hash chain must be locked by caller, and it is left locked
+ * on return. Returning dquot is locked.
+ */
+STATIC int
+xfs_qm_dqlookup(
+ xfs_mount_t *mp,
+ xfs_dqid_t id,
+ xfs_dqhash_t *qh,
+ xfs_dquot_t **O_dqpp)
+{
+ xfs_dquot_t *dqp;
+ uint flist_locked;
+ xfs_dquot_t *d;
+
+ ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+
+ flist_locked = B_FALSE;
+
+ /*
+ * Traverse the hashchain looking for a match
+ */
+ for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) {
+ /*
+ * We already have the hashlock. We don't need the
+ * dqlock to look at the id field of the dquot, since the
+ * id can't be modified without the hashlock anyway.
+ */
+ if (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id && dqp->q_mount == mp) {
+ xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP");
+ /*
+ * All in core dquots must be on the dqlist of mp
+ */
+ ASSERT(dqp->MPL_PREVP != NULL);
+
+ xfs_dqlock(dqp);
+ if (dqp->q_nrefs == 0) {
+ ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
+ if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
+ xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT");
+
+ /*
+ * We may have raced with dqreclaim_one()
+ * (and lost). So, flag that we don't
+ * want the dquot to be reclaimed.
+ */
+ dqp->dq_flags |= XFS_DQ_WANT;
+ xfs_dqunlock(dqp);
+ xfs_qm_freelist_lock(xfs_Gqm);
+ xfs_dqlock(dqp);
+ dqp->dq_flags &= ~(XFS_DQ_WANT);
+ }
+ flist_locked = B_TRUE;
+ }
+
+ /*
+ * id couldn't have changed; we had the hashlock all
+ * along
+ */
+ ASSERT(INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id);
+
+ if (flist_locked) {
+ if (dqp->q_nrefs != 0) {
+ xfs_qm_freelist_unlock(xfs_Gqm);
+ flist_locked = B_FALSE;
+ } else {
+ /*
+ * take it off the freelist
+ */
+ xfs_dqtrace_entry(dqp,
+ "DQLOOKUP: TAKEOFF FL");
+ XQM_FREELIST_REMOVE(dqp);
+ /* xfs_qm_freelist_print(&(xfs_Gqm->
+ qm_dqfreelist),
+ "after removal"); */
+ }
+ }
+
+ /*
+ * grab a reference
+ */
+ XFS_DQHOLD(dqp);
+
+ if (flist_locked)
+ xfs_qm_freelist_unlock(xfs_Gqm);
+ /*
+ * move the dquot to the front of the hashchain
+ */
+ ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+ if (dqp->HL_PREVP != &qh->qh_next) {
+ xfs_dqtrace_entry(dqp,
+ "DQLOOKUP: HASH MOVETOFRONT");
+ if ((d = dqp->HL_NEXT))
+ d->HL_PREVP = dqp->HL_PREVP;
+ *(dqp->HL_PREVP) = d;
+ d = qh->qh_next;
+ d->HL_PREVP = &dqp->HL_NEXT;
+ dqp->HL_NEXT = d;
+ dqp->HL_PREVP = &qh->qh_next;
+ qh->qh_next = dqp;
+ }
+ xfs_dqtrace_entry(dqp, "LOOKUP END");
+ *O_dqpp = dqp;
+ ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+ return (0);
+ }
+ }
+
+ *O_dqpp = NULL;
+ ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
+ return (1);
+}
+
+/*
+ * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
+ * a locked dquot, doing an allocation (if requested) as needed.
+ * When both an inode and an id are given, the inode's id takes precedence.
+ * That is, if the id changes while we don't hold the ilock inside this
+ * function, the new dquot is returned, not necessarily the one requested
+ * in the id argument.
+ */
+int
+xfs_qm_dqget(
+ xfs_mount_t *mp,
+ xfs_inode_t *ip, /* locked inode (optional) */
+ xfs_dqid_t id, /* gid or uid, depending on type */
+ uint type, /* UDQUOT or GDQUOT */
+ uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
+ xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
+{
+ xfs_dquot_t *dqp;
+ xfs_dqhash_t *h;
+ uint version;
+ int error;
+
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+ if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
+ (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
+ return (ESRCH);
+ }
+ h = XFS_DQ_HASH(mp, id, type);
+
+#ifdef DEBUG
+ if (xfs_do_dqerror) {
+ if ((xfs_dqerror_target == mp->m_ddev_targp) &&
+ (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
+ cmn_err(CE_DEBUG, "Returning error in dqget");
+ return (EIO);
+ }
+ }
+#endif
+
+ again:
+
+#ifdef DEBUG
+ ASSERT(type == XFS_DQ_USER || type == XFS_DQ_GROUP);
+ if (ip) {
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ if (type == XFS_DQ_USER)
+ ASSERT(ip->i_udquot == NULL);
+ else
+ ASSERT(ip->i_gdquot == NULL);
+ }
+#endif
+ XFS_DQ_HASH_LOCK(h);
+
+ /*
+ * Look in the cache (hashtable).
+ * The chain is kept locked during lookup.
+ */
+ if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+ XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
+ /*
+ * The dquot was found, moved to the front of the chain,
+ * taken off the freelist if it was on it, and locked
+ * at this point. Just unlock the hashchain and return.
+ */
+ ASSERT(*O_dqpp);
+ ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
+ XFS_DQ_HASH_UNLOCK(h);
+ xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)");
+ return (0); /* success */
+ }
+ XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+
+ /*
+ * Dquot cache miss. We don't want to keep the inode lock across
+ * a (potential) disk read. Also we don't want to deal with the lock
+ * ordering between quotainode and this inode. OTOH, dropping the inode
+ * lock here means dealing with a chown that can happen before
+ * we re-acquire the lock.
+ */
+ if (ip)
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ /*
+ * Save the hashchain version stamp, and unlock the chain, so that
+ * we don't keep the lock across a disk read
+ */
+ version = h->qh_version;
+ XFS_DQ_HASH_UNLOCK(h);
+
+ /*
+ * Allocate the dquot on the kernel heap, and read the ondisk
+ * portion off the disk. Also, do all the necessary initialization
+ * This can return ENOENT if dquot didn't exist on disk and we didn't
+ * ask it to allocate; ESRCH if quotas got turned off suddenly.
+ */
+ if ((error = xfs_qm_idtodq(mp, id, type,
+ flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
+ XFS_QMOPT_DOWARN),
+ &dqp))) {
+ if (ip)
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ return (error);
+ }
+
+ /*
+ * See if this is mount code calling to look at the overall quota limits
+ * which are stored in the id == 0 user or group's dquot.
+ * Since we may not have done a quotacheck by this point, just return
+ * the dquot without attaching it to any hashtables, lists, etc, or even
+ * taking a reference.
+ * The caller must dqdestroy this once done.
+ */
+ if (flags & XFS_QMOPT_DQSUSER) {
+ ASSERT(id == 0);
+ ASSERT(! ip);
+ goto dqret;
+ }
+
+ /*
+ * Dquot lock comes after hashlock in the lock ordering
+ */
+ if (ip) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (! XFS_IS_DQTYPE_ON(mp, type)) {
+ /* inode stays locked on return */
+ xfs_qm_dqdestroy(dqp);
+ return XFS_ERROR(ESRCH);
+ }
+ /*
+ * A dquot could be attached to this inode by now, since
+ * we had dropped the ilock.
+ */
+ if (type == XFS_DQ_USER) {
+ if (ip->i_udquot) {
+ xfs_qm_dqdestroy(dqp);
+ dqp = ip->i_udquot;
+ xfs_dqlock(dqp);
+ goto dqret;
+ }
+ } else {
+ if (ip->i_gdquot) {
+ xfs_qm_dqdestroy(dqp);
+ dqp = ip->i_gdquot;
+ xfs_dqlock(dqp);
+ goto dqret;
+ }
+ }
+ }
+
+ /*
+ * Hashlock comes after ilock in lock order
+ */
+ XFS_DQ_HASH_LOCK(h);
+ if (version != h->qh_version) {
+ xfs_dquot_t *tmpdqp;
+ /*
+ * Now, see if somebody else put the dquot in the
+ * hashtable before us. This can happen because we didn't
+ * keep the hashchain lock. We don't have to worry about
+ * lock order between the two dquots here since dqp isn't
+ * on any findable lists yet.
+ */
+ if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+ /*
+ * Duplicate found. Just throw away the new dquot
+ * and start over.
+ */
+ xfs_qm_dqput(tmpdqp);
+ XFS_DQ_HASH_UNLOCK(h);
+ xfs_qm_dqdestroy(dqp);
+ XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+ goto again;
+ }
+ }
+
+ /*
+ * Put the dquot at the beginning of the hash-chain and mp's list
+ * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
+ */
+ ASSERT(XFS_DQ_IS_HASH_LOCKED(h));
+ dqp->q_hash = h;
+ XQM_HASHLIST_INSERT(h, dqp);
+
+ /*
+ * Attach this dquot to this filesystem's list of all dquots,
+ * kept inside the mount structure in m_quotainfo field
+ */
+ xfs_qm_mplist_lock(mp);
+
+ /*
+ * We return a locked dquot to the caller, with a reference taken
+ */
+ xfs_dqlock(dqp);
+ dqp->q_nrefs = 1;
+
+ XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp);
+
+ xfs_qm_mplist_unlock(mp);
+ XFS_DQ_HASH_UNLOCK(h);
+ dqret:
+ ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip));
+ xfs_dqtrace_entry(dqp, "DQGET DONE");
+ *O_dqpp = dqp;
+ return (0);
+}
+
+
+/*
+ * Release a reference to the dquot (decrement ref-count)
+ * and unlock it. If there is a group quota attached to this
+ * dquot, carefully release that too without tripping over
+ * deadlocks'n'stuff.
+ */
+void
+xfs_qm_dqput(
+ xfs_dquot_t *dqp)
+{
+ xfs_dquot_t *gdqp;
+
+ ASSERT(dqp->q_nrefs > 0);
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ xfs_dqtrace_entry(dqp, "DQPUT");
+
+ if (dqp->q_nrefs != 1) {
+ dqp->q_nrefs--;
+ xfs_dqunlock(dqp);
+ return;
+ }
+
+ /*
+ * drop the dqlock and acquire the freelist and dqlock
+ * in the right order; but try to get it out-of-order first
+ */
+ if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
+ xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT");
+ xfs_dqunlock(dqp);
+ xfs_qm_freelist_lock(xfs_Gqm);
+ xfs_dqlock(dqp);
+ }
+
+ while (1) {
+ gdqp = NULL;
+
+ /* We can't depend on nrefs being == 1 here */
+ if (--dqp->q_nrefs == 0) {
+ xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST");
+ /*
+ * insert at end of the freelist.
+ */
+ XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp);
+
+ /*
+ * If we just added a udquot to the freelist, then
+ * we want to release the gdquot reference that
+ * it (probably) has. Otherwise it'll keep the
+ * gdquot from getting reclaimed.
+ */
+ if ((gdqp = dqp->q_gdquot)) {
+ /*
+ * Avoid a recursive dqput call
+ */
+ xfs_dqlock(gdqp);
+ dqp->q_gdquot = NULL;
+ }
+
+ /* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist),
+ "@@@@@++ Free list (after append) @@@@@+");
+ */
+ }
+ xfs_dqunlock(dqp);
+
+ /*
+ * If we had a group quota inside the user quota as a hint,
+ * release it now.
+ */
+ if (! gdqp)
+ break;
+ dqp = gdqp;
+ }
+ xfs_qm_freelist_unlock(xfs_Gqm);
+}
+
+/*
+ * Release a dquot. Flush it if dirty, then dqput() it.
+ * dquot must not be locked.
+ */
+void
+xfs_qm_dqrele(
+ xfs_dquot_t *dqp)
+{
+ ASSERT(dqp);
+ xfs_dqtrace_entry(dqp, "DQRELE");
+
+ xfs_dqlock(dqp);
+ /*
+ * We don't care to flush it if the dquot is dirty here.
+ * That will create stutters that we want to avoid.
+ * Instead we do a delayed write when we try to reclaim
+ * a dirty dquot. Also xfs_sync will take part of the burden...
+ */
+ xfs_qm_dqput(dqp);
+}
+
+
+/*
+ * Write a modified dquot to disk.
+ * The dquot must be locked and the flush lock too taken by caller.
+ * The flush lock will not be unlocked until the dquot reaches the disk,
+ * but the dquot is free to be unlocked and modified by the caller
+ * in the interim. Dquot is still locked on return. This behavior is
+ * identical to that of inodes.
+ */
+int
+xfs_qm_dqflush(
+ xfs_dquot_t *dqp,
+ uint flags)
+{
+ xfs_mount_t *mp;
+ xfs_buf_t *bp;
+ xfs_disk_dquot_t *ddqp;
+ int error;
+ SPLDECL(s);
+
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+ xfs_dqtrace_entry(dqp, "DQFLUSH");
+
+ /*
+ * If not dirty, nada.
+ */
+ if (!XFS_DQ_IS_DIRTY(dqp)) {
+ xfs_dqfunlock(dqp);
+ return (0);
+ }
+
+ /*
+ * Cant flush a pinned dquot. Wait for it.
+ */
+ xfs_qm_dqunpin_wait(dqp);
+
+ /*
+ * This may have been unpinned because the filesystem is shutting
+ * down forcibly. If that's the case we must not write this dquot
+ * to disk, because the log record didn't make it to disk!
+ */
+ if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) {
+ dqp->dq_flags &= ~(XFS_DQ_DIRTY);
+ xfs_dqfunlock(dqp);
+ return XFS_ERROR(EIO);
+ }
+
+ /*
+ * Get the buffer containing the on-disk dquot
+ * We don't need a transaction envelope because we know that the
+ * the ondisk-dquot has already been allocated for.
+ */
+ if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
+ xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
+ ASSERT(error != ENOENT);
+ /*
+ * Quotas could have gotten turned off (ESRCH)
+ */
+ xfs_dqfunlock(dqp);
+ return (error);
+ }
+
+ if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT), 0, XFS_QMOPT_DOWARN,
+ "dqflush (incore copy)")) {
+ xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE);
+ return XFS_ERROR(EIO);
+ }
+
+ /* This is the only portion of data that needs to persist */
+ memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
+
+ /*
+ * Clear the dirty field and remember the flush lsn for later use.
+ */
+ dqp->dq_flags &= ~(XFS_DQ_DIRTY);
+ mp = dqp->q_mount;
+
+ /* lsn is 64 bits */
+ AIL_LOCK(mp, s);
+ dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
+ AIL_UNLOCK(mp, s);
+
+ /*
+ * Attach an iodone routine so that we can remove this dquot from the
+ * AIL and release the flush lock once the dquot is synced to disk.
+ */
+ xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *))
+ xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item));
+ /*
+ * If the buffer is pinned then push on the log so we won't
+ * get stuck waiting in the write for too long.
+ */
+ if (XFS_BUF_ISPINNED(bp)) {
+ xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE");
+ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
+ }
+
+ if (flags & XFS_QMOPT_DELWRI) {
+ xfs_bdwrite(mp, bp);
+ } else if (flags & XFS_QMOPT_ASYNC) {
+ xfs_bawrite(mp, bp);
+ } else {
+ error = xfs_bwrite(mp, bp);
+ }
+ xfs_dqtrace_entry(dqp, "DQFLUSH END");
+ /*
+ * dqp is still locked, but caller is free to unlock it now.
+ */
+ return (error);
+
+}
+
+/*
+ * This is the dquot flushing I/O completion routine. It is called
+ * from interrupt level when the buffer containing the dquot is
+ * flushed to disk. It is responsible for removing the dquot logitem
+ * from the AIL if it has not been re-logged, and unlocking the dquot's
+ * flush lock. This behavior is very similar to that of inodes..
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_dqflush_done(
+ xfs_buf_t *bp,
+ xfs_dq_logitem_t *qip)
+{
+ xfs_dquot_t *dqp;
+ SPLDECL(s);
+
+ dqp = qip->qli_dquot;
+
+ /*
+ * We only want to pull the item from the AIL if its
+ * location in the log has not changed since we started the flush.
+ * Thus, we only bother if the dquot's lsn has
+ * not changed. First we check the lsn outside the lock
+ * since it's cheaper, and then we recheck while
+ * holding the lock before removing the dquot from the AIL.
+ */
+ if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
+ qip->qli_item.li_lsn == qip->qli_flush_lsn) {
+
+ AIL_LOCK(dqp->q_mount, s);
+ /*
+ * xfs_trans_delete_ail() drops the AIL lock.
+ */
+ if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
+ xfs_trans_delete_ail(dqp->q_mount,
+ (xfs_log_item_t*)qip, s);
+ else
+ AIL_UNLOCK(dqp->q_mount, s);
+ }
+
+ /*
+ * Release the dq's flush lock since we're done with it.
+ */
+ xfs_dqfunlock(dqp);
+}
+
+
+int
+xfs_qm_dqflock_nowait(
+ xfs_dquot_t *dqp)
+{
+ int locked;
+
+ locked = cpsema(&((dqp)->q_flock));
+
+ /* XXX ifdef these out */
+ if (locked)
+ (dqp)->dq_flags |= XFS_DQ_FLOCKED;
+ return (locked);
+}
+
+
+int
+xfs_qm_dqlock_nowait(
+ xfs_dquot_t *dqp)
+{
+ return (mutex_trylock(&((dqp)->q_qlock)));
+}
+
+void
+xfs_dqlock(
+ xfs_dquot_t *dqp)
+{
+ mutex_lock(&(dqp->q_qlock), PINOD);
+}
+
+void
+xfs_dqunlock(
+ xfs_dquot_t *dqp)
+{
+ mutex_unlock(&(dqp->q_qlock));
+ if (dqp->q_logitem.qli_dquot == dqp) {
+ /* Once was dqp->q_mount, but might just have been cleared */
+ xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp,
+ (xfs_log_item_t*)&(dqp->q_logitem));
+ }
+}
+
+
+void
+xfs_dqunlock_nonotify(
+ xfs_dquot_t *dqp)
+{
+ mutex_unlock(&(dqp->q_qlock));
+}
+
+void
+xfs_dqlock2(
+ xfs_dquot_t *d1,
+ xfs_dquot_t *d2)
+{
+ if (d1 && d2) {
+ ASSERT(d1 != d2);
+ if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) > INT_GET(d2->q_core.d_id, ARCH_CONVERT)) {
+ xfs_dqlock(d2);
+ xfs_dqlock(d1);
+ } else {
+ xfs_dqlock(d1);
+ xfs_dqlock(d2);
+ }
+ } else {
+ if (d1) {
+ xfs_dqlock(d1);
+ } else if (d2) {
+ xfs_dqlock(d2);
+ }
+ }
+}
+
+
+/*
+ * Take a dquot out of the mount's dqlist as well as the hashlist.
+ * This is called via unmount as well as quotaoff, and the purge
+ * will always succeed unless there are soft (temp) references
+ * outstanding.
+ *
+ * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
+ * that we're returning! XXXsup - not cool.
+ */
+/* ARGSUSED */
+int
+xfs_qm_dqpurge(
+ xfs_dquot_t *dqp,
+ uint flags)
+{
+ xfs_dqhash_t *thishash;
+ xfs_mount_t *mp;
+
+ mp = dqp->q_mount;
+
+ ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
+ ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash));
+
+ xfs_dqlock(dqp);
+ /*
+ * We really can't afford to purge a dquot that is
+ * referenced, because these are hard refs.
+ * It shouldn't happen in general because we went thru _all_ inodes in
+ * dqrele_all_inodes before calling this and didn't let the mountlock go.
+ * However it is possible that we have dquots with temporary
+ * references that are not attached to an inode. e.g. see xfs_setattr().
+ */
+ if (dqp->q_nrefs != 0) {
+ xfs_dqunlock(dqp);
+ XFS_DQ_HASH_UNLOCK(dqp->q_hash);
+ return (1);
+ }
+
+ ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
+
+ /*
+ * If we're turning off quotas, we have to make sure that, for
+ * example, we don't delete quota disk blocks while dquots are
+ * in the process of getting written to those disk blocks.
+ * This dquot might well be on AIL, and we can't leave it there
+ * if we're turning off quotas. Basically, we need this flush
+ * lock, and are willing to block on it.
+ */
+ if (! xfs_qm_dqflock_nowait(dqp)) {
+ /*
+ * Block on the flush lock after nudging dquot buffer,
+ * if it is incore.
+ */
+ xfs_qm_dqflock_pushbuf_wait(dqp);
+ }
+
+ /*
+ * XXXIf we're turning this type of quotas off, we don't care
+ * about the dirty metadata sitting in this dquot. OTOH, if
+ * we're unmounting, we do care, so we flush it and wait.
+ */
+ if (XFS_DQ_IS_DIRTY(dqp)) {
+ xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY");
+ /* dqflush unlocks dqflock */
+ /*
+ * Given that dqpurge is a very rare occurrence, it is OK
+ * that we're holding the hashlist and mplist locks
+ * across the disk write. But, ... XXXsup
+ *
+ * We don't care about getting disk errors here. We need
+ * to purge this dquot anyway, so we go ahead regardless.
+ */
+ (void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC);
+ xfs_dqflock(dqp);
+ }
+ ASSERT(dqp->q_pincount == 0);
+ ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+ !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+
+ thishash = dqp->q_hash;
+ XQM_HASHLIST_REMOVE(thishash, dqp);
+ XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp);
+ /*
+ * XXX Move this to the front of the freelist, if we can get the
+ * freelist lock.
+ */
+ ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
+
+ dqp->q_mount = NULL;
+ dqp->q_hash = NULL;
+ dqp->dq_flags = XFS_DQ_INACTIVE;
+ memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+ xfs_dqfunlock(dqp);
+ xfs_dqunlock(dqp);
+ XFS_DQ_HASH_UNLOCK(thishash);
+ return (0);
+}
+
+
+#ifdef QUOTADEBUG
+void
+xfs_qm_dqprint(xfs_dquot_t *dqp)
+{
+ cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------");
+ cmn_err(CE_DEBUG, "---- dquotID = %d",
+ (int)INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
+ cmn_err(CE_DEBUG, "---- type = %s",
+ XFS_QM_ISUDQ(dqp) ? "USR" : "GRP");
+ cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount);
+ cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno);
+ cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset);
+ cmn_err(CE_DEBUG, "---- blkhlimit = %Lu (0x%x)",
+ INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT),
+ (int) INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT));
+ cmn_err(CE_DEBUG, "---- blkslimit = %Lu (0x%x)",
+ INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT),
+ (int)INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT));
+ cmn_err(CE_DEBUG, "---- inohlimit = %Lu (0x%x)",
+ INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT),
+ (int)INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT));
+ cmn_err(CE_DEBUG, "---- inoslimit = %Lu (0x%x)",
+ INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT),
+ (int)INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT));
+ cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)",
+ INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT),
+ (int)INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
+ cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)",
+ INT_GET(dqp->q_core.d_icount, ARCH_CONVERT),
+ (int)INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
+ cmn_err(CE_DEBUG, "---- btimer = %d",
+ (int)INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT));
+ cmn_err(CE_DEBUG, "---- itimer = %d",
+ (int)INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT));
+ cmn_err(CE_DEBUG, "---------------------------");
+}
+#endif
+
+/*
+ * Give the buffer a little push if it is incore and
+ * wait on the flush lock.
+ */
+void
+xfs_qm_dqflock_pushbuf_wait(
+ xfs_dquot_t *dqp)
+{
+ xfs_buf_t *bp;
+
+ /*
+ * Check to see if the dquot has been flushed delayed
+ * write. If so, grab its buffer and send it
+ * out immediately. We'll be able to acquire
+ * the flush lock when the I/O completes.
+ */
+ bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno,
+ XFS_QI_DQCHUNKLEN(dqp->q_mount),
+ XFS_INCORE_TRYLOCK);
+ if (bp != NULL) {
+ if (XFS_BUF_ISDELAYWRITE(bp)) {
+ if (XFS_BUF_ISPINNED(bp)) {
+ xfs_log_force(dqp->q_mount,
+ (xfs_lsn_t)0,
+ XFS_LOG_FORCE);
+ }
+ xfs_bawrite(dqp->q_mount, bp);
+ } else {
+ xfs_buf_relse(bp);
+ }
+ }
+ xfs_dqflock(dqp);
+}
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
new file mode 100644
index 000000000000..0c3fe3175baa
--- /dev/null
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DQUOT_H__
+#define __XFS_DQUOT_H__
+
+/*
+ * Dquots are structures that hold quota information about a user or a group,
+ * much like inodes are for files. In fact, dquots share many characteristics
+ * with inodes. However, dquots can also be a centralized resource, relative
+ * to a collection of inodes. In this respect, dquots share some characteristics
+ * of the superblock.
+ * XFS dquots exploit both those in its algorithms. They make every attempt
+ * to not be a bottleneck when quotas are on and have minimal impact, if any,
+ * when quotas are off.
+ */
+
+/*
+ * The hash chain headers (hash buckets)
+ */
+typedef struct xfs_dqhash {
+ struct xfs_dquot *qh_next;
+ mutex_t qh_lock;
+ uint qh_version; /* ever increasing version */
+ uint qh_nelems; /* number of dquots on the list */
+} xfs_dqhash_t;
+
+typedef struct xfs_dqlink {
+ struct xfs_dquot *ql_next; /* forward link */
+ struct xfs_dquot **ql_prevp; /* pointer to prev ql_next */
+} xfs_dqlink_t;
+
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * This is the marker which is designed to occupy the first few
+ * bytes of the xfs_dquot_t structure. Even inside this, the freelist pointers
+ * must come first.
+ * This serves as the marker ("sentinel") when we have to restart list
+ * iterations because of locking considerations.
+ */
+typedef struct xfs_dqmarker {
+ struct xfs_dquot*dqm_flnext; /* link to freelist: must be first */
+ struct xfs_dquot*dqm_flprev;
+ xfs_dqlink_t dqm_mplist; /* link to mount's list of dquots */
+ xfs_dqlink_t dqm_hashlist; /* link to the hash chain */
+ uint dqm_flags; /* various flags (XFS_DQ_*) */
+} xfs_dqmarker_t;
+
+/*
+ * The incore dquot structure
+ */
+typedef struct xfs_dquot {
+ xfs_dqmarker_t q_lists; /* list ptrs, q_flags (marker) */
+ xfs_dqhash_t *q_hash; /* the hashchain header */
+ struct xfs_mount*q_mount; /* filesystem this relates to */
+ struct xfs_trans*q_transp; /* trans this belongs to currently */
+ uint q_nrefs; /* # active refs from inodes */
+ xfs_daddr_t q_blkno; /* blkno of dquot buffer */
+ int q_bufoffset; /* off of dq in buffer (# dquots) */
+ xfs_fileoff_t q_fileoffset; /* offset in quotas file */
+
+ struct xfs_dquot*q_gdquot; /* group dquot, hint only */
+ xfs_disk_dquot_t q_core; /* actual usage & quotas */
+ xfs_dq_logitem_t q_logitem; /* dquot log item */
+ xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
+ xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
+ xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
+ mutex_t q_qlock; /* quota lock */
+ sema_t q_flock; /* flush lock */
+ uint q_pincount; /* pin count for this dquot */
+ sv_t q_pinwait; /* sync var for pinning */
+#ifdef XFS_DQUOT_TRACE
+ struct ktrace *q_trace; /* trace header structure */
+#endif
+} xfs_dquot_t;
+
+
+#define dq_flnext q_lists.dqm_flnext
+#define dq_flprev q_lists.dqm_flprev
+#define dq_mplist q_lists.dqm_mplist
+#define dq_hashlist q_lists.dqm_hashlist
+#define dq_flags q_lists.dqm_flags
+
+#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
+
+/*
+ * Quota Accounting flags
+ */
+#define XFS_ALL_QUOTA_ACCT (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT)
+#define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD)
+#define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD)
+#define XFS_ALL_QUOTA_ACTV (XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
+#define XFS_ALL_QUOTA_ACCT_ENFD (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
+ XFS_GQUOTA_ACCT|XFS_GQUOTA_ENFD)
+
+#define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
+#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
+#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
+
+/*
+ * Quota Limit Enforcement flags
+ */
+#define XFS_IS_QUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
+#define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD)
+#define XFS_IS_GQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_GQUOTA_ENFD)
+
+#ifdef DEBUG
+static inline int
+XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
+{
+ if (mutex_trylock(&dqp->q_qlock)) {
+ mutex_unlock(&dqp->q_qlock);
+ return 0;
+ }
+ return 1;
+}
+#endif
+
+
+/*
+ * The following three routines simply manage the q_flock
+ * semaphore embedded in the dquot. This semaphore synchronizes
+ * processes attempting to flush the in-core dquot back to disk.
+ */
+#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\
+ (dqp)->dq_flags |= XFS_DQ_FLOCKED; }
+#define xfs_dqfunlock(dqp) { ASSERT(valusema(&((dqp)->q_flock)) <= 0); \
+ vsema(&((dqp)->q_flock)); \
+ (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
+
+#define XFS_DQ_PINLOCK(dqp) mutex_spinlock( \
+ &(XFS_DQ_TO_QINF(dqp)->qi_pinlock))
+#define XFS_DQ_PINUNLOCK(dqp, s) mutex_spinunlock( \
+ &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s)
+
+#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (valusema(&((dqp)->q_flock)) <= 0)
+#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
+#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
+#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
+#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo)
+#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \
+ XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
+ XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
+
+#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
+ (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
+ (XFS_IS_GQUOTA_ON((d)->q_mount))))
+
+#ifdef XFS_DQUOT_TRACE
+/*
+ * Dquot Tracing stuff.
+ */
+#define DQUOT_TRACE_SIZE 64
+#define DQUOT_KTRACE_ENTRY 1
+
+extern void __xfs_dqtrace_entry(xfs_dquot_t *dqp, char *func,
+ void *, xfs_inode_t *);
+#define xfs_dqtrace_entry_ino(a,b,ip) \
+ __xfs_dqtrace_entry((a), (b), (void*)__return_address, (ip))
+#define xfs_dqtrace_entry(a,b) \
+ __xfs_dqtrace_entry((a), (b), (void*)__return_address, NULL)
+#else
+#define xfs_dqtrace_entry(a,b)
+#define xfs_dqtrace_entry_ino(a,b,ip)
+#endif
+
+#ifdef QUOTADEBUG
+extern void xfs_qm_dqprint(xfs_dquot_t *);
+#else
+#define xfs_qm_dqprint(a)
+#endif
+
+extern void xfs_qm_dqdestroy(xfs_dquot_t *);
+extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
+extern int xfs_qm_dqpurge(xfs_dquot_t *, uint);
+extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
+extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
+extern int xfs_qm_dqflock_nowait(xfs_dquot_t *);
+extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
+extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
+ xfs_disk_dquot_t *);
+extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
+ xfs_disk_dquot_t *);
+extern int xfs_qm_dqwarn(xfs_disk_dquot_t *, uint);
+extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
+ xfs_dqid_t, uint, uint, xfs_dquot_t **);
+extern void xfs_qm_dqput(xfs_dquot_t *);
+extern void xfs_qm_dqrele(xfs_dquot_t *);
+extern void xfs_dqlock(xfs_dquot_t *);
+extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
+extern void xfs_dqunlock(xfs_dquot_t *);
+extern void xfs_dqunlock_nonotify(xfs_dquot_t *);
+
+#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
new file mode 100644
index 000000000000..a5425ee6e7bd
--- /dev/null
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -0,0 +1,715 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+
+#include "xfs_qm.h"
+
+
+/*
+ * returns the number of iovecs needed to log the given dquot item.
+ */
+/* ARGSUSED */
+STATIC uint
+xfs_qm_dquot_logitem_size(
+ xfs_dq_logitem_t *logitem)
+{
+ /*
+ * we need only two iovecs, one for the format, one for the real thing
+ */
+ return (2);
+}
+
+/*
+ * fills in the vector of log iovecs for the given dquot log item.
+ */
+STATIC void
+xfs_qm_dquot_logitem_format(
+ xfs_dq_logitem_t *logitem,
+ xfs_log_iovec_t *logvec)
+{
+ ASSERT(logitem);
+ ASSERT(logitem->qli_dquot);
+
+ logvec->i_addr = (xfs_caddr_t)&logitem->qli_format;
+ logvec->i_len = sizeof(xfs_dq_logformat_t);
+ logvec++;
+ logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core;
+ logvec->i_len = sizeof(xfs_disk_dquot_t);
+
+ ASSERT(2 == logitem->qli_item.li_desc->lid_size);
+ logitem->qli_format.qlf_size = 2;
+
+}
+
+/*
+ * Increment the pin count of the given dquot.
+ * This value is protected by pinlock spinlock in the xQM structure.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pin(
+ xfs_dq_logitem_t *logitem)
+{
+ unsigned long s;
+ xfs_dquot_t *dqp;
+
+ dqp = logitem->qli_dquot;
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ s = XFS_DQ_PINLOCK(dqp);
+ dqp->q_pincount++;
+ XFS_DQ_PINUNLOCK(dqp, s);
+}
+
+/*
+ * Decrement the pin count of the given dquot, and wake up
+ * anyone in xfs_dqwait_unpin() if the count goes to 0. The
+ * dquot must have been previously pinned with a call to xfs_dqpin().
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_dquot_logitem_unpin(
+ xfs_dq_logitem_t *logitem,
+ int stale)
+{
+ unsigned long s;
+ xfs_dquot_t *dqp;
+
+ dqp = logitem->qli_dquot;
+ ASSERT(dqp->q_pincount > 0);
+ s = XFS_DQ_PINLOCK(dqp);
+ dqp->q_pincount--;
+ if (dqp->q_pincount == 0) {
+ sv_broadcast(&dqp->q_pinwait);
+ }
+ XFS_DQ_PINUNLOCK(dqp, s);
+}
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_dquot_logitem_unpin_remove(
+ xfs_dq_logitem_t *logitem,
+ xfs_trans_t *tp)
+{
+ xfs_qm_dquot_logitem_unpin(logitem, 0);
+}
+
+/*
+ * Given the logitem, this writes the corresponding dquot entry to disk
+ * asynchronously. This is called with the dquot entry securely locked;
+ * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
+ * at the end.
+ */
+STATIC void
+xfs_qm_dquot_logitem_push(
+ xfs_dq_logitem_t *logitem)
+{
+ xfs_dquot_t *dqp;
+
+ dqp = logitem->qli_dquot;
+
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+
+ /*
+ * Since we were able to lock the dquot's flush lock and
+ * we found it on the AIL, the dquot must be dirty. This
+ * is because the dquot is removed from the AIL while still
+ * holding the flush lock in xfs_dqflush_done(). Thus, if
+ * we found it in the AIL and were able to obtain the flush
+ * lock without sleeping, then there must not have been
+ * anyone in the process of flushing the dquot.
+ */
+ xfs_qm_dqflush(dqp, XFS_B_DELWRI);
+ xfs_dqunlock(dqp);
+}
+
+/*ARGSUSED*/
+STATIC xfs_lsn_t
+xfs_qm_dquot_logitem_committed(
+ xfs_dq_logitem_t *l,
+ xfs_lsn_t lsn)
+{
+ /*
+ * We always re-log the entire dquot when it becomes dirty,
+ * so, the latest copy _is_ the only one that matters.
+ */
+ return (lsn);
+}
+
+
+/*
+ * This is called to wait for the given dquot to be unpinned.
+ * Most of these pin/unpin routines are plagiarized from inode code.
+ */
+void
+xfs_qm_dqunpin_wait(
+ xfs_dquot_t *dqp)
+{
+ SPLDECL(s);
+
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ if (dqp->q_pincount == 0) {
+ return;
+ }
+
+ /*
+ * Give the log a push so we don't wait here too long.
+ */
+ xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
+ s = XFS_DQ_PINLOCK(dqp);
+ if (dqp->q_pincount == 0) {
+ XFS_DQ_PINUNLOCK(dqp, s);
+ return;
+ }
+ sv_wait(&(dqp->q_pinwait), PINOD,
+ &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
+}
+
+/*
+ * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
+ * the dquot is locked by us, but the flush lock isn't. So, here we are
+ * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
+ * If so, we want to push it out to help us take this item off the AIL as soon
+ * as possible.
+ *
+ * We must not be holding the AIL_LOCK at this point. Calling incore() to
+ * search the buffercache can be a time consuming thing, and AIL_LOCK is a
+ * spinlock.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pushbuf(
+ xfs_dq_logitem_t *qip)
+{
+ xfs_dquot_t *dqp;
+ xfs_mount_t *mp;
+ xfs_buf_t *bp;
+ uint dopush;
+
+ dqp = qip->qli_dquot;
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+ /*
+ * The qli_pushbuf_flag keeps others from
+ * trying to duplicate our effort.
+ */
+ ASSERT(qip->qli_pushbuf_flag != 0);
+ ASSERT(qip->qli_push_owner == get_thread_id());
+
+ /*
+ * If flushlock isn't locked anymore, chances are that the
+ * inode flush completed and the inode was taken off the AIL.
+ * So, just get out.
+ */
+ if ((valusema(&(dqp->q_flock)) > 0) ||
+ ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
+ qip->qli_pushbuf_flag = 0;
+ xfs_dqunlock(dqp);
+ return;
+ }
+ mp = dqp->q_mount;
+ bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
+ XFS_QI_DQCHUNKLEN(mp),
+ XFS_INCORE_TRYLOCK);
+ if (bp != NULL) {
+ if (XFS_BUF_ISDELAYWRITE(bp)) {
+ dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
+ (valusema(&(dqp->q_flock)) <= 0));
+ qip->qli_pushbuf_flag = 0;
+ xfs_dqunlock(dqp);
+
+ if (XFS_BUF_ISPINNED(bp)) {
+ xfs_log_force(mp, (xfs_lsn_t)0,
+ XFS_LOG_FORCE);
+ }
+ if (dopush) {
+#ifdef XFSRACEDEBUG
+ delay_for_intr();
+ delay(300);
+#endif
+ xfs_bawrite(mp, bp);
+ } else {
+ xfs_buf_relse(bp);
+ }
+ } else {
+ qip->qli_pushbuf_flag = 0;
+ xfs_dqunlock(dqp);
+ xfs_buf_relse(bp);
+ }
+ return;
+ }
+
+ qip->qli_pushbuf_flag = 0;
+ xfs_dqunlock(dqp);
+}
+
+/*
+ * This is called to attempt to lock the dquot associated with this
+ * dquot log item. Don't sleep on the dquot lock or the flush lock.
+ * If the flush lock is already held, indicating that the dquot has
+ * been or is in the process of being flushed, then see if we can
+ * find the dquot's buffer in the buffer cache without sleeping. If
+ * we can and it is marked delayed write, then we want to send it out.
+ * We delay doing so until the push routine, though, to avoid sleeping
+ * in any device strategy routines.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_trylock(
+ xfs_dq_logitem_t *qip)
+{
+ xfs_dquot_t *dqp;
+ uint retval;
+
+ dqp = qip->qli_dquot;
+ if (dqp->q_pincount > 0)
+ return (XFS_ITEM_PINNED);
+
+ if (! xfs_qm_dqlock_nowait(dqp))
+ return (XFS_ITEM_LOCKED);
+
+ retval = XFS_ITEM_SUCCESS;
+ if (! xfs_qm_dqflock_nowait(dqp)) {
+ /*
+ * The dquot is already being flushed. It may have been
+ * flushed delayed write, however, and we don't want to
+ * get stuck waiting for that to complete. So, we want to check
+ * to see if we can lock the dquot's buffer without sleeping.
+ * If we can and it is marked for delayed write, then we
+ * hold it and send it out from the push routine. We don't
+ * want to do that now since we might sleep in the device
+ * strategy routine. We also don't want to grab the buffer lock
+ * here because we'd like not to call into the buffer cache
+ * while holding the AIL_LOCK.
+ * Make sure to only return PUSHBUF if we set pushbuf_flag
+ * ourselves. If someone else is doing it then we don't
+ * want to go to the push routine and duplicate their efforts.
+ */
+ if (qip->qli_pushbuf_flag == 0) {
+ qip->qli_pushbuf_flag = 1;
+ ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
+#ifdef DEBUG
+ qip->qli_push_owner = get_thread_id();
+#endif
+ /*
+ * The dquot is left locked.
+ */
+ retval = XFS_ITEM_PUSHBUF;
+ } else {
+ retval = XFS_ITEM_FLUSHING;
+ xfs_dqunlock_nonotify(dqp);
+ }
+ }
+
+ ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL);
+ return (retval);
+}
+
+
+/*
+ * Unlock the dquot associated with the log item.
+ * Clear the fields of the dquot and dquot log item that
+ * are specific to the current transaction. If the
+ * hold flags is set, do not unlock the dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_unlock(
+ xfs_dq_logitem_t *ql)
+{
+ xfs_dquot_t *dqp;
+
+ ASSERT(ql != NULL);
+ dqp = ql->qli_dquot;
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+ /*
+ * Clear the transaction pointer in the dquot
+ */
+ dqp->q_transp = NULL;
+
+ /*
+ * dquots are never 'held' from getting unlocked at the end of
+ * a transaction. Their locking and unlocking is hidden inside the
+ * transaction layer, within trans_commit. Hence, no LI_HOLD flag
+ * for the logitem.
+ */
+ xfs_dqunlock(dqp);
+}
+
+
+/*
+ * The transaction with the dquot locked has aborted. The dquot
+ * must not be dirty within the transaction. We simply unlock just
+ * as if the transaction had been cancelled.
+ */
+STATIC void
+xfs_qm_dquot_logitem_abort(
+ xfs_dq_logitem_t *ql)
+{
+ xfs_qm_dquot_logitem_unlock(ql);
+}
+
+/*
+ * this needs to stamp an lsn into the dquot, I think.
+ * rpc's that look at user dquot's would then have to
+ * push on the dependency recorded in the dquot
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_dquot_logitem_committing(
+ xfs_dq_logitem_t *l,
+ xfs_lsn_t lsn)
+{
+ return;
+}
+
+
+/*
+ * This is the ops vector for dquots
+ */
+struct xfs_item_ops xfs_dquot_item_ops = {
+ .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size,
+ .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
+ xfs_qm_dquot_logitem_format,
+ .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin,
+ .iop_unpin = (void(*)(xfs_log_item_t*, int))
+ xfs_qm_dquot_logitem_unpin,
+ .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
+ xfs_qm_dquot_logitem_unpin_remove,
+ .iop_trylock = (uint(*)(xfs_log_item_t*))
+ xfs_qm_dquot_logitem_trylock,
+ .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock,
+ .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
+ xfs_qm_dquot_logitem_committed,
+ .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
+ .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_abort,
+ .iop_pushbuf = (void(*)(xfs_log_item_t*))
+ xfs_qm_dquot_logitem_pushbuf,
+ .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
+ xfs_qm_dquot_logitem_committing
+};
+
+/*
+ * Initialize the dquot log item for a newly allocated dquot.
+ * The dquot isn't locked at this point, but it isn't on any of the lists
+ * either, so we don't care.
+ */
+void
+xfs_qm_dquot_logitem_init(
+ struct xfs_dquot *dqp)
+{
+ xfs_dq_logitem_t *lp;
+ lp = &dqp->q_logitem;
+
+ lp->qli_item.li_type = XFS_LI_DQUOT;
+ lp->qli_item.li_ops = &xfs_dquot_item_ops;
+ lp->qli_item.li_mountp = dqp->q_mount;
+ lp->qli_dquot = dqp;
+ lp->qli_format.qlf_type = XFS_LI_DQUOT;
+ lp->qli_format.qlf_id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
+ lp->qli_format.qlf_blkno = dqp->q_blkno;
+ lp->qli_format.qlf_len = 1;
+ /*
+ * This is just the offset of this dquot within its buffer
+ * (which is currently 1 FSB and probably won't change).
+ * Hence 32 bits for this offset should be just fine.
+ * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
+ * here, and recompute it at recovery time.
+ */
+ lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
+}
+
+/*------------------ QUOTAOFF LOG ITEMS -------------------*/
+
+/*
+ * This returns the number of iovecs needed to log the given quotaoff item.
+ * We only need 1 iovec for an quotaoff item. It just logs the
+ * quotaoff_log_format structure.
+ */
+/*ARGSUSED*/
+STATIC uint
+xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf)
+{
+ return (1);
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given quotaoff log item. We use only 1 iovec, and we point that
+ * at the quotaoff_log_format structure embedded in the quotaoff item.
+ * It is at this point that we assert that all of the extent
+ * slots in the quotaoff item have been filled.
+ */
+STATIC void
+xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf,
+ xfs_log_iovec_t *log_vector)
+{
+ ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF);
+
+ log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
+ log_vector->i_len = sizeof(xfs_qoff_logitem_t);
+ qf->qql_format.qf_size = 1;
+}
+
+
+/*
+ * Pinning has no meaning for an quotaoff item, so just return.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf)
+{
+ return;
+}
+
+
+/*
+ * Since pinning has no meaning for an quotaoff item, unpinning does
+ * not either.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf, int stale)
+{
+ return;
+}
+
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp)
+{
+ return;
+}
+
+/*
+ * Quotaoff items have no locking, so just return success.
+ */
+/*ARGSUSED*/
+STATIC uint
+xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf)
+{
+ return XFS_ITEM_LOCKED;
+}
+
+/*
+ * Quotaoff items have no locking or pushing, so return failure
+ * so that the caller doesn't bother with us.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf)
+{
+ return;
+}
+
+/*
+ * The quotaoff-start-item is logged only once and cannot be moved in the log,
+ * so simply return the lsn at which it's been logged.
+ */
+/*ARGSUSED*/
+STATIC xfs_lsn_t
+xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn)
+{
+ return (lsn);
+}
+
+/*
+ * The transaction of which this QUOTAOFF is a part has been aborted.
+ * Just clean up after ourselves.
+ * Shouldn't this never happen in the case of qoffend logitems? XXX
+ */
+STATIC void
+xfs_qm_qoff_logitem_abort(xfs_qoff_logitem_t *qf)
+{
+ kmem_free(qf, sizeof(xfs_qoff_logitem_t));
+}
+
+/*
+ * There isn't much you can do to push on an quotaoff item. It is simply
+ * stuck waiting for the log to be flushed to disk.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf)
+{
+ return;
+}
+
+
+/*ARGSUSED*/
+STATIC xfs_lsn_t
+xfs_qm_qoffend_logitem_committed(
+ xfs_qoff_logitem_t *qfe,
+ xfs_lsn_t lsn)
+{
+ xfs_qoff_logitem_t *qfs;
+ SPLDECL(s);
+
+ qfs = qfe->qql_start_lip;
+ AIL_LOCK(qfs->qql_item.li_mountp,s);
+ /*
+ * Delete the qoff-start logitem from the AIL.
+ * xfs_trans_delete_ail() drops the AIL lock.
+ */
+ xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs, s);
+ kmem_free(qfs, sizeof(xfs_qoff_logitem_t));
+ kmem_free(qfe, sizeof(xfs_qoff_logitem_t));
+ return (xfs_lsn_t)-1;
+}
+
+/*
+ * XXX rcc - don't know quite what to do with this. I think we can
+ * just ignore it. The only time that isn't the case is if we allow
+ * the client to somehow see that quotas have been turned off in which
+ * we can't allow that to get back until the quotaoff hits the disk.
+ * So how would that happen? Also, do we need different routines for
+ * quotaoff start and quotaoff end? I suspect the answer is yes but
+ * to be sure, I need to look at the recovery code and see how quota off
+ * recovery is handled (do we roll forward or back or do something else).
+ * If we roll forwards or backwards, then we need two separate routines,
+ * one that does nothing and one that stamps in the lsn that matters
+ * (truly makes the quotaoff irrevocable). If we do something else,
+ * then maybe we don't need two.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
+{
+ return;
+}
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
+{
+ return;
+}
+
+struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+ .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
+ .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
+ xfs_qm_qoff_logitem_format,
+ .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
+ .iop_unpin = (void(*)(xfs_log_item_t* ,int))
+ xfs_qm_qoff_logitem_unpin,
+ .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
+ xfs_qm_qoff_logitem_unpin_remove,
+ .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
+ .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
+ .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
+ xfs_qm_qoffend_logitem_committed,
+ .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
+ .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
+ .iop_pushbuf = NULL,
+ .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
+ xfs_qm_qoffend_logitem_committing
+};
+
+/*
+ * This is the ops vector shared by all quotaoff-start log items.
+ */
+struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+ .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
+ .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
+ xfs_qm_qoff_logitem_format,
+ .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
+ .iop_unpin = (void(*)(xfs_log_item_t*, int))
+ xfs_qm_qoff_logitem_unpin,
+ .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
+ xfs_qm_qoff_logitem_unpin_remove,
+ .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
+ .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
+ .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
+ xfs_qm_qoff_logitem_committed,
+ .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
+ .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
+ .iop_pushbuf = NULL,
+ .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
+ xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * Allocate and initialize an quotaoff item of the correct quota type(s).
+ */
+xfs_qoff_logitem_t *
+xfs_qm_qoff_logitem_init(
+ struct xfs_mount *mp,
+ xfs_qoff_logitem_t *start,
+ uint flags)
+{
+ xfs_qoff_logitem_t *qf;
+
+ qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP);
+
+ qf->qql_item.li_type = XFS_LI_QUOTAOFF;
+ if (start)
+ qf->qql_item.li_ops = &xfs_qm_qoffend_logitem_ops;
+ else
+ qf->qql_item.li_ops = &xfs_qm_qoff_logitem_ops;
+ qf->qql_item.li_mountp = mp;
+ qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
+ qf->qql_format.qf_flags = flags;
+ qf->qql_start_lip = start;
+ return (qf);
+}
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
new file mode 100644
index 000000000000..9c6500dabcaa
--- /dev/null
+++ b/fs/xfs/quota/xfs_dquot_item.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DQUOT_ITEM_H__
+#define __XFS_DQUOT_ITEM_H__
+
+struct xfs_dquot;
+struct xfs_trans;
+struct xfs_mount;
+struct xfs_qoff_logitem;
+
+typedef struct xfs_dq_logitem {
+ xfs_log_item_t qli_item; /* common portion */
+ struct xfs_dquot *qli_dquot; /* dquot ptr */
+ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
+ unsigned short qli_pushbuf_flag; /* 1 bit used in push_ail */
+#ifdef DEBUG
+ uint64_t qli_push_owner;
+#endif
+ xfs_dq_logformat_t qli_format; /* logged structure */
+} xfs_dq_logitem_t;
+
+typedef struct xfs_qoff_logitem {
+ xfs_log_item_t qql_item; /* common portion */
+ struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
+ xfs_qoff_logformat_t qql_format; /* logged structure */
+} xfs_qoff_logitem_t;
+
+
+extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
+ struct xfs_qoff_logitem *, uint);
+extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
+ struct xfs_qoff_logitem *, uint);
+extern void xfs_trans_log_quotaoff_item(struct xfs_trans *,
+ struct xfs_qoff_logitem *);
+
+#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
new file mode 100644
index 000000000000..89f2cd656ebf
--- /dev/null
+++ b/fs/xfs/quota/xfs_qm.c
@@ -0,0 +1,2848 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_utils.h"
+
+#include "xfs_qm.h"
+
+/*
+ * The global quota manager. There is only one of these for the entire
+ * system, _not_ one per file system. XQM keeps track of the overall
+ * quota functionality, including maintaining the freelist and hash
+ * tables of dquots.
+ */
+mutex_t xfs_Gqm_lock;
+struct xfs_qm *xfs_Gqm;
+
+kmem_zone_t *qm_dqzone;
+kmem_zone_t *qm_dqtrxzone;
+kmem_shaker_t xfs_qm_shaker;
+
+STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
+STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
+
+STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
+STATIC int xfs_qm_shake(int, unsigned int);
+
+#ifdef DEBUG
+extern mutex_t qcheck_lock;
+#endif
+
+#ifdef QUOTADEBUG
+#define XQM_LIST_PRINT(l, NXT, title) \
+{ \
+ xfs_dquot_t *dqp; int i = 0; \
+ cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
+ for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
+ cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " \
+ "bcnt = %d, icnt = %d, refs = %d", \
+ ++i, (int) INT_GET(dqp->q_core.d_id, ARCH_CONVERT), \
+ DQFLAGTO_TYPESTR(dqp), \
+ (int) INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT), \
+ (int) INT_GET(dqp->q_core.d_icount, ARCH_CONVERT), \
+ (int) dqp->q_nrefs); } \
+}
+#else
+#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
+#endif
+
+/*
+ * Initialize the XQM structure.
+ * Note that there is not one quota manager per file system.
+ */
+STATIC struct xfs_qm *
+xfs_Gqm_init(void)
+{
+ xfs_qm_t *xqm;
+ int hsize, i;
+
+ xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
+ ASSERT(xqm);
+
+ /*
+ * Initialize the dquot hash tables.
+ */
+ hsize = (DQUOT_HASH_HEURISTIC < XFS_QM_NCSIZE_THRESHOLD) ?
+ XFS_QM_HASHSIZE_LOW : XFS_QM_HASHSIZE_HIGH;
+ xqm->qm_dqhashmask = hsize - 1;
+
+ xqm->qm_usr_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
+ sizeof(xfs_dqhash_t),
+ KM_SLEEP);
+ xqm->qm_grp_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
+ sizeof(xfs_dqhash_t),
+ KM_SLEEP);
+ ASSERT(xqm->qm_usr_dqhtable != NULL);
+ ASSERT(xqm->qm_grp_dqhtable != NULL);
+
+ for (i = 0; i < hsize; i++) {
+ xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
+ xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
+ }
+
+ /*
+ * Freelist of all dquots of all file systems
+ */
+ xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
+
+ /*
+ * dquot zone. we register our own low-memory callback.
+ */
+ if (!qm_dqzone) {
+ xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
+ "xfs_dquots");
+ qm_dqzone = xqm->qm_dqzone;
+ } else
+ xqm->qm_dqzone = qm_dqzone;
+
+ xfs_qm_shaker = kmem_shake_register(xfs_qm_shake);
+
+ /*
+ * The t_dqinfo portion of transactions.
+ */
+ if (!qm_dqtrxzone) {
+ xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
+ "xfs_dqtrx");
+ qm_dqtrxzone = xqm->qm_dqtrxzone;
+ } else
+ xqm->qm_dqtrxzone = qm_dqtrxzone;
+
+ atomic_set(&xqm->qm_totaldquots, 0);
+ xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
+ xqm->qm_nrefs = 0;
+#ifdef DEBUG
+ mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk");
+#endif
+ return xqm;
+}
+
+/*
+ * Destroy the global quota manager when its reference count goes to zero.
+ */
+void
+xfs_qm_destroy(
+ struct xfs_qm *xqm)
+{
+ int hsize, i;
+
+ ASSERT(xqm != NULL);
+ ASSERT(xqm->qm_nrefs == 0);
+ kmem_shake_deregister(xfs_qm_shaker);
+ hsize = xqm->qm_dqhashmask + 1;
+ for (i = 0; i < hsize; i++) {
+ xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
+ xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
+ }
+ kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
+ kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
+ xqm->qm_usr_dqhtable = NULL;
+ xqm->qm_grp_dqhtable = NULL;
+ xqm->qm_dqhashmask = 0;
+ xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
+#ifdef DEBUG
+ mutex_destroy(&qcheck_lock);
+#endif
+ kmem_free(xqm, sizeof(xfs_qm_t));
+}
+
+/*
+ * Called at mount time to let XQM know that another file system is
+ * starting quotas. This isn't crucial information as the individual mount
+ * structures are pretty independent, but it helps the XQM keep a
+ * global view of what's going on.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_hold_quotafs_ref(
+ struct xfs_mount *mp)
+{
+ /*
+ * Need to lock the xfs_Gqm structure for things like this. For example,
+ * the structure could disappear between the entry to this routine and
+ * a HOLD operation if not locked.
+ */
+ XFS_QM_LOCK(xfs_Gqm);
+
+ if (xfs_Gqm == NULL)
+ xfs_Gqm = xfs_Gqm_init();
+ /*
+ * We can keep a list of all filesystems with quotas mounted for
+ * debugging and statistical purposes, but ...
+ * Just take a reference and get out.
+ */
+ XFS_QM_HOLD(xfs_Gqm);
+ XFS_QM_UNLOCK(xfs_Gqm);
+
+ return 0;
+}
+
+
+/*
+ * Release the reference that a filesystem took at mount time,
+ * so that we know when we need to destroy the entire quota manager.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_rele_quotafs_ref(
+ struct xfs_mount *mp)
+{
+ xfs_dquot_t *dqp, *nextdqp;
+
+ ASSERT(xfs_Gqm);
+ ASSERT(xfs_Gqm->qm_nrefs > 0);
+
+ /*
+ * Go thru the freelist and destroy all inactive dquots.
+ */
+ xfs_qm_freelist_lock(xfs_Gqm);
+
+ for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
+ dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
+ xfs_dqlock(dqp);
+ nextdqp = dqp->dq_flnext;
+ if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+ ASSERT(dqp->q_mount == NULL);
+ ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+ ASSERT(dqp->HL_PREVP == NULL);
+ ASSERT(dqp->MPL_PREVP == NULL);
+ XQM_FREELIST_REMOVE(dqp);
+ xfs_dqunlock(dqp);
+ xfs_qm_dqdestroy(dqp);
+ } else {
+ xfs_dqunlock(dqp);
+ }
+ dqp = nextdqp;
+ }
+ xfs_qm_freelist_unlock(xfs_Gqm);
+
+ /*
+ * Destroy the entire XQM. If somebody mounts with quotaon, this'll
+ * be restarted.
+ */
+ XFS_QM_LOCK(xfs_Gqm);
+ XFS_QM_RELE(xfs_Gqm);
+ if (xfs_Gqm->qm_nrefs == 0) {
+ xfs_qm_destroy(xfs_Gqm);
+ xfs_Gqm = NULL;
+ }
+ XFS_QM_UNLOCK(xfs_Gqm);
+}
+
+/*
+ * This is called at mount time from xfs_mountfs to initialize the quotainfo
+ * structure and start the global quotamanager (xfs_Gqm) if it hasn't done
+ * so already. Note that the superblock has not been read in yet.
+ */
+void
+xfs_qm_mount_quotainit(
+ xfs_mount_t *mp,
+ uint flags)
+{
+ /*
+ * User or group quotas has to be on.
+ */
+ ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA));
+
+ /*
+ * Initialize the flags in the mount structure. From this point
+ * onwards we look at m_qflags to figure out if quotas's ON/OFF, etc.
+ * Note that we enforce nothing if accounting is off.
+ * ie. XFSMNT_*QUOTA must be ON for XFSMNT_*QUOTAENF.
+ * It isn't necessary to take the quotaoff lock to do this; this is
+ * called from mount.
+ */
+ if (flags & XFSMNT_UQUOTA) {
+ mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+ if (flags & XFSMNT_UQUOTAENF)
+ mp->m_qflags |= XFS_UQUOTA_ENFD;
+ }
+ if (flags & XFSMNT_GQUOTA) {
+ mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+ if (flags & XFSMNT_GQUOTAENF)
+ mp->m_qflags |= XFS_GQUOTA_ENFD;
+ }
+}
+
+/*
+ * Just destroy the quotainfo structure.
+ */
+void
+xfs_qm_unmount_quotadestroy(
+ xfs_mount_t *mp)
+{
+ if (mp->m_quotainfo)
+ xfs_qm_destroy_quotainfo(mp);
+}
+
+
+/*
+ * This is called from xfs_mountfs to start quotas and initialize all
+ * necessary data structures like quotainfo. This is also responsible for
+ * running a quotacheck as necessary. We are guaranteed that the superblock
+ * is consistently read in at this point.
+ */
+int
+xfs_qm_mount_quotas(
+ xfs_mount_t *mp,
+ int mfsi_flags)
+{
+ unsigned long s;
+ int error = 0;
+ uint sbf;
+
+ /*
+ * If a file system had quotas running earlier, but decided to
+ * mount without -o quota/uquota/gquota options, revoke the
+ * quotachecked license, and bail out.
+ */
+ if (! XFS_IS_QUOTA_ON(mp) &&
+ (mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT))) {
+ mp->m_qflags = 0;
+ goto write_changes;
+ }
+
+ /*
+ * If quotas on realtime volumes is not supported, we disable
+ * quotas immediately.
+ */
+ if (mp->m_sb.sb_rextents) {
+ cmn_err(CE_NOTE,
+ "Cannot turn on quotas for realtime filesystem %s",
+ mp->m_fsname);
+ mp->m_qflags = 0;
+ goto write_changes;
+ }
+
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+ cmn_err(CE_NOTE, "Attempting to turn on disk quotas.");
+#endif
+
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+ /*
+ * Allocate the quotainfo structure inside the mount struct, and
+ * create quotainode(s), and change/rev superblock if necessary.
+ */
+ if ((error = xfs_qm_init_quotainfo(mp))) {
+ /*
+ * We must turn off quotas.
+ */
+ ASSERT(mp->m_quotainfo == NULL);
+ mp->m_qflags = 0;
+ goto write_changes;
+ }
+ /*
+ * If any of the quotas are not consistent, do a quotacheck.
+ */
+ if (XFS_QM_NEED_QUOTACHECK(mp) &&
+ !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
+#ifdef DEBUG
+ cmn_err(CE_NOTE, "Doing a quotacheck. Please wait.");
+#endif
+ if ((error = xfs_qm_quotacheck(mp))) {
+ /* Quotacheck has failed and quotas have
+ * been disabled.
+ */
+ return XFS_ERROR(error);
+ }
+#ifdef DEBUG
+ cmn_err(CE_NOTE, "Done quotacheck.");
+#endif
+ }
+ write_changes:
+ /*
+ * We actually don't have to acquire the SB_LOCK at all.
+ * This can only be called from mount, and that's single threaded. XXX
+ */
+ s = XFS_SB_LOCK(mp);
+ sbf = mp->m_sb.sb_qflags;
+ mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
+ XFS_SB_UNLOCK(mp, s);
+
+ if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
+ if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
+ /*
+ * We could only have been turning quotas off.
+ * We aren't in very good shape actually because
+ * the incore structures are convinced that quotas are
+ * off, but the on disk superblock doesn't know that !
+ */
+ ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
+ xfs_fs_cmn_err(CE_ALERT, mp,
+ "XFS mount_quotas: Superblock update failed!");
+ }
+ }
+
+ if (error) {
+ xfs_fs_cmn_err(CE_WARN, mp,
+ "Failed to initialize disk quotas.");
+ }
+ return XFS_ERROR(error);
+}
+
+/*
+ * Called from the vfsops layer.
+ */
+int
+xfs_qm_unmount_quotas(
+ xfs_mount_t *mp)
+{
+ xfs_inode_t *uqp, *gqp;
+ int error = 0;
+
+ /*
+ * Release the dquots that root inode, et al might be holding,
+ * before we flush quotas and blow away the quotainfo structure.
+ */
+ ASSERT(mp->m_rootip);
+ xfs_qm_dqdetach(mp->m_rootip);
+ if (mp->m_rbmip)
+ xfs_qm_dqdetach(mp->m_rbmip);
+ if (mp->m_rsumip)
+ xfs_qm_dqdetach(mp->m_rsumip);
+
+ /*
+ * Flush out the quota inodes.
+ */
+ uqp = gqp = NULL;
+ if (mp->m_quotainfo) {
+ if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) {
+ xfs_ilock(uqp, XFS_ILOCK_EXCL);
+ xfs_iflock(uqp);
+ error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
+ xfs_iunlock(uqp, XFS_ILOCK_EXCL);
+ if (unlikely(error == EFSCORRUPTED)) {
+ XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
+ XFS_ERRLEVEL_LOW, mp);
+ goto out;
+ }
+ }
+ if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) {
+ xfs_ilock(gqp, XFS_ILOCK_EXCL);
+ xfs_iflock(gqp);
+ error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
+ xfs_iunlock(gqp, XFS_ILOCK_EXCL);
+ if (unlikely(error == EFSCORRUPTED)) {
+ XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
+ XFS_ERRLEVEL_LOW, mp);
+ goto out;
+ }
+ }
+ }
+ if (uqp) {
+ XFS_PURGE_INODE(uqp);
+ mp->m_quotainfo->qi_uquotaip = NULL;
+ }
+ if (gqp) {
+ XFS_PURGE_INODE(gqp);
+ mp->m_quotainfo->qi_gquotaip = NULL;
+ }
+out:
+ return XFS_ERROR(error);
+}
+
+/*
+ * Flush all dquots of the given file system to disk. The dquots are
+ * _not_ purged from memory here, just their data written to disk.
+ */
+int
+xfs_qm_dqflush_all(
+ xfs_mount_t *mp,
+ int flags)
+{
+ int recl;
+ xfs_dquot_t *dqp;
+ int niters;
+ int error;
+
+ if (mp->m_quotainfo == NULL)
+ return (0);
+ niters = 0;
+again:
+ xfs_qm_mplist_lock(mp);
+ FOREACH_DQUOT_IN_MP(dqp, mp) {
+ xfs_dqlock(dqp);
+ if (! XFS_DQ_IS_DIRTY(dqp)) {
+ xfs_dqunlock(dqp);
+ continue;
+ }
+ xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
+ /* XXX a sentinel would be better */
+ recl = XFS_QI_MPLRECLAIMS(mp);
+ if (! xfs_qm_dqflock_nowait(dqp)) {
+ /*
+ * If we can't grab the flush lock then check
+ * to see if the dquot has been flushed delayed
+ * write. If so, grab its buffer and send it
+ * out immediately. We'll be able to acquire
+ * the flush lock when the I/O completes.
+ */
+ xfs_qm_dqflock_pushbuf_wait(dqp);
+ }
+ /*
+ * Let go of the mplist lock. We don't want to hold it
+ * across a disk write.
+ */
+ xfs_qm_mplist_unlock(mp);
+ error = xfs_qm_dqflush(dqp, flags);
+ xfs_dqunlock(dqp);
+ if (error)
+ return (error);
+
+ xfs_qm_mplist_lock(mp);
+ if (recl != XFS_QI_MPLRECLAIMS(mp)) {
+ xfs_qm_mplist_unlock(mp);
+ /* XXX restart limit */
+ goto again;
+ }
+ }
+
+ xfs_qm_mplist_unlock(mp);
+ /* return ! busy */
+ return (0);
+}
+/*
+ * Release the group dquot pointers the user dquots may be
+ * carrying around as a hint. mplist is locked on entry and exit.
+ */
+STATIC void
+xfs_qm_detach_gdquots(
+ xfs_mount_t *mp)
+{
+ xfs_dquot_t *dqp, *gdqp;
+ int nrecl;
+
+ again:
+ ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
+ dqp = XFS_QI_MPLNEXT(mp);
+ while (dqp) {
+ xfs_dqlock(dqp);
+ if ((gdqp = dqp->q_gdquot)) {
+ xfs_dqlock(gdqp);
+ dqp->q_gdquot = NULL;
+ }
+ xfs_dqunlock(dqp);
+
+ if (gdqp) {
+ /*
+ * Can't hold the mplist lock across a dqput.
+ * XXXmust convert to marker based iterations here.
+ */
+ nrecl = XFS_QI_MPLRECLAIMS(mp);
+ xfs_qm_mplist_unlock(mp);
+ xfs_qm_dqput(gdqp);
+
+ xfs_qm_mplist_lock(mp);
+ if (nrecl != XFS_QI_MPLRECLAIMS(mp))
+ goto again;
+ }
+ dqp = dqp->MPL_NEXT;
+ }
+}
+
+/*
+ * Go through all the incore dquots of this file system and take them
+ * off the mplist and hashlist, if the dquot type matches the dqtype
+ * parameter. This is used when turning off quota accounting for
+ * users and/or groups, as well as when the filesystem is unmounting.
+ */
+STATIC int
+xfs_qm_dqpurge_int(
+ xfs_mount_t *mp,
+ uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/GQUOTA */
+{
+ xfs_dquot_t *dqp;
+ uint dqtype;
+ int nrecl;
+ xfs_dquot_t *nextdqp;
+ int nmisses;
+
+ if (mp->m_quotainfo == NULL)
+ return (0);
+
+ dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
+ dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
+
+ xfs_qm_mplist_lock(mp);
+
+ /*
+ * In the first pass through all incore dquots of this filesystem,
+ * we release the group dquot pointers the user dquots may be
+ * carrying around as a hint. We need to do this irrespective of
+ * what's being turned off.
+ */
+ xfs_qm_detach_gdquots(mp);
+
+ again:
+ nmisses = 0;
+ ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
+ /*
+ * Try to get rid of all of the unwanted dquots. The idea is to
+ * get them off mplist and hashlist, but leave them on freelist.
+ */
+ dqp = XFS_QI_MPLNEXT(mp);
+ while (dqp) {
+ /*
+ * It's OK to look at the type without taking dqlock here.
+ * We're holding the mplist lock here, and that's needed for
+ * a dqreclaim.
+ */
+ if ((dqp->dq_flags & dqtype) == 0) {
+ dqp = dqp->MPL_NEXT;
+ continue;
+ }
+
+ if (! xfs_qm_dqhashlock_nowait(dqp)) {
+ nrecl = XFS_QI_MPLRECLAIMS(mp);
+ xfs_qm_mplist_unlock(mp);
+ XFS_DQ_HASH_LOCK(dqp->q_hash);
+ xfs_qm_mplist_lock(mp);
+
+ /*
+ * XXXTheoretically, we can get into a very long
+ * ping pong game here.
+ * No one can be adding dquots to the mplist at
+ * this point, but somebody might be taking things off.
+ */
+ if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
+ XFS_DQ_HASH_UNLOCK(dqp->q_hash);
+ goto again;
+ }
+ }
+
+ /*
+ * Take the dquot off the mplist and hashlist. It may remain on
+ * freelist in INACTIVE state.
+ */
+ nextdqp = dqp->MPL_NEXT;
+ nmisses += xfs_qm_dqpurge(dqp, flags);
+ dqp = nextdqp;
+ }
+ xfs_qm_mplist_unlock(mp);
+ return nmisses;
+}
+
+int
+xfs_qm_dqpurge_all(
+ xfs_mount_t *mp,
+ uint flags)
+{
+ int ndquots;
+
+ /*
+ * Purge the dquot cache.
+ * None of the dquots should really be busy at this point.
+ */
+ if (mp->m_quotainfo) {
+ while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
+ delay(ndquots * 10);
+ }
+ }
+ return 0;
+}
+
+STATIC int
+xfs_qm_dqattach_one(
+ xfs_inode_t *ip,
+ xfs_dqid_t id,
+ uint type,
+ uint doalloc,
+ uint dolock,
+ xfs_dquot_t *udqhint, /* hint */
+ xfs_dquot_t **IO_idqpp)
+{
+ xfs_dquot_t *dqp;
+ int error;
+
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ error = 0;
+ /*
+ * See if we already have it in the inode itself. IO_idqpp is
+ * &i_udquot or &i_gdquot. This made the code look weird, but
+ * made the logic a lot simpler.
+ */
+ if ((dqp = *IO_idqpp)) {
+ if (dolock)
+ xfs_dqlock(dqp);
+ xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
+ goto done;
+ }
+
+ /*
+ * udqhint is the i_udquot field in inode, and is non-NULL only
+ * when the type arg is XFS_DQ_GROUP. Its purpose is to save a
+ * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
+ * the user dquot.
+ */
+ ASSERT(!udqhint || type == XFS_DQ_GROUP);
+ if (udqhint && !dolock)
+ xfs_dqlock(udqhint);
+
+ /*
+ * No need to take dqlock to look at the id.
+ * The ID can't change until it gets reclaimed, and it won't
+ * be reclaimed as long as we have a ref from inode and we hold
+ * the ilock.
+ */
+ if (udqhint &&
+ (dqp = udqhint->q_gdquot) &&
+ (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id)) {
+ ASSERT(XFS_DQ_IS_LOCKED(udqhint));
+ xfs_dqlock(dqp);
+ XFS_DQHOLD(dqp);
+ ASSERT(*IO_idqpp == NULL);
+ *IO_idqpp = dqp;
+ if (!dolock) {
+ xfs_dqunlock(dqp);
+ xfs_dqunlock(udqhint);
+ }
+ goto done;
+ }
+ /*
+ * We can't hold a dquot lock when we call the dqget code.
+ * We'll deadlock in no time, because of (not conforming to)
+ * lock ordering - the inodelock comes before any dquot lock,
+ * and we may drop and reacquire the ilock in xfs_qm_dqget().
+ */
+ if (udqhint)
+ xfs_dqunlock(udqhint);
+ /*
+ * Find the dquot from somewhere. This bumps the
+ * reference count of dquot and returns it locked.
+ * This can return ENOENT if dquot didn't exist on
+ * disk and we didn't ask it to allocate;
+ * ESRCH if quotas got turned off suddenly.
+ */
+ if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
+ doalloc|XFS_QMOPT_DOWARN, &dqp))) {
+ if (udqhint && dolock)
+ xfs_dqlock(udqhint);
+ goto done;
+ }
+
+ xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
+ /*
+ * dqget may have dropped and re-acquired the ilock, but it guarantees
+ * that the dquot returned is the one that should go in the inode.
+ */
+ *IO_idqpp = dqp;
+ ASSERT(dqp);
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ if (! dolock) {
+ xfs_dqunlock(dqp);
+ goto done;
+ }
+ if (! udqhint)
+ goto done;
+
+ ASSERT(udqhint);
+ ASSERT(dolock);
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ if (! xfs_qm_dqlock_nowait(udqhint)) {
+ xfs_dqunlock(dqp);
+ xfs_dqlock(udqhint);
+ xfs_dqlock(dqp);
+ }
+ done:
+#ifdef QUOTADEBUG
+ if (udqhint) {
+ if (dolock)
+ ASSERT(XFS_DQ_IS_LOCKED(udqhint));
+ }
+ if (! error) {
+ if (dolock)
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ }
+#endif
+ return (error);
+}
+
+
+/*
+ * Given a udquot and gdquot, attach a ptr to the group dquot in the
+ * udquot as a hint for future lookups. The idea sounds simple, but the
+ * execution isn't, because the udquot might have a group dquot attached
+ * already and getting rid of that gets us into lock ordering contraints.
+ * The process is complicated more by the fact that the dquots may or may not
+ * be locked on entry.
+ */
+STATIC void
+xfs_qm_dqattach_grouphint(
+ xfs_dquot_t *udq,
+ xfs_dquot_t *gdq,
+ uint locked)
+{
+ xfs_dquot_t *tmp;
+
+#ifdef QUOTADEBUG
+ if (locked) {
+ ASSERT(XFS_DQ_IS_LOCKED(udq));
+ ASSERT(XFS_DQ_IS_LOCKED(gdq));
+ }
+#endif
+ if (! locked)
+ xfs_dqlock(udq);
+
+ if ((tmp = udq->q_gdquot)) {
+ if (tmp == gdq) {
+ if (! locked)
+ xfs_dqunlock(udq);
+ return;
+ }
+
+ udq->q_gdquot = NULL;
+ /*
+ * We can't keep any dqlocks when calling dqrele,
+ * because the freelist lock comes before dqlocks.
+ */
+ xfs_dqunlock(udq);
+ if (locked)
+ xfs_dqunlock(gdq);
+ /*
+ * we took a hard reference once upon a time in dqget,
+ * so give it back when the udquot no longer points at it
+ * dqput() does the unlocking of the dquot.
+ */
+ xfs_qm_dqrele(tmp);
+
+ xfs_dqlock(udq);
+ xfs_dqlock(gdq);
+
+ } else {
+ ASSERT(XFS_DQ_IS_LOCKED(udq));
+ if (! locked) {
+ xfs_dqlock(gdq);
+ }
+ }
+
+ ASSERT(XFS_DQ_IS_LOCKED(udq));
+ ASSERT(XFS_DQ_IS_LOCKED(gdq));
+ /*
+ * Somebody could have attached a gdquot here,
+ * when we dropped the uqlock. If so, just do nothing.
+ */
+ if (udq->q_gdquot == NULL) {
+ XFS_DQHOLD(gdq);
+ udq->q_gdquot = gdq;
+ }
+ if (! locked) {
+ xfs_dqunlock(gdq);
+ xfs_dqunlock(udq);
+ }
+}
+
+
+/*
+ * Given a locked inode, attach dquot(s) to it, taking UQUOTAON / GQUOTAON
+ * in to account.
+ * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
+ * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
+ * much made this code a complete mess, but it has been pretty useful.
+ * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
+ * Inode may get unlocked and relocked in here, and the caller must deal with
+ * the consequences.
+ */
+int
+xfs_qm_dqattach(
+ xfs_inode_t *ip,
+ uint flags)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ uint nquotas = 0;
+ int error = 0;
+
+ if ((! XFS_IS_QUOTA_ON(mp)) ||
+ (! XFS_NOT_DQATTACHED(mp, ip)) ||
+ (ip->i_ino == mp->m_sb.sb_uquotino) ||
+ (ip->i_ino == mp->m_sb.sb_gquotino))
+ return (0);
+
+ ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
+ XFS_ISLOCKED_INODE_EXCL(ip));
+
+ if (! (flags & XFS_QMOPT_ILOCKED))
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ if (XFS_IS_UQUOTA_ON(mp)) {
+ error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
+ flags & XFS_QMOPT_DQALLOC,
+ flags & XFS_QMOPT_DQLOCK,
+ NULL, &ip->i_udquot);
+ if (error)
+ goto done;
+ nquotas++;
+ }
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ if (XFS_IS_GQUOTA_ON(mp)) {
+ error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+ flags & XFS_QMOPT_DQALLOC,
+ flags & XFS_QMOPT_DQLOCK,
+ ip->i_udquot, &ip->i_gdquot);
+ /*
+ * Don't worry about the udquot that we may have
+ * attached above. It'll get detached, if not already.
+ */
+ if (error)
+ goto done;
+ nquotas++;
+ }
+
+ /*
+ * Attach this group quota to the user quota as a hint.
+ * This WON'T, in general, result in a thrash.
+ */
+ if (nquotas == 2) {
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(ip->i_udquot);
+ ASSERT(ip->i_gdquot);
+
+ /*
+ * We may or may not have the i_udquot locked at this point,
+ * but this check is OK since we don't depend on the i_gdquot to
+ * be accurate 100% all the time. It is just a hint, and this
+ * will succeed in general.
+ */
+ if (ip->i_udquot->q_gdquot == ip->i_gdquot)
+ goto done;
+ /*
+ * Attach i_gdquot to the gdquot hint inside the i_udquot.
+ */
+ xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
+ flags & XFS_QMOPT_DQLOCK);
+ }
+
+ done:
+
+#ifdef QUOTADEBUG
+ if (! error) {
+ if (ip->i_udquot) {
+ if (flags & XFS_QMOPT_DQLOCK)
+ ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
+ }
+ if (ip->i_gdquot) {
+ if (flags & XFS_QMOPT_DQLOCK)
+ ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
+ }
+ if (XFS_IS_UQUOTA_ON(mp))
+ ASSERT(ip->i_udquot);
+ if (XFS_IS_GQUOTA_ON(mp))
+ ASSERT(ip->i_gdquot);
+ }
+#endif
+
+ if (! (flags & XFS_QMOPT_ILOCKED))
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+#ifdef QUOTADEBUG
+ else
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+#endif
+ return (error);
+}
+
+/*
+ * Release dquots (and their references) if any.
+ * The inode should be locked EXCL except when this's called by
+ * xfs_ireclaim.
+ */
+void
+xfs_qm_dqdetach(
+ xfs_inode_t *ip)
+{
+ if (!(ip->i_udquot || ip->i_gdquot))
+ return;
+
+ ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
+ ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
+ if (ip->i_udquot)
+ xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
+ if (ip->i_udquot) {
+ xfs_qm_dqrele(ip->i_udquot);
+ ip->i_udquot = NULL;
+ }
+ if (ip->i_gdquot) {
+ xfs_qm_dqrele(ip->i_gdquot);
+ ip->i_gdquot = NULL;
+ }
+}
+
+/*
+ * This is called by VFS_SYNC and flags arg determines the caller,
+ * and its motives, as done in xfs_sync.
+ *
+ * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
+ * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
+ * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
+ */
+
+int
+xfs_qm_sync(
+ xfs_mount_t *mp,
+ short flags)
+{
+ int recl, restarts;
+ xfs_dquot_t *dqp;
+ uint flush_flags;
+ boolean_t nowait;
+ int error;
+
+ restarts = 0;
+ /*
+ * We won't block unless we are asked to.
+ */
+ nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
+
+ again:
+ xfs_qm_mplist_lock(mp);
+ /*
+ * dqpurge_all() also takes the mplist lock and iterate thru all dquots
+ * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
+ * when we have the mplist lock, we know that dquots will be consistent
+ * as long as we have it locked.
+ */
+ if (! XFS_IS_QUOTA_ON(mp)) {
+ xfs_qm_mplist_unlock(mp);
+ return (0);
+ }
+ FOREACH_DQUOT_IN_MP(dqp, mp) {
+ /*
+ * If this is vfs_sync calling, then skip the dquots that
+ * don't 'seem' to be dirty. ie. don't acquire dqlock.
+ * This is very similar to what xfs_sync does with inodes.
+ */
+ if (flags & SYNC_BDFLUSH) {
+ if (! XFS_DQ_IS_DIRTY(dqp))
+ continue;
+ }
+
+ if (nowait) {
+ /*
+ * Try to acquire the dquot lock. We are NOT out of
+ * lock order, but we just don't want to wait for this
+ * lock, unless somebody wanted us to.
+ */
+ if (! xfs_qm_dqlock_nowait(dqp))
+ continue;
+ } else {
+ xfs_dqlock(dqp);
+ }
+
+ /*
+ * Now, find out for sure if this dquot is dirty or not.
+ */
+ if (! XFS_DQ_IS_DIRTY(dqp)) {
+ xfs_dqunlock(dqp);
+ continue;
+ }
+
+ /* XXX a sentinel would be better */
+ recl = XFS_QI_MPLRECLAIMS(mp);
+ if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (nowait) {
+ xfs_dqunlock(dqp);
+ continue;
+ }
+ /*
+ * If we can't grab the flush lock then if the caller
+ * really wanted us to give this our best shot,
+ * see if we can give a push to the buffer before we wait
+ * on the flush lock. At this point, we know that
+ * eventhough the dquot is being flushed,
+ * it has (new) dirty data.
+ */
+ xfs_qm_dqflock_pushbuf_wait(dqp);
+ }
+ /*
+ * Let go of the mplist lock. We don't want to hold it
+ * across a disk write
+ */
+ flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
+ xfs_qm_mplist_unlock(mp);
+ xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
+ error = xfs_qm_dqflush(dqp, flush_flags);
+ xfs_dqunlock(dqp);
+ if (error && XFS_FORCED_SHUTDOWN(mp))
+ return(0); /* Need to prevent umount failure */
+ else if (error)
+ return (error);
+
+ xfs_qm_mplist_lock(mp);
+ if (recl != XFS_QI_MPLRECLAIMS(mp)) {
+ if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
+ break;
+
+ xfs_qm_mplist_unlock(mp);
+ goto again;
+ }
+ }
+
+ xfs_qm_mplist_unlock(mp);
+ return (0);
+}
+
+
+/*
+ * This initializes all the quota information that's kept in the
+ * mount structure
+ */
+int
+xfs_qm_init_quotainfo(
+ xfs_mount_t *mp)
+{
+ xfs_quotainfo_t *qinf;
+ int error;
+ xfs_dquot_t *dqp;
+
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+ /*
+ * Tell XQM that we exist as soon as possible.
+ */
+ if ((error = xfs_qm_hold_quotafs_ref(mp))) {
+ return (error);
+ }
+
+ qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
+
+ /*
+ * See if quotainodes are setup, and if not, allocate them,
+ * and change the superblock accordingly.
+ */
+ if ((error = xfs_qm_init_quotainos(mp))) {
+ kmem_free(qinf, sizeof(xfs_quotainfo_t));
+ mp->m_quotainfo = NULL;
+ return (error);
+ }
+
+ spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
+ xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
+ qinf->qi_dqreclaims = 0;
+
+ /* mutex used to serialize quotaoffs */
+ mutex_init(&qinf->qi_quotaofflock, MUTEX_DEFAULT, "qoff");
+
+ /* Precalc some constants */
+ qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+ ASSERT(qinf->qi_dqchunklen);
+ qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
+ do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
+
+ mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
+
+ /*
+ * We try to get the limits from the superuser's limits fields.
+ * This is quite hacky, but it is standard quota practice.
+ * We look at the USR dquot with id == 0 first, but if user quotas
+ * are not enabled we goto the GRP dquot with id == 0.
+ * We don't really care to keep separate default limits for user
+ * and group quotas, at least not at this point.
+ */
+ error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
+ (XFS_IS_UQUOTA_RUNNING(mp)) ?
+ XFS_DQ_USER : XFS_DQ_GROUP,
+ XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
+ &dqp);
+ if (! error) {
+ xfs_disk_dquot_t *ddqp = &dqp->q_core;
+
+ /*
+ * The warnings and timers set the grace period given to
+ * a user or group before he or she can not perform any
+ * more writing. If it is zero, a default is used.
+ */
+ qinf->qi_btimelimit =
+ INT_GET(ddqp->d_btimer, ARCH_CONVERT) ?
+ INT_GET(ddqp->d_btimer, ARCH_CONVERT) :
+ XFS_QM_BTIMELIMIT;
+ qinf->qi_itimelimit =
+ INT_GET(ddqp->d_itimer, ARCH_CONVERT) ?
+ INT_GET(ddqp->d_itimer, ARCH_CONVERT) :
+ XFS_QM_ITIMELIMIT;
+ qinf->qi_rtbtimelimit =
+ INT_GET(ddqp->d_rtbtimer, ARCH_CONVERT) ?
+ INT_GET(ddqp->d_rtbtimer, ARCH_CONVERT) :
+ XFS_QM_RTBTIMELIMIT;
+ qinf->qi_bwarnlimit =
+ INT_GET(ddqp->d_bwarns, ARCH_CONVERT) ?
+ INT_GET(ddqp->d_bwarns, ARCH_CONVERT) :
+ XFS_QM_BWARNLIMIT;
+ qinf->qi_iwarnlimit =
+ INT_GET(ddqp->d_iwarns, ARCH_CONVERT) ?
+ INT_GET(ddqp->d_iwarns, ARCH_CONVERT) :
+ XFS_QM_IWARNLIMIT;
+ qinf->qi_bhardlimit =
+ INT_GET(ddqp->d_blk_hardlimit, ARCH_CONVERT);
+ qinf->qi_bsoftlimit =
+ INT_GET(ddqp->d_blk_softlimit, ARCH_CONVERT);
+ qinf->qi_ihardlimit =
+ INT_GET(ddqp->d_ino_hardlimit, ARCH_CONVERT);
+ qinf->qi_isoftlimit =
+ INT_GET(ddqp->d_ino_softlimit, ARCH_CONVERT);
+ qinf->qi_rtbhardlimit =
+ INT_GET(ddqp->d_rtb_hardlimit, ARCH_CONVERT);
+ qinf->qi_rtbsoftlimit =
+ INT_GET(ddqp->d_rtb_softlimit, ARCH_CONVERT);
+
+ /*
+ * We sent the XFS_QMOPT_DQSUSER flag to dqget because
+ * we don't want this dquot cached. We haven't done a
+ * quotacheck yet, and quotacheck doesn't like incore dquots.
+ */
+ xfs_qm_dqdestroy(dqp);
+ } else {
+ qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
+ qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
+ qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
+ qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
+ qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
+ }
+
+ return (0);
+}
+
+
+/*
+ * Gets called when unmounting a filesystem or when all quotas get
+ * turned off.
+ * This purges the quota inodes, destroys locks and frees itself.
+ */
+void
+xfs_qm_destroy_quotainfo(
+ xfs_mount_t *mp)
+{
+ xfs_quotainfo_t *qi;
+
+ qi = mp->m_quotainfo;
+ ASSERT(qi != NULL);
+ ASSERT(xfs_Gqm != NULL);
+
+ /*
+ * Release the reference that XQM kept, so that we know
+ * when the XQM structure should be freed. We cannot assume
+ * that xfs_Gqm is non-null after this point.
+ */
+ xfs_qm_rele_quotafs_ref(mp);
+
+ spinlock_destroy(&qi->qi_pinlock);
+ xfs_qm_list_destroy(&qi->qi_dqlist);
+
+ if (qi->qi_uquotaip) {
+ XFS_PURGE_INODE(qi->qi_uquotaip);
+ qi->qi_uquotaip = NULL; /* paranoia */
+ }
+ if (qi->qi_gquotaip) {
+ XFS_PURGE_INODE(qi->qi_gquotaip);
+ qi->qi_gquotaip = NULL;
+ }
+ mutex_destroy(&qi->qi_quotaofflock);
+ kmem_free(qi, sizeof(xfs_quotainfo_t));
+ mp->m_quotainfo = NULL;
+}
+
+
+
+/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_list_init(
+ xfs_dqlist_t *list,
+ char *str,
+ int n)
+{
+ mutex_init(&list->qh_lock, MUTEX_DEFAULT, str);
+ list->qh_next = NULL;
+ list->qh_version = 0;
+ list->qh_nelems = 0;
+}
+
+STATIC void
+xfs_qm_list_destroy(
+ xfs_dqlist_t *list)
+{
+ mutex_destroy(&(list->qh_lock));
+}
+
+
+/*
+ * Stripped down version of dqattach. This doesn't attach, or even look at the
+ * dquots attached to the inode. The rationale is that there won't be any
+ * attached at the time this is called from quotacheck.
+ */
+STATIC int
+xfs_qm_dqget_noattach(
+ xfs_inode_t *ip,
+ xfs_dquot_t **O_udqpp,
+ xfs_dquot_t **O_gdqpp)
+{
+ int error;
+ xfs_mount_t *mp;
+ xfs_dquot_t *udqp, *gdqp;
+
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ mp = ip->i_mount;
+ udqp = NULL;
+ gdqp = NULL;
+
+ if (XFS_IS_UQUOTA_ON(mp)) {
+ ASSERT(ip->i_udquot == NULL);
+ /*
+ * We want the dquot allocated if it doesn't exist.
+ */
+ if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
+ XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
+ &udqp))) {
+ /*
+ * Shouldn't be able to turn off quotas here.
+ */
+ ASSERT(error != ESRCH);
+ ASSERT(error != ENOENT);
+ return (error);
+ }
+ ASSERT(udqp);
+ }
+
+ if (XFS_IS_GQUOTA_ON(mp)) {
+ ASSERT(ip->i_gdquot == NULL);
+ if (udqp)
+ xfs_dqunlock(udqp);
+ if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+ XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
+ &gdqp))) {
+ if (udqp)
+ xfs_qm_dqrele(udqp);
+ ASSERT(error != ESRCH);
+ ASSERT(error != ENOENT);
+ return (error);
+ }
+ ASSERT(gdqp);
+
+ /* Reacquire the locks in the right order */
+ if (udqp) {
+ if (! xfs_qm_dqlock_nowait(udqp)) {
+ xfs_dqunlock(gdqp);
+ xfs_dqlock(udqp);
+ xfs_dqlock(gdqp);
+ }
+ }
+ }
+
+ *O_udqpp = udqp;
+ *O_gdqpp = gdqp;
+
+#ifdef QUOTADEBUG
+ if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
+ if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
+#endif
+ return (0);
+}
+
+/*
+ * Create an inode and return with a reference already taken, but unlocked
+ * This is how we create quota inodes
+ */
+STATIC int
+xfs_qm_qino_alloc(
+ xfs_mount_t *mp,
+ xfs_inode_t **ip,
+ __int64_t sbfields,
+ uint flags)
+{
+ xfs_trans_t *tp;
+ int error;
+ unsigned long s;
+ cred_t zerocr;
+ int committed;
+
+ tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE);
+ if ((error = xfs_trans_reserve(tp,
+ XFS_QM_QINOCREATE_SPACE_RES(mp),
+ XFS_CREATE_LOG_RES(mp), 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_CREATE_LOG_COUNT))) {
+ xfs_trans_cancel(tp, 0);
+ return (error);
+ }
+ memset(&zerocr, 0, sizeof(zerocr));
+
+ if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, S_IFREG, 1, 0,
+ &zerocr, 0, 1, ip, &committed))) {
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+ XFS_TRANS_ABORT);
+ return (error);
+ }
+
+ /*
+ * Keep an extra reference to this quota inode. This inode is
+ * locked exclusively and joined to the transaction already.
+ */
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip));
+ VN_HOLD(XFS_ITOV((*ip)));
+
+ /*
+ * Make the changes in the superblock, and log those too.
+ * sbfields arg may contain fields other than *QUOTINO;
+ * VERSIONNUM for example.
+ */
+ s = XFS_SB_LOCK(mp);
+ if (flags & XFS_QMOPT_SBVERSION) {
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+ unsigned oldv = mp->m_sb.sb_versionnum;
+#endif
+ ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb));
+ ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+ XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
+ (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+ XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
+
+ XFS_SB_VERSION_ADDQUOTA(&mp->m_sb);
+ mp->m_sb.sb_uquotino = NULLFSINO;
+ mp->m_sb.sb_gquotino = NULLFSINO;
+
+ /* qflags will get updated _after_ quotacheck */
+ mp->m_sb.sb_qflags = 0;
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+ cmn_err(CE_NOTE,
+ "Old superblock version %x, converting to %x.",
+ oldv, mp->m_sb.sb_versionnum);
+#endif
+ }
+ if (flags & XFS_QMOPT_UQUOTA)
+ mp->m_sb.sb_uquotino = (*ip)->i_ino;
+ else
+ mp->m_sb.sb_gquotino = (*ip)->i_ino;
+ XFS_SB_UNLOCK(mp, s);
+ xfs_mod_sb(tp, sbfields);
+
+ if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
+ NULL))) {
+ xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
+ return (error);
+ }
+ return (0);
+}
+
+
+STATIC int
+xfs_qm_reset_dqcounts(
+ xfs_mount_t *mp,
+ xfs_buf_t *bp,
+ xfs_dqid_t id,
+ uint type)
+{
+ xfs_disk_dquot_t *ddq;
+ int j;
+
+ xfs_buftrace("RESET DQUOTS", bp);
+ /*
+ * Reset all counters and timers. They'll be
+ * started afresh by xfs_qm_quotacheck.
+ */
+#ifdef DEBUG
+ j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+ do_div(j, sizeof(xfs_dqblk_t));
+ ASSERT(XFS_QM_DQPERBLK(mp) == j);
+#endif
+ ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
+ for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
+ /*
+ * Do a sanity check, and if needed, repair the dqblk. Don't
+ * output any warnings because it's perfectly possible to
+ * find unitialized dquot blks. See comment in xfs_qm_dqcheck.
+ */
+ (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
+ "xfs_quotacheck");
+ INT_SET(ddq->d_bcount, ARCH_CONVERT, 0ULL);
+ INT_SET(ddq->d_icount, ARCH_CONVERT, 0ULL);
+ INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL);
+ INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0);
+ INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0);
+ INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL);
+ INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL);
+ ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
+ }
+
+ return (0);
+}
+
+STATIC int
+xfs_qm_dqiter_bufs(
+ xfs_mount_t *mp,
+ xfs_dqid_t firstid,
+ xfs_fsblock_t bno,
+ xfs_filblks_t blkcnt,
+ uint flags)
+{
+ xfs_buf_t *bp;
+ int error;
+ int notcommitted;
+ int incr;
+
+ ASSERT(blkcnt > 0);
+ notcommitted = 0;
+ incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
+ XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
+ error = 0;
+
+ /*
+ * Blkcnt arg can be a very big number, and might even be
+ * larger than the log itself. So, we have to break it up into
+ * manageable-sized transactions.
+ * Note that we don't start a permanent transaction here; we might
+ * not be able to get a log reservation for the whole thing up front,
+ * and we don't really care to either, because we just discard
+ * everything if we were to crash in the middle of this loop.
+ */
+ while (blkcnt--) {
+ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, bno),
+ (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
+ if (error)
+ break;
+
+ (void) xfs_qm_reset_dqcounts(mp, bp, firstid,
+ flags & XFS_QMOPT_UQUOTA ?
+ XFS_DQ_USER : XFS_DQ_GROUP);
+ xfs_bdwrite(mp, bp);
+ /*
+ * goto the next block.
+ */
+ bno++;
+ firstid += XFS_QM_DQPERBLK(mp);
+ }
+ return (error);
+}
+
+/*
+ * Iterate over all allocated USR/GRP dquots in the system, calling a
+ * caller supplied function for every chunk of dquots that we find.
+ */
+STATIC int
+xfs_qm_dqiterate(
+ xfs_mount_t *mp,
+ xfs_inode_t *qip,
+ uint flags)
+{
+ xfs_bmbt_irec_t *map;
+ int i, nmaps; /* number of map entries */
+ int error; /* return value */
+ xfs_fileoff_t lblkno;
+ xfs_filblks_t maxlblkcnt;
+ xfs_dqid_t firstid;
+ xfs_fsblock_t rablkno;
+ xfs_filblks_t rablkcnt;
+
+ error = 0;
+ /*
+ * This looks racey, but we can't keep an inode lock across a
+ * trans_reserve. But, this gets called during quotacheck, and that
+ * happens only at mount time which is single threaded.
+ */
+ if (qip->i_d.di_nblocks == 0)
+ return (0);
+
+ map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
+
+ lblkno = 0;
+ maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+ do {
+ nmaps = XFS_DQITER_MAP_SIZE;
+ /*
+ * We aren't changing the inode itself. Just changing
+ * some of its data. No new blocks are added here, and
+ * the inode is never added to the transaction.
+ */
+ xfs_ilock(qip, XFS_ILOCK_SHARED);
+ error = xfs_bmapi(NULL, qip, lblkno,
+ maxlblkcnt - lblkno,
+ XFS_BMAPI_METADATA,
+ NULL,
+ 0, map, &nmaps, NULL);
+ xfs_iunlock(qip, XFS_ILOCK_SHARED);
+ if (error)
+ break;
+
+ ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
+ for (i = 0; i < nmaps; i++) {
+ ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
+ ASSERT(map[i].br_blockcount);
+
+
+ lblkno += map[i].br_blockcount;
+
+ if (map[i].br_startblock == HOLESTARTBLOCK)
+ continue;
+
+ firstid = (xfs_dqid_t) map[i].br_startoff *
+ XFS_QM_DQPERBLK(mp);
+ /*
+ * Do a read-ahead on the next extent.
+ */
+ if ((i+1 < nmaps) &&
+ (map[i+1].br_startblock != HOLESTARTBLOCK)) {
+ rablkcnt = map[i+1].br_blockcount;
+ rablkno = map[i+1].br_startblock;
+ while (rablkcnt--) {
+ xfs_baread(mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, rablkno),
+ (int)XFS_QI_DQCHUNKLEN(mp));
+ rablkno++;
+ }
+ }
+ /*
+ * Iterate thru all the blks in the extent and
+ * reset the counters of all the dquots inside them.
+ */
+ if ((error = xfs_qm_dqiter_bufs(mp,
+ firstid,
+ map[i].br_startblock,
+ map[i].br_blockcount,
+ flags))) {
+ break;
+ }
+ }
+
+ if (error)
+ break;
+ } while (nmaps > 0);
+
+ kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
+
+ return (error);
+}
+
+/*
+ * Called by dqusage_adjust in doing a quotacheck.
+ * Given the inode, and a dquot (either USR or GRP, doesn't matter),
+ * this updates its incore copy as well as the buffer copy. This is
+ * so that once the quotacheck is done, we can just log all the buffers,
+ * as opposed to logging numerous updates to individual dquots.
+ */
+STATIC void
+xfs_qm_quotacheck_dqadjust(
+ xfs_dquot_t *dqp,
+ xfs_qcnt_t nblks,
+ xfs_qcnt_t rtblks)
+{
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
+ /*
+ * Adjust the inode count and the block count to reflect this inode's
+ * resource usage.
+ */
+ INT_MOD(dqp->q_core.d_icount, ARCH_CONVERT, +1);
+ dqp->q_res_icount++;
+ if (nblks) {
+ INT_MOD(dqp->q_core.d_bcount, ARCH_CONVERT, nblks);
+ dqp->q_res_bcount += nblks;
+ }
+ if (rtblks) {
+ INT_MOD(dqp->q_core.d_rtbcount, ARCH_CONVERT, rtblks);
+ dqp->q_res_rtbcount += rtblks;
+ }
+
+ /*
+ * Set default limits, adjust timers (since we changed usages)
+ */
+ if (! XFS_IS_SUSER_DQUOT(dqp)) {
+ xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
+ xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
+ }
+
+ dqp->dq_flags |= XFS_DQ_DIRTY;
+}
+
+STATIC int
+xfs_qm_get_rtblks(
+ xfs_inode_t *ip,
+ xfs_qcnt_t *O_rtblks)
+{
+ xfs_filblks_t rtblks; /* total rt blks */
+ xfs_ifork_t *ifp; /* inode fork pointer */
+ xfs_extnum_t nextents; /* number of extent entries */
+ xfs_bmbt_rec_t *base; /* base of extent array */
+ xfs_bmbt_rec_t *ep; /* pointer to an extent entry */
+ int error;
+
+ ASSERT(XFS_IS_REALTIME_INODE(ip));
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
+ return (error);
+ }
+ rtblks = 0;
+ nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+ base = &ifp->if_u1.if_extents[0];
+ for (ep = base; ep < &base[nextents]; ep++)
+ rtblks += xfs_bmbt_get_blockcount(ep);
+ *O_rtblks = (xfs_qcnt_t)rtblks;
+ return (0);
+}
+
+/*
+ * callback routine supplied to bulkstat(). Given an inumber, find its
+ * dquots and update them to account for resources taken by that inode.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqusage_adjust(
+ xfs_mount_t *mp, /* mount point for filesystem */
+ xfs_ino_t ino, /* inode number to get data for */
+ void __user *buffer, /* not used */
+ int ubsize, /* not used */
+ void *private_data, /* not used */
+ xfs_daddr_t bno, /* starting block of inode cluster */
+ int *ubused, /* not used */
+ void *dip, /* on-disk inode pointer (not used) */
+ int *res) /* result code value */
+{
+ xfs_inode_t *ip;
+ xfs_dquot_t *udqp, *gdqp;
+ xfs_qcnt_t nblks, rtblks;
+ int error;
+
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+ /*
+ * rootino must have its resources accounted for, not so with the quota
+ * inodes.
+ */
+ if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
+ *res = BULKSTAT_RV_NOTHING;
+ return XFS_ERROR(EINVAL);
+ }
+
+ /*
+ * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
+ * interface expects the inode to be exclusively locked because that's
+ * the case in all other instances. It's OK that we do this because
+ * quotacheck is done only at mount time.
+ */
+ if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
+ *res = BULKSTAT_RV_NOTHING;
+ return (error);
+ }
+
+ if (ip->i_d.di_mode == 0) {
+ xfs_iput_new(ip, XFS_ILOCK_EXCL);
+ *res = BULKSTAT_RV_NOTHING;
+ return XFS_ERROR(ENOENT);
+ }
+
+ /*
+ * Obtain the locked dquots. In case of an error (eg. allocation
+ * fails for ENOSPC), we return the negative of the error number
+ * to bulkstat, so that it can get propagated to quotacheck() and
+ * making us disable quotas for the file system.
+ */
+ if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
+ xfs_iput(ip, XFS_ILOCK_EXCL);
+ *res = BULKSTAT_RV_GIVEUP;
+ return (error);
+ }
+
+ rtblks = 0;
+ if (! XFS_IS_REALTIME_INODE(ip)) {
+ nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
+ } else {
+ /*
+ * Walk thru the extent list and count the realtime blocks.
+ */
+ if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
+ xfs_iput(ip, XFS_ILOCK_EXCL);
+ if (udqp)
+ xfs_qm_dqput(udqp);
+ if (gdqp)
+ xfs_qm_dqput(gdqp);
+ *res = BULKSTAT_RV_GIVEUP;
+ return (error);
+ }
+ nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
+ }
+ ASSERT(ip->i_delayed_blks == 0);
+
+ /*
+ * We can't release the inode while holding its dquot locks.
+ * The inode can go into inactive and might try to acquire the dquotlocks.
+ * So, just unlock here and do a vn_rele at the end.
+ */
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ /*
+ * Add the (disk blocks and inode) resources occupied by this
+ * inode to its dquots. We do this adjustment in the incore dquot,
+ * and also copy the changes to its buffer.
+ * We don't care about putting these changes in a transaction
+ * envelope because if we crash in the middle of a 'quotacheck'
+ * we have to start from the beginning anyway.
+ * Once we're done, we'll log all the dquot bufs.
+ *
+ * The *QUOTA_ON checks below may look pretty racey, but quotachecks
+ * and quotaoffs don't race. (Quotachecks happen at mount time only).
+ */
+ if (XFS_IS_UQUOTA_ON(mp)) {
+ ASSERT(udqp);
+ xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
+ xfs_qm_dqput(udqp);
+ }
+ if (XFS_IS_GQUOTA_ON(mp)) {
+ ASSERT(gdqp);
+ xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
+ xfs_qm_dqput(gdqp);
+ }
+ /*
+ * Now release the inode. This will send it to 'inactive', and
+ * possibly even free blocks.
+ */
+ VN_RELE(XFS_ITOV(ip));
+
+ /*
+ * Goto next inode.
+ */
+ *res = BULKSTAT_RV_DIDONE;
+ return (0);
+}
+
+/*
+ * Walk thru all the filesystem inodes and construct a consistent view
+ * of the disk quota world. If the quotacheck fails, disable quotas.
+ */
+int
+xfs_qm_quotacheck(
+ xfs_mount_t *mp)
+{
+ int done, count, error;
+ xfs_ino_t lastino;
+ size_t structsz;
+ xfs_inode_t *uip, *gip;
+ uint flags;
+
+ count = INT_MAX;
+ structsz = 1;
+ lastino = 0;
+ flags = 0;
+
+ ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+ /*
+ * There should be no cached dquots. The (simplistic) quotacheck
+ * algorithm doesn't like that.
+ */
+ ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
+
+ cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
+
+ /*
+ * First we go thru all the dquots on disk, USR and GRP, and reset
+ * their counters to zero. We need a clean slate.
+ * We don't log our changes till later.
+ */
+ if ((uip = XFS_QI_UQIP(mp))) {
+ if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
+ goto error_return;
+ flags |= XFS_UQUOTA_CHKD;
+ }
+
+ if ((gip = XFS_QI_GQIP(mp))) {
+ if ((error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA)))
+ goto error_return;
+ flags |= XFS_GQUOTA_CHKD;
+ }
+
+ do {
+ /*
+ * Iterate thru all the inodes in the file system,
+ * adjusting the corresponding dquot counters in core.
+ */
+ if ((error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_dqusage_adjust, NULL,
+ structsz, NULL,
+ BULKSTAT_FG_IGET|BULKSTAT_FG_VFSLOCKED,
+ &done)))
+ break;
+
+ } while (! done);
+
+ /*
+ * We can get this error if we couldn't do a dquot allocation inside
+ * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
+ * dirty dquots that might be cached, we just want to get rid of them
+ * and turn quotaoff. The dquots won't be attached to any of the inodes
+ * at this point (because we intentionally didn't in dqget_noattach).
+ */
+ if (error) {
+ xfs_qm_dqpurge_all(mp,
+ XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
+ XFS_QMOPT_QUOTAOFF);
+ goto error_return;
+ }
+ /*
+ * We've made all the changes that we need to make incore.
+ * Now flush_them down to disk buffers.
+ */
+ xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
+
+ /*
+ * We didn't log anything, because if we crashed, we'll have to
+ * start the quotacheck from scratch anyway. However, we must make
+ * sure that our dquot changes are secure before we put the
+ * quotacheck'd stamp on the superblock. So, here we do a synchronous
+ * flush.
+ */
+ XFS_bflush(mp->m_ddev_targp);
+
+ /*
+ * If one type of quotas is off, then it will lose its
+ * quotachecked status, since we won't be doing accounting for
+ * that type anymore.
+ */
+ mp->m_qflags &= ~(XFS_GQUOTA_CHKD | XFS_UQUOTA_CHKD);
+ mp->m_qflags |= flags;
+
+ XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
+
+ error_return:
+ if (error) {
+ cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
+ "Disabling quotas.",
+ mp->m_fsname, error);
+ /*
+ * We must turn off quotas.
+ */
+ ASSERT(mp->m_quotainfo != NULL);
+ ASSERT(xfs_Gqm != NULL);
+ xfs_qm_destroy_quotainfo(mp);
+ xfs_mount_reset_sbqflags(mp);
+ } else {
+ cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
+ }
+ return (error);
+}
+
+/*
+ * This is called after the superblock has been read in and we're ready to
+ * iget the quota inodes.
+ */
+STATIC int
+xfs_qm_init_quotainos(
+ xfs_mount_t *mp)
+{
+ xfs_inode_t *uip, *gip;
+ int error;
+ __int64_t sbflags;
+ uint flags;
+
+ ASSERT(mp->m_quotainfo);
+ uip = gip = NULL;
+ sbflags = 0;
+ flags = 0;
+
+ /*
+ * Get the uquota and gquota inodes
+ */
+ if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+ if (XFS_IS_UQUOTA_ON(mp) &&
+ mp->m_sb.sb_uquotino != NULLFSINO) {
+ ASSERT(mp->m_sb.sb_uquotino > 0);
+ if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+ 0, 0, &uip, 0)))
+ return XFS_ERROR(error);
+ }
+ if (XFS_IS_GQUOTA_ON(mp) &&
+ mp->m_sb.sb_gquotino != NULLFSINO) {
+ ASSERT(mp->m_sb.sb_gquotino > 0);
+ if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+ 0, 0, &gip, 0))) {
+ if (uip)
+ VN_RELE(XFS_ITOV(uip));
+ return XFS_ERROR(error);
+ }
+ }
+ } else {
+ flags |= XFS_QMOPT_SBVERSION;
+ sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+ XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
+ }
+
+ /*
+ * Create the two inodes, if they don't exist already. The changes
+ * made above will get added to a transaction and logged in one of
+ * the qino_alloc calls below. If the device is readonly,
+ * temporarily switch to read-write to do this.
+ */
+ if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
+ if ((error = xfs_qm_qino_alloc(mp, &uip,
+ sbflags | XFS_SB_UQUOTINO,
+ flags | XFS_QMOPT_UQUOTA)))
+ return XFS_ERROR(error);
+
+ flags &= ~XFS_QMOPT_SBVERSION;
+ }
+ if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
+ if ((error = xfs_qm_qino_alloc(mp, &gip,
+ sbflags | XFS_SB_GQUOTINO,
+ flags | XFS_QMOPT_GQUOTA))) {
+ if (uip)
+ VN_RELE(XFS_ITOV(uip));
+
+ return XFS_ERROR(error);
+ }
+ }
+
+ XFS_QI_UQIP(mp) = uip;
+ XFS_QI_GQIP(mp) = gip;
+
+ return (0);
+}
+
+
+/*
+ * Traverse the freelist of dquots and attempt to reclaim a maximum of
+ * 'howmany' dquots. This operation races with dqlookup(), and attempts to
+ * favor the lookup function ...
+ * XXXsup merge this with qm_reclaim_one().
+ */
+STATIC int
+xfs_qm_shake_freelist(
+ int howmany)
+{
+ int nreclaimed;
+ xfs_dqhash_t *hash;
+ xfs_dquot_t *dqp, *nextdqp;
+ int restarts;
+ int nflushes;
+
+ if (howmany <= 0)
+ return (0);
+
+ nreclaimed = 0;
+ restarts = 0;
+ nflushes = 0;
+
+#ifdef QUOTADEBUG
+ cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
+#endif
+ /* lock order is : hashchainlock, freelistlock, mplistlock */
+ tryagain:
+ xfs_qm_freelist_lock(xfs_Gqm);
+
+ for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
+ ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
+ nreclaimed < howmany); ) {
+ xfs_dqlock(dqp);
+
+ /*
+ * We are racing with dqlookup here. Naturally we don't
+ * want to reclaim a dquot that lookup wants.
+ */
+ if (dqp->dq_flags & XFS_DQ_WANT) {
+ xfs_dqunlock(dqp);
+ xfs_qm_freelist_unlock(xfs_Gqm);
+ if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+ return (nreclaimed);
+ XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+ goto tryagain;
+ }
+
+ /*
+ * If the dquot is inactive, we are assured that it is
+ * not on the mplist or the hashlist, and that makes our
+ * life easier.
+ */
+ if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+ ASSERT(dqp->q_mount == NULL);
+ ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+ ASSERT(dqp->HL_PREVP == NULL);
+ ASSERT(dqp->MPL_PREVP == NULL);
+ XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
+ nextdqp = dqp->dq_flnext;
+ goto off_freelist;
+ }
+
+ ASSERT(dqp->MPL_PREVP);
+ /*
+ * Try to grab the flush lock. If this dquot is in the process of
+ * getting flushed to disk, we don't want to reclaim it.
+ */
+ if (! xfs_qm_dqflock_nowait(dqp)) {
+ xfs_dqunlock(dqp);
+ dqp = dqp->dq_flnext;
+ continue;
+ }
+
+ /*
+ * We have the flush lock so we know that this is not in the
+ * process of being flushed. So, if this is dirty, flush it
+ * DELWRI so that we don't get a freelist infested with
+ * dirty dquots.
+ */
+ if (XFS_DQ_IS_DIRTY(dqp)) {
+ xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
+ /*
+ * We flush it delayed write, so don't bother
+ * releasing the mplock.
+ */
+ (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+ xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
+ dqp = dqp->dq_flnext;
+ continue;
+ }
+ /*
+ * We're trying to get the hashlock out of order. This races
+ * with dqlookup; so, we giveup and goto the next dquot if
+ * we couldn't get the hashlock. This way, we won't starve
+ * a dqlookup process that holds the hashlock that is
+ * waiting for the freelist lock.
+ */
+ if (! xfs_qm_dqhashlock_nowait(dqp)) {
+ xfs_dqfunlock(dqp);
+ xfs_dqunlock(dqp);
+ dqp = dqp->dq_flnext;
+ continue;
+ }
+ /*
+ * This races with dquot allocation code as well as dqflush_all
+ * and reclaim code. So, if we failed to grab the mplist lock,
+ * giveup everything and start over.
+ */
+ hash = dqp->q_hash;
+ ASSERT(hash);
+ if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
+ /* XXX put a sentinel so that we can come back here */
+ xfs_dqfunlock(dqp);
+ xfs_dqunlock(dqp);
+ XFS_DQ_HASH_UNLOCK(hash);
+ xfs_qm_freelist_unlock(xfs_Gqm);
+ if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+ return (nreclaimed);
+ goto tryagain;
+ }
+ xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
+#ifdef QUOTADEBUG
+ cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
+ dqp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
+#endif
+ ASSERT(dqp->q_nrefs == 0);
+ nextdqp = dqp->dq_flnext;
+ XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
+ XQM_HASHLIST_REMOVE(hash, dqp);
+ xfs_dqfunlock(dqp);
+ xfs_qm_mplist_unlock(dqp->q_mount);
+ XFS_DQ_HASH_UNLOCK(hash);
+
+ off_freelist:
+ XQM_FREELIST_REMOVE(dqp);
+ xfs_dqunlock(dqp);
+ nreclaimed++;
+ XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
+ xfs_qm_dqdestroy(dqp);
+ dqp = nextdqp;
+ }
+ xfs_qm_freelist_unlock(xfs_Gqm);
+ return (nreclaimed);
+}
+
+
+/*
+ * The kmem_shake interface is invoked when memory is running low.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_shake(int nr_to_scan, unsigned int gfp_mask)
+{
+ int ndqused, nfree, n;
+
+ if (!kmem_shake_allow(gfp_mask))
+ return (0);
+ if (!xfs_Gqm)
+ return (0);
+
+ nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
+ /* incore dquots in all f/s's */
+ ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
+
+ ASSERT(ndqused >= 0);
+
+ if (nfree <= ndqused && nfree < ndquot)
+ return (0);
+
+ ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */
+ n = nfree - ndqused - ndquot; /* # over target */
+
+ return xfs_qm_shake_freelist(MAX(nfree, n));
+}
+
+
+/*
+ * Just pop the least recently used dquot off the freelist and
+ * recycle it. The returned dquot is locked.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqreclaim_one(void)
+{
+ xfs_dquot_t *dqpout;
+ xfs_dquot_t *dqp;
+ int restarts;
+ int nflushes;
+
+ restarts = 0;
+ dqpout = NULL;
+ nflushes = 0;
+
+ /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
+ startagain:
+ xfs_qm_freelist_lock(xfs_Gqm);
+
+ FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
+ xfs_dqlock(dqp);
+
+ /*
+ * We are racing with dqlookup here. Naturally we don't
+ * want to reclaim a dquot that lookup wants. We release the
+ * freelist lock and start over, so that lookup will grab
+ * both the dquot and the freelistlock.
+ */
+ if (dqp->dq_flags & XFS_DQ_WANT) {
+ ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
+ xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
+ xfs_dqunlock(dqp);
+ xfs_qm_freelist_unlock(xfs_Gqm);
+ if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+ return (NULL);
+ XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+ goto startagain;
+ }
+
+ /*
+ * If the dquot is inactive, we are assured that it is
+ * not on the mplist or the hashlist, and that makes our
+ * life easier.
+ */
+ if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+ ASSERT(dqp->q_mount == NULL);
+ ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+ ASSERT(dqp->HL_PREVP == NULL);
+ ASSERT(dqp->MPL_PREVP == NULL);
+ XQM_FREELIST_REMOVE(dqp);
+ xfs_dqunlock(dqp);
+ dqpout = dqp;
+ XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
+ break;
+ }
+
+ ASSERT(dqp->q_hash);
+ ASSERT(dqp->MPL_PREVP);
+
+ /*
+ * Try to grab the flush lock. If this dquot is in the process of
+ * getting flushed to disk, we don't want to reclaim it.
+ */
+ if (! xfs_qm_dqflock_nowait(dqp)) {
+ xfs_dqunlock(dqp);
+ continue;
+ }
+
+ /*
+ * We have the flush lock so we know that this is not in the
+ * process of being flushed. So, if this is dirty, flush it
+ * DELWRI so that we don't get a freelist infested with
+ * dirty dquots.
+ */
+ if (XFS_DQ_IS_DIRTY(dqp)) {
+ xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
+ /*
+ * We flush it delayed write, so don't bother
+ * releasing the freelist lock.
+ */
+ (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
+ xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
+ continue;
+ }
+
+ if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
+ xfs_dqfunlock(dqp);
+ xfs_dqunlock(dqp);
+ continue;
+ }
+
+ if (! xfs_qm_dqhashlock_nowait(dqp))
+ goto mplistunlock;
+
+ ASSERT(dqp->q_nrefs == 0);
+ xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
+ XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
+ XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
+ XQM_FREELIST_REMOVE(dqp);
+ dqpout = dqp;
+ XFS_DQ_HASH_UNLOCK(dqp->q_hash);
+ mplistunlock:
+ xfs_qm_mplist_unlock(dqp->q_mount);
+ xfs_dqfunlock(dqp);
+ xfs_dqunlock(dqp);
+ if (dqpout)
+ break;
+ }
+
+ xfs_qm_freelist_unlock(xfs_Gqm);
+ return (dqpout);
+}
+
+
+/*------------------------------------------------------------------*/
+
+/*
+ * Return a new incore dquot. Depending on the number of
+ * dquots in the system, we either allocate a new one on the kernel heap,
+ * or reclaim a free one.
+ * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
+ * to reclaim an existing one from the freelist.
+ */
+boolean_t
+xfs_qm_dqalloc_incore(
+ xfs_dquot_t **O_dqpp)
+{
+ xfs_dquot_t *dqp;
+
+ /*
+ * Check against high water mark to see if we want to pop
+ * a nincompoop dquot off the freelist.
+ */
+ if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
+ /*
+ * Try to recycle a dquot from the freelist.
+ */
+ if ((dqp = xfs_qm_dqreclaim_one())) {
+ XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+ /*
+ * Just zero the core here. The rest will get
+ * reinitialized by caller. XXX we shouldn't even
+ * do this zero ...
+ */
+ memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+ *O_dqpp = dqp;
+ return (B_FALSE);
+ }
+ XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+ }
+
+ /*
+ * Allocate a brand new dquot on the kernel heap and return it
+ * to the caller to initialize.
+ */
+ ASSERT(xfs_Gqm->qm_dqzone != NULL);
+ *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+ atomic_inc(&xfs_Gqm->qm_totaldquots);
+
+ return (B_TRUE);
+}
+
+
+/*
+ * Start a transaction and write the incore superblock changes to
+ * disk. flags parameter indicates which fields have changed.
+ */
+int
+xfs_qm_write_sb_changes(
+ xfs_mount_t *mp,
+ __int64_t flags)
+{
+ xfs_trans_t *tp;
+ int error;
+
+#ifdef QUOTADEBUG
+ cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
+#endif
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+ if ((error = xfs_trans_reserve(tp, 0,
+ mp->m_sb.sb_sectsize + 128, 0,
+ 0,
+ XFS_DEFAULT_LOG_COUNT))) {
+ xfs_trans_cancel(tp, 0);
+ return (error);
+ }
+
+ xfs_mod_sb(tp, flags);
+ (void) xfs_trans_commit(tp, 0, NULL);
+
+ return (0);
+}
+
+
+/* --------------- utility functions for vnodeops ---------------- */
+
+
+/*
+ * Given an inode, a uid and gid (from cred_t) make sure that we have
+ * allocated relevant dquot(s) on disk, and that we won't exceed inode
+ * quotas by creating this file.
+ * This also attaches dquot(s) to the given inode after locking it,
+ * and returns the dquots corresponding to the uid and/or gid.
+ *
+ * in : inode (unlocked)
+ * out : udquot, gdquot with references taken and unlocked
+ */
+int
+xfs_qm_vop_dqalloc(
+ xfs_mount_t *mp,
+ xfs_inode_t *ip,
+ uid_t uid,
+ gid_t gid,
+ uint flags,
+ xfs_dquot_t **O_udqpp,
+ xfs_dquot_t **O_gdqpp)
+{
+ int error;
+ xfs_dquot_t *uq, *gq;
+ uint lockflags;
+
+ if (!XFS_IS_QUOTA_ON(mp))
+ return 0;
+
+ lockflags = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, lockflags);
+
+ if ((flags & XFS_QMOPT_INHERIT) &&
+ XFS_INHERIT_GID(ip, XFS_MTOVFS(mp)))
+ gid = ip->i_d.di_gid;
+
+ /*
+ * Attach the dquot(s) to this inode, doing a dquot allocation
+ * if necessary. The dquot(s) will not be locked.
+ */
+ if (XFS_NOT_DQATTACHED(mp, ip)) {
+ if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
+ XFS_QMOPT_ILOCKED))) {
+ xfs_iunlock(ip, lockflags);
+ return (error);
+ }
+ }
+
+ uq = gq = NULL;
+ if ((flags & XFS_QMOPT_UQUOTA) &&
+ XFS_IS_UQUOTA_ON(mp)) {
+ if (ip->i_d.di_uid != uid) {
+ /*
+ * What we need is the dquot that has this uid, and
+ * if we send the inode to dqget, the uid of the inode
+ * takes priority over what's sent in the uid argument.
+ * We must unlock inode here before calling dqget if
+ * we're not sending the inode, because otherwise
+ * we'll deadlock by doing trans_reserve while
+ * holding ilock.
+ */
+ xfs_iunlock(ip, lockflags);
+ if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
+ XFS_DQ_USER,
+ XFS_QMOPT_DQALLOC |
+ XFS_QMOPT_DOWARN,
+ &uq))) {
+ ASSERT(error != ENOENT);
+ return (error);
+ }
+ /*
+ * Get the ilock in the right order.
+ */
+ xfs_dqunlock(uq);
+ lockflags = XFS_ILOCK_SHARED;
+ xfs_ilock(ip, lockflags);
+ } else {
+ /*
+ * Take an extra reference, because we'll return
+ * this to caller
+ */
+ ASSERT(ip->i_udquot);
+ uq = ip->i_udquot;
+ xfs_dqlock(uq);
+ XFS_DQHOLD(uq);
+ xfs_dqunlock(uq);
+ }
+ }
+ if ((flags & XFS_QMOPT_GQUOTA) &&
+ XFS_IS_GQUOTA_ON(mp)) {
+ if (ip->i_d.di_gid != gid) {
+ xfs_iunlock(ip, lockflags);
+ if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
+ XFS_DQ_GROUP,
+ XFS_QMOPT_DQALLOC |
+ XFS_QMOPT_DOWARN,
+ &gq))) {
+ if (uq)
+ xfs_qm_dqrele(uq);
+ ASSERT(error != ENOENT);
+ return (error);
+ }
+ xfs_dqunlock(gq);
+ lockflags = XFS_ILOCK_SHARED;
+ xfs_ilock(ip, lockflags);
+ } else {
+ ASSERT(ip->i_gdquot);
+ gq = ip->i_gdquot;
+ xfs_dqlock(gq);
+ XFS_DQHOLD(gq);
+ xfs_dqunlock(gq);
+ }
+ }
+ if (uq)
+ xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
+
+ xfs_iunlock(ip, lockflags);
+ if (O_udqpp)
+ *O_udqpp = uq;
+ else if (uq)
+ xfs_qm_dqrele(uq);
+ if (O_gdqpp)
+ *O_gdqpp = gq;
+ else if (gq)
+ xfs_qm_dqrele(gq);
+ return (0);
+}
+
+/*
+ * Actually transfer ownership, and do dquot modifications.
+ * These were already reserved.
+ */
+xfs_dquot_t *
+xfs_qm_vop_chown(
+ xfs_trans_t *tp,
+ xfs_inode_t *ip,
+ xfs_dquot_t **IO_olddq,
+ xfs_dquot_t *newdq)
+{
+ xfs_dquot_t *prevdq;
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
+
+ /* old dquot */
+ prevdq = *IO_olddq;
+ ASSERT(prevdq);
+ ASSERT(prevdq != newdq);
+
+ xfs_trans_mod_dquot(tp, prevdq,
+ XFS_TRANS_DQ_BCOUNT,
+ -(ip->i_d.di_nblocks));
+ xfs_trans_mod_dquot(tp, prevdq,
+ XFS_TRANS_DQ_ICOUNT,
+ -1);
+
+ /* the sparkling new dquot */
+ xfs_trans_mod_dquot(tp, newdq,
+ XFS_TRANS_DQ_BCOUNT,
+ ip->i_d.di_nblocks);
+ xfs_trans_mod_dquot(tp, newdq,
+ XFS_TRANS_DQ_ICOUNT,
+ 1);
+
+ /*
+ * Take an extra reference, because the inode
+ * is going to keep this dquot pointer even
+ * after the trans_commit.
+ */
+ xfs_dqlock(newdq);
+ XFS_DQHOLD(newdq);
+ xfs_dqunlock(newdq);
+ *IO_olddq = newdq;
+
+ return (prevdq);
+}
+
+/*
+ * Quota reservations for setattr(AT_UID|AT_GID).
+ */
+int
+xfs_qm_vop_chown_reserve(
+ xfs_trans_t *tp,
+ xfs_inode_t *ip,
+ xfs_dquot_t *udqp,
+ xfs_dquot_t *gdqp,
+ uint flags)
+{
+ int error;
+ xfs_mount_t *mp;
+ uint delblks;
+ xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
+
+ ASSERT(XFS_ISLOCKED_INODE(ip));
+ mp = ip->i_mount;
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+ delblks = ip->i_delayed_blks;
+ delblksudq = delblksgdq = unresudq = unresgdq = NULL;
+
+ if (XFS_IS_UQUOTA_ON(mp) && udqp &&
+ ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) {
+ delblksudq = udqp;
+ /*
+ * If there are delayed allocation blocks, then we have to
+ * unreserve those from the old dquot, and add them to the
+ * new dquot.
+ */
+ if (delblks) {
+ ASSERT(ip->i_udquot);
+ unresudq = ip->i_udquot;
+ }
+ }
+ if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
+ ip->i_d.di_gid != INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) {
+ delblksgdq = gdqp;
+ if (delblks) {
+ ASSERT(ip->i_gdquot);
+ unresgdq = ip->i_gdquot;
+ }
+ }
+
+ if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
+ delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
+ flags | XFS_QMOPT_RES_REGBLKS)))
+ return (error);
+
+ /*
+ * Do the delayed blks reservations/unreservations now. Since, these
+ * are done without the help of a transaction, if a reservation fails
+ * its previous reservations won't be automatically undone by trans
+ * code. So, we have to do it manually here.
+ */
+ if (delblks) {
+ /*
+ * Do the reservations first. Unreservation can't fail.
+ */
+ ASSERT(delblksudq || delblksgdq);
+ ASSERT(unresudq || unresgdq);
+ if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+ delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
+ flags | XFS_QMOPT_RES_REGBLKS)))
+ return (error);
+ xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+ unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
+ XFS_QMOPT_RES_REGBLKS);
+ }
+
+ return (0);
+}
+
+int
+xfs_qm_vop_rename_dqattach(
+ xfs_inode_t **i_tab)
+{
+ xfs_inode_t *ip;
+ int i;
+ int error;
+
+ ip = i_tab[0];
+
+ if (! XFS_IS_QUOTA_ON(ip->i_mount))
+ return (0);
+
+ if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
+ return (error);
+ }
+ for (i = 1; (i < 4 && i_tab[i]); i++) {
+ /*
+ * Watch out for duplicate entries in the table.
+ */
+ if ((ip = i_tab[i]) != i_tab[i-1]) {
+ if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
+ return (error);
+ }
+ }
+ }
+ return (0);
+}
+
+void
+xfs_qm_vop_dqattach_and_dqmod_newinode(
+ xfs_trans_t *tp,
+ xfs_inode_t *ip,
+ xfs_dquot_t *udqp,
+ xfs_dquot_t *gdqp)
+{
+ if (!XFS_IS_QUOTA_ON(tp->t_mountp))
+ return;
+
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+
+ if (udqp) {
+ xfs_dqlock(udqp);
+ XFS_DQHOLD(udqp);
+ xfs_dqunlock(udqp);
+ ASSERT(ip->i_udquot == NULL);
+ ip->i_udquot = udqp;
+ ASSERT(ip->i_d.di_uid == INT_GET(udqp->q_core.d_id, ARCH_CONVERT));
+ xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
+ }
+ if (gdqp) {
+ xfs_dqlock(gdqp);
+ XFS_DQHOLD(gdqp);
+ xfs_dqunlock(gdqp);
+ ASSERT(ip->i_gdquot == NULL);
+ ip->i_gdquot = gdqp;
+ ASSERT(ip->i_d.di_gid == INT_GET(gdqp->q_core.d_id, ARCH_CONVERT));
+ xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
+ }
+}
+
+/* ------------- list stuff -----------------*/
+void
+xfs_qm_freelist_init(xfs_frlist_t *ql)
+{
+ ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
+ mutex_init(&ql->qh_lock, MUTEX_DEFAULT, "dqf");
+ ql->qh_version = 0;
+ ql->qh_nelems = 0;
+}
+
+void
+xfs_qm_freelist_destroy(xfs_frlist_t *ql)
+{
+ xfs_dquot_t *dqp, *nextdqp;
+
+ mutex_lock(&ql->qh_lock, PINOD);
+ for (dqp = ql->qh_next;
+ dqp != (xfs_dquot_t *)ql; ) {
+ xfs_dqlock(dqp);
+ nextdqp = dqp->dq_flnext;
+#ifdef QUOTADEBUG
+ cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
+#endif
+ XQM_FREELIST_REMOVE(dqp);
+ xfs_dqunlock(dqp);
+ xfs_qm_dqdestroy(dqp);
+ dqp = nextdqp;
+ }
+ /*
+ * Don't bother about unlocking.
+ */
+ mutex_destroy(&ql->qh_lock);
+
+ ASSERT(ql->qh_nelems == 0);
+}
+
+void
+xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
+{
+ dq->dq_flnext = ql->qh_next;
+ dq->dq_flprev = (xfs_dquot_t *)ql;
+ ql->qh_next = dq;
+ dq->dq_flnext->dq_flprev = dq;
+ xfs_Gqm->qm_dqfreelist.qh_nelems++;
+ xfs_Gqm->qm_dqfreelist.qh_version++;
+}
+
+void
+xfs_qm_freelist_unlink(xfs_dquot_t *dq)
+{
+ xfs_dquot_t *next = dq->dq_flnext;
+ xfs_dquot_t *prev = dq->dq_flprev;
+
+ next->dq_flprev = prev;
+ prev->dq_flnext = next;
+ dq->dq_flnext = dq->dq_flprev = dq;
+ xfs_Gqm->qm_dqfreelist.qh_nelems--;
+ xfs_Gqm->qm_dqfreelist.qh_version++;
+}
+
+void
+xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
+{
+ xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
+}
+
+int
+xfs_qm_dqhashlock_nowait(
+ xfs_dquot_t *dqp)
+{
+ int locked;
+
+ locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
+ return (locked);
+}
+
+int
+xfs_qm_freelist_lock_nowait(
+ xfs_qm_t *xqm)
+{
+ int locked;
+
+ locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
+ return (locked);
+}
+
+int
+xfs_qm_mplist_nowait(
+ xfs_mount_t *mp)
+{
+ int locked;
+
+ ASSERT(mp->m_quotainfo);
+ locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
+ return (locked);
+}
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
new file mode 100644
index 000000000000..dcf1a7a831d8
--- /dev/null
+++ b/fs/xfs/quota/xfs_qm.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_QM_H__
+#define __XFS_QM_H__
+
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_quota_priv.h"
+#include "xfs_qm_stats.h"
+
+struct xfs_qm;
+struct xfs_inode;
+
+extern mutex_t xfs_Gqm_lock;
+extern struct xfs_qm *xfs_Gqm;
+extern kmem_zone_t *qm_dqzone;
+extern kmem_zone_t *qm_dqtrxzone;
+
+/*
+ * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
+ * iterate over the mountpt's dquot list in one call.
+ */
+#define XFS_QM_SYNC_MAX_RESTARTS 7
+
+/*
+ * Ditto, for xfs_qm_dqreclaim_one.
+ */
+#define XFS_QM_RECLAIM_MAX_RESTARTS 4
+
+/*
+ * Ideal ratio of free to in use dquots. Quota manager makes an attempt
+ * to keep this balance.
+ */
+#define XFS_QM_DQFREE_RATIO 2
+
+/*
+ * Dquot hashtable constants/threshold values.
+ */
+#define XFS_QM_NCSIZE_THRESHOLD 5000
+#define XFS_QM_HASHSIZE_LOW 32
+#define XFS_QM_HASHSIZE_HIGH 64
+
+/*
+ * We output a cmn_err when quotachecking a quota file with more than
+ * this many fsbs.
+ */
+#define XFS_QM_BIG_QCHECK_NBLKS 500
+
+/*
+ * This defines the unit of allocation of dquots.
+ * Currently, it is just one file system block, and a 4K blk contains 30
+ * (136 * 30 = 4080) dquots. It's probably not worth trying to make
+ * this more dynamic.
+ * XXXsup However, if this number is changed, we have to make sure that we don't
+ * implicitly assume that we do allocations in chunks of a single filesystem
+ * block in the dquot/xqm code.
+ */
+#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
+/*
+ * When doing a quotacheck, we log dquot clusters of this many FSBs at most
+ * in a single transaction. We don't want to ask for too huge a log reservation.
+ */
+#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3
+
+typedef xfs_dqhash_t xfs_dqlist_t;
+/*
+ * The freelist head. The first two fields match the first two in the
+ * xfs_dquot_t structure (in xfs_dqmarker_t)
+ */
+typedef struct xfs_frlist {
+ struct xfs_dquot *qh_next;
+ struct xfs_dquot *qh_prev;
+ mutex_t qh_lock;
+ uint qh_version;
+ uint qh_nelems;
+} xfs_frlist_t;
+
+/*
+ * Quota Manager (global) structure. Lives only in core.
+ */
+typedef struct xfs_qm {
+ xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */
+ xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */
+ uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */
+ xfs_frlist_t qm_dqfreelist; /* freelist of dquots */
+ atomic_t qm_totaldquots; /* total incore dquots */
+ uint qm_nrefs; /* file systems with quota on */
+ int qm_dqfree_ratio;/* ratio of free to inuse dquots */
+ kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
+ kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
+} xfs_qm_t;
+
+/*
+ * Various quota information for individual filesystems.
+ * The mount structure keeps a pointer to this.
+ */
+typedef struct xfs_quotainfo {
+ xfs_inode_t *qi_uquotaip; /* user quota inode */
+ xfs_inode_t *qi_gquotaip; /* group quota inode */
+ lock_t qi_pinlock; /* dquot pinning mutex */
+ xfs_dqlist_t qi_dqlist; /* all dquots in filesys */
+ int qi_dqreclaims; /* a change here indicates
+ a removal in the dqlist */
+ time_t qi_btimelimit; /* limit for blks timer */
+ time_t qi_itimelimit; /* limit for inodes timer */
+ time_t qi_rtbtimelimit;/* limit for rt blks timer */
+ xfs_qwarncnt_t qi_bwarnlimit; /* limit for num warnings */
+ xfs_qwarncnt_t qi_iwarnlimit; /* limit for num warnings */
+ mutex_t qi_quotaofflock;/* to serialize quotaoff */
+ xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
+ uint qi_dqperchunk; /* # ondisk dqs in above chunk */
+ xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */
+ xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */
+ xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */
+ xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
+ xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
+ xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
+} xfs_quotainfo_t;
+
+
+extern xfs_dqtrxops_t xfs_trans_dquot_ops;
+
+extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
+extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
+ xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
+extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
+extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
+
+/*
+ * We keep the usr and grp dquots separately so that locking will be easier
+ * to do at commit time. All transactions that we know of at this point
+ * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
+ */
+#define XFS_QM_TRANS_MAXDQS 2
+typedef struct xfs_dquot_acct {
+ xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
+ xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
+} xfs_dquot_acct_t;
+
+/*
+ * Users are allowed to have a usage exceeding their softlimit for
+ * a period this long.
+ */
+#define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */
+#define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */
+#define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */
+
+#define XFS_QM_BWARNLIMIT 5
+#define XFS_QM_IWARNLIMIT 5
+
+#define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock, PINOD))
+#define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock))
+#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++)
+#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
+
+extern void xfs_mount_reset_sbqflags(xfs_mount_t *);
+
+extern int xfs_qm_init_quotainfo(xfs_mount_t *);
+extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
+extern int xfs_qm_mount_quotas(xfs_mount_t *, int);
+extern void xfs_qm_mount_quotainit(xfs_mount_t *, uint);
+extern int xfs_qm_quotacheck(xfs_mount_t *);
+extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
+extern int xfs_qm_unmount_quotas(xfs_mount_t *);
+extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
+extern int xfs_qm_sync(xfs_mount_t *, short);
+
+/* dquot stuff */
+extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
+extern int xfs_qm_dqattach(xfs_inode_t *, uint);
+extern void xfs_qm_dqdetach(xfs_inode_t *);
+extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
+extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
+
+/* vop stuff */
+extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
+ uid_t, gid_t, uint,
+ xfs_dquot_t **, xfs_dquot_t **);
+extern void xfs_qm_vop_dqattach_and_dqmod_newinode(
+ xfs_trans_t *, xfs_inode_t *,
+ xfs_dquot_t *, xfs_dquot_t *);
+extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **);
+extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *,
+ xfs_dquot_t **, xfs_dquot_t *);
+extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
+ xfs_dquot_t *, xfs_dquot_t *, uint);
+
+/* list stuff */
+extern void xfs_qm_freelist_init(xfs_frlist_t *);
+extern void xfs_qm_freelist_destroy(xfs_frlist_t *);
+extern void xfs_qm_freelist_insert(xfs_frlist_t *, xfs_dquot_t *);
+extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
+extern void xfs_qm_freelist_unlink(xfs_dquot_t *);
+extern int xfs_qm_freelist_lock_nowait(xfs_qm_t *);
+extern int xfs_qm_mplist_nowait(xfs_mount_t *);
+extern int xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
+
+/* system call interface */
+extern int xfs_qm_quotactl(bhv_desc_t *, int, int, xfs_caddr_t);
+
+#ifdef DEBUG
+extern int xfs_qm_internalqcheck(xfs_mount_t *);
+#else
+#define xfs_qm_internalqcheck(mp) (0)
+#endif
+
+#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
new file mode 100644
index 000000000000..be67d9c265f8
--- /dev/null
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -0,0 +1,410 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+
+#include "xfs_qm.h"
+
+#define MNTOPT_QUOTA "quota" /* disk quotas (user) */
+#define MNTOPT_NOQUOTA "noquota" /* no quotas */
+#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */
+#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */
+#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */
+#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */
+#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
+#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
+#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
+
+STATIC int
+xfs_qm_parseargs(
+ struct bhv_desc *bhv,
+ char *options,
+ struct xfs_mount_args *args,
+ int update)
+{
+ size_t length;
+ char *local_options = options;
+ char *this_char;
+ int error;
+ int referenced = update;
+
+ while ((this_char = strsep(&local_options, ",")) != NULL) {
+ length = strlen(this_char);
+ if (local_options)
+ length++;
+
+ if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+ args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
+ args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
+ referenced = update;
+ } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
+ !strcmp(this_char, MNTOPT_UQUOTA) ||
+ !strcmp(this_char, MNTOPT_USRQUOTA)) {
+ args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
+ referenced = 1;
+ } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
+ !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+ args->flags |= XFSMNT_UQUOTA;
+ args->flags &= ~XFSMNT_UQUOTAENF;
+ referenced = 1;
+ } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
+ !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+ args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
+ referenced = 1;
+ } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+ args->flags |= XFSMNT_GQUOTA;
+ args->flags &= ~XFSMNT_GQUOTAENF;
+ referenced = 1;
+ } else {
+ if (local_options)
+ *(local_options-1) = ',';
+ continue;
+ }
+
+ while (length--)
+ *this_char++ = ',';
+ }
+
+ PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error);
+ if (!error && !referenced)
+ bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM);
+ return error;
+}
+
+STATIC int
+xfs_qm_showargs(
+ struct bhv_desc *bhv,
+ struct seq_file *m)
+{
+ struct vfs *vfsp = bhvtovfs(bhv);
+ struct xfs_mount *mp = XFS_VFSTOM(vfsp);
+ int error;
+
+ if (mp->m_qflags & XFS_UQUOTA_ACCT) {
+ (mp->m_qflags & XFS_UQUOTA_ENFD) ?
+ seq_puts(m, "," MNTOPT_USRQUOTA) :
+ seq_puts(m, "," MNTOPT_UQUOTANOENF);
+ }
+
+ if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+ (mp->m_qflags & XFS_GQUOTA_ENFD) ?
+ seq_puts(m, "," MNTOPT_GRPQUOTA) :
+ seq_puts(m, "," MNTOPT_GQUOTANOENF);
+ }
+
+ if (!(mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT)))
+ seq_puts(m, "," MNTOPT_NOQUOTA);
+
+ PVFS_SHOWARGS(BHV_NEXT(bhv), m, error);
+ return error;
+}
+
+STATIC int
+xfs_qm_mount(
+ struct bhv_desc *bhv,
+ struct xfs_mount_args *args,
+ struct cred *cr)
+{
+ struct vfs *vfsp = bhvtovfs(bhv);
+ struct xfs_mount *mp = XFS_VFSTOM(vfsp);
+ int error;
+
+ if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA))
+ xfs_qm_mount_quotainit(mp, args->flags);
+ PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error);
+ return error;
+}
+
+STATIC int
+xfs_qm_syncall(
+ struct bhv_desc *bhv,
+ int flags,
+ cred_t *credp)
+{
+ struct vfs *vfsp = bhvtovfs(bhv);
+ struct xfs_mount *mp = XFS_VFSTOM(vfsp);
+ int error;
+
+ /*
+ * Get the Quota Manager to flush the dquots.
+ */
+ if (XFS_IS_QUOTA_ON(mp)) {
+ if ((error = xfs_qm_sync(mp, flags))) {
+ /*
+ * If we got an IO error, we will be shutting down.
+ * So, there's nothing more for us to do here.
+ */
+ ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp));
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ return XFS_ERROR(error);
+ }
+ }
+ }
+ PVFS_SYNC(BHV_NEXT(bhv), flags, credp, error);
+ return error;
+}
+
+/*
+ * Clear the quotaflags in memory and in the superblock.
+ */
+void
+xfs_mount_reset_sbqflags(
+ xfs_mount_t *mp)
+{
+ xfs_trans_t *tp;
+ unsigned long s;
+
+ mp->m_qflags = 0;
+ /*
+ * It is OK to look at sb_qflags here in mount path,
+ * without SB_LOCK.
+ */
+ if (mp->m_sb.sb_qflags == 0)
+ return;
+ s = XFS_SB_LOCK(mp);
+ mp->m_sb.sb_qflags = 0;
+ XFS_SB_UNLOCK(mp, s);
+
+ /*
+ * if the fs is readonly, let the incore superblock run
+ * with quotas off but don't flush the update out to disk
+ */
+ if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
+ return;
+#ifdef QUOTADEBUG
+ xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+#endif
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+ if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+ XFS_DEFAULT_LOG_COUNT)) {
+ xfs_trans_cancel(tp, 0);
+ xfs_fs_cmn_err(CE_ALERT, mp,
+ "xfs_mount_reset_sbqflags: Superblock update failed!");
+ return;
+ }
+ xfs_mod_sb(tp, XFS_SB_QFLAGS);
+ xfs_trans_commit(tp, 0, NULL);
+}
+
+STATIC int
+xfs_qm_newmount(
+ xfs_mount_t *mp,
+ uint *needquotamount,
+ uint *quotaflags)
+{
+ uint quotaondisk;
+ uint uquotaondisk = 0, gquotaondisk = 0;
+
+ *quotaflags = 0;
+ *needquotamount = B_FALSE;
+
+ quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+ mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT);
+
+ if (quotaondisk) {
+ uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
+ gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
+ }
+
+ /*
+ * If the device itself is read-only, we can't allow
+ * the user to change the state of quota on the mount -
+ * this would generate a transaction on the ro device,
+ * which would lead to an I/O error and shutdown
+ */
+
+ if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
+ (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) ||
+ (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
+ (!gquotaondisk && XFS_IS_GQUOTA_ON(mp))) &&
+ xfs_dev_is_read_only(mp, "changing quota state")) {
+ cmn_err(CE_WARN,
+ "XFS: please mount with%s%s%s.",
+ (!quotaondisk ? "out quota" : ""),
+ (uquotaondisk ? " usrquota" : ""),
+ (gquotaondisk ? " grpquota" : ""));
+ return XFS_ERROR(EPERM);
+ }
+
+ if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
+ /*
+ * Call mount_quotas at this point only if we won't have to do
+ * a quotacheck.
+ */
+ if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
+ /*
+ * If an error occured, qm_mount_quotas code
+ * has already disabled quotas. So, just finish
+ * mounting, and get on with the boring life
+ * without disk quotas.
+ */
+ xfs_qm_mount_quotas(mp, 0);
+ } else {
+ /*
+ * Clear the quota flags, but remember them. This
+ * is so that the quota code doesn't get invoked
+ * before we're ready. This can happen when an
+ * inode goes inactive and wants to free blocks,
+ * or via xfs_log_mount_finish.
+ */
+ *needquotamount = B_TRUE;
+ *quotaflags = mp->m_qflags;
+ mp->m_qflags = 0;
+ }
+ }
+
+ return 0;
+}
+
+STATIC int
+xfs_qm_endmount(
+ xfs_mount_t *mp,
+ uint needquotamount,
+ uint quotaflags,
+ int mfsi_flags)
+{
+ if (needquotamount) {
+ ASSERT(mp->m_qflags == 0);
+ mp->m_qflags = quotaflags;
+ xfs_qm_mount_quotas(mp, mfsi_flags);
+ }
+
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+ if (! (XFS_IS_QUOTA_ON(mp)))
+ xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
+ else
+ xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
+#endif
+
+#ifdef QUOTADEBUG
+ if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
+ cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
+#endif
+
+ return 0;
+}
+
+STATIC void
+xfs_qm_dqrele_null(
+ xfs_dquot_t *dq)
+{
+ /*
+ * Called from XFS, where we always check first for a NULL dquot.
+ */
+ if (!dq)
+ return;
+ xfs_qm_dqrele(dq);
+}
+
+
+struct xfs_qmops xfs_qmcore_xfs = {
+ .xfs_qminit = xfs_qm_newmount,
+ .xfs_qmdone = xfs_qm_unmount_quotadestroy,
+ .xfs_qmmount = xfs_qm_endmount,
+ .xfs_qmunmount = xfs_qm_unmount_quotas,
+ .xfs_dqrele = xfs_qm_dqrele_null,
+ .xfs_dqattach = xfs_qm_dqattach,
+ .xfs_dqdetach = xfs_qm_dqdetach,
+ .xfs_dqpurgeall = xfs_qm_dqpurge_all,
+ .xfs_dqvopalloc = xfs_qm_vop_dqalloc,
+ .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode,
+ .xfs_dqvoprename = xfs_qm_vop_rename_dqattach,
+ .xfs_dqvopchown = xfs_qm_vop_chown,
+ .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve,
+ .xfs_dqtrxops = &xfs_trans_dquot_ops,
+};
+
+struct bhv_vfsops xfs_qmops = { {
+ BHV_IDENTITY_INIT(VFS_BHV_QM, VFS_POSITION_QM),
+ .vfs_parseargs = xfs_qm_parseargs,
+ .vfs_showargs = xfs_qm_showargs,
+ .vfs_mount = xfs_qm_mount,
+ .vfs_sync = xfs_qm_syncall,
+ .vfs_quotactl = xfs_qm_quotactl, },
+};
+
+
+void __init
+xfs_qm_init(void)
+{
+ static char message[] __initdata =
+ KERN_INFO "SGI XFS Quota Management subsystem\n";
+
+ printk(message);
+ mutex_init(&xfs_Gqm_lock, MUTEX_DEFAULT, "xfs_qmlock");
+ vfs_bhv_set_custom(&xfs_qmops, &xfs_qmcore_xfs);
+ xfs_qm_init_procfs();
+}
+
+void __exit
+xfs_qm_exit(void)
+{
+ vfs_bhv_clr_custom(&xfs_qmops);
+ xfs_qm_cleanup_procfs();
+ if (qm_dqzone)
+ kmem_cache_destroy(qm_dqzone);
+ if (qm_dqtrxzone)
+ kmem_cache_destroy(qm_dqtrxzone);
+}
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
new file mode 100644
index 000000000000..29978e037fee
--- /dev/null
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+
+#include "xfs_qm.h"
+
+struct xqmstats xqmstats;
+
+STATIC int
+xfs_qm_read_xfsquota(
+ char *buffer,
+ char **start,
+ off_t offset,
+ int count,
+ int *eof,
+ void *data)
+{
+ int len;
+
+ /* maximum; incore; ratio free to inuse; freelist */
+ len = sprintf(buffer, "%d\t%d\t%d\t%u\n",
+ ndquot,
+ xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
+ xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
+ xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0);
+
+ if (offset >= len) {
+ *start = buffer;
+ *eof = 1;
+ return 0;
+ }
+ *start = buffer + offset;
+ if ((len -= offset) > count)
+ return count;
+ *eof = 1;
+
+ return len;
+}
+
+STATIC int
+xfs_qm_read_stats(
+ char *buffer,
+ char **start,
+ off_t offset,
+ int count,
+ int *eof,
+ void *data)
+{
+ int len;
+
+ /* quota performance statistics */
+ len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n",
+ xqmstats.xs_qm_dqreclaims,
+ xqmstats.xs_qm_dqreclaim_misses,
+ xqmstats.xs_qm_dquot_dups,
+ xqmstats.xs_qm_dqcachemisses,
+ xqmstats.xs_qm_dqcachehits,
+ xqmstats.xs_qm_dqwants,
+ xqmstats.xs_qm_dqshake_reclaims,
+ xqmstats.xs_qm_dqinact_reclaims);
+
+ if (offset >= len) {
+ *start = buffer;
+ *eof = 1;
+ return 0;
+ }
+ *start = buffer + offset;
+ if ((len -= offset) > count)
+ return count;
+ *eof = 1;
+
+ return len;
+}
+
+void
+xfs_qm_init_procfs(void)
+{
+ create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL);
+ create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL);
+}
+
+void
+xfs_qm_cleanup_procfs(void)
+{
+ remove_proc_entry("fs/xfs/xqm", NULL);
+ remove_proc_entry("fs/xfs/xqmstat", NULL);
+}
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
new file mode 100644
index 000000000000..8093c5c284ec
--- /dev/null
+++ b/fs/xfs/quota/xfs_qm_stats.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_QM_STATS_H__
+#define __XFS_QM_STATS_H__
+
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+/*
+ * XQM global statistics
+ */
+struct xqmstats {
+ __uint32_t xs_qm_dqreclaims;
+ __uint32_t xs_qm_dqreclaim_misses;
+ __uint32_t xs_qm_dquot_dups;
+ __uint32_t xs_qm_dqcachemisses;
+ __uint32_t xs_qm_dqcachehits;
+ __uint32_t xs_qm_dqwants;
+ __uint32_t xs_qm_dqshake_reclaims;
+ __uint32_t xs_qm_dqinact_reclaims;
+};
+
+extern struct xqmstats xqmstats;
+
+# define XQM_STATS_INC(count) ( (count)++ )
+
+extern void xfs_qm_init_procfs(void);
+extern void xfs_qm_cleanup_procfs(void);
+
+#else
+
+# define XQM_STATS_INC(count) do { } while (0)
+
+static __inline void xfs_qm_init_procfs(void) { };
+static __inline void xfs_qm_cleanup_procfs(void) { };
+
+#endif
+
+#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
new file mode 100644
index 000000000000..229f5b5a2d25
--- /dev/null
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -0,0 +1,1458 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+
+#include "xfs_qm.h"
+
+#ifdef DEBUG
+# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args)
+#else
+# define qdprintk(s, args...) do { } while (0)
+#endif
+
+STATIC int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
+STATIC int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
+ fs_disk_quota_t *);
+STATIC int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
+STATIC int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
+ fs_disk_quota_t *);
+STATIC int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
+STATIC int xfs_qm_scall_quotaoff(xfs_mount_t *, uint, boolean_t);
+STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
+STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+ uint);
+STATIC uint xfs_qm_import_flags(uint);
+STATIC uint xfs_qm_export_flags(uint);
+STATIC uint xfs_qm_import_qtype_flags(uint);
+STATIC uint xfs_qm_export_qtype_flags(uint);
+STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
+ fs_disk_quota_t *);
+
+
+/*
+ * The main distribution switch of all XFS quotactl system calls.
+ */
+int
+xfs_qm_quotactl(
+ struct bhv_desc *bdp,
+ int cmd,
+ int id,
+ xfs_caddr_t addr)
+{
+ xfs_mount_t *mp;
+ int error;
+ struct vfs *vfsp;
+
+ vfsp = bhvtovfs(bdp);
+ mp = XFS_VFSTOM(vfsp);
+
+ if (addr == NULL && cmd != Q_SYNC)
+ return XFS_ERROR(EINVAL);
+ if (id < 0 && cmd != Q_SYNC)
+ return XFS_ERROR(EINVAL);
+
+ /*
+ * The following commands are valid even when quotaoff.
+ */
+ switch (cmd) {
+ /*
+ * truncate quota files. quota must be off.
+ */
+ case Q_XQUOTARM:
+ if (XFS_IS_QUOTA_ON(mp) || addr == NULL)
+ return XFS_ERROR(EINVAL);
+ if (vfsp->vfs_flag & VFS_RDONLY)
+ return XFS_ERROR(EROFS);
+ return (xfs_qm_scall_trunc_qfiles(mp,
+ xfs_qm_import_qtype_flags(*(uint *)addr)));
+ /*
+ * Get quota status information.
+ */
+ case Q_XGETQSTAT:
+ return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr));
+
+ /*
+ * QUOTAON for root f/s and quota enforcement on others..
+ * Quota accounting for non-root f/s's must be turned on
+ * at mount time.
+ */
+ case Q_XQUOTAON:
+ if (addr == NULL)
+ return XFS_ERROR(EINVAL);
+ if (vfsp->vfs_flag & VFS_RDONLY)
+ return XFS_ERROR(EROFS);
+ return (xfs_qm_scall_quotaon(mp,
+ xfs_qm_import_flags(*(uint *)addr)));
+ case Q_XQUOTAOFF:
+ if (vfsp->vfs_flag & VFS_RDONLY)
+ return XFS_ERROR(EROFS);
+ break;
+
+ default:
+ break;
+ }
+
+ if (! XFS_IS_QUOTA_ON(mp))
+ return XFS_ERROR(ESRCH);
+
+ switch (cmd) {
+ case Q_XQUOTAOFF:
+ if (vfsp->vfs_flag & VFS_RDONLY)
+ return XFS_ERROR(EROFS);
+ error = xfs_qm_scall_quotaoff(mp,
+ xfs_qm_import_flags(*(uint *)addr),
+ B_FALSE);
+ break;
+
+ /*
+ * Defaults to XFS_GETUQUOTA.
+ */
+ case Q_XGETQUOTA:
+ error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER,
+ (fs_disk_quota_t *)addr);
+ break;
+ /*
+ * Set limits, both hard and soft. Defaults to Q_SETUQLIM.
+ */
+ case Q_XSETQLIM:
+ if (vfsp->vfs_flag & VFS_RDONLY)
+ return XFS_ERROR(EROFS);
+ error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER,
+ (fs_disk_quota_t *)addr);
+ break;
+
+ case Q_XSETGQLIM:
+ if (vfsp->vfs_flag & VFS_RDONLY)
+ return XFS_ERROR(EROFS);
+ error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
+ (fs_disk_quota_t *)addr);
+ break;
+
+
+ case Q_XGETGQUOTA:
+ error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
+ (fs_disk_quota_t *)addr);
+ break;
+
+ /*
+ * Quotas are entirely undefined after quotaoff in XFS quotas.
+ * For instance, there's no way to set limits when quotaoff.
+ */
+
+ default:
+ error = XFS_ERROR(EINVAL);
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * Turn off quota accounting and/or enforcement for all udquots and/or
+ * gdquots. Called only at unmount time.
+ *
+ * This assumes that there are no dquots of this file system cached
+ * incore, and modifies the ondisk dquot directly. Therefore, for example,
+ * it is an error to call this twice, without purging the cache.
+ */
+STATIC int
+xfs_qm_scall_quotaoff(
+ xfs_mount_t *mp,
+ uint flags,
+ boolean_t force)
+{
+ uint dqtype;
+ unsigned long s;
+ int error;
+ uint inactivate_flags;
+ xfs_qoff_logitem_t *qoffstart;
+ int nculprits;
+
+ if (!force && !capable(CAP_SYS_ADMIN))
+ return XFS_ERROR(EPERM);
+ /*
+ * No file system can have quotas enabled on disk but not in core.
+ * Note that quota utilities (like quotaoff) _expect_
+ * errno == EEXIST here.
+ */
+ if ((mp->m_qflags & flags) == 0)
+ return XFS_ERROR(EEXIST);
+ error = 0;
+
+ flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+
+ /*
+ * We don't want to deal with two quotaoffs messing up each other,
+ * so we're going to serialize it. quotaoff isn't exactly a performance
+ * critical thing.
+ * If quotaoff, then we must be dealing with the root filesystem.
+ */
+ ASSERT(mp->m_quotainfo);
+ if (mp->m_quotainfo)
+ mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+
+ ASSERT(mp->m_quotainfo);
+
+ /*
+ * If we're just turning off quota enforcement, change mp and go.
+ */
+ if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
+ mp->m_qflags &= ~(flags);
+
+ s = XFS_SB_LOCK(mp);
+ mp->m_sb.sb_qflags = mp->m_qflags;
+ XFS_SB_UNLOCK(mp, s);
+ mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+
+ /* XXX what to do if error ? Revert back to old vals incore ? */
+ error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
+ return (error);
+ }
+
+ dqtype = 0;
+ inactivate_flags = 0;
+ /*
+ * If accounting is off, we must turn enforcement off, clear the
+ * quota 'CHKD' certificate to make it known that we have to
+ * do a quotacheck the next time this quota is turned on.
+ */
+ if (flags & XFS_UQUOTA_ACCT) {
+ dqtype |= XFS_QMOPT_UQUOTA;
+ flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
+ inactivate_flags |= XFS_UQUOTA_ACTIVE;
+ }
+ if (flags & XFS_GQUOTA_ACCT) {
+ dqtype |= XFS_QMOPT_GQUOTA;
+ flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD);
+ inactivate_flags |= XFS_GQUOTA_ACTIVE;
+ }
+
+ /*
+ * Nothing to do? Don't complain. This happens when we're just
+ * turning off quota enforcement.
+ */
+ if ((mp->m_qflags & flags) == 0) {
+ mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+ return (0);
+ }
+
+ /*
+ * Write the LI_QUOTAOFF log record, and do SB changes atomically,
+ * and synchronously.
+ */
+ xfs_qm_log_quotaoff(mp, &qoffstart, flags);
+
+ /*
+ * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
+ * to take care of the race between dqget and quotaoff. We don't take
+ * any special locks to reset these bits. All processes need to check
+ * these bits *after* taking inode lock(s) to see if the particular
+ * quota type is in the process of being turned off. If *ACTIVE, it is
+ * guaranteed that all dquot structures and all quotainode ptrs will all
+ * stay valid as long as that inode is kept locked.
+ *
+ * There is no turning back after this.
+ */
+ mp->m_qflags &= ~inactivate_flags;
+
+ /*
+ * Give back all the dquot reference(s) held by inodes.
+ * Here we go thru every single incore inode in this file system, and
+ * do a dqrele on the i_udquot/i_gdquot that it may have.
+ * Essentially, as long as somebody has an inode locked, this guarantees
+ * that quotas will not be turned off. This is handy because in a
+ * transaction once we lock the inode(s) and check for quotaon, we can
+ * depend on the quota inodes (and other things) being valid as long as
+ * we keep the lock(s).
+ */
+ xfs_qm_dqrele_all_inodes(mp, flags);
+
+ /*
+ * Next we make the changes in the quota flag in the mount struct.
+ * This isn't protected by a particular lock directly, because we
+ * don't want to take a mrlock everytime we depend on quotas being on.
+ */
+ mp->m_qflags &= ~(flags);
+
+ /*
+ * Go through all the dquots of this file system and purge them,
+ * according to what was turned off. We may not be able to get rid
+ * of all dquots, because dquots can have temporary references that
+ * are not attached to inodes. eg. xfs_setattr, xfs_create.
+ * So, if we couldn't purge all the dquots from the filesystem,
+ * we can't get rid of the incore data structures.
+ */
+ while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype|XFS_QMOPT_QUOTAOFF)))
+ delay(10 * nculprits);
+
+ /*
+ * Transactions that had started before ACTIVE state bit was cleared
+ * could have logged many dquots, so they'd have higher LSNs than
+ * the first QUOTAOFF log record does. If we happen to crash when
+ * the tail of the log has gone past the QUOTAOFF record, but
+ * before the last dquot modification, those dquots __will__
+ * recover, and that's not good.
+ *
+ * So, we have QUOTAOFF start and end logitems; the start
+ * logitem won't get overwritten until the end logitem appears...
+ */
+ xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
+
+ /*
+ * If quotas is completely disabled, close shop.
+ */
+ if ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_ALL) {
+ mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+ xfs_qm_destroy_quotainfo(mp);
+ return (0);
+ }
+
+ /*
+ * Release our quotainode references, and vn_purge them,
+ * if we don't need them anymore.
+ */
+ if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
+ XFS_PURGE_INODE(XFS_QI_UQIP(mp));
+ XFS_QI_UQIP(mp) = NULL;
+ }
+ if ((dqtype & XFS_QMOPT_GQUOTA) && XFS_QI_GQIP(mp)) {
+ XFS_PURGE_INODE(XFS_QI_GQIP(mp));
+ XFS_QI_GQIP(mp) = NULL;
+ }
+ mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+
+ return (error);
+}
+
+STATIC int
+xfs_qm_scall_trunc_qfiles(
+ xfs_mount_t *mp,
+ uint flags)
+{
+ int error;
+ xfs_inode_t *qip;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return XFS_ERROR(EPERM);
+ error = 0;
+ if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
+ qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
+ return XFS_ERROR(EINVAL);
+ }
+
+ if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
+ error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0);
+ if (! error) {
+ (void) xfs_truncate_file(mp, qip);
+ VN_RELE(XFS_ITOV(qip));
+ }
+ }
+
+ if ((flags & XFS_DQ_GROUP) && mp->m_sb.sb_gquotino != NULLFSINO) {
+ error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
+ if (! error) {
+ (void) xfs_truncate_file(mp, qip);
+ VN_RELE(XFS_ITOV(qip));
+ }
+ }
+
+ return (error);
+}
+
+
+/*
+ * Switch on (a given) quota enforcement for a filesystem. This takes
+ * effect immediately.
+ * (Switching on quota accounting must be done at mount time.)
+ */
+STATIC int
+xfs_qm_scall_quotaon(
+ xfs_mount_t *mp,
+ uint flags)
+{
+ int error;
+ unsigned long s;
+ uint qf;
+ uint accflags;
+ __int64_t sbflags;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return XFS_ERROR(EPERM);
+
+ flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+ /*
+ * Switching on quota accounting must be done at mount time.
+ */
+ accflags = flags & XFS_ALL_QUOTA_ACCT;
+ flags &= ~(XFS_ALL_QUOTA_ACCT);
+
+ sbflags = 0;
+
+ if (flags == 0) {
+ qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags);
+ return XFS_ERROR(EINVAL);
+ }
+
+ /* No fs can turn on quotas with a delayed effect */
+ ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
+
+ /*
+ * Can't enforce without accounting. We check the superblock
+ * qflags here instead of m_qflags because rootfs can have
+ * quota acct on ondisk without m_qflags' knowing.
+ */
+ if (((flags & XFS_UQUOTA_ACCT) == 0 &&
+ (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
+ (flags & XFS_UQUOTA_ENFD))
+ ||
+ ((flags & XFS_GQUOTA_ACCT) == 0 &&
+ (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
+ (flags & XFS_GQUOTA_ENFD))) {
+ qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n",
+ flags, mp->m_sb.sb_qflags);
+ return XFS_ERROR(EINVAL);
+ }
+ /*
+ * If everything's upto-date incore, then don't waste time.
+ */
+ if ((mp->m_qflags & flags) == flags)
+ return XFS_ERROR(EEXIST);
+
+ /*
+ * Change sb_qflags on disk but not incore mp->qflags
+ * if this is the root filesystem.
+ */
+ s = XFS_SB_LOCK(mp);
+ qf = mp->m_sb.sb_qflags;
+ mp->m_sb.sb_qflags = qf | flags;
+ XFS_SB_UNLOCK(mp, s);
+
+ /*
+ * There's nothing to change if it's the same.
+ */
+ if ((qf & flags) == flags && sbflags == 0)
+ return XFS_ERROR(EEXIST);
+ sbflags |= XFS_SB_QFLAGS;
+
+ if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
+ return (error);
+ /*
+ * If we aren't trying to switch on quota enforcement, we are done.
+ */
+ if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
+ (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
+ (flags & XFS_ALL_QUOTA_ENFD) == 0)
+ return (0);
+
+ if (! XFS_IS_QUOTA_RUNNING(mp))
+ return XFS_ERROR(ESRCH);
+
+ /*
+ * Switch on quota enforcement in core.
+ */
+ mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+ mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
+ mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+
+ return (0);
+}
+
+
+
+/*
+ * Return quota status information, such as uquota-off, enforcements, etc.
+ */
+STATIC int
+xfs_qm_scall_getqstat(
+ xfs_mount_t *mp,
+ fs_quota_stat_t *out)
+{
+ xfs_inode_t *uip, *gip;
+ boolean_t tempuqip, tempgqip;
+
+ uip = gip = NULL;
+ tempuqip = tempgqip = B_FALSE;
+ memset(out, 0, sizeof(fs_quota_stat_t));
+
+ out->qs_version = FS_QSTAT_VERSION;
+ if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+ out->qs_uquota.qfs_ino = NULLFSINO;
+ out->qs_gquota.qfs_ino = NULLFSINO;
+ return (0);
+ }
+ out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
+ (XFS_ALL_QUOTA_ACCT|
+ XFS_ALL_QUOTA_ENFD));
+ out->qs_pad = 0;
+ out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
+ out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
+
+ if (mp->m_quotainfo) {
+ uip = mp->m_quotainfo->qi_uquotaip;
+ gip = mp->m_quotainfo->qi_gquotaip;
+ }
+ if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
+ if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+ 0, 0, &uip, 0) == 0)
+ tempuqip = B_TRUE;
+ }
+ if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
+ if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+ 0, 0, &gip, 0) == 0)
+ tempgqip = B_TRUE;
+ }
+ if (uip) {
+ out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
+ out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
+ if (tempuqip)
+ VN_RELE(XFS_ITOV(uip));
+ }
+ if (gip) {
+ out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
+ out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
+ if (tempgqip)
+ VN_RELE(XFS_ITOV(gip));
+ }
+ if (mp->m_quotainfo) {
+ out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp);
+ out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp);
+ out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp);
+ out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp);
+ out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp);
+ out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp);
+ }
+ return (0);
+}
+
+/*
+ * Adjust quota limits, and start/stop timers accordingly.
+ */
+STATIC int
+xfs_qm_scall_setqlim(
+ xfs_mount_t *mp,
+ xfs_dqid_t id,
+ uint type,
+ fs_disk_quota_t *newlim)
+{
+ xfs_disk_dquot_t *ddq;
+ xfs_dquot_t *dqp;
+ xfs_trans_t *tp;
+ int error;
+ xfs_qcnt_t hard, soft;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return XFS_ERROR(EPERM);
+
+ if ((newlim->d_fieldmask & (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK)) == 0)
+ return (0);
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
+ if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
+ 0, 0, XFS_DEFAULT_LOG_COUNT))) {
+ xfs_trans_cancel(tp, 0);
+ return (error);
+ }
+
+ /*
+ * We don't want to race with a quotaoff so take the quotaoff lock.
+ * (We don't hold an inode lock, so there's nothing else to stop
+ * a quotaoff from happening). (XXXThis doesn't currently happen
+ * because we take the vfslock before calling xfs_qm_sysent).
+ */
+ mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD);
+
+ /*
+ * Get the dquot (locked), and join it to the transaction.
+ * Allocate the dquot if this doesn't exist.
+ */
+ if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
+ xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+ mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+ ASSERT(error != ENOENT);
+ return (error);
+ }
+ xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET");
+ xfs_trans_dqjoin(tp, dqp);
+ ddq = &dqp->q_core;
+
+ /*
+ * Make sure that hardlimits are >= soft limits before changing.
+ */
+ hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
+ (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
+ INT_GET(ddq->d_blk_hardlimit, ARCH_CONVERT);
+ soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
+ (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
+ INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT);
+ if (hard == 0 || hard >= soft) {
+ INT_SET(ddq->d_blk_hardlimit, ARCH_CONVERT, hard);
+ INT_SET(ddq->d_blk_softlimit, ARCH_CONVERT, soft);
+ if (id == 0) {
+ mp->m_quotainfo->qi_bhardlimit = hard;
+ mp->m_quotainfo->qi_bsoftlimit = soft;
+ }
+ } else {
+ qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft);
+ }
+ hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
+ (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
+ INT_GET(ddq->d_rtb_hardlimit, ARCH_CONVERT);
+ soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
+ (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
+ INT_GET(ddq->d_rtb_softlimit, ARCH_CONVERT);
+ if (hard == 0 || hard >= soft) {
+ INT_SET(ddq->d_rtb_hardlimit, ARCH_CONVERT, hard);
+ INT_SET(ddq->d_rtb_softlimit, ARCH_CONVERT, soft);
+ if (id == 0) {
+ mp->m_quotainfo->qi_rtbhardlimit = hard;
+ mp->m_quotainfo->qi_rtbsoftlimit = soft;
+ }
+ } else {
+ qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
+ }
+
+ hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
+ (xfs_qcnt_t) newlim->d_ino_hardlimit :
+ INT_GET(ddq->d_ino_hardlimit, ARCH_CONVERT);
+ soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
+ (xfs_qcnt_t) newlim->d_ino_softlimit :
+ INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT);
+ if (hard == 0 || hard >= soft) {
+ INT_SET(ddq->d_ino_hardlimit, ARCH_CONVERT, hard);
+ INT_SET(ddq->d_ino_softlimit, ARCH_CONVERT, soft);
+ if (id == 0) {
+ mp->m_quotainfo->qi_ihardlimit = hard;
+ mp->m_quotainfo->qi_isoftlimit = soft;
+ }
+ } else {
+ qdprintk("ihard %Ld < isoft %Ld\n", hard, soft);
+ }
+
+ if (id == 0) {
+ /*
+ * Timelimits for the super user set the relative time
+ * the other users can be over quota for this file system.
+ * If it is zero a default is used. Ditto for the default
+ * soft and hard limit values (already done, above).
+ */
+ if (newlim->d_fieldmask & FS_DQ_BTIMER) {
+ mp->m_quotainfo->qi_btimelimit = newlim->d_btimer;
+ INT_SET(ddq->d_btimer, ARCH_CONVERT, newlim->d_btimer);
+ }
+ if (newlim->d_fieldmask & FS_DQ_ITIMER) {
+ mp->m_quotainfo->qi_itimelimit = newlim->d_itimer;
+ INT_SET(ddq->d_itimer, ARCH_CONVERT, newlim->d_itimer);
+ }
+ if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
+ mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer;
+ INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, newlim->d_rtbtimer);
+ }
+ } else /* if (XFS_IS_QUOTA_ENFORCED(mp)) */ {
+ /*
+ * If the user is now over quota, start the timelimit.
+ * The user will not be 'warned'.
+ * Note that we keep the timers ticking, whether enforcement
+ * is on or off. We don't really want to bother with iterating
+ * over all ondisk dquots and turning the timers on/off.
+ */
+ xfs_qm_adjust_dqtimers(mp, ddq);
+ }
+ dqp->dq_flags |= XFS_DQ_DIRTY;
+ xfs_trans_log_dquot(tp, dqp);
+
+ xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
+ xfs_trans_commit(tp, 0, NULL);
+ xfs_qm_dqprint(dqp);
+ xfs_qm_dqrele(dqp);
+ mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+
+ return (0);
+}
+
+STATIC int
+xfs_qm_scall_getquota(
+ xfs_mount_t *mp,
+ xfs_dqid_t id,
+ uint type,
+ fs_disk_quota_t *out)
+{
+ xfs_dquot_t *dqp;
+ int error;
+
+ /*
+ * Try to get the dquot. We don't want it allocated on disk, so
+ * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
+ * exist, we'll get ENOENT back.
+ */
+ if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
+ return (error);
+ }
+
+ xfs_dqtrace_entry(dqp, "Q_GETQUOTA SUCCESS");
+ /*
+ * If everything's NULL, this dquot doesn't quite exist as far as
+ * our utility programs are concerned.
+ */
+ if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+ xfs_qm_dqput(dqp);
+ return XFS_ERROR(ENOENT);
+ }
+ /* xfs_qm_dqprint(dqp); */
+ /*
+ * Convert the disk dquot to the exportable format
+ */
+ xfs_qm_export_dquot(mp, &dqp->q_core, out);
+ xfs_qm_dqput(dqp);
+ return (error ? XFS_ERROR(EFAULT) : 0);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff_end(
+ xfs_mount_t *mp,
+ xfs_qoff_logitem_t *startqoff,
+ uint flags)
+{
+ xfs_trans_t *tp;
+ int error;
+ xfs_qoff_logitem_t *qoffi;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
+
+ if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
+ 0, 0, XFS_DEFAULT_LOG_COUNT))) {
+ xfs_trans_cancel(tp, 0);
+ return (error);
+ }
+
+ qoffi = xfs_trans_get_qoff_item(tp, startqoff,
+ flags & XFS_ALL_QUOTA_ACCT);
+ xfs_trans_log_quotaoff_item(tp, qoffi);
+
+ /*
+ * We have to make sure that the transaction is secure on disk before we
+ * return and actually stop quota accounting. So, make it synchronous.
+ * We don't care about quotoff's performance.
+ */
+ xfs_trans_set_sync(tp);
+ error = xfs_trans_commit(tp, 0, NULL);
+ return (error);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff(
+ xfs_mount_t *mp,
+ xfs_qoff_logitem_t **qoffstartp,
+ uint flags)
+{
+ xfs_trans_t *tp;
+ int error;
+ unsigned long s;
+ xfs_qoff_logitem_t *qoffi=NULL;
+ uint oldsbqflag=0;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
+ if ((error = xfs_trans_reserve(tp, 0,
+ sizeof(xfs_qoff_logitem_t) * 2 +
+ mp->m_sb.sb_sectsize + 128,
+ 0,
+ 0,
+ XFS_DEFAULT_LOG_COUNT))) {
+ goto error0;
+ }
+
+ qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+ xfs_trans_log_quotaoff_item(tp, qoffi);
+
+ s = XFS_SB_LOCK(mp);
+ oldsbqflag = mp->m_sb.sb_qflags;
+ mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
+ XFS_SB_UNLOCK(mp, s);
+
+ xfs_mod_sb(tp, XFS_SB_QFLAGS);
+
+ /*
+ * We have to make sure that the transaction is secure on disk before we
+ * return and actually stop quota accounting. So, make it synchronous.
+ * We don't care about quotoff's performance.
+ */
+ xfs_trans_set_sync(tp);
+ error = xfs_trans_commit(tp, 0, NULL);
+
+error0:
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ /*
+ * No one else is modifying sb_qflags, so this is OK.
+ * We still hold the quotaofflock.
+ */
+ s = XFS_SB_LOCK(mp);
+ mp->m_sb.sb_qflags = oldsbqflag;
+ XFS_SB_UNLOCK(mp, s);
+ }
+ *qoffstartp = qoffi;
+ return (error);
+}
+
+
+/*
+ * Translate an internal style on-disk-dquot to the exportable format.
+ * The main differences are that the counters/limits are all in Basic
+ * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
+ * to be converted to the native endianness.
+ */
+STATIC void
+xfs_qm_export_dquot(
+ xfs_mount_t *mp,
+ xfs_disk_dquot_t *src,
+ struct fs_disk_quota *dst)
+{
+ memset(dst, 0, sizeof(*dst));
+ dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */
+ dst->d_flags =
+ xfs_qm_export_qtype_flags(INT_GET(src->d_flags, ARCH_CONVERT));
+ dst->d_id = INT_GET(src->d_id, ARCH_CONVERT);
+ dst->d_blk_hardlimit = (__uint64_t)
+ XFS_FSB_TO_BB(mp, INT_GET(src->d_blk_hardlimit, ARCH_CONVERT));
+ dst->d_blk_softlimit = (__uint64_t)
+ XFS_FSB_TO_BB(mp, INT_GET(src->d_blk_softlimit, ARCH_CONVERT));
+ dst->d_ino_hardlimit = (__uint64_t)
+ INT_GET(src->d_ino_hardlimit, ARCH_CONVERT);
+ dst->d_ino_softlimit = (__uint64_t)
+ INT_GET(src->d_ino_softlimit, ARCH_CONVERT);
+ dst->d_bcount = (__uint64_t)
+ XFS_FSB_TO_BB(mp, INT_GET(src->d_bcount, ARCH_CONVERT));
+ dst->d_icount = (__uint64_t) INT_GET(src->d_icount, ARCH_CONVERT);
+ dst->d_btimer = (__uint32_t) INT_GET(src->d_btimer, ARCH_CONVERT);
+ dst->d_itimer = (__uint32_t) INT_GET(src->d_itimer, ARCH_CONVERT);
+ dst->d_iwarns = INT_GET(src->d_iwarns, ARCH_CONVERT);
+ dst->d_bwarns = INT_GET(src->d_bwarns, ARCH_CONVERT);
+
+ dst->d_rtb_hardlimit = (__uint64_t)
+ XFS_FSB_TO_BB(mp, INT_GET(src->d_rtb_hardlimit, ARCH_CONVERT));
+ dst->d_rtb_softlimit = (__uint64_t)
+ XFS_FSB_TO_BB(mp, INT_GET(src->d_rtb_softlimit, ARCH_CONVERT));
+ dst->d_rtbcount = (__uint64_t)
+ XFS_FSB_TO_BB(mp, INT_GET(src->d_rtbcount, ARCH_CONVERT));
+ dst->d_rtbtimer = (__uint32_t) INT_GET(src->d_rtbtimer, ARCH_CONVERT);
+ dst->d_rtbwarns = INT_GET(src->d_rtbwarns, ARCH_CONVERT);
+
+ /*
+ * Internally, we don't reset all the timers when quota enforcement
+ * gets turned off. No need to confuse the userlevel code,
+ * so return zeroes in that case.
+ */
+ if (! XFS_IS_QUOTA_ENFORCED(mp)) {
+ dst->d_btimer = 0;
+ dst->d_itimer = 0;
+ dst->d_rtbtimer = 0;
+ }
+
+#ifdef DEBUG
+ if (XFS_IS_QUOTA_ENFORCED(mp) && dst->d_id != 0) {
+ if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
+ (dst->d_blk_softlimit > 0)) {
+ ASSERT(dst->d_btimer != 0);
+ }
+ if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
+ (dst->d_ino_softlimit > 0)) {
+ ASSERT(dst->d_itimer != 0);
+ }
+ }
+#endif
+}
+
+STATIC uint
+xfs_qm_import_qtype_flags(
+ uint uflags)
+{
+ /*
+ * Can't be both at the same time.
+ */
+ if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ==
+ (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ||
+ ((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == 0))
+ return (0);
+
+ return (uflags & XFS_USER_QUOTA) ?
+ XFS_DQ_USER : XFS_DQ_GROUP;
+}
+
+STATIC uint
+xfs_qm_export_qtype_flags(
+ uint flags)
+{
+ /*
+ * Can't be both at the same time.
+ */
+ ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) !=
+ (XFS_GROUP_QUOTA | XFS_USER_QUOTA));
+ ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) != 0);
+
+ return (flags & XFS_DQ_USER) ?
+ XFS_USER_QUOTA : XFS_GROUP_QUOTA;
+}
+
+STATIC uint
+xfs_qm_import_flags(
+ uint uflags)
+{
+ uint flags = 0;
+
+ if (uflags & XFS_QUOTA_UDQ_ACCT)
+ flags |= XFS_UQUOTA_ACCT;
+ if (uflags & XFS_QUOTA_GDQ_ACCT)
+ flags |= XFS_GQUOTA_ACCT;
+ if (uflags & XFS_QUOTA_UDQ_ENFD)
+ flags |= XFS_UQUOTA_ENFD;
+ if (uflags & XFS_QUOTA_GDQ_ENFD)
+ flags |= XFS_GQUOTA_ENFD;
+ return (flags);
+}
+
+
+STATIC uint
+xfs_qm_export_flags(
+ uint flags)
+{
+ uint uflags;
+
+ uflags = 0;
+ if (flags & XFS_UQUOTA_ACCT)
+ uflags |= XFS_QUOTA_UDQ_ACCT;
+ if (flags & XFS_GQUOTA_ACCT)
+ uflags |= XFS_QUOTA_GDQ_ACCT;
+ if (flags & XFS_UQUOTA_ENFD)
+ uflags |= XFS_QUOTA_UDQ_ENFD;
+ if (flags & XFS_GQUOTA_ENFD)
+ uflags |= XFS_QUOTA_GDQ_ENFD;
+ return (uflags);
+}
+
+
+/*
+ * Go thru all the inodes in the file system, releasing their dquots.
+ * Note that the mount structure gets modified to indicate that quotas are off
+ * AFTER this, in the case of quotaoff. This also gets called from
+ * xfs_rootumount.
+ */
+void
+xfs_qm_dqrele_all_inodes(
+ struct xfs_mount *mp,
+ uint flags)
+{
+ vmap_t vmap;
+ xfs_inode_t *ip, *topino;
+ uint ireclaims;
+ vnode_t *vp;
+ boolean_t vnode_refd;
+
+ ASSERT(mp->m_quotainfo);
+
+again:
+ XFS_MOUNT_ILOCK(mp);
+ ip = mp->m_inodes;
+ if (ip == NULL) {
+ XFS_MOUNT_IUNLOCK(mp);
+ return;
+ }
+ do {
+ /* Skip markers inserted by xfs_sync */
+ if (ip->i_mount == NULL) {
+ ip = ip->i_mnext;
+ continue;
+ }
+ /* Root inode, rbmip and rsumip have associated blocks */
+ if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
+ ASSERT(ip->i_udquot == NULL);
+ ASSERT(ip->i_gdquot == NULL);
+ ip = ip->i_mnext;
+ continue;
+ }
+ vp = XFS_ITOV_NULL(ip);
+ if (!vp) {
+ ASSERT(ip->i_udquot == NULL);
+ ASSERT(ip->i_gdquot == NULL);
+ ip = ip->i_mnext;
+ continue;
+ }
+ vnode_refd = B_FALSE;
+ if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
+ /*
+ * Sample vp mapping while holding the mplock, lest
+ * we come across a non-existent vnode.
+ */
+ VMAP(vp, vmap);
+ ireclaims = mp->m_ireclaims;
+ topino = mp->m_inodes;
+ XFS_MOUNT_IUNLOCK(mp);
+
+ /* XXX restart limit ? */
+ if ( ! (vp = vn_get(vp, &vmap)))
+ goto again;
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ vnode_refd = B_TRUE;
+ } else {
+ ireclaims = mp->m_ireclaims;
+ topino = mp->m_inodes;
+ XFS_MOUNT_IUNLOCK(mp);
+ }
+
+ /*
+ * We don't keep the mountlock across the dqrele() call,
+ * since it can take a while..
+ */
+ if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+ xfs_qm_dqrele(ip->i_udquot);
+ ip->i_udquot = NULL;
+ }
+ if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+ xfs_qm_dqrele(ip->i_gdquot);
+ ip->i_gdquot = NULL;
+ }
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ /*
+ * Wait until we've dropped the ilock and mountlock to
+ * do the vn_rele. Or be condemned to an eternity in the
+ * inactive code in hell.
+ */
+ if (vnode_refd)
+ VN_RELE(vp);
+ XFS_MOUNT_ILOCK(mp);
+ /*
+ * If an inode was inserted or removed, we gotta
+ * start over again.
+ */
+ if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
+ /* XXX use a sentinel */
+ XFS_MOUNT_IUNLOCK(mp);
+ goto again;
+ }
+ ip = ip->i_mnext;
+ } while (ip != mp->m_inodes);
+
+ XFS_MOUNT_IUNLOCK(mp);
+}
+
+/*------------------------------------------------------------------------*/
+#ifdef DEBUG
+/*
+ * This contains all the test functions for XFS disk quotas.
+ * Currently it does a quota accounting check. ie. it walks through
+ * all inodes in the file system, calculating the dquot accounting fields,
+ * and prints out any inconsistencies.
+ */
+xfs_dqhash_t *qmtest_udqtab;
+xfs_dqhash_t *qmtest_gdqtab;
+int qmtest_hashmask;
+int qmtest_nfails;
+mutex_t qcheck_lock;
+
+#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
+ (__psunsigned_t)(id)) & \
+ (qmtest_hashmask - 1))
+
+#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \
+ (qmtest_udqtab + \
+ DQTEST_HASHVAL(mp, id)) : \
+ (qmtest_gdqtab + \
+ DQTEST_HASHVAL(mp, id)))
+
+#define DQTEST_LIST_PRINT(l, NXT, title) \
+{ \
+ xfs_dqtest_t *dqp; int i = 0;\
+ cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
+ for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
+ dqp = (xfs_dqtest_t *)dqp->NXT) { \
+ cmn_err(CE_DEBUG, " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \
+ ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \
+ dqp->d_bcount, dqp->d_icount); } \
+}
+
+typedef struct dqtest {
+ xfs_dqmarker_t q_lists;
+ xfs_dqhash_t *q_hash; /* the hashchain header */
+ xfs_mount_t *q_mount; /* filesystem this relates to */
+ xfs_dqid_t d_id; /* user id or group id */
+ xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */
+ xfs_qcnt_t d_icount; /* # inodes owned by the user */
+} xfs_dqtest_t;
+
+STATIC void
+xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
+{
+ xfs_dquot_t *d;
+ if (((d) = (h)->qh_next))
+ (d)->HL_PREVP = &((dqp)->HL_NEXT);
+ (dqp)->HL_NEXT = d;
+ (dqp)->HL_PREVP = &((h)->qh_next);
+ (h)->qh_next = (xfs_dquot_t *)dqp;
+ (h)->qh_version++;
+ (h)->qh_nelems++;
+}
+STATIC void
+xfs_qm_dqtest_print(
+ xfs_dqtest_t *d)
+{
+ cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------");
+ cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id);
+ cmn_err(CE_DEBUG, "---- type = %s", XFS_QM_ISUDQ(d)? "USR" : "GRP");
+ cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount);
+ cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)",
+ d->d_bcount, (int)d->d_bcount);
+ cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)",
+ d->d_icount, (int)d->d_icount);
+ cmn_err(CE_DEBUG, "---------------------------");
+}
+
+STATIC void
+xfs_qm_dqtest_failed(
+ xfs_dqtest_t *d,
+ xfs_dquot_t *dqp,
+ char *reason,
+ xfs_qcnt_t a,
+ xfs_qcnt_t b,
+ int error)
+{
+ qmtest_nfails++;
+ if (error)
+ cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s",
+ INT_GET(d->d_id, ARCH_CONVERT), error, reason);
+ else
+ cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]",
+ INT_GET(d->d_id, ARCH_CONVERT), reason, (int)a, (int)b);
+ xfs_qm_dqtest_print(d);
+ if (dqp)
+ xfs_qm_dqprint(dqp);
+}
+
+STATIC int
+xfs_dqtest_cmp2(
+ xfs_dqtest_t *d,
+ xfs_dquot_t *dqp)
+{
+ int err = 0;
+ if (INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) != d->d_icount) {
+ xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
+ INT_GET(dqp->q_core.d_icount, ARCH_CONVERT),
+ d->d_icount, 0);
+ err++;
+ }
+ if (INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT) != d->d_bcount) {
+ xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
+ INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT),
+ d->d_bcount, 0);
+ err++;
+ }
+ if (INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT) &&
+ INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT) >=
+ INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT)) {
+ if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
+ cmn_err(CE_DEBUG,
+ "%d [%s] [0x%p] BLK TIMER NOT STARTED",
+ d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
+ err++;
+ }
+ }
+ if (INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT) &&
+ INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >=
+ INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT)) {
+ if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
+ cmn_err(CE_DEBUG,
+ "%d [%s] [0x%p] INO TIMER NOT STARTED",
+ d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
+ err++;
+ }
+ }
+#ifdef QUOTADEBUG
+ if (!err) {
+ cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked",
+ d->d_id, XFS_QM_ISUDQ(d) ? "USR" : "GRP", d->q_mount);
+ }
+#endif
+ return (err);
+}
+
+STATIC void
+xfs_dqtest_cmp(
+ xfs_dqtest_t *d)
+{
+ xfs_dquot_t *dqp;
+ int error;
+
+ /* xfs_qm_dqtest_print(d); */
+ if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0,
+ &dqp))) {
+ xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error);
+ return;
+ }
+ xfs_dqtest_cmp2(d, dqp);
+ xfs_qm_dqput(dqp);
+}
+
+STATIC int
+xfs_qm_internalqcheck_dqget(
+ xfs_mount_t *mp,
+ xfs_dqid_t id,
+ uint type,
+ xfs_dqtest_t **O_dq)
+{
+ xfs_dqtest_t *d;
+ xfs_dqhash_t *h;
+
+ h = DQTEST_HASH(mp, id, type);
+ for (d = (xfs_dqtest_t *) h->qh_next; d != NULL;
+ d = (xfs_dqtest_t *) d->HL_NEXT) {
+ /* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */
+ if (d->d_id == id && mp == d->q_mount) {
+ *O_dq = d;
+ return (0);
+ }
+ }
+ d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP);
+ d->dq_flags = type;
+ d->d_id = id;
+ d->q_mount = mp;
+ d->q_hash = h;
+ xfs_qm_hashinsert(h, d);
+ *O_dq = d;
+ return (0);
+}
+
+STATIC void
+xfs_qm_internalqcheck_get_dquots(
+ xfs_mount_t *mp,
+ xfs_dqid_t uid,
+ xfs_dqid_t gid,
+ xfs_dqtest_t **ud,
+ xfs_dqtest_t **gd)
+{
+ if (XFS_IS_UQUOTA_ON(mp))
+ xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
+ if (XFS_IS_GQUOTA_ON(mp))
+ xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
+}
+
+
+STATIC void
+xfs_qm_internalqcheck_dqadjust(
+ xfs_inode_t *ip,
+ xfs_dqtest_t *d)
+{
+ d->d_icount++;
+ d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks;
+}
+
+STATIC int
+xfs_qm_internalqcheck_adjust(
+ xfs_mount_t *mp, /* mount point for filesystem */
+ xfs_ino_t ino, /* inode number to get data for */
+ void __user *buffer, /* not used */
+ int ubsize, /* not used */
+ void *private_data, /* not used */
+ xfs_daddr_t bno, /* starting block of inode cluster */
+ int *ubused, /* not used */
+ void *dip, /* not used */
+ int *res) /* bulkstat result code */
+{
+ xfs_inode_t *ip;
+ xfs_dqtest_t *ud, *gd;
+ uint lock_flags;
+ boolean_t ipreleased;
+ int error;
+
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+ if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
+ *res = BULKSTAT_RV_NOTHING;
+ qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n",
+ (unsigned long long) ino,
+ (unsigned long long) mp->m_sb.sb_uquotino,
+ (unsigned long long) mp->m_sb.sb_gquotino);
+ return XFS_ERROR(EINVAL);
+ }
+ ipreleased = B_FALSE;
+ again:
+ lock_flags = XFS_ILOCK_SHARED;
+ if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) {
+ *res = BULKSTAT_RV_NOTHING;
+ return (error);
+ }
+
+ if (ip->i_d.di_mode == 0) {
+ xfs_iput_new(ip, lock_flags);
+ *res = BULKSTAT_RV_NOTHING;
+ return XFS_ERROR(ENOENT);
+ }
+
+ /*
+ * This inode can have blocks after eof which can get released
+ * when we send it to inactive. Since we don't check the dquot
+ * until the after all our calculations are done, we must get rid
+ * of those now.
+ */
+ if (! ipreleased) {
+ xfs_iput(ip, lock_flags);
+ ipreleased = B_TRUE;
+ goto again;
+ }
+ xfs_qm_internalqcheck_get_dquots(mp,
+ (xfs_dqid_t) ip->i_d.di_uid,
+ (xfs_dqid_t) ip->i_d.di_gid,
+ &ud, &gd);
+ if (XFS_IS_UQUOTA_ON(mp)) {
+ ASSERT(ud);
+ xfs_qm_internalqcheck_dqadjust(ip, ud);
+ }
+ if (XFS_IS_GQUOTA_ON(mp)) {
+ ASSERT(gd);
+ xfs_qm_internalqcheck_dqadjust(ip, gd);
+ }
+ xfs_iput(ip, lock_flags);
+ *res = BULKSTAT_RV_DIDONE;
+ return (0);
+}
+
+
+/* PRIVATE, debugging */
+int
+xfs_qm_internalqcheck(
+ xfs_mount_t *mp)
+{
+ xfs_ino_t lastino;
+ int done, count;
+ int i;
+ xfs_dqtest_t *d, *e;
+ xfs_dqhash_t *h1;
+ int error;
+
+ lastino = 0;
+ qmtest_hashmask = 32;
+ count = 5;
+ done = 0;
+ qmtest_nfails = 0;
+
+ if (! XFS_IS_QUOTA_ON(mp))
+ return XFS_ERROR(ESRCH);
+
+ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
+ XFS_bflush(mp->m_ddev_targp);
+ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
+ XFS_bflush(mp->m_ddev_targp);
+
+ mutex_lock(&qcheck_lock, PINOD);
+ /* There should be absolutely no quota activity while this
+ is going on. */
+ qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
+ sizeof(xfs_dqhash_t), KM_SLEEP);
+ qmtest_gdqtab = kmem_zalloc(qmtest_hashmask *
+ sizeof(xfs_dqhash_t), KM_SLEEP);
+ do {
+ /*
+ * Iterate thru all the inodes in the file system,
+ * adjusting the corresponding dquot counters
+ */
+ if ((error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_internalqcheck_adjust, NULL,
+ 0, NULL, BULKSTAT_FG_IGET, &done))) {
+ break;
+ }
+ } while (! done);
+ if (error) {
+ cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
+ }
+ cmn_err(CE_DEBUG, "Checking results against system dquots");
+ for (i = 0; i < qmtest_hashmask; i++) {
+ h1 = &qmtest_udqtab[i];
+ for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
+ xfs_dqtest_cmp(d);
+ e = (xfs_dqtest_t *) d->HL_NEXT;
+ kmem_free(d, sizeof(xfs_dqtest_t));
+ d = e;
+ }
+ h1 = &qmtest_gdqtab[i];
+ for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
+ xfs_dqtest_cmp(d);
+ e = (xfs_dqtest_t *) d->HL_NEXT;
+ kmem_free(d, sizeof(xfs_dqtest_t));
+ d = e;
+ }
+ }
+
+ if (qmtest_nfails) {
+ cmn_err(CE_DEBUG, "******** quotacheck failed ********");
+ cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails);
+ } else {
+ cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
+ }
+ kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
+ kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
+ mutex_unlock(&qcheck_lock);
+ return (qmtest_nfails);
+}
+
+#endif /* DEBUG */
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
new file mode 100644
index 000000000000..414b6004af21
--- /dev/null
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_QUOTA_PRIV_H__
+#define __XFS_QUOTA_PRIV_H__
+
+/*
+ * Number of bmaps that we ask from bmapi when doing a quotacheck.
+ * We make this restriction to keep the memory usage to a minimum.
+ */
+#define XFS_DQITER_MAP_SIZE 10
+
+/* Number of dquots that fit in to a dquot block */
+#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk)
+
+#define XFS_ISLOCKED_INODE(ip) (ismrlocked(&(ip)->i_lock, \
+ MR_UPDATE | MR_ACCESS) != 0)
+#define XFS_ISLOCKED_INODE_EXCL(ip) (ismrlocked(&(ip)->i_lock, \
+ MR_UPDATE) != 0)
+
+#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t))
+
+#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
+#define XFS_QI_UQIP(mp) ((mp)->m_quotainfo->qi_uquotaip)
+#define XFS_QI_GQIP(mp) ((mp)->m_quotainfo->qi_gquotaip)
+#define XFS_QI_DQCHUNKLEN(mp) ((mp)->m_quotainfo->qi_dqchunklen)
+#define XFS_QI_BTIMELIMIT(mp) ((mp)->m_quotainfo->qi_btimelimit)
+#define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit)
+#define XFS_QI_ITIMELIMIT(mp) ((mp)->m_quotainfo->qi_itimelimit)
+#define XFS_QI_BWARNLIMIT(mp) ((mp)->m_quotainfo->qi_bwarnlimit)
+#define XFS_QI_IWARNLIMIT(mp) ((mp)->m_quotainfo->qi_iwarnlimit)
+#define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock)
+
+#define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist)
+#define XFS_QI_MPLLOCK(mp) ((mp)->m_quotainfo->qi_dqlist.qh_lock)
+#define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next)
+#define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems)
+
+#define XQMLCK(h) (mutex_lock(&((h)->qh_lock), PINOD))
+#define XQMUNLCK(h) (mutex_unlock(&((h)->qh_lock)))
+#ifdef DEBUG
+struct xfs_dqhash;
+static inline int XQMISLCKD(struct xfs_dqhash *h)
+{
+ if (mutex_trylock(&h->qh_lock)) {
+ mutex_unlock(&h->qh_lock);
+ return 0;
+ }
+ return 1;
+}
+#endif
+
+#define XFS_DQ_HASH_LOCK(h) XQMLCK(h)
+#define XFS_DQ_HASH_UNLOCK(h) XQMUNLCK(h)
+#define XFS_DQ_IS_HASH_LOCKED(h) XQMISLCKD(h)
+
+#define xfs_qm_mplist_lock(mp) XQMLCK(&(XFS_QI_MPL_LIST(mp)))
+#define xfs_qm_mplist_unlock(mp) XQMUNLCK(&(XFS_QI_MPL_LIST(mp)))
+#define XFS_QM_IS_MPLIST_LOCKED(mp) XQMISLCKD(&(XFS_QI_MPL_LIST(mp)))
+
+#define xfs_qm_freelist_lock(qm) XQMLCK(&((qm)->qm_dqfreelist))
+#define xfs_qm_freelist_unlock(qm) XQMUNLCK(&((qm)->qm_dqfreelist))
+#define XFS_QM_IS_FREELIST_LOCKED(qm) XQMISLCKD(&((qm)->qm_dqfreelist))
+
+/*
+ * Hash into a bucket in the dquot hash table, based on <mp, id>.
+ */
+#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
+ (__psunsigned_t)(id)) & \
+ (xfs_Gqm->qm_dqhashmask - 1))
+#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \
+ (xfs_Gqm->qm_usr_dqhtable + \
+ XFS_DQ_HASHVAL(mp, id)) : \
+ (xfs_Gqm->qm_grp_dqhtable + \
+ XFS_DQ_HASHVAL(mp, id)))
+#define XFS_IS_DQTYPE_ON(mp, type) (type == XFS_DQ_USER ? \
+ XFS_IS_UQUOTA_ON(mp):XFS_IS_GQUOTA_ON(mp))
+#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
+ !dqp->q_core.d_blk_hardlimit && \
+ !dqp->q_core.d_blk_softlimit && \
+ !dqp->q_core.d_rtb_hardlimit && \
+ !dqp->q_core.d_rtb_softlimit && \
+ !dqp->q_core.d_ino_hardlimit && \
+ !dqp->q_core.d_ino_softlimit && \
+ !dqp->q_core.d_bcount && \
+ !dqp->q_core.d_rtbcount && \
+ !dqp->q_core.d_icount)
+
+#define HL_PREVP dq_hashlist.ql_prevp
+#define HL_NEXT dq_hashlist.ql_next
+#define MPL_PREVP dq_mplist.ql_prevp
+#define MPL_NEXT dq_mplist.ql_next
+
+
+#define _LIST_REMOVE(h, dqp, PVP, NXT) \
+ { \
+ xfs_dquot_t *d; \
+ if (((d) = (dqp)->NXT)) \
+ (d)->PVP = (dqp)->PVP; \
+ *((dqp)->PVP) = d; \
+ (dqp)->NXT = NULL; \
+ (dqp)->PVP = NULL; \
+ (h)->qh_version++; \
+ (h)->qh_nelems--; \
+ }
+
+#define _LIST_INSERT(h, dqp, PVP, NXT) \
+ { \
+ xfs_dquot_t *d; \
+ if (((d) = (h)->qh_next)) \
+ (d)->PVP = &((dqp)->NXT); \
+ (dqp)->NXT = d; \
+ (dqp)->PVP = &((h)->qh_next); \
+ (h)->qh_next = dqp; \
+ (h)->qh_version++; \
+ (h)->qh_nelems++; \
+ }
+
+#define FOREACH_DQUOT_IN_MP(dqp, mp) \
+ for ((dqp) = XFS_QI_MPLNEXT(mp); (dqp) != NULL; (dqp) = (dqp)->MPL_NEXT)
+
+#define FOREACH_DQUOT_IN_FREELIST(dqp, qlist) \
+for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
+ (dqp) = (dqp)->dq_flnext)
+
+#define XQM_HASHLIST_INSERT(h, dqp) \
+ _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT)
+
+#define XQM_FREELIST_INSERT(h, dqp) \
+ xfs_qm_freelist_append(h, dqp)
+
+#define XQM_MPLIST_INSERT(h, dqp) \
+ _LIST_INSERT(h, dqp, MPL_PREVP, MPL_NEXT)
+
+#define XQM_HASHLIST_REMOVE(h, dqp) \
+ _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT)
+#define XQM_FREELIST_REMOVE(dqp) \
+ xfs_qm_freelist_unlink(dqp)
+#define XQM_MPLIST_REMOVE(h, dqp) \
+ { _LIST_REMOVE(h, dqp, MPL_PREVP, MPL_NEXT); \
+ XFS_QI_MPLRECLAIMS((dqp)->q_mount)++; }
+
+#define XFS_DQ_IS_LOGITEM_INITD(dqp) ((dqp)->q_logitem.qli_dquot == (dqp))
+
+#define XFS_QM_DQP_TO_DQACCT(tp, dqp) (XFS_QM_ISUDQ(dqp) ? \
+ (tp)->t_dqinfo->dqa_usrdquots : \
+ (tp)->t_dqinfo->dqa_grpdquots)
+#define XFS_IS_SUSER_DQUOT(dqp) \
+ (!((dqp)->q_core.d_id))
+
+#define XFS_PURGE_INODE(ip) \
+ { \
+ vmap_t dqvmap; \
+ vnode_t *dqvp; \
+ dqvp = XFS_ITOV(ip); \
+ VMAP(dqvp, dqvmap); \
+ VN_RELE(dqvp); \
+ }
+
+#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
+ (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : "???"))
+#define DQFLAGTO_DIRTYSTR(d) (XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY")
+
+#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
new file mode 100644
index 000000000000..149b2a1fd949
--- /dev/null
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -0,0 +1,941 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+
+#include "xfs_qm.h"
+
+STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *);
+
+/*
+ * Add the locked dquot to the transaction.
+ * The dquot must be locked, and it cannot be associated with any
+ * transaction.
+ */
+void
+xfs_trans_dqjoin(
+ xfs_trans_t *tp,
+ xfs_dquot_t *dqp)
+{
+ xfs_dq_logitem_t *lp;
+
+ ASSERT(! XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ ASSERT(XFS_DQ_IS_LOGITEM_INITD(dqp));
+ lp = &dqp->q_logitem;
+
+ /*
+ * Get a log_item_desc to point at the new item.
+ */
+ (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp));
+
+ /*
+ * Initialize i_transp so we can later determine if this dquot is
+ * associated with this transaction.
+ */
+ dqp->q_transp = tp;
+}
+
+
+/*
+ * This is called to mark the dquot as needing
+ * to be logged when the transaction is committed. The dquot must
+ * already be associated with the given transaction.
+ * Note that it marks the entire transaction as dirty. In the ordinary
+ * case, this gets called via xfs_trans_commit, after the transaction
+ * is already dirty. However, there's nothing stop this from getting
+ * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
+ * flag.
+ */
+void
+xfs_trans_log_dquot(
+ xfs_trans_t *tp,
+ xfs_dquot_t *dqp)
+{
+ xfs_log_item_desc_t *lidp;
+
+ ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+ lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
+ ASSERT(lidp != NULL);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ lidp->lid_flags |= XFS_LID_DIRTY;
+}
+
+/*
+ * Carry forward whatever is left of the quota blk reservation to
+ * the spanky new transaction
+ */
+STATIC void
+xfs_trans_dup_dqinfo(
+ xfs_trans_t *otp,
+ xfs_trans_t *ntp)
+{
+ xfs_dqtrx_t *oq, *nq;
+ int i,j;
+ xfs_dqtrx_t *oqa, *nqa;
+
+ if (!otp->t_dqinfo)
+ return;
+
+ xfs_trans_alloc_dqinfo(ntp);
+ oqa = otp->t_dqinfo->dqa_usrdquots;
+ nqa = ntp->t_dqinfo->dqa_usrdquots;
+
+ /*
+ * Because the quota blk reservation is carried forward,
+ * it is also necessary to carry forward the DQ_DIRTY flag.
+ */
+ if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
+ ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
+
+ for (j = 0; j < 2; j++) {
+ for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+ if (oqa[i].qt_dquot == NULL)
+ break;
+ oq = &oqa[i];
+ nq = &nqa[i];
+
+ nq->qt_dquot = oq->qt_dquot;
+ nq->qt_bcount_delta = nq->qt_icount_delta = 0;
+ nq->qt_rtbcount_delta = 0;
+
+ /*
+ * Transfer whatever is left of the reservations.
+ */
+ nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
+ oq->qt_blk_res = oq->qt_blk_res_used;
+
+ nq->qt_rtblk_res = oq->qt_rtblk_res -
+ oq->qt_rtblk_res_used;
+ oq->qt_rtblk_res = oq->qt_rtblk_res_used;
+
+ nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
+ oq->qt_ino_res = oq->qt_ino_res_used;
+
+ }
+ oqa = otp->t_dqinfo->dqa_grpdquots;
+ nqa = ntp->t_dqinfo->dqa_grpdquots;
+ }
+}
+
+/*
+ * Wrap around mod_dquot to account for both user and group quotas.
+ */
+void
+xfs_trans_mod_dquot_byino(
+ xfs_trans_t *tp,
+ xfs_inode_t *ip,
+ uint field,
+ long delta)
+{
+ xfs_mount_t *mp;
+
+ ASSERT(tp);
+ mp = tp->t_mountp;
+
+ if (!XFS_IS_QUOTA_ON(mp) ||
+ ip->i_ino == mp->m_sb.sb_uquotino ||
+ ip->i_ino == mp->m_sb.sb_gquotino)
+ return;
+
+ if (tp->t_dqinfo == NULL)
+ xfs_trans_alloc_dqinfo(tp);
+
+ if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) {
+ (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
+ }
+ if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) {
+ (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
+ }
+}
+
+STATIC xfs_dqtrx_t *
+xfs_trans_get_dqtrx(
+ xfs_trans_t *tp,
+ xfs_dquot_t *dqp)
+{
+ int i;
+ xfs_dqtrx_t *qa;
+
+ for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+ qa = XFS_QM_DQP_TO_DQACCT(tp, dqp);
+
+ if (qa[i].qt_dquot == NULL ||
+ qa[i].qt_dquot == dqp) {
+ return (&qa[i]);
+ }
+ }
+
+ return (NULL);
+}
+
+/*
+ * Make the changes in the transaction structure.
+ * The moral equivalent to xfs_trans_mod_sb().
+ * We don't touch any fields in the dquot, so we don't care
+ * if it's locked or not (most of the time it won't be).
+ */
+void
+xfs_trans_mod_dquot(
+ xfs_trans_t *tp,
+ xfs_dquot_t *dqp,
+ uint field,
+ long delta)
+{
+ xfs_dqtrx_t *qtrx;
+
+ ASSERT(tp);
+ qtrx = NULL;
+
+ if (tp->t_dqinfo == NULL)
+ xfs_trans_alloc_dqinfo(tp);
+ /*
+ * Find either the first free slot or the slot that belongs
+ * to this dquot.
+ */
+ qtrx = xfs_trans_get_dqtrx(tp, dqp);
+ ASSERT(qtrx);
+ if (qtrx->qt_dquot == NULL)
+ qtrx->qt_dquot = dqp;
+
+ switch (field) {
+
+ /*
+ * regular disk blk reservation
+ */
+ case XFS_TRANS_DQ_RES_BLKS:
+ qtrx->qt_blk_res += (ulong)delta;
+ break;
+
+ /*
+ * inode reservation
+ */
+ case XFS_TRANS_DQ_RES_INOS:
+ qtrx->qt_ino_res += (ulong)delta;
+ break;
+
+ /*
+ * disk blocks used.
+ */
+ case XFS_TRANS_DQ_BCOUNT:
+ if (qtrx->qt_blk_res && delta > 0) {
+ qtrx->qt_blk_res_used += (ulong)delta;
+ ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
+ }
+ qtrx->qt_bcount_delta += delta;
+ break;
+
+ case XFS_TRANS_DQ_DELBCOUNT:
+ qtrx->qt_delbcnt_delta += delta;
+ break;
+
+ /*
+ * Inode Count
+ */
+ case XFS_TRANS_DQ_ICOUNT:
+ if (qtrx->qt_ino_res && delta > 0) {
+ qtrx->qt_ino_res_used += (ulong)delta;
+ ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
+ }
+ qtrx->qt_icount_delta += delta;
+ break;
+
+ /*
+ * rtblk reservation
+ */
+ case XFS_TRANS_DQ_RES_RTBLKS:
+ qtrx->qt_rtblk_res += (ulong)delta;
+ break;
+
+ /*
+ * rtblk count
+ */
+ case XFS_TRANS_DQ_RTBCOUNT:
+ if (qtrx->qt_rtblk_res && delta > 0) {
+ qtrx->qt_rtblk_res_used += (ulong)delta;
+ ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
+ }
+ qtrx->qt_rtbcount_delta += delta;
+ break;
+
+ case XFS_TRANS_DQ_DELRTBCOUNT:
+ qtrx->qt_delrtb_delta += delta;
+ break;
+
+ default:
+ ASSERT(0);
+ }
+ tp->t_flags |= XFS_TRANS_DQ_DIRTY;
+}
+
+
+/*
+ * Given an array of dqtrx structures, lock all the dquots associated
+ * and join them to the transaction, provided they have been modified.
+ * We know that the highest number of dquots (of one type - usr OR grp),
+ * involved in a transaction is 2 and that both usr and grp combined - 3.
+ * So, we don't attempt to make this very generic.
+ */
+STATIC void
+xfs_trans_dqlockedjoin(
+ xfs_trans_t *tp,
+ xfs_dqtrx_t *q)
+{
+ ASSERT(q[0].qt_dquot != NULL);
+ if (q[1].qt_dquot == NULL) {
+ xfs_dqlock(q[0].qt_dquot);
+ xfs_trans_dqjoin(tp, q[0].qt_dquot);
+ } else {
+ ASSERT(XFS_QM_TRANS_MAXDQS == 2);
+ xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
+ xfs_trans_dqjoin(tp, q[0].qt_dquot);
+ xfs_trans_dqjoin(tp, q[1].qt_dquot);
+ }
+}
+
+
+/*
+ * Called by xfs_trans_commit() and similar in spirit to
+ * xfs_trans_apply_sb_deltas().
+ * Go thru all the dquots belonging to this transaction and modify the
+ * INCORE dquot to reflect the actual usages.
+ * Unreserve just the reservations done by this transaction.
+ * dquot is still left locked at exit.
+ */
+void
+xfs_trans_apply_dquot_deltas(
+ xfs_trans_t *tp)
+{
+ int i, j;
+ xfs_dquot_t *dqp;
+ xfs_dqtrx_t *qtrx, *qa;
+ xfs_disk_dquot_t *d;
+ long totalbdelta;
+ long totalrtbdelta;
+
+ if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY))
+ return;
+
+ ASSERT(tp->t_dqinfo);
+ qa = tp->t_dqinfo->dqa_usrdquots;
+ for (j = 0; j < 2; j++) {
+ if (qa[0].qt_dquot == NULL) {
+ qa = tp->t_dqinfo->dqa_grpdquots;
+ continue;
+ }
+
+ /*
+ * Lock all of the dquots and join them to the transaction.
+ */
+ xfs_trans_dqlockedjoin(tp, qa);
+
+ for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+ qtrx = &qa[i];
+ /*
+ * The array of dquots is filled
+ * sequentially, not sparsely.
+ */
+ if ((dqp = qtrx->qt_dquot) == NULL)
+ break;
+
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
+
+ /*
+ * adjust the actual number of blocks used
+ */
+ d = &dqp->q_core;
+
+ /*
+ * The issue here is - sometimes we don't make a blkquota
+ * reservation intentionally to be fair to users
+ * (when the amount is small). On the other hand,
+ * delayed allocs do make reservations, but that's
+ * outside of a transaction, so we have no
+ * idea how much was really reserved.
+ * So, here we've accumulated delayed allocation blks and
+ * non-delay blks. The assumption is that the
+ * delayed ones are always reserved (outside of a
+ * transaction), and the others may or may not have
+ * quota reservations.
+ */
+ totalbdelta = qtrx->qt_bcount_delta +
+ qtrx->qt_delbcnt_delta;
+ totalrtbdelta = qtrx->qt_rtbcount_delta +
+ qtrx->qt_delrtb_delta;
+#ifdef QUOTADEBUG
+ if (totalbdelta < 0)
+ ASSERT(INT_GET(d->d_bcount, ARCH_CONVERT) >=
+ (xfs_qcnt_t) -totalbdelta);
+
+ if (totalrtbdelta < 0)
+ ASSERT(INT_GET(d->d_rtbcount, ARCH_CONVERT) >=
+ (xfs_qcnt_t) -totalrtbdelta);
+
+ if (qtrx->qt_icount_delta < 0)
+ ASSERT(INT_GET(d->d_icount, ARCH_CONVERT) >=
+ (xfs_qcnt_t) -qtrx->qt_icount_delta);
+#endif
+ if (totalbdelta)
+ INT_MOD(d->d_bcount, ARCH_CONVERT, (xfs_qcnt_t)totalbdelta);
+
+ if (qtrx->qt_icount_delta)
+ INT_MOD(d->d_icount, ARCH_CONVERT, (xfs_qcnt_t)qtrx->qt_icount_delta);
+
+ if (totalrtbdelta)
+ INT_MOD(d->d_rtbcount, ARCH_CONVERT, (xfs_qcnt_t)totalrtbdelta);
+
+ /*
+ * Get any default limits in use.
+ * Start/reset the timer(s) if needed.
+ */
+ if (d->d_id) {
+ xfs_qm_adjust_dqlimits(tp->t_mountp, d);
+ xfs_qm_adjust_dqtimers(tp->t_mountp, d);
+ }
+
+ dqp->dq_flags |= XFS_DQ_DIRTY;
+ /*
+ * add this to the list of items to get logged
+ */
+ xfs_trans_log_dquot(tp, dqp);
+ /*
+ * Take off what's left of the original reservation.
+ * In case of delayed allocations, there's no
+ * reservation that a transaction structure knows of.
+ */
+ if (qtrx->qt_blk_res != 0) {
+ if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
+ if (qtrx->qt_blk_res >
+ qtrx->qt_blk_res_used)
+ dqp->q_res_bcount -= (xfs_qcnt_t)
+ (qtrx->qt_blk_res -
+ qtrx->qt_blk_res_used);
+ else
+ dqp->q_res_bcount -= (xfs_qcnt_t)
+ (qtrx->qt_blk_res_used -
+ qtrx->qt_blk_res);
+ }
+ } else {
+ /*
+ * These blks were never reserved, either inside
+ * a transaction or outside one (in a delayed
+ * allocation). Also, this isn't always a
+ * negative number since we sometimes
+ * deliberately skip quota reservations.
+ */
+ if (qtrx->qt_bcount_delta) {
+ dqp->q_res_bcount +=
+ (xfs_qcnt_t)qtrx->qt_bcount_delta;
+ }
+ }
+ /*
+ * Adjust the RT reservation.
+ */
+ if (qtrx->qt_rtblk_res != 0) {
+ if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
+ if (qtrx->qt_rtblk_res >
+ qtrx->qt_rtblk_res_used)
+ dqp->q_res_rtbcount -= (xfs_qcnt_t)
+ (qtrx->qt_rtblk_res -
+ qtrx->qt_rtblk_res_used);
+ else
+ dqp->q_res_rtbcount -= (xfs_qcnt_t)
+ (qtrx->qt_rtblk_res_used -
+ qtrx->qt_rtblk_res);
+ }
+ } else {
+ if (qtrx->qt_rtbcount_delta)
+ dqp->q_res_rtbcount +=
+ (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
+ }
+
+ /*
+ * Adjust the inode reservation.
+ */
+ if (qtrx->qt_ino_res != 0) {
+ ASSERT(qtrx->qt_ino_res >=
+ qtrx->qt_ino_res_used);
+ if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
+ dqp->q_res_icount -= (xfs_qcnt_t)
+ (qtrx->qt_ino_res -
+ qtrx->qt_ino_res_used);
+ } else {
+ if (qtrx->qt_icount_delta)
+ dqp->q_res_icount +=
+ (xfs_qcnt_t)qtrx->qt_icount_delta;
+ }
+
+
+#ifdef QUOTADEBUG
+ if (qtrx->qt_rtblk_res != 0)
+ cmn_err(CE_DEBUG, "RT res %d for 0x%p\n",
+ (int) qtrx->qt_rtblk_res, dqp);
+#endif
+ ASSERT(dqp->q_res_bcount >=
+ INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
+ ASSERT(dqp->q_res_icount >=
+ INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
+ ASSERT(dqp->q_res_rtbcount >=
+ INT_GET(dqp->q_core.d_rtbcount, ARCH_CONVERT));
+ }
+ /*
+ * Do the group quotas next
+ */
+ qa = tp->t_dqinfo->dqa_grpdquots;
+ }
+}
+
+/*
+ * Release the reservations, and adjust the dquots accordingly.
+ * This is called only when the transaction is being aborted. If by
+ * any chance we have done dquot modifications incore (ie. deltas) already,
+ * we simply throw those away, since that's the expected behavior
+ * when a transaction is curtailed without a commit.
+ */
+STATIC void
+xfs_trans_unreserve_and_mod_dquots(
+ xfs_trans_t *tp)
+{
+ int i, j;
+ xfs_dquot_t *dqp;
+ xfs_dqtrx_t *qtrx, *qa;
+ boolean_t locked;
+
+ if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+ return;
+
+ qa = tp->t_dqinfo->dqa_usrdquots;
+
+ for (j = 0; j < 2; j++) {
+ for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+ qtrx = &qa[i];
+ /*
+ * We assume that the array of dquots is filled
+ * sequentially, not sparsely.
+ */
+ if ((dqp = qtrx->qt_dquot) == NULL)
+ break;
+ /*
+ * Unreserve the original reservation. We don't care
+ * about the number of blocks used field, or deltas.
+ * Also we don't bother to zero the fields.
+ */
+ locked = B_FALSE;
+ if (qtrx->qt_blk_res) {
+ xfs_dqlock(dqp);
+ locked = B_TRUE;
+ dqp->q_res_bcount -=
+ (xfs_qcnt_t)qtrx->qt_blk_res;
+ }
+ if (qtrx->qt_ino_res) {
+ if (!locked) {
+ xfs_dqlock(dqp);
+ locked = B_TRUE;
+ }
+ dqp->q_res_icount -=
+ (xfs_qcnt_t)qtrx->qt_ino_res;
+ }
+
+ if (qtrx->qt_rtblk_res) {
+ if (!locked) {
+ xfs_dqlock(dqp);
+ locked = B_TRUE;
+ }
+ dqp->q_res_rtbcount -=
+ (xfs_qcnt_t)qtrx->qt_rtblk_res;
+ }
+ if (locked)
+ xfs_dqunlock(dqp);
+
+ }
+ qa = tp->t_dqinfo->dqa_grpdquots;
+ }
+}
+
+/*
+ * This reserves disk blocks and inodes against a dquot.
+ * Flags indicate if the dquot is to be locked here and also
+ * if the blk reservation is for RT or regular blocks.
+ * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
+ * Returns EDQUOT if quota is exceeded.
+ */
+STATIC int
+xfs_trans_dqresv(
+ xfs_trans_t *tp,
+ xfs_mount_t *mp,
+ xfs_dquot_t *dqp,
+ long nblks,
+ long ninos,
+ uint flags)
+{
+ int error;
+ xfs_qcnt_t hardlimit;
+ xfs_qcnt_t softlimit;
+ time_t btimer;
+ xfs_qcnt_t *resbcountp;
+ xfs_quotainfo_t *q = mp->m_quotainfo;
+
+ if (! (flags & XFS_QMOPT_DQLOCK)) {
+ xfs_dqlock(dqp);
+ }
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ if (flags & XFS_TRANS_DQ_RES_BLKS) {
+ hardlimit = INT_GET(dqp->q_core.d_blk_hardlimit, ARCH_CONVERT);
+ if (!hardlimit)
+ hardlimit = q->qi_bhardlimit;
+ softlimit = INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT);
+ if (!softlimit)
+ softlimit = q->qi_bsoftlimit;
+ btimer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT);
+ resbcountp = &dqp->q_res_bcount;
+ } else {
+ ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
+ hardlimit = INT_GET(dqp->q_core.d_rtb_hardlimit, ARCH_CONVERT);
+ if (!hardlimit)
+ hardlimit = q->qi_rtbhardlimit;
+ softlimit = INT_GET(dqp->q_core.d_rtb_softlimit, ARCH_CONVERT);
+ if (!softlimit)
+ softlimit = q->qi_rtbsoftlimit;
+ btimer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT);
+ resbcountp = &dqp->q_res_rtbcount;
+ }
+ error = 0;
+
+ if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
+ dqp->q_core.d_id &&
+ XFS_IS_QUOTA_ENFORCED(dqp->q_mount)) {
+#ifdef QUOTADEBUG
+ cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld"
+ " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit);
+#endif
+ if (nblks > 0) {
+ /*
+ * dquot is locked already. See if we'd go over the
+ * hardlimit or exceed the timelimit if we allocate
+ * nblks.
+ */
+ if (hardlimit > 0ULL &&
+ (hardlimit <= nblks + *resbcountp)) {
+ error = EDQUOT;
+ goto error_return;
+ }
+
+ if (softlimit > 0ULL &&
+ (softlimit <= nblks + *resbcountp)) {
+ /*
+ * If timer or warnings has expired,
+ * return EDQUOT
+ */
+ if ((btimer != 0 && get_seconds() > btimer) ||
+ (dqp->q_core.d_bwarns &&
+ INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) >=
+ XFS_QI_BWARNLIMIT(dqp->q_mount))) {
+ error = EDQUOT;
+ goto error_return;
+ }
+ }
+ }
+ if (ninos > 0) {
+ hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT);
+ if (!hardlimit)
+ hardlimit = q->qi_ihardlimit;
+ softlimit = INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT);
+ if (!softlimit)
+ softlimit = q->qi_isoftlimit;
+ if (hardlimit > 0ULL &&
+ INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= hardlimit) {
+ error = EDQUOT;
+ goto error_return;
+ } else if (softlimit > 0ULL &&
+ INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= softlimit) {
+ /*
+ * If timer or warnings has expired,
+ * return EDQUOT
+ */
+ if ((dqp->q_core.d_itimer &&
+ get_seconds() > INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT)) ||
+ (dqp->q_core.d_iwarns &&
+ INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) >=
+ XFS_QI_IWARNLIMIT(dqp->q_mount))) {
+ error = EDQUOT;
+ goto error_return;
+ }
+ }
+ }
+ }
+
+ /*
+ * Change the reservation, but not the actual usage.
+ * Note that q_res_bcount = q_core.d_bcount + resv
+ */
+ (*resbcountp) += (xfs_qcnt_t)nblks;
+ if (ninos != 0)
+ dqp->q_res_icount += (xfs_qcnt_t)ninos;
+
+ /*
+ * note the reservation amt in the trans struct too,
+ * so that the transaction knows how much was reserved by
+ * it against this particular dquot.
+ * We don't do this when we are reserving for a delayed allocation,
+ * because we don't have the luxury of a transaction envelope then.
+ */
+ if (tp) {
+ ASSERT(tp->t_dqinfo);
+ ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+ if (nblks != 0)
+ xfs_trans_mod_dquot(tp, dqp,
+ flags & XFS_QMOPT_RESBLK_MASK,
+ nblks);
+ if (ninos != 0)
+ xfs_trans_mod_dquot(tp, dqp,
+ XFS_TRANS_DQ_RES_INOS,
+ ninos);
+ }
+ ASSERT(dqp->q_res_bcount >= INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
+ ASSERT(dqp->q_res_rtbcount >= INT_GET(dqp->q_core.d_rtbcount, ARCH_CONVERT));
+ ASSERT(dqp->q_res_icount >= INT_GET(dqp->q_core.d_icount, ARCH_CONVERT));
+
+error_return:
+ if (! (flags & XFS_QMOPT_DQLOCK)) {
+ xfs_dqunlock(dqp);
+ }
+ return (error);
+}
+
+
+/*
+ * Given a dquot(s), make disk block and/or inode reservations against them.
+ * The fact that this does the reservation against both the usr and
+ * grp quotas is important, because this follows a both-or-nothing
+ * approach.
+ *
+ * flags = XFS_QMOPT_DQLOCK indicate if dquot(s) need to be locked.
+ * XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
+ * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
+ * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
+ * dquots are unlocked on return, if they were not locked by caller.
+ */
+int
+xfs_trans_reserve_quota_bydquots(
+ xfs_trans_t *tp,
+ xfs_mount_t *mp,
+ xfs_dquot_t *udqp,
+ xfs_dquot_t *gdqp,
+ long nblks,
+ long ninos,
+ uint flags)
+{
+ int resvd;
+
+ if (! XFS_IS_QUOTA_ON(mp))
+ return (0);
+
+ if (tp && tp->t_dqinfo == NULL)
+ xfs_trans_alloc_dqinfo(tp);
+
+ ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+ resvd = 0;
+
+ if (udqp) {
+ if (xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, flags))
+ return (EDQUOT);
+ resvd = 1;
+ }
+
+ if (gdqp) {
+ if (xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags)) {
+ /*
+ * can't do it, so backout previous reservation
+ */
+ if (resvd) {
+ flags |= XFS_QMOPT_FORCE_RES;
+ xfs_trans_dqresv(tp, mp, udqp,
+ -nblks, -ninos, flags);
+ }
+ return (EDQUOT);
+ }
+ }
+
+ /*
+ * Didnt change anything critical, so, no need to log
+ */
+ return (0);
+}
+
+
+/*
+ * Lock the dquot and change the reservation if we can.
+ * This doesn't change the actual usage, just the reservation.
+ * The inode sent in is locked.
+ *
+ * Returns 0 on success, EDQUOT or other errors otherwise
+ */
+STATIC int
+xfs_trans_reserve_quota_nblks(
+ xfs_trans_t *tp,
+ xfs_mount_t *mp,
+ xfs_inode_t *ip,
+ long nblks,
+ long ninos,
+ uint type)
+{
+ int error;
+
+ if (!XFS_IS_QUOTA_ON(mp))
+ return (0);
+
+ ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
+ ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
+
+ ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
+ ASSERT((type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_RTBLKS ||
+ (type & ~XFS_QMOPT_FORCE_RES) == XFS_TRANS_DQ_RES_BLKS);
+
+ /*
+ * Reserve nblks against these dquots, with trans as the mediator.
+ */
+ error = xfs_trans_reserve_quota_bydquots(tp, mp,
+ ip->i_udquot, ip->i_gdquot,
+ nblks, ninos,
+ type);
+ return (error);
+}
+
+/*
+ * This routine is called to allocate a quotaoff log item.
+ */
+xfs_qoff_logitem_t *
+xfs_trans_get_qoff_item(
+ xfs_trans_t *tp,
+ xfs_qoff_logitem_t *startqoff,
+ uint flags)
+{
+ xfs_qoff_logitem_t *q;
+
+ ASSERT(tp != NULL);
+
+ q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
+ ASSERT(q != NULL);
+
+ /*
+ * Get a log_item_desc to point at the new item.
+ */
+ (void) xfs_trans_add_item(tp, (xfs_log_item_t*)q);
+
+ return (q);
+}
+
+
+/*
+ * This is called to mark the quotaoff logitem as needing
+ * to be logged when the transaction is committed. The logitem must
+ * already be associated with the given transaction.
+ */
+void
+xfs_trans_log_quotaoff_item(
+ xfs_trans_t *tp,
+ xfs_qoff_logitem_t *qlp)
+{
+ xfs_log_item_desc_t *lidp;
+
+ lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp);
+ ASSERT(lidp != NULL);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ lidp->lid_flags |= XFS_LID_DIRTY;
+}
+
+STATIC void
+xfs_trans_alloc_dqinfo(
+ xfs_trans_t *tp)
+{
+ (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+}
+
+STATIC void
+xfs_trans_free_dqinfo(
+ xfs_trans_t *tp)
+{
+ if (!tp->t_dqinfo)
+ return;
+ kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo);
+ (tp)->t_dqinfo = NULL;
+}
+
+xfs_dqtrxops_t xfs_trans_dquot_ops = {
+ .qo_dup_dqinfo = xfs_trans_dup_dqinfo,
+ .qo_free_dqinfo = xfs_trans_free_dqinfo,
+ .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino,
+ .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas,
+ .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks,
+ .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots,
+ .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots,
+};