summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/libxfs')
-rw-r--r--fs/xfs/libxfs/xfs_ag.c379
-rw-r--r--fs/xfs/libxfs/xfs_ag.h226
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c44
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.h21
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c365
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h33
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c100
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_attr.c464
-rw-r--r--fs/xfs/libxfs/xfs_attr.h49
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c217
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h6
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c102
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.h8
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c800
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h18
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c26
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h207
-rw-r--r--fs/xfs/libxfs/xfs_btree.c89
-rw-r--r--fs/xfs/libxfs/xfs_btree.h19
-rw-r--r--fs/xfs/libxfs/xfs_btree_mem.c6
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c194
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h34
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h37
-rw-r--r--fs/xfs/libxfs/xfs_defer.c23
-rw-r--r--fs/xfs/libxfs/xfs_defer.h11
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c942
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h72
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c42
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c49
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c100
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c44
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h22
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c190
-rw-r--r--fs/xfs/libxfs/xfs_errortag.h4
-rw-r--r--fs/xfs/libxfs/xfs_exchmaps.c1235
-rw-r--r--fs/xfs/libxfs/xfs_exchmaps.h124
-rw-r--r--fs/xfs/libxfs/xfs_format.h240
-rw-r--r--fs/xfs/libxfs/xfs_fs.h236
-rw-r--r--fs/xfs/libxfs/xfs_group.c225
-rw-r--r--fs/xfs/libxfs/xfs_group.h164
-rw-r--r--fs/xfs/libxfs/xfs_health.h93
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c275
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h9
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c45
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c145
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h3
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c97
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h6
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.c751
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.h62
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h95
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h6
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c46
-rw-r--r--fs/xfs/libxfs/xfs_metadir.c481
-rw-r--r--fs/xfs/libxfs/xfs_metadir.h47
-rw-r--r--fs/xfs/libxfs/xfs_metafile.c52
-rw-r--r--fs/xfs/libxfs/xfs_metafile.h31
-rw-r--r--fs/xfs/libxfs/xfs_ondisk.h191
-rw-r--r--fs/xfs/libxfs/xfs_parent.c379
-rw-r--r--fs/xfs/libxfs/xfs_parent.h110
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h45
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c185
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h13
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c24
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c306
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h21
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c42
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c533
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.h273
-rw-r--r--fs/xfs/libxfs/xfs_rtgroup.c697
-rw-r--r--fs/xfs/libxfs/xfs_rtgroup.h284
-rw-r--r--fs/xfs/libxfs/xfs_sb.c381
-rw-r--r--fs/xfs/libxfs/xfs_sb.h7
-rw-r--r--fs/xfs/libxfs/xfs_shared.h20
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c54
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.h8
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c8
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c351
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.c121
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h29
-rw-r--r--fs/xfs/libxfs/xfs_types.c44
-rw-r--r--fs/xfs/libxfs/xfs_types.h28
87 files changed, 10680 insertions, 2898 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index dc1873f76bff..b59cb461e096 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -30,137 +30,7 @@
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
-
-
-/*
- * Passive reference counting access wrappers to the perag structures. If the
- * per-ag structure is to be freed, the freeing code is responsible for cleaning
- * up objects with passive references before freeing the structure. This is
- * things like cached buffers.
- */
-struct xfs_perag *
-xfs_perag_get(
- struct xfs_mount *mp,
- xfs_agnumber_t agno)
-{
- struct xfs_perag *pag;
-
- rcu_read_lock();
- pag = radix_tree_lookup(&mp->m_perag_tree, agno);
- if (pag) {
- trace_xfs_perag_get(pag, _RET_IP_);
- ASSERT(atomic_read(&pag->pag_ref) >= 0);
- atomic_inc(&pag->pag_ref);
- }
- rcu_read_unlock();
- return pag;
-}
-
-/*
- * search from @first to find the next perag with the given tag set.
- */
-struct xfs_perag *
-xfs_perag_get_tag(
- struct xfs_mount *mp,
- xfs_agnumber_t first,
- unsigned int tag)
-{
- struct xfs_perag *pag;
- int found;
-
- rcu_read_lock();
- found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, first, 1, tag);
- if (found <= 0) {
- rcu_read_unlock();
- return NULL;
- }
- trace_xfs_perag_get_tag(pag, _RET_IP_);
- atomic_inc(&pag->pag_ref);
- rcu_read_unlock();
- return pag;
-}
-
-/* Get a passive reference to the given perag. */
-struct xfs_perag *
-xfs_perag_hold(
- struct xfs_perag *pag)
-{
- ASSERT(atomic_read(&pag->pag_ref) > 0 ||
- atomic_read(&pag->pag_active_ref) > 0);
-
- trace_xfs_perag_hold(pag, _RET_IP_);
- atomic_inc(&pag->pag_ref);
- return pag;
-}
-
-void
-xfs_perag_put(
- struct xfs_perag *pag)
-{
- trace_xfs_perag_put(pag, _RET_IP_);
- ASSERT(atomic_read(&pag->pag_ref) > 0);
- atomic_dec(&pag->pag_ref);
-}
-
-/*
- * Active references for perag structures. This is for short term access to the
- * per ag structures for walking trees or accessing state. If an AG is being
- * shrunk or is offline, then this will fail to find that AG and return NULL
- * instead.
- */
-struct xfs_perag *
-xfs_perag_grab(
- struct xfs_mount *mp,
- xfs_agnumber_t agno)
-{
- struct xfs_perag *pag;
-
- rcu_read_lock();
- pag = radix_tree_lookup(&mp->m_perag_tree, agno);
- if (pag) {
- trace_xfs_perag_grab(pag, _RET_IP_);
- if (!atomic_inc_not_zero(&pag->pag_active_ref))
- pag = NULL;
- }
- rcu_read_unlock();
- return pag;
-}
-
-/*
- * search from @first to find the next perag with the given tag set.
- */
-struct xfs_perag *
-xfs_perag_grab_tag(
- struct xfs_mount *mp,
- xfs_agnumber_t first,
- int tag)
-{
- struct xfs_perag *pag;
- int found;
-
- rcu_read_lock();
- found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, first, 1, tag);
- if (found <= 0) {
- rcu_read_unlock();
- return NULL;
- }
- trace_xfs_perag_grab_tag(pag, _RET_IP_);
- if (!atomic_inc_not_zero(&pag->pag_active_ref))
- pag = NULL;
- rcu_read_unlock();
- return pag;
-}
-
-void
-xfs_perag_rele(
- struct xfs_perag *pag)
-{
- trace_xfs_perag_rele(pag, _RET_IP_);
- if (atomic_dec_and_test(&pag->pag_active_ref))
- wake_up(&pag->pag_active_wq);
-}
+#include "xfs_group.h"
/*
* xfs_initialize_perag_data
@@ -194,7 +64,7 @@ xfs_initialize_perag_data(
pag = xfs_perag_get(mp, index);
error = xfs_alloc_read_agf(pag, NULL, 0, NULL);
if (!error)
- error = xfs_ialloc_read_agi(pag, NULL, NULL);
+ error = xfs_ialloc_read_agi(pag, NULL, 0, NULL);
if (error) {
xfs_perag_put(pag);
return error;
@@ -235,43 +105,32 @@ out:
return error;
}
-STATIC void
-__xfs_free_perag(
- struct rcu_head *head)
+static void
+xfs_perag_uninit(
+ struct xfs_group *xg)
{
- struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+#ifdef __KERNEL__
+ struct xfs_perag *pag = to_perag(xg);
- ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
- kfree(pag);
+ cancel_delayed_work_sync(&pag->pag_blockgc_work);
+ xfs_buf_cache_destroy(&pag->pag_bcache);
+#endif
}
/*
- * Free up the per-ag resources associated with the mount structure.
+ * Free up the per-ag resources within the specified AG range.
*/
void
-xfs_free_perag(
- struct xfs_mount *mp)
+xfs_free_perag_range(
+ struct xfs_mount *mp,
+ xfs_agnumber_t first_agno,
+ xfs_agnumber_t end_agno)
+
{
- struct xfs_perag *pag;
xfs_agnumber_t agno;
- for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
- spin_lock(&mp->m_perag_lock);
- pag = radix_tree_delete(&mp->m_perag_tree, agno);
- spin_unlock(&mp->m_perag_lock);
- ASSERT(pag);
- XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
- xfs_defer_drain_free(&pag->pag_intents_drain);
-
- cancel_delayed_work_sync(&pag->pag_blockgc_work);
- xfs_buf_cache_destroy(&pag->pag_bcache);
-
- /* drop the mount's active reference */
- xfs_perag_rele(pag);
- XFS_IS_CORRUPT(pag->pag_mount,
- atomic_read(&pag->pag_active_ref) != 0);
- call_rcu(&pag->rcu_head, __xfs_free_perag);
- }
+ for (agno = first_agno; agno < end_agno; agno++)
+ xfs_group_free(mp, agno, XG_TYPE_AG, xfs_perag_uninit);
}
/* Find the size of the AG, in blocks. */
@@ -334,131 +193,100 @@ xfs_agino_range(
}
/*
- * Free perag within the specified AG range, it is only used to free unused
- * perags under the error handling path.
+ * Update the perag of the previous tail AG if it has been changed during
+ * recovery (i.e. recovery of a growfs).
*/
-void
-xfs_free_unused_perag_range(
+int
+xfs_update_last_ag_size(
struct xfs_mount *mp,
- xfs_agnumber_t agstart,
- xfs_agnumber_t agend)
+ xfs_agnumber_t prev_agcount)
{
- struct xfs_perag *pag;
- xfs_agnumber_t index;
+ struct xfs_perag *pag = xfs_perag_grab(mp, prev_agcount - 1);
- for (index = agstart; index < agend; index++) {
- spin_lock(&mp->m_perag_lock);
- pag = radix_tree_delete(&mp->m_perag_tree, index);
- spin_unlock(&mp->m_perag_lock);
- if (!pag)
- break;
- xfs_buf_cache_destroy(&pag->pag_bcache);
- xfs_defer_drain_free(&pag->pag_intents_drain);
- kfree(pag);
- }
+ if (!pag)
+ return -EFSCORRUPTED;
+ pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp,
+ prev_agcount - 1, mp->m_sb.sb_agcount,
+ mp->m_sb.sb_dblocks);
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
+ &pag->agino_max);
+ xfs_perag_rele(pag);
+ return 0;
}
-int
-xfs_initialize_perag(
+static int
+xfs_perag_alloc(
struct xfs_mount *mp,
+ xfs_agnumber_t index,
xfs_agnumber_t agcount,
- xfs_rfsblock_t dblocks,
- xfs_agnumber_t *maxagi)
+ xfs_rfsblock_t dblocks)
{
struct xfs_perag *pag;
- xfs_agnumber_t index;
- xfs_agnumber_t first_initialised = NULLAGNUMBER;
int error;
- /*
- * Walk the current per-ag tree so we don't try to initialise AGs
- * that already exist (growfs case). Allocate and insert all the
- * AGs we don't find ready for initialisation.
- */
- for (index = 0; index < agcount; index++) {
- pag = xfs_perag_get(mp, index);
- if (pag) {
- xfs_perag_put(pag);
- continue;
- }
-
- pag = kzalloc(sizeof(*pag), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
- if (!pag) {
- error = -ENOMEM;
- goto out_unwind_new_pags;
- }
- pag->pag_agno = index;
- pag->pag_mount = mp;
-
- error = radix_tree_preload(GFP_KERNEL | __GFP_RETRY_MAYFAIL);
- if (error)
- goto out_free_pag;
-
- spin_lock(&mp->m_perag_lock);
- if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
- WARN_ON_ONCE(1);
- spin_unlock(&mp->m_perag_lock);
- radix_tree_preload_end();
- error = -EEXIST;
- goto out_free_pag;
- }
- spin_unlock(&mp->m_perag_lock);
- radix_tree_preload_end();
+ pag = kzalloc(sizeof(*pag), GFP_KERNEL);
+ if (!pag)
+ return -ENOMEM;
#ifdef __KERNEL__
- /* Place kernel structure only init below this point. */
- spin_lock_init(&pag->pag_ici_lock);
- spin_lock_init(&pag->pagb_lock);
- spin_lock_init(&pag->pag_state_lock);
- INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
- INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- xfs_defer_drain_init(&pag->pag_intents_drain);
- init_waitqueue_head(&pag->pagb_wait);
- init_waitqueue_head(&pag->pag_active_wq);
- pag->pagb_count = 0;
- pag->pagb_tree = RB_ROOT;
- xfs_hooks_init(&pag->pag_rmap_update_hooks);
+ /* Place kernel structure only init below this point. */
+ spin_lock_init(&pag->pag_ici_lock);
+ INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
+ INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
#endif /* __KERNEL__ */
- error = xfs_buf_cache_init(&pag->pag_bcache);
- if (error)
- goto out_remove_pag;
+ error = xfs_buf_cache_init(&pag->pag_bcache);
+ if (error)
+ goto out_free_perag;
- /* Active ref owned by mount indicates AG is online. */
- atomic_set(&pag->pag_active_ref, 1);
+ /*
+ * Pre-calculated geometry
+ */
+ pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp, index, agcount,
+ dblocks);
+ pag_group(pag)->xg_min_gbno = XFS_AGFL_BLOCK(mp) + 1;
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
+ &pag->agino_max);
- /* first new pag is fully initialized */
- if (first_initialised == NULLAGNUMBER)
- first_initialised = index;
+ error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG);
+ if (error)
+ goto out_buf_cache_destroy;
- /*
- * Pre-calculated geometry
- */
- pag->block_count = __xfs_ag_block_count(mp, index, agcount,
- dblocks);
- pag->min_block = XFS_AGFL_BLOCK(mp);
- __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
- &pag->agino_max);
- }
+ return 0;
+
+out_buf_cache_destroy:
+ xfs_buf_cache_destroy(&pag->pag_bcache);
+out_free_perag:
+ kfree(pag);
+ return error;
+}
+
+int
+xfs_initialize_perag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t orig_agcount,
+ xfs_agnumber_t new_agcount,
+ xfs_rfsblock_t dblocks,
+ xfs_agnumber_t *maxagi)
+{
+ xfs_agnumber_t index;
+ int error;
- index = xfs_set_inode_alloc(mp, agcount);
+ if (orig_agcount >= new_agcount)
+ return 0;
- if (maxagi)
- *maxagi = index;
+ for (index = orig_agcount; index < new_agcount; index++) {
+ error = xfs_perag_alloc(mp, index, new_agcount, dblocks);
+ if (error)
+ goto out_unwind_new_pags;
+ }
+ *maxagi = xfs_set_inode_alloc(mp, new_agcount);
mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
return 0;
-out_remove_pag:
- xfs_defer_drain_free(&pag->pag_intents_drain);
- spin_lock(&mp->m_perag_lock);
- radix_tree_delete(&mp->m_perag_tree, index);
- spin_unlock(&mp->m_perag_lock);
-out_free_pag:
- kfree(pag);
out_unwind_new_pags:
- /* unwind any prior newly initialized pags */
- xfs_free_unused_perag_range(mp, first_initialised, agcount);
+ xfs_free_perag_range(mp, orig_agcount, index);
return error;
}
@@ -913,7 +741,7 @@ xfs_ag_shrink_space(
struct xfs_trans **tpp,
xfs_extlen_t delta)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_alloc_arg args = {
.tp = *tpp,
.mp = mp,
@@ -930,8 +758,8 @@ xfs_ag_shrink_space(
xfs_agblock_t aglen;
int error, err2;
- ASSERT(pag->pag_agno == mp->m_sb.sb_agcount - 1);
- error = xfs_ialloc_read_agi(pag, *tpp, &agibp);
+ ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1);
+ error = xfs_ialloc_read_agi(pag, *tpp, 0, &agibp);
if (error)
return error;
@@ -963,13 +791,11 @@ xfs_ag_shrink_space(
* Disable perag reservations so it doesn't cause the allocation request
* to fail. We'll reestablish reservation before we return.
*/
- error = xfs_ag_resv_free(pag);
- if (error)
- return error;
+ xfs_ag_resv_free(pag);
/* internal log shouldn't also show up in the free space btrees */
error = xfs_alloc_vextent_exact_bno(&args,
- XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta));
+ xfs_agbno_to_fsb(pag, aglen - delta));
if (!error && args.agbno == NULLAGBLOCK)
error = -ENOSPC;
@@ -1010,7 +836,7 @@ xfs_ag_shrink_space(
goto resv_err;
err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
- XFS_AG_RESV_NONE, true);
+ XFS_AG_RESV_NONE, XFS_FREE_EXTENT_SKIP_DISCARD);
if (err2)
goto resv_err;
@@ -1028,9 +854,9 @@ xfs_ag_shrink_space(
}
/* Update perag geometry */
- pag->block_count -= delta;
- __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min,
- &pag->agino_max);
+ pag_group(pag)->xg_block_count -= delta;
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
+ &pag->agino_max);
xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH);
@@ -1055,14 +881,15 @@ xfs_ag_extend_space(
struct xfs_trans *tp,
xfs_extlen_t len)
{
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_buf *bp;
struct xfs_agi *agi;
struct xfs_agf *agf;
int error;
- ASSERT(pag->pag_agno == pag->pag_mount->m_sb.sb_agcount - 1);
+ ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1);
- error = xfs_ialloc_read_agi(pag, tp, &bp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &bp);
if (error)
return error;
@@ -1099,9 +926,9 @@ xfs_ag_extend_space(
return error;
/* Update perag geometry */
- pag->block_count = be32_to_cpu(agf->agf_length);
- __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min,
- &pag->agino_max);
+ pag_group(pag)->xg_block_count = be32_to_cpu(agf->agf_length);
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
+ &pag->agino_max);
return 0;
}
@@ -1119,7 +946,7 @@ xfs_ag_get_geometry(
int error;
/* Lock the AG headers. */
- error = xfs_ialloc_read_agi(pag, NULL, &agi_bp);
+ error = xfs_ialloc_read_agi(pag, NULL, 0, &agi_bp);
if (error)
return error;
error = xfs_alloc_read_agf(pag, NULL, 0, &agf_bp);
@@ -1128,7 +955,7 @@ xfs_ag_get_geometry(
/* Fill out form. */
memset(ageo, 0, sizeof(*ageo));
- ageo->ag_number = pag->pag_agno;
+ ageo->ag_number = pag_agno(pag);
agi = agi_bp->b_addr;
ageo->ag_icount = be32_to_cpu(agi->agi_count);
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 35de09a2516c..1f24cfa27321 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -7,6 +7,8 @@
#ifndef __LIBXFS_AG_H
#define __LIBXFS_AG_H 1
+#include "xfs_group.h"
+
struct xfs_mount;
struct xfs_trans;
struct xfs_perag;
@@ -30,11 +32,7 @@ struct xfs_ag_resv {
* performance of allocation group selection.
*/
struct xfs_perag {
- struct xfs_mount *pag_mount; /* owner filesystem */
- xfs_agnumber_t pag_agno; /* AG this structure belongs to */
- atomic_t pag_ref; /* passive reference count */
- atomic_t pag_active_ref; /* active reference count */
- wait_queue_head_t pag_active_wq;/* woken active_ref falls to zero */
+ struct xfs_group pag_group;
unsigned long pag_opstate;
uint8_t pagf_bno_level; /* # of levels in bno btree */
uint8_t pagf_cnt_level; /* # of levels in cnt btree */
@@ -55,7 +53,6 @@ struct xfs_perag {
xfs_agino_t pagl_leftrec;
xfs_agino_t pagl_rightrec;
- int pagb_count; /* pagb slots in use */
uint8_t pagf_refcount_level; /* recount btree height */
/* Blocks reserved for all kinds of metadata. */
@@ -63,25 +60,13 @@ struct xfs_perag {
/* Blocks reserved for the reverse mapping btree. */
struct xfs_ag_resv pag_rmapbt_resv;
- /* for rcu-safe freeing */
- struct rcu_head rcu_head;
-
/* Precalculated geometry info */
- xfs_agblock_t block_count;
- xfs_agblock_t min_block;
xfs_agino_t agino_min;
xfs_agino_t agino_max;
#ifdef __KERNEL__
/* -- kernel only structures below this line -- */
- /*
- * Bitsets of per-ag metadata that have been checked and/or are sick.
- * Callers should hold pag_state_lock before accessing this field.
- */
- uint16_t pag_checked;
- uint16_t pag_sick;
-
#ifdef CONFIG_XFS_ONLINE_REPAIR
/*
* Alternate btree heights so that online repair won't trip the write
@@ -93,13 +78,6 @@ struct xfs_perag {
uint8_t pagf_repair_rmap_level;
#endif
- spinlock_t pag_state_lock;
-
- spinlock_t pagb_lock; /* lock for pagb_tree */
- struct rb_root pagb_tree; /* ordered tree of busy extents */
- unsigned int pagb_gen; /* generation count for pagb_tree */
- wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */
-
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
spinlock_t pag_ici_lock; /* incore inode cache lock */
@@ -111,21 +89,29 @@ struct xfs_perag {
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
-
- /*
- * We use xfs_drain to track the number of deferred log intent items
- * that have been queued (but not yet processed) so that waiters (e.g.
- * scrub) will not lock resources when other threads are in the middle
- * of processing a chain of intent items only to find momentary
- * inconsistencies.
- */
- struct xfs_defer_drain pag_intents_drain;
-
- /* Hook to feed rmapbt updates to an active online repair. */
- struct xfs_hooks pag_rmap_update_hooks;
#endif /* __KERNEL__ */
};
+static inline struct xfs_perag *to_perag(struct xfs_group *xg)
+{
+ return container_of(xg, struct xfs_perag, pag_group);
+}
+
+static inline struct xfs_group *pag_group(struct xfs_perag *pag)
+{
+ return &pag->pag_group;
+}
+
+static inline struct xfs_mount *pag_mount(const struct xfs_perag *pag)
+{
+ return pag->pag_group.xg_mount;
+}
+
+static inline xfs_agnumber_t pag_agno(const struct xfs_perag *pag)
+{
+ return pag->pag_group.xg_gno;
+}
+
/*
* Per-AG operational state. These are atomic flag bits.
*/
@@ -147,25 +133,80 @@ __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA)
__XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES)
__XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET)
-void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart,
- xfs_agnumber_t agend);
-int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
- xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
+int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t orig_agcount,
+ xfs_agnumber_t new_agcount, xfs_rfsblock_t dcount,
+ xfs_agnumber_t *maxagi);
+void xfs_free_perag_range(struct xfs_mount *mp, xfs_agnumber_t first_agno,
+ xfs_agnumber_t end_agno);
int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
-void xfs_free_perag(struct xfs_mount *mp);
+int xfs_update_last_ag_size(struct xfs_mount *mp, xfs_agnumber_t prev_agcount);
/* Passive AG references */
-struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
-struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
- unsigned int tag);
-struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag);
-void xfs_perag_put(struct xfs_perag *pag);
+static inline struct xfs_perag *
+xfs_perag_get(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ return to_perag(xfs_group_get(mp, agno, XG_TYPE_AG));
+}
+
+static inline struct xfs_perag *
+xfs_perag_hold(
+ struct xfs_perag *pag)
+{
+ return to_perag(xfs_group_hold(pag_group(pag)));
+}
+
+static inline void
+xfs_perag_put(
+ struct xfs_perag *pag)
+{
+ xfs_group_put(pag_group(pag));
+}
/* Active AG references */
-struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t);
-struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t,
- int tag);
-void xfs_perag_rele(struct xfs_perag *pag);
+static inline struct xfs_perag *
+xfs_perag_grab(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ return to_perag(xfs_group_grab(mp, agno, XG_TYPE_AG));
+}
+
+static inline void
+xfs_perag_rele(
+ struct xfs_perag *pag)
+{
+ xfs_group_rele(pag_group(pag));
+}
+
+static inline struct xfs_perag *
+xfs_perag_next_range(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ xfs_agnumber_t start_agno,
+ xfs_agnumber_t end_agno)
+{
+ return to_perag(xfs_group_next_range(mp, pag ? pag_group(pag) : NULL,
+ start_agno, end_agno, XG_TYPE_AG));
+}
+
+static inline struct xfs_perag *
+xfs_perag_next_from(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ xfs_agnumber_t start_agno)
+{
+ return xfs_perag_next_range(mp, pag, start_agno, mp->m_sb.sb_agcount - 1);
+}
+
+static inline struct xfs_perag *
+xfs_perag_next(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag)
+{
+ return xfs_perag_next_from(mp, pag, 0);
+}
/*
* Per-ag geometry infomation and validation
@@ -177,11 +218,7 @@ void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
static inline bool
xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno)
{
- if (agbno >= pag->block_count)
- return false;
- if (agbno <= pag->min_block)
- return false;
- return true;
+ return xfs_verify_gbno(pag_group(pag), agbno);
}
static inline bool
@@ -190,13 +227,7 @@ xfs_verify_agbext(
xfs_agblock_t agbno,
xfs_agblock_t len)
{
- if (agbno + len <= agbno)
- return false;
-
- if (!xfs_verify_agbno(pag, agbno))
- return false;
-
- return xfs_verify_agbno(pag, agbno + len - 1);
+ return xfs_verify_gbext(pag_group(pag), agbno, len);
}
/*
@@ -232,47 +263,6 @@ xfs_ag_contains_log(struct xfs_mount *mp, xfs_agnumber_t agno)
agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
}
-/*
- * Perag iteration APIs
- */
-static inline struct xfs_perag *
-xfs_perag_next(
- struct xfs_perag *pag,
- xfs_agnumber_t *agno,
- xfs_agnumber_t end_agno)
-{
- struct xfs_mount *mp = pag->pag_mount;
-
- *agno = pag->pag_agno + 1;
- xfs_perag_rele(pag);
- while (*agno <= end_agno) {
- pag = xfs_perag_grab(mp, *agno);
- if (pag)
- return pag;
- (*agno)++;
- }
- return NULL;
-}
-
-#define for_each_perag_range(mp, agno, end_agno, pag) \
- for ((pag) = xfs_perag_grab((mp), (agno)); \
- (pag) != NULL; \
- (pag) = xfs_perag_next((pag), &(agno), (end_agno)))
-
-#define for_each_perag_from(mp, agno, pag) \
- for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))
-
-#define for_each_perag(mp, agno, pag) \
- (agno) = 0; \
- for_each_perag_from((mp), (agno), (pag))
-
-#define for_each_perag_tag(mp, agno, pag, tag) \
- for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
- (pag) != NULL; \
- (agno) = (pag)->pag_agno + 1, \
- xfs_perag_rele(pag), \
- (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
-
static inline struct xfs_perag *
xfs_perag_next_wrap(
struct xfs_perag *pag,
@@ -281,9 +271,9 @@ xfs_perag_next_wrap(
xfs_agnumber_t restart_agno,
xfs_agnumber_t wrap_agno)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
- *agno = pag->pag_agno + 1;
+ *agno = pag_agno(pag) + 1;
xfs_perag_rele(pag);
while (*agno != stop_agno) {
if (*agno >= wrap_agno) {
@@ -345,4 +335,28 @@ int xfs_ag_extend_space(struct xfs_perag *pag, struct xfs_trans *tp,
xfs_extlen_t len);
int xfs_ag_get_geometry(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
+static inline xfs_fsblock_t
+xfs_agbno_to_fsb(
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno)
+{
+ return XFS_AGB_TO_FSB(pag_mount(pag), pag_agno(pag), agbno);
+}
+
+static inline xfs_daddr_t
+xfs_agbno_to_daddr(
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno)
+{
+ return XFS_AGB_TO_DADDR(pag_mount(pag), pag_agno(pag), agbno);
+}
+
+static inline xfs_ino_t
+xfs_agino_to_ino(
+ struct xfs_perag *pag,
+ xfs_agino_t agino)
+{
+ return XFS_AGINO_TO_INO(pag_mount(pag), pag_agno(pag), agino);
+}
+
#endif /* __LIBXFS_AG_H */
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index da1057bd0e60..f5d853089019 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -70,6 +70,7 @@ xfs_ag_resv_critical(
struct xfs_perag *pag,
enum xfs_ag_resv_type type)
{
+ struct xfs_mount *mp = pag_mount(pag);
xfs_extlen_t avail;
xfs_extlen_t orig;
@@ -92,8 +93,8 @@ xfs_ag_resv_critical(
/* Critically low if less than 10% or max btree height remains. */
return XFS_TEST_ERROR(avail < orig / 10 ||
- avail < pag->pag_mount->m_agbtree_maxlevels,
- pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
+ avail < mp->m_agbtree_maxlevels,
+ mp, XFS_ERRTAG_AG_RESV_CRITICAL);
}
/*
@@ -126,20 +127,19 @@ xfs_ag_resv_needed(
}
/* Clean out a reservation */
-static int
+static void
__xfs_ag_resv_free(
struct xfs_perag *pag,
enum xfs_ag_resv_type type)
{
struct xfs_ag_resv *resv;
xfs_extlen_t oldresv;
- int error;
trace_xfs_ag_resv_free(pag, type, 0);
resv = xfs_perag_resv(pag, type);
- if (pag->pag_agno == 0)
- pag->pag_mount->m_ag_max_usable += resv->ar_asked;
+ if (pag_agno(pag) == 0)
+ pag_mount(pag)->m_ag_max_usable += resv->ar_asked;
/*
* RMAPBT blocks come from the AGFL and AGFL blocks are always
* considered "free", so whatever was reserved at mount time must be
@@ -149,30 +149,19 @@ __xfs_ag_resv_free(
oldresv = resv->ar_orig_reserved;
else
oldresv = resv->ar_reserved;
- error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
+ xfs_add_fdblocks(pag_mount(pag), oldresv);
resv->ar_reserved = 0;
resv->ar_asked = 0;
resv->ar_orig_reserved = 0;
-
- if (error)
- trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
- error, _RET_IP_);
- return error;
}
/* Free a per-AG reservation. */
-int
+void
xfs_ag_resv_free(
struct xfs_perag *pag)
{
- int error;
- int err2;
-
- error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
- err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
- if (err2 && !error)
- error = err2;
- return error;
+ __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
+ __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
}
static int
@@ -182,7 +171,7 @@ __xfs_ag_resv_init(
xfs_extlen_t ask,
xfs_extlen_t used)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_ag_resv *resv;
int error;
xfs_extlen_t hidden_space;
@@ -216,13 +205,12 @@ __xfs_ag_resv_init(
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_AG_RESV_FAIL))
error = -ENOSPC;
else
- error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
+ error = xfs_dec_fdblocks(mp, hidden_space, true);
if (error) {
- trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_ag_resv_init_error(pag, error, _RET_IP_);
xfs_warn(mp,
"Per-AG reservation for AG %u failed. Filesystem may run out of space.",
- pag->pag_agno);
+ pag_agno(pag));
return error;
}
@@ -232,7 +220,7 @@ __xfs_ag_resv_init(
* counter, we only make the adjustment for AG 0. This assumes that
* there aren't any AGs hungrier for per-AG reservation than AG 0.
*/
- if (pag->pag_agno == 0)
+ if (pag_agno(pag) == 0)
mp->m_ag_max_usable -= ask;
resv = xfs_perag_resv(pag, type);
@@ -250,7 +238,7 @@ xfs_ag_resv_init(
struct xfs_perag *pag,
struct xfs_trans *tp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
xfs_extlen_t ask;
xfs_extlen_t used;
int error = 0, error2;
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index b74b210008ea..f247eeff7358 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -6,7 +6,7 @@
#ifndef __XFS_AG_RESV_H__
#define __XFS_AG_RESV_H__
-int xfs_ag_resv_free(struct xfs_perag *pag);
+void xfs_ag_resv_free(struct xfs_perag *pag);
int xfs_ag_resv_init(struct xfs_perag *pag, struct xfs_trans *tp);
bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type);
@@ -33,23 +33,4 @@ xfs_perag_resv(
}
}
-/*
- * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from
- * the AGFL, they are allocated one at a time and the reservation updates don't
- * require a transaction.
- */
-static inline void
-xfs_ag_resv_rmapbt_alloc(
- struct xfs_mount *mp,
- xfs_agnumber_t agno)
-{
- struct xfs_alloc_arg args = { NULL };
- struct xfs_perag *pag;
-
- args.len = 1;
- pag = xfs_perag_get(mp, agno);
- xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
- xfs_perag_put(pag);
-}
-
#endif /* __XFS_AG_RESV_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9da52e92172a..3d33e17f2e5c 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -27,6 +27,7 @@
#include "xfs_ag_resv.h"
#include "xfs_bmap.h"
#include "xfs_health.h"
+#include "xfs_extfree_item.h"
struct kmem_cache *xfs_extfree_item_cache;
@@ -79,7 +80,7 @@ xfs_prealloc_blocks(
}
/*
- * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
+ * The number of blocks per AG that we withhold from xfs_dec_fdblocks to
* guarantee that we can refill the AGFL prior to allocating space in a nearly
* full AG. Although the space described by the free space btrees, the
* blocks used by the freesp btrees themselves, and the blocks owned by the
@@ -89,7 +90,7 @@ xfs_prealloc_blocks(
* until the fs goes down, we subtract this many AG blocks from the incore
* fdblocks to ensure user allocation does not overcommit the space the
* filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
- * withhold space from xfs_mod_fdblocks, so we do not account for that here.
+ * withhold space from xfs_dec_fdblocks, so we do not account for that here.
*/
#define XFS_ALLOCBT_AGFL_RESERVE 4
@@ -274,7 +275,7 @@ xfs_alloc_complain_bad_rec(
xfs_warn(mp,
"%sbt record corruption in AG %d detected at %pS!",
- cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa);
+ cur->bc_ops->name, cur->bc_group->xg_gno, fa);
xfs_warn(mp,
"start block 0x%x block count 0x%x", irec->ar_startblock,
irec->ar_blockcount);
@@ -302,7 +303,7 @@ xfs_alloc_get_rec(
return error;
xfs_alloc_btrec_to_irec(rec, &irec);
- fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec);
+ fa = xfs_alloc_check_irec(to_perag(cur->bc_group), &irec);
if (fa)
return xfs_alloc_complain_bad_rec(cur, fa, &irec);
@@ -330,7 +331,8 @@ xfs_alloc_compute_aligned(
bool busy;
/* Trim busy sections out of found extent */
- busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
+ busy = xfs_extent_busy_trim(pag_group(args->pag), args->minlen,
+ args->maxlen, &bno, &len, busy_gen);
/*
* If we have a largish extent that happens to start before min_agbno,
@@ -466,6 +468,97 @@ xfs_alloc_fix_len(
}
/*
+ * Determine if the cursor points to the block that contains the right-most
+ * block of records in the by-count btree. This block contains the largest
+ * contiguous free extent in the AG, so if we modify a record in this block we
+ * need to call xfs_alloc_fixup_longest() once the modifications are done to
+ * ensure the agf->agf_longest field is kept up to date with the longest free
+ * extent tracked by the by-count btree.
+ */
+static bool
+xfs_alloc_cursor_at_lastrec(
+ struct xfs_btree_cur *cnt_cur)
+{
+ struct xfs_btree_block *block;
+ union xfs_btree_ptr ptr;
+ struct xfs_buf *bp;
+
+ block = xfs_btree_get_block(cnt_cur, 0, &bp);
+
+ xfs_btree_get_sibling(cnt_cur, block, &ptr, XFS_BB_RIGHTSIB);
+ return xfs_btree_ptr_is_null(cnt_cur, &ptr);
+}
+
+/*
+ * Find the rightmost record of the cntbt, and return the longest free space
+ * recorded in it. Simply set both the block number and the length to their
+ * maximum values before searching.
+ */
+static int
+xfs_cntbt_longest(
+ struct xfs_btree_cur *cnt_cur,
+ xfs_extlen_t *longest)
+{
+ struct xfs_alloc_rec_incore irec;
+ union xfs_btree_rec *rec;
+ int stat = 0;
+ int error;
+
+ memset(&cnt_cur->bc_rec, 0xFF, sizeof(cnt_cur->bc_rec));
+ error = xfs_btree_lookup(cnt_cur, XFS_LOOKUP_LE, &stat);
+ if (error)
+ return error;
+ if (!stat) {
+ /* totally empty tree */
+ *longest = 0;
+ return 0;
+ }
+
+ error = xfs_btree_get_rec(cnt_cur, &rec, &stat);
+ if (error)
+ return error;
+ if (XFS_IS_CORRUPT(cnt_cur->bc_mp, !stat)) {
+ xfs_btree_mark_sick(cnt_cur);
+ return -EFSCORRUPTED;
+ }
+
+ xfs_alloc_btrec_to_irec(rec, &irec);
+ *longest = irec.ar_blockcount;
+ return 0;
+}
+
+/*
+ * Update the longest contiguous free extent in the AG from the by-count cursor
+ * that is passed to us. This should be done at the end of any allocation or
+ * freeing operation that touches the longest extent in the btree.
+ *
+ * Needing to update the longest extent can be determined by calling
+ * xfs_alloc_cursor_at_lastrec() after the cursor is positioned for record
+ * modification but before the modification begins.
+ */
+static int
+xfs_alloc_fixup_longest(
+ struct xfs_btree_cur *cnt_cur)
+{
+ struct xfs_perag *pag = to_perag(cnt_cur->bc_group);
+ struct xfs_buf *bp = cnt_cur->bc_ag.agbp;
+ struct xfs_agf *agf = bp->b_addr;
+ xfs_extlen_t longest = 0;
+ int error;
+
+ /* Lookup last rec in order to update AGF. */
+ error = xfs_cntbt_longest(cnt_cur, &longest);
+ if (error)
+ return error;
+
+ pag->pagf_longest = longest;
+ agf->agf_longest = cpu_to_be32(pag->pagf_longest);
+ xfs_alloc_log_agf(cnt_cur->bc_tp, bp, XFS_AGF_LONGEST);
+
+ return 0;
+}
+
+/*
* Update the two btrees, logically removing from freespace the extent
* starting at rbno, rlen blocks. The extent is contained within the
* actual (current) free extent fbno for flen blocks.
@@ -489,6 +582,7 @@ xfs_alloc_fixup_trees(
xfs_extlen_t nflen1=0; /* first new free length */
xfs_extlen_t nflen2=0; /* second new free length */
struct xfs_mount *mp;
+ bool fixup_longest = false;
mp = cnt_cur->bc_mp;
@@ -577,6 +671,10 @@ xfs_alloc_fixup_trees(
nfbno2 = rbno + rlen;
nflen2 = (fbno + flen) - nfbno2;
}
+
+ if (xfs_alloc_cursor_at_lastrec(cnt_cur))
+ fixup_longest = true;
+
/*
* Delete the entry from the by-size btree.
*/
@@ -654,6 +752,10 @@ xfs_alloc_fixup_trees(
return -EFSCORRUPTED;
}
}
+
+ if (fixup_longest)
+ return xfs_alloc_fixup_longest(cnt_cur);
+
return 0;
}
@@ -698,7 +800,7 @@ xfs_agfl_verify(
* use it by using uncached buffers that don't have the perag attached
* so we can detect and avoid this problem.
*/
- if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
+ if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != pag_agno((bp->b_pag)))
return __this_address;
for (i = 0; i < xfs_agfl_size(mp); i++) {
@@ -778,13 +880,12 @@ xfs_alloc_read_agfl(
struct xfs_trans *tp,
struct xfs_buf **bpp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_buf *bp;
int error;
- error = xfs_trans_read_buf(
- mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+ XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGFL_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
@@ -1008,13 +1109,12 @@ xfs_alloc_cur_finish(
struct xfs_alloc_arg *args,
struct xfs_alloc_cur *acur)
{
- struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
int error;
ASSERT(acur->cnt && acur->bnolt);
ASSERT(acur->bno >= acur->rec_bno);
ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
- ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
+ ASSERT(xfs_verify_agbext(args->pag, acur->rec_bno, acur->rec_len));
error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
acur->rec_len, acur->bno, acur->len, 0);
@@ -1152,14 +1252,14 @@ xfs_alloc_ag_vextent_small(
if (fbno == NULLAGBLOCK)
goto out;
- xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
+ xfs_extent_busy_reuse(pag_group(args->pag), fbno, 1,
(args->datatype & XFS_ALLOC_NOBUSY));
if (args->datatype & XFS_ALLOC_USERDATA) {
struct xfs_buf *bp;
error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
+ xfs_agbno_to_daddr(args->pag, fbno),
args->mp->m_bsize, 0, &bp);
if (error)
goto error;
@@ -1217,7 +1317,6 @@ STATIC int /* error */
xfs_alloc_ag_vextent_exact(
xfs_alloc_arg_t *args) /* allocation argument structure */
{
- struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
int error;
@@ -1266,7 +1365,8 @@ xfs_alloc_ag_vextent_exact(
*/
tbno = fbno;
tlen = flen;
- xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
+ xfs_extent_busy_trim(pag_group(args->pag), args->minlen, args->maxlen,
+ &tbno, &tlen, &busy_gen);
/*
* Give up if the start of the extent is busy, or the freespace isn't
@@ -1297,7 +1397,7 @@ xfs_alloc_ag_vextent_exact(
*/
cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, args->agbp,
args->pag);
- ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
+ ASSERT(xfs_verify_agbext(args->pag, args->agbno, args->len));
error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
args->len, XFSA_FIXUP_BNO_OK);
if (error) {
@@ -1659,8 +1759,9 @@ restart:
* the allocation can be retried.
*/
trace_xfs_alloc_near_busy(args);
- error = xfs_extent_busy_flush(args->tp, args->pag,
- acur.busy_gen, alloc_flags);
+ error = xfs_extent_busy_flush(args->tp,
+ pag_group(args->pag), acur.busy_gen,
+ alloc_flags);
if (error)
goto out;
@@ -1775,8 +1876,9 @@ restart:
* the allocation can be retried.
*/
trace_xfs_alloc_size_busy(args);
- error = xfs_extent_busy_flush(args->tp, args->pag,
- busy_gen, alloc_flags);
+ error = xfs_extent_busy_flush(args->tp,
+ pag_group(args->pag), busy_gen,
+ alloc_flags);
if (error)
goto error0;
@@ -1824,7 +1926,7 @@ restart:
error = -EFSCORRUPTED;
goto error0;
}
- if (flen < bestrlen)
+ if (flen <= bestrlen)
break;
busy = xfs_alloc_compute_aligned(args, fbno, flen,
&rbno, &rlen, &busy_gen);
@@ -1874,8 +1976,9 @@ restart:
* the allocation can be retried.
*/
trace_xfs_alloc_size_busy(args);
- error = xfs_extent_busy_flush(args->tp, args->pag,
- busy_gen, alloc_flags);
+ error = xfs_extent_busy_flush(args->tp,
+ pag_group(args->pag), busy_gen,
+ alloc_flags);
if (error)
goto error0;
@@ -1934,11 +2037,10 @@ out_nominleft:
/*
* Free the extent starting at agno/bno for length.
*/
-STATIC int
+int
xfs_free_ag_extent(
struct xfs_trans *tp,
struct xfs_buf *agbp,
- xfs_agnumber_t agno,
xfs_agblock_t bno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
@@ -1958,6 +2060,7 @@ xfs_free_ag_extent(
int i;
int error;
struct xfs_perag *pag = agbp->b_pag;
+ bool fixup_longest = false;
bno_cur = cnt_cur = NULL;
mp = tp->t_mountp;
@@ -2221,8 +2324,13 @@ xfs_free_ag_extent(
}
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
bno_cur = NULL;
+
/*
* In all cases we need to insert the new freespace in the by-size tree.
+ *
+ * If this new freespace is being inserted in the block that contains
+ * the largest free space in the btree, make sure we also fix up the
+ * agf->agf_longest tracker field.
*/
if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
goto error0;
@@ -2231,6 +2339,8 @@ xfs_free_ag_extent(
error = -EFSCORRUPTED;
goto error0;
}
+ if (xfs_alloc_cursor_at_lastrec(cnt_cur))
+ fixup_longest = true;
if ((error = xfs_btree_insert(cnt_cur, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -2238,6 +2348,12 @@ xfs_free_ag_extent(
error = -EFSCORRUPTED;
goto error0;
}
+ if (fixup_longest) {
+ error = xfs_alloc_fixup_longest(cnt_cur);
+ if (error)
+ goto error0;
+ }
+
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
cnt_cur = NULL;
@@ -2245,19 +2361,19 @@ xfs_free_ag_extent(
* Update the freespace totals in the ag and superblock.
*/
error = xfs_alloc_update_counters(tp, agbp, len);
- xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
+ xfs_ag_resv_free_extent(pag, type, tp, len);
if (error)
goto error0;
XFS_STATS_INC(mp, xs_freex);
XFS_STATS_ADD(mp, xs_freeb, len);
- trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
+ trace_xfs_free_extent(pag, bno, len, type, haveleft, haveright);
return 0;
error0:
- trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
+ trace_xfs_free_extent(pag, bno, len, type, -1, -1);
if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
if (cnt_cur)
@@ -2316,7 +2432,7 @@ xfs_alloc_longest_free_extent(
* reservations and AGFL rules in place, we can return this extent.
*/
if (pag->pagf_longest > delta)
- return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
+ return min_t(xfs_extlen_t, pag_mount(pag)->m_ag_max_usable,
pag->pagf_longest - delta);
/* Otherwise, let the caller try for 1 block if there's space. */
@@ -2424,32 +2540,6 @@ xfs_alloc_space_available(
return true;
}
-int
-xfs_free_agfl_block(
- struct xfs_trans *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t agbno,
- struct xfs_buf *agbp,
- struct xfs_owner_info *oinfo)
-{
- int error;
- struct xfs_buf *bp;
-
- error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
- XFS_AG_RESV_AGFL);
- if (error)
- return error;
-
- error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
- XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
- tp->t_mountp->m_bsize, 0, &bp);
- if (error)
- return error;
- xfs_trans_binval(tp, bp);
-
- return 0;
-}
-
/*
* Check the agfl fields of the agf for inconsistency or corruption.
*
@@ -2525,7 +2615,7 @@ xfs_agfl_reset(
xfs_warn(mp,
"WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
"Please unmount and run xfs_repair.",
- pag->pag_agno, pag->pagf_flcount);
+ pag_agno(pag), pag->pagf_flcount);
agf->agf_flfirst = 0;
agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
@@ -2538,48 +2628,6 @@ xfs_agfl_reset(
}
/*
- * Defer an AGFL block free. This is effectively equivalent to
- * xfs_free_extent_later() with some special handling particular to AGFL blocks.
- *
- * Deferring AGFL frees helps prevent log reservation overruns due to too many
- * allocation operations in a transaction. AGFL frees are prone to this problem
- * because for one they are always freed one at a time. Further, an immediate
- * AGFL block free can cause a btree join and require another block free before
- * the real allocation can proceed. Deferring the free disconnects freeing up
- * the AGFL slot from freeing the block.
- */
-static int
-xfs_defer_agfl_block(
- struct xfs_trans *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t agbno,
- struct xfs_owner_info *oinfo)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_extent_free_item *xefi;
- xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
-
- ASSERT(xfs_extfree_item_cache != NULL);
- ASSERT(oinfo != NULL);
-
- if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
- return -EFSCORRUPTED;
-
- xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
- GFP_KERNEL | __GFP_NOFAIL);
- xefi->xefi_startblock = fsbno;
- xefi->xefi_blockcount = 1;
- xefi->xefi_owner = oinfo->oi_owner;
- xefi->xefi_agresv = XFS_AG_RESV_AGFL;
-
- trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
-
- xfs_extent_free_get_group(mp, xefi);
- xfs_defer_add(tp, &xefi->xefi_list, &xfs_agfl_free_defer_type);
- return 0;
-}
-
-/*
* Add the extent to the list of extents to be free at transaction end.
* The list is maintained sorted (by block number).
*/
@@ -2590,39 +2638,37 @@ xfs_defer_extent_free(
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
- bool skip_discard,
+ unsigned int free_flags,
struct xfs_defer_pending **dfpp)
{
struct xfs_extent_free_item *xefi;
struct xfs_mount *mp = tp->t_mountp;
-#ifdef DEBUG
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- ASSERT(bno != NULLFSBLOCK);
- ASSERT(len > 0);
ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
ASSERT(!isnullstartblock(bno));
- agno = XFS_FSB_TO_AGNO(mp, bno);
- agbno = XFS_FSB_TO_AGBNO(mp, bno);
- ASSERT(agno < mp->m_sb.sb_agcount);
- ASSERT(agbno < mp->m_sb.sb_agblocks);
- ASSERT(len < mp->m_sb.sb_agblocks);
- ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
-#endif
- ASSERT(xfs_extfree_item_cache != NULL);
- ASSERT(type != XFS_AG_RESV_AGFL);
+ ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
- if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
- return -EFSCORRUPTED;
+ if (free_flags & XFS_FREE_EXTENT_REALTIME) {
+ if (type != XFS_AG_RESV_NONE) {
+ ASSERT(type == XFS_AG_RESV_NONE);
+ return -EFSCORRUPTED;
+ }
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+ } else {
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+ }
xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
GFP_KERNEL | __GFP_NOFAIL);
xefi->xefi_startblock = bno;
xefi->xefi_blockcount = (xfs_extlen_t)len;
xefi->xefi_agresv = type;
- if (skip_discard)
+ if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD)
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
+ if (free_flags & XFS_FREE_EXTENT_REALTIME)
+ xefi->xefi_flags |= XFS_EFI_REALTIME;
if (oinfo) {
ASSERT(oinfo->oi_offset == 0);
@@ -2634,12 +2680,8 @@ xfs_defer_extent_free(
} else {
xefi->xefi_owner = XFS_RMAP_OWN_NULL;
}
- trace_xfs_bmap_free_defer(mp,
- XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
- XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
- xfs_extent_free_get_group(mp, xefi);
- *dfpp = xfs_defer_add(tp, &xefi->xefi_list, &xfs_extent_free_defer_type);
+ xfs_extent_free_defer_add(tp, xefi, dfpp);
return 0;
}
@@ -2650,11 +2692,11 @@ xfs_free_extent_later(
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
- bool skip_discard)
+ unsigned int free_flags)
{
struct xfs_defer_pending *dontcare = NULL;
- return xfs_defer_extent_free(tp, bno, len, oinfo, type, skip_discard,
+ return xfs_defer_extent_free(tp, bno, len, oinfo, type, free_flags,
&dontcare);
}
@@ -2679,13 +2721,13 @@ xfs_free_extent_later(
int
xfs_alloc_schedule_autoreap(
const struct xfs_alloc_arg *args,
- bool skip_discard,
+ unsigned int free_flags,
struct xfs_alloc_autoreap *aarp)
{
int error;
error = xfs_defer_extent_free(args->tp, args->fsbno, args->len,
- &args->oinfo, args->resv, skip_discard, &aarp->dfp);
+ &args->oinfo, args->resv, free_flags, &aarp->dfp);
if (error)
return error;
@@ -2738,7 +2780,6 @@ xfs_alloc_commit_autoreap(
xfs_defer_item_unpause(tp, aarp->dfp);
}
-#ifdef DEBUG
/*
* Check if an AGF has a free extent record whose length is equal to
* args->minlen.
@@ -2778,7 +2819,6 @@ out:
return error;
}
-#endif
/*
* Decide whether to use this allocation group for this allocation.
@@ -2852,15 +2892,14 @@ xfs_alloc_fix_freelist(
if (!xfs_alloc_space_available(args, need, alloc_flags))
goto out_agbp_relse;
-#ifdef DEBUG
- if (args->alloc_minlen_only) {
+ if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) {
int stat;
error = xfs_exact_minlen_extent_available(args, agbp, &stat);
if (error || !stat)
goto out_agbp_relse;
}
-#endif
+
/*
* Make the freelist shorter if it's too long.
*
@@ -2897,8 +2936,20 @@ xfs_alloc_fix_freelist(
if (error)
goto out_agbp_relse;
- /* defer agfl frees */
- error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ /*
+ * Defer the AGFL block free.
+ *
+ * This helps to prevent log reservation overruns due to too
+ * many allocation operations in a transaction. AGFL frees are
+ * prone to this problem because for one they are always freed
+ * one at a time. Further, an immediate AGFL block free can
+ * cause a btree join and require another block free before the
+ * real allocation can proceed.
+ * Deferring the free disconnects freeing up the AGFL slot from
+ * freeing the block.
+ */
+ error = xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, bno),
+ 1, &targs.oinfo, XFS_AG_RESV_AGFL, 0);
if (error)
goto out_agbp_relse;
}
@@ -3118,8 +3169,6 @@ xfs_alloc_put_freelist(
logflags |= XFS_AGF_BTREEBLKS;
}
- xfs_alloc_log_agf(tp, agbp, logflags);
-
ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
agfl_bno = xfs_buf_to_agfl_bno(agflbp);
@@ -3152,7 +3201,7 @@ xfs_validate_ag_length(
* use it by using uncached buffers that don't have the perag attached
* so we can detect and avoid this problem.
*/
- if (bp->b_pag && seqno != bp->b_pag->pag_agno)
+ if (bp->b_pag && seqno != pag_agno(bp->b_pag))
return __this_address;
/*
@@ -3321,13 +3370,13 @@ xfs_read_agf(
int flags,
struct xfs_buf **agfbpp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
int error;
- trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
+ trace_xfs_read_agf(pag);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
+ XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
@@ -3350,12 +3399,13 @@ xfs_alloc_read_agf(
int flags,
struct xfs_buf **agfbpp)
{
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_buf *agfbp;
struct xfs_agf *agf;
int error;
int allocbt_blks;
- trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
+ trace_xfs_alloc_read_agf(pag);
/* We don't support trylock when freeing. */
ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
@@ -3376,7 +3426,7 @@ xfs_alloc_read_agf(
pag->pagf_cnt_level = be32_to_cpu(agf->agf_cnt_level);
pag->pagf_rmap_level = be32_to_cpu(agf->agf_rmap_level);
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
- if (xfs_agfl_needs_reset(pag->pag_mount, agf))
+ if (xfs_agfl_needs_reset(mp, agf))
set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
else
clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
@@ -3389,16 +3439,15 @@ xfs_alloc_read_agf(
* counter only tracks non-root blocks.
*/
allocbt_blks = pag->pagf_btreeblks;
- if (xfs_has_rmapbt(pag->pag_mount))
+ if (xfs_has_rmapbt(mp))
allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
if (allocbt_blks > 0)
- atomic64_add(allocbt_blks,
- &pag->pag_mount->m_allocbt_blks);
+ atomic64_add(allocbt_blks, &mp->m_allocbt_blks);
set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
}
#ifdef DEBUG
- else if (!xfs_is_shutdown(pag->pag_mount)) {
+ else if (!xfs_is_shutdown(mp)) {
ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
@@ -3559,7 +3608,7 @@ xfs_alloc_vextent_finish(
goto out_drop_perag;
}
- args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
+ args->fsbno = xfs_agbno_to_fsb(args->pag, args->agbno);
ASSERT(args->len >= args->minlen);
ASSERT(args->len <= args->maxlen);
@@ -3580,8 +3629,8 @@ xfs_alloc_vextent_finish(
if (error)
goto out_drop_perag;
- ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
- args->len));
+ ASSERT(!xfs_extent_busy_search(pag_group(args->pag),
+ args->agbno, args->len));
}
xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
@@ -3611,21 +3660,20 @@ xfs_alloc_vextent_this_ag(
struct xfs_alloc_arg *args,
xfs_agnumber_t agno)
{
- struct xfs_mount *mp = args->mp;
xfs_agnumber_t minimum_agno;
uint32_t alloc_flags = 0;
int error;
ASSERT(args->pag != NULL);
- ASSERT(args->pag->pag_agno == agno);
+ ASSERT(pag_agno(args->pag) == agno);
args->agno = agno;
args->agbno = 0;
trace_xfs_alloc_vextent_this_ag(args);
- error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
- &minimum_agno);
+ error = xfs_alloc_vextent_check_args(args,
+ xfs_agbno_to_fsb(args->pag, 0), &minimum_agno);
if (error) {
if (error == -ENOSPC)
return 0;
@@ -3830,7 +3878,7 @@ xfs_alloc_vextent_exact_bno(
int error;
ASSERT(args->pag != NULL);
- ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
+ ASSERT(pag_agno(args->pag) == XFS_FSB_TO_AGNO(mp, target));
args->agno = XFS_FSB_TO_AGNO(mp, target);
args->agbno = XFS_FSB_TO_AGBNO(mp, target);
@@ -3869,7 +3917,7 @@ xfs_alloc_vextent_near_bno(
int error;
if (!needs_perag)
- ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
+ ASSERT(pag_agno(args->pag) == XFS_FSB_TO_AGNO(mp, target));
args->agno = XFS_FSB_TO_AGNO(mp, target);
args->agbno = XFS_FSB_TO_AGBNO(mp, target);
@@ -3906,7 +3954,7 @@ xfs_free_extent_fix_freelist(
memset(&args, 0, sizeof(struct xfs_alloc_arg));
args.tp = tp;
args.mp = tp->t_mountp;
- args.agno = pag->pag_agno;
+ args.agno = pag_agno(pag);
args.pag = pag;
/*
@@ -3974,14 +4022,13 @@ __xfs_free_extent(
goto err_release;
}
- error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
- type);
+ error = xfs_free_ag_extent(tp, agbp, agbno, len, oinfo, type);
if (error)
goto err_release;
if (skip_discard)
busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
- xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
+ xfs_extent_busy_insert(tp, pag_group(pag), agbno, len, busy_flags);
return 0;
err_release:
@@ -4006,7 +4053,7 @@ xfs_alloc_query_range_helper(
xfs_failaddr_t fa;
xfs_alloc_btrec_to_irec(rec, &irec);
- fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec);
+ fa = xfs_alloc_check_irec(to_perag(cur->bc_group), &irec);
if (fa)
return xfs_alloc_complain_bad_rec(cur, fa, &irec);
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 0b956f8b9d5a..50ef79a1ed41 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -53,11 +53,9 @@ typedef struct xfs_alloc_arg {
int datatype; /* mask defining data type treatment */
char wasdel; /* set if allocation was prev delayed */
char wasfromfl; /* set if allocation is from freelist */
+ bool alloc_minlen_only; /* allocate exact minlen extent */
struct xfs_owner_info oinfo; /* owner of blocks being allocated */
enum xfs_ag_resv_type resv; /* block reservation to use */
-#ifdef DEBUG
- bool alloc_minlen_only; /* allocate exact minlen extent */
-#endif
} xfs_alloc_arg_t;
/*
@@ -80,6 +78,9 @@ int xfs_alloc_get_freelist(struct xfs_perag *pag, struct xfs_trans *tp,
int xfs_alloc_put_freelist(struct xfs_perag *pag, struct xfs_trans *tp,
struct xfs_buf *agfbp, struct xfs_buf *agflbp,
xfs_agblock_t bno, int btreeblk);
+int xfs_free_ag_extent(struct xfs_trans *tp, struct xfs_buf *agbp,
+ xfs_agblock_t bno, xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
/*
* Compute and fill in value of m_alloc_maxlevels.
@@ -194,8 +195,6 @@ int xfs_alloc_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags,
struct xfs_buf **agfbpp);
int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp,
struct xfs_buf **bpp);
-int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t,
- struct xfs_buf *, struct xfs_owner_info *);
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags);
int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_buf **agbp);
@@ -233,7 +232,16 @@ xfs_buf_to_agfl_bno(
int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type type, bool skip_discard);
+ enum xfs_ag_resv_type type, unsigned int free_flags);
+
+/* Don't issue a discard for the blocks freed. */
+#define XFS_FREE_EXTENT_SKIP_DISCARD (1U << 0)
+
+/* Free blocks on the realtime device. */
+#define XFS_FREE_EXTENT_REALTIME (1U << 1)
+
+#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD | \
+ XFS_FREE_EXTENT_REALTIME)
/*
* List of extents to be free "later".
@@ -244,25 +252,28 @@ struct xfs_extent_free_item {
uint64_t xefi_owner;
xfs_fsblock_t xefi_startblock;/* starting fs block number */
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
- struct xfs_perag *xefi_pag;
+ struct xfs_group *xefi_group;
unsigned int xefi_flags;
enum xfs_ag_resv_type xefi_agresv;
};
-void xfs_extent_free_get_group(struct xfs_mount *mp,
- struct xfs_extent_free_item *xefi);
-
#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
#define XFS_EFI_CANCELLED (1U << 3) /* dont actually free the space */
+#define XFS_EFI_REALTIME (1U << 4) /* freeing realtime extent */
+
+static inline bool xfs_efi_is_realtime(const struct xfs_extent_free_item *xefi)
+{
+ return xefi->xefi_flags & XFS_EFI_REALTIME;
+}
struct xfs_alloc_autoreap {
struct xfs_defer_pending *dfp;
};
int xfs_alloc_schedule_autoreap(const struct xfs_alloc_arg *args,
- bool skip_discard, struct xfs_alloc_autoreap *aarp);
+ unsigned int free_flags, struct xfs_alloc_autoreap *aarp);
void xfs_alloc_cancel_autoreap(struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp);
void xfs_alloc_commit_autoreap(struct xfs_trans *tp,
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 6ef5ddd89600..a4ac37ba5d51 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -28,7 +28,7 @@ xfs_bnobt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
- cur->bc_ag.pag);
+ to_perag(cur->bc_group));
}
STATIC struct xfs_btree_cur *
@@ -36,29 +36,29 @@ xfs_cntbt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
- cur->bc_ag.pag);
+ to_perag(cur->bc_group));
}
-
STATIC void
xfs_allocbt_set_root(
struct xfs_btree_cur *cur,
const union xfs_btree_ptr *ptr,
int inc)
{
- struct xfs_buf *agbp = cur->bc_ag.agbp;
- struct xfs_agf *agf = agbp->b_addr;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
+ struct xfs_buf *agbp = cur->bc_ag.agbp;
+ struct xfs_agf *agf = agbp->b_addr;
ASSERT(ptr->s != 0);
if (xfs_btree_is_bno(cur->bc_ops)) {
agf->agf_bno_root = ptr->s;
be32_add_cpu(&agf->agf_bno_level, inc);
- cur->bc_ag.pag->pagf_bno_level += inc;
+ pag->pagf_bno_level += inc;
} else {
agf->agf_cnt_root = ptr->s;
be32_add_cpu(&agf->agf_cnt_level, inc);
- cur->bc_ag.pag->pagf_cnt_level += inc;
+ pag->pagf_cnt_level += inc;
}
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
@@ -75,7 +75,7 @@ xfs_allocbt_alloc_block(
xfs_agblock_t bno;
/* Allocate the new block from the freelist. If we can't, give up. */
- error = xfs_alloc_get_freelist(cur->bc_ag.pag, cur->bc_tp,
+ error = xfs_alloc_get_freelist(to_perag(cur->bc_group), cur->bc_tp,
cur->bc_ag.agbp, &bno, 1);
if (error)
return error;
@@ -86,7 +86,7 @@ xfs_allocbt_alloc_block(
}
atomic64_inc(&cur->bc_mp->m_allocbt_blks);
- xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.pag, bno, 1, false);
+ xfs_extent_busy_reuse(cur->bc_group, bno, 1, false);
new->s = cpu_to_be32(bno);
@@ -104,78 +104,17 @@ xfs_allocbt_free_block(
int error;
bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
- error = xfs_alloc_put_freelist(cur->bc_ag.pag, cur->bc_tp, agbp, NULL,
- bno, 1);
+ error = xfs_alloc_put_freelist(to_perag(cur->bc_group), cur->bc_tp,
+ agbp, NULL, bno, 1);
if (error)
return error;
atomic64_dec(&cur->bc_mp->m_allocbt_blks);
- xfs_extent_busy_insert(cur->bc_tp, agbp->b_pag, bno, 1,
+ xfs_extent_busy_insert(cur->bc_tp, pag_group(agbp->b_pag), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
return 0;
}
-/*
- * Update the longest extent in the AGF
- */
-STATIC void
-xfs_allocbt_update_lastrec(
- struct xfs_btree_cur *cur,
- const struct xfs_btree_block *block,
- const union xfs_btree_rec *rec,
- int ptr,
- int reason)
-{
- struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
- struct xfs_perag *pag;
- __be32 len;
- int numrecs;
-
- ASSERT(!xfs_btree_is_bno(cur->bc_ops));
-
- switch (reason) {
- case LASTREC_UPDATE:
- /*
- * If this is the last leaf block and it's the last record,
- * then update the size of the longest extent in the AG.
- */
- if (ptr != xfs_btree_get_numrecs(block))
- return;
- len = rec->alloc.ar_blockcount;
- break;
- case LASTREC_INSREC:
- if (be32_to_cpu(rec->alloc.ar_blockcount) <=
- be32_to_cpu(agf->agf_longest))
- return;
- len = rec->alloc.ar_blockcount;
- break;
- case LASTREC_DELREC:
- numrecs = xfs_btree_get_numrecs(block);
- if (ptr <= numrecs)
- return;
- ASSERT(ptr == numrecs + 1);
-
- if (numrecs) {
- xfs_alloc_rec_t *rrp;
-
- rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
- len = rrp->ar_blockcount;
- } else {
- len = 0;
- }
-
- break;
- default:
- ASSERT(0);
- return;
- }
-
- agf->agf_longest = len;
- pag = cur->bc_ag.agbp->b_pag;
- pag->pagf_longest = be32_to_cpu(len);
- xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST);
-}
-
STATIC int
xfs_allocbt_get_minrecs(
struct xfs_btree_cur *cur,
@@ -239,7 +178,7 @@ xfs_allocbt_init_ptr_from_cur(
{
struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno));
if (xfs_btree_is_bno(cur->bc_ops))
ptr->s = agf->agf_bno_root;
@@ -493,7 +432,6 @@ const struct xfs_btree_ops xfs_bnobt_ops = {
.set_root = xfs_allocbt_set_root,
.alloc_block = xfs_allocbt_alloc_block,
.free_block = xfs_allocbt_free_block,
- .update_lastrec = xfs_allocbt_update_lastrec,
.get_minrecs = xfs_allocbt_get_minrecs,
.get_maxrecs = xfs_allocbt_get_maxrecs,
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
@@ -511,7 +449,6 @@ const struct xfs_btree_ops xfs_bnobt_ops = {
const struct xfs_btree_ops xfs_cntbt_ops = {
.name = "cnt",
.type = XFS_BTREE_TYPE_AG,
- .geom_flags = XFS_BTGEO_LASTREC_UPDATE,
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -525,7 +462,6 @@ const struct xfs_btree_ops xfs_cntbt_ops = {
.set_root = xfs_allocbt_set_root,
.alloc_block = xfs_allocbt_alloc_block,
.free_block = xfs_allocbt_free_block,
- .update_lastrec = xfs_allocbt_update_lastrec,
.get_minrecs = xfs_allocbt_get_minrecs,
.get_maxrecs = xfs_allocbt_get_maxrecs,
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
@@ -556,7 +492,7 @@ xfs_bnobt_init_cursor(
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops,
mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agf *agf = agbp->b_addr;
@@ -582,7 +518,7 @@ xfs_cntbt_init_cursor(
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops,
mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agf *agf = agbp->b_addr;
@@ -633,11 +569,11 @@ xfs_allocbt_block_maxrecs(
/*
* Calculate number of records in an alloc btree block.
*/
-int
+unsigned int
xfs_allocbt_maxrecs(
struct xfs_mount *mp,
- int blocklen,
- int leaf)
+ unsigned int blocklen,
+ bool leaf)
{
blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
return xfs_allocbt_block_maxrecs(blocklen, leaf);
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 155b47f231ab..12647f9aaa6d 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
@@ -53,7 +53,8 @@ struct xfs_btree_cur *xfs_bnobt_init_cursor(struct xfs_mount *mp,
struct xfs_btree_cur *xfs_cntbt_init_cursor(struct xfs_mount *mp,
struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_perag *pag);
-extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
+unsigned int xfs_allocbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen,
+ bool leaf);
extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 673a4b6d2e8d..17875ad865f5 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -26,6 +26,7 @@
#include "xfs_trace.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
+#include "xfs_parent.h"
struct kmem_cache *xfs_attr_intent_cache;
@@ -50,7 +51,6 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
-STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args);
/*
* Internal routines when attribute list is more than one block.
@@ -87,6 +87,8 @@ xfs_attr_is_leaf(
struct xfs_iext_cursor icur;
struct xfs_bmbt_irec imap;
+ ASSERT(!xfs_need_iread_extents(ifp));
+
if (ifp->if_nextents != 1 || ifp->if_format != XFS_DINODE_FMT_EXTENTS)
return false;
@@ -224,11 +226,21 @@ int
xfs_attr_get_ilocked(
struct xfs_da_args *args)
{
+ int error;
+
xfs_assert_ilocked(args->dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
if (!xfs_inode_hasattr(args->dp))
return -ENOATTR;
+ /*
+ * The incore attr fork iext tree must be loaded for xfs_attr_is_leaf
+ * to work correctly.
+ */
+ error = xfs_iread_extents(args->trans, args->dp, XFS_ATTR_FORK);
+ if (error)
+ return error;
+
if (args->dp->i_af.if_format == XFS_DINODE_FMT_LOCAL)
return xfs_attr_shortform_getvalue(args);
if (xfs_attr_is_leaf(args->dp))
@@ -264,9 +276,11 @@ xfs_attr_get(
if (xfs_is_shutdown(args->dp->i_mount))
return -EIO;
+ if (!args->owner)
+ args->owner = args->dp->i_ino;
args->geo = args->dp->i_mount->m_attr_geo;
args->whichfork = XFS_ATTR_FORK;
- args->hashval = xfs_da_hashname(args->name, args->namelen);
+ xfs_attr_sethash(args);
/* Entirely possible to look up a name which doesn't exist */
args->op_flags = XFS_DA_OP_OKNOENT;
@@ -314,26 +328,20 @@ xfs_attr_calc_size(
return nblks;
}
-/* Initialize transaction reservation for attr operations */
-void
-xfs_init_attr_trans(
- struct xfs_da_args *args,
- struct xfs_trans_res *tres,
- unsigned int *total)
+/* Initialize transaction reservation for an xattr set/replace/upsert */
+inline struct xfs_trans_res
+xfs_attr_set_resv(
+ const struct xfs_da_args *args)
{
- struct xfs_mount *mp = args->dp->i_mount;
-
- if (args->value) {
- tres->tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
- M_RES(mp)->tr_attrsetrt.tr_logres *
- args->total;
- tres->tr_logcount = XFS_ATTRSET_LOG_COUNT;
- tres->tr_logflags = XFS_TRANS_PERM_LOG_RES;
- *total = args->total;
- } else {
- *tres = M_RES(mp)->tr_attrrm;
- *total = XFS_ATTRRM_SPACE_RES(mp);
- }
+ struct xfs_mount *mp = args->dp->i_mount;
+ struct xfs_trans_res ret = {
+ .tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
+ M_RES(mp)->tr_attrsetrt.tr_logres * args->total,
+ .tr_logcount = XFS_ATTRSET_LOG_COUNT,
+ .tr_logflags = XFS_TRANS_PERM_LOG_RES,
+ };
+
+ return ret;
}
/*
@@ -363,7 +371,7 @@ xfs_attr_try_sf_addname(
* Commit the shortform mods, and we're done.
* NOTE: this is also the error path (EEXIST, etc).
*/
- if (!error && !(args->op_flags & XFS_DA_OP_NOTIME))
+ if (!error)
xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
if (xfs_has_wsync(dp->i_mount))
@@ -401,6 +409,77 @@ out:
return error;
}
+/* Compute the hash value for a user/root/secure extended attribute */
+xfs_dahash_t
+xfs_attr_hashname(
+ const uint8_t *name,
+ int namelen)
+{
+ return xfs_da_hashname(name, namelen);
+}
+
+/* Compute the hash value for any extended attribute from any namespace. */
+xfs_dahash_t
+xfs_attr_hashval(
+ struct xfs_mount *mp,
+ unsigned int attr_flags,
+ const uint8_t *name,
+ int namelen,
+ const void *value,
+ int valuelen)
+{
+ ASSERT(xfs_attr_check_namespace(attr_flags));
+
+ if (attr_flags & XFS_ATTR_PARENT)
+ return xfs_parent_hashattr(mp, name, namelen, value, valuelen);
+
+ return xfs_attr_hashname(name, namelen);
+}
+
+/* Save the current remote block info and clear the current pointers. */
+static void
+xfs_attr_save_rmt_blk(
+ struct xfs_da_args *args)
+{
+ args->blkno2 = args->blkno;
+ args->index2 = args->index;
+ args->rmtblkno2 = args->rmtblkno;
+ args->rmtblkcnt2 = args->rmtblkcnt;
+ args->rmtvaluelen2 = args->rmtvaluelen;
+ args->rmtblkno = 0;
+ args->rmtblkcnt = 0;
+ args->rmtvaluelen = 0;
+}
+
+/* Restore the remote block info saved by xfs_attr_save_rmt_blk(). */
+static void
+xfs_attr_restore_rmt_blk(
+ struct xfs_da_args *args)
+{
+ args->blkno = args->blkno2;
+ args->index = args->index2;
+ args->rmtblkno = args->rmtblkno2;
+ args->rmtblkcnt = args->rmtblkcnt2;
+ args->rmtvaluelen = args->rmtvaluelen2;
+}
+
+/*
+ * PPTR_REPLACE operations require the caller to set the old and new names and
+ * values explicitly. Update the canonical fields to the new name and value
+ * here now that the removal phase has finished.
+ */
+static void
+xfs_attr_update_pptr_replace_args(
+ struct xfs_da_args *args)
+{
+ ASSERT(args->new_namelen > 0);
+ args->name = args->new_name;
+ args->namelen = args->new_namelen;
+ args->value = args->new_value;
+ args->valuelen = args->new_valuelen;
+ xfs_attr_sethash(args);
+}
+
/*
* Handle the state change on completion of a multi-state attr operation.
*
@@ -418,58 +497,84 @@ xfs_attr_complete_op(
enum xfs_delattr_state replace_state)
{
struct xfs_da_args *args = attr->xattri_da_args;
- bool do_replace = args->op_flags & XFS_DA_OP_REPLACE;
+
+ if (!(args->op_flags & XFS_DA_OP_REPLACE))
+ replace_state = XFS_DAS_DONE;
+ else if (xfs_attr_intent_op(attr) == XFS_ATTRI_OP_FLAGS_PPTR_REPLACE)
+ xfs_attr_update_pptr_replace_args(args);
args->op_flags &= ~XFS_DA_OP_REPLACE;
args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
- if (do_replace)
- return replace_state;
-
- return XFS_DAS_DONE;
+ return replace_state;
}
+/*
+ * Try to add an attribute to an inode in leaf form.
+ */
static int
xfs_attr_leaf_addname(
struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
+ struct xfs_buf *bp;
int error;
ASSERT(xfs_attr_is_leaf(args->dp));
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
+ if (error)
+ return error;
+
/*
- * Use the leaf buffer we may already hold locked as a result of
- * a sf-to-leaf conversion.
+ * Look up the xattr name to set the insertion point for the new xattr.
*/
- error = xfs_attr_leaf_try_add(args);
-
- if (error == -ENOSPC) {
- error = xfs_attr3_leaf_to_node(args);
- if (error)
- return error;
+ error = xfs_attr3_leaf_lookup_int(bp, args);
+ switch (error) {
+ case -ENOATTR:
+ if (args->op_flags & XFS_DA_OP_REPLACE)
+ goto out_brelse;
+ break;
+ case -EEXIST:
+ if (!(args->op_flags & XFS_DA_OP_REPLACE))
+ goto out_brelse;
+ trace_xfs_attr_leaf_replace(args);
/*
- * We're not in leaf format anymore, so roll the transaction and
- * retry the add to the newly allocated node block.
+ * Save the existing remote attr state so that the current
+ * values reflect the state of the new attribute we are about to
+ * add, not the attribute we just found and will remove later.
*/
- attr->xattri_dela_state = XFS_DAS_NODE_ADD;
- goto out;
+ xfs_attr_save_rmt_blk(args);
+ break;
+ case 0:
+ break;
+ default:
+ goto out_brelse;
}
- if (error)
- return error;
/*
* We need to commit and roll if we need to allocate remote xattr blocks
* or perform more xattr manipulations. Otherwise there is nothing more
* to do and we can return success.
*/
- if (args->rmtblkno)
+ if (!xfs_attr3_leaf_add(bp, args)) {
+ error = xfs_attr3_leaf_to_node(args);
+ if (error)
+ return error;
+
+ attr->xattri_dela_state = XFS_DAS_NODE_ADD;
+ } else if (args->rmtblkno) {
attr->xattri_dela_state = XFS_DAS_LEAF_SET_RMT;
- else
- attr->xattri_dela_state = xfs_attr_complete_op(attr,
- XFS_DAS_LEAF_REPLACE);
-out:
+ } else {
+ attr->xattri_dela_state =
+ xfs_attr_complete_op(attr, XFS_DAS_LEAF_REPLACE);
+ }
+
trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp);
+ return 0;
+
+out_brelse:
+ xfs_trans_brelse(args->trans, bp);
return error;
}
@@ -492,7 +597,7 @@ xfs_attr_node_addname(
return error;
error = xfs_attr_node_try_addname(attr);
- if (error == -ENOSPC) {
+ if (error == 1) {
error = xfs_attr3_leaf_to_node(args);
if (error)
return error;
@@ -647,8 +752,8 @@ xfs_attr_leaf_remove_attr(
int forkoff;
int error;
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
- &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno, &bp);
if (error)
return error;
@@ -679,7 +784,7 @@ xfs_attr_leaf_shrink(
if (!xfs_attr_is_leaf(dp))
return 0;
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
if (error)
return error;
@@ -868,6 +973,11 @@ xfs_attr_lookup(
return -ENOATTR;
}
+ /* Prerequisite for xfs_attr_is_leaf */
+ error = xfs_iread_extents(args->trans, args->dp, XFS_ATTR_FORK);
+ if (error)
+ return error;
+
if (xfs_attr_is_leaf(dp)) {
error = xfs_attr_leaf_hasname(args, &bp);
@@ -883,74 +993,75 @@ xfs_attr_lookup(
return error;
}
-static void
-xfs_attr_defer_add(
- struct xfs_da_args *args,
- unsigned int op_flags)
+int
+xfs_attr_add_fork(
+ struct xfs_inode *ip, /* incore inode pointer */
+ int size, /* space new attribute needs */
+ int rsvd) /* xact may use reserved blks */
{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp; /* transaction pointer */
+ unsigned int blks; /* space reservation */
+ int error; /* error return value */
- struct xfs_attr_intent *new;
+ if (xfs_is_metadir_inode(ip))
+ ASSERT(XFS_IS_DQDETACHED(ip));
+ else
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
- new = kmem_cache_zalloc(xfs_attr_intent_cache,
- GFP_KERNEL | __GFP_NOFAIL);
- new->xattri_op_flags = op_flags;
- new->xattri_da_args = args;
+ blks = XFS_ADDAFORK_SPACE_RES(mp);
- switch (op_flags) {
- case XFS_ATTRI_OP_FLAGS_SET:
- new->xattri_dela_state = xfs_attr_init_add_state(args);
- break;
- case XFS_ATTRI_OP_FLAGS_REPLACE:
- new->xattri_dela_state = xfs_attr_init_replace_state(args);
- break;
- case XFS_ATTRI_OP_FLAGS_REMOVE:
- new->xattri_dela_state = xfs_attr_init_remove_state(args);
- break;
- default:
- ASSERT(0);
- }
+ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
+ rsvd, &tp);
+ if (error)
+ return error;
+
+ if (xfs_inode_has_attr_fork(ip))
+ goto trans_cancel;
+
+ error = xfs_bmap_add_attrfork(tp, ip, size, rsvd);
+ if (error)
+ goto trans_cancel;
- xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type);
- trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp);
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+
+trans_cancel:
+ xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
}
/*
- * Note: If args->value is NULL the attribute will be removed, just like the
- * Linux ->setattr API.
+ * Make a change to the xattr structure.
+ *
+ * The caller must have initialized @args, attached dquots, and must not hold
+ * any ILOCKs. Reserved data blocks may be used if @rsvd is set.
+ *
+ * Returns -EEXIST for XFS_ATTRUPDATE_CREATE if the name already exists.
+ * Returns -ENOATTR for XFS_ATTRUPDATE_REMOVE if the name does not exist.
+ * Returns 0 on success, or a negative errno if something else went wrong.
*/
int
xfs_attr_set(
- struct xfs_da_args *args)
+ struct xfs_da_args *args,
+ enum xfs_attr_update op,
+ bool rsvd)
{
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
struct xfs_trans_res tres;
- bool rsvd = (args->attr_filter & XFS_ATTR_ROOT);
int error, local;
int rmt_blks = 0;
- unsigned int total;
-
- if (xfs_is_shutdown(dp->i_mount))
- return -EIO;
-
- error = xfs_qm_dqattach(dp);
- if (error)
- return error;
+ unsigned int total = 0;
- args->geo = mp->m_attr_geo;
- args->whichfork = XFS_ATTR_FORK;
- args->hashval = xfs_da_hashname(args->name, args->namelen);
+ ASSERT(!args->trans);
- /*
- * We have no control over the attribute names that userspace passes us
- * to remove, so we have to allow the name lookup prior to attribute
- * removal to fail as well. Preserve the logged flag, since we need
- * to pass that through to the logging code.
- */
- args->op_flags = XFS_DA_OP_OKNOENT |
- (args->op_flags & XFS_DA_OP_LOGGED);
-
- if (args->value) {
+ switch (op) {
+ case XFS_ATTRUPDATE_UPSERT:
+ case XFS_ATTRUPDATE_CREATE:
+ case XFS_ATTRUPDATE_REPLACE:
XFS_STATS_INC(mp, xs_attr_set);
args->total = xfs_attr_calc_size(args, &local);
@@ -963,33 +1074,36 @@ xfs_attr_set(
xfs_attr_sf_entsize_byname(args->namelen,
args->valuelen);
- error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
+ error = xfs_attr_add_fork(dp, sf_size, rsvd);
if (error)
return error;
}
if (!local)
rmt_blks = xfs_attr3_rmt_blocks(mp, args->valuelen);
- } else {
+
+ tres = xfs_attr_set_resv(args);
+ total = args->total;
+ break;
+ case XFS_ATTRUPDATE_REMOVE:
XFS_STATS_INC(mp, xs_attr_remove);
- rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+ rmt_blks = xfs_attr3_max_rmt_blocks(mp);
+ tres = M_RES(mp)->tr_attrrm;
+ total = XFS_ATTRRM_SPACE_RES(mp);
+ break;
}
/*
* Root fork attributes can use reserved data blocks for this
* operation if necessary
*/
- xfs_init_attr_trans(args, &tres, &total);
error = xfs_trans_alloc_inode(dp, &tres, total, 0, rsvd, &args->trans);
if (error)
return error;
- if (args->value || xfs_inode_hasattr(dp)) {
- error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK,
+ if (op != XFS_ATTRUPDATE_REMOVE || xfs_inode_hasattr(dp)) {
+ error = xfs_iext_count_extend(args->trans, dp, XFS_ATTR_FORK,
XFS_IEXT_ATTR_MANIP_CNT(rmt_blks));
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(args->trans, dp,
- XFS_IEXT_ATTR_MANIP_CNT(rmt_blks));
if (error)
goto out_trans_cancel;
}
@@ -997,26 +1111,26 @@ xfs_attr_set(
error = xfs_attr_lookup(args);
switch (error) {
case -EEXIST:
- if (!args->value) {
+ if (op == XFS_ATTRUPDATE_REMOVE) {
/* if no value, we are performing a remove operation */
- xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REMOVE);
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE);
break;
}
/* Pure create fails if the attr already exists */
- if (args->attr_flags & XATTR_CREATE)
+ if (op == XFS_ATTRUPDATE_CREATE)
goto out_trans_cancel;
- xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REPLACE);
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE);
break;
case -ENOATTR:
/* Can't remove what isn't there. */
- if (!args->value)
+ if (op == XFS_ATTRUPDATE_REMOVE)
goto out_trans_cancel;
/* Pure replace fails if no existing attr to replace. */
- if (args->attr_flags & XATTR_REPLACE)
+ if (op == XFS_ATTRUPDATE_REPLACE)
goto out_trans_cancel;
- xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_SET);
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET);
break;
default:
goto out_trans_cancel;
@@ -1029,8 +1143,7 @@ xfs_attr_set(
if (xfs_has_wsync(mp))
xfs_trans_set_sync(args->trans);
- if (!(args->op_flags & XFS_DA_OP_NOTIME))
- xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
+ xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
/*
* Commit the last in the sequence of transactions.
@@ -1039,6 +1152,7 @@ xfs_attr_set(
error = xfs_trans_commit(args->trans);
out_unlock:
xfs_iunlock(dp, XFS_ILOCK_EXCL);
+ args->trans = NULL;
return error;
out_trans_cancel:
@@ -1051,7 +1165,7 @@ out_trans_cancel:
* External routines when attribute list is inside the inode
*========================================================================*/
-static inline int xfs_attr_sf_totsize(struct xfs_inode *dp)
+int xfs_attr_sf_totsize(struct xfs_inode *dp)
{
struct xfs_attr_sf_hdr *sf = dp->i_af.if_data;
@@ -1110,88 +1224,6 @@ xfs_attr_shortform_addname(
* External routines when attribute list is one block
*========================================================================*/
-/* Save the current remote block info and clear the current pointers. */
-static void
-xfs_attr_save_rmt_blk(
- struct xfs_da_args *args)
-{
- args->blkno2 = args->blkno;
- args->index2 = args->index;
- args->rmtblkno2 = args->rmtblkno;
- args->rmtblkcnt2 = args->rmtblkcnt;
- args->rmtvaluelen2 = args->rmtvaluelen;
- args->rmtblkno = 0;
- args->rmtblkcnt = 0;
- args->rmtvaluelen = 0;
-}
-
-/* Set stored info about a remote block */
-static void
-xfs_attr_restore_rmt_blk(
- struct xfs_da_args *args)
-{
- args->blkno = args->blkno2;
- args->index = args->index2;
- args->rmtblkno = args->rmtblkno2;
- args->rmtblkcnt = args->rmtblkcnt2;
- args->rmtvaluelen = args->rmtvaluelen2;
-}
-
-/*
- * Tries to add an attribute to an inode in leaf form
- *
- * This function is meant to execute as part of a delayed operation and leaves
- * the transaction handling to the caller. On success the attribute is added
- * and the inode and transaction are left dirty. If there is not enough space,
- * the attr data is converted to node format and -ENOSPC is returned. Caller is
- * responsible for handling the dirty inode and transaction or adding the attr
- * in node format.
- */
-STATIC int
-xfs_attr_leaf_try_add(
- struct xfs_da_args *args)
-{
- struct xfs_buf *bp;
- int error;
-
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
- if (error)
- return error;
-
- /*
- * Look up the xattr name to set the insertion point for the new xattr.
- */
- error = xfs_attr3_leaf_lookup_int(bp, args);
- switch (error) {
- case -ENOATTR:
- if (args->op_flags & XFS_DA_OP_REPLACE)
- goto out_brelse;
- break;
- case -EEXIST:
- if (!(args->op_flags & XFS_DA_OP_REPLACE))
- goto out_brelse;
-
- trace_xfs_attr_leaf_replace(args);
- /*
- * Save the existing remote attr state so that the current
- * values reflect the state of the new attribute we are about to
- * add, not the attribute we just found and will remove later.
- */
- xfs_attr_save_rmt_blk(args);
- break;
- case 0:
- break;
- default:
- goto out_brelse;
- }
-
- return xfs_attr3_leaf_add(bp, args);
-
-out_brelse:
- xfs_trans_brelse(args->trans, bp);
- return error;
-}
-
/*
* Return EEXIST if attr is found, or ENOATTR if not
*/
@@ -1202,7 +1234,7 @@ xfs_attr_leaf_hasname(
{
int error = 0;
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, bp);
if (error)
return error;
@@ -1357,9 +1389,12 @@ error:
/*
* Add a name to a Btree-format attribute list.
*
- * This will involve walking down the Btree, and may involve splitting
- * leaf nodes and even splitting intermediate nodes up to and including
- * the root node (a special case of an intermediate node).
+ * This will involve walking down the Btree, and may involve splitting leaf
+ * nodes and even splitting intermediate nodes up to and including the root
+ * node (a special case of an intermediate node).
+ *
+ * If the tree was still in single leaf format and needs to be converted to
+ * real node format return 1 and let the caller handle that.
*/
static int
xfs_attr_node_try_addname(
@@ -1367,21 +1402,21 @@ xfs_attr_node_try_addname(
{
struct xfs_da_state *state = attr->xattri_da_state;
struct xfs_da_state_blk *blk;
- int error;
+ int error = 0;
trace_xfs_attr_node_addname(state->args);
blk = &state->path.blk[state->path.active-1];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
- error = xfs_attr3_leaf_add(blk->bp, state->args);
- if (error == -ENOSPC) {
+ if (!xfs_attr3_leaf_add(blk->bp, state->args)) {
if (state->path.active == 1) {
/*
* Its really a single leaf node, but it had
* out-of-line values so it looked like it *might*
* have been a b-tree. Let the caller deal with this.
*/
+ error = 1;
goto out;
}
@@ -1511,12 +1546,23 @@ out_release:
return error;
}
+/* Enforce that there is at most one namespace bit per attr. */
+inline bool xfs_attr_check_namespace(unsigned int attr_flags)
+{
+ return hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) < 2;
+}
+
/* Returns true if the attribute entry name is valid. */
bool
xfs_attr_namecheck(
+ unsigned int attr_flags,
const void *name,
size_t length)
{
+ /* Only one namespace bit allowed. */
+ if (!xfs_attr_check_namespace(attr_flags))
+ return false;
+
/*
* MAXNAMELEN includes the trailing null, but (name/length) leave it
* out, so use >= for the length check.
@@ -1524,6 +1570,10 @@ xfs_attr_namecheck(
if (length >= MAXNAMELEN)
return false;
+ /* Parent pointers have their own validation. */
+ if (attr_flags & XFS_ATTR_PARENT)
+ return xfs_parent_namecheck(attr_flags, name, length);
+
/* There shouldn't be any nulls here */
return !memchr(name, 0, length);
}
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 81be9b3e4004..0e51d0723f9a 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -47,8 +47,9 @@ struct xfs_attrlist_cursor_kern {
/* void; state communicated via *context */
-typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
- unsigned char *, int, int);
+typedef void (*put_listent_func_t)(struct xfs_attr_list_context *context,
+ int flags, unsigned char *name, int namelen, void *value,
+ int valuelen);
struct xfs_attr_list_context {
struct xfs_trans *tp;
@@ -510,8 +511,8 @@ struct xfs_attr_intent {
struct xfs_da_args *xattri_da_args;
/*
- * Shared buffer containing the attr name and value so that the logging
- * code can share large memory buffers between log items.
+ * Shared buffer containing the attr name, new name, and value so that
+ * the logging code can share large memory buffers between log items.
*/
struct xfs_attri_log_nameval *xattri_nameval;
@@ -529,6 +530,11 @@ struct xfs_attr_intent {
struct xfs_bmbt_irec xattri_map;
};
+static inline unsigned int
+xfs_attr_intent_op(const struct xfs_attr_intent *attr)
+{
+ return attr->xattri_op_flags & XFS_ATTRI_OP_FLAGS_TYPE_MASK;
+}
/*========================================================================
* Function prototypes for the kernel.
@@ -544,13 +550,22 @@ int xfs_inode_hasattr(struct xfs_inode *ip);
bool xfs_attr_is_leaf(struct xfs_inode *ip);
int xfs_attr_get_ilocked(struct xfs_da_args *args);
int xfs_attr_get(struct xfs_da_args *args);
-int xfs_attr_set(struct xfs_da_args *args);
+
+enum xfs_attr_update {
+ XFS_ATTRUPDATE_REMOVE, /* remove attr */
+ XFS_ATTRUPDATE_UPSERT, /* set value, replace any existing attr */
+ XFS_ATTRUPDATE_CREATE, /* set value, fail if attr already exists */
+ XFS_ATTRUPDATE_REPLACE, /* set value, fail if attr does not exist */
+};
+
+int xfs_attr_set(struct xfs_da_args *args, enum xfs_attr_update op, bool rsvd);
int xfs_attr_set_iter(struct xfs_attr_intent *attr);
int xfs_attr_remove_iter(struct xfs_attr_intent *attr);
-bool xfs_attr_namecheck(const void *name, size_t length);
+bool xfs_attr_check_namespace(unsigned int attr_flags);
+bool xfs_attr_namecheck(unsigned int attr_flags, const void *name,
+ size_t length);
int xfs_attr_calc_size(struct xfs_da_args *args, int *local);
-void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
- unsigned int *total);
+struct xfs_trans_res xfs_attr_set_resv(const struct xfs_da_args *args);
/*
* Check to see if the attr should be upgraded from non-existent or shortform to
@@ -590,7 +605,6 @@ xfs_attr_init_add_state(struct xfs_da_args *args)
static inline enum xfs_delattr_state
xfs_attr_init_remove_state(struct xfs_da_args *args)
{
- args->op_flags |= XFS_DA_OP_REMOVE;
if (xfs_attr_is_shortform(args->dp))
return XFS_DAS_SF_REMOVE;
if (xfs_attr_is_leaf(args->dp))
@@ -614,8 +628,25 @@ xfs_attr_init_replace_state(struct xfs_da_args *args)
return xfs_attr_init_add_state(args);
}
+xfs_dahash_t xfs_attr_hashname(const uint8_t *name, int namelen);
+
+xfs_dahash_t xfs_attr_hashval(struct xfs_mount *mp, unsigned int attr_flags,
+ const uint8_t *name, int namelen, const void *value,
+ int valuelen);
+
+/* Set the hash value for any extended attribute from any namespace. */
+static inline void xfs_attr_sethash(struct xfs_da_args *args)
+{
+ args->hashval = xfs_attr_hashval(args->dp->i_mount, args->attr_filter,
+ args->name, args->namelen,
+ args->value, args->valuelen);
+}
+
extern struct kmem_cache *xfs_attr_intent_cache;
int __init xfs_attr_intent_init_cache(void);
void xfs_attr_intent_destroy_cache(void);
+int xfs_attr_sf_totsize(struct xfs_inode *dp);
+int xfs_attr_add_fork(struct xfs_inode *ip, int size, int rsvd);
+
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index ac904cc1a97b..fddb55605e0c 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -47,7 +47,7 @@
*/
STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args,
xfs_dablk_t which_block, struct xfs_buf **bpp);
-STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
+STATIC void xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
struct xfs_attr3_icleaf_hdr *ichdr,
struct xfs_da_args *args, int freemap_index);
STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args,
@@ -388,6 +388,27 @@ xfs_attr3_leaf_verify(
return NULL;
}
+xfs_failaddr_t
+xfs_attr3_leaf_header_check(
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
+{
+ struct xfs_mount *mp = bp->b_mount;
+
+ if (xfs_has_crc(mp)) {
+ struct xfs_attr3_leafblock *hdr3 = bp->b_addr;
+
+ if (hdr3->hdr.info.hdr.magic !=
+ cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
+ return __this_address;
+
+ if (be64_to_cpu(hdr3->hdr.info.owner) != owner)
+ return __this_address;
+ }
+
+ return NULL;
+}
+
static void
xfs_attr3_leaf_write_verify(
struct xfs_buf *bp)
@@ -448,16 +469,30 @@ int
xfs_attr3_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t bno,
struct xfs_buf **bpp)
{
+ xfs_failaddr_t fa;
int err;
err = xfs_da_read_buf(tp, dp, bno, 0, bpp, XFS_ATTR_FORK,
&xfs_attr3_leaf_buf_ops);
- if (!err && tp && *bpp)
+ if (err || !(*bpp))
+ return err;
+
+ fa = xfs_attr3_leaf_header_check(*bpp, owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(*bpp, fa);
+ xfs_trans_brelse(tp, *bpp);
+ *bpp = NULL;
+ xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
+ return -EFSCORRUPTED;
+ }
+
+ if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
- return err;
+ return 0;
}
/*========================================================================
@@ -472,28 +507,57 @@ xfs_attr3_leaf_read(
* INCOMPLETE flag will not be set in attr->attr_filter, but rather
* XFS_DA_OP_RECOVERY will be set in args->op_flags.
*/
+static inline unsigned int xfs_attr_match_mask(const struct xfs_da_args *args)
+{
+ if (args->op_flags & XFS_DA_OP_RECOVERY)
+ return XFS_ATTR_NSP_ONDISK_MASK;
+ return XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE;
+}
+
+static inline bool
+xfs_attr_parent_match(
+ const struct xfs_da_args *args,
+ const void *value,
+ unsigned int valuelen)
+{
+ ASSERT(args->value != NULL);
+
+ /* Parent pointers do not use remote values */
+ if (!value)
+ return false;
+
+ /*
+ * The only value we support is a parent rec. However, we'll accept
+ * any valuelen so that offline repair can delete ATTR_PARENT values
+ * that are not parent pointers.
+ */
+ if (valuelen != args->valuelen)
+ return false;
+
+ return memcmp(args->value, value, valuelen) == 0;
+}
+
static bool
xfs_attr_match(
struct xfs_da_args *args,
- uint8_t namelen,
- unsigned char *name,
- int flags)
+ unsigned int attr_flags,
+ const unsigned char *name,
+ unsigned int namelen,
+ const void *value,
+ unsigned int valuelen)
{
+ unsigned int mask = xfs_attr_match_mask(args);
if (args->namelen != namelen)
return false;
+ if ((args->attr_filter & mask) != (attr_flags & mask))
+ return false;
if (memcmp(args->name, name, namelen) != 0)
return false;
- /* Recovery ignores the INCOMPLETE flag. */
- if ((args->op_flags & XFS_DA_OP_RECOVERY) &&
- args->attr_filter == (flags & XFS_ATTR_NSP_ONDISK_MASK))
- return true;
+ if (attr_flags & XFS_ATTR_PARENT)
+ return xfs_attr_parent_match(args, value, valuelen);
- /* All remaining matches need to be filtered by INCOMPLETE state. */
- if (args->attr_filter !=
- (flags & (XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE)))
- return false;
return true;
}
@@ -504,6 +568,13 @@ xfs_attr_copy_value(
int valuelen)
{
/*
+ * Parent pointer lookups require the caller to specify the name and
+ * value, so don't copy anything.
+ */
+ if (args->attr_filter & XFS_ATTR_PARENT)
+ return 0;
+
+ /*
* No copy if all we have to do is get the length
*/
if (!args->valuelen) {
@@ -615,7 +686,7 @@ xfs_attr_shortform_bytesfit(
*/
if (!dp->i_forkoff && dp->i_df.if_bytes >
xfs_default_attroffset(dp))
- dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+ dsize = xfs_bmdr_space_calc(MINDBTPTRS);
break;
case XFS_DINODE_FMT_BTREE:
/*
@@ -629,7 +700,7 @@ xfs_attr_shortform_bytesfit(
return 0;
return dp->i_forkoff;
}
- dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot);
+ dsize = xfs_bmap_bmdr_space(dp->i_df.if_broot);
break;
}
@@ -637,11 +708,11 @@ xfs_attr_shortform_bytesfit(
* A data fork btree root must have space for at least
* MINDBTPTRS key/ptr pairs if the data fork is small or empty.
*/
- minforkoff = max_t(int64_t, dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
+ minforkoff = max_t(int64_t, dsize, xfs_bmdr_space_calc(MINDBTPTRS));
minforkoff = roundup(minforkoff, 8) >> 3;
/* attr fork btree root can have at least this many key/ptr pairs */
- maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
+ maxforkoff = XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS);
maxforkoff = maxforkoff >> 3; /* rounded down */
if (offset >= maxforkoff)
@@ -711,8 +782,9 @@ xfs_attr_sf_findname(
for (sfe = xfs_attr_sf_firstentry(sf);
sfe < xfs_attr_sf_endptr(sf);
sfe = xfs_attr_sf_nextentry(sfe)) {
- if (xfs_attr_match(args, sfe->namelen, sfe->nameval,
- sfe->flags))
+ if (xfs_attr_match(args, sfe->flags, sfe->nameval,
+ sfe->namelen, &sfe->nameval[sfe->namelen],
+ sfe->valuelen))
return sfe;
}
@@ -819,7 +891,8 @@ xfs_attr_sf_removename(
*/
if (totsize == sizeof(struct xfs_attr_sf_hdr) && xfs_has_attr2(mp) &&
(dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
- !(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE))) {
+ !(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE)) &&
+ !xfs_has_parent(mp)) {
xfs_attr_fork_remove(dp, args->trans);
} else {
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
@@ -828,7 +901,8 @@ xfs_attr_sf_removename(
ASSERT(totsize > sizeof(struct xfs_attr_sf_hdr) ||
(args->op_flags & XFS_DA_OP_ADDNAME) ||
!xfs_has_attr2(mp) ||
- dp->i_df.if_format == XFS_DINODE_FMT_BTREE);
+ dp->i_df.if_format == XFS_DINODE_FMT_BTREE ||
+ xfs_has_parent(mp));
xfs_trans_log_inode(args->trans, dp,
XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
@@ -904,6 +978,7 @@ xfs_attr_shortform_to_leaf(
nargs.whichfork = XFS_ATTR_FORK;
nargs.trans = args->trans;
nargs.op_flags = XFS_DA_OP_OKNOENT;
+ nargs.owner = args->owner;
sfe = xfs_attr_sf_firstentry(sf);
for (i = 0; i < sf->count; i++) {
@@ -911,15 +986,17 @@ xfs_attr_shortform_to_leaf(
nargs.namelen = sfe->namelen;
nargs.value = &sfe->nameval[nargs.namelen];
nargs.valuelen = sfe->valuelen;
- nargs.hashval = xfs_da_hashname(sfe->nameval,
- sfe->namelen);
nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK;
+ if (!xfs_attr_check_namespace(sfe->flags)) {
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+ xfs_attr_sethash(&nargs);
error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
ASSERT(error == -ENOATTR);
- error = xfs_attr3_leaf_add(bp, &nargs);
- ASSERT(error != -ENOSPC);
- if (error)
- goto out;
+ if (!xfs_attr3_leaf_add(bp, &nargs))
+ ASSERT(0);
sfe = xfs_attr_sf_nextentry(sfe);
}
error = 0;
@@ -1027,7 +1104,7 @@ xfs_attr_shortform_verify(
* one namespace flag per xattr, so we can just count the
* bits (i.e. hweight) here.
*/
- if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1)
+ if (!xfs_attr_check_namespace(sfep->flags))
return __this_address;
sfep = next_sfep;
@@ -1059,10 +1136,7 @@ xfs_attr3_leaf_to_shortform(
trace_xfs_attr_leaf_to_sf(args);
- tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
- if (!tmpbuffer)
- return -ENOMEM;
-
+ tmpbuffer = kvmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
leaf = (xfs_attr_leafblock_t *)tmpbuffer;
@@ -1106,6 +1180,7 @@ xfs_attr3_leaf_to_shortform(
nargs.whichfork = XFS_ATTR_FORK;
nargs.trans = args->trans;
nargs.op_flags = XFS_DA_OP_OKNOENT;
+ nargs.owner = args->owner;
for (i = 0; i < ichdr.count; entry++, i++) {
if (entry->flags & XFS_ATTR_INCOMPLETE)
@@ -1125,7 +1200,7 @@ xfs_attr3_leaf_to_shortform(
error = 0;
out:
- kfree(tmpbuffer);
+ kvfree(tmpbuffer);
return error;
}
@@ -1158,7 +1233,7 @@ xfs_attr3_leaf_to_node(
error = xfs_da_grow_inode(args, &blkno);
if (error)
goto out;
- error = xfs_attr3_leaf_read(args->trans, dp, 0, &bp1);
+ error = xfs_attr3_leaf_read(args->trans, dp, args->owner, 0, &bp1);
if (error)
goto out;
@@ -1237,7 +1312,7 @@ xfs_attr3_leaf_create(
ichdr.magic = XFS_ATTR3_LEAF_MAGIC;
hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->owner = cpu_to_be64(dp->i_ino);
+ hdr3->owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr);
@@ -1256,6 +1331,9 @@ xfs_attr3_leaf_create(
/*
* Split the leaf node, rebalance, then add the new entry.
+ *
+ * Returns 0 if the entry was added, 1 if a further split is needed or a
+ * negative error number otherwise.
*/
int
xfs_attr3_leaf_split(
@@ -1263,8 +1341,9 @@ xfs_attr3_leaf_split(
struct xfs_da_state_blk *oldblk,
struct xfs_da_state_blk *newblk)
{
- xfs_dablk_t blkno;
- int error;
+ bool added;
+ xfs_dablk_t blkno;
+ int error;
trace_xfs_attr_leaf_split(state->args);
@@ -1299,10 +1378,10 @@ xfs_attr3_leaf_split(
*/
if (state->inleaf) {
trace_xfs_attr_leaf_add_old(state->args);
- error = xfs_attr3_leaf_add(oldblk->bp, state->args);
+ added = xfs_attr3_leaf_add(oldblk->bp, state->args);
} else {
trace_xfs_attr_leaf_add_new(state->args);
- error = xfs_attr3_leaf_add(newblk->bp, state->args);
+ added = xfs_attr3_leaf_add(newblk->bp, state->args);
}
/*
@@ -1310,13 +1389,15 @@ xfs_attr3_leaf_split(
*/
oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
- return error;
+ if (!added)
+ return 1;
+ return 0;
}
/*
* Add a name to the leaf attribute list structure.
*/
-int
+bool
xfs_attr3_leaf_add(
struct xfs_buf *bp,
struct xfs_da_args *args)
@@ -1325,6 +1406,7 @@ xfs_attr3_leaf_add(
struct xfs_attr3_icleaf_hdr ichdr;
int tablesize;
int entsize;
+ bool added = true;
int sum;
int tmp;
int i;
@@ -1353,7 +1435,7 @@ xfs_attr3_leaf_add(
if (ichdr.freemap[i].base < ichdr.firstused)
tmp += sizeof(xfs_attr_leaf_entry_t);
if (ichdr.freemap[i].size >= tmp) {
- tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
+ xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
goto out_log_hdr;
}
sum += ichdr.freemap[i].size;
@@ -1365,7 +1447,7 @@ xfs_attr3_leaf_add(
* no good and we should just give up.
*/
if (!ichdr.holes && sum < entsize)
- return -ENOSPC;
+ return false;
/*
* Compact the entries to coalesce free space.
@@ -1378,24 +1460,24 @@ xfs_attr3_leaf_add(
* free region, in freemap[0]. If it is not big enough, give up.
*/
if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
- tmp = -ENOSPC;
+ added = false;
goto out_log_hdr;
}
- tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
+ xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
out_log_hdr:
xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, &leaf->hdr,
xfs_attr3_leaf_hdr_size(leaf)));
- return tmp;
+ return added;
}
/*
* Add a name to a leaf attribute list structure.
*/
-STATIC int
+STATIC void
xfs_attr3_leaf_add_work(
struct xfs_buf *bp,
struct xfs_attr3_icleaf_hdr *ichdr,
@@ -1513,7 +1595,6 @@ xfs_attr3_leaf_add_work(
}
}
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
- return 0;
}
/*
@@ -1533,7 +1614,7 @@ xfs_attr3_leaf_compact(
trace_xfs_attr_leaf_compact(args);
- tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
+ tmpbuffer = kvmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
memset(bp->b_addr, 0, args->geo->blksize);
leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
@@ -1571,7 +1652,7 @@ xfs_attr3_leaf_compact(
*/
xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
- kfree(tmpbuffer);
+ kvfree(tmpbuffer);
}
/*
@@ -1993,7 +2074,7 @@ xfs_attr3_leaf_toosmall(
if (blkno == 0)
continue;
error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
- blkno, &bp);
+ state->args->owner, blkno, &bp);
if (error)
return error;
@@ -2250,7 +2331,7 @@ xfs_attr3_leaf_unbalance(
struct xfs_attr_leafblock *tmp_leaf;
struct xfs_attr3_icleaf_hdr tmphdr;
- tmp_leaf = kzalloc(state->args->geo->blksize,
+ tmp_leaf = kvzalloc(state->args->geo->blksize,
GFP_KERNEL | __GFP_NOFAIL);
/*
@@ -2291,7 +2372,7 @@ xfs_attr3_leaf_unbalance(
}
memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);
savehdr = tmphdr; /* struct copy */
- kfree(tmp_leaf);
+ kvfree(tmp_leaf);
}
xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr);
@@ -2401,18 +2482,23 @@ xfs_attr3_leaf_lookup_int(
*/
if (entry->flags & XFS_ATTR_LOCAL) {
name_loc = xfs_attr3_leaf_name_local(leaf, probe);
- if (!xfs_attr_match(args, name_loc->namelen,
- name_loc->nameval, entry->flags))
+ if (!xfs_attr_match(args, entry->flags,
+ name_loc->nameval, name_loc->namelen,
+ &name_loc->nameval[name_loc->namelen],
+ be16_to_cpu(name_loc->valuelen)))
continue;
args->index = probe;
return -EEXIST;
} else {
+ unsigned int valuelen;
+
name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
- if (!xfs_attr_match(args, name_rmt->namelen,
- name_rmt->name, entry->flags))
+ valuelen = be32_to_cpu(name_rmt->valuelen);
+ if (!xfs_attr_match(args, entry->flags, name_rmt->name,
+ name_rmt->namelen, NULL, valuelen))
continue;
args->index = probe;
- args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
+ args->rmtvaluelen = valuelen;
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
args->rmtblkcnt = xfs_attr3_rmt_blocks(
args->dp->i_mount,
@@ -2715,7 +2801,8 @@ xfs_attr3_leaf_clearflag(
/*
* Set up the operation.
*/
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno, &bp);
if (error)
return error;
@@ -2779,7 +2866,8 @@ xfs_attr3_leaf_setflag(
/*
* Set up the operation.
*/
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno, &bp);
if (error)
return error;
@@ -2838,7 +2926,8 @@ xfs_attr3_leaf_flipflags(
/*
* Read the block containing the "old" attr
*/
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp1);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno, &bp1);
if (error)
return error;
@@ -2846,8 +2935,8 @@ xfs_attr3_leaf_flipflags(
* Read the block containing the "new" attr, if it is different
*/
if (args->blkno2 != args->blkno) {
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2,
- &bp2);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno2, &bp2);
if (error)
return error;
} else {
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 9b9948639c0f..589f810eedc0 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -76,7 +76,7 @@ int xfs_attr3_leaf_split(struct xfs_da_state *state,
int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf,
struct xfs_da_args *args);
int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
-int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
+bool xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
@@ -98,12 +98,14 @@ int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
struct xfs_buf *leaf2_bp);
int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t bno, struct xfs_buf **bpp);
+ xfs_ino_t owner, xfs_dablk_t bno, struct xfs_buf **bpp);
void xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo,
struct xfs_attr3_icleaf_hdr *to,
struct xfs_attr_leafblock *from);
void xfs_attr3_leaf_hdr_to_disk(struct xfs_da_geometry *geo,
struct xfs_attr_leafblock *to,
struct xfs_attr3_icleaf_hdr *from);
+xfs_failaddr_t xfs_attr3_leaf_header_check(struct xfs_buf *bp,
+ xfs_ino_t owner);
#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index ff0412828772..4c44ce1c8a64 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -43,19 +43,32 @@
* the logging system and therefore never have a log item.
*/
-/*
- * Each contiguous block has a header, so it is not just a simple attribute
- * length to FSB conversion.
- */
-int
+/* How many bytes can be stored in a remote value buffer? */
+inline unsigned int
+xfs_attr3_rmt_buf_space(
+ struct xfs_mount *mp)
+{
+ unsigned int blocksize = mp->m_attr_geo->blksize;
+
+ if (xfs_has_crc(mp))
+ return blocksize - sizeof(struct xfs_attr3_rmt_hdr);
+
+ return blocksize;
+}
+
+/* Compute number of fsblocks needed to store a remote attr value */
+unsigned int
xfs_attr3_rmt_blocks(
- struct xfs_mount *mp,
- int attrlen)
+ struct xfs_mount *mp,
+ unsigned int attrlen)
{
- if (xfs_has_crc(mp)) {
- int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
- return (attrlen + buflen - 1) / buflen;
- }
+ /*
+ * Each contiguous block has a header, so it is not just a simple
+ * attribute length to FSB conversion.
+ */
+ if (xfs_has_crc(mp))
+ return howmany(attrlen, xfs_attr3_rmt_buf_space(mp));
+
return XFS_B_TO_FSB(mp, attrlen);
}
@@ -92,7 +105,6 @@ xfs_attr3_rmt_verify(
struct xfs_mount *mp,
struct xfs_buf *bp,
void *ptr,
- int fsbsize,
xfs_daddr_t bno)
{
struct xfs_attr3_rmt_hdr *rmt = ptr;
@@ -103,7 +115,7 @@ xfs_attr3_rmt_verify(
return __this_address;
if (be64_to_cpu(rmt->rm_blkno) != bno)
return __this_address;
- if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
+ if (be32_to_cpu(rmt->rm_bytes) > mp->m_attr_geo->blksize - sizeof(*rmt))
return __this_address;
if (be32_to_cpu(rmt->rm_offset) +
be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
@@ -122,9 +134,9 @@ __xfs_attr3_rmt_read_verify(
{
struct xfs_mount *mp = bp->b_mount;
char *ptr;
- int len;
+ unsigned int len;
xfs_daddr_t bno;
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_has_crc(mp))
@@ -141,7 +153,7 @@ __xfs_attr3_rmt_read_verify(
*failaddr = __this_address;
return -EFSBADCRC;
}
- *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno);
+ *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, bno);
if (*failaddr)
return -EFSCORRUPTED;
len -= blksize;
@@ -186,7 +198,7 @@ xfs_attr3_rmt_write_verify(
{
struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int blksize = mp->m_attr_geo->blksize;
char *ptr;
int len;
xfs_daddr_t bno;
@@ -203,7 +215,7 @@ xfs_attr3_rmt_write_verify(
while (len > 0) {
struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
- fa = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno);
+ fa = xfs_attr3_rmt_verify(mp, bp, ptr, bno);
if (fa) {
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
@@ -280,30 +292,30 @@ xfs_attr_rmtval_copyout(
struct xfs_mount *mp,
struct xfs_buf *bp,
struct xfs_inode *dp,
- int *offset,
- int *valuelen,
+ xfs_ino_t owner,
+ unsigned int *offset,
+ unsigned int *valuelen,
uint8_t **dst)
{
char *src = bp->b_addr;
- xfs_ino_t ino = dp->i_ino;
xfs_daddr_t bno = xfs_buf_daddr(bp);
- int len = BBTOB(bp->b_length);
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int len = BBTOB(bp->b_length);
+ unsigned int blksize = mp->m_attr_geo->blksize;
ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
- int hdr_size = 0;
- int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+ unsigned int hdr_size = 0;
+ unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp);
byte_cnt = min(*valuelen, byte_cnt);
if (xfs_has_crc(mp)) {
- if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
+ if (xfs_attr3_rmt_hdr_ok(src, owner, *offset,
byte_cnt, bno)) {
xfs_alert(mp,
"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
- bno, *offset, byte_cnt, ino);
+ bno, *offset, byte_cnt, owner);
xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
return -EFSCORRUPTED;
}
@@ -330,20 +342,20 @@ xfs_attr_rmtval_copyin(
struct xfs_mount *mp,
struct xfs_buf *bp,
xfs_ino_t ino,
- int *offset,
- int *valuelen,
+ unsigned int *offset,
+ unsigned int *valuelen,
uint8_t **src)
{
char *dst = bp->b_addr;
xfs_daddr_t bno = xfs_buf_daddr(bp);
- int len = BBTOB(bp->b_length);
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int len = BBTOB(bp->b_length);
+ unsigned int blksize = mp->m_attr_geo->blksize;
ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
- int hdr_size;
- int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+ unsigned int hdr_size;
+ unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp);
byte_cnt = min(*valuelen, byte_cnt);
hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
@@ -389,12 +401,12 @@ xfs_attr_rmtval_get(
struct xfs_buf *bp;
xfs_dablk_t lblkno = args->rmtblkno;
uint8_t *dst = args->value;
- int valuelen;
+ unsigned int valuelen;
int nmap;
int error;
- int blkcnt = args->rmtblkcnt;
+ unsigned int blkcnt = args->rmtblkcnt;
int i;
- int offset = 0;
+ unsigned int offset = 0;
trace_xfs_attr_rmtval_get(args);
@@ -427,8 +439,7 @@ xfs_attr_rmtval_get(
return error;
error = xfs_attr_rmtval_copyout(mp, bp, args->dp,
- &offset, &valuelen,
- &dst);
+ args->owner, &offset, &valuelen, &dst);
xfs_buf_relse(bp);
if (error)
return error;
@@ -453,7 +464,7 @@ xfs_attr_rmt_find_hole(
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
int error;
- int blkcnt;
+ unsigned int blkcnt;
xfs_fileoff_t lfileoff = 0;
/*
@@ -482,11 +493,11 @@ xfs_attr_rmtval_set_value(
struct xfs_bmbt_irec map;
xfs_dablk_t lblkno;
uint8_t *src = args->value;
- int blkcnt;
- int valuelen;
+ unsigned int blkcnt;
+ unsigned int valuelen;
int nmap;
int error;
- int offset = 0;
+ unsigned int offset = 0;
/*
* Roll through the "value", copying the attribute value to the
@@ -522,8 +533,8 @@ xfs_attr_rmtval_set_value(
return error;
bp->b_ops = &xfs_attr3_rmt_buf_ops;
- xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
- &valuelen, &src);
+ xfs_attr_rmtval_copyin(mp, bp, args->owner, &offset, &valuelen,
+ &src);
error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
xfs_buf_relse(bp);
@@ -626,7 +637,6 @@ xfs_attr_rmtval_set_blk(
if (error)
return error;
- ASSERT(nmap == 1);
ASSERT((map->br_startblock != DELAYSTARTBLOCK) &&
(map->br_startblock != HOLESTARTBLOCK));
@@ -646,7 +656,7 @@ xfs_attr_rmtval_invalidate(
struct xfs_da_args *args)
{
xfs_dablk_t lblkno;
- int blkcnt;
+ unsigned int blkcnt;
int error;
/*
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index d097ec6c4dc3..e3c6c7d774bf 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -6,7 +6,13 @@
#ifndef __XFS_ATTR_REMOTE_H__
#define __XFS_ATTR_REMOTE_H__
-int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
+unsigned int xfs_attr3_rmt_blocks(struct xfs_mount *mp, unsigned int attrlen);
+
+/* Number of rmt blocks needed to store the maximally sized attr value */
+static inline unsigned int xfs_attr3_max_rmt_blocks(struct xfs_mount *mp)
+{
+ return xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+}
int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index bc4422223024..73bdc0e55682 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -16,6 +16,7 @@ typedef struct xfs_attr_sf_sort {
uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
xfs_dahash_t hash; /* this entry's hash value */
unsigned char *name; /* name value, pointer into buffer */
+ void *value;
} xfs_attr_sf_sort_t;
#define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 656c95a22f2e..5255f93bae31 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -39,6 +39,8 @@
#include "xfs_health.h"
#include "xfs_bmap_item.h"
#include "xfs_symlink_remote.h"
+#include "xfs_inode_util.h"
+#include "xfs_rtgroup.h"
struct kmem_cache *xfs_bmap_intent_cache;
@@ -78,9 +80,9 @@ xfs_bmap_compute_maxlevels(
maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
whichfork);
if (whichfork == XFS_DATA_FORK)
- sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+ sz = xfs_bmdr_space_calc(MINDBTPTRS);
else
- sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
+ sz = xfs_bmdr_space_calc(MINABTPTRS);
maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
minleafrecs = mp->m_bmap_dmnr[0];
@@ -101,8 +103,8 @@ xfs_bmap_compute_attr_offset(
struct xfs_mount *mp)
{
if (mp->m_sb.sb_inodesize == 256)
- return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
- return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
+ return XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS);
+ return xfs_bmdr_space_calc(6 * MINABTPTRS);
}
STATIC int /* error */
@@ -297,7 +299,7 @@ xfs_check_block(
prevp = NULL;
for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
dmxr = mp->m_bmap_dmxr[0];
- keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
+ keyp = xfs_bmbt_key_addr(mp, block, i);
if (prevp) {
ASSERT(be64_to_cpu(prevp->br_startoff) <
@@ -309,15 +311,15 @@ xfs_check_block(
* Compare the block numbers to see if there are dups.
*/
if (root)
- pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
+ pp = xfs_bmap_broot_ptr_addr(mp, block, i, sz);
else
- pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
+ pp = xfs_bmbt_ptr_addr(mp, block, i, dmxr);
for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
if (root)
- thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
+ thispa = xfs_bmap_broot_ptr_addr(mp, block, j, sz);
else
- thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
+ thispa = xfs_bmbt_ptr_addr(mp, block, j, dmxr);
if (*thispa == *pp) {
xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld",
__func__, j, i,
@@ -372,7 +374,7 @@ xfs_bmap_check_leaf_extents(
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
- pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+ pp = xfs_bmap_broot_ptr_addr(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
ASSERT(bno != NULLFSBLOCK);
@@ -405,7 +407,7 @@ xfs_bmap_check_leaf_extents(
*/
xfs_check_block(block, mp, 0, 0);
- pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
+ pp = xfs_bmbt_ptr_addr(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
xfs_btree_mark_sick(cur);
@@ -445,14 +447,14 @@ xfs_bmap_check_leaf_extents(
* conform with the first entry in this one.
*/
- ep = XFS_BMBT_REC_ADDR(mp, block, 1);
+ ep = xfs_bmbt_rec_addr(mp, block, 1);
if (i) {
ASSERT(xfs_bmbt_disk_get_startoff(&last) +
xfs_bmbt_disk_get_blockcount(&last) <=
xfs_bmbt_disk_get_startoff(ep));
}
for (j = 1; j < num_recs; j++) {
- nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
+ nextp = xfs_bmbt_rec_addr(mp, block, j + 1);
ASSERT(xfs_bmbt_disk_get_startoff(ep) +
xfs_bmbt_disk_get_blockcount(ep) <=
xfs_bmbt_disk_get_startoff(nextp));
@@ -583,9 +585,9 @@ xfs_bmap_btree_to_extents(
ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
ASSERT(be16_to_cpu(rblock->bb_level) == 1);
ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
- ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
+ ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false) == 1);
- pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
+ pp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, ifp->if_broot_bytes);
cbno = be64_to_cpu(*pp);
#ifdef DEBUG
if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) {
@@ -604,7 +606,7 @@ xfs_bmap_btree_to_extents(
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
return error;
@@ -713,7 +715,7 @@ xfs_bmap_extents_to_btree(
for_each_xfs_iext(ifp, &icur, &rec) {
if (isnullstartblock(rec.br_startblock))
continue;
- arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
+ arp = xfs_bmbt_rec_addr(mp, ablock, 1 + cnt);
xfs_bmbt_disk_set_all(arp, &rec);
cnt++;
}
@@ -723,10 +725,10 @@ xfs_bmap_extents_to_btree(
/*
* Fill in the root key and pointer.
*/
- kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
- arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
+ kp = xfs_bmbt_key_addr(mp, block, 1);
+ arp = xfs_bmbt_rec_addr(mp, ablock, 1);
kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
- pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
+ pp = xfs_bmbt_ptr_addr(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
be16_to_cpu(block->bb_level)));
*pp = cpu_to_be64(args.fsbno);
@@ -779,7 +781,7 @@ xfs_bmap_local_to_extents_empty(
}
-STATIC int /* error */
+int /* error */
xfs_bmap_local_to_extents(
xfs_trans_t *tp, /* transaction pointer */
xfs_inode_t *ip, /* incore inode pointer */
@@ -789,7 +791,8 @@ xfs_bmap_local_to_extents(
void (*init_fn)(struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
- struct xfs_ifork *ifp))
+ struct xfs_ifork *ifp, void *priv),
+ void *priv)
{
int error = 0;
int flags; /* logging flags returned */
@@ -850,7 +853,7 @@ xfs_bmap_local_to_extents(
* log here. Note that init_fn must also set the buffer log item type
* correctly.
*/
- init_fn(tp, bp, ip, ifp);
+ init_fn(tp, bp, ip, ifp, priv);
/* account for the change in fork size */
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -894,7 +897,7 @@ xfs_bmap_add_attrfork_btree(
mp = ip->i_mount;
- if (XFS_BMAP_BMDR_SPACE(block) <= xfs_inode_data_fork_size(ip))
+ if (xfs_bmap_bmdr_space(block) <= xfs_inode_data_fork_size(ip))
*flags |= XFS_ILOG_DBROOT;
else {
cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
@@ -976,13 +979,14 @@ xfs_bmap_add_attrfork_local(
dargs.total = dargs.geo->fsbcount;
dargs.whichfork = XFS_DATA_FORK;
dargs.trans = tp;
+ dargs.owner = ip->i_ino;
return xfs_dir2_sf_to_block(&dargs);
}
if (S_ISLNK(VFS_I(ip)->i_mode))
return xfs_bmap_local_to_extents(tp, ip, 1, flags,
- XFS_DATA_FORK,
- xfs_symlink_local_to_remote);
+ XFS_DATA_FORK, xfs_symlink_local_to_remote,
+ NULL);
/* should only be called for types that support local format data */
ASSERT(0);
@@ -1023,40 +1027,32 @@ xfs_bmap_set_attrforkoff(
}
/*
- * Convert inode from non-attributed to attributed.
- * Must not be in a transaction, ip must not be locked.
+ * Convert inode from non-attributed to attributed. Caller must hold the
+ * ILOCK_EXCL and the file cannot have an attr fork.
*/
int /* error code */
xfs_bmap_add_attrfork(
- xfs_inode_t *ip, /* incore inode pointer */
+ struct xfs_trans *tp,
+ struct xfs_inode *ip, /* incore inode pointer */
int size, /* space new attribute needs */
int rsvd) /* xact may use reserved blks */
{
- xfs_mount_t *mp; /* mount structure */
- xfs_trans_t *tp; /* transaction pointer */
- int blks; /* space reservation */
+ struct xfs_mount *mp = tp->t_mountp;
int version = 1; /* superblock attr version */
int logflags; /* logging flags */
int error; /* error return value */
- ASSERT(xfs_inode_has_attr_fork(ip) == 0);
-
- mp = ip->i_mount;
- ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
-
- blks = XFS_ADDAFORK_SPACE_RES(mp);
-
- error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
- rsvd, &tp);
- if (error)
- return error;
- if (xfs_inode_has_attr_fork(ip))
- goto trans_cancel;
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ if (xfs_is_metadir_inode(ip))
+ ASSERT(XFS_IS_DQDETACHED(ip));
+ else
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
+ ASSERT(!xfs_inode_has_attr_fork(ip));
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_bmap_set_attrforkoff(ip, size, &version);
if (error)
- goto trans_cancel;
+ return error;
xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
logflags = 0;
@@ -1077,7 +1073,7 @@ xfs_bmap_add_attrfork(
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
if (error)
- goto trans_cancel;
+ return error;
if (!xfs_has_attr(mp) ||
(!xfs_has_attr2(mp) && version == 2)) {
bool log_sb = false;
@@ -1096,14 +1092,7 @@ xfs_bmap_add_attrfork(
xfs_log_sb(tp);
}
- error = xfs_trans_commit(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
-
-trans_cancel:
- xfs_trans_cancel(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
+ return 0;
}
/*
@@ -1175,7 +1164,7 @@ xfs_iread_bmbt_block(
}
/* Copy records into the incore cache. */
- frp = XFS_BMBT_REC_ADDR(mp, block, 1);
+ frp = xfs_bmbt_rec_addr(mp, block, 1);
for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
struct xfs_bmbt_irec new;
xfs_failaddr_t fa;
@@ -1438,6 +1427,24 @@ xfs_bmap_last_offset(
* Extent tree manipulation functions used during allocation.
*/
+static inline bool
+xfs_bmap_same_rtgroup(
+ struct xfs_inode *ip,
+ int whichfork,
+ struct xfs_bmbt_irec *left,
+ struct xfs_bmbt_irec *right)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ if (xfs_ifork_is_realtime(ip, whichfork) && xfs_has_rtgroups(mp)) {
+ if (xfs_rtb_to_rgno(mp, left->br_startblock) !=
+ xfs_rtb_to_rgno(mp, right->br_startblock))
+ return false;
+ }
+
+ return true;
+}
+
/*
* Convert a delayed allocation to a real allocation.
*/
@@ -1507,7 +1514,8 @@ xfs_bmap_add_extent_delay_real(
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
LEFT.br_state == new->br_state &&
- LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(bma->ip, whichfork, &LEFT, new))
state |= BMAP_LEFT_CONTIG;
/*
@@ -1531,7 +1539,8 @@ xfs_bmap_add_extent_delay_real(
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING) ||
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
- <= XFS_MAX_BMBT_EXTLEN))
+ <= XFS_MAX_BMBT_EXTLEN) &&
+ xfs_bmap_same_rtgroup(bma->ip, whichfork, new, &RIGHT))
state |= BMAP_RIGHT_CONTIG;
error = 0;
@@ -1586,6 +1595,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1616,6 +1626,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1650,6 +1661,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1684,6 +1696,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1722,6 +1735,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING:
@@ -1812,6 +1826,7 @@ xfs_bmap_add_extent_delay_real(
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
xfs_iext_next(ifp, &bma->icur);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
+ ASSERT(da_new <= da_old);
break;
case BMAP_RIGHT_FILLING:
@@ -1861,6 +1876,7 @@ xfs_bmap_add_extent_delay_real(
PREV.br_blockcount = temp;
xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
xfs_iext_next(ifp, &bma->icur);
+ ASSERT(da_new <= da_old);
break;
case 0:
@@ -1975,7 +1991,7 @@ xfs_bmap_add_extent_delay_real(
}
if (da_new != da_old)
- xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
+ xfs_mod_delalloc(bma->ip, 0, (int64_t)da_new - da_old);
if (bma->cur) {
da_new += bma->cur->bc_bmap.allocated;
@@ -1983,11 +1999,10 @@ xfs_bmap_add_extent_delay_real(
}
/* adjust for changes in reserved delayed indirect blocks */
- if (da_new != da_old) {
- ASSERT(state == 0 || da_new < da_old);
- error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
- false);
- }
+ if (da_new < da_old)
+ xfs_add_fdblocks(mp, da_old - da_new);
+ else if (da_new > da_old)
+ error = xfs_dec_fdblocks(mp, da_new - da_old, true);
xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
@@ -2070,7 +2085,8 @@ xfs_bmap_add_extent_unwritten_real(
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
LEFT.br_state == new->br_state &&
- LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(ip, whichfork, &LEFT, new))
state |= BMAP_LEFT_CONTIG;
/*
@@ -2094,7 +2110,8 @@ xfs_bmap_add_extent_unwritten_real(
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING) ||
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
- <= XFS_MAX_BMBT_EXTLEN))
+ <= XFS_MAX_BMBT_EXTLEN) &&
+ xfs_bmap_same_rtgroup(ip, whichfork, new, &RIGHT))
state |= BMAP_RIGHT_CONTIG;
/*
@@ -2688,12 +2705,12 @@ xfs_bmap_add_extent_hole_delay(
}
if (oldlen != newlen) {
ASSERT(oldlen > newlen);
- xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
- false);
+ xfs_add_fdblocks(ip->i_mount, oldlen - newlen);
+
/*
* Nothing to do for disk quota accounting here.
*/
- xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
+ xfs_mod_delalloc(ip, 0, (int64_t)newlen - oldlen);
}
}
@@ -2754,7 +2771,8 @@ xfs_bmap_add_extent_hole_real(
left.br_startoff + left.br_blockcount == new->br_startoff &&
left.br_startblock + left.br_blockcount == new->br_startblock &&
left.br_state == new->br_state &&
- left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(ip, whichfork, &left, new))
state |= BMAP_LEFT_CONTIG;
if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
@@ -2764,7 +2782,8 @@ xfs_bmap_add_extent_hole_real(
new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
(!(state & BMAP_LEFT_CONTIG) ||
left.br_blockcount + new->br_blockcount +
- right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))
+ right.br_blockcount <= XFS_MAX_BMBT_EXTLEN) &&
+ xfs_bmap_same_rtgroup(ip, whichfork, new, &right))
state |= BMAP_RIGHT_CONTIG;
error = 0;
@@ -3121,6 +3140,30 @@ xfs_bmap_extsize_align(
return 0;
}
+static inline bool
+xfs_bmap_adjacent_valid(
+ struct xfs_bmalloca *ap,
+ xfs_fsblock_t x,
+ xfs_fsblock_t y)
+{
+ struct xfs_mount *mp = ap->ip->i_mount;
+
+ if (XFS_IS_REALTIME_INODE(ap->ip) &&
+ (ap->datatype & XFS_ALLOC_USERDATA)) {
+ if (!xfs_has_rtgroups(mp))
+ return x < mp->m_sb.sb_rblocks;
+
+ return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) &&
+ xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount &&
+ xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents;
+
+ }
+
+ return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) &&
+ XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount &&
+ XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks;
+}
+
#define XFS_ALLOC_GAP_UNITS 4
/* returns true if ap->blkno was modified */
@@ -3128,36 +3171,25 @@ bool
xfs_bmap_adjacent(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
{
- xfs_fsblock_t adjust; /* adjustment to block numbers */
- xfs_mount_t *mp; /* mount point structure */
- int rt; /* true if inode is realtime */
-
-#define ISVALID(x,y) \
- (rt ? \
- (x) < mp->m_sb.sb_rblocks : \
- XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
- XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
- XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
-
- mp = ap->ip->i_mount;
- rt = XFS_IS_REALTIME_INODE(ap->ip) &&
- (ap->datatype & XFS_ALLOC_USERDATA);
+ xfs_fsblock_t adjust; /* adjustment to block numbers */
+
/*
* If allocating at eof, and there's a previous real block,
* try to use its last block as our starting point.
*/
if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
!isnullstartblock(ap->prev.br_startblock) &&
- ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
- ap->prev.br_startblock)) {
+ xfs_bmap_adjacent_valid(ap,
+ ap->prev.br_startblock + ap->prev.br_blockcount,
+ ap->prev.br_startblock)) {
ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
/*
* Adjust for the gap between prevp and us.
*/
adjust = ap->offset -
(ap->prev.br_startoff + ap->prev.br_blockcount);
- if (adjust &&
- ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
+ if (adjust && xfs_bmap_adjacent_valid(ap, ap->blkno + adjust,
+ ap->prev.br_startblock))
ap->blkno += adjust;
return true;
}
@@ -3180,7 +3212,8 @@ xfs_bmap_adjacent(
!isnullstartblock(ap->prev.br_startblock) &&
(prevbno = ap->prev.br_startblock +
ap->prev.br_blockcount) &&
- ISVALID(prevbno, ap->prev.br_startblock)) {
+ xfs_bmap_adjacent_valid(ap, prevbno,
+ ap->prev.br_startblock)) {
/*
* Calculate gap to end of previous block.
*/
@@ -3196,8 +3229,8 @@ xfs_bmap_adjacent(
* number, then just use the end of the previous block.
*/
if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
- ISVALID(prevbno + prevdiff,
- ap->prev.br_startblock))
+ xfs_bmap_adjacent_valid(ap, prevbno + prevdiff,
+ ap->prev.br_startblock))
prevbno += adjust;
else
prevdiff += adjust;
@@ -3229,9 +3262,11 @@ xfs_bmap_adjacent(
* offset by our length.
*/
if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
- ISVALID(gotbno - gotdiff, gotbno))
+ xfs_bmap_adjacent_valid(ap, gotbno - gotdiff,
+ gotbno))
gotbno -= adjust;
- else if (ISVALID(gotbno - ap->length, gotbno)) {
+ else if (xfs_bmap_adjacent_valid(ap, gotbno - ap->length,
+ gotbno)) {
gotbno -= ap->length;
gotdiff += adjust - ap->length;
} else
@@ -3259,7 +3294,7 @@ xfs_bmap_adjacent(
return true;
}
}
-#undef ISVALID
+
return false;
}
@@ -3280,7 +3315,7 @@ xfs_bmap_longest_free_extent(
}
longest = xfs_alloc_longest_free_extent(pag,
- xfs_alloc_min_freelist(pag->pag_mount, pag),
+ xfs_alloc_min_freelist(pag_mount(pag), pag),
xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
if (*blen < longest)
*blen = longest;
@@ -3370,7 +3405,7 @@ xfs_bmap_alloc_account(
* yet.
*/
if (ap->wasdel) {
- xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
+ xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0);
return;
}
@@ -3394,7 +3429,7 @@ xfs_bmap_alloc_account(
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
if (ap->wasdel) {
ap->ip->i_delayed_blks -= ap->length;
- xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
+ xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0);
fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT;
} else {
fld = isrt ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
@@ -3477,31 +3512,19 @@ xfs_bmap_process_allocated_extent(
xfs_bmap_alloc_account(ap);
}
-#ifdef DEBUG
static int
xfs_bmap_exact_minlen_extent_alloc(
- struct xfs_bmalloca *ap)
+ struct xfs_bmalloca *ap,
+ struct xfs_alloc_arg *args)
{
- struct xfs_mount *mp = ap->ip->i_mount;
- struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp };
- xfs_fileoff_t orig_offset;
- xfs_extlen_t orig_length;
- int error;
-
- ASSERT(ap->length);
-
if (ap->minlen != 1) {
- ap->blkno = NULLFSBLOCK;
- ap->length = 0;
+ args->fsbno = NULLFSBLOCK;
return 0;
}
- orig_offset = ap->offset;
- orig_length = ap->length;
-
- args.alloc_minlen_only = 1;
-
- xfs_bmap_compute_alignments(ap, &args);
+ args->alloc_minlen_only = 1;
+ args->minlen = args->maxlen = ap->minlen;
+ args->total = ap->total;
/*
* Unlike the longest extent available in an AG, we don't track
@@ -3511,39 +3534,16 @@ xfs_bmap_exact_minlen_extent_alloc(
* we need not be concerned about a drop in performance in
* "debug only" code paths.
*/
- ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
-
- args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
- args.minlen = args.maxlen = ap->minlen;
- args.total = ap->total;
-
- args.alignment = 1;
- args.minalignslop = 0;
-
- args.minleft = ap->minleft;
- args.wasdel = ap->wasdel;
- args.resv = XFS_AG_RESV_NONE;
- args.datatype = ap->datatype;
+ ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
- error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
- if (error)
- return error;
-
- if (args.fsbno != NULLFSBLOCK) {
- xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
- orig_length);
- } else {
- ap->blkno = NULLFSBLOCK;
- ap->length = 0;
- }
-
- return 0;
+ /*
+ * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG
+ * iteration and then drops args->total to args->minlen, which might be
+ * required to find an allocation for the transaction reservation when
+ * the file system is very full.
+ */
+ return xfs_bmap_btalloc_low_space(ap, args);
}
-#else
-
-#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
-
-#endif
/*
* If we are not low on available data blocks and we are allocating at
@@ -3801,8 +3801,11 @@ xfs_bmap_btalloc(
/* Trim the allocation back to the maximum an AG can fit. */
args.maxlen = min(ap->length, mp->m_ag_max_usable);
- if ((ap->datatype & XFS_ALLOC_USERDATA) &&
- xfs_inode_is_filestream(ap->ip))
+ if (unlikely(XFS_TEST_ERROR(false, mp,
+ XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+ error = xfs_bmap_exact_minlen_extent_alloc(ap, &args);
+ else if ((ap->datatype & XFS_ALLOC_USERDATA) &&
+ xfs_inode_is_filestream(ap->ip))
error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
else
error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
@@ -4066,21 +4069,34 @@ xfs_bmapi_reserve_delalloc(
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
xfs_extlen_t alen;
xfs_extlen_t indlen;
+ uint64_t fdblocks;
int error;
- xfs_fileoff_t aoff = off;
+ xfs_fileoff_t aoff;
+ bool use_cowextszhint =
+ whichfork == XFS_COW_FORK && !prealloc;
+retry:
/*
* Cap the alloc length. Keep track of prealloc so we know whether to
* tag the inode before we return.
*/
+ aoff = off;
alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
if (!eof)
alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
if (prealloc && alen >= len)
prealloc = alen - len;
- /* Figure out the extent size, adjust alen */
- if (whichfork == XFS_COW_FORK) {
+ /*
+ * If we're targeting the COW fork but aren't creating a speculative
+ * posteof preallocation, try to expand the reservation to align with
+ * the COW extent size hint if there's sufficient free space.
+ *
+ * Unlike the data fork, the CoW cancellation functions will free all
+ * the reservations at inactivation, so we don't require that every
+ * delalloc reservation have a dirty pagecache.
+ */
+ if (use_cowextszhint) {
struct xfs_bmbt_irec prev;
xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
@@ -4099,7 +4115,7 @@ xfs_bmapi_reserve_delalloc(
*/
error = xfs_quota_reserve_blkres(ip, alen);
if (error)
- return error;
+ goto out;
/*
* Split changing sb for alen and indlen since they could be coming
@@ -4108,17 +4124,21 @@ xfs_bmapi_reserve_delalloc(
indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
ASSERT(indlen > 0);
- error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
- if (error)
- goto out_unreserve_quota;
+ fdblocks = indlen;
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ error = xfs_dec_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
+ if (error)
+ goto out_unreserve_quota;
+ } else {
+ fdblocks += alen;
+ }
- error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
+ error = xfs_dec_fdblocks(mp, fdblocks, false);
if (error)
- goto out_unreserve_blocks;
-
+ goto out_unreserve_frextents;
ip->i_delayed_blks += alen;
- xfs_mod_delalloc(ip->i_mount, alen + indlen);
+ xfs_mod_delalloc(ip, alen, indlen);
got->br_startoff = aoff;
got->br_startblock = nullstartblock(indlen);
@@ -4139,49 +4159,24 @@ xfs_bmapi_reserve_delalloc(
return 0;
-out_unreserve_blocks:
- xfs_mod_fdblocks(mp, alen, false);
+out_unreserve_frextents:
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
xfs_quota_unreserve_blkres(ip, alen);
- return error;
-}
-
-static int
-xfs_bmap_alloc_userdata(
- struct xfs_bmalloca *bma)
-{
- struct xfs_mount *mp = bma->ip->i_mount;
- int whichfork = xfs_bmapi_whichfork(bma->flags);
- int error;
-
- /*
- * Set the data type being allocated. For the data fork, the first data
- * in the file is treated differently to all other allocations. For the
- * attribute fork, we only need to ensure the allocated range is not on
- * the busy list.
- */
- bma->datatype = XFS_ALLOC_NOBUSY;
- if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
- bma->datatype |= XFS_ALLOC_USERDATA;
- if (bma->offset == 0)
- bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
-
- if (mp->m_dalign && bma->length >= mp->m_dalign) {
- error = xfs_bmap_isaeof(bma, whichfork);
- if (error)
- return error;
+out:
+ if (error == -ENOSPC || error == -EDQUOT) {
+ trace_xfs_delalloc_enospc(ip, off, len);
+
+ if (prealloc || use_cowextszhint) {
+ /* retry without any preallocation */
+ use_cowextszhint = false;
+ prealloc = 0;
+ goto retry;
}
-
- if (XFS_IS_REALTIME_INODE(bma->ip))
- return xfs_bmap_rtalloc(bma);
}
-
- if (unlikely(XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
- return xfs_bmap_exact_minlen_extent_alloc(bma);
-
- return xfs_bmap_btalloc(bma);
+ return error;
}
static int
@@ -4191,43 +4186,51 @@ xfs_bmapi_allocate(
struct xfs_mount *mp = bma->ip->i_mount;
int whichfork = xfs_bmapi_whichfork(bma->flags);
struct xfs_ifork *ifp = xfs_ifork_ptr(bma->ip, whichfork);
- int tmp_logflags = 0;
int error;
ASSERT(bma->length > 0);
-
- /*
- * For the wasdelay case, we could also just allocate the stuff asked
- * for in this bmap call but that wouldn't be as good.
- */
- if (bma->wasdel) {
- bma->length = (xfs_extlen_t)bma->got.br_blockcount;
- bma->offset = bma->got.br_startoff;
- if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
- bma->prev.br_startoff = NULLFILEOFF;
- } else {
- bma->length = XFS_FILBLKS_MIN(bma->length, XFS_MAX_BMBT_EXTLEN);
- if (!bma->eof)
- bma->length = XFS_FILBLKS_MIN(bma->length,
- bma->got.br_startoff - bma->offset);
- }
+ ASSERT(bma->length <= XFS_MAX_BMBT_EXTLEN);
if (bma->flags & XFS_BMAPI_CONTIG)
bma->minlen = bma->length;
else
bma->minlen = 1;
- if (bma->flags & XFS_BMAPI_METADATA) {
- if (unlikely(XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
- error = xfs_bmap_exact_minlen_extent_alloc(bma);
- else
- error = xfs_bmap_btalloc(bma);
- } else {
- error = xfs_bmap_alloc_userdata(bma);
+ if (!(bma->flags & XFS_BMAPI_METADATA)) {
+ /*
+ * For the data and COW fork, the first data in the file is
+ * treated differently to all other allocations. For the
+ * attribute fork, we only need to ensure the allocated range
+ * is not on the busy list.
+ */
+ bma->datatype = XFS_ALLOC_NOBUSY;
+ if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
+ bma->datatype |= XFS_ALLOC_USERDATA;
+ if (bma->offset == 0)
+ bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+
+ if (mp->m_dalign && bma->length >= mp->m_dalign) {
+ error = xfs_bmap_isaeof(bma, whichfork);
+ if (error)
+ return error;
+ }
+ }
}
- if (error || bma->blkno == NULLFSBLOCK)
+
+ if ((bma->datatype & XFS_ALLOC_USERDATA) &&
+ XFS_IS_REALTIME_INODE(bma->ip))
+ error = xfs_bmap_rtalloc(bma);
+ else
+ error = xfs_bmap_btalloc(bma);
+ if (error)
return error;
+ if (bma->blkno == NULLFSBLOCK)
+ return -ENOSPC;
+
+ if (WARN_ON_ONCE(!xfs_valid_startblock(bma->ip, bma->blkno))) {
+ xfs_bmap_mark_sick(bma->ip, whichfork);
+ return -EFSCORRUPTED;
+ }
if (bma->flags & XFS_BMAPI_ZERO) {
error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
@@ -4260,8 +4263,6 @@ xfs_bmapi_allocate(
error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
whichfork, &bma->icur, &bma->cur, &bma->got,
&bma->logflags, bma->flags);
-
- bma->logflags |= tmp_logflags;
if (error)
return error;
@@ -4406,6 +4407,15 @@ xfs_bmapi_finish(
* extent state if necessary. Details behaviour is controlled by the flags
* parameter. Only allocates blocks from a single allocation group, to avoid
* locking problems.
+ *
+ * Returns 0 on success and places the extent mappings in mval. nmaps is used
+ * as an input/output parameter where the caller specifies the maximum number
+ * of mappings that may be returned and xfs_bmapi_write passes back the number
+ * of mappings (including existing mappings) it found.
+ *
+ * Returns a negative error code on failure, including -ENOSPC when it could not
+ * allocate any blocks and -ENOSR when it did allocate blocks to convert a
+ * delalloc range, but those blocks were before the passed in range.
*/
int
xfs_bmapi_write(
@@ -4524,20 +4534,33 @@ xfs_bmapi_write(
* allocation length request (which can be 64 bits in
* length) and the bma length request, which is
* xfs_extlen_t and therefore 32 bits. Hence we have to
- * check for 32-bit overflows and handle them here.
+ * be careful and do the min() using the larger type to
+ * avoid overflows.
*/
- if (len > (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN)
- bma.length = XFS_MAX_BMBT_EXTLEN;
- else
- bma.length = len;
+ bma.length = XFS_FILBLKS_MIN(len, XFS_MAX_BMBT_EXTLEN);
+
+ if (wasdelay) {
+ bma.length = XFS_FILBLKS_MIN(bma.length,
+ bma.got.br_blockcount -
+ (bno - bma.got.br_startoff));
+ } else {
+ if (!eof)
+ bma.length = XFS_FILBLKS_MIN(bma.length,
+ bma.got.br_startoff - bno);
+ }
- ASSERT(len > 0);
ASSERT(bma.length > 0);
error = xfs_bmapi_allocate(&bma);
- if (error)
+ if (error) {
+ /*
+ * If we already allocated space in a previous
+ * iteration return what we got so far when
+ * running out of space.
+ */
+ if (error == -ENOSPC && bma.nallocs)
+ break;
goto error0;
- if (bma.blkno == NULLFSBLOCK)
- break;
+ }
/*
* If this is a CoW allocation, record the data in
@@ -4575,7 +4598,6 @@ xfs_bmapi_write(
if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
eof = true;
}
- *nmap = n;
error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
whichfork);
@@ -4586,7 +4608,22 @@ xfs_bmapi_write(
ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
xfs_bmapi_finish(&bma, whichfork, 0);
xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
- orig_nmap, *nmap);
+ orig_nmap, n);
+
+ /*
+ * When converting delayed allocations, xfs_bmapi_allocate ignores
+ * the passed in bno and always converts from the start of the found
+ * delalloc extent.
+ *
+ * To avoid a successful return with *nmap set to 0, return the magic
+ * -ENOSR error code for this particular case so that the caller can
+ * handle it.
+ */
+ if (!n) {
+ ASSERT(bma.nallocs >= *nmap);
+ return -ENOSR;
+ }
+ *nmap = n;
return 0;
error0:
xfs_bmapi_finish(&bma, whichfork, error);
@@ -4599,8 +4636,8 @@ error0:
* invocations to allocate the target offset if a large enough physical extent
* is not available.
*/
-int
-xfs_bmapi_convert_delalloc(
+static int
+xfs_bmapi_convert_one_delalloc(
struct xfs_inode *ip,
int whichfork,
xfs_off_t offset,
@@ -4630,11 +4667,8 @@ xfs_bmapi_convert_delalloc(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- error = xfs_iext_count_may_overflow(ip, whichfork,
+ error = xfs_iext_count_extend(tp, ip, whichfork,
XFS_IEXT_ADD_NOSPLIT_CNT);
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(tp, ip,
- XFS_IEXT_ADD_NOSPLIT_CNT);
if (error)
goto out_trans_cancel;
@@ -4657,19 +4691,25 @@ xfs_bmapi_convert_delalloc(
if (!isnullstartblock(bma.got.br_startblock)) {
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
xfs_iomap_inode_sequence(ip, flags));
- *seq = READ_ONCE(ifp->if_seq);
+ if (seq)
+ *seq = READ_ONCE(ifp->if_seq);
goto out_trans_cancel;
}
bma.tp = tp;
bma.ip = ip;
bma.wasdel = true;
- bma.offset = bma.got.br_startoff;
- bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount,
- XFS_MAX_BMBT_EXTLEN);
bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
/*
+ * Always convert from the start of the delalloc extent even if
+ * that is outside the passed in range to create large contiguous
+ * extents on disk.
+ */
+ bma.offset = bma.got.br_startoff;
+ bma.length = bma.got.br_blockcount;
+
+ /*
* When we're converting the delalloc reservations backing dirty pages
* in the page cache, we must be careful about how we create the new
* extents:
@@ -4693,22 +4733,14 @@ xfs_bmapi_convert_delalloc(
if (error)
goto out_finish;
- error = -ENOSPC;
- if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
- goto out_finish;
- if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) {
- xfs_bmap_mark_sick(ip, whichfork);
- error = -EFSCORRUPTED;
- goto out_finish;
- }
-
XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
XFS_STATS_INC(mp, xs_xstrat_quick);
ASSERT(!isnullstartblock(bma.got.br_startblock));
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
xfs_iomap_inode_sequence(ip, flags));
- *seq = READ_ONCE(ifp->if_seq);
+ if (seq)
+ *seq = READ_ONCE(ifp->if_seq);
if (whichfork == XFS_COW_FORK)
xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
@@ -4731,6 +4763,36 @@ out_trans_cancel:
return error;
}
+/*
+ * Pass in a delalloc extent and convert it to real extents, return the real
+ * extent that maps offset in iomap.
+ */
+int
+xfs_bmapi_convert_delalloc(
+ struct xfs_inode *ip,
+ int whichfork,
+ loff_t offset,
+ struct iomap *iomap,
+ unsigned int *seq)
+{
+ int error;
+
+ /*
+ * Attempt to allocate whatever delalloc extent currently backs offset
+ * and put the result into iomap. Allocate in a loop because it may
+ * take several attempts to allocate real blocks for a contiguous
+ * delalloc extent if free space is sufficiently fragmented.
+ */
+ do {
+ error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset,
+ iomap, seq);
+ if (error)
+ return error;
+ } while (iomap->offset + iomap->length <= offset);
+
+ return 0;
+}
+
int
xfs_bmapi_remap(
struct xfs_trans *tp,
@@ -4777,6 +4839,7 @@ xfs_bmapi_remap(
}
ip->i_nblocks += len;
+ ip->i_delayed_blks -= len; /* see xfs_bmap_defer_add */
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (ifp->if_format == XFS_DINODE_FMT_BTREE)
@@ -4822,32 +4885,18 @@ error0:
* ores == 1). The number of stolen blocks is returned. The availability and
* subsequent accounting of stolen blocks is the responsibility of the caller.
*/
-static xfs_filblks_t
+static void
xfs_bmap_split_indlen(
xfs_filblks_t ores, /* original res. */
xfs_filblks_t *indlen1, /* ext1 worst indlen */
- xfs_filblks_t *indlen2, /* ext2 worst indlen */
- xfs_filblks_t avail) /* stealable blocks */
+ xfs_filblks_t *indlen2) /* ext2 worst indlen */
{
xfs_filblks_t len1 = *indlen1;
xfs_filblks_t len2 = *indlen2;
xfs_filblks_t nres = len1 + len2; /* new total res. */
- xfs_filblks_t stolen = 0;
xfs_filblks_t resfactor;
/*
- * Steal as many blocks as we can to try and satisfy the worst case
- * indlen for both new extents.
- */
- if (ores < nres && avail)
- stolen = XFS_FILBLKS_MIN(nres - ores, avail);
- ores += stolen;
-
- /* nothing else to do if we've satisfied the new reservation */
- if (ores >= nres)
- return stolen;
-
- /*
* We can't meet the total required reservation for the two extents.
* Calculate the percent of the overall shortage between both extents
* and apply this percentage to each of the requested indlen values.
@@ -4891,11 +4940,9 @@ xfs_bmap_split_indlen(
*indlen1 = len1;
*indlen2 = len2;
-
- return stolen;
}
-int
+void
xfs_bmap_del_extent_delay(
struct xfs_inode *ip,
int whichfork,
@@ -4908,9 +4955,9 @@ xfs_bmap_del_extent_delay(
struct xfs_bmbt_irec new;
int64_t da_old, da_new, da_diff = 0;
xfs_fileoff_t del_endoff, got_endoff;
- xfs_filblks_t got_indlen, new_indlen, stolen;
+ xfs_filblks_t got_indlen, new_indlen, stolen = 0;
uint32_t state = xfs_bmap_fork_to_state(whichfork);
- int error = 0;
+ uint64_t fdblocks;
bool isrt;
XFS_STATS_INC(mp, xs_del_exlist);
@@ -4925,18 +4972,12 @@ xfs_bmap_del_extent_delay(
ASSERT(got->br_startoff <= del->br_startoff);
ASSERT(got_endoff >= del_endoff);
- if (isrt)
- xfs_mod_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount));
-
/*
* Update the inode delalloc counter now and wait to update the
* sb counters as we might have to borrow some blocks for the
* indirect block accounting.
*/
- ASSERT(!isrt);
- error = xfs_quota_unreserve_blkres(ip, del->br_blockcount);
- if (error)
- return error;
+ xfs_quota_unreserve_blkres(ip, del->br_blockcount);
ip->i_delayed_blks -= del->br_blockcount;
if (got->br_startoff == del->br_startoff)
@@ -4990,8 +5031,24 @@ xfs_bmap_del_extent_delay(
new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
WARN_ON_ONCE(!got_indlen || !new_indlen);
- stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
- del->br_blockcount);
+ /*
+ * Steal as many blocks as we can to try and satisfy the worst
+ * case indlen for both new extents.
+ *
+ * However, we can't just steal reservations from the data
+ * blocks if this is an RT inode as the data and metadata
+ * blocks come from different pools. We'll have to live with
+ * under-filled indirect reservation in this case.
+ */
+ da_new = got_indlen + new_indlen;
+ if (da_new > da_old && !isrt) {
+ stolen = XFS_FILBLKS_MIN(da_new - da_old,
+ del->br_blockcount);
+ da_old += stolen;
+ }
+ if (da_new > da_old)
+ xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen);
+ da_new = got_indlen + new_indlen;
got->br_startblock = nullstartblock((int)got_indlen);
@@ -5003,20 +5060,21 @@ xfs_bmap_del_extent_delay(
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, &new, state);
- da_new = got_indlen + new_indlen - stolen;
del->br_blockcount -= stolen;
break;
}
ASSERT(da_old >= da_new);
da_diff = da_old - da_new;
- if (!isrt)
- da_diff += del->br_blockcount;
- if (da_diff) {
- xfs_mod_fdblocks(mp, da_diff, false);
- xfs_mod_delalloc(mp, -da_diff);
- }
- return error;
+ fdblocks = da_diff;
+
+ if (isrt)
+ xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del->br_blockcount));
+ else
+ fdblocks += del->br_blockcount;
+
+ xfs_add_fdblocks(mp, fdblocks);
+ xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff);
}
void
@@ -5090,6 +5148,34 @@ xfs_bmap_del_extent_cow(
ip->i_delayed_blks -= del->br_blockcount;
}
+static int
+xfs_bmap_free_rtblocks(
+ struct xfs_trans *tp,
+ struct xfs_bmbt_irec *del)
+{
+ struct xfs_rtgroup *rtg;
+ int error;
+
+ rtg = xfs_rtgroup_grab(tp->t_mountp, 0);
+ if (!rtg)
+ return -EIO;
+
+ /*
+ * Ensure the bitmap and summary inodes are locked and joined to the
+ * transaction before modifying them.
+ */
+ if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) {
+ tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED;
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP);
+ xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP);
+ }
+
+ error = xfs_rtfree_blocks(tp, rtg, del->br_startblock,
+ del->br_blockcount);
+ xfs_rtgroup_rele(rtg);
+ return error;
+}
+
/*
* Called by xfs_bmapi to update file extent records and the btree
* after removing space.
@@ -5107,8 +5193,7 @@ xfs_bmap_del_extent_real(
{
xfs_fsblock_t del_endblock=0; /* first block past del */
xfs_fileoff_t del_endoff; /* first offset past del */
- int do_fx; /* free extent at end of routine */
- int error; /* error return value */
+ int error = 0; /* error return value */
struct xfs_bmbt_irec got; /* current extent entry */
xfs_fileoff_t got_endoff; /* first offset past got */
int i; /* temp state */
@@ -5151,20 +5236,10 @@ xfs_bmap_del_extent_real(
return -ENOSPC;
*logflagsp = XFS_ILOG_CORE;
- if (xfs_ifork_is_realtime(ip, whichfork)) {
- if (!(bflags & XFS_BMAPI_REMAP)) {
- error = xfs_rtfree_blocks(tp, del->br_startblock,
- del->br_blockcount);
- if (error)
- return error;
- }
-
- do_fx = 0;
+ if (xfs_ifork_is_realtime(ip, whichfork))
qfield = XFS_TRANS_DQ_RTBCOUNT;
- } else {
- do_fx = 1;
+ else
qfield = XFS_TRANS_DQ_BCOUNT;
- }
nblks = del->br_blockcount;
del_endblock = del->br_startblock + del->br_blockcount;
@@ -5312,18 +5387,39 @@ xfs_bmap_del_extent_real(
/*
* If we need to, add to list of extents to delete.
*/
- if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
+ if (!(bflags & XFS_BMAPI_REMAP)) {
+ bool isrt = xfs_ifork_is_realtime(ip, whichfork);
+
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
xfs_refcount_decrease_extent(tp, del);
+ } else if (isrt && !xfs_has_rtgroups(mp)) {
+ error = xfs_bmap_free_rtblocks(tp, del);
} else {
+ unsigned int efi_flags = 0;
+
+ if ((bflags & XFS_BMAPI_NODISCARD) ||
+ del->br_state == XFS_EXT_UNWRITTEN)
+ efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD;
+
+ /*
+ * Historically, we did not use EFIs to free realtime
+ * extents. However, when reverse mapping is enabled,
+ * we must maintain the same order of operations as the
+ * data device, which is: Remove the file mapping,
+ * remove the reverse mapping, and then free the
+ * blocks. Reflink for realtime volumes requires the
+ * same sort of ordering. Both features rely on
+ * rtgroups, so let's gate rt EFI usage on rtgroups.
+ */
+ if (isrt)
+ efi_flags |= XFS_FREE_EXTENT_REALTIME;
+
error = xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
- XFS_AG_RESV_NONE,
- ((bflags & XFS_BMAPI_NODISCARD) ||
- del->br_state == XFS_EXT_UNWRITTEN));
- if (error)
- return error;
+ XFS_AG_RESV_NONE, efi_flags);
}
+ if (error)
+ return error;
}
/*
@@ -5414,16 +5510,6 @@ __xfs_bunmapi(
} else
cur = NULL;
- if (isrt) {
- /*
- * Synchronize by locking the bitmap inode.
- */
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
- xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
- xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
- xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
- }
-
extno = 0;
while (end != (xfs_fileoff_t)-1 && end >= start &&
(nexts == 0 || extno < nexts)) {
@@ -5584,18 +5670,16 @@ __xfs_bunmapi(
delete:
if (wasdel) {
- error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
- &got, &del);
+ xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
} else {
error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
&del, &tmp_logflags, whichfork,
flags);
logflags |= tmp_logflags;
+ if (error)
+ goto error0;
}
- if (error)
- goto error0;
-
end = del.br_startoff - 1;
nodelete:
/*
@@ -5678,6 +5762,8 @@ xfs_bunmapi(
*/
STATIC bool
xfs_bmse_can_merge(
+ struct xfs_inode *ip,
+ int whichfork,
struct xfs_bmbt_irec *left, /* preceding extent */
struct xfs_bmbt_irec *got, /* current extent to shift */
xfs_fileoff_t shift) /* shift fsb */
@@ -5693,7 +5779,8 @@ xfs_bmse_can_merge(
if ((left->br_startoff + left->br_blockcount != startoff) ||
(left->br_startblock + left->br_blockcount != got->br_startblock) ||
(left->br_state != got->br_state) ||
- (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN))
+ (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN) ||
+ !xfs_bmap_same_rtgroup(ip, whichfork, left, got))
return false;
return true;
@@ -5729,7 +5816,7 @@ xfs_bmse_merge(
blockcount = left->br_blockcount + got->br_blockcount;
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
- ASSERT(xfs_bmse_can_merge(left, got, shift));
+ ASSERT(xfs_bmse_can_merge(ip, whichfork, left, got, shift));
new = *left;
new.br_blockcount = blockcount;
@@ -5891,7 +5978,8 @@ xfs_bmap_collapse_extents(
goto del_cursor;
}
- if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
+ if (xfs_bmse_can_merge(ip, whichfork, &prev, &got,
+ offset_shift_fsb)) {
error = xfs_bmse_merge(tp, ip, whichfork,
offset_shift_fsb, &icur, &got, &prev,
cur, &logflags);
@@ -6027,7 +6115,8 @@ xfs_bmap_insert_extents(
* never find mergeable extents in this scenario. Check anyways
* and warn if we encounter two extents that could be one.
*/
- if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
+ if (xfs_bmse_can_merge(ip, whichfork, &got, &next,
+ offset_shift_fsb))
WARN_ON_ONCE(1);
}
@@ -6354,6 +6443,7 @@ xfs_bunmapi_range(
error = xfs_defer_finish(tpp);
if (error)
goto out;
+ cond_resched();
}
out:
return error;
@@ -6401,3 +6491,45 @@ xfs_bmap_query_all(
return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
}
+
+/* Helper function to extract extent size hint from inode */
+xfs_extlen_t
+xfs_get_extsz_hint(
+ struct xfs_inode *ip)
+{
+ /*
+ * No point in aligning allocations if we need to COW to actually
+ * write to them.
+ */
+ if (xfs_is_always_cow_inode(ip))
+ return 0;
+ if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
+ return ip->i_extsize;
+ if (XFS_IS_REALTIME_INODE(ip) &&
+ ip->i_mount->m_sb.sb_rextsize > 1)
+ return ip->i_mount->m_sb.sb_rextsize;
+ return 0;
+}
+
+/*
+ * Helper function to extract CoW extent size hint from inode.
+ * Between the extent size hint and the CoW extent size hint, we
+ * return the greater of the two. If the value is zero (automatic),
+ * use the default size.
+ */
+xfs_extlen_t
+xfs_get_cowextsz_hint(
+ struct xfs_inode *ip)
+{
+ xfs_extlen_t a, b;
+
+ a = 0;
+ if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
+ a = ip->i_cowextsize;
+ b = xfs_get_extsz_hint(ip);
+
+ a = max(a, b);
+ if (a == 0)
+ return XFS_DEFAULT_COWEXTSZ_HINT;
+ return a;
+}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f7662595309d..4b721d935994 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -158,7 +158,7 @@ static inline bool xfs_bmap_is_real_extent(const struct xfs_bmbt_irec *irec)
* Return true if the extent is a real, allocated extent, or false if it is a
* delayed allocation, and unwritten extent or a hole.
*/
-static inline bool xfs_bmap_is_written_extent(struct xfs_bmbt_irec *irec)
+static inline bool xfs_bmap_is_written_extent(const struct xfs_bmbt_irec *irec)
{
return xfs_bmap_is_real_extent(irec) &&
irec->br_state != XFS_EXT_UNWRITTEN;
@@ -176,9 +176,16 @@ int xfs_bmap_longest_free_extent(struct xfs_perag *pag,
void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp);
-int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
+int xfs_bmap_add_attrfork(struct xfs_trans *tp, struct xfs_inode *ip,
+ int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp,
struct xfs_inode *ip, int whichfork);
+int xfs_bmap_local_to_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_extlen_t total, int *logflagsp, int whichfork,
+ void (*init_fn)(struct xfs_trans *tp, struct xfs_buf *bp,
+ struct xfs_inode *ip, struct xfs_ifork *ifp,
+ void *priv),
+ void *priv);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
@@ -195,7 +202,7 @@ int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags,
xfs_extnum_t nexts, int *done);
-int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
+void xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
struct xfs_bmbt_irec *del);
void xfs_bmap_del_extent_cow(struct xfs_inode *ip,
@@ -241,7 +248,7 @@ struct xfs_bmap_intent {
enum xfs_bmap_intent_type bi_type;
int bi_whichfork;
struct xfs_inode *bi_owner;
- struct xfs_perag *bi_pag;
+ struct xfs_group *bi_group;
struct xfs_bmbt_irec bi_bmap;
};
@@ -289,4 +296,7 @@ typedef int (*xfs_bmap_query_range_fn)(
int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn,
void *priv);
+xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
+xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
+
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index f5d84dcb58da..3464be771f95 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -65,10 +65,10 @@ xfs_bmdr_to_bmbt(
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
rblock->bb_numrecs = dblock->bb_numrecs;
dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
- fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
- tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
- fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
- tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
+ fkp = xfs_bmdr_key_addr(dblock, 1);
+ tkp = xfs_bmbt_key_addr(mp, rblock, 1);
+ fpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr);
+ tpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
@@ -168,10 +168,10 @@ xfs_bmbt_to_bmdr(
dblock->bb_level = rblock->bb_level;
dblock->bb_numrecs = rblock->bb_numrecs;
dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
- fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
- tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
- fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
- tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
+ fkp = xfs_bmbt_key_addr(mp, rblock, 1);
+ tkp = xfs_bmdr_key_addr(dblock, 1);
+ fpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
+ tpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
@@ -282,7 +282,7 @@ xfs_bmbt_free_block(
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
return error;
@@ -645,13 +645,13 @@ xfs_bmbt_commit_staged_btree(
/*
* Calculate number of records in a bmap btree block.
*/
-int
+unsigned int
xfs_bmbt_maxrecs(
struct xfs_mount *mp,
- int blocklen,
- int leaf)
+ unsigned int blocklen,
+ bool leaf)
{
- blocklen -= XFS_BMBT_BLOCK_LEN(mp);
+ blocklen -= xfs_bmbt_block_len(mp);
return xfs_bmbt_block_maxrecs(blocklen, leaf);
}
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index de1b73f1225c..49a3bae3f6ec 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -14,70 +14,6 @@ struct xfs_trans;
struct xbtree_ifakeroot;
/*
- * Btree block header size depends on a superblock flag.
- */
-#define XFS_BMBT_BLOCK_LEN(mp) \
- (xfs_has_crc(((mp))) ? \
- XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)
-
-#define XFS_BMBT_REC_ADDR(mp, block, index) \
- ((xfs_bmbt_rec_t *) \
- ((char *)(block) + \
- XFS_BMBT_BLOCK_LEN(mp) + \
- ((index) - 1) * sizeof(xfs_bmbt_rec_t)))
-
-#define XFS_BMBT_KEY_ADDR(mp, block, index) \
- ((xfs_bmbt_key_t *) \
- ((char *)(block) + \
- XFS_BMBT_BLOCK_LEN(mp) + \
- ((index) - 1) * sizeof(xfs_bmbt_key_t)))
-
-#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \
- ((xfs_bmbt_ptr_t *) \
- ((char *)(block) + \
- XFS_BMBT_BLOCK_LEN(mp) + \
- (maxrecs) * sizeof(xfs_bmbt_key_t) + \
- ((index) - 1) * sizeof(xfs_bmbt_ptr_t)))
-
-#define XFS_BMDR_REC_ADDR(block, index) \
- ((xfs_bmdr_rec_t *) \
- ((char *)(block) + \
- sizeof(struct xfs_bmdr_block) + \
- ((index) - 1) * sizeof(xfs_bmdr_rec_t)))
-
-#define XFS_BMDR_KEY_ADDR(block, index) \
- ((xfs_bmdr_key_t *) \
- ((char *)(block) + \
- sizeof(struct xfs_bmdr_block) + \
- ((index) - 1) * sizeof(xfs_bmdr_key_t)))
-
-#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \
- ((xfs_bmdr_ptr_t *) \
- ((char *)(block) + \
- sizeof(struct xfs_bmdr_block) + \
- (maxrecs) * sizeof(xfs_bmdr_key_t) + \
- ((index) - 1) * sizeof(xfs_bmdr_ptr_t)))
-
-/*
- * These are to be used when we know the size of the block and
- * we don't have a cursor.
- */
-#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
- XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
-
-#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \
- (int)(XFS_BMBT_BLOCK_LEN(mp) + \
- ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
-
-#define XFS_BMAP_BROOT_SPACE(mp, bb) \
- (XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs)))
-#define XFS_BMDR_SPACE_CALC(nrecs) \
- (int)(sizeof(xfs_bmdr_block_t) + \
- ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
-#define XFS_BMAP_BMDR_SPACE(bb) \
- (XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs)))
-
-/*
* Maximum number of bmap btree levels.
*/
#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)])
@@ -99,7 +35,8 @@ extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
extern int xfs_bmdr_maxrecs(int blocklen, int leaf);
-extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
+unsigned int xfs_bmbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen,
+ bool leaf);
extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, xfs_ino_t new_owner,
@@ -121,4 +58,144 @@ void xfs_bmbt_destroy_cur_cache(void);
void xfs_bmbt_init_block(struct xfs_inode *ip, struct xfs_btree_block *buf,
struct xfs_buf *bp, __u16 level, __u16 numrecs);
+/*
+ * Size of the header at the start of a bmbt block: XFS_BTREE_LBLOCK_CRC_LEN
+ * when xfs_has_crc(mp) is true, XFS_BTREE_LBLOCK_LEN otherwise.
+ */
+static inline size_t
+xfs_bmbt_block_len(struct xfs_mount *mp)
+{
+	if (xfs_has_crc(mp))
+		return XFS_BTREE_LBLOCK_CRC_LEN;
+	return XFS_BTREE_LBLOCK_LEN;
+}
+
+/* Addresses of key, pointers, and records within an incore bmbt block. */
+
+/*
+ * Return the address of the record at 1-based position @index in an incore
+ * bmbt block.  Records are laid out back to back after the block header.
+ */
+static inline struct xfs_bmbt_rec *
+xfs_bmbt_rec_addr(
+	struct xfs_mount	*mp,
+	struct xfs_btree_block	*block,
+	unsigned int		index)
+{
+	size_t			rec_off = (index - 1) * sizeof(struct xfs_bmbt_rec);
+
+	return (struct xfs_bmbt_rec *)
+		((char *)block + xfs_bmbt_block_len(mp) + rec_off);
+}
+
+/*
+ * Return the address of the key at 1-based position @index in an incore bmbt
+ * block.  Keys are packed immediately after the block header, so the stride
+ * must be the size of a key (sizeof(struct xfs_bmbt_key)), not the size of a
+ * pointer to one: the two only coincide when pointers are as wide as a key.
+ */
+static inline struct xfs_bmbt_key *
+xfs_bmbt_key_addr(
+	struct xfs_mount	*mp,
+	struct xfs_btree_block	*block,
+	unsigned int		index)
+{
+	return (struct xfs_bmbt_key *)
+		((char *)block + xfs_bmbt_block_len(mp) +
+		 (index - 1) * sizeof(struct xfs_bmbt_key));
+}
+
+/*
+ * Return the address of the block pointer at 1-based position @index in an
+ * incore bmbt block.  The pointer array begins after the block header plus
+ * room for @maxrecs keys.
+ */
+static inline xfs_bmbt_ptr_t *
+xfs_bmbt_ptr_addr(
+	struct xfs_mount	*mp,
+	struct xfs_btree_block	*block,
+	unsigned int		index,
+	unsigned int		maxrecs)
+{
+	size_t			keys_len = maxrecs * sizeof(struct xfs_bmbt_key);
+	size_t			ptr_off = (index - 1) * sizeof(xfs_bmbt_ptr_t);
+
+	return (xfs_bmbt_ptr_t *)
+		((char *)block + xfs_bmbt_block_len(mp) + keys_len + ptr_off);
+}
+
+/* Addresses of key, pointers, and records within an ondisk bmbt block. */
+
+/*
+ * Return the address of the record at 1-based position @index in an ondisk
+ * bmbt root.  Records start directly after the struct xfs_bmdr_block header.
+ */
+static inline struct xfs_bmbt_rec *
+xfs_bmdr_rec_addr(
+	struct xfs_bmdr_block	*block,
+	unsigned int		index)
+{
+	char			*payload = (char *)(block + 1);
+	size_t			rec_off = (index - 1) * sizeof(struct xfs_bmbt_rec);
+
+	return (struct xfs_bmbt_rec *)(payload + rec_off);
+}
+
+/*
+ * Return the address of the key at 1-based position @index in an ondisk bmbt
+ * root.  Keys start directly after the struct xfs_bmdr_block header.
+ */
+static inline struct xfs_bmbt_key *
+xfs_bmdr_key_addr(
+	struct xfs_bmdr_block	*block,
+	unsigned int		index)
+{
+	char			*payload = (char *)(block + 1);
+	size_t			key_off = (index - 1) * sizeof(struct xfs_bmbt_key);
+
+	return (struct xfs_bmbt_key *)(payload + key_off);
+}
+
+/*
+ * Return the address of the block pointer at 1-based position @index in an
+ * ondisk bmbt root.  The pointer array begins after the struct xfs_bmdr_block
+ * header plus room for @maxrecs keys.
+ */
+static inline xfs_bmbt_ptr_t *
+xfs_bmdr_ptr_addr(
+	struct xfs_bmdr_block	*block,
+	unsigned int		index,
+	unsigned int		maxrecs)
+{
+	char			*payload = (char *)(block + 1);
+	size_t			keys_len = maxrecs * sizeof(struct xfs_bmbt_key);
+	size_t			ptr_off = (index - 1) * sizeof(xfs_bmbt_ptr_t);
+
+	return (xfs_bmbt_ptr_t *)(payload + keys_len + ptr_off);
+}
+
+/*
+ * Address of the @i'th pointer within the incore btree root @bb.
+ *
+ * For callers that know the size of the root block (@sz) but have no cursor:
+ * the pointer array offset is derived from the non-leaf maxrecs value that
+ * xfs_bmbt_maxrecs() computes for a block of @sz bytes.
+ */
+static inline xfs_bmbt_ptr_t *
+xfs_bmap_broot_ptr_addr(
+	struct xfs_mount	*mp,
+	struct xfs_btree_block	*bb,
+	unsigned int		i,
+	unsigned int		sz)
+{
+	unsigned int		maxrecs = xfs_bmbt_maxrecs(mp, sz, false);
+
+	return xfs_bmbt_ptr_addr(mp, bb, i, maxrecs);
+}
+
+/*
+ * Bytes needed for an incore btree root holding @nrecs records: the block
+ * header plus one key and one pointer per record.
+ */
+static inline size_t
+xfs_bmap_broot_space_calc(
+	struct xfs_mount	*mp,
+	unsigned int		nrecs)
+{
+	size_t			per_rec = sizeof(struct xfs_bmbt_key) +
+					  sizeof(xfs_bmbt_ptr_t);
+
+	return xfs_bmbt_block_len(mp) + nrecs * per_rec;
+}
+
+/*
+ * Bytes needed for the incore btree root that corresponds to the ondisk root
+ * block @bb, sized from the record count stored in @bb.
+ */
+static inline size_t
+xfs_bmap_broot_space(
+	struct xfs_mount	*mp,
+	struct xfs_bmdr_block	*bb)
+{
+	unsigned int		nrecs = be16_to_cpu(bb->bb_numrecs);
+
+	return xfs_bmap_broot_space_calc(mp, nrecs);
+}
+
+/*
+ * Bytes needed for an ondisk root block holding @nrecs records: the
+ * struct xfs_bmdr_block header plus one key and one pointer per record.
+ */
+static inline size_t
+xfs_bmdr_space_calc(unsigned int nrecs)
+{
+	size_t			per_rec = sizeof(struct xfs_bmbt_key) +
+					  sizeof(xfs_bmbt_ptr_t);
+
+	return sizeof(struct xfs_bmdr_block) + nrecs * per_rec;
+}
+
+/*
+ * Bytes needed for the ondisk root block that corresponds to the incore root
+ * block @bb, sized from the record count stored in @bb.
+ */
+static inline size_t
+xfs_bmap_bmdr_space(struct xfs_btree_block *bb)
+{
+	unsigned int		nrecs = be16_to_cpu(bb->bb_numrecs);
+
+	return xfs_bmdr_space_calc(nrecs);
+}
+
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index d29547572a68..2b5fc5fd1643 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -225,7 +225,7 @@ __xfs_btree_check_agblock(
struct xfs_buf *bp)
{
struct xfs_mount *mp = cur->bc_mp;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
xfs_failaddr_t fa;
xfs_agblock_t agbno;
@@ -331,7 +331,7 @@ __xfs_btree_check_ptr(
return -EFSCORRUPTED;
break;
case XFS_BTREE_TYPE_AG:
- if (!xfs_verify_agbno(cur->bc_ag.pag,
+ if (!xfs_verify_agbno(to_perag(cur->bc_group),
be32_to_cpu((&ptr->s)[index])))
return -EFSCORRUPTED;
break;
@@ -372,7 +372,7 @@ xfs_btree_check_ptr(
case XFS_BTREE_TYPE_AG:
xfs_err(cur->bc_mp,
"AG %u: Corrupt %sbt pointer at level %d index %d.",
- cur->bc_ag.pag->pag_agno, cur->bc_ops->name,
+ cur->bc_group->xg_gno, cur->bc_ops->name,
level, index);
break;
}
@@ -523,20 +523,8 @@ xfs_btree_del_cursor(
ASSERT(!xfs_btree_is_bmap(cur->bc_ops) || cur->bc_bmap.allocated == 0 ||
xfs_is_shutdown(cur->bc_mp) || error != 0);
- switch (cur->bc_ops->type) {
- case XFS_BTREE_TYPE_AG:
- if (cur->bc_ag.pag)
- xfs_perag_put(cur->bc_ag.pag);
- break;
- case XFS_BTREE_TYPE_INODE:
- /* nothing to do */
- break;
- case XFS_BTREE_TYPE_MEM:
- if (cur->bc_mem.pag)
- xfs_perag_put(cur->bc_mem.pag);
- break;
- }
-
+ if (cur->bc_group)
+ xfs_group_put(cur->bc_group);
kmem_cache_free(cur->bc_cache, cur);
}
@@ -1017,22 +1005,22 @@ xfs_btree_readahead_agblock(
struct xfs_btree_block *block)
{
struct xfs_mount *mp = cur->bc_mp;
- xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
int rval = 0;
if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
xfs_buf_readahead(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, left),
- mp->m_bsize, cur->bc_ops->buf_ops);
+ xfs_agbno_to_daddr(pag, left), mp->m_bsize,
+ cur->bc_ops->buf_ops);
rval++;
}
if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
xfs_buf_readahead(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, right),
- mp->m_bsize, cur->bc_ops->buf_ops);
+ xfs_agbno_to_daddr(pag, right), mp->m_bsize,
+ cur->bc_ops->buf_ops);
rval++;
}
@@ -1091,7 +1079,7 @@ xfs_btree_ptr_to_daddr(
switch (cur->bc_ops->type) {
case XFS_BTREE_TYPE_AG:
- *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ *daddr = xfs_agbno_to_daddr(to_perag(cur->bc_group),
be32_to_cpu(ptr->s));
break;
case XFS_BTREE_TYPE_INODE:
@@ -1313,7 +1301,7 @@ xfs_btree_owner(
case XFS_BTREE_TYPE_INODE:
return cur->bc_ino.ip->i_ino;
case XFS_BTREE_TYPE_AG:
- return cur->bc_ag.pag->pag_agno;
+ return cur->bc_group->xg_gno;
default:
ASSERT(0);
return 0;
@@ -1331,30 +1319,6 @@ xfs_btree_init_block_cur(
xfs_btree_owner(cur));
}
-/*
- * Return true if ptr is the last record in the btree and
- * we need to track updates to this record. The decision
- * will be further refined in the update_lastrec method.
- */
-STATIC int
-xfs_btree_is_lastrec(
- struct xfs_btree_cur *cur,
- struct xfs_btree_block *block,
- int level)
-{
- union xfs_btree_ptr ptr;
-
- if (level > 0)
- return 0;
- if (!(cur->bc_ops->geom_flags & XFS_BTGEO_LASTREC_UPDATE))
- return 0;
-
- xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
- if (!xfs_btree_ptr_is_null(cur, &ptr))
- return 0;
- return 1;
-}
-
STATIC void
xfs_btree_buf_to_ptr(
struct xfs_btree_cur *cur,
@@ -2420,15 +2384,6 @@ xfs_btree_update(
xfs_btree_copy_recs(cur, rp, rec, 1);
xfs_btree_log_recs(cur, bp, ptr, ptr);
- /*
- * If we are tracking the last record in the tree and
- * we are at the far right edge of the tree, update it.
- */
- if (xfs_btree_is_lastrec(cur, block, 0)) {
- cur->bc_ops->update_lastrec(cur, block, rec,
- ptr, LASTREC_UPDATE);
- }
-
/* Pass new key value up to our parent. */
if (xfs_btree_needs_key_update(cur, ptr)) {
error = xfs_btree_update_keys(cur, 0);
@@ -3618,15 +3573,6 @@ xfs_btree_insrec(
}
/*
- * If we are tracking the last record in the tree and
- * we are at the far right edge of the tree, update it.
- */
- if (xfs_btree_is_lastrec(cur, block, level)) {
- cur->bc_ops->update_lastrec(cur, block, rec,
- ptr, LASTREC_INSREC);
- }
-
- /*
* Return the new block number, if any.
* If there is one, give back a record value and a cursor too.
*/
@@ -3984,15 +3930,6 @@ xfs_btree_delrec(
xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
/*
- * If we are tracking the last record in the tree and
- * we are at the far right edge of the tree, update it.
- */
- if (xfs_btree_is_lastrec(cur, block, level)) {
- cur->bc_ops->update_lastrec(cur, block, NULL,
- ptr, LASTREC_DELREC);
- }
-
- /*
* We're at the root level. First, shrink the root block in-memory.
* Try to get rid of the next level down. If we can't then there's
* nothing left to do.
@@ -4796,7 +4733,7 @@ xfs_btree_agblock_v5hdr_verify(
return __this_address;
if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
return __this_address;
- if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag_agno(pag))
return __this_address;
return NULL;
}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index f93374278aa1..3b739459ebb0 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -154,12 +154,6 @@ struct xfs_btree_ops {
int *stat);
int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
- /* update last record information */
- void (*update_lastrec)(struct xfs_btree_cur *cur,
- const struct xfs_btree_block *block,
- const union xfs_btree_rec *rec,
- int ptr, int reason);
-
/* records in block/level */
int (*get_minrecs)(struct xfs_btree_cur *cur, int level);
int (*get_maxrecs)(struct xfs_btree_cur *cur, int level);
@@ -222,15 +216,7 @@ struct xfs_btree_ops {
};
/* btree geometry flags */
-#define XFS_BTGEO_LASTREC_UPDATE (1U << 0) /* track last rec externally */
-#define XFS_BTGEO_OVERLAPPING (1U << 1) /* overlapping intervals */
-
-/*
- * Reasons for the update_lastrec method to be called.
- */
-#define LASTREC_UPDATE 0
-#define LASTREC_INSREC 1
-#define LASTREC_DELREC 2
+#define XFS_BTGEO_OVERLAPPING (1U << 0) /* overlapping intervals */
union xfs_btree_irec {
@@ -268,6 +254,7 @@ struct xfs_btree_cur
union xfs_btree_irec bc_rec; /* current insert/search record value */
uint8_t bc_nlevels; /* number of levels in the tree */
uint8_t bc_maxlevels; /* maximum levels for this btree type */
+ struct xfs_group *bc_group;
/* per-type information */
union {
@@ -278,13 +265,11 @@ struct xfs_btree_cur
struct xbtree_ifakeroot *ifake; /* for staging cursor */
} bc_ino;
struct {
- struct xfs_perag *pag;
struct xfs_buf *agbp;
struct xbtree_afakeroot *afake; /* for staging cursor */
} bc_ag;
struct {
struct xfbtree *xfbtree;
- struct xfs_perag *pag;
} bc_mem;
};
diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c
index 036061fe32cc..df3d613675a1 100644
--- a/fs/xfs/libxfs/xfs_btree_mem.c
+++ b/fs/xfs/libxfs/xfs_btree_mem.c
@@ -57,10 +57,8 @@ xfbtree_dup_cursor(
ncur->bc_flags = cur->bc_flags;
ncur->bc_nlevels = cur->bc_nlevels;
ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree;
-
- if (cur->bc_mem.pag)
- ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag);
-
+ if (cur->bc_group)
+ ncur->bc_group = xfs_group_hold(cur->bc_group);
return ncur;
}
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 718d071bb21a..17d9e6154f19 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -252,6 +252,51 @@ xfs_da3_node_verify(
return NULL;
}
+/*
+ * Check the header of a da btree node buffer against the expected @owner.
+ *
+ * Returns NULL if the buffer passes (or if CRCs are not enabled, in which
+ * case nothing is checked); otherwise returns the address of the failing
+ * check, flagging a magic number or owner mismatch.
+ */
+xfs_failaddr_t
+xfs_da3_node_header_check(
+	struct xfs_buf		*bp,
+	xfs_ino_t		owner)
+{
+	struct xfs_da3_blkinfo	*hdr3 = bp->b_addr;
+
+	if (!xfs_has_crc(bp->b_mount))
+		return NULL;
+
+	if (hdr3->hdr.magic != cpu_to_be16(XFS_DA3_NODE_MAGIC))
+		return __this_address;
+
+	if (be64_to_cpu(hdr3->owner) != owner)
+		return __this_address;
+
+	return NULL;
+}
+
+/*
+ * Dispatch to the header check matching the block's magic number: attr leaf,
+ * da node, or directory leaf (LEAF1/LEAFN).
+ *
+ * Returns NULL without checking anything when CRCs are not enabled.  A magic
+ * number that matches none of the known types trips ASSERT(0) and then also
+ * returns NULL.
+ */
+xfs_failaddr_t
+xfs_da3_header_check(
+	struct xfs_buf		*bp,
+	xfs_ino_t		owner)
+{
+	struct xfs_da_blkinfo	*hdr = bp->b_addr;
+
+	if (!xfs_has_crc(bp->b_mount))
+		return NULL;
+
+	switch (be16_to_cpu(hdr->magic)) {
+	case XFS_ATTR3_LEAF_MAGIC:
+		return xfs_attr3_leaf_header_check(bp, owner);
+	case XFS_DA3_NODE_MAGIC:
+		return xfs_da3_node_header_check(bp, owner);
+	case XFS_DIR3_LEAF1_MAGIC:
+	case XFS_DIR3_LEAFN_MAGIC:
+		return xfs_dir3_leaf_header_check(bp, owner);
+	default:
+		ASSERT(0);
+		return NULL;
+	}
+}
+
static void
xfs_da3_node_write_verify(
struct xfs_buf *bp)
@@ -486,7 +531,7 @@ xfs_da3_node_create(
memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
ichdr.magic = XFS_DA3_NODE_MAGIC;
hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
+ hdr3->info.owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
} else {
ichdr.magic = XFS_DA_NODE_MAGIC;
@@ -548,9 +593,8 @@ xfs_da3_split(
switch (oldblk->magic) {
case XFS_ATTR_LEAF_MAGIC:
error = xfs_attr3_leaf_split(state, oldblk, newblk);
- if ((error != 0) && (error != -ENOSPC)) {
+ if (error < 0)
return error; /* GROT: attr is inconsistent */
- }
if (!error) {
addblk = newblk;
break;
@@ -572,6 +616,8 @@ xfs_da3_split(
error = xfs_attr3_leaf_split(state, newblk,
&state->extrablk);
}
+ if (error == 1)
+ return -ENOSPC;
if (error)
return error; /* GROT: attr inconsistent */
addblk = newblk;
@@ -1199,6 +1245,7 @@ xfs_da3_root_join(
struct xfs_da3_icnode_hdr oldroothdr;
int error;
struct xfs_inode *dp = state->args->dp;
+ xfs_failaddr_t fa;
trace_xfs_da_root_join(state->args);
@@ -1225,6 +1272,13 @@ xfs_da3_root_join(
error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
/*
@@ -1259,6 +1313,7 @@ xfs_da3_node_toosmall(
struct xfs_da_blkinfo *info;
xfs_dablk_t blkno;
struct xfs_buf *bp;
+ xfs_failaddr_t fa;
struct xfs_da3_icnode_hdr nodehdr;
int count;
int forward;
@@ -1333,6 +1388,13 @@ xfs_da3_node_toosmall(
state->args->whichfork);
if (error)
return error;
+ fa = xfs_da3_node_header_check(bp, state->args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(state->args->trans, bp);
+ xfs_da_mark_sick(state->args);
+ return -EFSCORRUPTED;
+ }
node = bp->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node);
@@ -1591,6 +1653,7 @@ xfs_da3_node_lookup_int(
struct xfs_da_node_entry *btree;
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_da_args *args;
+ xfs_failaddr_t fa;
xfs_dablk_t blkno;
xfs_dahash_t hashval;
xfs_dahash_t btreehashval;
@@ -1629,6 +1692,12 @@ xfs_da3_node_lookup_int(
if (magic == XFS_ATTR_LEAF_MAGIC ||
magic == XFS_ATTR3_LEAF_MAGIC) {
+ fa = xfs_attr3_leaf_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_ATTR_LEAF_MAGIC;
blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
break;
@@ -1636,6 +1705,12 @@ xfs_da3_node_lookup_int(
if (magic == XFS_DIR2_LEAFN_MAGIC ||
magic == XFS_DIR3_LEAFN_MAGIC) {
+ fa = xfs_dir3_leaf_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_DIR2_LEAFN_MAGIC;
blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
blk->bp, NULL);
@@ -1648,6 +1723,13 @@ xfs_da3_node_lookup_int(
return -EFSCORRUPTED;
}
+ fa = xfs_da3_node_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
+
blk->magic = XFS_DA_NODE_MAGIC;
/*
@@ -1820,6 +1902,7 @@ xfs_da3_blk_link(
struct xfs_da_blkinfo *tmp_info;
struct xfs_da_args *args;
struct xfs_buf *bp;
+ xfs_failaddr_t fa;
int before = 0;
int error;
struct xfs_inode *dp = state->args->dp;
@@ -1863,6 +1946,13 @@ xfs_da3_blk_link(
&bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
@@ -1884,6 +1974,13 @@ xfs_da3_blk_link(
&bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
@@ -1913,6 +2010,7 @@ xfs_da3_blk_unlink(
struct xfs_da_blkinfo *tmp_info;
struct xfs_da_args *args;
struct xfs_buf *bp;
+ xfs_failaddr_t fa;
int error;
/*
@@ -1943,6 +2041,13 @@ xfs_da3_blk_unlink(
&bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
@@ -1960,6 +2065,13 @@ xfs_da3_blk_unlink(
&bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
@@ -1996,6 +2108,7 @@ xfs_da3_path_shift(
struct xfs_da_node_entry *btree;
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_buf *bp;
+ xfs_failaddr_t fa;
xfs_dablk_t blkno = 0;
int level;
int error;
@@ -2074,6 +2187,12 @@ xfs_da3_path_shift(
switch (be16_to_cpu(info->magic)) {
case XFS_DA_NODE_MAGIC:
case XFS_DA3_NODE_MAGIC:
+ fa = xfs_da3_node_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_DA_NODE_MAGIC;
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
bp->b_addr);
@@ -2087,6 +2206,12 @@ xfs_da3_path_shift(
break;
case XFS_ATTR_LEAF_MAGIC:
case XFS_ATTR3_LEAF_MAGIC:
+ fa = xfs_attr3_leaf_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_ATTR_LEAF_MAGIC;
ASSERT(level == path->active-1);
blk->index = 0;
@@ -2094,6 +2219,12 @@ xfs_da3_path_shift(
break;
case XFS_DIR2_LEAFN_MAGIC:
case XFS_DIR3_LEAFN_MAGIC:
+ fa = xfs_dir3_leaf_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_DIR2_LEAFN_MAGIC;
ASSERT(level == path->active-1);
blk->index = 0;
@@ -2167,8 +2298,8 @@ xfs_da_grow_inode_int(
struct xfs_inode *dp = args->dp;
int w = args->whichfork;
xfs_rfsblock_t nblks = dp->i_nblocks;
- struct xfs_bmbt_irec map, *mapp;
- int nmap, error, got, i, mapi;
+ struct xfs_bmbt_irec map, *mapp = &map;
+ int nmap, error, got, i, mapi = 1;
/*
* Find a spot in the file space to put the new block.
@@ -2184,14 +2315,7 @@ xfs_da_grow_inode_int(
error = xfs_bmapi_write(tp, dp, *bno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
args->total, &map, &nmap);
- if (error)
- return error;
-
- ASSERT(nmap <= 1);
- if (nmap == 1) {
- mapp = &map;
- mapi = 1;
- } else if (nmap == 0 && count > 1) {
+ if (error == -ENOSPC && count > 1) {
xfs_fileoff_t b;
int c;
@@ -2209,16 +2333,13 @@ xfs_da_grow_inode_int(
args->total, &mapp[mapi], &nmap);
if (error)
goto out_free_map;
- if (nmap < 1)
- break;
mapi += nmap;
b = mapp[mapi - 1].br_startoff +
mapp[mapi - 1].br_blockcount;
}
- } else {
- mapi = 0;
- mapp = NULL;
}
+ if (error)
+ goto out_free_map;
/*
* Count the blocks we got, make sure it matches the total.
@@ -2290,6 +2411,7 @@ xfs_da3_swap_lastblock(
struct xfs_buf *last_buf;
struct xfs_buf *sib_buf;
struct xfs_buf *par_buf;
+ xfs_failaddr_t fa;
xfs_dahash_t dead_hash;
xfs_fileoff_t lastoff;
xfs_dablk_t dead_blkno;
@@ -2326,6 +2448,14 @@ xfs_da3_swap_lastblock(
error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w);
if (error)
return error;
+ fa = xfs_da3_header_check(last_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(last_buf, fa);
+ xfs_trans_brelse(tp, last_buf);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
+
/*
* Copy the last block into the dead buffer and log it.
*/
@@ -2364,6 +2494,13 @@ xfs_da3_swap_lastblock(
error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
if (error)
goto done;
+ fa = xfs_da3_header_check(sib_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(sib_buf, fa);
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto done;
+ }
sib_info = sib_buf->b_addr;
if (XFS_IS_CORRUPT(mp,
be32_to_cpu(sib_info->forw) != last_blkno ||
@@ -2385,6 +2522,13 @@ xfs_da3_swap_lastblock(
error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
if (error)
goto done;
+ fa = xfs_da3_header_check(sib_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(sib_buf, fa);
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto done;
+ }
sib_info = sib_buf->b_addr;
if (XFS_IS_CORRUPT(mp,
be32_to_cpu(sib_info->back) != last_blkno ||
@@ -2408,6 +2552,13 @@ xfs_da3_swap_lastblock(
error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
if (error)
goto done;
+ fa = xfs_da3_node_header_check(par_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(par_buf, fa);
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto done;
+ }
par_node = par_buf->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
if (XFS_IS_CORRUPT(mp,
@@ -2457,6 +2608,13 @@ xfs_da3_swap_lastblock(
error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
if (error)
goto done;
+ fa = xfs_da3_node_header_check(par_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(par_buf, fa);
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto done;
+ }
par_node = par_buf->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 706baf36e175..354d5d65043e 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -54,17 +54,24 @@ enum xfs_dacmp {
*/
typedef struct xfs_da_args {
struct xfs_da_geometry *geo; /* da block geometry */
- const uint8_t *name; /* string (maybe not NULL terminated) */
- int namelen; /* length of string (maybe no NULL) */
- uint8_t filetype; /* filetype of inode for directories */
+ const uint8_t *name; /* string (maybe not NULL terminated) */
+ const uint8_t *new_name; /* new attr name */
void *value; /* set of bytes (maybe contain NULLs) */
- int valuelen; /* length of value */
- unsigned int attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */
- unsigned int attr_flags; /* XATTR_{CREATE,REPLACE} */
- xfs_dahash_t hashval; /* hash value of name */
- xfs_ino_t inumber; /* input/output inode number */
+ void *new_value; /* new xattr value (may contain NULLs) */
struct xfs_inode *dp; /* directory inode to manipulate */
struct xfs_trans *trans; /* current trans (changes over time) */
+
+ xfs_ino_t inumber; /* input/output inode number */
+ xfs_ino_t owner; /* inode that owns the dir/attr data */
+
+ int valuelen; /* length of value */
+ int new_valuelen; /* length of new_value */
+ uint8_t filetype; /* filetype of inode for directories */
+ uint8_t op_flags; /* operation flags */
+ uint8_t attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */
+ short namelen; /* length of string (maybe no NULL) */
+ short new_namelen; /* length of new attr name */
+ xfs_dahash_t hashval; /* hash value of name */
xfs_extlen_t total; /* total blocks needed, for 1st bmap */
int whichfork; /* data or attribute fork */
xfs_dablk_t blkno; /* blkno of attr leaf of interest */
@@ -77,7 +84,6 @@ typedef struct xfs_da_args {
xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
int rmtblkcnt2; /* remote attr value block count */
int rmtvaluelen2; /* remote attr value length in bytes */
- uint32_t op_flags; /* operation flags */
enum xfs_dacmp cmpresult; /* name compare result for lookups */
} xfs_da_args_t;
@@ -89,10 +95,8 @@ typedef struct xfs_da_args {
#define XFS_DA_OP_ADDNAME (1u << 2) /* this is an add operation */
#define XFS_DA_OP_OKNOENT (1u << 3) /* lookup op, ENOENT ok, else die */
#define XFS_DA_OP_CILOOKUP (1u << 4) /* lookup returns CI name if found */
-#define XFS_DA_OP_NOTIME (1u << 5) /* don't update inode timestamps */
-#define XFS_DA_OP_REMOVE (1u << 6) /* this is a remove operation */
-#define XFS_DA_OP_RECOVERY (1u << 7) /* Log recovery operation */
-#define XFS_DA_OP_LOGGED (1u << 8) /* Use intent items to track op */
+#define XFS_DA_OP_RECOVERY (1u << 5) /* Log recovery operation */
+#define XFS_DA_OP_LOGGED (1u << 6) /* Use intent items to track op */
#define XFS_DA_OP_FLAGS \
{ XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
@@ -100,8 +104,6 @@ typedef struct xfs_da_args {
{ XFS_DA_OP_ADDNAME, "ADDNAME" }, \
{ XFS_DA_OP_OKNOENT, "OKNOENT" }, \
{ XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \
- { XFS_DA_OP_NOTIME, "NOTIME" }, \
- { XFS_DA_OP_REMOVE, "REMOVE" }, \
{ XFS_DA_OP_RECOVERY, "RECOVERY" }, \
{ XFS_DA_OP_LOGGED, "LOGGED" }
@@ -235,6 +237,8 @@ void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp,
struct xfs_da3_icnode_hdr *to, struct xfs_da_intnode *from);
void xfs_da3_node_hdr_to_disk(struct xfs_mount *mp,
struct xfs_da_intnode *to, struct xfs_da3_icnode_hdr *from);
+xfs_failaddr_t xfs_da3_header_check(struct xfs_buf *bp, xfs_ino_t owner);
+xfs_failaddr_t xfs_da3_node_header_check(struct xfs_buf *bp, xfs_ino_t owner);
extern struct kmem_cache *xfs_da_state_cache;
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 060e5c96b70f..86de99e2f757 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -714,12 +714,30 @@ struct xfs_attr3_leafblock {
#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */
#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */
#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */
+#define XFS_ATTR_PARENT_BIT 3 /* parent pointer attrs */
#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */
#define XFS_ATTR_LOCAL (1u << XFS_ATTR_LOCAL_BIT)
#define XFS_ATTR_ROOT (1u << XFS_ATTR_ROOT_BIT)
#define XFS_ATTR_SECURE (1u << XFS_ATTR_SECURE_BIT)
+#define XFS_ATTR_PARENT (1u << XFS_ATTR_PARENT_BIT)
#define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT)
-#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
+
+#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | \
+ XFS_ATTR_SECURE | \
+ XFS_ATTR_PARENT)
+
+/* Private attr namespaces not exposed to userspace */
+#define XFS_ATTR_PRIVATE_NSP_MASK (XFS_ATTR_PARENT)
+
+#define XFS_ATTR_ONDISK_MASK (XFS_ATTR_NSP_ONDISK_MASK | \
+ XFS_ATTR_LOCAL | \
+ XFS_ATTR_INCOMPLETE)
+
+#define XFS_ATTR_NAMESPACE_STR \
+ { XFS_ATTR_LOCAL, "local" }, \
+ { XFS_ATTR_ROOT, "root" }, \
+ { XFS_ATTR_SECURE, "secure" }, \
+ { XFS_ATTR_PARENT, "parent" }
/*
* Alignment for namelist and valuelist entries (since they are mixed
@@ -862,9 +880,7 @@ struct xfs_attr3_rmt_hdr {
#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
-#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \
- ((bufsize) - (xfs_has_crc((mp)) ? \
- sizeof(struct xfs_attr3_rmt_hdr) : 0))
+unsigned int xfs_attr3_rmt_buf_space(struct xfs_mount *mp);
/* Number of bytes in a directory block. */
static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
@@ -875,4 +891,17 @@ static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
xfs_failaddr_t xfs_da3_blkinfo_verify(struct xfs_buf *bp,
struct xfs_da3_blkinfo *hdr3);
+/*
+ * Parent pointer attribute format definition
+ *
+ * The xattr name contains the dirent name.
+ * The xattr value encodes the parent inode number and generation to ease
+ * opening parents by handle.
+ * The xattr hashval is xfs_dir2_namehash() ^ p_ino
+ */
+struct xfs_parent_rec {
+ __be64 p_ino;
+ __be32 p_gen;
+} __packed;
+
#endif /* __XFS_DA_FORMAT_H__ */
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index c13276095cc0..5b377cbbb1f7 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -12,12 +12,14 @@
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"
+#include "xfs_log_priv.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "xfs_bmap.h"
@@ -26,7 +28,7 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
-#include "xfs_trans_priv.h"
+#include "xfs_exchmaps.h"
static struct kmem_cache *xfs_defer_pending_cache;
@@ -555,7 +557,7 @@ xfs_defer_relog(
* the log threshold once per call.
*/
if (threshold_lsn == NULLCOMMITLSN) {
- threshold_lsn = xlog_grant_push_threshold(log, 0);
+ threshold_lsn = xfs_ail_get_push_target(log->l_ailp);
if (threshold_lsn == NULLCOMMITLSN)
break;
}
@@ -844,6 +846,12 @@ xfs_defer_add(
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+ if (!ops->finish_item) {
+ ASSERT(ops->finish_item != NULL);
+ xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE);
+ return NULL;
+ }
+
dfp = xfs_defer_find_last(tp, ops);
if (!dfp || !xfs_defer_can_append(dfp, ops))
dfp = xfs_defer_alloc(&tp->t_dfops, ops);
@@ -1091,7 +1099,11 @@ xfs_defer_ops_continue(
ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
/* Lock the captured resources to the new transaction. */
- if (dfc->dfc_held.dr_inos == 2)
+ if (dfc->dfc_held.dr_inos > 2) {
+ xfs_sort_inodes(dfc->dfc_held.dr_ip, dfc->dfc_held.dr_inos);
+ xfs_lock_inodes(dfc->dfc_held.dr_ip, dfc->dfc_held.dr_inos,
+ XFS_ILOCK_EXCL);
+ } else if (dfc->dfc_held.dr_inos == 2)
xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
else if (dfc->dfc_held.dr_inos == 1)
@@ -1176,6 +1188,10 @@ xfs_defer_init_item_caches(void)
error = xfs_attr_intent_init_cache();
if (error)
goto err;
+ error = xfs_exchmaps_intent_init_cache();
+ if (error)
+ goto err;
+
return 0;
err:
xfs_defer_destroy_item_caches();
@@ -1186,6 +1202,7 @@ err:
void
xfs_defer_destroy_item_caches(void)
{
+ xfs_exchmaps_intent_destroy_cache();
xfs_attr_intent_destroy_cache();
xfs_extfree_intent_destroy_cache();
xfs_bmap_intent_destroy_cache();
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 18a9fb92dde8..ec51b8465e61 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -71,13 +71,20 @@ extern const struct xfs_defer_op_type xfs_refcount_update_defer_type;
extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
+extern const struct xfs_defer_op_type xfs_rtextent_free_defer_type;
extern const struct xfs_defer_op_type xfs_attr_defer_type;
-
+extern const struct xfs_defer_op_type xfs_exchmaps_defer_type;
/*
* Deferred operation item relogging limits.
*/
-#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
+
+/*
+ * Rename with parent pointers can require up to 5 inodes with deferred ops to
+ * be joined to the transaction: src_dp, target_dp, src_ip, target_ip, and wip.
+ * These inodes are locked in sorted order by their inode numbers.
+ */
+#define XFS_DEFER_OPS_NR_INODES 5
#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
/* Resources that must be held across a transaction roll. */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 4821519efad4..202468223bf9 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -19,6 +19,11 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_health.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_parent.h"
+#include "xfs_ag.h"
+#include "xfs_ialloc.h"
const struct xfs_name xfs_name_dotdot = {
.name = (const unsigned char *)"..",
@@ -250,11 +255,68 @@ xfs_dir_init(
args->geo = dp->i_mount->m_dir_geo;
args->dp = dp;
args->trans = tp;
+ args->owner = dp->i_ino;
error = xfs_dir2_sf_create(args, pdp->i_ino);
kfree(args);
return error;
}
+enum xfs_dir2_fmt
+xfs_dir2_format(
+ struct xfs_da_args *args,
+ int *error)
+{
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
+ xfs_fileoff_t eof;
+
+ xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
+
+ *error = 0;
+ if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
+ return XFS_DIR2_FMT_SF;
+
+ *error = xfs_bmap_last_offset(dp, &eof, XFS_DATA_FORK);
+ if (*error)
+ return XFS_DIR2_FMT_ERROR;
+
+ if (eof == XFS_B_TO_FSB(mp, geo->blksize)) {
+ if (XFS_IS_CORRUPT(mp, dp->i_disk_size != geo->blksize)) {
+ xfs_da_mark_sick(args);
+ *error = -EFSCORRUPTED;
+ return XFS_DIR2_FMT_ERROR;
+ }
+ return XFS_DIR2_FMT_BLOCK;
+ }
+ if (eof == geo->leafblk + geo->fsbcount)
+ return XFS_DIR2_FMT_LEAF;
+ return XFS_DIR2_FMT_NODE;
+}
+
+int
+xfs_dir_createname_args(
+ struct xfs_da_args *args)
+{
+ int error;
+
+ if (!args->inumber)
+ args->op_flags |= XFS_DA_OP_JUSTCHECK;
+
+ switch (xfs_dir2_format(args, &error)) {
+ case XFS_DIR2_FMT_SF:
+ return xfs_dir2_sf_addname(args);
+ case XFS_DIR2_FMT_BLOCK:
+ return xfs_dir2_block_addname(args);
+ case XFS_DIR2_FMT_LEAF:
+ return xfs_dir2_leaf_addname(args);
+ case XFS_DIR2_FMT_NODE:
+ return xfs_dir2_node_addname(args);
+ default:
+ return error;
+ }
+}
+
/*
* Enter a name in a directory, or check for available space.
* If inum is 0, only the available space test is performed.
@@ -269,7 +331,6 @@ xfs_dir_createname(
{
struct xfs_da_args *args;
int rval;
- bool v;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
@@ -295,31 +356,9 @@ xfs_dir_createname(
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
- if (!inum)
- args->op_flags |= XFS_DA_OP_JUSTCHECK;
-
- if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- rval = xfs_dir2_sf_addname(args);
- goto out_free;
- }
-
- rval = xfs_dir2_isblock(args, &v);
- if (rval)
- goto out_free;
- if (v) {
- rval = xfs_dir2_block_addname(args);
- goto out_free;
- }
-
- rval = xfs_dir2_isleaf(args, &v);
- if (rval)
- goto out_free;
- if (v)
- rval = xfs_dir2_leaf_addname(args);
- else
- rval = xfs_dir2_node_addname(args);
+ args->owner = dp->i_ino;
-out_free:
+ rval = xfs_dir_createname_args(args);
kfree(args);
return rval;
}
@@ -350,6 +389,34 @@ xfs_dir_cilookup_result(
return -EEXIST;
}
+int
+xfs_dir_lookup_args(
+ struct xfs_da_args *args)
+{
+ int error;
+
+ switch (xfs_dir2_format(args, &error)) {
+ case XFS_DIR2_FMT_SF:
+ error = xfs_dir2_sf_lookup(args);
+ break;
+ case XFS_DIR2_FMT_BLOCK:
+ error = xfs_dir2_block_lookup(args);
+ break;
+ case XFS_DIR2_FMT_LEAF:
+ error = xfs_dir2_leaf_lookup(args);
+ break;
+ case XFS_DIR2_FMT_NODE:
+ error = xfs_dir2_node_lookup(args);
+ break;
+ default:
+ break;
+ }
+
+ if (error != -EEXIST)
+ return error;
+ return 0;
+}
+
/*
* Lookup a name in a directory, give back the inode number.
* If ci_name is not NULL, returns the actual name in ci_name if it differs
@@ -366,7 +433,6 @@ xfs_dir_lookup(
{
struct xfs_da_args *args;
int rval;
- bool v;
int lock_mode;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
@@ -383,34 +449,12 @@ xfs_dir_lookup(
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
args->op_flags = XFS_DA_OP_OKNOENT;
+ args->owner = dp->i_ino;
if (ci_name)
args->op_flags |= XFS_DA_OP_CILOOKUP;
lock_mode = xfs_ilock_data_map_shared(dp);
- if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- rval = xfs_dir2_sf_lookup(args);
- goto out_check_rval;
- }
-
- rval = xfs_dir2_isblock(args, &v);
- if (rval)
- goto out_free;
- if (v) {
- rval = xfs_dir2_block_lookup(args);
- goto out_check_rval;
- }
-
- rval = xfs_dir2_isleaf(args, &v);
- if (rval)
- goto out_free;
- if (v)
- rval = xfs_dir2_leaf_lookup(args);
- else
- rval = xfs_dir2_node_lookup(args);
-
-out_check_rval:
- if (rval == -EEXIST)
- rval = 0;
+ rval = xfs_dir_lookup_args(args);
if (!rval) {
*inum = args->inumber;
if (ci_name) {
@@ -418,12 +462,31 @@ out_check_rval:
ci_name->len = args->valuelen;
}
}
-out_free:
xfs_iunlock(dp, lock_mode);
kfree(args);
return rval;
}
+int
+xfs_dir_removename_args(
+ struct xfs_da_args *args)
+{
+ int error;
+
+ switch (xfs_dir2_format(args, &error)) {
+ case XFS_DIR2_FMT_SF:
+ return xfs_dir2_sf_removename(args);
+ case XFS_DIR2_FMT_BLOCK:
+ return xfs_dir2_block_removename(args);
+ case XFS_DIR2_FMT_LEAF:
+ return xfs_dir2_leaf_removename(args);
+ case XFS_DIR2_FMT_NODE:
+ return xfs_dir2_node_removename(args);
+ default:
+ return error;
+ }
+}
+
/*
* Remove an entry from a directory.
*/
@@ -431,13 +494,12 @@ int
xfs_dir_removename(
struct xfs_trans *tp,
struct xfs_inode *dp,
- struct xfs_name *name,
+ const struct xfs_name *name,
xfs_ino_t ino,
xfs_extlen_t total) /* bmap's total block count */
{
struct xfs_da_args *args;
int rval;
- bool v;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
XFS_STATS_INC(dp->i_mount, xs_dir_remove);
@@ -456,30 +518,30 @@ xfs_dir_removename(
args->total = total;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
+ args->owner = dp->i_ino;
+ rval = xfs_dir_removename_args(args);
+ kfree(args);
+ return rval;
+}
- if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- rval = xfs_dir2_sf_removename(args);
- goto out_free;
- }
+int
+xfs_dir_replace_args(
+ struct xfs_da_args *args)
+{
+ int error;
- rval = xfs_dir2_isblock(args, &v);
- if (rval)
- goto out_free;
- if (v) {
- rval = xfs_dir2_block_removename(args);
- goto out_free;
+ switch (xfs_dir2_format(args, &error)) {
+ case XFS_DIR2_FMT_SF:
+ return xfs_dir2_sf_replace(args);
+ case XFS_DIR2_FMT_BLOCK:
+ return xfs_dir2_block_replace(args);
+ case XFS_DIR2_FMT_LEAF:
+ return xfs_dir2_leaf_replace(args);
+ case XFS_DIR2_FMT_NODE:
+ return xfs_dir2_node_replace(args);
+ default:
+ return error;
}
-
- rval = xfs_dir2_isleaf(args, &v);
- if (rval)
- goto out_free;
- if (v)
- rval = xfs_dir2_leaf_removename(args);
- else
- rval = xfs_dir2_node_removename(args);
-out_free:
- kfree(args);
- return rval;
}
/*
@@ -495,7 +557,6 @@ xfs_dir_replace(
{
struct xfs_da_args *args;
int rval;
- bool v;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
@@ -517,28 +578,8 @@ xfs_dir_replace(
args->total = total;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
-
- if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- rval = xfs_dir2_sf_replace(args);
- goto out_free;
- }
-
- rval = xfs_dir2_isblock(args, &v);
- if (rval)
- goto out_free;
- if (v) {
- rval = xfs_dir2_block_replace(args);
- goto out_free;
- }
-
- rval = xfs_dir2_isleaf(args, &v);
- if (rval)
- goto out_free;
- if (v)
- rval = xfs_dir2_leaf_replace(args);
- else
- rval = xfs_dir2_node_replace(args);
-out_free:
+ args->owner = dp->i_ino;
+ rval = xfs_dir_replace_args(args);
kfree(args);
return rval;
}
@@ -548,9 +589,9 @@ out_free:
*/
int
xfs_dir_canenter(
- xfs_trans_t *tp,
- xfs_inode_t *dp,
- struct xfs_name *name) /* name of entry to add */
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ const struct xfs_name *name) /* name of entry to add */
{
return xfs_dir_createname(tp, dp, name, 0, 0);
}
@@ -607,57 +648,6 @@ xfs_dir2_grow_inode(
}
/*
- * See if the directory is a single-block form directory.
- */
-int
-xfs_dir2_isblock(
- struct xfs_da_args *args,
- bool *isblock)
-{
- struct xfs_mount *mp = args->dp->i_mount;
- xfs_fileoff_t eof;
- int error;
-
- error = xfs_bmap_last_offset(args->dp, &eof, XFS_DATA_FORK);
- if (error)
- return error;
-
- *isblock = false;
- if (XFS_FSB_TO_B(mp, eof) != args->geo->blksize)
- return 0;
-
- *isblock = true;
- if (XFS_IS_CORRUPT(mp, args->dp->i_disk_size != args->geo->blksize)) {
- xfs_da_mark_sick(args);
- return -EFSCORRUPTED;
- }
- return 0;
-}
-
-/*
- * See if the directory is a single-leaf form directory.
- */
-int
-xfs_dir2_isleaf(
- struct xfs_da_args *args,
- bool *isleaf)
-{
- xfs_fileoff_t eof;
- int error;
-
- error = xfs_bmap_last_offset(args->dp, &eof, XFS_DATA_FORK);
- if (error)
- return error;
-
- *isleaf = false;
- if (eof != args->geo->leafblk + args->geo->fsbcount)
- return 0;
-
- *isleaf = true;
- return 0;
-}
-
-/*
* Remove the given block from the directory.
* This routine is used for data and free blocks, leaf/node are done
* by xfs_da_shrink_inode.
@@ -771,3 +761,653 @@ xfs_dir2_compname(
return xfs_ascii_ci_compname(args, name, len);
return xfs_da_compname(args, name, len);
}
+
+#ifdef CONFIG_XFS_LIVE_HOOKS
+/*
+ * Use a static key here to reduce the overhead of directory live update hooks.
+ * If the compiler supports jump labels, the static branch will be replaced by
+ * a nop sled when there are no hook users. Online fsck is currently the only
+ * caller, so this is a reasonable tradeoff.
+ *
+ * Note: Patching the kernel code requires taking the cpu hotplug lock. Other
+ * parts of the kernel allocate memory with that lock held, which means that
+ * XFS callers cannot hold any locks that might be used by memory reclaim or
+ * writeback when calling the static_branch_{inc,dec} functions.
+ */
+DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch);
+
+void
+xfs_dir_hook_disable(void)
+{
+ xfs_hooks_switch_off(&xfs_dir_hooks_switch);
+}
+
+void
+xfs_dir_hook_enable(void)
+{
+ xfs_hooks_switch_on(&xfs_dir_hooks_switch);
+}
+
+/* Call hooks for a directory update relating to a child dirent update. */
+inline void
+xfs_dir_update_hook(
+ struct xfs_inode *dp,
+ struct xfs_inode *ip,
+ int delta,
+ const struct xfs_name *name)
+{
+ if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) {
+ struct xfs_dir_update_params p = {
+ .dp = dp,
+ .ip = ip,
+ .delta = delta,
+ .name = name,
+ };
+ struct xfs_mount *mp = ip->i_mount;
+
+ xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p);
+ }
+}
+
+/* Call the specified function during a directory update. */
+int
+xfs_dir_hook_add(
+ struct xfs_mount *mp,
+ struct xfs_dir_hook *hook)
+{
+ return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook);
+}
+
+/* Stop calling the specified function during a directory update. */
+void
+xfs_dir_hook_del(
+ struct xfs_mount *mp,
+ struct xfs_dir_hook *hook)
+{
+ xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook);
+}
+
+/* Configure directory update hook functions. */
+void
+xfs_dir_hook_setup(
+ struct xfs_dir_hook *hook,
+ notifier_fn_t mod_fn)
+{
+ xfs_hook_setup(&hook->dirent_hook, mod_fn);
+}
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
+/*
+ * Given a directory @dp, a newly allocated inode @ip, and a @name, link @ip
+ * into @dp under the given @name. If @ip is a directory, it will be
+ * initialized. Both inodes must have the ILOCK held and the transaction must
+ * have sufficient blocks reserved.
+ */
+int
+xfs_dir_create_child(
+ struct xfs_trans *tp,
+ unsigned int resblks,
+ struct xfs_dir_update *du)
+{
+ struct xfs_inode *dp = du->dp;
+ const struct xfs_name *name = du->name;
+ struct xfs_inode *ip = du->ip;
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(dp, XFS_ILOCK_EXCL);
+
+ error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks);
+ if (error) {
+ ASSERT(error != -ENOSPC);
+ return error;
+ }
+
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ error = xfs_dir_init(tp, ip, dp);
+ if (error)
+ return error;
+
+ xfs_bumplink(tp, dp);
+ }
+
+ /*
+ * If we have parent pointers, we need to add the attribute containing
+ * the parent information now.
+ */
+ if (du->ppargs) {
+ error = xfs_parent_addname(tp, du->ppargs, dp, name, ip);
+ if (error)
+ return error;
+ }
+
+ xfs_dir_update_hook(dp, ip, 1, name);
+ return 0;
+}
+
+/*
+ * Given a directory @dp, an existing non-directory inode @ip, and a @name,
+ * link @ip into @dp under the given @name. Both inodes must have the ILOCK
+ * held.
+ */
+int
+xfs_dir_add_child(
+ struct xfs_trans *tp,
+ unsigned int resblks,
+ struct xfs_dir_update *du)
+{
+ struct xfs_inode *dp = du->dp;
+ const struct xfs_name *name = du->name;
+ struct xfs_inode *ip = du->ip;
+ struct xfs_mount *mp = tp->t_mountp;
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(dp, XFS_ILOCK_EXCL);
+ ASSERT(!S_ISDIR(VFS_I(ip)->i_mode));
+
+ if (!resblks) {
+ error = xfs_dir_canenter(tp, dp, name);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Handle initial link state of O_TMPFILE inode
+ */
+ if (VFS_I(ip)->i_nlink == 0) {
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ error = xfs_iunlink_remove(tp, pag, ip);
+ xfs_perag_put(pag);
+ if (error)
+ return error;
+ }
+
+ error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+
+ xfs_bumplink(tp, ip);
+
+ /*
+ * If we have parent pointers, we now need to add the parent record to
+ * the attribute fork of the inode. If this is the initial parent
+ * attribute, we need to create it correctly, otherwise we can just add
+ * the parent to the inode.
+ */
+ if (du->ppargs) {
+ error = xfs_parent_addname(tp, du->ppargs, dp, name, ip);
+ if (error)
+ return error;
+ }
+
+ xfs_dir_update_hook(dp, ip, 1, name);
+ return 0;
+}
+
+/*
+ * Given a directory @dp, a child @ip, and a @name, remove the (@name, @ip)
+ * entry from the directory. Both inodes must have the ILOCK held.
+ */
+int
+xfs_dir_remove_child(
+ struct xfs_trans *tp,
+ unsigned int resblks,
+ struct xfs_dir_update *du)
+{
+ struct xfs_inode *dp = du->dp;
+ const struct xfs_name *name = du->name;
+ struct xfs_inode *ip = du->ip;
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(dp, XFS_ILOCK_EXCL);
+
+ /*
+ * If we're removing a directory perform some additional validation.
+ */
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ ASSERT(VFS_I(ip)->i_nlink >= 2);
+ if (VFS_I(ip)->i_nlink != 2)
+ return -ENOTEMPTY;
+ if (!xfs_dir_isempty(ip))
+ return -ENOTEMPTY;
+
+ /* Drop the link from ip's "..". */
+ error = xfs_droplink(tp, dp);
+ if (error)
+ return error;
+
+ /* Drop the "." link from ip to self. */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ return error;
+
+ /*
+ * Point the unlinked child directory's ".." entry to the root
+ * directory to eliminate back-references to inodes that may
+ * get freed before the child directory is closed. If the fs
+ * gets shrunk, this can lead to dirent inode validation errors.
+ */
+ if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
+ error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
+ tp->t_mountp->m_sb.sb_rootino, 0);
+ if (error)
+ return error;
+ }
+ } else {
+ /*
+ * When removing a non-directory we need to log the parent
+ * inode here. For a directory this is done implicitly
+ * by the xfs_droplink call for the ".." entry.
+ */
+ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+ }
+ xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ /* Drop the link from dp to ip. */
+ error = xfs_droplink(tp, ip);
+ if (error)
+ return error;
+
+ error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
+ if (error) {
+ ASSERT(error != -ENOENT);
+ return error;
+ }
+
+ /* Remove parent pointer. */
+ if (du->ppargs) {
+ error = xfs_parent_removename(tp, du->ppargs, dp, name, ip);
+ if (error)
+ return error;
+ }
+
+ xfs_dir_update_hook(dp, ip, -1, name);
+ return 0;
+}
+
+/*
+ * Exchange the entry (@name1, @ip1) in directory @dp1 with the entry (@name2,
+ * @ip2) in directory @dp2, and update '..' @ip1 and @ip2's entries as needed.
+ * @ip1 and @ip2 need not be of the same type.
+ *
+ * All inodes must have the ILOCK held, and both entries must already exist.
+ */
+int
+xfs_dir_exchange_children(
+ struct xfs_trans *tp,
+ struct xfs_dir_update *du1,
+ struct xfs_dir_update *du2,
+ unsigned int spaceres)
+{
+ struct xfs_inode *dp1 = du1->dp;
+ const struct xfs_name *name1 = du1->name;
+ struct xfs_inode *ip1 = du1->ip;
+ struct xfs_inode *dp2 = du2->dp;
+ const struct xfs_name *name2 = du2->name;
+ struct xfs_inode *ip2 = du2->ip;
+ int ip1_flags = 0;
+ int ip2_flags = 0;
+ int dp2_flags = 0;
+ int error;
+
+ /* Swap inode number for dirent in first parent */
+ error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /* Swap inode number for dirent in second parent */
+ error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /*
+ * If we're renaming one or more directories across different parents,
+ * update the respective ".." entries (and link counts) to match the new
+ * parents.
+ */
+ if (dp1 != dp2) {
+ dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+
+ if (S_ISDIR(VFS_I(ip2)->i_mode)) {
+ error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
+ dp1->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /* transfer ip2 ".." reference to dp1 */
+ if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
+ error = xfs_droplink(tp, dp2);
+ if (error)
+ return error;
+ xfs_bumplink(tp, dp1);
+ }
+
+ /*
+ * Although ip1 itself isn't modified here, update its
+ * change time so that applications that rely on it
+ * (such as backup tools) will properly notice the
+ * change.
+ */
+ ip1_flags |= XFS_ICHGTIME_CHG;
+ ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+ }
+
+ if (S_ISDIR(VFS_I(ip1)->i_mode)) {
+ error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
+ dp2->i_ino, spaceres);
+ if (error)
+ return error;
+
+ /* transfer ip1 ".." reference to dp2 */
+ if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
+ error = xfs_droplink(tp, dp1);
+ if (error)
+ return error;
+ xfs_bumplink(tp, dp2);
+ }
+
+ /*
+ * Although ip2 itself isn't modified here, update its
+ * change time so that applications that rely on it
+ * (such as backup tools) will properly notice the
+ * change.
+ */
+ ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+ ip2_flags |= XFS_ICHGTIME_CHG;
+ }
+ }
+
+ if (ip1_flags) {
+ xfs_trans_ichgtime(tp, ip1, ip1_flags);
+ xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
+ }
+ if (ip2_flags) {
+ xfs_trans_ichgtime(tp, ip2, ip2_flags);
+ xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
+ }
+ if (dp2_flags) {
+ xfs_trans_ichgtime(tp, dp2, dp2_flags);
+ xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
+ }
+ xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
+
+ /* Schedule parent pointer replacements */
+ if (du1->ppargs) {
+ error = xfs_parent_replacename(tp, du1->ppargs, dp1, name1,
+ dp2, name2, ip1);
+ if (error)
+ return error;
+ }
+
+ if (du2->ppargs) {
+ error = xfs_parent_replacename(tp, du2->ppargs, dp2, name2,
+ dp1, name1, ip2);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Inform our hook clients that we've finished an exchange operation as
+ * follows: removed the source and target files from their directories;
+ * added the target to the source directory; and added the source to
+ * the target directory. All inodes are locked, so it's ok to model a
+ * rename this way so long as we say we deleted entries before we add
+ * new ones.
+ */
+ xfs_dir_update_hook(dp1, ip1, -1, name1);
+ xfs_dir_update_hook(dp2, ip2, -1, name2);
+ xfs_dir_update_hook(dp1, ip2, 1, name1);
+ xfs_dir_update_hook(dp2, ip1, 1, name2);
+ return 0;
+}
+
+/*
+ * Given an entry (@src_name, @src_ip) in directory @src_dp, make the entry
+ * @target_name in directory @target_dp point to @src_ip and remove the
+ * original entry, cleaning up everything left behind.
+ *
+ * Cleanup involves dropping a link count on @target_ip, and either removing
+ * the (@src_name, @src_ip) entry from @src_dp or simply replacing the entry
+ * with (@src_name, @wip) if a whiteout inode @wip is supplied.
+ *
+ * All inodes must have the ILOCK held. We assume that if @src_ip is a
+ * directory then its '..' doesn't already point to @target_dp, and that @wip
+ * is a freshly allocated whiteout.
+ */
+int
+xfs_dir_rename_children(
+ struct xfs_trans *tp,
+ struct xfs_dir_update *du_src,
+ struct xfs_dir_update *du_tgt,
+ unsigned int spaceres,
+ struct xfs_dir_update *du_wip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *src_dp = du_src->dp;
+ const struct xfs_name *src_name = du_src->name;
+ struct xfs_inode *src_ip = du_src->ip;
+ struct xfs_inode *target_dp = du_tgt->dp;
+ const struct xfs_name *target_name = du_tgt->name;
+ struct xfs_inode *target_ip = du_tgt->ip;
+ bool new_parent = (src_dp != target_dp);
+ bool src_is_directory;
+ int error;
+
+ src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
+
+ /*
+ * Check for expected errors before we dirty the transaction
+ * so we can return an error without a transaction abort.
+ */
+ if (target_ip == NULL) {
+ /*
+ * If there's no space reservation, check the entry will
+ * fit before actually inserting it.
+ */
+ if (!spaceres) {
+ error = xfs_dir_canenter(tp, target_dp, target_name);
+ if (error)
+ return error;
+ }
+ } else {
+ /*
+ * If the target exists and is a directory, check whether
+ * it can be destroyed.
+ */
+ if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
+ (!xfs_dir_isempty(target_ip) ||
+ (VFS_I(target_ip)->i_nlink > 2)))
+ return -EEXIST;
+ }
+
+ /*
+ * Directory entry creation below may acquire the AGF. Remove
+ * the whiteout from the unlinked list first to preserve correct
+ * AGI/AGF locking order. This dirties the transaction so failures
+ * after this point will abort and log recovery will clean up the
+ * mess.
+ *
+ * For whiteouts, we need to bump the link count on the whiteout
+ * inode. After this point, we have a real link, clear the tmpfile
+ * state flag from the inode so it doesn't accidentally get misused
+ * in future.
+ */
+ if (du_wip->ip) {
+ struct xfs_perag *pag;
+
+ ASSERT(VFS_I(du_wip->ip)->i_nlink == 0);
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, du_wip->ip->i_ino));
+ error = xfs_iunlink_remove(tp, pag, du_wip->ip);
+ xfs_perag_put(pag);
+ if (error)
+ return error;
+
+ xfs_bumplink(tp, du_wip->ip);
+ }
+
+ /*
+ * Set up the target.
+ */
+ if (target_ip == NULL) {
+ /*
+ * The target does not exist, so create the new entry. If
+ * the rename moves a directory across parents, the target
+ * directory link count is bumped below for the new "..".
+ */
+ error = xfs_dir_createname(tp, target_dp, target_name,
+ src_ip->i_ino, spaceres);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, target_dp,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ if (new_parent && src_is_directory) {
+ xfs_bumplink(tp, target_dp);
+ }
+ } else { /* target_ip != NULL */
+ /*
+ * Link the source inode under the target name.
+ * If the source inode is a directory and we are moving
+ * it across directories, its ".." entry will be
+ * inconsistent until we replace that down below.
+ *
+ * In case there is already an entry with the same
+ * name at the destination directory, remove it first.
+ */
+ error = xfs_dir_replace(tp, target_dp, target_name,
+ src_ip->i_ino, spaceres);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, target_dp,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+ /*
+ * Decrement the link count on the target since the target
+ * dir no longer points to it.
+ */
+ error = xfs_droplink(tp, target_ip);
+ if (error)
+ return error;
+
+ if (src_is_directory) {
+ /*
+ * Drop the link from the old "." entry.
+ */
+ error = xfs_droplink(tp, target_ip);
+ if (error)
+ return error;
+ }
+ } /* target_ip != NULL */
+
+ /*
+ * Remove the source.
+ */
+ if (new_parent && src_is_directory) {
+ /*
+ * Rewrite the ".." entry to point to the new
+ * directory.
+ */
+ error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
+ target_dp->i_ino, spaceres);
+ ASSERT(error != -EEXIST);
+ if (error)
+ return error;
+ }
+
+ /*
+ * We always want to hit the ctime on the source inode.
+ *
+ * This isn't strictly required by the standards since the source
+ * inode isn't really being changed, but old unix file systems did
+ * it and some incremental backup programs won't work without it.
+ */
+ xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
+
+ /*
+ * Adjust the link count on src_dp. This is necessary when
+ * renaming a directory, either within one parent when
+ * the target existed, or across two parent directories.
+ */
+ if (src_is_directory && (new_parent || target_ip != NULL)) {
+
+ /*
+ * Decrement link count on src_directory since the
+ * entry that's moved no longer points to it.
+ */
+ error = xfs_droplink(tp, src_dp);
+ if (error)
+ return error;
+ }
+
+ /*
+ * For whiteouts, we only need to update the source dirent with the
+ * inode number of the whiteout inode rather than removing it
+ * altogether.
+ */
+ if (du_wip->ip)
+ error = xfs_dir_replace(tp, src_dp, src_name, du_wip->ip->i_ino,
+ spaceres);
+ else
+ error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+ spaceres);
+ if (error)
+ return error;
+
+ xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
+ if (new_parent)
+ xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
+
+ /* Schedule parent pointer updates. */
+ if (du_wip->ppargs) {
+ error = xfs_parent_addname(tp, du_wip->ppargs, src_dp,
+ src_name, du_wip->ip);
+ if (error)
+ return error;
+ }
+
+ if (du_src->ppargs) {
+ error = xfs_parent_replacename(tp, du_src->ppargs, src_dp,
+ src_name, target_dp, target_name, src_ip);
+ if (error)
+ return error;
+ }
+
+ if (du_tgt->ppargs) {
+ error = xfs_parent_removename(tp, du_tgt->ppargs, target_dp,
+ target_name, target_ip);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Inform our hook clients that we've finished a rename operation as
+ * follows: removed the source and target files from their directories;
+ * that we've added the source to the target directory; and finally
+ * that we've added the whiteout, if there was one. All inodes are
+ * locked, so it's ok to model a rename this way so long as we say we
+ * deleted entries before we add new ones.
+ */
+ if (target_ip)
+ xfs_dir_update_hook(target_dp, target_ip, -1, target_name);
+ xfs_dir_update_hook(src_dp, src_ip, -1, src_name);
+ xfs_dir_update_hook(target_dp, src_ip, 1, target_name);
+ if (du_wip->ip)
+ xfs_dir_update_hook(src_dp, du_wip->ip, 1, src_name);
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 8497d041f316..576068ed81fa 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -36,6 +36,16 @@ xfs_dir2_samename(
return !memcmp(n1->name, n2->name, n1->len);
}
+enum xfs_dir2_fmt {
+ XFS_DIR2_FMT_SF,
+ XFS_DIR2_FMT_BLOCK,
+ XFS_DIR2_FMT_LEAF,
+ XFS_DIR2_FMT_NODE,
+ XFS_DIR2_FMT_ERROR,
+};
+
+enum xfs_dir2_fmt xfs_dir2_format(struct xfs_da_args *args, int *error);
+
/*
* Convert inode mode to directory entry filetype
*/
@@ -58,13 +68,18 @@ extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t *inum,
struct xfs_name *ci_name);
extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_name *name, xfs_ino_t ino,
+ const struct xfs_name *name, xfs_ino_t ino,
xfs_extlen_t tot);
extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t inum,
xfs_extlen_t tot);
extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_name *name);
+ const struct xfs_name *name);
+
+int xfs_dir_lookup_args(struct xfs_da_args *args);
+int xfs_dir_createname_args(struct xfs_da_args *args);
+int xfs_dir_removename_args(struct xfs_da_args *args);
+int xfs_dir_replace_args(struct xfs_da_args *args);
/*
* Direct call from the bmap code, bypassing the generic directory layer.
@@ -74,8 +89,6 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
/*
* Interface routines used by userspace utilities
*/
-extern int xfs_dir2_isblock(struct xfs_da_args *args, bool *isblock);
-extern int xfs_dir2_isleaf(struct xfs_da_args *args, bool *isleaf);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_buf *bp);
@@ -101,6 +114,10 @@ extern struct xfs_dir2_data_free *xfs_dir2_data_freefind(
extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
+xfs_failaddr_t xfs_dir3_leaf_header_check(struct xfs_buf *bp, xfs_ino_t owner);
+xfs_failaddr_t xfs_dir3_data_header_check(struct xfs_buf *bp, xfs_ino_t owner);
+xfs_failaddr_t xfs_dir3_block_header_check(struct xfs_buf *bp, xfs_ino_t owner);
+
extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
@@ -292,4 +309,51 @@ static inline unsigned char xfs_ascii_ci_xfrm(unsigned char c)
return c;
}
+struct xfs_dir_update_params { /* NOTE(review): fields mirror the xfs_dir_update_hook() arguments; presumably the hook payload -- confirm */
+ const struct xfs_inode *dp;
+ const struct xfs_inode *ip;
+ const struct xfs_name *name;
+ int delta; /* NOTE(review): the rename code passes -1 for a removed entry and 1 for an added one to the hook -- confirm same meaning here */
+};
+
+#ifdef CONFIG_XFS_LIVE_HOOKS
+void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip,
+ int delta, const struct xfs_name *name);
+
+struct xfs_dir_hook { /* thin wrapper around a struct xfs_hook */
+ struct xfs_hook dirent_hook;
+};
+
+void xfs_dir_hook_disable(void);
+void xfs_dir_hook_enable(void);
+
+int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook);
+void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook);
+void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn);
+#else
+# define xfs_dir_update_hook(dp, ip, delta, name) ((void)0) /* hooks not configured: calls expand to a no-op */
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
+struct xfs_parent_args; /* forward declaration; only a pointer is needed here */
+
+struct xfs_dir_update {
+ struct xfs_inode *dp;
+ const struct xfs_name *name;
+ struct xfs_inode *ip; /* may be NULL: the rename code tests du_wip->ip before using it */
+ struct xfs_parent_args *ppargs; /* may be NULL: the rename code only calls xfs_parent_*() when this is set */
+};
+
+int xfs_dir_create_child(struct xfs_trans *tp, unsigned int resblks,
+ struct xfs_dir_update *du);
+int xfs_dir_add_child(struct xfs_trans *tp, unsigned int resblks,
+ struct xfs_dir_update *du);
+int xfs_dir_remove_child(struct xfs_trans *tp, unsigned int resblks,
+ struct xfs_dir_update *du);
+
+int xfs_dir_exchange_children(struct xfs_trans *tp, struct xfs_dir_update *du1,
+ struct xfs_dir_update *du2, unsigned int spaceres);
+int xfs_dir_rename_children(struct xfs_trans *tp, struct xfs_dir_update *du_src,
+ struct xfs_dir_update *du_tgt, unsigned int spaceres,
+ struct xfs_dir_update *du_wip);
+
#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index a2da007adb46..0f93ed1a4a74 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -115,17 +115,20 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
.verify_struct = xfs_dir3_block_verify,
};
-static xfs_failaddr_t
+xfs_failaddr_t
xfs_dir3_block_header_check(
- struct xfs_inode *dp,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
{
- struct xfs_mount *mp = dp->i_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (xfs_has_crc(mp)) {
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
- if (be64_to_cpu(hdr3->owner) != dp->i_ino)
+ if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
+ return __this_address;
+
+ if (be64_to_cpu(hdr3->owner) != owner)
return __this_address;
}
@@ -136,6 +139,7 @@ int
xfs_dir3_block_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
struct xfs_buf **bpp)
{
struct xfs_mount *mp = dp->i_mount;
@@ -148,7 +152,7 @@ xfs_dir3_block_read(
return err;
/* Check things that we can't do in the verifier. */
- fa = xfs_dir3_block_header_check(dp, *bpp);
+ fa = xfs_dir3_block_header_check(*bpp, owner);
if (fa) {
__xfs_buf_mark_corrupt(*bpp, fa);
xfs_trans_brelse(tp, *bpp);
@@ -163,12 +167,13 @@ xfs_dir3_block_read(
static void
xfs_dir3_block_init(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- struct xfs_buf *bp,
- struct xfs_inode *dp)
+ struct xfs_da_args *args,
+ struct xfs_buf *bp)
{
- struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+ struct xfs_trans *tp = args->trans;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
bp->b_ops = &xfs_dir3_block_buf_ops;
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF);
@@ -177,7 +182,7 @@ xfs_dir3_block_init(
memset(hdr3, 0, sizeof(*hdr3));
hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->owner = cpu_to_be64(dp->i_ino);
+ hdr3->owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
return;
@@ -382,7 +387,7 @@ xfs_dir2_block_addname(
tp = args->trans;
/* Read the (one and only) directory block into bp. */
- error = xfs_dir3_block_read(tp, dp, &bp);
+ error = xfs_dir3_block_read(tp, dp, args->owner, &bp);
if (error)
return error;
@@ -697,7 +702,7 @@ xfs_dir2_block_lookup_int(
dp = args->dp;
tp = args->trans;
- error = xfs_dir3_block_read(tp, dp, &bp);
+ error = xfs_dir3_block_read(tp, dp, args->owner, &bp);
if (error)
return error;
@@ -981,7 +986,8 @@ xfs_dir2_leaf_to_block(
* Read the data block if we don't already have it, give up if it fails.
*/
if (!dbp) {
- error = xfs_dir3_data_read(tp, dp, args->geo->datablk, 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ args->geo->datablk, 0, &dbp);
if (error)
return error;
}
@@ -1009,7 +1015,7 @@ xfs_dir2_leaf_to_block(
/*
* Start converting it to block form.
*/
- xfs_dir3_block_init(mp, tp, dbp, dp);
+ xfs_dir3_block_init(args, dbp);
needlog = 1;
needscan = 0;
@@ -1129,7 +1135,7 @@ xfs_dir2_sf_to_block(
error = xfs_dir3_data_init(args, blkno, &bp);
if (error)
goto out_free;
- xfs_dir3_block_init(mp, tp, bp, dp);
+ xfs_dir3_block_init(args, bp);
hdr = bp->b_addr;
/*
@@ -1169,7 +1175,7 @@ xfs_dir2_sf_to_block(
* Create entry for .
*/
dep = bp->b_addr + offset;
- dep->inumber = cpu_to_be64(dp->i_ino);
+ dep->inumber = cpu_to_be64(args->owner);
dep->namelen = 1;
dep->name[0] = '.';
xfs_dir2_data_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 7a6d965bea71..a16b05c43e2e 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -178,6 +178,14 @@ __xfs_dir3_data_check(
while (offset < end) {
struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
+ unsigned int reclen;
+
+ /*
+ * Are the remaining bytes large enough to hold an
+ * unused entry?
+ */
+ if (offset > end - xfs_dir2_data_unusedsize(1))
+ return __this_address;
/*
* If it's unused, look for the space in the bestfree table.
@@ -187,9 +195,13 @@ __xfs_dir3_data_check(
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
xfs_failaddr_t fa;
+ reclen = xfs_dir2_data_unusedsize(
+ be16_to_cpu(dup->length));
if (lastfree != 0)
return __this_address;
- if (offset + be16_to_cpu(dup->length) > end)
+ if (be16_to_cpu(dup->length) != reclen)
+ return __this_address;
+ if (offset + reclen > end)
return __this_address;
if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
offset)
@@ -207,10 +219,18 @@ __xfs_dir3_data_check(
be16_to_cpu(bf[2].length))
return __this_address;
}
- offset += be16_to_cpu(dup->length);
+ offset += reclen;
lastfree = 1;
continue;
}
+
+ /*
+ * This is not an unused entry. Are the remaining bytes
+ * large enough for a dirent with a single-byte name?
+ */
+ if (offset > end - xfs_dir2_data_entsize(mp, 1))
+ return __this_address;
+
/*
* It's a real entry. Validate the fields.
* If this is a block directory then make sure it's
@@ -219,9 +239,10 @@ __xfs_dir3_data_check(
*/
if (dep->namelen == 0)
return __this_address;
- if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber)))
+ reclen = xfs_dir2_data_entsize(mp, dep->namelen);
+ if (offset + reclen > end)
return __this_address;
- if (offset + xfs_dir2_data_entsize(mp, dep->namelen) > end)
+ if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber)))
return __this_address;
if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset)
return __this_address;
@@ -245,7 +266,7 @@ __xfs_dir3_data_check(
if (i >= be32_to_cpu(btp->count))
return __this_address;
}
- offset += xfs_dir2_data_entsize(mp, dep->namelen);
+ offset += reclen;
}
/*
* Need to have seen all the entries and all the bestfree slots.
@@ -395,17 +416,20 @@ static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
.verify_write = xfs_dir3_data_write_verify,
};
-static xfs_failaddr_t
+xfs_failaddr_t
xfs_dir3_data_header_check(
- struct xfs_inode *dp,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
{
- struct xfs_mount *mp = dp->i_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (xfs_has_crc(mp)) {
struct xfs_dir3_data_hdr *hdr3 = bp->b_addr;
- if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino)
+ if (hdr3->hdr.magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
+ return __this_address;
+
+ if (be64_to_cpu(hdr3->hdr.owner) != owner)
return __this_address;
}
@@ -416,6 +440,7 @@ int
xfs_dir3_data_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t bno,
unsigned int flags,
struct xfs_buf **bpp)
@@ -429,7 +454,7 @@ xfs_dir3_data_read(
return err;
/* Check things that we can't do in the verifier. */
- fa = xfs_dir3_data_header_check(dp, *bpp);
+ fa = xfs_dir3_data_header_check(*bpp, owner);
if (fa) {
__xfs_buf_mark_corrupt(*bpp, fa);
xfs_trans_brelse(tp, *bpp);
@@ -725,7 +750,7 @@ xfs_dir3_data_init(
memset(hdr3, 0, sizeof(*hdr3));
hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->owner = cpu_to_be64(dp->i_ino);
+ hdr3->owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
} else
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 08dda5ce9d91..71c2f22a3f6e 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -208,6 +208,29 @@ xfs_dir3_leaf_verify(
return xfs_dir3_leaf_check_int(mp, &leafhdr, bp->b_addr, true);
}
+xfs_failaddr_t
+xfs_dir3_leaf_header_check(
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
+{
+ struct xfs_mount *mp = bp->b_mount;
+
+ if (xfs_has_crc(mp)) { /* nothing is checked when the CRC feature is off */
+ struct xfs_dir3_leaf *hdr3 = bp->b_addr;
+
+ if (hdr3->hdr.info.hdr.magic != /* either the LEAF1 or the LEAFN magic is accepted */
+ cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) &&
+ hdr3->hdr.info.hdr.magic !=
+ cpu_to_be16(XFS_DIR3_LEAFN_MAGIC))
+ return __this_address;
+
+ if (be64_to_cpu(hdr3->hdr.info.owner) != owner) /* on-disk owner must equal the caller-supplied owner */
+ return __this_address;
+ }
+
+ return NULL; /* no problem found; the leaf read helpers treat non-NULL as corruption */
+}
+
static void
xfs_dir3_leaf_read_verify(
struct xfs_buf *bp)
@@ -271,32 +294,60 @@ int
xfs_dir3_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
+ xfs_failaddr_t fa;
int err;
err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK,
&xfs_dir3_leaf1_buf_ops);
- if (!err && tp && *bpp)
+ if (err || !(*bpp))
+ return err;
+
+ fa = xfs_dir3_leaf_header_check(*bpp, owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(*bpp, fa);
+ xfs_trans_brelse(tp, *bpp);
+ *bpp = NULL;
+ xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
+ return -EFSCORRUPTED;
+ }
+
+ if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
- return err;
+ return 0;
}
int
xfs_dir3_leafn_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
+ xfs_failaddr_t fa;
int err;
err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK,
&xfs_dir3_leafn_buf_ops);
- if (!err && tp && *bpp)
+ if (err || !(*bpp))
+ return err;
+
+ fa = xfs_dir3_leaf_header_check(*bpp, owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(*bpp, fa);
+ xfs_trans_brelse(tp, *bpp);
+ *bpp = NULL;
+ xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
+ return -EFSCORRUPTED;
+ }
+
+ if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
- return err;
+ return 0;
}
/*
@@ -304,12 +355,12 @@ xfs_dir3_leafn_read(
*/
static void
xfs_dir3_leaf_init(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
+ struct xfs_da_args *args,
struct xfs_buf *bp,
- xfs_ino_t owner,
uint16_t type)
{
+ struct xfs_mount *mp = args->dp->i_mount;
+ struct xfs_trans *tp = args->trans;
struct xfs_dir2_leaf *leaf = bp->b_addr;
ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC);
@@ -323,7 +374,7 @@ xfs_dir3_leaf_init(
? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)
: cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
leaf3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
- leaf3->info.owner = cpu_to_be64(owner);
+ leaf3->info.owner = cpu_to_be64(args->owner);
uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid);
} else {
memset(leaf, 0, sizeof(*leaf));
@@ -356,7 +407,6 @@ xfs_dir3_leaf_get_buf(
{
struct xfs_inode *dp = args->dp;
struct xfs_trans *tp = args->trans;
- struct xfs_mount *mp = dp->i_mount;
struct xfs_buf *bp;
int error;
@@ -369,7 +419,7 @@ xfs_dir3_leaf_get_buf(
if (error)
return error;
- xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
+ xfs_dir3_leaf_init(args, bp, magic);
xfs_dir3_leaf_log_header(args, bp);
if (magic == XFS_DIR2_LEAF1_MAGIC)
xfs_dir3_leaf_log_tail(args, bp);
@@ -647,7 +697,8 @@ xfs_dir2_leaf_addname(
trace_xfs_dir2_leaf_addname(args);
- error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp);
+ error = xfs_dir3_leaf_read(tp, dp, args->owner, args->geo->leafblk,
+ &lbp);
if (error)
return error;
@@ -834,9 +885,9 @@ xfs_dir2_leaf_addname(
* Already had space in some data block.
* Just read that one in.
*/
- error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, use_block),
- 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(args->geo, use_block), 0,
+ &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1238,7 +1289,8 @@ xfs_dir2_leaf_lookup_int(
tp = args->trans;
mp = dp->i_mount;
- error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp);
+ error = xfs_dir3_leaf_read(tp, dp, args->owner, args->geo->leafblk,
+ &lbp);
if (error)
return error;
@@ -1276,9 +1328,9 @@ xfs_dir2_leaf_lookup_int(
if (newdb != curdb) {
if (dbp)
xfs_trans_brelse(tp, dbp);
- error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, newdb),
- 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(args->geo, newdb), 0,
+ &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1318,9 +1370,9 @@ xfs_dir2_leaf_lookup_int(
ASSERT(cidb != -1);
if (cidb != curdb) {
xfs_trans_brelse(tp, dbp);
- error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, cidb),
- 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(args->geo, cidb), 0,
+ &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1614,7 +1666,8 @@ xfs_dir2_leaf_trim_data(
/*
* Read the offending data block. We need its buffer.
*/
- error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(geo, db), 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(geo, db), 0, &dbp);
if (error)
return error;
@@ -1753,7 +1806,8 @@ xfs_dir2_node_to_leaf(
/*
* Read the freespace block.
*/
- error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp);
+ error = xfs_dir2_free_read(tp, dp, args->owner, args->geo->freeblk,
+ &fbp);
if (error)
return error;
xfs_dir2_free_hdr_from_disk(mp, &freehdr, fbp->b_addr);
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index be0b8834028c..fe8d4fa13128 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -175,11 +175,11 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
/* Everything ok in the free block header? */
static xfs_failaddr_t
xfs_dir3_free_header_check(
- struct xfs_inode *dp,
- xfs_dablk_t fbno,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ xfs_ino_t owner,
+ xfs_dablk_t fbno)
{
- struct xfs_mount *mp = dp->i_mount;
+ struct xfs_mount *mp = bp->b_mount;
int maxbests = mp->m_dir_geo->free_max_bests;
unsigned int firstdb;
@@ -195,7 +195,7 @@ xfs_dir3_free_header_check(
return __this_address;
if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
return __this_address;
- if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino)
+ if (be64_to_cpu(hdr3->hdr.owner) != owner)
return __this_address;
} else {
struct xfs_dir2_free_hdr *hdr = bp->b_addr;
@@ -214,6 +214,7 @@ static int
__xfs_dir3_free_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
unsigned int flags,
struct xfs_buf **bpp)
@@ -227,7 +228,7 @@ __xfs_dir3_free_read(
return err;
/* Check things that we can't do in the verifier. */
- fa = xfs_dir3_free_header_check(dp, fbno, *bpp);
+ fa = xfs_dir3_free_header_check(*bpp, owner, fbno);
if (fa) {
__xfs_buf_mark_corrupt(*bpp, fa);
xfs_trans_brelse(tp, *bpp);
@@ -299,20 +300,23 @@ int
xfs_dir2_free_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
- return __xfs_dir3_free_read(tp, dp, fbno, 0, bpp);
+ return __xfs_dir3_free_read(tp, dp, owner, fbno, 0, bpp);
}
static int
xfs_dir2_free_try_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
- return __xfs_dir3_free_read(tp, dp, fbno, XFS_DABUF_MAP_HOLE_OK, bpp);
+ return __xfs_dir3_free_read(tp, dp, owner, fbno, XFS_DABUF_MAP_HOLE_OK,
+ bpp);
}
static int
@@ -349,7 +353,7 @@ xfs_dir3_free_get_buf(
hdr.magic = XFS_DIR3_FREE_MAGIC;
hdr3->hdr.blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
+ hdr3->hdr.owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_meta_uuid);
} else
hdr.magic = XFS_DIR2_FREE_MAGIC;
@@ -717,7 +721,7 @@ xfs_dir2_leafn_lookup_for_addname(
if (curbp)
xfs_trans_brelse(tp, curbp);
- error = xfs_dir2_free_read(tp, dp,
+ error = xfs_dir2_free_read(tp, dp, args->owner,
xfs_dir2_db_to_da(args->geo,
newfdb),
&curbp);
@@ -863,7 +867,7 @@ xfs_dir2_leafn_lookup_for_entry(
ASSERT(state->extravalid);
curbp = state->extrablk.bp;
} else {
- error = xfs_dir3_data_read(tp, dp,
+ error = xfs_dir3_data_read(tp, dp, args->owner,
xfs_dir2_db_to_da(args->geo,
newdb),
0, &curbp);
@@ -1356,8 +1360,8 @@ xfs_dir2_leafn_remove(
* read in the free block.
*/
fdb = xfs_dir2_db_to_fdb(geo, db);
- error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(geo, fdb),
- &fbp);
+ error = xfs_dir2_free_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(geo, fdb), &fbp);
if (error)
return error;
free = fbp->b_addr;
@@ -1562,7 +1566,8 @@ xfs_dir2_leafn_toosmall(
/*
* Read the sibling leaf block.
*/
- error = xfs_dir3_leafn_read(state->args->trans, dp, blkno, &bp);
+ error = xfs_dir3_leafn_read(state->args->trans, dp,
+ state->args->owner, blkno, &bp);
if (error)
return error;
@@ -1715,7 +1720,7 @@ xfs_dir2_node_add_datablk(
* that was just allocated.
*/
fbno = xfs_dir2_db_to_fdb(args->geo, *dbno);
- error = xfs_dir2_free_try_read(tp, dp,
+ error = xfs_dir2_free_try_read(tp, dp, args->owner,
xfs_dir2_db_to_da(args->geo, fbno), &fbp);
if (error)
return error;
@@ -1862,7 +1867,7 @@ xfs_dir2_node_find_freeblk(
* so this might not succeed. This should be really rare, so
* there's no reason to avoid it.
*/
- error = xfs_dir2_free_try_read(tp, dp,
+ error = xfs_dir2_free_try_read(tp, dp, args->owner,
xfs_dir2_db_to_da(args->geo, fbno),
&fbp);
if (error)
@@ -1948,9 +1953,8 @@ xfs_dir2_node_addname_int(
&freehdr, &findex);
} else {
/* Read the data block in. */
- error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, dbno),
- 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(args->geo, dbno), 0, &dbp);
}
if (error)
return error;
@@ -2302,7 +2306,7 @@ xfs_dir2_node_trim_free(
/*
* Read the freespace block.
*/
- error = xfs_dir2_free_try_read(tp, dp, fo, &bp);
+ error = xfs_dir2_free_try_read(tp, dp, args->owner, fo, &bp);
if (error)
return error;
/*
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 1db2e60ba827..10041350274a 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -50,8 +50,8 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
/* xfs_dir2_block.c */
-extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_buf **bpp);
+int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_ino_t owner, struct xfs_buf **bpp);
extern int xfs_dir2_block_addname(struct xfs_da_args *args);
extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
extern int xfs_dir2_block_removename(struct xfs_da_args *args);
@@ -78,7 +78,8 @@ extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp,
struct xfs_buf *bp);
int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t bno, unsigned int flags, struct xfs_buf **bpp);
+ xfs_ino_t owner, xfs_dablk_t bno, unsigned int flags,
+ struct xfs_buf **bpp);
int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
unsigned int flags);
@@ -95,9 +96,9 @@ void xfs_dir2_leaf_hdr_from_disk(struct xfs_mount *mp,
void xfs_dir2_leaf_hdr_to_disk(struct xfs_mount *mp, struct xfs_dir2_leaf *to,
struct xfs_dir3_icleaf_hdr *from);
int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t fbno, struct xfs_buf **bpp);
+ xfs_ino_t owner, xfs_dablk_t fbno, struct xfs_buf **bpp);
int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t fbno, struct xfs_buf **bpp);
+ xfs_ino_t owner, xfs_dablk_t fbno, struct xfs_buf **bpp);
extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
struct xfs_buf *dbp);
extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
@@ -154,8 +155,8 @@ extern int xfs_dir2_node_removename(struct xfs_da_args *args);
extern int xfs_dir2_node_replace(struct xfs_da_args *args);
extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
int *rvalp);
-extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t fbno, struct xfs_buf **bpp);
+int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_ino_t owner, xfs_dablk_t fbno, struct xfs_buf **bpp);
/* xfs_dir2_sf.c */
xfs_ino_t xfs_dir2_sf_get_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
@@ -189,6 +190,13 @@ extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
struct dir_context *ctx, size_t bufsize);
static inline unsigned int
+xfs_dir2_data_unusedsize(
+ unsigned int len)
+{
+ return round_up(len, XFS_DIR2_DATA_ALIGN); /* len rounded up to a multiple of XFS_DIR2_DATA_ALIGN */
+}
+
+static inline unsigned int
xfs_dir2_data_entsize(
struct xfs_mount *mp,
unsigned int namelen)
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 15a362e2f5ea..dceef2abd4e2 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -16,6 +16,9 @@
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_error.h"
+#include "xfs_health.h"
+#include "xfs_metadir.h"
+#include "xfs_metafile.h"
int
xfs_calc_dquots_per_chunk(
@@ -323,3 +326,190 @@ xfs_dquot_to_disk_ts(
return cpu_to_be32(t);
}
+
+inline unsigned int
+xfs_dqinode_sick_mask(xfs_dqtype_t type)
+{
+ switch (type) { /* map the quota type to the matching XFS_SICK_FS_* flag */
+ case XFS_DQTYPE_USER:
+ return XFS_SICK_FS_UQUOTA;
+ case XFS_DQTYPE_GROUP:
+ return XFS_SICK_FS_GQUOTA;
+ case XFS_DQTYPE_PROJ:
+ return XFS_SICK_FS_PQUOTA;
+ }
+
+ ASSERT(0); /* reached only for a type the switch does not handle */
+ return 0;
+}
+
+/*
+ * Load the inode for a given type of quota, assuming that the sb fields have
+ * been sorted out. This is not true when switching quota types on a V4
+ * filesystem, so do not use this function for that. If metadir is enabled,
+ * @dp must be the /quota metadir.
+ *
+ * Returns -ENOENT if the quota inode field is NULLFSINO; 0 and an inode on
+ * success; or a negative errno. With metadir enabled, -ENOENT from the path
+ * lookup is passed straight through as well.
+ *
+ * The per-type quota sick flag is set on the filesystem when the load fails
+ * with an error that xfs_metadata_is_sick() accepts, or when the loaded inode
+ * fails the data fork format or project id checks below. *@ipp is written
+ * only on success.
+ */
+int
+xfs_dqinode_load(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ xfs_dqtype_t type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *ip;
+ enum xfs_metafile_type metafile_type = xfs_dqinode_metafile_type(type);
+ int error;
+
+ if (!xfs_has_metadir(mp)) { /* no metadir: the inode number comes from the superblock */
+ xfs_ino_t ino;
+
+ switch (type) {
+ case XFS_DQTYPE_USER:
+ ino = mp->m_sb.sb_uquotino;
+ break;
+ case XFS_DQTYPE_GROUP:
+ ino = mp->m_sb.sb_gquotino;
+ break;
+ case XFS_DQTYPE_PROJ:
+ ino = mp->m_sb.sb_pquotino;
+ break;
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ /* Should have set 0 to NULLFSINO when loading superblock */
+ if (ino == NULLFSINO)
+ return -ENOENT;
+
+ error = xfs_trans_metafile_iget(tp, ino, metafile_type, &ip);
+ } else { /* metadir: look the file up by its per-type path under @dp */
+ error = xfs_metadir_load(tp, dp, xfs_dqinode_path(type),
+ metafile_type, &ip);
+ if (error == -ENOENT) /* returned as-is, without marking anything sick */
+ return error;
+ }
+ if (error) {
+ if (xfs_metadata_is_sick(error)) /* only these errors set the quota sick flag */
+ xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type));
+ return error;
+ }
+
+ if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && /* data fork must be extents or btree format */
+ ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
+ xfs_irele(ip);
+ xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type));
+ return -EFSCORRUPTED;
+ }
+
+ if (XFS_IS_CORRUPT(mp, ip->i_projid != 0)) { /* a nonzero project id is treated as corruption */
+ xfs_irele(ip);
+ xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type));
+ return -EFSCORRUPTED;
+ }
+
+ *ipp = ip;
+ return 0;
+}
+
+/*
+ * Create a metadata directory quota inode.
+ *
+ * The file is created as S_IFREG under @dp, using the path and metafile type
+ * derived from @type. On success the new inode is stored in *@ipp and 0 is
+ * returned; on failure the helper's error code is returned and *@ipp is left
+ * untouched.
+ *
+ * NOTE(review): the error returns after xfs_metadir_start_create() succeeds
+ * do no cleanup of their own -- confirm that xfs_metadir_create() and
+ * xfs_metadir_commit() release the update on failure.
+ */
+int
+xfs_dqinode_metadir_create(
+ struct xfs_inode *dp,
+ xfs_dqtype_t type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_metadir_update upd = {
+ .dp = dp,
+ .metafile_type = xfs_dqinode_metafile_type(type),
+ .path = xfs_dqinode_path(type),
+ };
+ int error;
+
+ error = xfs_metadir_start_create(&upd);
+ if (error)
+ return error;
+
+ error = xfs_metadir_create(&upd, S_IFREG);
+ if (error)
+ return error;
+
+ xfs_trans_log_inode(upd.tp, upd.ip, XFS_ILOG_CORE); /* log the inode core before committing */
+
+ error = xfs_metadir_commit(&upd);
+ if (error)
+ return error;
+
+ xfs_finish_inode_setup(upd.ip); /* runs only after the commit succeeded */
+ *ipp = upd.ip;
+ return 0;
+}
+
+#ifndef __KERNEL__
+/*
+ * Link a metadata directory quota inode.
+ *
+ * Links the existing inode @ip under @dp at the path derived from @type.
+ * Compiled only when __KERNEL__ is not defined. Returns 0 or the error code
+ * of the first helper that fails.
+ *
+ * NOTE(review): a failure of xfs_metadir_link() returns without any local
+ * cleanup -- confirm the helper releases the update started above.
+ */
+int
+xfs_dqinode_metadir_link(
+ struct xfs_inode *dp,
+ xfs_dqtype_t type,
+ struct xfs_inode *ip)
+{
+ struct xfs_metadir_update upd = {
+ .dp = dp,
+ .metafile_type = xfs_dqinode_metafile_type(type),
+ .path = xfs_dqinode_path(type),
+ .ip = ip,
+ };
+ int error;
+
+ error = xfs_metadir_start_link(&upd);
+ if (error)
+ return error;
+
+ error = xfs_metadir_link(&upd);
+ if (error)
+ return error;
+
+ xfs_trans_log_inode(upd.tp, upd.ip, XFS_ILOG_CORE); /* log the inode core before committing */
+
+ return xfs_metadir_commit(&upd);
+}
+#endif /* __KERNEL__ */
+
+/*
+ * Create the parent directory for all quota inodes and load it.
+ *
+ * The directory is named "quota" and is made under mp->m_metadirip; the
+ * result of xfs_metadir_mkdir() is returned, with the directory in *@dpp.
+ * If the mount has no metadir root inode, the filesystem is marked sick
+ * with XFS_SICK_FS_METADIR and -EFSCORRUPTED is returned.
+ */
+int
+xfs_dqinode_mkdir_parent(
+ struct xfs_mount *mp,
+ struct xfs_inode **dpp)
+{
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_metadir_mkdir(mp->m_metadirip, "quota", dpp);
+}
+
+/*
+ * Load the parent directory of all quota inodes. Pass the inode to the caller
+ * because quota functions (e.g. QUOTARM) can be called on the quota files even
+ * if quotas are not enabled.
+ *
+ * The directory is looked up as "quota" (type XFS_METAFILE_DIR) under
+ * mp->m_metadirip and the result of xfs_metadir_load() is returned. If the
+ * mount has no metadir root inode, the filesystem is marked sick with
+ * XFS_SICK_FS_METADIR and -EFSCORRUPTED is returned.
+ */
+int
+xfs_dqinode_load_parent(
+ struct xfs_trans *tp,
+ struct xfs_inode **dpp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_metadir_load(tp, mp->m_metadirip, "quota", XFS_METAFILE_DIR,
+ dpp);
+}
diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index 01a9e86b3037..7002d7676a78 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -63,7 +63,8 @@
#define XFS_ERRTAG_ATTR_LEAF_TO_NODE 41
#define XFS_ERRTAG_WB_DELAY_MS 42
#define XFS_ERRTAG_WRITE_DELAY_MS 43
-#define XFS_ERRTAG_MAX 44
+#define XFS_ERRTAG_EXCHMAPS_FINISH_ONE 44
+#define XFS_ERRTAG_MAX 45
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -111,5 +112,6 @@
#define XFS_RANDOM_ATTR_LEAF_TO_NODE 1
#define XFS_RANDOM_WB_DELAY_MS 3000
#define XFS_RANDOM_WRITE_DELAY_MS 3000
+#define XFS_RANDOM_EXCHMAPS_FINISH_ONE 1
#endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/libxfs/xfs_exchmaps.c b/fs/xfs/libxfs/xfs_exchmaps.c
new file mode 100644
index 000000000000..2021396651de
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_exchmaps.c
@@ -0,0 +1,1235 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_bmap.h"
+#include "xfs_icache.h"
+#include "xfs_quota.h"
+#include "xfs_exchmaps.h"
+#include "xfs_trace.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_error.h"
+#include "xfs_errortag.h"
+#include "xfs_health.h"
+#include "xfs_exchmaps_item.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_attr.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_dir2.h"
+#include "xfs_symlink_remote.h"
+
+/*
+ * Cache for incore intent items; created by xfs_exchmaps_intent_init_cache
+ * and torn down by xfs_exchmaps_intent_destroy_cache.
+ */
+struct kmem_cache *xfs_exchmaps_intent_cache;
+
+/*
+ * bmbt mappings adjacent to a pair of records. The numeric suffix names the
+ * file: xfs_exchmaps_estimate fills left1/right1 from file1 and left2/right2
+ * from file2.
+ */
+struct xfs_exchmaps_adjacent {
+ struct xfs_bmbt_irec left1;
+ struct xfs_bmbt_irec right1;
+ struct xfs_bmbt_irec left2;
+ struct xfs_bmbt_irec right2;
+};
+
+/* Start with every neighbouring mapping marked as a hole. */
+#define ADJACENT_INIT { \
+ .left1 = { .br_startblock = HOLESTARTBLOCK }, \
+ .right1 = { .br_startblock = HOLESTARTBLOCK }, \
+ .left2 = { .br_startblock = HOLESTARTBLOCK }, \
+ .right2 = { .br_startblock = HOLESTARTBLOCK }, \
+}
+
+/* Information to reset reflink flag / CoW fork state after an exchange. */
+
+/*
+ * Fix up the incore CoW fork state of @ip once an exchange has finished.
+ * Reflink inodes must always have an incore CoW fork, so create one if the
+ * flag is set.  If a CoW fork exists, tag the inode when the fork holds
+ * mappings (so that speculative preallocations can be GC'd if we run low on
+ * space) and clear the tag when the fork is empty.
+ */
+static inline void
+xfs_exchmaps_ensure_cowfork(
+	struct xfs_inode	*ip)
+{
+	struct xfs_ifork	*cow;
+
+	if (xfs_is_reflink_inode(ip))
+		xfs_ifork_init_cow(ip);
+
+	cow = xfs_ifork_ptr(ip, XFS_COW_FORK);
+	if (cow && cow->if_bytes > 0)
+		xfs_inode_set_cowblocks_tag(ip);
+	else if (cow)
+		xfs_inode_clear_cowblocks_tag(ip);
+}
+
+/*
+ * Bump the on-disk inode size so that the mapping in @imap never sits past
+ * EOF.  This is crucial so that log recovery won't get confused by the sudden
+ * appearance of post-eof mappings.  The size is never raised beyond
+ * @new_isize and never lowered; a negative @new_isize disables the update.
+ */
+STATIC void
+xfs_exchmaps_update_size(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	struct xfs_bmbt_irec	*imap,
+	xfs_fsize_t		new_isize)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	xfs_fsize_t		len;
+
+	if (new_isize < 0)
+		return;
+
+	/* End of the mapping in bytes, capped at the requested size. */
+	len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
+		  new_isize);
+
+	if (len > ip->i_disk_size) {
+		trace_xfs_exchmaps_update_inode_size(ip, len);
+
+		ip->i_disk_size = len;
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	}
+}
+
+/*
+ * Move the incore cursor past @irec: both file offsets go up by the length of
+ * the mapping and the remaining block count goes down by the same amount.
+ */
+static inline void
+xmi_advance(
+	struct xfs_exchmaps_intent	*xmi,
+	const struct xfs_bmbt_irec	*irec)
+{
+	const xfs_filblks_t		done = irec->br_blockcount;
+
+	xmi->xmi_blockcount -= done;
+	xmi->xmi_startoff1 += done;
+	xmi->xmi_startoff2 += done;
+}
+
+/* Are there still blocks in the range that have not been exchanged? */
+static inline bool
+xmi_has_more_exchange_work(const struct xfs_exchmaps_intent *xmi)
+{
+	const xfs_filblks_t	remaining = xmi->xmi_blockcount;
+
+	return remaining > 0;
+}
+
+/*
+ * Is any post-operation cleanup flag (clearing a reflink flag on either inode,
+ * or converting inode2 back to shortform) still set on this intent?
+ */
+static inline bool
+xmi_has_postop_work(const struct xfs_exchmaps_intent *xmi)
+{
+	const uint64_t	postop_flags = XFS_EXCHMAPS_CLEAR_INO1_REFLINK |
+				       XFS_EXCHMAPS_CLEAR_INO2_REFLINK |
+				       __XFS_EXCHMAPS_INO2_SHORTFORM;
+
+	return (xmi->xmi_flags & postop_flags) != 0;
+}
+
+/*
+ * Check that the fork selected by @req exists in both files and is in a
+ * format that we know how to exchange.  Returns 0 if so, -EINVAL otherwise.
+ */
+int
+xfs_exchmaps_check_forks(
+	struct xfs_mount		*mp,
+	const struct xfs_exchmaps_req	*req)
+{
+	const int			whichfork = xfs_exchmaps_reqfork(req);
+	struct xfs_ifork		*fork1;
+	struct xfs_ifork		*fork2;
+
+	fork1 = xfs_ifork_ptr(req->ip1, whichfork);
+	fork2 = xfs_ifork_ptr(req->ip2, whichfork);
+
+	/* Both files must have the fork. */
+	if (!fork1 || !fork2)
+		return -EINVAL;
+
+	/* We don't know how to exchange local format forks. */
+	if (fork1->if_format == XFS_DINODE_FMT_LOCAL)
+		return -EINVAL;
+	if (fork2->if_format == XFS_DINODE_FMT_LOCAL)
+		return -EINVAL;
+
+	return 0;
+}
+
+#ifdef CONFIG_XFS_QUOTA
+/*
+ * Log the quota block count changes caused by exchanging @irec1 (from file1)
+ * with @irec2 (from file2).  Only real extents move blocks between the two
+ * files; the realtime block counter is used when file1 is a realtime inode.
+ */
+static inline void
+xfs_exchmaps_update_quota(
+	struct xfs_trans		*tp,
+	struct xfs_exchmaps_intent	*xmi,
+	struct xfs_bmbt_irec		*irec1,
+	struct xfs_bmbt_irec		*irec2)
+{
+	unsigned int			qflag = XFS_TRANS_DQ_BCOUNT;
+	int64_t				ip1_delta = 0;
+	int64_t				ip2_delta = 0;
+
+	if (XFS_IS_REALTIME_INODE(xmi->xmi_ip1))
+		qflag = XFS_TRANS_DQ_RTBCOUNT;
+
+	/* Blocks that leave file1 and land in file2. */
+	if (xfs_bmap_is_real_extent(irec1)) {
+		ip1_delta -= irec1->br_blockcount;
+		ip2_delta += irec1->br_blockcount;
+	}
+
+	/* Blocks that leave file2 and land in file1. */
+	if (xfs_bmap_is_real_extent(irec2)) {
+		ip1_delta += irec2->br_blockcount;
+		ip2_delta -= irec2->br_blockcount;
+	}
+
+	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip1, qflag, ip1_delta);
+	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip2, qflag, ip2_delta);
+}
+#else
+# define xfs_exchmaps_update_quota(tp, xmi, irec1, irec2) ((void)0)
+#endif
+
+/*
+ * Decide if we want to skip this mapping from file1.
+ *
+ * Returns true if the mapping can be skipped and false if it has to be
+ * exchanged. Nothing is ever skipped unless the caller set
+ * XFS_EXCHMAPS_INO1_WRITTEN. For files with a multi-fsb rt allocation unit,
+ * irec->br_blockcount may be trimmed in place before either answer is
+ * returned, so the caller must use the (possibly shortened) mapping.
+ */
+static inline bool
+xfs_exchmaps_can_skip_mapping(
+ struct xfs_exchmaps_intent *xmi,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_mount *mp = xmi->xmi_ip1->i_mount;
+
+ /* Do not skip this mapping if the caller did not tell us to. */
+ if (!(xmi->xmi_flags & XFS_EXCHMAPS_INO1_WRITTEN))
+ return false;
+
+ /* Do not skip mapped, written mappings. */
+ if (xfs_bmap_is_written_extent(irec))
+ return false;
+
+ /*
+ * The mapping is unwritten or a hole. It cannot be a delalloc
+ * reservation because we already excluded those. It cannot be an
+ * unwritten extent with dirty page cache because we flushed the page
+ * cache. For files where the allocation unit is 1FSB (files on the
+ * data dev, rt files if the extent size is 1FSB), we can safely
+ * skip this mapping.
+ */
+ if (!xfs_inode_has_bigrtalloc(xmi->xmi_ip1))
+ return true;
+
+ /*
+ * For a realtime file with a multi-fsb allocation unit, the decision
+ * is trickier because we can only swap full allocation units.
+ * Unwritten mappings can appear in the middle of an rtx if the rtx is
+ * partially written, but they can also appear for preallocations.
+ *
+ * If the mapping is a hole, skip it entirely. Holes should align with
+ * rtx boundaries.
+ */
+ if (!xfs_bmap_is_real_extent(irec))
+ return true;
+
+ /*
+ * All mappings below this point are unwritten.
+ *
+ * - If the beginning is not aligned to an rtx, trim the end of the
+ * mapping so that it does not cross an rtx boundary, and swap it.
+ *
+ * - If both ends are aligned to an rtx, skip the entire mapping.
+ */
+ if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
+ xfs_fileoff_t new_end;
+
+ new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
+ irec->br_blockcount = min(irec->br_blockcount,
+ new_end - irec->br_startoff);
+ return false;
+ }
+ if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
+ return true;
+
+ /*
+ * All mappings below this point are unwritten, start on an rtx
+ * boundary, and do not end on an rtx boundary.
+ *
+ * - If the mapping is longer than one rtx, trim the end of the mapping
+ * down to an rtx boundary and skip it.
+ *
+ * - The mapping is shorter than one rtx. Swap it.
+ */
+ if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
+ xfs_fileoff_t new_end;
+
+ new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
+ mp->m_sb.sb_rextsize);
+ irec->br_blockcount = new_end - irec->br_startoff;
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Walk forward through the file ranges in @xmi until we find two different
+ * mappings to exchange. If there is work to do, return the mappings;
+ * otherwise we've reached the end of the range and xmi_blockcount will be
+ * zero.
+ *
+ * If the walk skips over a pair of mappings to the same storage, save them as
+ * the left records in @adj (if provided) so that the simulation phase can
+ * avoid an extra lookup.
+ *
+ * Returns 0 both when a pair was found and when the range was exhausted (the
+ * caller tells them apart with xmi_has_more_exchange_work), -EINVAL if a
+ * lookup returns something unexpected, -EFSCORRUPTED if two mappings to the
+ * same startblock have different states, or the error from xfs_bmapi_read.
+ */
+static int
+xfs_exchmaps_find_mappings(
+ struct xfs_exchmaps_intent *xmi,
+ struct xfs_bmbt_irec *irec1,
+ struct xfs_bmbt_irec *irec2,
+ struct xfs_exchmaps_adjacent *adj)
+{
+ int nimaps;
+ int bmap_flags;
+ int error;
+
+ bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_whichfork(xmi));
+
+ /*
+ * Every trip back to the top of the loop (including via "continue")
+ * advances the cursor past irec1, which is how skipped mappings are
+ * stepped over.
+ */
+ for (; xmi_has_more_exchange_work(xmi); xmi_advance(xmi, irec1)) {
+ /* Read mapping from the first file */
+ nimaps = 1;
+ error = xfs_bmapi_read(xmi->xmi_ip1, xmi->xmi_startoff1,
+ xmi->xmi_blockcount, irec1, &nimaps,
+ bmap_flags);
+ if (error)
+ return error;
+ if (nimaps != 1 ||
+ irec1->br_startblock == DELAYSTARTBLOCK ||
+ irec1->br_startoff != xmi->xmi_startoff1) {
+ /*
+ * We should never get no mapping or a delalloc mapping
+ * or something that doesn't match what we asked for,
+ * since the caller flushed both inodes and we hold the
+ * ILOCKs for both inodes.
+ */
+ ASSERT(0);
+ return -EINVAL;
+ }
+
+ if (xfs_exchmaps_can_skip_mapping(xmi, irec1)) {
+ trace_xfs_exchmaps_mapping1_skip(xmi->xmi_ip1, irec1);
+ continue;
+ }
+
+ /* Read mapping from the second file */
+ nimaps = 1;
+ error = xfs_bmapi_read(xmi->xmi_ip2, xmi->xmi_startoff2,
+ irec1->br_blockcount, irec2, &nimaps,
+ bmap_flags);
+ if (error)
+ return error;
+ if (nimaps != 1 ||
+ irec2->br_startblock == DELAYSTARTBLOCK ||
+ irec2->br_startoff != xmi->xmi_startoff2) {
+ /*
+ * We should never get no mapping or a delalloc mapping
+ * or something that doesn't match what we asked for,
+ * since the caller flushed both inodes and we hold the
+ * ILOCKs for both inodes.
+ */
+ ASSERT(0);
+ return -EINVAL;
+ }
+
+ /*
+ * We can only exchange as many blocks as the smaller of the
+ * two mappings.
+ */
+ irec1->br_blockcount = min(irec1->br_blockcount,
+ irec2->br_blockcount);
+
+ trace_xfs_exchmaps_mapping1(xmi->xmi_ip1, irec1);
+ trace_xfs_exchmaps_mapping2(xmi->xmi_ip2, irec2);
+
+ /* We found something to exchange, so return it. */
+ if (irec1->br_startblock != irec2->br_startblock)
+ return 0;
+
+ /*
+ * Two mappings pointing to the same physical block must not
+ * have different states; that's filesystem corruption. Move
+ * on to the next mapping if they're both holes or both point
+ * to the same physical space extent.
+ */
+ if (irec1->br_state != irec2->br_state) {
+ xfs_bmap_mark_sick(xmi->xmi_ip1,
+ xfs_exchmaps_whichfork(xmi));
+ xfs_bmap_mark_sick(xmi->xmi_ip2,
+ xfs_exchmaps_whichfork(xmi));
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Save the mappings if we're estimating work and skipping
+ * these identical mappings.
+ */
+ if (adj) {
+ memcpy(&adj->left1, irec1, sizeof(*irec1));
+ memcpy(&adj->left2, irec2, sizeof(*irec2));
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Exchange these two mappings: @irec1 currently belongs to file1 and @irec2
+ * to file2. Quota counts are adjusted first, then both mappings are unmapped
+ * and re-mapped into the opposite file, and finally the cursor in @xmi is
+ * advanced. Note that the br_startoff fields of both records are swapped in
+ * place.
+ */
+static void
+xfs_exchmaps_one_step(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi,
+ struct xfs_bmbt_irec *irec1,
+ struct xfs_bmbt_irec *irec2)
+{
+ int whichfork = xfs_exchmaps_whichfork(xmi);
+
+ xfs_exchmaps_update_quota(tp, xmi, irec1, irec2);
+
+ /* Remove both mappings. */
+ xfs_bmap_unmap_extent(tp, xmi->xmi_ip1, whichfork, irec1);
+ xfs_bmap_unmap_extent(tp, xmi->xmi_ip2, whichfork, irec2);
+
+ /*
+ * Re-add both mappings. We exchange the file offsets between the two
+ * maps and add the opposite map, which has the effect of filling the
+ * logical offsets we just unmapped, but with the physical mapping
+ * information exchanged.
+ */
+ swap(irec1->br_startoff, irec2->br_startoff);
+ xfs_bmap_map_extent(tp, xmi->xmi_ip1, whichfork, irec2);
+ xfs_bmap_map_extent(tp, xmi->xmi_ip2, whichfork, irec1);
+
+ /* Make sure we're not adding mappings past EOF. */
+ if (whichfork == XFS_DATA_FORK) {
+ xfs_exchmaps_update_size(tp, xmi->xmi_ip1, irec2,
+ xmi->xmi_isize1);
+ xfs_exchmaps_update_size(tp, xmi->xmi_ip2, irec1,
+ xmi->xmi_isize2);
+ }
+
+ /*
+ * Advance our cursor and exit. The caller (either defer ops or log
+ * recovery) will log the XMD item, and if xmi_blockcount is nonzero,
+ * it will log a new XMI item for the remainder and call us back.
+ */
+ xmi_advance(xmi, irec1);
+}
+
+/*
+ * Convert inode2's leaf attr fork back to shortform, if possible.  Returns 0
+ * without doing anything if the attr fork is not in leaf format or if
+ * xfs_attr_shortform_allfit reports a zero fork offset.
+ */
+STATIC int
+xfs_exchmaps_attr_to_sf(
+	struct xfs_trans		*tp,
+	struct xfs_exchmaps_intent	*xmi)
+{
+	struct xfs_inode		*ip = xmi->xmi_ip2;
+	struct xfs_da_args		args = {
+		.dp			= ip,
+		.geo			= tp->t_mountp->m_attr_geo,
+		.whichfork		= XFS_ATTR_FORK,
+		.trans			= tp,
+		.owner			= ip->i_ino,
+	};
+	struct xfs_buf			*bp;
+	int				forkoff;
+	int				error;
+
+	if (!xfs_attr_is_leaf(ip))
+		return 0;
+
+	error = xfs_attr3_leaf_read(tp, ip, ip->i_ino, 0, &bp);
+	if (error)
+		return error;
+
+	forkoff = xfs_attr_shortform_allfit(bp, ip);
+	if (forkoff != 0)
+		return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
+
+	return 0;
+}
+
+/*
+ * Convert inode2's block dir fork back to shortform, if possible.  Nothing
+ * happens unless the directory is in block format and the computed shortform
+ * size fits in the inode's data fork area.
+ */
+STATIC int
+xfs_exchmaps_dir_to_sf(
+	struct xfs_trans		*tp,
+	struct xfs_exchmaps_intent	*xmi)
+{
+	struct xfs_inode		*ip = xmi->xmi_ip2;
+	struct xfs_da_args		args = {
+		.dp			= ip,
+		.geo			= tp->t_mountp->m_dir_geo,
+		.whichfork		= XFS_DATA_FORK,
+		.trans			= tp,
+		.owner			= ip->i_ino,
+	};
+	struct xfs_dir2_sf_hdr		sfh;
+	struct xfs_buf			*bp;
+	int				size;
+	int				error = 0;
+
+	/* If the format lookup set an error, it is passed back from here. */
+	if (xfs_dir2_format(&args, &error) != XFS_DIR2_FMT_BLOCK)
+		return error;
+
+	error = xfs_dir3_block_read(tp, ip, ip->i_ino, &bp);
+	if (error)
+		return error;
+
+	size = xfs_dir2_block_sfsize(ip, bp->b_addr, &sfh);
+	if (size <= xfs_inode_data_fork_size(ip))
+		return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
+
+	return 0;
+}
+
+/*
+ * Convert inode2's remote symlink target back to shortform, if possible.
+ *
+ * Nothing is done (and 0 is returned) if the data fork is already in local
+ * format or if the target is larger than the inode's data fork area.
+ * Otherwise the target is read into a temporary buffer, the remote blocks are
+ * removed, and the fork is rebuilt in local format from the buffer. The
+ * temporary buffer is freed on every path out of this function.
+ */
+STATIC int
+xfs_exchmaps_link_to_sf(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi)
+{
+ struct xfs_inode *ip = xmi->xmi_ip2;
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+ char *buf;
+ int error;
+
+ if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
+ ip->i_disk_size > xfs_inode_data_fork_size(ip))
+ return 0;
+
+ /* Read the current symlink target into a buffer. */
+ buf = kmalloc(ip->i_disk_size + 1,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
+ if (!buf) {
+ /* NOTE(review): not expected with __GFP_NOFAIL; kept as a backstop. */
+ ASSERT(0);
+ return -ENOMEM;
+ }
+
+ error = xfs_symlink_remote_read(ip, buf);
+ if (error)
+ goto free;
+
+ /* Remove the blocks. */
+ error = xfs_symlink_remote_truncate(tp, ip);
+ if (error)
+ goto free;
+
+ /*
+ * Convert fork to local format and log our changes. Presumably
+ * xfs_init_local_fork copies the target out of buf, since buf is
+ * freed right below.
+ */
+ xfs_idestroy_fork(ifp);
+ ifp->if_bytes = 0;
+ ifp->if_format = XFS_DINODE_FMT_LOCAL;
+ xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
+free:
+ kfree(buf);
+ return error;
+}
+
+/*
+ * Drop the reflink flag from @ip after an exchange and log the inode core so
+ * that the change is part of the transaction.
+ */
+static inline void
+xfs_exchmaps_clear_reflink(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
+{
+	trace_xfs_reflink_unset_inode_flag(ip);
+
+	ip->i_diflags2 = ip->i_diflags2 & ~XFS_DIFLAG2_REFLINK;
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/*
+ * Try to convert inode2 back to shortform.  The attr fork is converted if
+ * that is the fork being exchanged; otherwise directories and symlinks have
+ * their data fork converted.  Anything else is left alone.
+ */
+static int
+xfs_exchmaps_ino2_to_shortform(
+	struct xfs_trans		*tp,
+	struct xfs_exchmaps_intent	*xmi)
+{
+	if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
+		return xfs_exchmaps_attr_to_sf(tp, xmi);
+	if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode))
+		return xfs_exchmaps_dir_to_sf(tp, xmi);
+	if (S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
+		return xfs_exchmaps_link_to_sf(tp, xmi);
+	return 0;
+}
+
+/*
+ * Finish whatever work might come after an exchange operation.  Each piece of
+ * cleanup clears its own flag in @xmi once it has been attempted, so a
+ * shortform conversion is never retried even if it fails.
+ */
+static int
+xfs_exchmaps_do_postop_work(
+	struct xfs_trans		*tp,
+	struct xfs_exchmaps_intent	*xmi)
+{
+	int				error = 0;
+
+	if (xmi->xmi_flags & __XFS_EXCHMAPS_INO2_SHORTFORM) {
+		error = xfs_exchmaps_ino2_to_shortform(tp, xmi);
+		xmi->xmi_flags &= ~__XFS_EXCHMAPS_INO2_SHORTFORM;
+	}
+	if (error)
+		return error;
+
+	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO1_REFLINK) {
+		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip1);
+		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
+	}
+
+	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO2_REFLINK) {
+		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip2);
+		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
+	}
+
+	return 0;
+}
+
+/*
+ * Finish one step in a mapping exchange operation, possibly relogging.
+ *
+ * Each call either exchanges at most one pair of mappings or, once the range
+ * is exhausted, performs the post-operation cleanups. Returns 0 when all the
+ * work is done, -EAGAIN when the caller must call back with a new
+ * transaction, -EIO if the XFS_ERRTAG_EXCHMAPS_FINISH_ONE error injection
+ * point fires, or another negative errno on failure.
+ */
+int
+xfs_exchmaps_finish_one(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi)
+{
+ struct xfs_bmbt_irec irec1, irec2;
+ int error;
+
+ if (xmi_has_more_exchange_work(xmi)) {
+ /*
+ * If the operation state says that some range of the files
+ * have not yet been exchanged, look for mappings in that range
+ * to exchange. If we find some mappings, exchange them.
+ */
+ error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, NULL);
+ if (error)
+ return error;
+
+ /* The walk leaves xmi_blockcount at zero if it found nothing. */
+ if (xmi_has_more_exchange_work(xmi))
+ xfs_exchmaps_one_step(tp, xmi, &irec1, &irec2);
+
+ /*
+ * If the caller asked us to exchange the file sizes after the
+ * exchange and either we just exchanged the last mappings in
+ * the range or we didn't find anything to exchange, update the
+ * ondisk file sizes.
+ */
+ if ((xmi->xmi_flags & XFS_EXCHMAPS_SET_SIZES) &&
+ !xmi_has_more_exchange_work(xmi)) {
+ xmi->xmi_ip1->i_disk_size = xmi->xmi_isize1;
+ xmi->xmi_ip2->i_disk_size = xmi->xmi_isize2;
+
+ xfs_trans_log_inode(tp, xmi->xmi_ip1, XFS_ILOG_CORE);
+ xfs_trans_log_inode(tp, xmi->xmi_ip2, XFS_ILOG_CORE);
+ }
+ } else if (xmi_has_postop_work(xmi)) {
+ /*
+ * Now that we're finished with the exchange operation,
+ * complete the post-op cleanup work.
+ */
+ error = xfs_exchmaps_do_postop_work(tp, xmi);
+ if (error)
+ return error;
+ }
+
+ if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
+ return -EIO;
+
+ /* If we still have work to do, ask for a new transaction. */
+ if (xmi_has_more_exchange_work(xmi) || xmi_has_postop_work(xmi)) {
+ trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
+ return -EAGAIN;
+ }
+
+ /*
+ * If we reach here, we've finished all the exchange work and the post
+ * operation work. The last thing we need to do before returning to
+ * the caller is to make sure that COW forks are set up correctly.
+ */
+ if (!(xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)) {
+ xfs_exchmaps_ensure_cowfork(xmi->xmi_ip1);
+ xfs_exchmaps_ensure_cowfork(xmi->xmi_ip2);
+ }
+
+ return 0;
+}
+
+/*
+ * Compute the amount of bmbt blocks we should reserve for each file.  In the
+ * worst case, each exchange will fill a hole with a new mapping, which could
+ * result in a btree split every time we add a new leaf block.  The estimate
+ * is therefore the number of leaf blocks needed to hold req->nr_exchanges
+ * records multiplied by the per-extent-add space reservation.
+ */
+static inline uint64_t
+xfs_exchmaps_bmbt_blocks(
+	struct xfs_mount		*mp,
+	const struct xfs_exchmaps_req	*req)
+{
+	uint64_t			nr_leaves;
+
+	nr_leaves = howmany_64(req->nr_exchanges,
+			XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp));
+
+	return nr_leaves *
+		XFS_EXTENTADD_SPACE_RES(mp, xfs_exchmaps_reqfork(req));
+}
+
+/*
+ * Compute the space we should reserve for the rmap btree expansions.  Nothing
+ * is reserved if the filesystem has no rmap btree or if file1 is a realtime
+ * file.
+ */
+static inline uint64_t
+xfs_exchmaps_rmapbt_blocks(
+	struct xfs_mount		*mp,
+	const struct xfs_exchmaps_req	*req)
+{
+	uint64_t			nr_leaves;
+
+	if (!xfs_has_rmapbt(mp) || XFS_IS_REALTIME_INODE(req->ip1))
+		return 0;
+
+	nr_leaves = howmany_64(req->nr_exchanges,
+			XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp));
+
+	return nr_leaves * XFS_RMAPADD_SPACE_RES(mp);
+}
+
+/*
+ * Estimate the bmbt and rmapbt overhead required to exchange mappings.
+ *
+ * On entry, req->nr_exchanges and req->ip{1,2}_bcount describe the exchange
+ * itself.  On success, each per-file block count has grown by the bmbt
+ * estimate, and req->resblks has grown by twice the bmbt estimate plus twice
+ * the rmapbt estimate (one share for each file).
+ *
+ * Returns -EFBIG if a per-file block count would overflow, and -ENOSPC if the
+ * block reservation would overflow or would not fit in an unsigned int.
+ * req->resblks is left untouched on failure.
+ */
+int
+xfs_exchmaps_estimate_overhead(
+	struct xfs_exchmaps_req		*req)
+{
+	struct xfs_mount		*mp = req->ip1->i_mount;
+	xfs_filblks_t			bmbt_blocks;
+	xfs_filblks_t			rmapbt_blocks;
+	xfs_filblks_t			resblks = req->resblks;
+
+	/*
+	 * Compute the number of bmbt and rmapbt blocks we might need to handle
+	 * the estimated number of exchanges.
+	 */
+	bmbt_blocks = xfs_exchmaps_bmbt_blocks(mp, req);
+	rmapbt_blocks = xfs_exchmaps_rmapbt_blocks(mp, req);
+
+	trace_xfs_exchmaps_overhead(mp, bmbt_blocks, rmapbt_blocks);
+
+	/* Make sure the change in file block count doesn't overflow. */
+	if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
+		return -EFBIG;
+	if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
+		return -EFBIG;
+
+	/*
+	 * Add together the number of blocks we need to handle btree growth,
+	 * then add it to the number of blocks we need to reserve to this
+	 * transaction.  Each estimate is added twice, once per file.
+	 */
+	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
+		return -ENOSPC;
+	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
+		return -ENOSPC;
+	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
+		return -ENOSPC;
+	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
+		return -ENOSPC;
+
+	/*
+	 * Can't actually reserve more than UINT_MAX blocks.  Check the new
+	 * total, not the value the caller passed in, because the additions
+	 * above are what can push the reservation over the limit.
+	 */
+	if (resblks > UINT_MAX)
+		return -ENOSPC;
+
+	req->resblks = resblks;
+	trace_xfs_exchmaps_final_estimate(req);
+	return 0;
+}
+
+/*
+ * Decide if we can merge two real mappings.  @b2 must start exactly where @b1
+ * ends, both in file offset and in physical block, the two must be in the
+ * same state, and the combined length must not exceed the maximum bmbt extent
+ * length.
+ */
+static inline bool
+xmi_can_merge(
+	const struct xfs_bmbt_irec	*b1,
+	const struct xfs_bmbt_irec	*b2)
+{
+	/* Don't merge holes. */
+	if (b1->br_startblock == HOLESTARTBLOCK)
+		return false;
+	if (b2->br_startblock == HOLESTARTBLOCK)
+		return false;
+
+	/* Both mappings must be real extents. */
+	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
+		return false;
+
+	/* Logically and physically contiguous, and in the same state? */
+	if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
+		return false;
+	if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
+		return false;
+	if (b1->br_state != b2->br_state)
+		return false;
+
+	return b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN;
+}
+
+/*
+ * Decide if three mappings can be merged into one, i.e. whether their
+ * combined length still fits in a single bmbt extent.  The caller must ensure
+ * that none of the three mappings is a hole or a delalloc reservation.
+ */
+static inline bool
+xmi_can_merge_all(
+	const struct xfs_bmbt_irec	*l,
+	const struct xfs_bmbt_irec	*m,
+	const struct xfs_bmbt_irec	*r)
+{
+	xfs_filblks_t			total = l->br_blockcount;
+
+	total += m->br_blockcount;
+	total += r->br_blockcount;
+
+	return total <= XFS_MAX_BMBT_EXTLEN;
+}
+
+/*
+ * State bits used by xmi_delta_nextents_step. The C* bits describe how the
+ * current mapping relates to its neighbours; the N* bits describe the same
+ * for the new mapping that replaces it.
+ */
+#define CLEFT_CONTIG 0x01
+#define CRIGHT_CONTIG 0x02
+#define CHOLE 0x04
+#define CBOTH_CONTIG (CLEFT_CONTIG | CRIGHT_CONTIG)
+
+#define NLEFT_CONTIG 0x10
+#define NRIGHT_CONTIG 0x20
+#define NHOLE 0x40
+#define NBOTH_CONTIG (NLEFT_CONTIG | NRIGHT_CONTIG)
+
+/*
+ * Estimate the effect of a single exchange on mapping count.
+ *
+ * @curr is the mapping being removed from the file, @new is the mapping that
+ * takes its place, and @left/@right are the neighbours on either side. The
+ * return value is the estimated change in the number of mappings.
+ */
+static inline int
+xmi_delta_nextents_step(
+ struct xfs_mount *mp,
+ const struct xfs_bmbt_irec *left,
+ const struct xfs_bmbt_irec *curr,
+ const struct xfs_bmbt_irec *new,
+ const struct xfs_bmbt_irec *right)
+{
+ bool lhole, rhole, chole, nhole;
+ unsigned int state = 0;
+ int ret = 0;
+
+ lhole = left->br_startblock == HOLESTARTBLOCK;
+ rhole = right->br_startblock == HOLESTARTBLOCK;
+ chole = curr->br_startblock == HOLESTARTBLOCK;
+ nhole = new->br_startblock == HOLESTARTBLOCK;
+
+ if (chole)
+ state |= CHOLE;
+ if (!lhole && !chole && xmi_can_merge(left, curr))
+ state |= CLEFT_CONTIG;
+ if (!rhole && !chole && xmi_can_merge(curr, right))
+ state |= CRIGHT_CONTIG;
+ /* A three-way merge that would be too long only counts as a left merge. */
+ if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
+ !xmi_can_merge_all(left, curr, right))
+ state &= ~CRIGHT_CONTIG;
+
+ if (nhole)
+ state |= NHOLE;
+ if (!lhole && !nhole && xmi_can_merge(left, new))
+ state |= NLEFT_CONTIG;
+ if (!rhole && !nhole && xmi_can_merge(new, right))
+ state |= NRIGHT_CONTIG;
+ /* Same length cap for the new mapping. */
+ if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
+ !xmi_can_merge_all(left, new, right))
+ state &= ~NRIGHT_CONTIG;
+
+ switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
+ case CLEFT_CONTIG | CRIGHT_CONTIG:
+ /*
+ * left/curr/right are the same mapping, so deleting curr
+ * causes 2 new mappings to be created.
+ */
+ ret += 2;
+ break;
+ case 0:
+ /*
+ * curr is not contiguous with any mapping, so we remove curr
+ * completely
+ */
+ ret--;
+ break;
+ case CHOLE:
+ /* hole, do nothing */
+ break;
+ case CLEFT_CONTIG:
+ case CRIGHT_CONTIG:
+ /* trim either left or right, no change */
+ break;
+ }
+
+ switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
+ case NLEFT_CONTIG | NRIGHT_CONTIG:
+ /*
+ * left/new/right will become the same mapping, so adding
+ * new causes the deletion of right.
+ */
+ ret--;
+ break;
+ case 0:
+ /* new is not contiguous with any mapping */
+ ret++;
+ break;
+ case NHOLE:
+ /* hole, do nothing. */
+ break;
+ case NLEFT_CONTIG:
+ case NRIGHT_CONTIG:
+ /* new is absorbed into left or right, no change */
+ break;
+ }
+
+ trace_xfs_exchmaps_delta_nextents_step(mp, left, curr, new, right, ret,
+ state);
+ return ret;
+}
+
+/*
+ * Make sure we don't overflow the extent (mapping) counters.
+ *
+ * @delta is the estimated change in the number of mappings in the fork of
+ * @ip selected by @req. A negative delta always passes. Returns -EFBIG if
+ * adding the delta would overflow or exceed the maximum extent count, and 0
+ * otherwise.
+ */
+static inline int
+xmi_ensure_delta_nextents(
+ struct xfs_exchmaps_req *req,
+ struct xfs_inode *ip,
+ int64_t delta)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ int whichfork = xfs_exchmaps_reqfork(req);
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
+ uint64_t new_nextents;
+ xfs_extnum_t max_nextents;
+
+ if (delta < 0)
+ return 0;
+
+ /*
+ * It's always an error if the delta causes integer overflow. delta
+ * needs an explicit cast here to avoid warnings about implicit casts
+ * coded into the overflow check.
+ */
+ if (check_add_overflow(ifp->if_nextents, (uint64_t)delta,
+ &new_nextents))
+ return -EFBIG;
+
+ /* Error injection: pretend that the limit is only 10 mappings. */
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
+ new_nextents > 10)
+ return -EFBIG;
+
+ /*
+ * We always promote both inodes to have large extent counts if the
+ * superblock feature is enabled, so we only need to check against the
+ * theoretical maximum.
+ */
+ max_nextents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
+ whichfork);
+ if (new_nextents > max_nextents)
+ return -EFBIG;
+
+ return 0;
+}
+
+/*
+ * Find the next mapping after irec.
+ *
+ * Looks up the mapping that starts where @irec ends and stores it in @nrec.
+ * Returns 0 on success or the error from xfs_bmapi_read.
+ */
+static inline int
+xmi_next(
+ struct xfs_inode *ip,
+ int bmap_flags,
+ const struct xfs_bmbt_irec *irec,
+ struct xfs_bmbt_irec *nrec)
+{
+ xfs_fileoff_t off;
+ xfs_filblks_t blockcount;
+ int nimaps = 1;
+ int error;
+
+ off = irec->br_startoff + irec->br_blockcount;
+ blockcount = XFS_MAX_FILEOFF - off;
+ error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
+ if (error)
+ return error;
+ if (nrec->br_startblock == DELAYSTARTBLOCK ||
+ nrec->br_startoff != off) {
+ /*
+ * If we don't get the mapping we want, mark the record as a
+ * hole so that our estimator function will treat it as one.
+ * Only br_startblock is overwritten; the other fields are left
+ * as returned. We shouldn't get delalloc reservations.
+ */
+ nrec->br_startblock = HOLESTARTBLOCK;
+ }
+
+ return 0;
+}
+
+/*
+ * Create the cache that backs incore intent items.  Returns 0 on success or
+ * -ENOMEM if the cache could not be created.
+ */
+int __init
+xfs_exchmaps_intent_init_cache(void)
+{
+	xfs_exchmaps_intent_cache = kmem_cache_create("xfs_exchmaps_intent",
+			sizeof(struct xfs_exchmaps_intent),
+			0, 0, NULL);
+	if (!xfs_exchmaps_intent_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Tear down the intent item cache and reset the global pointer so that no
+ * stale reference to the destroyed cache is left behind.
+ */
+void
+xfs_exchmaps_intent_destroy_cache(void)
+{
+	kmem_cache_destroy(xfs_exchmaps_intent_cache);
+	xfs_exchmaps_intent_cache = NULL;
+}
+
+/*
+ * Decide if we will exchange the reflink flags between the two files after the
+ * exchange.  The only time we want to do this is if we're exchanging all
+ * mappings under EOF and the inode reflink flags have different states.
+ * @reflink_state has one bit per inode whose reflink flag is set.
+ */
+static inline bool
+xmi_can_exchange_reflink_flags(
+	const struct xfs_exchmaps_req	*req,
+	unsigned int			reflink_state)
+{
+	struct xfs_mount		*mp = req->ip1->i_mount;
+
+	/* Exactly one of the two inodes must have the flag set. */
+	if (hweight32(reflink_state) != 1)
+		return false;
+
+	/* The range must start at offset zero in both files... */
+	if (req->startoff1 != 0 || req->startoff2 != 0)
+		return false;
+
+	/* ...and must span the ondisk size of both files. */
+	return req->blockcount == XFS_B_TO_FSB(mp, req->ip1->i_disk_size) &&
+	       req->blockcount == XFS_B_TO_FSB(mp, req->ip2->i_disk_size);
+}
+
+
+/*
+ * Allocate and initialize a new incore intent item from a request.
+ *
+ * Only the XFS_EXCHMAPS_PARAMS bits of req->flags are carried over.  For an
+ * attr fork exchange, the only post-operation work scheduled is the shortform
+ * conversion of inode2.  For a data fork exchange, this also records the file
+ * sizes to set afterwards (if requested), decides whether a reflink flag can
+ * be cleared afterwards, and schedules the shortform conversion if inode2 is
+ * a directory or a symlink.
+ */
+struct xfs_exchmaps_intent *
+xfs_exchmaps_init_intent(
+	const struct xfs_exchmaps_req	*req)
+{
+	struct xfs_exchmaps_intent	*xmi;
+	unsigned int			reflink_state = 0;
+
+	xmi = kmem_cache_zalloc(xfs_exchmaps_intent_cache,
+			GFP_NOFS | __GFP_NOFAIL);
+	INIT_LIST_HEAD(&xmi->xmi_list);
+	xmi->xmi_ip1 = req->ip1;
+	xmi->xmi_ip2 = req->ip2;
+	xmi->xmi_startoff1 = req->startoff1;
+	xmi->xmi_startoff2 = req->startoff2;
+	xmi->xmi_blockcount = req->blockcount;
+	/* Negative sizes mean that the file sizes are to be left alone. */
+	xmi->xmi_isize1 = -1;
+	xmi->xmi_isize2 = -1;
+	xmi->xmi_flags = req->flags & XFS_EXCHMAPS_PARAMS;
+
+	if (xfs_exchmaps_whichfork(xmi) == XFS_ATTR_FORK) {
+		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
+		return xmi;
+	}
+
+	/* Each file ends up with the size that the other one has now. */
+	if (req->flags & XFS_EXCHMAPS_SET_SIZES) {
+		xmi->xmi_flags |= XFS_EXCHMAPS_SET_SIZES;
+		xmi->xmi_isize1 = req->ip2->i_disk_size;
+		xmi->xmi_isize2 = req->ip1->i_disk_size;
+	}
+
+	/*
+	 * Record the state of each inode's reflink flag before the op: bit 0
+	 * is inode1 and bit 1 is inode2.
+	 */
+	if (xfs_is_reflink_inode(req->ip1))
+		reflink_state |= 1;
+	if (xfs_is_reflink_inode(req->ip2))
+		reflink_state |= 2;
+
+	/*
+	 * Figure out if we're clearing the reflink flags (which effectively
+	 * exchanges them) after the operation.
+	 */
+	if (xmi_can_exchange_reflink_flags(req, reflink_state)) {
+		if (reflink_state & 1)
+			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
+		if (reflink_state & 2)
+			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
+	}
+
+	if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode) ||
+	    S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
+		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
+
+	return xmi;
+}
+
+/*
+ * Estimate the number of exchange operations and the number of file blocks
+ * in each file that will be affected by the exchange operation.
+ *
+ * This simulates the exchange with a temporary intent item, which is freed
+ * before returning. On success, req->nr_exchanges has been incremented once
+ * per pair of mappings found, the per-file block counts (rtbcount for the
+ * data fork of a realtime file, bcount otherwise) have been set, and the
+ * btree overhead has been added by xfs_exchmaps_estimate_overhead. Returns 0
+ * or a negative errno.
+ */
+int
+xfs_exchmaps_estimate(
+ struct xfs_exchmaps_req *req)
+{
+ struct xfs_exchmaps_intent *xmi;
+ struct xfs_bmbt_irec irec1, irec2;
+ struct xfs_exchmaps_adjacent adj = ADJACENT_INIT;
+ xfs_filblks_t ip1_blocks = 0, ip2_blocks = 0;
+ int64_t d_nexts1, d_nexts2;
+ int bmap_flags;
+ int error;
+
+ ASSERT(!(req->flags & ~XFS_EXCHMAPS_PARAMS));
+
+ bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_reqfork(req));
+ xmi = xfs_exchmaps_init_intent(req);
+
+ /*
+ * To guard against the possibility of overflowing the extent counters,
+ * we have to estimate an upper bound on the potential increase in that
+ * counter. We can split the mapping at each end of the range, and for
+ * each step of the exchange we can split the mapping that we're
+ * working on if the mappings do not align.
+ */
+ d_nexts1 = d_nexts2 = 3;
+
+ while (xmi_has_more_exchange_work(xmi)) {
+ /*
+ * Walk through the file ranges until we find something to
+ * exchange. Because we're simulating the exchange, pass in
+ * adj to capture skipped mappings for correct estimation of
+ * bmbt record merges.
+ */
+ error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, &adj);
+ if (error)
+ goto out_free;
+ if (!xmi_has_more_exchange_work(xmi))
+ break;
+
+ /* Update accounting. */
+ if (xfs_bmap_is_real_extent(&irec1))
+ ip1_blocks += irec1.br_blockcount;
+ if (xfs_bmap_is_real_extent(&irec2))
+ ip2_blocks += irec2.br_blockcount;
+ req->nr_exchanges++;
+
+ /* Read the next mappings from both files. */
+ error = xmi_next(req->ip1, bmap_flags, &irec1, &adj.right1);
+ if (error)
+ goto out_free;
+
+ error = xmi_next(req->ip2, bmap_flags, &irec2, &adj.right2);
+ if (error)
+ goto out_free;
+
+ /* Update extent count deltas. */
+ d_nexts1 += xmi_delta_nextents_step(req->ip1->i_mount,
+ &adj.left1, &irec1, &irec2, &adj.right1);
+
+ d_nexts2 += xmi_delta_nextents_step(req->ip1->i_mount,
+ &adj.left2, &irec2, &irec1, &adj.right2);
+
+ /*
+ * Now pretend we exchanged the mappings: irec1 becomes the
+ * new left neighbour in file2 and irec2 the new left neighbour
+ * in file1.
+ */
+ if (xmi_can_merge(&adj.left2, &irec1))
+ adj.left2.br_blockcount += irec1.br_blockcount;
+ else
+ memcpy(&adj.left2, &irec1, sizeof(irec1));
+
+ if (xmi_can_merge(&adj.left1, &irec2))
+ adj.left1.br_blockcount += irec2.br_blockcount;
+ else
+ memcpy(&adj.left1, &irec2, sizeof(irec2));
+
+ xmi_advance(xmi, &irec1);
+ }
+
+ /* Account for the blocks that are being exchanged. */
+ if (XFS_IS_REALTIME_INODE(req->ip1) &&
+ xfs_exchmaps_reqfork(req) == XFS_DATA_FORK) {
+ req->ip1_rtbcount = ip1_blocks;
+ req->ip2_rtbcount = ip2_blocks;
+ } else {
+ req->ip1_bcount = ip1_blocks;
+ req->ip2_bcount = ip2_blocks;
+ }
+
+ /*
+ * Make sure that both forks have enough slack left in their extent
+ * counters that the exchange operation will not overflow. If both
+ * ranges are in the same file, the two deltas hit the same counter.
+ */
+ trace_xfs_exchmaps_delta_nextents(req, d_nexts1, d_nexts2);
+ if (req->ip1 == req->ip2) {
+ error = xmi_ensure_delta_nextents(req, req->ip1,
+ d_nexts1 + d_nexts2);
+ } else {
+ error = xmi_ensure_delta_nextents(req, req->ip1, d_nexts1);
+ if (error)
+ goto out_free;
+ error = xmi_ensure_delta_nextents(req, req->ip2, d_nexts2);
+ }
+ if (error)
+ goto out_free;
+
+ trace_xfs_exchmaps_initial_estimate(req);
+ error = xfs_exchmaps_estimate_overhead(req);
+out_free:
+ kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
+ return error;
+}
+
+/*
+ * Turn on the reflink flag of @ip before an operation and log the inode core
+ * so that the change is part of the transaction.
+ */
+static inline void
+xfs_exchmaps_set_reflink(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
+{
+	trace_xfs_reflink_set_inode_flag(ip);
+
+	ip->i_diflags2 = ip->i_diflags2 | XFS_DIFLAG2_REFLINK;
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/*
+ * If either file has shared blocks and we're exchanging data forks, we must
+ * flag the other file as having shared blocks so that we get the shared-block
+ * rmap functions if we need to fix up the rmaps.  Both flags are sampled
+ * before either inode is changed.
+ */
+void
+xfs_exchmaps_ensure_reflink(
+	struct xfs_trans			*tp,
+	const struct xfs_exchmaps_intent	*xmi)
+{
+	const bool	ip1_reflink = xfs_is_reflink_inode(xmi->xmi_ip1);
+	const bool	ip2_reflink = xfs_is_reflink_inode(xmi->xmi_ip2);
+
+	if (ip1_reflink && !ip2_reflink)
+		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip2);
+
+	if (ip2_reflink && !ip1_reflink)
+		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip1);
+}
+
+/*
+ * Set the large extent count flag on @ip before an operation, unless the inode
+ * already has it, and log the inode core if anything changed.
+ */
+static inline void
+xfs_exchmaps_ensure_large_extent_counts(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
+{
+	if (!xfs_inode_has_large_extent_counts(ip)) {
+		ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	}
+}
+
+/*
+ * Widen the extent counter fields of both inodes if necessary.  This does
+ * nothing unless the filesystem has the large extent counts feature.
+ */
+void
+xfs_exchmaps_upgrade_extent_counts(
+	struct xfs_trans			*tp,
+	const struct xfs_exchmaps_intent	*xmi)
+{
+	if (xfs_has_large_extent_counts(tp->t_mountp)) {
+		xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip1);
+		xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip2);
+	}
+}
+
+/*
+ * Schedule an exchange of a range of mappings from one inode to another.
+ *
+ * The use of file mapping exchange log intent items ensures the operation can
+ * be resumed even if the system goes down. The caller must commit the
+ * transaction to start the work.
+ *
+ * The caller must ensure that both inodes are joined to the transaction and
+ * ILOCKd; they will still be joined to the transaction at exit. A request
+ * with a zero block count is a no-op.
+ */
+void
+xfs_exchange_mappings(
+ struct xfs_trans *tp,
+ const struct xfs_exchmaps_req *req)
+{
+ struct xfs_exchmaps_intent *xmi;
+
+ BUILD_BUG_ON(XFS_EXCHMAPS_INTERNAL_FLAGS & XFS_EXCHMAPS_LOGGED_FLAGS);
+
+ xfs_assert_ilocked(req->ip1, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(req->ip2, XFS_ILOCK_EXCL);
+ ASSERT(!(req->flags & ~XFS_EXCHMAPS_LOGGED_FLAGS));
+ /* Setting the file sizes only makes sense for data fork exchanges. */
+ if (req->flags & XFS_EXCHMAPS_SET_SIZES)
+ ASSERT(!(req->flags & XFS_EXCHMAPS_ATTR_FORK));
+ ASSERT(xfs_has_exchange_range(tp->t_mountp));
+
+ if (req->blockcount == 0)
+ return;
+
+ xmi = xfs_exchmaps_init_intent(req);
+ xfs_exchmaps_defer_add(tp, xmi);
+ xfs_exchmaps_ensure_reflink(tp, xmi);
+ xfs_exchmaps_upgrade_extent_counts(tp, xmi);
+}
diff --git a/fs/xfs/libxfs/xfs_exchmaps.h b/fs/xfs/libxfs/xfs_exchmaps.h
new file mode 100644
index 000000000000..fa822dff202a
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_exchmaps.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_EXCHMAPS_H__
+#define __XFS_EXCHMAPS_H__
+
+/* In-core deferred operation info about a file mapping exchange request. */
+struct xfs_exchmaps_intent {
+ /* List of other incore deferred work. */
+ struct list_head xmi_list;
+
+ /* Inodes participating in the operation. */
+ struct xfs_inode *xmi_ip1;
+ struct xfs_inode *xmi_ip2;
+
+ /* File offset range information. */
+ xfs_fileoff_t xmi_startoff1;
+ xfs_fileoff_t xmi_startoff2;
+ xfs_filblks_t xmi_blockcount;
+
+ /* Set these file sizes after the operation, unless negative. */
+ xfs_fsize_t xmi_isize1;
+ xfs_fsize_t xmi_isize2;
+
+ uint64_t xmi_flags; /* XFS_EXCHMAPS_* flags */
+};
+
+/* Try to convert inode2 from block to short format at the end, if possible. */
+#define __XFS_EXCHMAPS_INO2_SHORTFORM (1ULL << 63)
+
+#define XFS_EXCHMAPS_INTERNAL_FLAGS (__XFS_EXCHMAPS_INO2_SHORTFORM)
+
+/* flags that can be passed to xfs_exchmaps_{estimate,mappings} */
+#define XFS_EXCHMAPS_PARAMS (XFS_EXCHMAPS_ATTR_FORK | \
+ XFS_EXCHMAPS_SET_SIZES | \
+ XFS_EXCHMAPS_INO1_WRITTEN)
+
+static inline int
+xfs_exchmaps_whichfork(const struct xfs_exchmaps_intent *xmi)
+{
+ if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
+ return XFS_ATTR_FORK;
+ return XFS_DATA_FORK;
+}
+
+/* Parameters for a mapping exchange request. */
+struct xfs_exchmaps_req {
+ /* Inodes participating in the operation. */
+ struct xfs_inode *ip1;
+ struct xfs_inode *ip2;
+
+ /* File offset range information. */
+ xfs_fileoff_t startoff1;
+ xfs_fileoff_t startoff2;
+ xfs_filblks_t blockcount;
+
+ /* XFS_EXCHMAPS_* operation flags */
+ uint64_t flags;
+
+ /*
+ * Fields below this line are filled out by xfs_exchmaps_estimate;
+ * callers should initialize this part of the struct to zero.
+ */
+
+ /*
+ * Data device blocks to be moved out of ip1, and free space needed to
+ * handle the bmbt changes.
+ */
+ xfs_filblks_t ip1_bcount;
+
+ /*
+ * Data device blocks to be moved out of ip2, and free space needed to
+ * handle the bmbt changes.
+ */
+ xfs_filblks_t ip2_bcount;
+
+ /* rt blocks to be moved out of ip1. */
+ xfs_filblks_t ip1_rtbcount;
+
+ /* rt blocks to be moved out of ip2. */
+ xfs_filblks_t ip2_rtbcount;
+
+ /* Free space needed to handle the bmbt changes */
+ unsigned long long resblks;
+
+ /* Number of exchanges needed to complete the operation */
+ unsigned long long nr_exchanges;
+};
+
+static inline int
+xfs_exchmaps_reqfork(const struct xfs_exchmaps_req *req)
+{
+ if (req->flags & XFS_EXCHMAPS_ATTR_FORK)
+ return XFS_ATTR_FORK;
+ return XFS_DATA_FORK;
+}
+
+int xfs_exchmaps_estimate_overhead(struct xfs_exchmaps_req *req);
+int xfs_exchmaps_estimate(struct xfs_exchmaps_req *req);
+
+extern struct kmem_cache *xfs_exchmaps_intent_cache;
+
+int __init xfs_exchmaps_intent_init_cache(void);
+void xfs_exchmaps_intent_destroy_cache(void);
+
+struct xfs_exchmaps_intent *xfs_exchmaps_init_intent(
+ const struct xfs_exchmaps_req *req);
+void xfs_exchmaps_ensure_reflink(struct xfs_trans *tp,
+ const struct xfs_exchmaps_intent *xmi);
+void xfs_exchmaps_upgrade_extent_counts(struct xfs_trans *tp,
+ const struct xfs_exchmaps_intent *xmi);
+
+int xfs_exchmaps_finish_one(struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi);
+
+int xfs_exchmaps_check_forks(struct xfs_mount *mp,
+ const struct xfs_exchmaps_req *req);
+
+void xfs_exchange_mappings(struct xfs_trans *tp,
+ const struct xfs_exchmaps_req *req);
+
+#endif /* __XFS_EXCHMAPS_H__ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 2b2f9050fbfb..4d47a3e723aa 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -90,8 +90,7 @@ struct xfs_ifork;
#define XFSLABEL_MAX 12
/*
- * Superblock - in core version. Must match the ondisk version below.
- * Must be padded to 64 bit alignment.
+ * Superblock - in core version. Must be padded to 64 bit alignment.
*/
typedef struct xfs_sb {
uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
@@ -175,13 +174,19 @@ typedef struct xfs_sb {
xfs_lsn_t sb_lsn; /* last write sequence */
uuid_t sb_meta_uuid; /* metadata file system unique id */
+ xfs_ino_t sb_metadirino; /* metadata directory tree root */
+
+ xfs_rgnumber_t sb_rgcount; /* number of realtime groups */
+ xfs_rtxlen_t sb_rgextents; /* size of a realtime group in rtx */
+
+ uint8_t sb_rgblklog; /* rt group number shift */
+ uint8_t sb_pad[7]; /* zeroes */
+
/* must be padded to 64 bit alignment */
} xfs_sb_t;
-#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc)
-
/*
- * Superblock - on disk version. Must match the in core version above.
+ * Superblock - on disk version.
* Must be padded to 64 bit alignment.
*/
struct xfs_dsb {
@@ -262,9 +267,23 @@ struct xfs_dsb {
__be64 sb_lsn; /* last write sequence */
uuid_t sb_meta_uuid; /* metadata file system unique id */
- /* must be padded to 64 bit alignment */
+ __be64 sb_metadirino; /* metadata directory tree root */
+ __be32 sb_rgcount; /* # of realtime groups */
+ __be32 sb_rgextents; /* size of rtgroup in rtx */
+
+ __u8 sb_rgblklog; /* rt group number shift */
+ __u8 sb_pad[7]; /* zeroes */
+
+ /*
+ * The size of this structure must be padded to 64 bit alignment.
+ *
+ * NOTE: Don't forget to update secondary_sb_whack in xfs_repair when
+ * adding new fields here.
+ */
};
+#define XFS_SB_CRC_OFF offsetof(struct xfs_dsb, sb_crc)
+
/*
* Misc. Flags - warning - these will be cleared by xfs_repair unless
* a feature bit is set when the flag is used.
@@ -279,7 +298,7 @@ struct xfs_dsb {
#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
-static inline bool xfs_sb_is_v5(struct xfs_sb *sbp)
+static inline bool xfs_sb_is_v5(const struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
}
@@ -288,12 +307,12 @@ static inline bool xfs_sb_is_v5(struct xfs_sb *sbp)
* Detect a mismatched features2 field. Older kernels read/wrote
* this into the wrong slot, so to be safe we keep them in sync.
*/
-static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp)
+static inline bool xfs_sb_has_mismatched_features2(const struct xfs_sb *sbp)
{
return sbp->sb_bad_features2 != sbp->sb_features2;
}
-static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_hasmorebits(const struct xfs_sb *sbp)
{
return xfs_sb_is_v5(sbp) ||
(sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
@@ -343,8 +362,8 @@ static inline void xfs_sb_version_addprojid32(struct xfs_sb *sbp)
#define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL
static inline bool
xfs_sb_has_compat_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_compat & feature) != 0;
}
@@ -361,31 +380,37 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_ro_compat & feature) != 0;
}
-#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
-#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
-#define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */
-#define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */
-#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */
-#define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */
+#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
+#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
+#define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */
+#define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */
+#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */
+#define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */
+#define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) /* exchangerange supported */
+#define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */
+#define XFS_SB_FEAT_INCOMPAT_METADIR (1 << 8) /* metadata dir tree */
#define XFS_SB_FEAT_INCOMPAT_ALL \
- (XFS_SB_FEAT_INCOMPAT_FTYPE| \
- XFS_SB_FEAT_INCOMPAT_SPINODES| \
- XFS_SB_FEAT_INCOMPAT_META_UUID| \
- XFS_SB_FEAT_INCOMPAT_BIGTIME| \
- XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR| \
- XFS_SB_FEAT_INCOMPAT_NREXT64)
+ (XFS_SB_FEAT_INCOMPAT_FTYPE | \
+ XFS_SB_FEAT_INCOMPAT_SPINODES | \
+ XFS_SB_FEAT_INCOMPAT_META_UUID | \
+ XFS_SB_FEAT_INCOMPAT_BIGTIME | \
+ XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR | \
+ XFS_SB_FEAT_INCOMPAT_NREXT64 | \
+ XFS_SB_FEAT_INCOMPAT_EXCHRANGE | \
+ XFS_SB_FEAT_INCOMPAT_PARENT | \
+ XFS_SB_FEAT_INCOMPAT_METADIR)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
xfs_sb_has_incompat_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_incompat & feature) != 0;
}
@@ -396,8 +421,8 @@ xfs_sb_has_incompat_feature(
#define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL
static inline bool
xfs_sb_has_incompat_log_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_log_incompat & feature) != 0;
}
@@ -417,7 +442,7 @@ xfs_sb_add_incompat_log_features(
sbp->sb_features_log_incompat |= features;
}
-static inline bool xfs_sb_version_haslogxattrs(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_haslogxattrs(const struct xfs_sb *sbp)
{
return xfs_sb_is_v5(sbp) && (sbp->sb_features_log_incompat &
XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
@@ -691,20 +716,57 @@ struct xfs_agfl {
/*
* Realtime bitmap information is accessed by the word, which is currently
- * stored in host-endian format.
+ * stored in host-endian format. Starting with the realtime groups feature,
+ * the words are stored in be32 ondisk.
*/
union xfs_rtword_raw {
__u32 old;
+ __be32 rtg;
};
/*
* Realtime summary counts are accessed by the word, which is currently
- * stored in host-endian format.
+ * stored in host-endian format. Starting with the realtime groups feature,
+ * the words are stored in be32 ondisk.
*/
union xfs_suminfo_raw {
__u32 old;
+ __be32 rtg;
+};
+
+/*
+ * Realtime allocation groups break the rt section into multiple pieces that
+ * could be locked independently. Realtime block group numbers are 32-bit
+ * quantities. Block numbers within a group are also 32-bit quantities, but
+ * the upper bit must never be set. rtgroup 0 might have a superblock in it,
+ * so the minimum size of an rtgroup is 2 rtx.
+ */
+#define XFS_MAX_RGBLOCKS ((xfs_rgblock_t)(1U << 31) - 1)
+#define XFS_MIN_RGEXTENTS ((xfs_rtxlen_t)2)
+#define XFS_MAX_RGNUMBER ((xfs_rgnumber_t)(-1U))
+
+#define XFS_RTSB_MAGIC 0x46726F67 /* 'Frog' */
+
+/*
+ * Realtime superblock - on disk version. Must be padded to 64 bit alignment.
+ * The first block of the realtime volume contains this superblock.
+ */
+struct xfs_rtsb {
+ __be32 rsb_magicnum; /* magic number == XFS_RTSB_MAGIC */
+ __le32 rsb_crc; /* superblock crc */
+
+ __be32 rsb_pad; /* zero */
+ unsigned char rsb_fname[XFSLABEL_MAX]; /* file system name */
+
+ uuid_t rsb_uuid; /* user-visible file system unique id */
+ uuid_t rsb_meta_uuid; /* metadata file system unique id */
+
+ /* must be padded to 64 bit alignment */
};
+#define XFS_RTSB_CRC_OFF offsetof(struct xfs_rtsb, rsb_crc)
+#define XFS_RTSB_DADDR ((xfs_daddr_t)0) /* daddr in rt section */
+
/*
* XFS Timestamps
* ==============
@@ -787,6 +849,27 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds)
return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET;
}
+enum xfs_metafile_type {
+ XFS_METAFILE_UNKNOWN, /* unknown */
+ XFS_METAFILE_DIR, /* metadir directory */
+ XFS_METAFILE_USRQUOTA, /* user quota */
+ XFS_METAFILE_GRPQUOTA, /* group quota */
+ XFS_METAFILE_PRJQUOTA, /* project quota */
+ XFS_METAFILE_RTBITMAP, /* rt bitmap */
+ XFS_METAFILE_RTSUMMARY, /* rt summary */
+
+ XFS_METAFILE_MAX
+} __packed;
+
+#define XFS_METAFILE_TYPE_STR \
+ { XFS_METAFILE_UNKNOWN, "unknown" }, \
+ { XFS_METAFILE_DIR, "dir" }, \
+ { XFS_METAFILE_USRQUOTA, "usrquota" }, \
+ { XFS_METAFILE_GRPQUOTA, "grpquota" }, \
+ { XFS_METAFILE_PRJQUOTA, "prjquota" }, \
+ { XFS_METAFILE_RTBITMAP, "rtbitmap" }, \
+ { XFS_METAFILE_RTSUMMARY, "rtsummary" }
+
/*
* On-disk inode structure.
*
@@ -809,7 +892,7 @@ struct xfs_dinode {
__be16 di_mode; /* mode and type of file */
__u8 di_version; /* inode version */
__u8 di_format; /* format of di_c data */
- __be16 di_onlink; /* old number of links to file */
+ __be16 di_metatype; /* XFS_METAFILE_*; was di_onlink */
__be32 di_uid; /* owner's user id */
__be32 di_gid; /* owner's group id */
__be32 di_nlink; /* number of links to file */
@@ -898,6 +981,12 @@ static inline uint xfs_dinode_size(int version)
#define XFS_MAXLINK ((1U << 31) - 1U)
/*
+ * Any file that hits the maximum ondisk link count should be pinned to avoid
+ * a use-after-free situation.
+ */
+#define XFS_NLINK_PINNED (~0U)
+
+/*
* Values for di_format
*
* This enum is used in string mapping in xfs_trace.h; please keep the
@@ -1079,21 +1168,60 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
* Values for di_flags2 These start by being exposed to userspace in the upper
* 16 bits of the XFS_XFLAG_s range.
*/
-#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */
-#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */
-#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */
-#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
-#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */
+/* use DAX for this inode */
+#define XFS_DIFLAG2_DAX_BIT 0
+
+/* file's blocks may be shared */
+#define XFS_DIFLAG2_REFLINK_BIT 1
+
+/* copy on write extent size hint */
+#define XFS_DIFLAG2_COWEXTSIZE_BIT 2
+
+/* big timestamps */
+#define XFS_DIFLAG2_BIGTIME_BIT 3
+
+/* large extent counters */
+#define XFS_DIFLAG2_NREXT64_BIT 4
+
+/*
+ * The inode contains filesystem metadata and can be found through the metadata
+ * directory tree. Metadata inodes must satisfy the following constraints:
+ *
+ * - V5 filesystem (and ftype) are enabled;
+ * - The only valid modes are regular files and directories;
+ * - The access bits must be zero;
+ * - DMAPI event and state masks are zero;
+ * - The user and group IDs must be zero;
+ * - The project ID can be used as a u32 annotation;
+ * - The immutable, sync, noatime, nodump, nodefrag flags must be set.
+ * - The dax flag must not be set.
+ * - Directories must have nosymlinks set.
+ *
+ * These requirements are chosen defensively to minimize the ability of
+ * userspace to read or modify the contents, should a metadata file ever
+ * escape to userspace.
+ *
+ * There are further constraints on the directory tree itself:
+ *
+ * - Metadata inodes must never be resolvable through the root directory;
+ * - They must never be accessed by userspace;
+ * - Metadata directory entries must have correct ftype.
+ *
+ * Superblock-rooted metadata files must have the METADATA iflag set even
+ * though they do not have a parent directory.
+ */
+#define XFS_DIFLAG2_METADATA_BIT 5
-#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
-#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
-#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
-#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT)
-#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT)
+#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT)
+#define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT)
+#define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT)
+#define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT)
+#define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT)
+#define XFS_DIFLAG2_METADATA (1ULL << XFS_DIFLAG2_METADATA_BIT)
#define XFS_DIFLAG2_ANY \
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
- XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64)
+ XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA)
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
{
@@ -1108,6 +1236,12 @@ static inline bool xfs_dinode_has_large_extent_counts(
(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_NREXT64));
}
+static inline bool xfs_dinode_is_metadir(const struct xfs_dinode *dip)
+{
+ return dip->di_version >= 3 &&
+ (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA));
+}
+
/*
* Inode number format:
* low inopblog bits - offset in block
@@ -1156,6 +1290,24 @@ static inline bool xfs_dinode_has_large_extent_counts(
#define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */
/*
+ * RT bit manipulation macros.
+ */
+#define XFS_RTBITMAP_MAGIC 0x424D505A /* BMPZ */
+#define XFS_RTSUMMARY_MAGIC 0x53554D59 /* SUMY */
+
+struct xfs_rtbuf_blkinfo {
+ __be32 rt_magic; /* validity check on block */
+ __be32 rt_crc; /* CRC of block */
+ __be64 rt_owner; /* inode that owns the block */
+ __be64 rt_blkno; /* first block of the buffer */
+ __be64 rt_lsn; /* sequence number of last write */
+ uuid_t rt_uuid; /* filesystem we belong to */
+};
+
+#define XFS_RTBUF_CRC_OFF \
+ offsetof(struct xfs_rtbuf_blkinfo, rt_crc)
+
+/*
* Dquot and dquot block format definitions
*/
#define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index ca1b17d01437..41ce4d3d650e 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -8,6 +8,7 @@
/*
* SGI's XFS filesystem's major stuff (constants, structures)
+ * NOTE: This file must be compile-able with C++ compilers.
*/
/*
@@ -186,7 +187,9 @@ struct xfs_fsop_geom {
__u32 logsunit; /* log stripe unit, bytes */
uint32_t sick; /* o: unhealthy fs & rt metadata */
uint32_t checked; /* o: checked fs & rt metadata */
- __u64 reserved[17]; /* reserved space */
+ __u32 rgextents; /* rt extents in a realtime group */
+ __u32 rgcount; /* number of realtime groups */
+ __u64 reserved[16]; /* reserved space */
};
#define XFS_FSOP_GEOM_SICK_COUNTERS (1 << 0) /* summary counters */
@@ -197,6 +200,8 @@ struct xfs_fsop_geom {
#define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */
#define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */
#define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */
+#define XFS_FSOP_GEOM_SICK_METADIR (1 << 8) /* metadata directory */
+#define XFS_FSOP_GEOM_SICK_METAPATH (1 << 9) /* metadir tree path */
/* Output for XFS_FS_COUNTS */
typedef struct xfs_fsop_counts {
@@ -239,6 +244,9 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_BIGTIME (1 << 21) /* 64-bit nsec timestamps */
#define XFS_FSOP_GEOM_FLAGS_INOBTCNT (1 << 22) /* inobt btree counter */
#define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */
+#define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */
+#define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */
+#define XFS_FSOP_GEOM_FLAGS_METADIR (1 << 26) /* metadata directories */
/*
* Minimum and maximum sizes need for growth checks.
@@ -409,6 +417,7 @@ struct xfs_bulkstat {
#define XFS_BS_SICK_XATTR (1 << 5) /* extended attributes */
#define XFS_BS_SICK_SYMLINK (1 << 6) /* symbolic link remote target */
#define XFS_BS_SICK_PARENT (1 << 7) /* parent pointers */
+#define XFS_BS_SICK_DIRTREE (1 << 8) /* directory tree structure */
/*
* Project quota id helpers (previously projid was 16bit only
@@ -485,9 +494,17 @@ struct xfs_bulk_ireq {
*/
#define XFS_BULK_IREQ_NREXT64 (1U << 2)
+/*
+ * Allow bulkstat to return information about metadata directories. This
+ * enables xfs_scrub to find them for scanning, as they are otherwise ordinary
+ * directories.
+ */
+#define XFS_BULK_IREQ_METADIR (1U << 3)
+
#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \
XFS_BULK_IREQ_SPECIAL | \
- XFS_BULK_IREQ_NREXT64)
+ XFS_BULK_IREQ_NREXT64 | \
+ XFS_BULK_IREQ_METADIR)
/* Operate on the root directory inode. */
#define XFS_BULK_IREQ_SPECIAL_ROOT (1)
@@ -632,7 +649,9 @@ typedef struct xfs_fsop_attrmulti_handlereq {
/*
* per machine unique filesystem identifier types.
*/
-typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */
+typedef struct xfs_fsid {
+ __u32 val[2]; /* file system id type */
+} xfs_fsid_t;
typedef struct xfs_fid {
__u16 fid_len; /* length of remainder */
@@ -715,9 +734,21 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_QUOTACHECK 25 /* quota counters */
#define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */
#define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */
+#define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */
+#define XFS_SCRUB_TYPE_METAPATH 29 /* metadata directory tree paths */
+#define XFS_SCRUB_TYPE_RGSUPER 30 /* realtime superblock */
/* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR 28
+#define XFS_SCRUB_TYPE_NR 31
+
+/*
+ * This special type code only applies to the vectored scrub implementation.
+ *
+ * If any of the previous scrub vectors recorded runtime errors or have
+ * sv_flags bits set that match the OFLAG bits in the barrier vector's
+ * sv_flags, set the barrier's sv_ret to -ECANCELED and return to userspace.
+ */
+#define XFS_SCRUB_TYPE_BARRIER (0xFFFFFFFF)
/* i: Repair this metadata. */
#define XFS_SCRUB_IFLAG_REPAIR (1u << 0)
@@ -763,6 +794,45 @@ struct xfs_scrub_metadata {
XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED)
#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
+/* Vectored scrub calls to reduce the number of kernel transitions. */
+
+struct xfs_scrub_vec {
+ __u32 sv_type; /* XFS_SCRUB_TYPE_* */
+ __u32 sv_flags; /* XFS_SCRUB_FLAGS_* */
+ __s32 sv_ret; /* 0 or a negative error code */
+ __u32 sv_reserved; /* must be zero */
+};
+
+/* Vectored metadata scrub control structure. */
+struct xfs_scrub_vec_head {
+ __u64 svh_ino; /* inode number. */
+ __u32 svh_gen; /* inode generation. */
+ __u32 svh_agno; /* ag number. */
+ __u32 svh_flags; /* XFS_SCRUB_VEC_FLAGS_* */
+ __u16 svh_rest_us; /* wait this much time between vector items */
+ __u16 svh_nr; /* number of svh_vectors */
+ __u64 svh_reserved; /* must be zero */
+ __u64 svh_vectors; /* pointer to buffer of xfs_scrub_vec */
+};
+
+#define XFS_SCRUB_VEC_FLAGS_ALL (0)
+
+/*
+ * i: sm_ino values for XFS_SCRUB_TYPE_METAPATH to select a metadata file for
+ * path checking.
+ */
+#define XFS_SCRUB_METAPATH_PROBE (0) /* do we have a metapath scrubber? */
+#define XFS_SCRUB_METAPATH_RTDIR (1) /* rtrgroups metadir */
+#define XFS_SCRUB_METAPATH_RTBITMAP (2) /* per-rtg bitmap */
+#define XFS_SCRUB_METAPATH_RTSUMMARY (3) /* per-rtg summary */
+#define XFS_SCRUB_METAPATH_QUOTADIR (4) /* quota metadir */
+#define XFS_SCRUB_METAPATH_USRQUOTA (5) /* user quota */
+#define XFS_SCRUB_METAPATH_GRPQUOTA (6) /* group quota */
+#define XFS_SCRUB_METAPATH_PRJQUOTA (7) /* project quota */
+
+/* Number of metapath sm_ino values */
+#define XFS_SCRUB_METAPATH_NR (8)
+
/*
* ioctl limits
*/
@@ -772,6 +842,157 @@ struct xfs_scrub_metadata {
# define XFS_XATTR_LIST_MAX 65536
#endif
+/*
+ * Exchange part of file1 with part of the file that this ioctl is being
+ * called against (which we'll call file2). Filesystems must be able to
+ * restart and complete the operation even after the system goes down.
+ */
+struct xfs_exchange_range {
+ __s32 file1_fd;
+ __u32 pad; /* must be zeroes */
+ __u64 file1_offset; /* file1 offset, bytes */
+ __u64 file2_offset; /* file2 offset, bytes */
+ __u64 length; /* bytes to exchange */
+
+ __u64 flags; /* see XFS_EXCHANGE_RANGE_* below */
+};
+
+/*
+ * Using the same definition of file2 as struct xfs_exchange_range, commit the
+ * contents of file1 into file2 if file2 has the same inode number, mtime, and
+ * ctime as the arguments provided to the call. The old contents of file2 will
+ * be moved to file1.
+ *
+ * Returns -EBUSY if there isn't an exact match for the file2 fields.
+ *
+ * Filesystems must be able to restart and complete the operation even after
+ * the system goes down.
+ */
+struct xfs_commit_range {
+ __s32 file1_fd;
+ __u32 pad; /* must be zeroes */
+ __u64 file1_offset; /* file1 offset, bytes */
+ __u64 file2_offset; /* file2 offset, bytes */
+ __u64 length; /* bytes to exchange */
+
+ __u64 flags; /* see XFS_EXCHANGE_RANGE_* below */
+
+ /* opaque file2 metadata for freshness checks */
+ __u64 file2_freshness[6];
+};
+
+/*
+ * Exchange file data all the way to the ends of both files, and then exchange
+ * the file sizes. This flag can be used to replace a file's contents with a
+ * different amount of data. length will be ignored.
+ */
+#define XFS_EXCHANGE_RANGE_TO_EOF (1ULL << 0)
+
+/* Flush all changes in file data and file metadata to disk before returning. */
+#define XFS_EXCHANGE_RANGE_DSYNC (1ULL << 1)
+
+/* Dry run; do all the parameter verification but do not change anything. */
+#define XFS_EXCHANGE_RANGE_DRY_RUN (1ULL << 2)
+
+/*
+ * Exchange only the parts of the two files where the file allocation units
+ * mapped to file1's range have been written to. This can accelerate
+ * scatter-gather atomic writes with a temp file if all writes are aligned to
+ * the file allocation unit.
+ */
+#define XFS_EXCHANGE_RANGE_FILE1_WRITTEN (1ULL << 3)
+
+#define XFS_EXCHANGE_RANGE_ALL_FLAGS (XFS_EXCHANGE_RANGE_TO_EOF | \
+ XFS_EXCHANGE_RANGE_DSYNC | \
+ XFS_EXCHANGE_RANGE_DRY_RUN | \
+ XFS_EXCHANGE_RANGE_FILE1_WRITTEN)
+
+/* Iterating parent pointers of files. */
+
+/* target was the root directory */
+#define XFS_GETPARENTS_OFLAG_ROOT (1U << 0)
+
+/* Cursor is done iterating pptrs */
+#define XFS_GETPARENTS_OFLAG_DONE (1U << 1)
+
+#define XFS_GETPARENTS_OFLAGS_ALL (XFS_GETPARENTS_OFLAG_ROOT | \
+ XFS_GETPARENTS_OFLAG_DONE)
+
+#define XFS_GETPARENTS_IFLAGS_ALL (0)
+
+struct xfs_getparents_rec {
+ struct xfs_handle gpr_parent; /* Handle to parent */
+ __u32 gpr_reclen; /* Length of entire record */
+ __u32 gpr_reserved; /* zero */
+ char gpr_name[]; /* Null-terminated filename */
+};
+
+/* Iterate through this file's directory parent pointers */
+struct xfs_getparents {
+ /*
+ * Structure to track progress in iterating the parent pointers.
+ * Must be initialized to zeroes before the first ioctl call, and
+ * not touched by callers after that.
+ */
+ struct xfs_attrlist_cursor gp_cursor;
+
+ /* Input flags: XFS_GETPARENTS_IFLAG* */
+ __u16 gp_iflags;
+
+ /* Output flags: XFS_GETPARENTS_OFLAG* */
+ __u16 gp_oflags;
+
+ /* Size of the gp_buffer in bytes */
+ __u32 gp_bufsize;
+
+ /* Must be set to zero */
+ __u64 gp_reserved;
+
+ /* Pointer to a buffer in which to place xfs_getparents_rec */
+ __u64 gp_buffer;
+};
+
+static inline struct xfs_getparents_rec *
+xfs_getparents_first_rec(struct xfs_getparents *gp)
+{
+ return (struct xfs_getparents_rec *)(uintptr_t)gp->gp_buffer;
+}
+
+static inline struct xfs_getparents_rec *
+xfs_getparents_next_rec(struct xfs_getparents *gp,
+ struct xfs_getparents_rec *gpr)
+{
+ void *next = ((char *)gpr + gpr->gpr_reclen); /* step over this record */
+ void *end = (void *)(uintptr_t)(gp->gp_buffer + gp->gp_bufsize);
+
+ if (next >= end) /* no record starts at or past the end of gp_buffer */
+ return NULL;
+
+ return (struct xfs_getparents_rec *)next;
+}
+
+/* Iterate through this file handle's directory parent pointers. */
+struct xfs_getparents_by_handle {
+ /* Handle to file whose parents we want. */
+ struct xfs_handle gph_handle;
+
+ struct xfs_getparents gph_request;
+};
+
+/*
+ * Output for XFS_IOC_RTGROUP_GEOMETRY
+ */
+struct xfs_rtgroup_geometry {
+ __u32 rg_number; /* i/o: rtgroup number */
+ __u32 rg_length; /* o: length in blocks */
+ __u32 rg_sick; /* o: sick things in rtgroup */
+ __u32 rg_checked; /* o: checked metadata in rtgroup */
+ __u32 rg_flags; /* i/o: flags for this rtgroup */
+ __u32 rg_reserved[27]; /* o: zero */
+};
+#define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */
+#define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */
+#define XFS_RTGROUP_GEOM_SICK_SUMMARY (1U << 2) /* rtsummary */
/*
* ioctl commands that are used by Linux filesystems
@@ -808,6 +1029,10 @@ struct xfs_scrub_metadata {
/* XFS_IOC_GETFSMAP ------ hoisted 59 */
#define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata)
#define XFS_IOC_AG_GEOMETRY _IOWR('X', 61, struct xfs_ag_geometry)
+#define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents)
+#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
+#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head)
+#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry)
/*
* ioctl commands that replace IRIX syssgi()'s
@@ -843,6 +1068,9 @@ struct xfs_scrub_metadata {
#define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom)
#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req)
#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req)
+#define XFS_IOC_EXCHANGE_RANGE _IOW ('X', 129, struct xfs_exchange_range)
+#define XFS_IOC_START_COMMIT _IOR ('X', 130, struct xfs_commit_range)
+#define XFS_IOC_COMMIT_RANGE _IOW ('X', 131, struct xfs_commit_range)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c
new file mode 100644
index 000000000000..e9d76bcdc820
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_group.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Red Hat, Inc.
+ */
+
+#include "xfs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_extent_busy.h"
+#include "xfs_group.h"
+
+/*
+ * Groups can have passive and active references.
+ *
+ * For passive references the code freeing a group is responsible for cleaning
+ * up objects that hold the passive references (e.g. cached buffers).
+ * Routines manipulating passive references are xfs_group_get, xfs_group_hold
+ * and xfs_group_put.
+ *
+ * Active references are for short term access to the group for walking trees or
+ * accessing state. If a group is being shrunk or offlined, the lookup will fail
+ * to find that group and return NULL instead.
+ * Routines manipulating active references are xfs_group_grab and
+ * xfs_group_rele.
+ */
+
+struct xfs_group * /* looked-up group with a passive reference, or NULL */
+xfs_group_get(
+ struct xfs_mount *mp,
+ uint32_t index, /* group number to look up */
+ enum xfs_group_type type) /* selects which mp->m_groups[] xarray */
+{
+ struct xfs_group *xg;
+
+ rcu_read_lock();
+ xg = xa_load(&mp->m_groups[type].xa, index);
+ if (xg) {
+ trace_xfs_group_get(xg, _RET_IP_);
+ ASSERT(atomic_read(&xg->xg_ref) >= 0);
+ atomic_inc(&xg->xg_ref); /* take the passive reference */
+ }
+ rcu_read_unlock();
+ return xg; /* NULL when nothing is stored at @index */
+}
+
+struct xfs_group * /* returns @xg with one more passive reference */
+xfs_group_hold(
+ struct xfs_group *xg) /* must already hold a passive or active ref */
+{
+ ASSERT(atomic_read(&xg->xg_ref) > 0 ||
+ atomic_read(&xg->xg_active_ref) > 0);
+
+ trace_xfs_group_hold(xg, _RET_IP_);
+ atomic_inc(&xg->xg_ref); /* the extra passive reference */
+ return xg; /* hands back the same pointer */
+}
+
+void /* drop one passive reference on @xg */
+xfs_group_put(
+ struct xfs_group *xg) /* group whose xg_ref is decremented */
+{
+ trace_xfs_group_put(xg, _RET_IP_);
+
+ ASSERT(atomic_read(&xg->xg_ref) > 0);
+ atomic_dec(&xg->xg_ref); /* only the count drops; nothing is freed here */
+}
+
+struct xfs_group * /* looked-up group with an active reference, or NULL */
+xfs_group_grab(
+ struct xfs_mount *mp,
+ uint32_t index, /* group number to look up */
+ enum xfs_group_type type) /* selects which mp->m_groups[] xarray */
+{
+ struct xfs_group *xg;
+
+ rcu_read_lock();
+ xg = xa_load(&mp->m_groups[type].xa, index);
+ if (xg) {
+ trace_xfs_group_grab(xg, _RET_IP_);
+ if (!atomic_inc_not_zero(&xg->xg_active_ref)) /* count already 0 */
+ xg = NULL; /* treat the group as not found */
+ }
+ rcu_read_unlock();
+ return xg;
+}
+
+/*
+ * Iterate to the next group. To start the iteration at @start_index, a %NULL
+ * @xg is passed, else the previous group returned from this function. The
+ * caller should break out of the loop when this returns %NULL. If the caller
+ * wants to break out of a loop that did not finish it needs to release the
+ * active reference to @xg using xfs_group_rele() itself.
+ */
+struct xfs_group * /* next group with an active reference, or NULL */
+xfs_group_next_range(
+ struct xfs_mount *mp,
+ struct xfs_group *xg, /* previous group, or NULL to start */
+ uint32_t start_index, /* only used when @xg is NULL */
+ uint32_t end_index, /* inclusive upper bound */
+ enum xfs_group_type type)
+{
+ uint32_t index = start_index;
+
+ if (xg) {
+ index = xg->xg_gno + 1;
+ xfs_group_rele(xg); /* drop the previous group's active ref */
+ }
+ if (index > end_index)
+ return NULL; /* walked past the end of the range */
+ return xfs_group_grab(mp, index, type); /* NULL here also ends the walk */
+}
+
+/*
+ * Grab the next group with @mark set after @xg, or the first if @xg is NULL.
+ */
+struct xfs_group *
+xfs_group_grab_next_mark(
+ struct xfs_mount *mp,
+ struct xfs_group *xg, /* previous group; its active ref is dropped */
+ xa_mark_t mark, /* only groups carrying this xarray mark match */
+ enum xfs_group_type type)
+{
+ unsigned long index = 0;
+
+ if (xg) {
+ index = xg->xg_gno + 1;
+ xfs_group_rele(xg);
+ }
+
+ rcu_read_lock();
+ xg = xa_find(&mp->m_groups[type].xa, &index, ULONG_MAX, mark);
+ if (xg) {
+ trace_xfs_group_grab_next_tag(xg, _RET_IP_);
+ if (!atomic_inc_not_zero(&xg->xg_active_ref)) /* count already 0 */
+ xg = NULL; /* returns NULL; later groups are not tried */
+ }
+ rcu_read_unlock();
+ return xg;
+}
+
+void /* drop one active reference on @xg */
+xfs_group_rele(
+ struct xfs_group *xg) /* group whose xg_active_ref is decremented */
+{
+ trace_xfs_group_rele(xg, _RET_IP_);
+ atomic_dec(&xg->xg_active_ref); /* no check for reaching zero here */
+}
+
+void /* unpublish the group at @index and free it */
+xfs_group_free(
+ struct xfs_mount *mp,
+ uint32_t index,
+ enum xfs_group_type type,
+ void (*uninit)(struct xfs_group *xg)) /* optional; skipped when NULL */
+{
+ struct xfs_group *xg = xa_erase(&mp->m_groups[type].xa, index);
+
+ /* NOTE(review): xg is used unchecked; assumes a group exists at @index. */
+ XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_ref) != 0);
+
+ xfs_defer_drain_free(&xg->xg_intents_drain);
+#ifdef __KERNEL__
+ kfree(xg->xg_busy_extents);
+#endif
+
+ if (uninit)
+ uninit(xg); /* called before the structure itself is freed */
+
+ /* drop the mount's active reference */
+ xfs_group_rele(xg);
+ XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) != 0);
+ kfree_rcu_mightsleep(xg); /* freeing is deferred via RCU */
+}
+
+int /* 0 on success, negative errno on failure */
+xfs_group_insert(
+ struct xfs_mount *mp,
+ struct xfs_group *xg, /* group to initialise and publish */
+ uint32_t index, /* becomes xg_gno and the xarray index */
+ enum xfs_group_type type)
+{
+ int error;
+
+ xg->xg_mount = mp;
+ xg->xg_gno = index;
+ xg->xg_type = type;
+
+#ifdef __KERNEL__
+ xg->xg_busy_extents = xfs_extent_busy_alloc();
+ if (!xg->xg_busy_extents)
+ return -ENOMEM; /* nothing to undo yet; xg is not freed here */
+ spin_lock_init(&xg->xg_state_lock);
+ xfs_hooks_init(&xg->xg_rmap_update_hooks);
+#endif
+ xfs_defer_drain_init(&xg->xg_intents_drain);
+
+ /* Active ref owned by mount indicates group is online. */
+ atomic_set(&xg->xg_active_ref, 1);
+
+ error = xa_insert(&mp->m_groups[type].xa, index, xg, GFP_KERNEL);
+ if (error) {
+ WARN_ON_ONCE(error == -EBUSY); /* slot @index already occupied */
+ goto out_drain;
+ }
+
+ return 0;
+out_drain: /* undo the setup above; xg itself is not freed here */
+ xfs_defer_drain_free(&xg->xg_intents_drain);
+#ifdef __KERNEL__
+ kfree(xg->xg_busy_extents);
+#endif
+ return error;
+}
+
+struct xfs_group * /* passive-referenced group containing @fsbno, or NULL */
+xfs_group_get_by_fsb(
+ struct xfs_mount *mp,
+ xfs_fsblock_t fsbno, /* block whose group is wanted */
+ enum xfs_group_type type) /* group type used to decode @fsbno */
+{
+ return xfs_group_get(mp, xfs_fsb_to_gno(mp, fsbno, type), type);
+}
diff --git a/fs/xfs/libxfs/xfs_group.h b/fs/xfs/libxfs/xfs_group.h
new file mode 100644
index 000000000000..242b05627c7a
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_group.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 Red Hat, Inc.
+ */
+#ifndef __LIBXFS_GROUP_H
+#define __LIBXFS_GROUP_H 1
+
+struct xfs_group {
+ struct xfs_mount *xg_mount; /* mount this group belongs to */
+ uint32_t xg_gno; /* group number */
+ enum xfs_group_type xg_type; /* index into mp->m_groups[] */
+ atomic_t xg_ref; /* passive reference count */
+ atomic_t xg_active_ref; /* active reference count */
+
+ /* Precalculated geometry info */
+ uint32_t xg_block_count; /* usable gbnos are below this */
+ uint32_t xg_min_gbno; /* min usable gbno */
+
+#ifdef __KERNEL__
+ /* -- kernel only structures below this line -- */
+
+ /*
+ * Track freed but not yet committed extents.
+ */
+ struct xfs_extent_busy_tree *xg_busy_extents;
+
+ /*
+ * Bitsets of per-group metadata that have been checked and/or are sick.
+ * Callers should hold xg_state_lock before accessing these fields.
+ */
+ uint16_t xg_checked;
+ uint16_t xg_sick;
+ spinlock_t xg_state_lock;
+
+ /*
+ * We use xfs_drain to track the number of deferred log intent items
+ * that have been queued (but not yet processed) so that waiters (e.g.
+ * scrub) will not lock resources when other threads are in the middle
+ * of processing a chain of intent items only to find momentary
+ * inconsistencies.
+ */
+ struct xfs_defer_drain xg_intents_drain;
+
+ /*
+ * Hook to feed rmapbt updates to an active online repair.
+ */
+ struct xfs_hooks xg_rmap_update_hooks;
+#endif /* __KERNEL__ */
+};
+
+struct xfs_group *xfs_group_get(struct xfs_mount *mp, uint32_t index,
+ enum xfs_group_type type); /* takes a passive ref */
+struct xfs_group *xfs_group_get_by_fsb(struct xfs_mount *mp,
+ xfs_fsblock_t fsbno, enum xfs_group_type type);
+struct xfs_group *xfs_group_hold(struct xfs_group *xg); /* extra passive ref */
+void xfs_group_put(struct xfs_group *xg); /* drops a passive ref */
+
+struct xfs_group *xfs_group_grab(struct xfs_mount *mp, uint32_t index,
+ enum xfs_group_type type); /* takes an active ref */
+struct xfs_group *xfs_group_next_range(struct xfs_mount *mp,
+ struct xfs_group *xg, uint32_t start_index, uint32_t end_index,
+ enum xfs_group_type type);
+struct xfs_group *xfs_group_grab_next_mark(struct xfs_mount *mp,
+ struct xfs_group *xg, xa_mark_t mark, enum xfs_group_type type);
+void xfs_group_rele(struct xfs_group *xg); /* drops an active ref */
+
+void xfs_group_free(struct xfs_mount *mp, uint32_t index,
+ enum xfs_group_type type, void (*uninit)(struct xfs_group *xg));
+int xfs_group_insert(struct xfs_mount *mp, struct xfs_group *xg,
+ uint32_t index, enum xfs_group_type); /* unnamed last param: type */
+
+#define xfs_group_set_mark(_xg, _mark) /* set _mark on _xg's xarray slot */ \
+ xa_set_mark(&(_xg)->xg_mount->m_groups[(_xg)->xg_type].xa, \
+ (_xg)->xg_gno, (_mark))
+#define xfs_group_clear_mark(_xg, _mark) /* clear _mark on _xg's slot */ \
+ xa_clear_mark(&(_xg)->xg_mount->m_groups[(_xg)->xg_type].xa, \
+ (_xg)->xg_gno, (_mark))
+#define xfs_group_marked(_mp, _type, _mark) /* is any _type group marked? */ \
+ xa_marked(&(_mp)->m_groups[(_type)].xa, (_mark))
+
+static inline xfs_agblock_t /* the .blocks value for @xg's group type */
+xfs_group_max_blocks(
+ struct xfs_group *xg) /* only its mount and type are read */
+{
+ return xg->xg_mount->m_groups[xg->xg_type].blocks;
+}
+
+static inline xfs_fsblock_t /* xg_gno shifted left by the type's blklog */
+xfs_group_start_fsb(
+ struct xfs_group *xg)
+{
+ return ((xfs_fsblock_t)xg->xg_gno) << /* widen before shifting */
+ xg->xg_mount->m_groups[xg->xg_type].blklog;
+}
+
+static inline xfs_fsblock_t /* group start fsb OR-ed with @gbno */
+xfs_gbno_to_fsb(
+ struct xfs_group *xg,
+ xfs_agblock_t gbno) /* assumed to fit below the blklog bits; not checked */
+{
+ return xfs_group_start_fsb(xg) | gbno;
+}
+
+static inline xfs_daddr_t /* XFS_FSB_TO_BB() of (gno * blocks + gbno) */
+xfs_gbno_to_daddr(
+ struct xfs_group *xg,
+ xfs_agblock_t gbno) /* block number within @xg */
+{
+ struct xfs_mount *mp = xg->xg_mount;
+ uint32_t blocks = mp->m_groups[xg->xg_type].blocks; /* multiplier, not a shift */
+
+ return XFS_FSB_TO_BB(mp, (xfs_fsblock_t)xg->xg_gno * blocks + gbno);
+}
+
+static inline uint32_t /* group number that @fsbno decodes to */
+xfs_fsb_to_gno(
+ struct xfs_mount *mp,
+ xfs_fsblock_t fsbno,
+ enum xfs_group_type type) /* selects which blklog applies */
+{
+ if (!mp->m_groups[type].blklog) /* zero blklog for this type */
+ return 0; /* every block maps to group 0 */
+ return fsbno >> mp->m_groups[type].blklog;
+}
+
+static inline xfs_agblock_t /* @fsbno masked with the type's blkmask */
+xfs_fsb_to_gbno(
+ struct xfs_mount *mp,
+ xfs_fsblock_t fsbno,
+ enum xfs_group_type type) /* selects which blkmask applies */
+{
+ return fsbno & mp->m_groups[type].blkmask;
+}
+
+static inline bool /* true if @gbno is in [xg_min_gbno, xg_block_count) */
+xfs_verify_gbno(
+ struct xfs_group *xg,
+ uint32_t gbno)
+{
+ if (gbno >= xg->xg_block_count) /* upper bound is exclusive */
+ return false;
+ if (gbno < xg->xg_min_gbno) /* lower bound is inclusive */
+ return false;
+ return true;
+}
+
+static inline bool /* true if [@gbno, @gbno + @glen) is non-empty and valid */
+xfs_verify_gbext(
+ struct xfs_group *xg,
+ uint32_t gbno, /* first block of the extent */
+ uint32_t glen) /* extent length in blocks */
+{
+ uint32_t end; /* last block of the extent (inclusive) */
+
+ if (!xfs_verify_gbno(xg, gbno))
+ return false;
+ if (glen == 0 || check_add_overflow(gbno, glen - 1, &end)) /* empty or wraps */
+ return false;
+ if (!xfs_verify_gbno(xg, end))
+ return false;
+ return true;
+}
+
+#endif /* __LIBXFS_GROUP_H */
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 3c64b5f9bd68..d34986ac18c3 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -6,6 +6,8 @@
#ifndef __XFS_HEALTH_H__
#define __XFS_HEALTH_H__
+struct xfs_group;
+
/*
* In-Core Filesystem Health Assessments
* =====================================
@@ -52,6 +54,7 @@ struct xfs_inode;
struct xfs_fsop_geom;
struct xfs_btree_cur;
struct xfs_da_args;
+struct xfs_rtgroup;
/* Observable health issues for metadata spanning the entire filesystem. */
#define XFS_SICK_FS_COUNTERS (1 << 0) /* summary counters */
@@ -60,10 +63,13 @@ struct xfs_da_args;
#define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */
#define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */
#define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */
+#define XFS_SICK_FS_METADIR (1 << 6) /* metadata directory tree */
+#define XFS_SICK_FS_METAPATH (1 << 7) /* metadata directory tree path */
-/* Observable health issues for realtime volume metadata. */
-#define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */
-#define XFS_SICK_RT_SUMMARY (1 << 1) /* realtime summary */
+/* Observable health issues for realtime group metadata. */
+#define XFS_SICK_RG_SUPER (1 << 0) /* rt group superblock */
+#define XFS_SICK_RG_BITMAP (1 << 1) /* rt group bitmap */
+#define XFS_SICK_RG_SUMMARY (1 << 2) /* rt group summary */
/* Observable health issues for AG metadata. */
#define XFS_SICK_AG_SB (1 << 0) /* superblock */
@@ -95,6 +101,7 @@ struct xfs_da_args;
/* Don't propagate sick status to ag health summary during inactivation */
#define XFS_SICK_INO_FORGET (1 << 12)
+#define XFS_SICK_INO_DIRTREE (1 << 13) /* directory tree structure */
/* Primary evidence of health problems in a given group. */
#define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \
@@ -102,10 +109,13 @@ struct xfs_da_args;
XFS_SICK_FS_GQUOTA | \
XFS_SICK_FS_PQUOTA | \
XFS_SICK_FS_QUOTACHECK | \
- XFS_SICK_FS_NLINKS)
+ XFS_SICK_FS_NLINKS | \
+ XFS_SICK_FS_METADIR | \
+ XFS_SICK_FS_METAPATH)
-#define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \
- XFS_SICK_RT_SUMMARY)
+#define XFS_SICK_RG_PRIMARY (XFS_SICK_RG_SUPER | \
+ XFS_SICK_RG_BITMAP | \
+ XFS_SICK_RG_SUMMARY)
#define XFS_SICK_AG_PRIMARY (XFS_SICK_AG_SB | \
XFS_SICK_AG_AGF | \
@@ -125,7 +135,8 @@ struct xfs_da_args;
XFS_SICK_INO_DIR | \
XFS_SICK_INO_XATTR | \
XFS_SICK_INO_SYMLINK | \
- XFS_SICK_INO_PARENT)
+ XFS_SICK_INO_PARENT | \
+ XFS_SICK_INO_DIRTREE)
#define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \
XFS_SICK_INO_BMBTA_ZAPPED | \
@@ -134,26 +145,26 @@ struct xfs_da_args;
/* Secondary state related to (but not primary evidence of) health problems. */
#define XFS_SICK_FS_SECONDARY (0)
-#define XFS_SICK_RT_SECONDARY (0)
+#define XFS_SICK_RG_SECONDARY (0)
#define XFS_SICK_AG_SECONDARY (0)
#define XFS_SICK_INO_SECONDARY (XFS_SICK_INO_FORGET)
/* Evidence of health problems elsewhere. */
#define XFS_SICK_FS_INDIRECT (0)
-#define XFS_SICK_RT_INDIRECT (0)
+#define XFS_SICK_RG_INDIRECT (0)
#define XFS_SICK_AG_INDIRECT (XFS_SICK_AG_INODES)
#define XFS_SICK_INO_INDIRECT (0)
/* All health masks. */
-#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \
+#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \
XFS_SICK_FS_SECONDARY | \
XFS_SICK_FS_INDIRECT)
-#define XFS_SICK_RT_ALL (XFS_SICK_RT_PRIMARY | \
- XFS_SICK_RT_SECONDARY | \
- XFS_SICK_RT_INDIRECT)
+#define XFS_SICK_RG_ALL (XFS_SICK_RG_PRIMARY | \
+ XFS_SICK_RG_SECONDARY | \
+ XFS_SICK_RG_INDIRECT)
-#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \
+#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \
XFS_SICK_AG_SECONDARY | \
XFS_SICK_AG_INDIRECT)
@@ -187,18 +198,17 @@ void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask);
void xfs_fs_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
unsigned int *checked);
-void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask);
-void xfs_rt_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
-void xfs_rt_mark_healthy(struct xfs_mount *mp, unsigned int mask);
-void xfs_rt_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
- unsigned int *checked);
+void xfs_rgno_mark_sick(struct xfs_mount *mp, xfs_rgnumber_t rgno,
+ unsigned int mask);
void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno,
unsigned int mask);
-void xfs_ag_mark_sick(struct xfs_perag *pag, unsigned int mask);
-void xfs_ag_mark_corrupt(struct xfs_perag *pag, unsigned int mask);
-void xfs_ag_mark_healthy(struct xfs_perag *pag, unsigned int mask);
-void xfs_ag_measure_sickness(struct xfs_perag *pag, unsigned int *sick,
+void xfs_group_mark_sick(struct xfs_group *xg, unsigned int mask);
+#define xfs_ag_mark_sick(pag, mask) \
+ xfs_group_mark_sick(pag_group(pag), (mask))
+void xfs_group_mark_corrupt(struct xfs_group *xg, unsigned int mask);
+void xfs_group_mark_healthy(struct xfs_group *xg, unsigned int mask);
+void xfs_group_measure_sickness(struct xfs_group *xg, unsigned int *sick,
unsigned int *checked);
void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask);
@@ -225,22 +235,25 @@ xfs_fs_has_sickness(struct xfs_mount *mp, unsigned int mask)
}
static inline bool
-xfs_rt_has_sickness(struct xfs_mount *mp, unsigned int mask)
+xfs_group_has_sickness(
+ struct xfs_group *xg,
+ unsigned int mask)
{
- unsigned int sick, checked;
+ unsigned int sick, checked;
- xfs_rt_measure_sickness(mp, &sick, &checked);
+ xfs_group_measure_sickness(xg, &sick, &checked);
return sick & mask;
}
-static inline bool
-xfs_ag_has_sickness(struct xfs_perag *pag, unsigned int mask)
-{
- unsigned int sick, checked;
+#define xfs_ag_has_sickness(pag, mask) \
+ xfs_group_has_sickness(pag_group(pag), (mask))
+#define xfs_ag_is_healthy(pag) \
+ (!xfs_ag_has_sickness((pag), UINT_MAX))
- xfs_ag_measure_sickness(pag, &sick, &checked);
- return sick & mask;
-}
+#define xfs_rtgroup_has_sickness(rtg, mask) \
+ xfs_group_has_sickness(rtg_group(rtg), (mask))
+#define xfs_rtgroup_is_healthy(rtg) \
+ (!xfs_rtgroup_has_sickness((rtg), UINT_MAX))
static inline bool
xfs_inode_has_sickness(struct xfs_inode *ip, unsigned int mask)
@@ -258,18 +271,6 @@ xfs_fs_is_healthy(struct xfs_mount *mp)
}
static inline bool
-xfs_rt_is_healthy(struct xfs_mount *mp)
-{
- return !xfs_rt_has_sickness(mp, -1U);
-}
-
-static inline bool
-xfs_ag_is_healthy(struct xfs_perag *pag)
-{
- return !xfs_ag_has_sickness(pag, -1U);
-}
-
-static inline bool
xfs_inode_is_healthy(struct xfs_inode *ip)
{
return !xfs_inode_has_sickness(ip, -1U);
@@ -277,6 +278,8 @@ xfs_inode_is_healthy(struct xfs_inode *ip)
void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo);
void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
+void xfs_rtgroup_geom_health(struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo);
void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
#define xfs_metadata_is_sick(error) \
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index e5ac3e5430c4..f3a840a425f5 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -142,7 +142,7 @@ xfs_inobt_complain_bad_rec(
xfs_warn(mp,
"%sbt record corruption in AG %d detected at %pS!",
- cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa);
+ cur->bc_ops->name, cur->bc_group->xg_gno, fa);
xfs_warn(mp,
"start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
irec->ir_startino, irec->ir_count, irec->ir_freecount,
@@ -170,7 +170,7 @@ xfs_inobt_get_rec(
return error;
xfs_inobt_btrec_to_irec(mp, rec, irec);
- fa = xfs_inobt_check_irec(cur->bc_ag.pag, irec);
+ fa = xfs_inobt_check_irec(to_perag(cur->bc_group), irec);
if (fa)
return xfs_inobt_complain_bad_rec(cur, fa, irec);
@@ -275,8 +275,10 @@ xfs_check_agi_freecount(
}
} while (i == 1);
- if (!xfs_is_shutdown(cur->bc_mp))
- ASSERT(freecount == cur->bc_ag.pag->pagi_freecount);
+ if (!xfs_is_shutdown(cur->bc_mp)) {
+ ASSERT(freecount ==
+ to_perag(cur->bc_group)->pagi_freecount);
+ }
}
return 0;
}
@@ -551,7 +553,7 @@ xfs_inobt_insert_sprec(
struct xfs_buf *agbp,
struct xfs_inobt_rec_incore *nrec) /* in/out: new/merged rec. */
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
int error;
int i;
@@ -606,15 +608,12 @@ xfs_inobt_insert_sprec(
goto error;
}
- trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino,
- rec.ir_holemask, nrec->ir_startino,
- nrec->ir_holemask);
+ trace_xfs_irec_merge_pre(pag, &rec, nrec);
/* merge to nrec to output the updated record */
__xfs_inobt_rec_merge(nrec, &rec);
- trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino,
- nrec->ir_holemask);
+ trace_xfs_irec_merge_post(pag, nrec);
error = xfs_inobt_rec_check_count(mp, nrec);
if (error)
@@ -648,7 +647,7 @@ xfs_finobt_insert_sprec(
struct xfs_buf *agbp,
struct xfs_inobt_rec_incore *nrec) /* in/out: new rec. */
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
int error;
int i;
@@ -768,8 +767,7 @@ xfs_ialloc_ag_alloc(
/* Allow space for the inode btree to split. */
args.minleft = igeo->inobt_maxlevels;
error = xfs_alloc_vextent_exact_bno(&args,
- XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
- args.agbno));
+ xfs_agbno_to_fsb(pag, args.agbno));
if (error)
return error;
@@ -811,8 +809,8 @@ xfs_ialloc_ag_alloc(
*/
args.minleft = igeo->inobt_maxlevels;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
- be32_to_cpu(agi->agi_root)));
+ xfs_agbno_to_fsb(pag,
+ be32_to_cpu(agi->agi_root)));
if (error)
return error;
}
@@ -824,8 +822,8 @@ xfs_ialloc_ag_alloc(
if (isaligned && args.fsbno == NULLFSBLOCK) {
args.alignment = igeo->cluster_align;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
- be32_to_cpu(agi->agi_root)));
+ xfs_agbno_to_fsb(pag,
+ be32_to_cpu(agi->agi_root)));
if (error)
return error;
}
@@ -855,13 +853,14 @@ sparse_alloc:
* the end of the AG.
*/
args.min_agbno = args.mp->m_sb.sb_inoalignmt;
- args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
+ args.max_agbno = round_down(xfs_ag_block_count(args.mp,
+ pag_agno(pag)),
args.mp->m_sb.sb_inoalignmt) -
igeo->ialloc_blks;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
- be32_to_cpu(agi->agi_root)));
+ xfs_agbno_to_fsb(pag,
+ be32_to_cpu(agi->agi_root)));
if (error)
return error;
@@ -884,7 +883,7 @@ sparse_alloc:
* rather than a linear progression to prevent the next generation
* number from being easily guessable.
*/
- error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno,
+ error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag_agno(pag),
args.agbno, args.len, get_random_u32());
if (error)
@@ -915,8 +914,7 @@ sparse_alloc:
if (error == -EFSCORRUPTED) {
xfs_alert(args.mp,
"invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
- XFS_AGINO_TO_INO(args.mp, pag->pag_agno,
- rec.ir_startino),
+ xfs_agino_to_ino(pag, rec.ir_startino),
rec.ir_holemask, rec.ir_count);
xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
}
@@ -1058,6 +1056,33 @@ xfs_inobt_first_free_inode(
}
/*
+ * If this AG has corrupt inodes, check if allocating this inode would fail
+ * with corruption errors. Returns 0 if we're clear, or -EAGAIN to try again
+ * somewhere else.
+ */
+static int
+xfs_dialloc_check_ino(
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ xfs_ino_t ino) /* candidate inode number to probe */
+{
+ struct xfs_imap imap;
+ struct xfs_buf *bp;
+ int error;
+
+ error = xfs_imap(pag, tp, ino, &imap, 0);
+ if (error)
+ return -EAGAIN; /* any mapping error becomes -EAGAIN */
+
+ error = xfs_imap_to_bp(pag_mount(pag), tp, &imap, &bp);
+ if (error)
+ return -EAGAIN; /* any buffer read error becomes -EAGAIN */
+
+ xfs_trans_brelse(tp, bp); /* buffer was only read as a probe */
+ return 0;
+}
+
+/*
* Allocate an inode using the inobt-only algorithm.
*/
STATIC int
@@ -1100,7 +1125,7 @@ xfs_dialloc_ag_inobt(
/*
* If in the same AG as the parent, try to get near the parent.
*/
- if (pagno == pag->pag_agno) {
+ if (pagno == pag_agno(pag)) {
int doneleft; /* done, to the left */
int doneright; /* done, to the right */
@@ -1308,7 +1333,14 @@ alloc_inode:
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
- ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+ ino = xfs_agino_to_ino(pag, rec.ir_startino + offset);
+
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error0;
+ }
+
rec.ir_free &= ~XFS_INOBT_MASK(offset);
rec.ir_freecount--;
error = xfs_inobt_update(cur, &rec);
@@ -1570,7 +1602,7 @@ xfs_dialloc_ag(
* parent. If so, find the closest available inode to the parent. If
* not, consider the agi hint or find the first free inode in the AG.
*/
- if (pag->pag_agno == pagno)
+ if (pag_agno(pag) == pagno)
error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
else
error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
@@ -1582,7 +1614,13 @@ xfs_dialloc_ag(
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
- ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+ ino = xfs_agino_to_ino(pag, rec.ir_startino + offset);
+
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error_cur;
+ }
/*
* Modify or remove the finobt record.
@@ -1699,7 +1737,7 @@ xfs_dialloc_good_ag(
return false;
if (!xfs_perag_initialised_agi(pag)) {
- error = xfs_ialloc_read_agi(pag, tp, NULL);
+ error = xfs_ialloc_read_agi(pag, tp, 0, NULL);
if (error)
return false;
}
@@ -1768,7 +1806,7 @@ xfs_dialloc_try_ag(
* Then read in the AGI buffer and recheck with the AGI buffer
* lock held.
*/
- error = xfs_ialloc_read_agi(pag, *tpp, &agbp);
+ error = xfs_ialloc_read_agi(pag, *tpp, 0, &agbp);
if (error)
return error;
@@ -1805,6 +1843,40 @@ out_release:
}
/*
+ * Pick an AG for the new inode.
+ *
+ * Directories, symlinks, and regular files frequently allocate at least one
+ * block, so factor that potential expansion when we examine whether an AG has
+ * enough space for file creation. Try to keep metadata files all in the same
+ * AG.
+ */
+static inline xfs_agnumber_t /* AG number at which to start the search */
+xfs_dialloc_pick_ag(
+ struct xfs_mount *mp,
+ struct xfs_inode *dp, /* parent directory; may be NULL */
+ umode_t mode) /* file type bits of the new inode */
+{
+ xfs_agnumber_t start_agno;
+
+ if (!dp)
+ return 0; /* no parent: start at AG 0 */
+ if (xfs_is_metadir_inode(dp)) {
+ if (mp->m_sb.sb_logstart) /* nonzero: use the AG it falls in */
+ return XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
+ return 0; /* sb_logstart is zero: fall back to AG 0 */
+ }
+
+ if (S_ISDIR(mode)) /* new directories rotate through the AGs */
+ return (atomic_inc_return(&mp->m_agirotor) - 1) % mp->m_maxagi;
+
+ start_agno = XFS_INO_TO_AGNO(mp, dp->i_ino); /* else the parent's AG */
+ if (start_agno >= mp->m_maxagi) /* parent's AG is not below m_maxagi */
+ start_agno = 0;
+
+ return start_agno;
+}
+
+/*
* Allocate an on-disk inode.
*
* Mode is used to tell whether the new inode is a directory and hence where to
@@ -1815,34 +1887,23 @@ out_release:
int
xfs_dialloc(
struct xfs_trans **tpp,
- xfs_ino_t parent,
- umode_t mode,
+ const struct xfs_icreate_args *args,
xfs_ino_t *new_ino)
{
struct xfs_mount *mp = (*tpp)->t_mountp;
- xfs_agnumber_t agno;
- int error = 0;
- xfs_agnumber_t start_agno;
struct xfs_perag *pag;
struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_ino_t ino = NULLFSINO;
+ xfs_ino_t parent = args->pip ? args->pip->i_ino : 0;
+ xfs_agnumber_t agno;
+ xfs_agnumber_t start_agno;
+ umode_t mode = args->mode & S_IFMT;
bool ok_alloc = true;
bool low_space = false;
int flags;
- xfs_ino_t ino = NULLFSINO;
+ int error = 0;
- /*
- * Directories, symlinks, and regular files frequently allocate at least
- * one block, so factor that potential expansion when we examine whether
- * an AG has enough space for file creation.
- */
- if (S_ISDIR(mode))
- start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) %
- mp->m_maxagi;
- else {
- start_agno = XFS_INO_TO_AGNO(mp, parent);
- if (start_agno >= mp->m_maxagi)
- start_agno = 0;
- }
+ start_agno = xfs_dialloc_pick_ag(mp, args->pip, mode);
/*
* If we have already hit the ceiling of inode blocks then clear
@@ -1906,6 +1967,21 @@ retry:
}
return -ENOSPC;
}
+
+ /*
+ * Protect against obviously corrupt allocation btree records. Later
+ * xfs_iget checks will catch re-allocation of other active in-memory
+ * and on-disk inodes. If we don't catch reallocating the parent inode
+ * here we will deadlock in xfs_iget() so we have to do these checks
+ * first.
+ */
+ if (ino == parent || !xfs_verify_dir_ino(mp, ino)) {
+ xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
+ xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino),
+ XFS_SICK_AG_INOBT);
+ return -EFSCORRUPTED;
+ }
+
*new_ino = ino;
return 0;
}
@@ -1918,7 +1994,7 @@ retry:
static int
xfs_difree_inode_chunk(
struct xfs_trans *tp,
- xfs_agnumber_t agno,
+ struct xfs_perag *pag,
struct xfs_inobt_rec_incore *rec)
{
struct xfs_mount *mp = tp->t_mountp;
@@ -1932,10 +2008,9 @@ xfs_difree_inode_chunk(
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
- return xfs_free_extent_later(tp,
- XFS_AGB_TO_FSB(mp, agno, sagbno),
+ return xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, sagbno),
M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@@ -1979,10 +2054,9 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
- error = xfs_free_extent_later(tp,
- XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
- &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE,
- false);
+ error = xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, agbno),
+ contigblk, &XFS_RMAP_OINFO_INODES,
+ XFS_AG_RESV_NONE, 0);
if (error)
return error;
@@ -2004,7 +2078,7 @@ xfs_difree_inobt(
struct xfs_icluster *xic,
struct xfs_inobt_rec_incore *orec)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_agi *agi = agbp->b_addr;
struct xfs_btree_cur *cur;
struct xfs_inobt_rec_incore rec;
@@ -2069,8 +2143,7 @@ xfs_difree_inobt(
if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
xic->deleted = true;
- xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
- rec.ir_startino);
+ xic->first_ino = xfs_agino_to_ino(pag, rec.ir_startino);
xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
/*
@@ -2093,7 +2166,7 @@ xfs_difree_inobt(
goto error0;
}
- error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
+ error = xfs_difree_inode_chunk(tp, pag, &rec);
if (error)
goto error0;
} else {
@@ -2139,7 +2212,7 @@ xfs_difree_finobt(
xfs_agino_t agino,
struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
struct xfs_inobt_rec_incore rec;
int offset = agino - ibtrec->ir_startino;
@@ -2262,31 +2335,31 @@ xfs_difree(
/*
* Break up inode number into its components.
*/
- if (pag->pag_agno != XFS_INO_TO_AGNO(mp, inode)) {
- xfs_warn(mp, "%s: agno != pag->pag_agno (%d != %d).",
- __func__, XFS_INO_TO_AGNO(mp, inode), pag->pag_agno);
+ if (pag_agno(pag) != XFS_INO_TO_AGNO(mp, inode)) {
+ xfs_warn(mp, "%s: agno != pag_agno(pag) (%d != %d).",
+ __func__, XFS_INO_TO_AGNO(mp, inode), pag_agno(pag));
ASSERT(0);
return -EINVAL;
}
agino = XFS_INO_TO_AGINO(mp, inode);
- if (inode != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
- xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
+ if (inode != xfs_agino_to_ino(pag, agino)) {
+ xfs_warn(mp, "%s: inode != xfs_agino_to_ino() (%llu != %llu).",
__func__, (unsigned long long)inode,
- (unsigned long long)XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
+ (unsigned long long)xfs_agino_to_ino(pag, agino));
ASSERT(0);
return -EINVAL;
}
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (agbno >= mp->m_sb.sb_agblocks) {
- xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
- __func__, agbno, mp->m_sb.sb_agblocks);
+ if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) {
+ xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).",
+ __func__, agbno, xfs_ag_block_count(mp, pag_agno(pag)));
ASSERT(0);
return -EINVAL;
}
/*
* Get the allocation group header.
*/
- error = xfs_ialloc_read_agi(pag, tp, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error) {
xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
__func__, error);
@@ -2325,18 +2398,18 @@ xfs_imap_lookup(
xfs_agblock_t *offset_agbno,
int flags)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_inobt_rec_incore rec;
struct xfs_btree_cur *cur;
struct xfs_buf *agbp;
int error;
int i;
- error = xfs_ialloc_read_agi(pag, tp, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error) {
xfs_alert(mp,
"%s: xfs_ialloc_read_agi() returned error %d, agno %d",
- __func__, error, pag->pag_agno);
+ __func__, error, pag_agno(pag));
return error;
}
@@ -2386,7 +2459,7 @@ xfs_imap(
struct xfs_imap *imap, /* location map structure */
uint flags) /* flags for inode btree lookup */
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
xfs_agblock_t agbno; /* block number of inode in the alloc group */
xfs_agino_t agino; /* inode number within alloc group */
xfs_agblock_t chunk_agbno; /* first block in inode chunk */
@@ -2402,8 +2475,8 @@ xfs_imap(
*/
agino = XFS_INO_TO_AGINO(mp, ino);
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (agbno >= mp->m_sb.sb_agblocks ||
- ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+ if (agbno >= xfs_ag_block_count(mp, pag_agno(pag)) ||
+ ino != xfs_agino_to_ino(pag, agino)) {
error = -EINVAL;
#ifdef DEBUG
/*
@@ -2412,17 +2485,18 @@ xfs_imap(
*/
if (flags & XFS_IGET_UNTRUSTED)
return error;
- if (agbno >= mp->m_sb.sb_agblocks) {
+ if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) {
xfs_alert(mp,
"%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
__func__, (unsigned long long)agbno,
- (unsigned long)mp->m_sb.sb_agblocks);
+ (unsigned long)xfs_ag_block_count(mp,
+ pag_agno(pag)));
}
- if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+ if (ino != xfs_agino_to_ino(pag, agino)) {
xfs_alert(mp,
- "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
+ "%s: ino (0x%llx) != xfs_agino_to_ino() (0x%llx)",
__func__, ino,
- XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
+ xfs_agino_to_ino(pag, agino));
}
xfs_stack_trace();
#endif /* DEBUG */
@@ -2452,7 +2526,7 @@ xfs_imap(
offset = XFS_INO_TO_OFFSET(mp, ino);
ASSERT(offset < mp->m_sb.sb_inopblock);
- imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, agbno);
+ imap->im_blkno = xfs_agbno_to_daddr(pag, agbno);
imap->im_len = XFS_FSB_TO_BB(mp, 1);
imap->im_boffset = (unsigned short)(offset <<
mp->m_sb.sb_inodelog);
@@ -2482,7 +2556,7 @@ out_map:
offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
XFS_INO_TO_OFFSET(mp, ino);
- imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, cluster_agbno);
+ imap->im_blkno = xfs_agbno_to_daddr(pag, cluster_agbno);
imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
@@ -2675,16 +2749,17 @@ int
xfs_read_agi(
struct xfs_perag *pag,
struct xfs_trans *tp,
+ xfs_buf_flags_t flags,
struct xfs_buf **agibpp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
int error;
- trace_xfs_read_agi(pag->pag_mount, pag->pag_agno);
+ trace_xfs_read_agi(pag);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0, agibpp, &xfs_agi_buf_ops);
+ XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGI_DADDR(mp)),
+ XFS_FSS_TO_BB(mp, 1), flags, agibpp, &xfs_agi_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
if (error)
@@ -2704,15 +2779,18 @@ int
xfs_ialloc_read_agi(
struct xfs_perag *pag,
struct xfs_trans *tp,
+ int flags,
struct xfs_buf **agibpp)
{
struct xfs_buf *agibp;
struct xfs_agi *agi;
int error;
- trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno);
+ trace_xfs_ialloc_read_agi(pag);
- error = xfs_read_agi(pag, tp, &agibp);
+ error = xfs_read_agi(pag, tp,
+ (flags & XFS_IALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
+ &agibp);
if (error)
return error;
@@ -2728,7 +2806,7 @@ xfs_ialloc_read_agi(
* we are in the middle of a forced shutdown.
*/
ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
- xfs_is_shutdown(pag->pag_mount));
+ xfs_is_shutdown(pag_mount(pag)));
if (agibpp)
*agibpp = agibp;
else
@@ -2828,7 +2906,7 @@ xfs_ialloc_count_inodes_rec(
xfs_failaddr_t fa;
xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
- fa = xfs_inobt_check_irec(cur->bc_ag.pag, &irec);
+ fa = xfs_inobt_check_irec(to_perag(cur->bc_group), &irec);
if (fa)
return xfs_inobt_complain_bad_rec(cur, fa, &irec);
@@ -2889,8 +2967,8 @@ xfs_ialloc_setup_geometry(
/* Compute inode btree geometry. */
igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
- igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
- igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
+ igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, true);
+ igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, false);
igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2;
igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;
@@ -2975,6 +3053,11 @@ xfs_ialloc_setup_geometry(
igeo->ialloc_align = mp->m_dalign;
else
igeo->ialloc_align = 0;
+
+ if (mp->m_sb.sb_blocksize > PAGE_SIZE)
+ igeo->min_folio_order = mp->m_sb.sb_blocklog - PAGE_SHIFT;
+ else
+ igeo->min_folio_order = 0;
}
/* Compute the location of the root directory inode that is laid out by mkfs. */
@@ -3062,13 +3145,13 @@ xfs_ialloc_check_shrink(
int has;
int error;
- if (!xfs_has_sparseinodes(pag->pag_mount))
+ if (!xfs_has_sparseinodes(pag_mount(pag)))
return 0;
cur = xfs_inobt_init_cursor(pag, tp, agibp);
/* Look up the inobt record that would correspond to the new EOFS. */
- agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length);
+ agino = XFS_AGB_TO_AGINO(pag_mount(pag), new_length);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
if (error || !has)
goto out;
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index f1412183bb44..3a1323155a45 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -33,11 +33,13 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog);
}
+struct xfs_icreate_args;
+
/*
* Allocate an inode on disk. Mode is used to tell whether the new inode will
* need space, and whether it is a directory.
*/
-int xfs_dialloc(struct xfs_trans **tpp, xfs_ino_t parent, umode_t mode,
+int xfs_dialloc(struct xfs_trans **tpp, const struct xfs_icreate_args *args,
xfs_ino_t *new_ino);
int xfs_difree(struct xfs_trans *tp, struct xfs_perag *pag,
@@ -63,10 +65,11 @@ xfs_ialloc_log_agi(
struct xfs_buf *bp, /* allocation group header buffer */
uint32_t fields); /* bitmask of fields to log */
-int xfs_read_agi(struct xfs_perag *pag, struct xfs_trans *tp,
+int xfs_read_agi(struct xfs_perag *pag, struct xfs_trans *tp, xfs_buf_flags_t flags,
struct xfs_buf **agibpp);
int xfs_ialloc_read_agi(struct xfs_perag *pag, struct xfs_trans *tp,
- struct xfs_buf **agibpp);
+ int flags, struct xfs_buf **agibpp);
+#define XFS_IALLOC_FLAG_TRYLOCK (1U << 0) /* use trylock for buffer locking */
/*
* Lookup a record by ino in the btree given by cur.
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index cc661fca6ff5..9b34896dd1a3 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -37,7 +37,7 @@ STATIC struct xfs_btree_cur *
xfs_inobt_dup_cursor(
struct xfs_btree_cur *cur)
{
- return xfs_inobt_init_cursor(cur->bc_ag.pag, cur->bc_tp,
+ return xfs_inobt_init_cursor(to_perag(cur->bc_group), cur->bc_tp,
cur->bc_ag.agbp);
}
@@ -45,7 +45,7 @@ STATIC struct xfs_btree_cur *
xfs_finobt_dup_cursor(
struct xfs_btree_cur *cur)
{
- return xfs_finobt_init_cursor(cur->bc_ag.pag, cur->bc_tp,
+ return xfs_finobt_init_cursor(to_perag(cur->bc_group), cur->bc_tp,
cur->bc_ag.agbp);
}
@@ -112,7 +112,7 @@ __xfs_inobt_alloc_block(
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
- args.pag = cur->bc_ag.pag;
+ args.pag = to_perag(cur->bc_group);
args.oinfo = XFS_RMAP_OINFO_INOBT;
args.minlen = 1;
args.maxlen = 1;
@@ -120,7 +120,7 @@ __xfs_inobt_alloc_block(
args.resv = resv;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno, sbno));
+ xfs_agbno_to_fsb(args.pag, sbno));
if (error)
return error;
@@ -170,7 +170,7 @@ __xfs_inobt_free_block(
xfs_inobt_mod_blockcount(cur, -1);
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
- &XFS_RMAP_OINFO_INOBT, resv, false);
+ &XFS_RMAP_OINFO_INOBT, resv, 0);
}
STATIC int
@@ -248,7 +248,7 @@ xfs_inobt_init_ptr_from_cur(
{
struct xfs_agi *agi = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agi->agi_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agi->agi_seqno));
ptr->s = agi->agi_root;
}
@@ -260,7 +260,8 @@ xfs_finobt_init_ptr_from_cur(
{
struct xfs_agi *agi = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agi->agi_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agi->agi_seqno));
+
ptr->s = agi->agi_free_root;
}
@@ -478,12 +479,12 @@ xfs_inobt_init_cursor(
struct xfs_trans *tp,
struct xfs_buf *agbp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_inobt_ops,
M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agi *agi = agbp->b_addr;
@@ -504,12 +505,12 @@ xfs_finobt_init_cursor(
struct xfs_trans *tp,
struct xfs_buf *agbp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_finobt_ops,
M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agi *agi = agbp->b_addr;
@@ -572,11 +573,11 @@ xfs_inobt_block_maxrecs(
/*
* Calculate number of records in an inobt btree block.
*/
-int
+unsigned int
xfs_inobt_maxrecs(
struct xfs_mount *mp,
- int blocklen,
- int leaf)
+ unsigned int blocklen,
+ bool leaf)
{
blocklen -= XFS_INOBT_BLOCK_LEN(mp);
return xfs_inobt_block_maxrecs(blocklen, leaf);
@@ -715,8 +716,8 @@ static xfs_extlen_t
xfs_inobt_max_size(
struct xfs_perag *pag)
{
- struct xfs_mount *mp = pag->pag_mount;
- xfs_agblock_t agblocks = pag->block_count;
+ struct xfs_mount *mp = pag_mount(pag);
+ xfs_agblock_t agblocks = pag_group(pag)->xg_block_count;
/* Bail out if we're uninitialized, which can happen in mkfs. */
if (M_IGEO(mp)->inobt_mxr[0] == 0)
@@ -727,7 +728,7 @@ xfs_inobt_max_size(
* never be available for the kinds of things that would require btree
* expansion. We therefore can pretend the space isn't there.
*/
- if (xfs_ag_contains_log(mp, pag->pag_agno))
+ if (xfs_ag_contains_log(mp, pag_agno(pag)))
agblocks -= mp->m_sb.sb_logblocks;
return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr,
@@ -745,11 +746,11 @@ xfs_finobt_count_blocks(
struct xfs_btree_cur *cur;
int error;
- error = xfs_ialloc_read_agi(pag, tp, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error)
return error;
- cur = xfs_inobt_init_cursor(pag, tp, agbp);
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
error = xfs_btree_count_blocks(cur, tree_blocks);
xfs_btree_del_cursor(cur, error);
xfs_trans_brelse(tp, agbp);
@@ -768,7 +769,7 @@ xfs_finobt_read_blocks(
struct xfs_agi *agi;
int error;
- error = xfs_ialloc_read_agi(pag, tp, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error)
return error;
@@ -791,10 +792,10 @@ xfs_finobt_calc_reserves(
xfs_extlen_t tree_len = 0;
int error;
- if (!xfs_has_finobt(pag->pag_mount))
+ if (!xfs_has_finobt(pag_mount(pag)))
return 0;
- if (xfs_has_inobtcounts(pag->pag_mount))
+ if (xfs_has_inobtcounts(pag_mount(pag)))
error = xfs_finobt_read_blocks(pag, tp, &tree_len);
else
error = xfs_finobt_count_blocks(pag, tp, &tree_len);
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index 6472ec1ecbb4..300edf5bc009 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -50,7 +50,8 @@ struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_buf *agbp);
struct xfs_btree_cur *xfs_finobt_init_cursor(struct xfs_perag *pag,
struct xfs_trans *tp, struct xfs_buf *agbp);
-extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
+unsigned int xfs_inobt_maxrecs(struct xfs_mount *mp, unsigned int blocklen,
+ bool leaf);
/* ir_holemask to inode allocation bitmap conversion */
uint64_t xfs_inobt_irec_to_allocmask(const struct xfs_inobt_rec_incore *irec);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d0dcce462bf4..424861fbf1bd 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -19,6 +19,7 @@
#include "xfs_ialloc.h"
#include "xfs_dir2.h"
#include "xfs_health.h"
+#include "xfs_metafile.h"
#include <linux/iversion.h>
@@ -209,12 +210,15 @@ xfs_inode_from_disk(
* They will also be unconditionally written back to disk as v2 inodes.
*/
if (unlikely(from->di_version == 1)) {
- set_nlink(inode, be16_to_cpu(from->di_onlink));
+ /* di_metatype used to be di_onlink */
+ set_nlink(inode, be16_to_cpu(from->di_metatype));
ip->i_projid = 0;
} else {
set_nlink(inode, be32_to_cpu(from->di_nlink));
ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
be16_to_cpu(from->di_projid_lo);
+ if (xfs_dinode_is_metadir(from))
+ ip->i_metatype = be16_to_cpu(from->di_metatype);
}
i_uid_write(inode, be32_to_cpu(from->di_uid));
@@ -315,7 +319,10 @@ xfs_inode_to_disk(
struct inode *inode = VFS_I(ip);
to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
- to->di_onlink = 0;
+ if (xfs_is_metadir_inode(ip))
+ to->di_metatype = cpu_to_be16(ip->i_metatype);
+ else
+ to->di_metatype = 0;
to->di_format = xfs_ifork_format(&ip->i_df);
to->di_uid = cpu_to_be32(i_uid_read(inode));
@@ -374,17 +381,40 @@ xfs_dinode_verify_fork(
/*
* For fork types that can contain local data, check that the fork
* format matches the size of local data contained within the fork.
- *
- * For all types, check that when the size says the should be in extent
- * or btree format, the inode isn't claiming it is in local format.
*/
if (whichfork == XFS_DATA_FORK) {
- if (S_ISDIR(mode) || S_ISLNK(mode)) {
+ /*
+ * A directory small enough to fit in the inode must be stored
+ * in local format. The directory sf <-> extents conversion
+ * code updates the directory size accordingly. Directories
+ * being truncated have zero size and are not subject to this
+ * check.
+ */
+ if (S_ISDIR(mode)) {
+ if (dip->di_size &&
+ be64_to_cpu(dip->di_size) <= fork_size &&
+ fork_format != XFS_DINODE_FMT_LOCAL)
+ return __this_address;
+ }
+
+ /*
+ * A symlink with a target small enough to fit in the inode can
+ * be stored in extents format if xattrs were added (thus
+ * converting the data fork from shortform to remote format)
+ * and then removed.
+ */
+ if (S_ISLNK(mode)) {
if (be64_to_cpu(dip->di_size) <= fork_size &&
+ fork_format != XFS_DINODE_FMT_EXTENTS &&
fork_format != XFS_DINODE_FMT_LOCAL)
return __this_address;
}
+ /*
+ * For all types, check that when the size says the fork should
+ * be in extent or btree format, the inode isn't claiming to be
+ * in local format.
+ */
if (be64_to_cpu(dip->di_size) > fork_size &&
fork_format == XFS_DINODE_FMT_LOCAL)
return __this_address;
@@ -460,6 +490,69 @@ xfs_dinode_verify_nrext64(
return NULL;
}
+/*
+ * Validate all the picky requirements we have for a file that claims to be
+ * filesystem metadata.
+ */
+xfs_failaddr_t
+xfs_dinode_verify_metadir(
+ struct xfs_mount *mp,
+ struct xfs_dinode *dip,
+ uint16_t mode,
+ uint16_t flags,
+ uint64_t flags2)
+{
+ if (!xfs_has_metadir(mp))
+ return __this_address;
+
+ /* V5 filesystem only */
+ if (dip->di_version < 3)
+ return __this_address;
+
+ if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
+ return __this_address;
+
+ /* V3 inode fields that are always zero */
+ if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad)
+ return __this_address;
+ if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter)
+ return __this_address;
+
+ /* Metadata files can only be directories or regular files */
+ if (!S_ISDIR(mode) && !S_ISREG(mode))
+ return __this_address;
+
+ /* They must have zero access permissions */
+ if (mode & 0777)
+ return __this_address;
+
+ /* DMAPI event and state masks are zero */
+ if (dip->di_dmevmask || dip->di_dmstate)
+ return __this_address;
+
+ /*
+ * User and group IDs must be zero. The project ID is used for
+ * grouping inodes. Metadata inodes are never accounted to quotas.
+ */
+ if (dip->di_uid || dip->di_gid)
+ return __this_address;
+
+ /* Mandatory inode flags must be set */
+ if (S_ISDIR(mode)) {
+ if ((flags & XFS_METADIR_DIFLAGS) != XFS_METADIR_DIFLAGS)
+ return __this_address;
+ } else {
+ if ((flags & XFS_METAFILE_DIFLAGS) != XFS_METAFILE_DIFLAGS)
+ return __this_address;
+ }
+
+ /* dax flags2 must not be set */
+ if (flags2 & XFS_DIFLAG2_DAX)
+ return __this_address;
+
+ return NULL;
+}
+
xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
@@ -491,6 +584,23 @@ xfs_dinode_verify(
return __this_address;
}
+ /*
+ * Historical note: xfsprogs in the 3.2 era set up its incore inodes to
+ * have di_nlink track the link count, even if the actual filesystem
+ * only supported V1 inodes (i.e. di_onlink). When writing out the
+ * ondisk inode, it would set both the ondisk di_nlink and di_onlink to
+ * the incore di_nlink value, which is why we cannot check for
+ * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with
+ * di_onlink==0, so we can check that.
+ */
+ if (dip->di_version == 2) {
+ if (dip->di_metatype)
+ return __this_address;
+ } else if (dip->di_version >= 3) {
+ if (!xfs_dinode_is_metadir(dip) && dip->di_metatype)
+ return __this_address;
+ }
+
/* don't allow invalid i_size */
di_size = be64_to_cpu(dip->di_size);
if (di_size & (1ULL << 63))
@@ -500,9 +610,20 @@ xfs_dinode_verify(
if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
return __this_address;
- /* No zero-length symlinks/dirs. */
- if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
- return __this_address;
+ /*
+ * No zero-length symlinks/dirs unless they're unlinked and hence being
+ * inactivated.
+ */
+ if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
+ if (dip->di_version > 1) {
+ if (dip->di_nlink)
+ return __this_address;
+ } else {
+ /* di_metatype used to be di_onlink */
+ if (dip->di_metatype)
+ return __this_address;
+ }
+ }
fa = xfs_dinode_verify_nrext64(mp, dip);
if (fa)
@@ -616,6 +737,12 @@ xfs_dinode_verify(
!xfs_has_bigtime(mp))
return __this_address;
+ if (flags2 & XFS_DIFLAG2_METADATA) {
+ fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2);
+ if (fa)
+ return fa;
+ }
+
return NULL;
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 585ed5a110af..8d43d2641c73 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -28,6 +28,9 @@ int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_dinode *dip);
+xfs_failaddr_t xfs_dinode_verify_metadir(struct xfs_mount *mp,
+ struct xfs_dinode *dip, uint16_t mode, uint16_t flags,
+ uint64_t flags2);
xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp,
uint32_t extsize, uint16_t mode, uint16_t flags);
xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 7d660a973909..1158ca48626b 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -185,7 +185,7 @@ xfs_iformat_btree(
ifp = xfs_ifork_ptr(ip, whichfork);
dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
- size = XFS_BMAP_BROOT_SPACE(mp, dfp);
+ size = xfs_bmap_broot_space(mp, dfp);
nrecs = be16_to_cpu(dfp->bb_numrecs);
level = be16_to_cpu(dfp->bb_level);
@@ -198,7 +198,7 @@ xfs_iformat_btree(
*/
if (unlikely(ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork) ||
nrecs == 0 ||
- XFS_BMDR_SPACE_CALC(nrecs) >
+ xfs_bmdr_space_calc(nrecs) >
XFS_DFORK_SIZE(dip, mp, whichfork) ||
ifp->if_nextents > ip->i_nblocks) ||
level == 0 || level > XFS_BM_MAXLEVELS(mp, whichfork)) {
@@ -409,7 +409,7 @@ xfs_iroot_realloc(
* allocate it now and get out.
*/
if (ifp->if_broot_bytes == 0) {
- new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
+ new_size = xfs_bmap_broot_space_calc(mp, rec_diff);
ifp->if_broot = kmalloc(new_size,
GFP_KERNEL | __GFP_NOFAIL);
ifp->if_broot_bytes = (int)new_size;
@@ -422,17 +422,17 @@ xfs_iroot_realloc(
* location. The records don't change location because
* they are kept butted up against the btree block header.
*/
- cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
+ cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false);
new_max = cur_max + rec_diff;
- new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
+ new_size = xfs_bmap_broot_space_calc(mp, new_max);
ifp->if_broot = krealloc(ifp->if_broot, new_size,
GFP_KERNEL | __GFP_NOFAIL);
- op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+ op = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1,
ifp->if_broot_bytes);
- np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+ np = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1,
(int)new_size);
ifp->if_broot_bytes = (int)new_size;
- ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+ ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <=
xfs_inode_fork_size(ip, whichfork));
memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
return;
@@ -444,11 +444,11 @@ xfs_iroot_realloc(
* records, just get rid of the root and clear the status bit.
*/
ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
- cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
+ cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false);
new_max = cur_max + rec_diff;
ASSERT(new_max >= 0);
if (new_max > 0)
- new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
+ new_size = xfs_bmap_broot_space_calc(mp, new_max);
else
new_size = 0;
if (new_size > 0) {
@@ -457,28 +457,28 @@ xfs_iroot_realloc(
* First copy over the btree block header.
*/
memcpy(new_broot, ifp->if_broot,
- XFS_BMBT_BLOCK_LEN(ip->i_mount));
+ xfs_bmbt_block_len(ip->i_mount));
} else {
new_broot = NULL;
}
/*
- * Only copy the records and pointers if there are any.
+ * Only copy the keys and pointers if there are any.
*/
if (new_max > 0) {
/*
- * First copy the records.
+ * First copy the keys.
*/
- op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
- np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
- memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
+ op = (char *)xfs_bmbt_key_addr(mp, ifp->if_broot, 1);
+ np = (char *)xfs_bmbt_key_addr(mp, new_broot, 1);
+ memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_key_t));
/*
* Then copy the pointers.
*/
- op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+ op = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1,
ifp->if_broot_bytes);
- np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
+ np = (char *)xfs_bmap_broot_ptr_addr(mp, new_broot, 1,
(int)new_size);
memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
}
@@ -486,7 +486,7 @@ xfs_iroot_realloc(
ifp->if_broot = new_broot;
ifp->if_broot_bytes = (int)new_size;
if (ifp->if_broot)
- ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+ ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <=
xfs_inode_fork_size(ip, whichfork));
return;
}
@@ -655,7 +655,7 @@ xfs_iflush_fork(
if ((iip->ili_fields & brootflag[whichfork]) &&
(ifp->if_broot_bytes > 0)) {
ASSERT(ifp->if_broot != NULL);
- ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+ ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <=
xfs_inode_fork_size(ip, whichfork));
xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
(xfs_bmdr_block_t *)cp,
@@ -765,53 +765,46 @@ xfs_ifork_verify_local_attr(
return 0;
}
+/*
+ * Check if the inode fork supports adding nr_to_add more extents.
+ *
+ * If it doesn't but we can upgrade it to large extent counters, do the upgrade.
+ * If we can't upgrade or are already using big counters but still can't fit the
+ * additional extents, return -EFBIG.
+ */
int
-xfs_iext_count_may_overflow(
+xfs_iext_count_extend(
+ struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork,
- int nr_to_add)
+ uint nr_to_add)
{
+ struct xfs_mount *mp = ip->i_mount;
+ bool has_large =
+ xfs_inode_has_large_extent_counts(ip);
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
- uint64_t max_exts;
uint64_t nr_exts;
+ ASSERT(nr_to_add <= XFS_MAX_EXTCNT_UPGRADE_NR);
+
if (whichfork == XFS_COW_FORK)
return 0;
- max_exts = xfs_iext_max_nextents(xfs_inode_has_large_extent_counts(ip),
- whichfork);
-
- if (XFS_TEST_ERROR(false, ip->i_mount, XFS_ERRTAG_REDUCE_MAX_IEXTENTS))
- max_exts = 10;
-
+ /* no point in upgrading if if_nextents overflows */
nr_exts = ifp->if_nextents + nr_to_add;
- if (nr_exts < ifp->if_nextents || nr_exts > max_exts)
+ if (nr_exts < ifp->if_nextents)
return -EFBIG;
- return 0;
-}
-
-/*
- * Upgrade this inode's extent counter fields to be able to handle a potential
- * increase in the extent count by nr_to_add. Normally this is the same
- * quantity that caused xfs_iext_count_may_overflow() to return -EFBIG.
- */
-int
-xfs_iext_count_upgrade(
- struct xfs_trans *tp,
- struct xfs_inode *ip,
- uint nr_to_add)
-{
- ASSERT(nr_to_add <= XFS_MAX_EXTCNT_UPGRADE_NR);
-
- if (!xfs_has_large_extent_counts(ip->i_mount) ||
- xfs_inode_has_large_extent_counts(ip) ||
- XFS_TEST_ERROR(false, ip->i_mount, XFS_ERRTAG_REDUCE_MAX_IEXTENTS))
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
+ nr_exts > 10)
return -EFBIG;
- ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
+ if (nr_exts > xfs_iext_max_nextents(has_large, whichfork)) {
+ if (has_large || !xfs_has_large_extent_counts(mp))
+ return -EFBIG;
+ ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ }
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index bd53eb951b65..2373d12fd474 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -256,10 +256,8 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip);
int xfs_ifork_verify_local_data(struct xfs_inode *ip);
int xfs_ifork_verify_local_attr(struct xfs_inode *ip);
-int xfs_iext_count_may_overflow(struct xfs_inode *ip, int whichfork,
- int nr_to_add);
-int xfs_iext_count_upgrade(struct xfs_trans *tp, struct xfs_inode *ip,
- uint nr_to_add);
+int xfs_iext_count_extend(struct xfs_trans *tp, struct xfs_inode *ip,
+ int whichfork, uint nr_to_add);
bool xfs_ifork_is_realtime(struct xfs_inode *ip, int whichfork);
/* returns true if the fork has extents but they are not read in yet. */
diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
new file mode 100644
index 000000000000..deb0b7c00a1f
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -0,0 +1,751 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include <linux/iversion.h>
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_inode_util.h"
+#include "xfs_trans.h"
+#include "xfs_ialloc.h"
+#include "xfs_health.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_ag.h"
+#include "xfs_iunlink_item.h"
+#include "xfs_inode_item.h"
+
+uint16_t
+xfs_flags2diflags(
+ struct xfs_inode *ip,
+ unsigned int xflags)
+{
+ /* can't set PREALLOC this way, just preserve it */
+ uint16_t di_flags =
+ (ip->i_diflags & XFS_DIFLAG_PREALLOC);
+
+ if (xflags & FS_XFLAG_IMMUTABLE)
+ di_flags |= XFS_DIFLAG_IMMUTABLE;
+ if (xflags & FS_XFLAG_APPEND)
+ di_flags |= XFS_DIFLAG_APPEND;
+ if (xflags & FS_XFLAG_SYNC)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if (xflags & FS_XFLAG_NOATIME)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if (xflags & FS_XFLAG_NODUMP)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if (xflags & FS_XFLAG_NODEFRAG)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (xflags & FS_XFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
+ if (xflags & FS_XFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (xflags & FS_XFLAG_NOSYMLINKS)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if (xflags & FS_XFLAG_EXTSZINHERIT)
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ if (xflags & FS_XFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ } else if (S_ISREG(VFS_I(ip)->i_mode)) {
+ if (xflags & FS_XFLAG_REALTIME)
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (xflags & FS_XFLAG_EXTSIZE)
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ }
+
+ return di_flags;
+}
+
+uint64_t
+xfs_flags2diflags2(
+ struct xfs_inode *ip,
+ unsigned int xflags)
+{
+ uint64_t di_flags2 =
+ (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
+ XFS_DIFLAG2_BIGTIME |
+ XFS_DIFLAG2_NREXT64));
+
+ if (xflags & FS_XFLAG_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+ if (xflags & FS_XFLAG_COWEXTSIZE)
+ di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+
+ return di_flags2;
+}
+
+uint32_t
+xfs_ip2xflags(
+ struct xfs_inode *ip)
+{
+ uint32_t flags = 0;
+
+ if (ip->i_diflags & XFS_DIFLAG_ANY) {
+ if (ip->i_diflags & XFS_DIFLAG_REALTIME)
+ flags |= FS_XFLAG_REALTIME;
+ if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
+ flags |= FS_XFLAG_PREALLOC;
+ if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
+ flags |= FS_XFLAG_IMMUTABLE;
+ if (ip->i_diflags & XFS_DIFLAG_APPEND)
+ flags |= FS_XFLAG_APPEND;
+ if (ip->i_diflags & XFS_DIFLAG_SYNC)
+ flags |= FS_XFLAG_SYNC;
+ if (ip->i_diflags & XFS_DIFLAG_NOATIME)
+ flags |= FS_XFLAG_NOATIME;
+ if (ip->i_diflags & XFS_DIFLAG_NODUMP)
+ flags |= FS_XFLAG_NODUMP;
+ if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
+ flags |= FS_XFLAG_RTINHERIT;
+ if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
+ flags |= FS_XFLAG_PROJINHERIT;
+ if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
+ flags |= FS_XFLAG_NOSYMLINKS;
+ if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
+ flags |= FS_XFLAG_EXTSIZE;
+ if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
+ flags |= FS_XFLAG_EXTSZINHERIT;
+ if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
+ flags |= FS_XFLAG_NODEFRAG;
+ if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
+ flags |= FS_XFLAG_FILESTREAM;
+ }
+
+ if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
+ if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
+ flags |= FS_XFLAG_DAX;
+ if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
+ flags |= FS_XFLAG_COWEXTSIZE;
+ }
+
+ if (xfs_inode_has_attr_fork(ip))
+ flags |= FS_XFLAG_HASATTR;
+ return flags;
+}
+
+prid_t
+xfs_get_initial_prid(struct xfs_inode *dp)
+{
+ if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT)
+ return dp->i_projid;
+
+ /* Assign to the root project by default. */
+ return 0;
+}
+
+/* Propagate di_flags from a parent inode to a child inode. */
+static inline void
+xfs_inode_inherit_flags(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ unsigned int di_flags = 0;
+ xfs_failaddr_t failaddr;
+ umode_t mode = VFS_I(ip)->i_mode;
+
+ if (S_ISDIR(mode)) {
+ if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+ ip->i_extsize = pip->i_extsize;
+ }
+ if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
+ di_flags |= XFS_DIFLAG_PROJINHERIT;
+ } else if (S_ISREG(mode)) {
+ if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ xfs_has_realtime(ip->i_mount))
+ di_flags |= XFS_DIFLAG_REALTIME;
+ if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
+ di_flags |= XFS_DIFLAG_EXTSIZE;
+ ip->i_extsize = pip->i_extsize;
+ }
+ }
+ if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
+ xfs_inherit_noatime)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
+ xfs_inherit_nodump)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
+ xfs_inherit_sync)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
+ xfs_inherit_nosymlinks)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
+ xfs_inherit_nodefrag)
+ di_flags |= XFS_DIFLAG_NODEFRAG;
+ if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
+ di_flags |= XFS_DIFLAG_FILESTREAM;
+
+ ip->i_diflags |= di_flags;
+
+ /*
+ * Inode verifiers on older kernels only check that the extent size
+ * hint is an integer multiple of the rt extent size on realtime files.
+ * They did not check the hint alignment on a directory with both
+ * rtinherit and extszinherit flags set. If the misaligned hint is
+ * propagated from a directory into a new realtime file, new file
+ * allocations will fail due to math errors in the rt allocator and/or
+ * trip the verifiers. Validate the hint settings in the new file so
+ * that we don't let broken hints propagate.
+ */
+ failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
+ VFS_I(ip)->i_mode, ip->i_diflags);
+ if (failaddr) {
+ ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+ XFS_DIFLAG_EXTSZINHERIT);
+ ip->i_extsize = 0;
+ }
+}
+
+/* Propagate di_flags2 from a parent inode to a child inode. */
+static inline void
+xfs_inode_inherit_flags2(
+ struct xfs_inode *ip,
+ const struct xfs_inode *pip)
+{
+ xfs_failaddr_t failaddr;
+
+ if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
+ ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_cowextsize = pip->i_cowextsize;
+ }
+ if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
+ ip->i_diflags2 |= XFS_DIFLAG2_DAX;
+ if (xfs_is_metadir_inode(pip))
+ ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
+
+ /* Don't let invalid cowextsize hints propagate. */
+ failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
+ VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
+ if (failaddr) {
+ ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_cowextsize = 0;
+ }
+}
+
+/*
+ * Decide whether a new inode should be given an empty attribute fork at
+ * creation time.
+ *
+ * Setting the fork up immediately uses the default fork offset, because the
+ * size and number of attributes to come is unknown. That is safe while the
+ * data fork is still completely empty, and it avoids a separate transaction
+ * to set the fork offset shortly afterwards.
+ *
+ * The answer is yes when the caller announced that it will set xattrs right
+ * away. It is also yes on parent pointer filesystems, where the attr fork
+ * /must/ exist unless the caller promised that the file will never be linked
+ * into a directory tree.
+ */
+static inline bool
+xfs_icreate_want_attrfork(
+	struct xfs_mount		*mp,
+	const struct xfs_icreate_args	*args)
+{
+	const bool	init_xattrs = args->flags & XFS_ICREATE_INIT_XATTRS;
+	const bool	linkable = !(args->flags & XFS_ICREATE_UNLINKABLE);
+
+	return init_xattrs || (linkable && xfs_has_parent(mp));
+}
+
+/*
+ * Initialise the incore fields of a new inode and log it.
+ *
+ * @tp: transaction used to stamp the timestamps and to log the inode (and the
+ * superblock, if the attr feature has to be turned on here)
+ * @args: creation context: parent, idmap, mode, rdev and XFS_ICREATE_* flags
+ * @ip: the inode being initialised
+ */
+void
+xfs_inode_init(
+ struct xfs_trans *tp,
+ const struct xfs_icreate_args *args,
+ struct xfs_inode *ip)
+{
+ struct xfs_inode *pip = args->pip;
+ struct inode *dir = pip ? VFS_I(pip) : NULL;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct inode *inode = VFS_I(ip);
+ unsigned int flags;
+ int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG |
+ XFS_ICHGTIME_ACCESS;
+
+ /* Initial link count: 0 for tmpfiles, 2 for directories, 1 otherwise. */
+ if (args->flags & XFS_ICREATE_TMPFILE)
+ set_nlink(inode, 0);
+ else if (S_ISDIR(args->mode))
+ set_nlink(inode, 2);
+ else
+ set_nlink(inode, 1);
+ inode->i_rdev = args->rdev;
+
+ if (!args->idmap || pip == NULL) {
+ /* creating a tree root, sb rooted, or detached file */
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ ip->i_projid = 0;
+ inode->i_mode = args->mode;
+ } else {
+ /* creating a child in the directory tree */
+ if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
+ inode_fsuid_set(inode, args->idmap);
+ inode->i_gid = dir->i_gid;
+ inode->i_mode = args->mode;
+ } else {
+ inode_init_owner(args->idmap, inode, dir, args->mode);
+ }
+
+ /*
+ * If the group ID of the new file does not match the effective
+ * group ID or one of the supplementary group IDs, the S_ISGID
+ * bit is cleared (and only if the irix_sgid_inherit
+ * compatibility variable is set).
+ */
+ if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
+ !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode)))
+ inode->i_mode &= ~S_ISGID;
+
+ ip->i_projid = xfs_get_initial_prid(pip);
+ }
+
+ /* The new inode starts out empty: zero size and no data fork extents. */
+ ip->i_disk_size = 0;
+ ip->i_df.if_nextents = 0;
+ ASSERT(ip->i_nblocks == 0);
+
+ /* Clear hints and flags; parent inheritance below may set them again. */
+ ip->i_extsize = 0;
+ ip->i_diflags = 0;
+
+ /* v3 inodes also get an iversion, a cowextsize and a create time. */
+ if (xfs_has_v3inodes(mp)) {
+ inode_set_iversion(inode, 1);
+ ip->i_cowextsize = 0;
+ times |= XFS_ICHGTIME_CREATE;
+ }
+
+ xfs_trans_ichgtime(tp, ip, times);
+
+ /* Set up the data fork according to the file type. */
+ flags = XFS_ILOG_CORE;
+ switch (args->mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFSOCK:
+ /* Special files: device-format fork, logged with XFS_ILOG_DEV. */
+ ip->i_df.if_format = XFS_DINODE_FMT_DEV;
+ flags |= XFS_ILOG_DEV;
+ break;
+ case S_IFREG:
+ case S_IFDIR:
+ /* Only regular files and directories inherit flags from the parent. */
+ if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
+ xfs_inode_inherit_flags(ip, pip);
+ if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
+ xfs_inode_inherit_flags2(ip, pip);
+ fallthrough;
+ case S_IFLNK:
+ /* Empty extents-format data fork, shared with the cases above. */
+ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
+ ip->i_df.if_bytes = 0;
+ ip->i_df.if_data = NULL;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ if (xfs_icreate_want_attrfork(mp, args)) {
+ /*
+ * i_forkoff is the default attr offset shifted right by 3
+ * (presumably 8-byte units; confirm against the ondisk format).
+ */
+ ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
+ xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
+
+ /*
+ * The attr feature is not set yet: turn it on under m_sb_lock
+ * and log the superblock in this transaction.
+ */
+ if (!xfs_has_attr(mp)) {
+ spin_lock(&mp->m_sb_lock);
+ xfs_add_attr(mp);
+ spin_unlock(&mp->m_sb_lock);
+ xfs_log_sb(tp);
+ }
+ }
+
+ xfs_trans_log_inode(tp, ip, flags);
+}
+
+/*
+ * In-Core Unlinked List Lookups
+ * =============================
+ *
+ * Every inode is supposed to be reachable from some other piece of metadata
+ * with the exception of the root directory. Inodes with a connection to a
+ * file descriptor but not linked from anywhere in the on-disk directory tree
+ * are collectively known as unlinked inodes, though the filesystem itself
+ * maintains links to these inodes so that on-disk metadata are consistent.
+ *
+ * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
+ * header contains a number of buckets that point to an inode, and each inode
+ * record has a pointer to the next inode in the hash chain. This
+ * singly-linked list causes scaling problems in the iunlink remove function
+ * because we must walk that list to find the inode that points to the inode
+ * being removed from the unlinked hash bucket list.
+ *
+ * Hence we keep an in-memory doubly linked list to link each inode on an
+ * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
+ * based lists would require having 64 list heads in the perag, one for each
+ * list. This is expensive in terms of memory (think millions of AGs) and cache
+ * misses on lookups. Instead, use the fact that inodes on the unlinked list
+ * must be referenced at the VFS level to keep them on the list and hence we
+ * have an existence guarantee for inodes on the unlinked list.
+ *
+ * Given we have an existence guarantee, we can use lockless inode cache lookups
+ * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
+ * for the doubly linked unlinked list, and we don't need any extra locking to
+ * keep the list safe as all manipulations are done under the AGI buffer lock.
+ * Keeping the list up to date does not require memory allocation, just finding
+ * the XFS inode and updating the next/prev unlinked list aginos.
+ */
+
+/*
+ * Make the inode that follows us on an unlinked list point back at
+ * @prev_agino. Nothing is done when @next_agino is NULLAGINO. Returns
+ * -ENOLINK if the inode for @next_agino is not in cache, 0 otherwise.
+ */
+static int
+xfs_iunlink_update_backref(
+	struct xfs_perag	*pag,
+	xfs_agino_t		prev_agino,
+	xfs_agino_t		next_agino)
+{
+	struct xfs_inode	*next_ip;
+
+	/* The end of the list has no successor whose backref needs fixing. */
+	if (next_agino != NULLAGINO) {
+		next_ip = xfs_iunlink_lookup(pag, next_agino);
+		if (!next_ip)
+			return -ENOLINK;
+
+		next_ip->i_prev_unlinked = prev_agino;
+	}
+
+	return 0;
+}
+
+/*
+ * Point the AGI unlinked bucket at an inode and log the results. The caller
+ * is responsible for validating the old value.
+ *
+ * @tp: transaction the AGI buffer change is logged in
+ * @pag: perag of the AG that owns @agibp
+ * @agibp: AGI buffer holding the agi_unlinked[] bucket array
+ * @bucket_index: index of the bucket to update
+ * @new_agino: new head of the bucket, or NULLAGINO
+ *
+ * Returns 0, or -EFSCORRUPTED if the bucket already holds @new_agino.
+ */
+STATIC int
+xfs_iunlink_update_bucket(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agibp,
+ unsigned int bucket_index,
+ xfs_agino_t new_agino)
+{
+ struct xfs_agi *agi = agibp->b_addr;
+ xfs_agino_t old_value;
+ int offset;
+
+ ASSERT(xfs_verify_agino_or_null(pag, new_agino));
+
+ old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ trace_xfs_iunlink_update_bucket(pag, bucket_index, old_value,
+ new_agino);
+
+ /*
+ * We should never find the head of the list already set to the value
+ * passed in because either we're adding or removing ourselves from the
+ * head of the list.
+ */
+ if (old_value == new_agino) {
+ xfs_buf_mark_corrupt(agibp);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ return -EFSCORRUPTED;
+ }
+
+ /* Store the new head and log only the bytes of that one bucket slot. */
+ agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
+ offset = offsetof(struct xfs_agi, agi_unlinked) +
+ (sizeof(xfs_agino_t) * bucket_index);
+ xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
+ return 0;
+}
+
+/*
+ * Insert @ip at the head of the AGI unlinked bucket selected by its agino
+ * (agino % XFS_AGI_UNLINKED_BUCKETS).
+ *
+ * Returns 0 on success, -EFSCORRUPTED if the current bucket head is invalid or
+ * already points at @ip, or the error from updating the neighbouring inode,
+ * logging @ip, or updating the bucket.
+ */
+static int
+xfs_iunlink_insert_inode(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agibp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agi *agi = agibp->b_addr;
+ xfs_agino_t next_agino;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+ short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+ int error;
+
+ /*
+ * Get the index into the agi hash table for the list this inode will
+ * go on. Make sure the pointer isn't garbage and that this inode
+ * isn't already on the list.
+ */
+ next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ if (next_agino == agino ||
+ !xfs_verify_agino_or_null(pag, next_agino)) {
+ xfs_buf_mark_corrupt(agibp);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Update the prev pointer in the next inode to point back to this
+ * inode. If that inode is not in cache (-ENOLINK), fall back to
+ * xfs_iunlink_reload_next().
+ */
+ error = xfs_iunlink_update_backref(pag, agino, next_agino);
+ if (error == -ENOLINK)
+ error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
+ if (error)
+ return error;
+
+ if (next_agino != NULLAGINO) {
+ /*
+ * There is already another inode in the bucket, so point this
+ * inode to the current head of the list.
+ */
+ error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
+ if (error)
+ return error;
+ ip->i_next_unlinked = next_agino;
+ }
+
+ /* Point the head of the list to point to this inode. */
+ ip->i_prev_unlinked = NULLAGINO;
+ return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
+}
+
+/*
+ * Add @ip to the on-disk unlinked list of its AG. This is called once the
+ * link count has gone to 0, or when creating a tmpfile via O_TMPFILE; either
+ * way @ip must have nlink == 0.
+ *
+ * The on-disk inode stays on the AGI list until the inode is freed.
+ */
+int
+xfs_iunlink(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_buf		*agibp;
+	struct xfs_perag	*pag;
+	int			error;
+
+	ASSERT(VFS_I(ip)->i_nlink == 0);
+	ASSERT(VFS_I(ip)->i_mode != 0);
+	trace_xfs_iunlink(ip);
+
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+
+	/* Get the agi buffer first. It ensures lock ordering on the list. */
+	error = xfs_read_agi(pag, tp, 0, &agibp);
+	if (!error)
+		error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
+
+	/* Drop the perag reference whether or not the insert happened. */
+	xfs_perag_put(pag);
+	return error;
+}
+
+/*
+ * Remove @ip from the AGI unlinked bucket selected by its agino
+ * (agino % XFS_AGI_UNLINKED_BUCKETS), fixing up the neighbouring inodes or the
+ * bucket head as needed.
+ *
+ * Returns 0 on success, -EFSCORRUPTED if the bucket head is invalid or the
+ * previous inode cannot be found, or the error from logging or updating the
+ * neighbouring inodes or the bucket.
+ */
+static int
+xfs_iunlink_remove_inode(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_buf *agibp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agi *agi = agibp->b_addr;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+ xfs_agino_t head_agino;
+ short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+ int error;
+
+ trace_xfs_iunlink_remove(ip);
+
+ /*
+ * Get the index into the agi hash table for the list this inode is
+ * on. Make sure the head pointer isn't garbage.
+ */
+ head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ if (!xfs_verify_agino(pag, head_agino)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ agi, sizeof(*agi));
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Set our inode's next_unlinked pointer to NULL and then return
+ * the old pointer value so that we can update whatever was previous
+ * to us in the list to point to whatever was next in the list.
+ */
+ error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
+ if (error)
+ return error;
+
+ /*
+ * Update the prev pointer in the next inode to point back to previous
+ * inode in the chain. If the next inode is not in cache (-ENOLINK),
+ * fall back to xfs_iunlink_reload_next().
+ */
+ error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
+ ip->i_next_unlinked);
+ if (error == -ENOLINK)
+ error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
+ ip->i_next_unlinked);
+ if (error)
+ return error;
+
+ if (head_agino != agino) {
+ struct xfs_inode *prev_ip;
+
+ /* Not the list head: splice us out via the previous inode. */
+ prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
+ if (!prev_ip) {
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Any error from logging prev_ip is not acted on here; it is
+ * returned at the end, after the incore pointers are updated.
+ */
+ error = xfs_iunlink_log_inode(tp, prev_ip, pag,
+ ip->i_next_unlinked);
+ prev_ip->i_next_unlinked = ip->i_next_unlinked;
+ } else {
+ /* Point the head of the list to the next unlinked inode. */
+ error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
+ ip->i_next_unlinked);
+ }
+
+ /*
+ * Reset the incore list pointers. NOTE(review): prev is set to 0 rather
+ * than NULLAGINO; presumably 0 marks "not on an unlinked list". Confirm.
+ */
+ ip->i_next_unlinked = NULLAGINO;
+ ip->i_prev_unlinked = 0;
+ return error;
+}
+
+/*
+ * Take the on-disk inode @ip off the AGI unlinked list of @pag.
+ */
+int
+xfs_iunlink_remove(
+	struct xfs_trans	*tp,
+	struct xfs_perag	*pag,
+	struct xfs_inode	*ip)
+{
+	struct xfs_buf		*agibp;
+	int			error;
+
+	trace_xfs_iunlink_remove(ip);
+
+	/* Get the agi buffer first. It ensures lock ordering on the list. */
+	error = xfs_read_agi(pag, tp, 0, &agibp);
+	if (!error)
+		error = xfs_iunlink_remove_inode(tp, pag, agibp, ip);
+
+	return error;
+}
+
+/*
+ * Drop one link from an inode and log the change. When that takes the link
+ * count to zero, the inode is moved to the AGI unlinked list so that it can
+ * be freed when the last active reference goes away via xfs_inactive().
+ */
+int
+xfs_droplink(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
+{
+	struct inode		*vfs_inode = VFS_I(ip);
+
+	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+
+	if (vfs_inode->i_nlink == 0) {
+		/* Already at zero: warn and pin the link count. */
+		xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count dropped below zero. Pinning link count.",
+				ip->i_ino);
+		set_nlink(vfs_inode, XFS_NLINK_PINNED);
+	} else if (vfs_inode->i_nlink != XFS_NLINK_PINNED) {
+		/* A pinned link count is never decremented. */
+		drop_nlink(vfs_inode);
+	}
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+	/* Only an inode with no links left goes onto the unlinked list. */
+	return vfs_inode->i_nlink ? 0 : xfs_iunlink(tp, ip);
+}
+
+/*
+ * Add one link to an inode and log the change. A link count that has reached
+ * XFS_NLINK_PINNED is left alone.
+ */
+void
+xfs_bumplink(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
+{
+	struct inode		*vfs_inode = VFS_I(ip);
+
+	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+
+	if (vfs_inode->i_nlink != XFS_NLINK_PINNED) {
+		/* This increment is the one that reaches the pinned value. */
+		if (vfs_inode->i_nlink == XFS_NLINK_PINNED - 1)
+			xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count exceeded maximum. Pinning link count.",
+					ip->i_ino);
+		inc_nlink(vfs_inode);
+	}
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/*
+ * Free an inode in the ondisk index and zero it out.
+ *
+ * @tp: transaction the changes are logged in
+ * @pag: perag of the AG that contains @ip
+ * @ip: inode to free; it is also removed from the unlinked list here
+ * @xic: passed through to xfs_difree()
+ *
+ * Returns 0 on success or the error from xfs_difree() or
+ * xfs_iunlink_remove(); in the error case the incore inode is not reset.
+ */
+int
+xfs_inode_uninit(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ struct xfs_inode *ip,
+ struct xfs_icluster *xic)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ int error;
+
+ /*
+ * Free the inode first so that we guarantee that the AGI lock is going
+ * to be taken before we remove the inode from the unlinked list. This
+ * makes the AGI lock -> unlinked list modification order the same as
+ * used in O_TMPFILE creation.
+ */
+ error = xfs_difree(tp, pag, ip->i_ino, xic);
+ if (error)
+ return error;
+
+ error = xfs_iunlink_remove(tp, pag, ip);
+ if (error)
+ return error;
+
+ /*
+ * Free any local-format data sitting around before we reset the
+ * data fork to extents format. Note that the attr fork data has
+ * already been freed by xfs_attr_inactive.
+ */
+ if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
+ kfree(ip->i_df.if_data);
+ ip->i_df.if_data = NULL;
+ ip->i_df.if_bytes = 0;
+ }
+
+ /* Reset the incore inode to the state of a free inode. */
+ VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
+ ip->i_diflags = 0;
+ ip->i_diflags2 = mp->m_ino_geo.new_diflags2;
+ ip->i_forkoff = 0; /* mark the attr fork not in use */
+ ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
+
+ /*
+ * Bump the generation count so no one will be confused
+ * by reincarnations of this inode.
+ */
+ VFS_I(ip)->i_generation++;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_util.h b/fs/xfs/libxfs/xfs_inode_util.h
new file mode 100644
index 000000000000..060242998a23
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_inode_util.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_INODE_UTIL_H__
+#define __XFS_INODE_UTIL_H__
+
+struct xfs_icluster;
+
+uint16_t xfs_flags2diflags(struct xfs_inode *ip, unsigned int xflags);
+uint64_t xfs_flags2diflags2(struct xfs_inode *ip, unsigned int xflags);
+uint32_t xfs_dic2xflags(struct xfs_inode *ip);
+uint32_t xfs_ip2xflags(struct xfs_inode *ip);
+
+prid_t xfs_get_initial_prid(struct xfs_inode *dp);
+
+/*
+ * File creation context.
+ *
+ * Due to our only partial reliance on the VFS to propagate uid and gid values
+ * according to accepted Unix behaviors, callers must initialize idmap to the
+ * correct idmapping structure to get the correct inheritance behaviors when
+ * XFS_MOUNT_GRPID is set.
+ *
+ * To create files detached from the directory tree (e.g. quota inodes), set
+ * idmap to NULL. To create a tree root, set pip to NULL.
+ */
+struct xfs_icreate_args {
+ struct mnt_idmap *idmap; /* idmapping, or null for detached files */
+ struct xfs_inode *pip; /* parent inode or null */
+ dev_t rdev; /* copied to the new inode's i_rdev */
+ umode_t mode; /* file type and permission bits */
+
+#define XFS_ICREATE_TMPFILE (1U << 0) /* create an unlinked file */
+#define XFS_ICREATE_INIT_XATTRS (1U << 1) /* will set xattrs immediately */
+#define XFS_ICREATE_UNLINKABLE (1U << 2) /* cannot link into dir tree */
+ uint16_t flags; /* XFS_ICREATE_* */
+};
+
+/*
+ * Flags for xfs_trans_ichgtime().
+ */
+#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
+#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
+#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
+#define XFS_ICHGTIME_ACCESS 0x8 /* last access timestamp */
+void xfs_trans_ichgtime(struct xfs_trans *tp, struct xfs_inode *ip, int flags);
+
+void xfs_inode_init(struct xfs_trans *tp, const struct xfs_icreate_args *args,
+ struct xfs_inode *ip);
+
+int xfs_inode_uninit(struct xfs_trans *tp, struct xfs_perag *pag,
+ struct xfs_inode *ip, struct xfs_icluster *xic);
+
+int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip);
+int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
+ struct xfs_inode *ip);
+int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip);
+void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip);
+
+#endif /* __XFS_INODE_UTIL_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 16872972e1e9..15dec19b6c32 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -115,10 +115,13 @@ struct xfs_unmount_log_format {
#define XLOG_REG_TYPE_BUD_FORMAT 26
#define XLOG_REG_TYPE_ATTRI_FORMAT 27
#define XLOG_REG_TYPE_ATTRD_FORMAT 28
-#define XLOG_REG_TYPE_ATTR_NAME 29
+#define XLOG_REG_TYPE_ATTR_NAME 29
#define XLOG_REG_TYPE_ATTR_VALUE 30
-#define XLOG_REG_TYPE_MAX 30
-
+#define XLOG_REG_TYPE_XMI_FORMAT 31
+#define XLOG_REG_TYPE_XMD_FORMAT 32
+#define XLOG_REG_TYPE_ATTR_NEWNAME 33
+#define XLOG_REG_TYPE_ATTR_NEWVALUE 34
+#define XLOG_REG_TYPE_MAX 34
/*
* Flags to log operation header
@@ -243,6 +246,10 @@ typedef struct xfs_trans_header {
#define XFS_LI_BUD 0x1245
#define XFS_LI_ATTRI 0x1246 /* attr set/remove intent*/
#define XFS_LI_ATTRD 0x1247 /* attr set/remove done */
+#define XFS_LI_XMI 0x1248 /* mapping exchange intent */
+#define XFS_LI_XMD 0x1249 /* mapping exchange done */
+#define XFS_LI_EFI_RT 0x124a /* realtime extent free intent */
+#define XFS_LI_EFD_RT 0x124b /* realtime extent free done */
#define XFS_LI_TYPE_DESC \
{ XFS_LI_EFI, "XFS_LI_EFI" }, \
@@ -260,7 +267,11 @@ typedef struct xfs_trans_header {
{ XFS_LI_BUI, "XFS_LI_BUI" }, \
{ XFS_LI_BUD, "XFS_LI_BUD" }, \
{ XFS_LI_ATTRI, "XFS_LI_ATTRI" }, \
- { XFS_LI_ATTRD, "XFS_LI_ATTRD" }
+ { XFS_LI_ATTRD, "XFS_LI_ATTRD" }, \
+ { XFS_LI_XMI, "XFS_LI_XMI" }, \
+ { XFS_LI_XMD, "XFS_LI_XMD" }, \
+ { XFS_LI_EFI_RT, "XFS_LI_EFI_RT" }, \
+ { XFS_LI_EFD_RT, "XFS_LI_EFD_RT" }
/*
* Inode Log Item Format definitions.
@@ -397,7 +408,7 @@ struct xfs_log_dinode {
uint16_t di_mode; /* mode and type of file */
int8_t di_version; /* inode version */
int8_t di_format; /* format of di_c data */
- uint8_t di_pad3[2]; /* unused in v2/3 inodes */
+ uint16_t di_metatype; /* metadata type, if DIFLAG2_METADATA */
uint32_t di_uid; /* owner's user id */
uint32_t di_gid; /* owner's group id */
uint32_t di_nlink; /* number of links to file */
@@ -879,6 +890,61 @@ struct xfs_bud_log_format {
};
/*
+ * XMI/XMD (file mapping exchange) log format definitions
+ */
+
+/* This is the structure used to lay out a mapping exchange log item. */
+struct xfs_xmi_log_format {
+ uint16_t xmi_type; /* xmi log item type */
+ uint16_t xmi_size; /* size of this item */
+ uint32_t __pad; /* must be zero */
+ uint64_t xmi_id; /* xmi identifier */
+
+ uint64_t xmi_inode1; /* inumber of first file */
+ uint64_t xmi_inode2; /* inumber of second file */
+ uint32_t xmi_igen1; /* generation of first file */
+ uint32_t xmi_igen2; /* generation of second file */
+ uint64_t xmi_startoff1; /* block offset into file1 */
+ uint64_t xmi_startoff2; /* block offset into file2 */
+ uint64_t xmi_blockcount; /* number of blocks */
+ uint64_t xmi_flags; /* XFS_EXCHMAPS_* */
+ uint64_t xmi_isize1; /* intended file1 size */
+ uint64_t xmi_isize2; /* intended file2 size */
+};
+
+/* Exchange mappings between extended attribute forks instead of data forks. */
+#define XFS_EXCHMAPS_ATTR_FORK (1ULL << 0)
+
+/* Set the file sizes when finished. */
+#define XFS_EXCHMAPS_SET_SIZES (1ULL << 1)
+
+/*
+ * Exchange the mappings of the two files only if the file allocation units
+ * mapped to file1's range have been written.
+ */
+#define XFS_EXCHMAPS_INO1_WRITTEN (1ULL << 2)
+
+/* Clear the reflink flag from inode1 after the operation. */
+#define XFS_EXCHMAPS_CLEAR_INO1_REFLINK (1ULL << 3)
+
+/* Clear the reflink flag from inode2 after the operation. */
+#define XFS_EXCHMAPS_CLEAR_INO2_REFLINK (1ULL << 4)
+
+#define XFS_EXCHMAPS_LOGGED_FLAGS (XFS_EXCHMAPS_ATTR_FORK | \
+ XFS_EXCHMAPS_SET_SIZES | \
+ XFS_EXCHMAPS_INO1_WRITTEN | \
+ XFS_EXCHMAPS_CLEAR_INO1_REFLINK | \
+ XFS_EXCHMAPS_CLEAR_INO2_REFLINK)
+
+/* This is the structure used to lay out a mapping exchange done log item. */
+struct xfs_xmd_log_format {
+ uint16_t xmd_type; /* xmd log item type */
+ uint16_t xmd_size; /* size of this item */
+ uint32_t __pad; /* padding */
+ uint64_t xmd_xmi_id; /* id of corresponding xmi */
+};
+
+/*
* Dquot Log format definitions.
*
* The first two fields must be the type and size fitting into
@@ -966,6 +1032,9 @@ struct xfs_icreate_log {
#define XFS_ATTRI_OP_FLAGS_SET 1 /* Set the attribute */
#define XFS_ATTRI_OP_FLAGS_REMOVE 2 /* Remove the attribute */
#define XFS_ATTRI_OP_FLAGS_REPLACE 3 /* Replace the attribute */
+#define XFS_ATTRI_OP_FLAGS_PPTR_SET 4 /* Set parent pointer */
+#define XFS_ATTRI_OP_FLAGS_PPTR_REMOVE 5 /* Remove parent pointer */
+#define XFS_ATTRI_OP_FLAGS_PPTR_REPLACE 6 /* Replace parent pointer */
#define XFS_ATTRI_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */
/*
@@ -974,6 +1043,7 @@ struct xfs_icreate_log {
*/
#define XFS_ATTRI_FILTER_MASK (XFS_ATTR_ROOT | \
XFS_ATTR_SECURE | \
+ XFS_ATTR_PARENT | \
XFS_ATTR_INCOMPLETE)
/*
@@ -983,11 +1053,22 @@ struct xfs_icreate_log {
struct xfs_attri_log_format {
uint16_t alfi_type; /* attri log item type */
uint16_t alfi_size; /* size of this item */
- uint32_t __pad; /* pad to 64 bit aligned */
+ uint32_t alfi_igen; /* generation of alfi_ino for pptr ops */
uint64_t alfi_id; /* attri identifier */
uint64_t alfi_ino; /* the inode for this attr operation */
uint32_t alfi_op_flags; /* marks the op as a set or remove */
- uint32_t alfi_name_len; /* attr name length */
+ union {
+ uint32_t alfi_name_len; /* attr name length */
+ struct {
+ /*
+ * For PPTR_REPLACE, these are the lengths of the old
+ * and new attr names. The new and old values must
+ * have the same length.
+ */
+ uint16_t alfi_old_name_len;
+ uint16_t alfi_new_name_len;
+ };
+ };
uint32_t alfi_value_len; /* attr value length */
uint32_t alfi_attr_filter;/* attr filter flags */
};
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 9fe7a9564bca..5397a8ff004d 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -75,6 +75,10 @@ extern const struct xlog_recover_item_ops xlog_cui_item_ops;
extern const struct xlog_recover_item_ops xlog_cud_item_ops;
extern const struct xlog_recover_item_ops xlog_attri_item_ops;
extern const struct xlog_recover_item_ops xlog_attrd_item_ops;
+extern const struct xlog_recover_item_ops xlog_xmi_item_ops;
+extern const struct xlog_recover_item_ops xlog_xmd_item_ops;
+extern const struct xlog_recover_item_ops xlog_rtefi_item_ops;
+extern const struct xlog_recover_item_ops xlog_rtefd_item_ops;
/*
* Macros, structures, prototypes for internal log manager use.
@@ -121,6 +125,8 @@ bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
int xlog_recover_iget(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_inode **ipp);
+int xlog_recover_iget_handle(struct xfs_mount *mp, xfs_ino_t ino, uint32_t gen,
+ struct xfs_inode **ipp);
void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
uint64_t intent_id);
int xlog_alloc_buf_cancel_table(struct xlog *log);
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index 9975b93a7412..d3bd6a86c8fe 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -17,6 +17,34 @@
#include "xfs_trace.h"
/*
+ * Shortly after enabling the large extents count feature in 2023, longstanding
+ * bugs were found in the code that computes the minimum log size. Luckily,
+ * the bugs resulted in over-estimates of that size, so there's no impact to
+ * existing users. However, we don't want to reduce the minimum log size
+ * because that can create the situation where a newer mkfs writes a new
+ * filesystem that an older kernel won't mount.
+ *
+ * Several years prior, we also discovered that the transaction reservations
+ * for rmap and reflink operations were unnecessarily large. That was fixed,
+ * but the minimum log size computation was left alone to avoid the
+ * compatibility problems noted above. Fix that too.
+ *
+ * Therefore, we may only correct the computation starting with filesystem
+ * features that didn't exist in 2023. In other words, only turn this on if
+ * the filesystem has parent pointers.
+ *
+ * This function can be called before the XFS_HAS_* flags have been set up,
+ * (e.g. mkfs) so we must check the ondisk superblock.
+ */
+static inline bool
+xfs_want_minlogsize_fixes(
+	struct xfs_sb	*sb)
+{
+	/* Incompat feature bits are only consulted on V5 superblocks. */
+	if (!xfs_sb_is_v5(sb))
+		return false;
+
+	/* Parent pointers are the feature that gates the corrected sizes. */
+	return xfs_sb_has_incompat_feature(sb, XFS_SB_FEAT_INCOMPAT_PARENT);
+}
+
+/*
* Calculate the maximum length in bytes that would be required for a local
* attribute value as large attributes out of line are not logged.
*/
@@ -31,6 +59,15 @@ xfs_log_calc_max_attrsetm_res(
MAXNAMELEN - 1;
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
nblks += XFS_B_TO_FSB(mp, size);
+
+ /*
+ * If the feature set is new enough, correct a unit conversion error in
+ * the xattr transaction reservation code that resulted in oversized
+ * minimum log size computations.
+ */
+ if (xfs_want_minlogsize_fixes(&mp->m_sb))
+ size = XFS_B_TO_FSB(mp, size);
+
nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
return M_RES(mp)->tr_attrsetm.tr_logres +
@@ -49,6 +86,15 @@ xfs_log_calc_trans_resv_for_minlogblocks(
unsigned int rmap_maxlevels = mp->m_rmap_maxlevels;
/*
+ * If the feature set is new enough, drop the oversized minimum log
+ * size computation introduced by the original reflink code.
+ */
+ if (xfs_want_minlogsize_fixes(&mp->m_sb)) {
+ xfs_trans_resv_calc(mp, resv);
+ return;
+ }
+
+ /*
* In the early days of rmap+reflink, we always set the rmap maxlevels
* to 9 even if the AG was small enough that it would never grow to
* that height. Transaction reservation sizes influence the minimum
diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c
new file mode 100644
index 000000000000..bae7377c0f22
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metadir.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_metafile.h"
+#include "xfs_metadir.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_parent.h"
+#include "xfs_health.h"
+
+/*
+ * Metadata Directory Tree
+ * =======================
+ *
+ * These functions provide an abstraction layer for looking up, creating, and
+ * deleting metadata inodes that live within a special metadata directory tree.
+ *
+ * This code does not manage the five existing metadata inodes: real time
+ * bitmap & summary; and the user, group, and project quotas. All other
+ * metadata inodes must use only the xfs_meta{dir,file}_* functions.
+ *
+ * Callers wishing to create or hardlink a metadata inode must create an
+ * xfs_metadir_update structure, call the appropriate xfs_metadir* function,
+ * and then call xfs_metadir_commit or xfs_metadir_cancel to commit or cancel
+ * the update. Files in the metadata directory tree currently cannot be
+ * unlinked.
+ *
+ * When the metadir feature is enabled, all metadata inodes must have the
+ * "metadata" inode flag set to prevent them from being exposed to the outside
+ * world.
+ *
+ * Callers must take the ILOCK of any inode in the metadata directory tree to
+ * synchronize access to that inode. It is never necessary to take the IOLOCK
+ * or the MMAPLOCK since metadata inodes must not be exposed to user space.
+ */
+
+static inline void
+xfs_metadir_set_xname(
+ struct xfs_name *xname,
+ const char *path,
+ unsigned char ftype)
+{
+ xname->name = (const unsigned char *)path; /* points at @path; no copy is made */
+ xname->len = strlen(path); /* so @path must be NUL-terminated */
+ xname->type = ftype;
+}
+
+/*
+ * Given a parent directory @dp and a metadata inode path component @xname,
+ * look up the inode number in the directory, returning it in @ino.
+ * @xname.type must match the directory entry's ftype, unless it is
+ * XFS_DIR3_FT_UNKNOWN, in which case the ftype is not checked.
+ *
+ * Returns 0 and sets @ino on success; @ino is left untouched on failure.
+ * Returns -EIO if the filesystem has shut down, or whatever error
+ * xfs_dir_lookup_args reports. If @dp is not a directory, the inode number
+ * that was found fails xfs_verify_ino, or the ftype does not match, the
+ * filesystem is marked XFS_SICK_FS_METADIR and -EFSCORRUPTED is returned.
+ *
+ * Caller must hold ILOCK_EXCL.
+ */
+static inline int
+xfs_metadir_lookup(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_name *xname,
+ xfs_ino_t *ino)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_args args = {
+ .trans = tp,
+ .dp = dp,
+ .geo = mp->m_dir_geo,
+ .name = xname->name,
+ .namelen = xname->len,
+ .hashval = xfs_dir2_hashname(mp, xname),
+ .whichfork = XFS_DATA_FORK,
+ .op_flags = XFS_DA_OP_OKNOENT,
+ .owner = dp->i_ino,
+ };
+ int error;
+
+ if (!S_ISDIR(VFS_I(dp)->i_mode)) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+
+ error = xfs_dir_lookup_args(&args);
+ if (error)
+ return error;
+
+ if (!xfs_verify_ino(mp, args.inumber)) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+ if (xname->type != XFS_DIR3_FT_UNKNOWN && xname->type != args.filetype) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ trace_xfs_metadir_lookup(dp, xname, args.inumber);
+ *ino = args.inumber;
+ return 0;
+}
+
+/*
+ * Look up and read a metadata inode from the metadata directory. If the path
+ * component doesn't exist, return -ENOENT.
+ *
+ * @path is looked up in @dp with ftype XFS_DIR3_FT_UNKNOWN, so the directory
+ * entry's ftype is not checked by the lookup. ILOCK_EXCL on @dp is taken
+ * and dropped around the lookup only; it is no longer held when the inode
+ * number is handed to xfs_trans_metafile_iget together with @metafile_type
+ * and @ipp. The result of that call is returned to the caller.
+ */
+int
+xfs_metadir_load(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ const char *path,
+ enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_name xname;
+ xfs_ino_t ino;
+ int error;
+
+ xfs_metadir_set_xname(&xname, path, XFS_DIR3_FT_UNKNOWN);
+
+ xfs_ilock(dp, XFS_ILOCK_EXCL);
+ error = xfs_metadir_lookup(tp, dp, &xname, &ino);
+ xfs_iunlock(dp, XFS_ILOCK_EXCL);
+ if (error)
+ return error;
+ return xfs_trans_metafile_iget(tp, ino, metafile_type, ipp);
+}
+
+/*
+ * Unlock and release resources after committing (or cancelling) a metadata
+ * directory tree operation. The caller retains its reference to @upd->ip
+ * and must release it explicitly.
+ *
+ * The parent pointer context, if any, is handed to xfs_parent_finish and
+ * cleared. ILOCK_EXCL is dropped on @upd->ip (if there is one) and on
+ * @upd->dp only when the matching *_locked flag is set, and that flag is
+ * then cleared. @upd->tp is not touched here, and @error is only passed to
+ * the tracepoint.
+ */
+static inline void
+xfs_metadir_teardown(
+ struct xfs_metadir_update *upd,
+ int error)
+{
+ trace_xfs_metadir_teardown(upd, error);
+
+ if (upd->ppargs) {
+ xfs_parent_finish(upd->dp->i_mount, upd->ppargs);
+ upd->ppargs = NULL;
+ }
+
+ if (upd->ip) {
+ if (upd->ip_locked)
+ xfs_iunlock(upd->ip, XFS_ILOCK_EXCL);
+ upd->ip_locked = false;
+ }
+
+ if (upd->dp_locked)
+ xfs_iunlock(upd->dp, XFS_ILOCK_EXCL);
+ upd->dp_locked = false;
+}
+
+/*
+ * Begin the process of creating a metadata file by allocating transactions
+ * and taking whatever resources we're going to need.
+ *
+ * The caller must have set @upd->dp and @upd->metafile_type and left
+ * @upd->ip NULL. On success, @upd->tp holds a transaction allocated with
+ * the tr_create reservation and @upd->dp is locked ILOCK_EXCL, which is
+ * recorded in dp_locked. If the transaction cannot be allocated, the update
+ * is torn down before the error is returned.
+ */
+int
+xfs_metadir_start_create(
+ struct xfs_metadir_update *upd)
+{
+ struct xfs_mount *mp = upd->dp->i_mount;
+ int error;
+
+ ASSERT(upd->dp != NULL);
+ ASSERT(upd->ip == NULL);
+ ASSERT(xfs_has_metadir(mp));
+ ASSERT(upd->metafile_type != XFS_METAFILE_UNKNOWN);
+
+ error = xfs_parent_start(mp, &upd->ppargs);
+ if (error)
+ return error;
+
+ /*
+ * If we ever need the ability to create rt metadata files on a
+ * pre-metadir filesystem, we'll need to dqattach the parent here.
+ * Currently we assume that mkfs will create the files and quotacheck
+ * will account for them.
+ */
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
+ xfs_create_space_res(mp, MAXNAMELEN), 0, 0, &upd->tp);
+ if (error)
+ goto out_teardown;
+
+ /*
+ * Lock the parent directory. We can't ijoin it to the transaction
+ * until after the child file has been created.
+ */
+ xfs_ilock(upd->dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+ upd->dp_locked = true;
+
+ trace_xfs_metadir_start_create(upd);
+ return 0;
+out_teardown:
+ xfs_metadir_teardown(upd, error);
+ return error;
+}
+
+/*
+ * Create a metadata inode with the given @mode, and insert it into the
+ * metadata directory tree at the given @upd->path. The path up to the final
+ * component must already exist. The final path component must not exist;
+ * if the lookup finds it, -EEXIST is returned.
+ *
+ * The new metadata inode will be attached to the update structure @upd->ip,
+ * with the ILOCK held until the caller releases it.
+ *
+ * NOTE: This function may return a new inode to the caller even if it returns
+ * a negative error code. If an inode is passed back, the caller must finish
+ * setting up the inode before releasing it.
+ */
+int
+xfs_metadir_create(
+ struct xfs_metadir_update *upd,
+ umode_t mode)
+{
+ struct xfs_icreate_args args = {
+ .pip = upd->dp,
+ .mode = mode,
+ };
+ struct xfs_name xname;
+ struct xfs_dir_update du = {
+ .dp = upd->dp,
+ .name = &xname,
+ .ppargs = upd->ppargs,
+ };
+ struct xfs_mount *mp = upd->dp->i_mount;
+ xfs_ino_t ino;
+ unsigned int resblks;
+ int error;
+
+ xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL);
+
+ /* Check that the name does not already exist in the directory. */
+ xfs_metadir_set_xname(&xname, upd->path, XFS_DIR3_FT_UNKNOWN);
+ error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino);
+ switch (error) {
+ case -ENOENT:
+ break;
+ case 0:
+ error = -EEXIST;
+ fallthrough;
+ default:
+ return error;
+ }
+
+ /*
+ * A newly created regular or special file just has one directory
+ * entry pointing to it, but a directory also has the "." entry
+ * pointing to itself.
+ */
+ error = xfs_dialloc(&upd->tp, &args, &ino);
+ if (error)
+ return error;
+ error = xfs_icreate(upd->tp, ino, &args, &upd->ip);
+ if (error)
+ return error;
+ du.ip = upd->ip;
+ xfs_metafile_set_iflag(upd->tp, upd->ip, upd->metafile_type);
+ upd->ip_locked = true;
+
+ /*
+ * Join the directory inode to the transaction. We do not do it
+ * earlier because xfs_dialloc rolls the transaction.
+ */
+ xfs_trans_ijoin(upd->tp, upd->dp, 0);
+
+ /* Create the entry. */
+ if (S_ISDIR(args.mode))
+ resblks = xfs_mkdir_space_res(mp, xname.len);
+ else
+ resblks = xfs_create_space_res(mp, xname.len);
+ xname.type = xfs_mode_to_ftype(args.mode);
+
+ trace_xfs_metadir_try_create(upd);
+
+ error = xfs_dir_create_child(upd->tp, resblks, &du);
+ if (error)
+ return error;
+
+ /* Metadir files are not accounted to quota. */
+
+ trace_xfs_metadir_create(upd);
+
+ return 0;
+}
+
+#ifndef __KERNEL__
+/*
+ * Begin the process of linking a metadata file by allocating transactions
+ * and locking whatever resources we're going to need.
+ *
+ * The caller must have set both @upd->dp and @upd->ip. On success, @upd->tp
+ * holds the transaction and both inodes are recorded as locked. If no
+ * blocks could be reserved, the transaction is cancelled and both ILOCKs are
+ * dropped by hand, because the *_locked flags have not been set yet and so
+ * the teardown would not unlock them; the value that xfs_trans_alloc_dir
+ * stored in nospace_error is then returned.
+ */
+int
+xfs_metadir_start_link(
+ struct xfs_metadir_update *upd)
+{
+ struct xfs_mount *mp = upd->dp->i_mount;
+ unsigned int resblks;
+ int nospace_error = 0;
+ int error;
+
+ ASSERT(upd->dp != NULL);
+ ASSERT(upd->ip != NULL);
+ ASSERT(xfs_has_metadir(mp));
+
+ error = xfs_parent_start(mp, &upd->ppargs);
+ if (error)
+ return error;
+
+ resblks = xfs_link_space_res(mp, MAXNAMELEN);
+ error = xfs_trans_alloc_dir(upd->dp, &M_RES(mp)->tr_link, upd->ip,
+ &resblks, &upd->tp, &nospace_error);
+ if (error)
+ goto out_teardown;
+ if (!resblks) {
+ /* We don't allow reservationless updates. */
+ xfs_trans_cancel(upd->tp);
+ upd->tp = NULL;
+ xfs_iunlock(upd->dp, XFS_ILOCK_EXCL);
+ xfs_iunlock(upd->ip, XFS_ILOCK_EXCL);
+ error = nospace_error;
+ goto out_teardown;
+ }
+
+ upd->dp_locked = true;
+ upd->ip_locked = true;
+
+ trace_xfs_metadir_start_link(upd);
+ return 0;
+out_teardown:
+ xfs_metadir_teardown(upd, error);
+ return error;
+}
+
+/*
+ * Add a directory entry named @upd->path to @upd->dp that points to the
+ * inode @upd->ip.
+ * The path (up to the final component) must already exist, but the final
+ * component must not already exist; if the lookup finds it, -EEXIST is
+ * returned.
+ *
+ * Caller must hold ILOCK_EXCL on both @upd->dp and @upd->ip.
+ */
+int
+xfs_metadir_link(
+ struct xfs_metadir_update *upd)
+{
+ struct xfs_name xname;
+ struct xfs_dir_update du = {
+ .dp = upd->dp,
+ .name = &xname,
+ .ip = upd->ip,
+ .ppargs = upd->ppargs,
+ };
+ struct xfs_mount *mp = upd->dp->i_mount;
+ xfs_ino_t ino;
+ unsigned int resblks;
+ int error;
+
+ xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(upd->ip, XFS_ILOCK_EXCL);
+
+ /* Look up the name in the current directory. */
+ xfs_metadir_set_xname(&xname, upd->path,
+ xfs_mode_to_ftype(VFS_I(upd->ip)->i_mode));
+ error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino);
+ switch (error) {
+ case -ENOENT:
+ break;
+ case 0:
+ error = -EEXIST;
+ fallthrough;
+ default:
+ return error;
+ }
+
+ resblks = xfs_link_space_res(mp, xname.len);
+ error = xfs_dir_add_child(upd->tp, resblks, &du);
+ if (error)
+ return error;
+
+ trace_xfs_metadir_link(upd);
+
+ return 0;
+}
+#endif /* ! __KERNEL__ */
+
+/*
+ * Commit a metadir update and unlock/drop all resources.
+ *
+ * @upd->tp is cleared and the update is torn down whether or not the commit
+ * succeeds. Returns the result of xfs_trans_commit.
+ */
+int
+xfs_metadir_commit(
+ struct xfs_metadir_update *upd)
+{
+ int error;
+
+ trace_xfs_metadir_commit(upd);
+
+ error = xfs_trans_commit(upd->tp);
+ upd->tp = NULL;
+
+ xfs_metadir_teardown(upd, error);
+ return error;
+}
+
+/*
+ * Cancel a metadir update and unlock/drop all resources.
+ *
+ * @upd->tp is cancelled and cleared before the update is torn down. @error
+ * is only forwarded to xfs_metadir_teardown.
+ */
+void
+xfs_metadir_cancel(
+ struct xfs_metadir_update *upd,
+ int error)
+{
+ trace_xfs_metadir_cancel(upd);
+
+ xfs_trans_cancel(upd->tp);
+ upd->tp = NULL;
+
+ xfs_metadir_teardown(upd, error);
+}
+
+/*
+ * Create a metadata directory for the last component of the path.
+ *
+ * @path is created as a child of @dp with metafile type XFS_METAFILE_DIR.
+ * On success, the new directory inode is passed back in @ipp after its
+ * setup has been finished. On failure, @ipp is not set, and any inode that
+ * was attached to the update has its setup finished and is then released.
+ * Returns -EIO without doing anything if the filesystem has shut down.
+ */
+int
+xfs_metadir_mkdir(
+ struct xfs_inode *dp,
+ const char *path,
+ struct xfs_inode **ipp)
+{
+ struct xfs_metadir_update upd = {
+ .dp = dp,
+ .path = path,
+ .metafile_type = XFS_METAFILE_DIR,
+ };
+ int error;
+
+ if (xfs_is_shutdown(dp->i_mount))
+ return -EIO;
+
+ /* Allocate a transaction to create the last directory. */
+ error = xfs_metadir_start_create(&upd);
+ if (error)
+ return error;
+
+ /* Create the subdirectory and take our reference. */
+ error = xfs_metadir_create(&upd, S_IFDIR);
+ if (error)
+ goto out_cancel;
+
+ error = xfs_metadir_commit(&upd);
+ if (error)
+ goto out_irele;
+
+ xfs_finish_inode_setup(upd.ip);
+ *ipp = upd.ip;
+ return 0;
+
+out_cancel:
+ xfs_metadir_cancel(&upd, error);
+out_irele:
+ /* Have to finish setting up the inode to ensure it's deleted. */
+ if (upd.ip) {
+ xfs_finish_inode_setup(upd.ip);
+ xfs_irele(upd.ip);
+ }
+ return error;
+}
diff --git a/fs/xfs/libxfs/xfs_metadir.h b/fs/xfs/libxfs/xfs_metadir.h
new file mode 100644
index 000000000000..bfecac7d3d14
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metadir.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_METADIR_H__
+#define __XFS_METADIR_H__
+
+/* Cleanup widget for metadata inode creation and deletion. */
+struct xfs_metadir_update {
+ /* Parent directory */
+ struct xfs_inode *dp;
+
+ /* Path to metadata file */
+ const char *path;
+
+ /* Parent pointer update context */
+ struct xfs_parent_args *ppargs;
+
+ /* Child metadata file */
+ struct xfs_inode *ip;
+
+ struct xfs_trans *tp; /* transaction carrying the update */
+
+ enum xfs_metafile_type metafile_type; /* type recorded in a created inode */
+
+ unsigned int dp_locked:1; /* set while ILOCK_EXCL is held on @dp */
+ unsigned int ip_locked:1; /* set while ILOCK_EXCL is held on @ip */
+};
+
+int xfs_metadir_load(struct xfs_trans *tp, struct xfs_inode *dp,
+ const char *path, enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp);
+
+int xfs_metadir_start_create(struct xfs_metadir_update *upd);
+int xfs_metadir_create(struct xfs_metadir_update *upd, umode_t mode);
+
+int xfs_metadir_start_link(struct xfs_metadir_update *upd);
+int xfs_metadir_link(struct xfs_metadir_update *upd);
+
+int xfs_metadir_commit(struct xfs_metadir_update *upd);
+void xfs_metadir_cancel(struct xfs_metadir_update *upd, int error);
+
+int xfs_metadir_mkdir(struct xfs_inode *dp, const char *path,
+ struct xfs_inode **ipp);
+
+#endif /* __XFS_METADIR_H__ */
diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c
new file mode 100644
index 000000000000..adeb25d1a444
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metafile.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_metafile.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+
+/*
+ * Set up an inode to be recognized as a metadata directory inode.
+ *
+ * The permission bits (0777) are cleared and ownership is set to the global
+ * root uid/gid. Directories get XFS_METADIR_DIFLAGS and everything else
+ * gets XFS_METAFILE_DIFLAGS; the DAX flag is cleared, the METADATA flag is
+ * set, and @metafile_type is recorded in the inode. The inode core is then
+ * logged to @tp.
+ */
+void
+xfs_metafile_set_iflag(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ enum xfs_metafile_type metafile_type)
+{
+ VFS_I(ip)->i_mode &= ~0777;
+ VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
+ VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
+ if (S_ISDIR(VFS_I(ip)->i_mode))
+ ip->i_diflags |= XFS_METADIR_DIFLAGS;
+ else
+ ip->i_diflags |= XFS_METAFILE_DIFLAGS;
+ ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
+ ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
+ ip->i_metatype = metafile_type;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/*
+ * Clear the metadata directory inode flag.
+ *
+ * The inode is asserted to be a metadir inode with a link count of zero.
+ * Only XFS_DIFLAG2_METADATA is cleared here; i_diflags and i_metatype are
+ * left as they are. The inode core is then logged to @tp.
+ */
+void
+xfs_metafile_clear_iflag(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ ASSERT(xfs_is_metadir_inode(ip));
+ ASSERT(VFS_I(ip)->i_nlink == 0);
+
+ ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h
new file mode 100644
index 000000000000..acec400123db
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metafile.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_METAFILE_H__
+#define __XFS_METAFILE_H__
+
+/* All metadata files must have these flags set. */
+#define XFS_METAFILE_DIFLAGS (XFS_DIFLAG_IMMUTABLE | \
+ XFS_DIFLAG_SYNC | \
+ XFS_DIFLAG_NOATIME | \
+ XFS_DIFLAG_NODUMP | \
+ XFS_DIFLAG_NODEFRAG)
+
+/* All metadata directories must have these flags set. */
+#define XFS_METADIR_DIFLAGS (XFS_METAFILE_DIFLAGS | \
+ XFS_DIFLAG_NOSYMLINKS)
+
+void xfs_metafile_set_iflag(struct xfs_trans *tp, struct xfs_inode *ip,
+ enum xfs_metafile_type metafile_type);
+void xfs_metafile_clear_iflag(struct xfs_trans *tp, struct xfs_inode *ip);
+
+/* Code specific to kernel/userspace; must be provided externally. */
+
+int xfs_trans_metafile_iget(struct xfs_trans *tp, xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type, struct xfs_inode **ipp);
+int xfs_metafile_iget(struct xfs_mount *mp, xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type, struct xfs_inode **ipp);
+
+#endif /* __XFS_METAFILE_H__ */
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 81885a6a028e..ad0dedf00f18 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -19,40 +19,46 @@
static_assert((value) == (expected), \
"XFS: value of " #value " is wrong, expected " #expected)
+#define XFS_CHECK_SB_OFFSET(field, offset) \
+ XFS_CHECK_OFFSET(struct xfs_dsb, field, offset); \
+ XFS_CHECK_OFFSET(struct xfs_sb, field, offset);
+
static inline void __init
xfs_check_ondisk_structs(void)
{
- /* ag/file structures */
+ /* file structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_acl, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_acl_entry, 12);
- XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224);
- XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36);
- XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 344);
XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_key, 8);
XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_rec, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_bmdr_block, 4);
- XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_shdr, 48);
- XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_lhdr, 64);
- XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block, 72);
XFS_CHECK_STRUCT_SIZE(struct xfs_dinode, 176);
XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot, 104);
XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk, 136);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 264);
XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56);
+ XFS_CHECK_STRUCT_SIZE(xfs_timestamp_t, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_timestamp, 8);
+
+ /* space btrees */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 344);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_alloc_rec, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block, 72);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_lhdr, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_shdr, 48);
XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_key, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec, 12);
XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20);
XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24);
- XFS_CHECK_STRUCT_SIZE(xfs_timestamp_t, 8);
- XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_timestamp, 8);
XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8);
XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4);
- XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8);
XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4);
XFS_CHECK_STRUCT_SIZE(xfs_refcount_ptr_t, 4);
XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4);
+ XFS_CHECK_STRUCT_SIZE(xfs_bmdr_key_t, 8);
/* dir/attr trees */
XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80);
@@ -67,32 +73,34 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr, 64);
XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf, 64);
XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr, 64);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_entry_t, 8);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_hdr_t, 32);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t, 4);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_entry, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_hdr, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_map, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_name_local, 4);
/* realtime structures */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rtsb, 56);
XFS_CHECK_STRUCT_SIZE(union xfs_rtword_raw, 4);
XFS_CHECK_STRUCT_SIZE(union xfs_suminfo_raw, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rtbuf_blkinfo, 48);
/*
- * m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to
- * 4 bytes anyway so it's not obviously a problem. Hence for the moment
- * we don't check this structure. This can be re-instated when the attr
- * definitions are updated to use c99 VLA definitions.
+ * m68k has problems with struct xfs_attr_leaf_name_remote, but we pad
+ * it to 4 bytes anyway so it's not obviously a problem. Hence for the
+ * moment we don't check this structure. This can be re-instated when
+ * the attr definitions are updated to use c99 VLA definitions.
*
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t, 12);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_name_remote, 12);
*/
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, valuelen, 0);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, namelen, 2);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, nameval, 3);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valueblk, 0);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valuelen, 4);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen, 8);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name, 9);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 32);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_local, valuelen, 0);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_local, namelen, 2);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_local, nameval, 3);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, valueblk, 0);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, valuelen, 4);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, namelen, 8);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, name, 9);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leafblock, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_hdr, 4);
XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, totsize, 0);
XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, count, 2);
@@ -100,25 +108,40 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, valuelen, 1);
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, flags, 2);
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, nameval, 3);
- XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t, 12);
- XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t, 8);
- XFS_CHECK_STRUCT_SIZE(xfs_da_node_hdr_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_free_t, 4);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_hdr_t, 16);
- XFS_CHECK_OFFSET(xfs_dir2_data_unused_t, freetag, 0);
- XFS_CHECK_OFFSET(xfs_dir2_data_unused_t, length, 2);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_hdr_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_entry_t, 8);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_hdr_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_tail_t, 4);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_entry_t, 3);
- XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, namelen, 0);
- XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, offset, 1);
- XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, name, 3);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da_blkinfo, 12);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da_intnode, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_entry, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16);
+ XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, freetag, 0);
+ XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, length, 2);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_entry, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_tail, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_entry, 3);
+ XFS_CHECK_OFFSET(struct xfs_dir2_sf_entry, namelen, 0);
+ XFS_CHECK_OFFSET(struct xfs_dir2_sf_entry, offset, 1);
+ XFS_CHECK_OFFSET(struct xfs_dir2_sf_entry, name, 3);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_hdr, 10);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_parent_rec, 12);
+
+ /* ondisk dir/attr structures from xfs/122 */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_entry, 3);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_unused, 6);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_entry, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_tail, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_entry, 3);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_hdr, 10);
/* log structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
@@ -155,6 +178,16 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16);
XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16);
+ /* ondisk log structures from xfs/122 */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_unmount_log_format, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_xmd_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_xmi_log_format, 88);
+
+ /* parent pointer ioctls */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_rec, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_getparents, 40);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_by_handle, 64);
+
/*
* The v5 superblock format extended several v4 header structures with
* additional data. While new fields are only accessible on v5
@@ -194,6 +227,70 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MIN << XFS_DQ_BIGTIME_SHIFT, 4);
XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT,
16299260424LL);
+
+ /* superblock field checks we got from xfs/122 */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 288);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_sb, 288);
+ XFS_CHECK_SB_OFFSET(sb_magicnum, 0);
+ XFS_CHECK_SB_OFFSET(sb_blocksize, 4);
+ XFS_CHECK_SB_OFFSET(sb_dblocks, 8);
+ XFS_CHECK_SB_OFFSET(sb_rblocks, 16);
+ XFS_CHECK_SB_OFFSET(sb_rextents, 24);
+ XFS_CHECK_SB_OFFSET(sb_uuid, 32);
+ XFS_CHECK_SB_OFFSET(sb_logstart, 48);
+ XFS_CHECK_SB_OFFSET(sb_rootino, 56);
+ XFS_CHECK_SB_OFFSET(sb_rbmino, 64);
+ XFS_CHECK_SB_OFFSET(sb_rsumino, 72);
+ XFS_CHECK_SB_OFFSET(sb_rextsize, 80);
+ XFS_CHECK_SB_OFFSET(sb_agblocks, 84);
+ XFS_CHECK_SB_OFFSET(sb_agcount, 88);
+ XFS_CHECK_SB_OFFSET(sb_rbmblocks, 92);
+ XFS_CHECK_SB_OFFSET(sb_logblocks, 96);
+ XFS_CHECK_SB_OFFSET(sb_versionnum, 100);
+ XFS_CHECK_SB_OFFSET(sb_sectsize, 102);
+ XFS_CHECK_SB_OFFSET(sb_inodesize, 104);
+ XFS_CHECK_SB_OFFSET(sb_inopblock, 106);
+ XFS_CHECK_SB_OFFSET(sb_blocklog, 120);
+ XFS_CHECK_SB_OFFSET(sb_fname[12], 120);
+ XFS_CHECK_SB_OFFSET(sb_sectlog, 121);
+ XFS_CHECK_SB_OFFSET(sb_inodelog, 122);
+ XFS_CHECK_SB_OFFSET(sb_inopblog, 123);
+ XFS_CHECK_SB_OFFSET(sb_agblklog, 124);
+ XFS_CHECK_SB_OFFSET(sb_rextslog, 125);
+ XFS_CHECK_SB_OFFSET(sb_inprogress, 126);
+ XFS_CHECK_SB_OFFSET(sb_imax_pct, 127);
+ XFS_CHECK_SB_OFFSET(sb_icount, 128);
+ XFS_CHECK_SB_OFFSET(sb_ifree, 136);
+ XFS_CHECK_SB_OFFSET(sb_fdblocks, 144);
+ XFS_CHECK_SB_OFFSET(sb_frextents, 152);
+ XFS_CHECK_SB_OFFSET(sb_uquotino, 160);
+ XFS_CHECK_SB_OFFSET(sb_gquotino, 168);
+ XFS_CHECK_SB_OFFSET(sb_qflags, 176);
+ XFS_CHECK_SB_OFFSET(sb_flags, 178);
+ XFS_CHECK_SB_OFFSET(sb_shared_vn, 179);
+ XFS_CHECK_SB_OFFSET(sb_inoalignmt, 180);
+ XFS_CHECK_SB_OFFSET(sb_unit, 184);
+ XFS_CHECK_SB_OFFSET(sb_width, 188);
+ XFS_CHECK_SB_OFFSET(sb_dirblklog, 192);
+ XFS_CHECK_SB_OFFSET(sb_logsectlog, 193);
+ XFS_CHECK_SB_OFFSET(sb_logsectsize, 194);
+ XFS_CHECK_SB_OFFSET(sb_logsunit, 196);
+ XFS_CHECK_SB_OFFSET(sb_features2, 200);
+ XFS_CHECK_SB_OFFSET(sb_bad_features2, 204);
+ XFS_CHECK_SB_OFFSET(sb_features_compat, 208);
+ XFS_CHECK_SB_OFFSET(sb_features_ro_compat, 212);
+ XFS_CHECK_SB_OFFSET(sb_features_incompat, 216);
+ XFS_CHECK_SB_OFFSET(sb_features_log_incompat, 220);
+ XFS_CHECK_SB_OFFSET(sb_crc, 224);
+ XFS_CHECK_SB_OFFSET(sb_spino_align, 228);
+ XFS_CHECK_SB_OFFSET(sb_pquotino, 232);
+ XFS_CHECK_SB_OFFSET(sb_lsn, 240);
+ XFS_CHECK_SB_OFFSET(sb_meta_uuid, 248);
+ XFS_CHECK_SB_OFFSET(sb_metadirino, 264);
+ XFS_CHECK_SB_OFFSET(sb_rgcount, 272);
+ XFS_CHECK_SB_OFFSET(sb_rgextents, 276);
+ XFS_CHECK_SB_OFFSET(sb_rgblklog, 280);
+ XFS_CHECK_SB_OFFSET(sb_pad, 281);
}
#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c
new file mode 100644
index 000000000000..69366c44a701
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_parent.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022-2024 Oracle.
+ * All rights reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_log_format.h"
+#include "xfs_shared.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_trans.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_attr_sf.h"
+#include "xfs_bmap.h"
+#include "xfs_defer.h"
+#include "xfs_log.h"
+#include "xfs_xattr.h"
+#include "xfs_parent.h"
+#include "xfs_trans_space.h"
+#include "xfs_attr_item.h"
+#include "xfs_health.h"
+
+struct kmem_cache *xfs_parent_args_cache;
+
+/*
+ * Parent pointer attribute handling.
+ *
+ * Because the attribute name is a filename component, it will never be longer
+ * than 255 bytes and must not contain nulls or slashes. These are roughly the
+ * same constraints that apply to attribute names.
+ *
+ * The attribute value must always be a struct xfs_parent_rec. This means the
+ * attribute will never be in remote format because 12 bytes is nowhere near
+ * xfs_attr_leaf_entsize_local_max() (~75% of block size).
+ *
+ * Creating a new parent attribute will always create a new attribute - there
+ * should never, ever be an existing attribute in the tree for a new inode.
+ * ENOSPC behavior is problematic - creating the inode without the parent
+ * pointer is effectively a corruption, so we allow parent attribute creation
+ * to dip into the reserve block pool to avoid unexpected ENOSPC errors from
+ * occurring.
+ */
+
+/*
+ * Decide whether an xattr name is acceptable as a parent pointer name.
+ *
+ * @attr_flags: attr flags of the xattr being examined
+ * @name, @length: the xattr name, handed unchanged to xfs_dir2_namecheck()
+ *
+ * Returns true only if the xattr is not marked incomplete and the name passes
+ * xfs_dir2_namecheck().
+ */
+bool
+xfs_parent_namecheck(
+	unsigned int		attr_flags,
+	const void		*name,
+	size_t			length)
+{
+	/*
+	 * Parent pointers are only ever updated through logged operations, so
+	 * an incomplete xattr can never be a valid parent pointer.
+	 */
+	return !(attr_flags & XFS_ATTR_INCOMPLETE) &&
+	       xfs_dir2_namecheck(name, length);
+}
+
+/*
+ * Decide whether an xattr value is acceptable as a parent pointer record.
+ *
+ * @mp: mount whose feature bits and inode geometry are consulted
+ * @value, @valuelen: the xattr value buffer and its length
+ *
+ * Returns true if the filesystem has parent pointers enabled, the value is
+ * exactly one struct xfs_parent_rec, the value is present (i.e. local), and
+ * the recorded parent inumber passes xfs_verify_dir_ino().
+ */
+bool
+xfs_parent_valuecheck(
+	struct xfs_mount		*mp,
+	const void			*value,
+	size_t				valuelen)
+{
+	const struct xfs_parent_rec	*pptr = value;
+
+	/*
+	 * Reject outright if the feature is off, if the value is not sized
+	 * like a parent record, or if there is no local value to inspect.
+	 */
+	if (!xfs_has_parent(mp) ||
+	    valuelen != sizeof(struct xfs_parent_rec) ||
+	    value == NULL)
+		return false;
+
+	/* Whatever is left is valid iff the parent inumber is. */
+	return xfs_verify_dir_ino(mp, be64_to_cpu(pptr->p_ino));
+}
+
+/*
+ * Compute the attr hash of a parent pointer from the dirent name and the
+ * inode number of the parent directory.
+ */
+xfs_dahash_t
+xfs_parent_hashval(
+	struct xfs_mount	*mp,
+	const uint8_t		*name,
+	int			namelen,
+	xfs_ino_t		parent_ino)
+{
+	struct xfs_name		xname = {
+		.name		= name,
+		.len		= namelen,
+	};
+	xfs_dahash_t		hash;
+
+	/* Start from the hash the directory itself would use for this name. */
+	hash = xfs_dir2_hashname(mp, &xname);
+
+	/*
+	 * Fold in both halves of the parent inumber so that hardlinks with
+	 * the same name under different parents do not collide.
+	 */
+	hash ^= upper_32_bits(parent_ino);
+	hash ^= lower_32_bits(parent_ino);
+	return hash;
+}
+
+/*
+ * Compute the attr hash of a parent pointer given the raw xattr name and
+ * value.  The value must be a local struct xfs_parent_rec; anything else
+ * trips an assertion and yields a hash of zero.
+ */
+xfs_dahash_t
+xfs_parent_hashattr(
+	struct xfs_mount		*mp,
+	const uint8_t			*name,
+	int				namelen,
+	const void			*value,
+	int				valuelen)
+{
+	const struct xfs_parent_rec	*pptr = value;
+	xfs_ino_t			parent_ino;
+
+	/* The value has to be sized exactly like a parent record... */
+	if (valuelen != sizeof(struct xfs_parent_rec)) {
+		ASSERT(valuelen == sizeof(struct xfs_parent_rec));
+		return 0;
+	}
+
+	/* ...and it has to actually be there. */
+	if (!value) {
+		ASSERT(value != NULL);
+		return 0;
+	}
+
+	parent_ino = be64_to_cpu(pptr->p_ino);
+	return xfs_parent_hashval(mp, name, namelen, parent_ino);
+}
+
+/*
+ * Initialize the parent pointer arguments structure. Caller must have zeroed
+ * the contents of @args. @tp is only required for updates.
+ *
+ * @args: da args to fill in
+ * @tp: transaction to attach, may be NULL (see xfs_parent_set/unset)
+ * @rec: parent record that becomes the xattr value
+ * @child: inode whose attr fork holds the parent pointer
+ * @owner: stored in args->owner; callers in this file pass either
+ * child->i_ino or a caller-supplied owner
+ * @parent_name: dirent name that becomes the xattr name
+ */
+static void
+xfs_parent_da_args_init(
+ struct xfs_da_args *args,
+ struct xfs_trans *tp,
+ struct xfs_parent_rec *rec,
+ struct xfs_inode *child,
+ xfs_ino_t owner,
+ const struct xfs_name *parent_name)
+{
+ args->geo = child->i_mount->m_attr_geo;
+ args->whichfork = XFS_ATTR_FORK;
+ args->attr_filter = XFS_ATTR_PARENT;
+ args->op_flags = XFS_DA_OP_LOGGED | XFS_DA_OP_OKNOENT;
+ args->trans = tp;
+ args->dp = child;
+ args->owner = owner;
+ /* The dirent name is the xattr name; the parent record is the value. */
+ args->name = parent_name->name;
+ args->namelen = parent_name->len;
+ args->value = rec;
+ args->valuelen = sizeof(struct xfs_parent_rec);
+ /*
+ * NOTE(review): presumably the hash is derived from the name/value set
+ * above, so this call must stay after those assignments - confirm.
+ */
+ xfs_attr_sethash(args);
+}
+
+/*
+ * Prepare the incore attr fork of @child for a parent pointer query or
+ * update.  Returns 0 on success, -EFSCORRUPTED (after marking the inode's
+ * parent pointers sick) if there is no attr fork at all, or whatever
+ * xfs_iread_extents() returns.
+ */
+static inline int
+xfs_parent_iread_extents(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*child)
+{
+	/* A file with parent pointers must have an attr fork to hold them. */
+	if (!XFS_IS_CORRUPT(child->i_mount, !xfs_inode_has_attr_fork(child)))
+		return xfs_iread_extents(tp, child, XFS_ATTR_FORK);
+
+	xfs_inode_mark_sick(child, XFS_SICK_INO_PARENT);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * Queue a deferred xattr set that records (@dp, @parent_name) as a parent
+ * of @child, mirroring the addition of a directory entry.  Returns 0 or a
+ * negative errno if the attr fork of @child could not be readied.
+ */
+int
+xfs_parent_addname(
+	struct xfs_trans	*tp,
+	struct xfs_parent_args	*ppargs,
+	struct xfs_inode	*dp,
+	const struct xfs_name	*parent_name,
+	struct xfs_inode	*child)
+{
+	struct xfs_parent_rec	*rec = &ppargs->rec;
+	struct xfs_da_args	*args = &ppargs->args;
+	int			error;
+
+	error = xfs_parent_iread_extents(tp, child);
+	if (error)
+		return error;
+
+	/* Describe the parent, then hand the update to the defer machinery. */
+	xfs_inode_to_parent_rec(rec, dp);
+	xfs_parent_da_args_init(args, tp, rec, child, child->i_ino,
+			parent_name);
+	xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET);
+	return 0;
+}
+
+/*
+ * Queue a deferred xattr removal that drops (@dp, @parent_name) from the
+ * parent pointers of @child, mirroring the removal of a directory entry.
+ * Returns 0 or a negative errno if the attr fork of @child could not be
+ * readied.
+ */
+int
+xfs_parent_removename(
+	struct xfs_trans	*tp,
+	struct xfs_parent_args	*ppargs,
+	struct xfs_inode	*dp,
+	const struct xfs_name	*parent_name,
+	struct xfs_inode	*child)
+{
+	struct xfs_parent_rec	*rec = &ppargs->rec;
+	struct xfs_da_args	*args = &ppargs->args;
+	int			error;
+
+	error = xfs_parent_iread_extents(tp, child);
+	if (error)
+		return error;
+
+	/* Describe the parent, then hand the update to the defer machinery. */
+	xfs_inode_to_parent_rec(rec, dp);
+	xfs_parent_da_args_init(args, tp, rec, child, child->i_ino,
+			parent_name);
+	xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE);
+	return 0;
+}
+
+/*
+ * Queue a deferred xattr replace that swaps the parent pointer
+ * (@old_dp, @old_name) of @child for (@new_dp, @new_name), mirroring a
+ * rename.  Returns 0 or a negative errno if the attr fork of @child could
+ * not be readied.
+ */
+int
+xfs_parent_replacename(
+	struct xfs_trans	*tp,
+	struct xfs_parent_args	*ppargs,
+	struct xfs_inode	*old_dp,
+	const struct xfs_name	*old_name,
+	struct xfs_inode	*new_dp,
+	const struct xfs_name	*new_name,
+	struct xfs_inode	*child)
+{
+	struct xfs_da_args	*args = &ppargs->args;
+	int			error;
+
+	error = xfs_parent_iread_extents(tp, child);
+	if (error)
+		return error;
+
+	/* Build the records for both the outgoing and incoming parent. */
+	xfs_inode_to_parent_rec(&ppargs->rec, old_dp);
+	xfs_inode_to_parent_rec(&ppargs->new_rec, new_dp);
+
+	/* The old name/record identify the xattr being replaced... */
+	xfs_parent_da_args_init(args, tp, &ppargs->rec, child, child->i_ino,
+			old_name);
+
+	/* ...and the new name/record say what takes its place. */
+	args->new_name = new_name->name;
+	args->new_namelen = new_name->len;
+	args->new_value = &ppargs->new_rec;
+	args->new_valuelen = sizeof(struct xfs_parent_rec);
+
+	xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE);
+	return 0;
+}
+
+/*
+ * Validate a parent pointer xattr and extract the parent inode number and
+ * generation from its value into @parent_ino and @parent_gen.  Either output
+ * pointer may be NULL if the caller does not want that field.
+ *
+ * The caller is expected to pass only xattrs carrying XFS_ATTR_PARENT; this
+ * is asserted.
+ *
+ * Returns 0 if the name and value both pass validation, or -EFSCORRUPTED for
+ * garbage.
+ */
+int
+xfs_parent_from_attr(
+	struct xfs_mount		*mp,
+	unsigned int			attr_flags,
+	const unsigned char		*name,
+	unsigned int			namelen,
+	const void			*value,
+	unsigned int			valuelen,
+	xfs_ino_t			*parent_ino,
+	uint32_t			*parent_gen)
+{
+	const struct xfs_parent_rec	*pptr = value;
+
+	ASSERT(attr_flags & XFS_ATTR_PARENT);
+
+	/* Name is checked first; the value is only looked at if it passes. */
+	if (!xfs_parent_namecheck(attr_flags, name, namelen) ||
+	    !xfs_parent_valuecheck(mp, value, valuelen))
+		return -EFSCORRUPTED;
+
+	if (parent_gen)
+		*parent_gen = be32_to_cpu(pptr->p_gen);
+	if (parent_ino)
+		*parent_ino = be64_to_cpu(pptr->p_ino);
+	return 0;
+}
+
+/*
+ * Find the parent pointer record (@parent_name -> @pptr) attached to @ip.
+ *
+ * The caller must hold at least ILOCK_SHARED.  @scratch is wiped here, so it
+ * does not have to be initialized beforehand.
+ *
+ * Returns 0 if the pointer exists, -ENOATTR if nothing matches, or another
+ * negative errno.
+ */
+int
+xfs_parent_lookup(
+	struct xfs_trans		*tp,
+	struct xfs_inode		*ip,
+	const struct xfs_name		*parent_name,
+	struct xfs_parent_rec		*pptr,
+	struct xfs_da_args		*scratch)
+{
+	int				error;
+
+	/* Start from a clean slate, as xfs_parent_da_args_init() requires. */
+	memset(scratch, 0, sizeof(*scratch));
+	xfs_parent_da_args_init(scratch, tp, pptr, ip, ip->i_ino, parent_name);
+
+	error = xfs_attr_get_ilocked(scratch);
+	return error;
+}
+
+/*
+ * Vet a (name, record) pair before a repair function acts on it.  Returns
+ * true only if the name passes xfs_parent_namecheck() and the record passes
+ * xfs_parent_valuecheck().
+ */
+static inline bool
+xfs_parent_sanity_check(
+	struct xfs_mount		*mp,
+	const struct xfs_name		*parent_name,
+	const struct xfs_parent_rec	*pptr)
+{
+	return xfs_parent_namecheck(XFS_ATTR_PARENT, parent_name->name,
+				parent_name->len) &&
+	       xfs_parent_valuecheck(mp, pptr, sizeof(*pptr));
+}
+
+
+/*
+ * Immediately attach the parent pointer (@parent_name -> @pptr) to @ip.
+ *
+ * For specialized repair functions only.  The caller must not have a
+ * transaction or hold the ILOCK.  @scratch is wiped here, so it does not have
+ * to be initialized beforehand.
+ *
+ * Returns -EFSCORRUPTED (and asserts) if the name or record is not sane,
+ * otherwise the result of xfs_attr_set().
+ */
+int
+xfs_parent_set(
+	struct xfs_inode	*ip,
+	xfs_ino_t		owner,
+	const struct xfs_name	*parent_name,
+	struct xfs_parent_rec	*pptr,
+	struct xfs_da_args	*scratch)
+{
+	if (xfs_parent_sanity_check(ip->i_mount, parent_name, pptr)) {
+		memset(scratch, 0, sizeof(*scratch));
+		xfs_parent_da_args_init(scratch, NULL, pptr, ip, owner,
+				parent_name);
+		return xfs_attr_set(scratch, XFS_ATTRUPDATE_CREATE, false);
+	}
+
+	/* Repair code should never hand us a bogus parent pointer. */
+	ASSERT(0);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * Immediately detach the parent pointer (@parent_name -> @pptr) from @ip.
+ *
+ * For specialized repair functions only.  The caller must not have a
+ * transaction or hold the ILOCK.  @scratch is wiped here, so it does not have
+ * to be initialized beforehand.
+ *
+ * Returns -EFSCORRUPTED (and asserts) if the name or record is not sane,
+ * otherwise the result of xfs_attr_set().
+ */
+int
+xfs_parent_unset(
+	struct xfs_inode	*ip,
+	xfs_ino_t		owner,
+	const struct xfs_name	*parent_name,
+	struct xfs_parent_rec	*pptr,
+	struct xfs_da_args	*scratch)
+{
+	if (xfs_parent_sanity_check(ip->i_mount, parent_name, pptr)) {
+		memset(scratch, 0, sizeof(*scratch));
+		xfs_parent_da_args_init(scratch, NULL, pptr, ip, owner,
+				parent_name);
+		return xfs_attr_set(scratch, XFS_ATTRUPDATE_REMOVE, false);
+	}
+
+	/* Repair code should never hand us a bogus parent pointer. */
+	ASSERT(0);
+	return -EFSCORRUPTED;
+}
diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h
new file mode 100644
index 000000000000..b8036527cdc7
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_parent.h
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022-2024 Oracle.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_PARENT_H__
+#define __XFS_PARENT_H__
+
+/* Metadata validators */
+bool xfs_parent_namecheck(unsigned int attr_flags, const void *name,
+ size_t length);
+bool xfs_parent_valuecheck(struct xfs_mount *mp, const void *value,
+ size_t valuelen);
+
+xfs_dahash_t xfs_parent_hashval(struct xfs_mount *mp, const uint8_t *name,
+ int namelen, xfs_ino_t parent_ino);
+xfs_dahash_t xfs_parent_hashattr(struct xfs_mount *mp, const uint8_t *name,
+ int namelen, const void *value, int valuelen);
+
+/*
+ * Fill in a parent record from a parent inode number and generation,
+ * converting both fields to big-endian.  The record is what the parent
+ * pointer xattr carries as its value.
+ */
+static inline void
+xfs_parent_rec_init(
+	struct xfs_parent_rec	*rec,
+	xfs_ino_t		ino,
+	uint32_t		gen)
+{
+	rec->p_gen = cpu_to_be32(gen);
+	rec->p_ino = cpu_to_be64(ino);
+}
+
+/*
+ * Fill in a parent record describing the directory @dp: its inode number and
+ * its VFS inode generation, both converted to big-endian.  The record is what
+ * the parent pointer xattr carries as its value.
+ */
+static inline void
+xfs_inode_to_parent_rec(
+	struct xfs_parent_rec	*rec,
+	const struct xfs_inode	*dp)
+{
+	rec->p_ino = cpu_to_be64(dp->i_ino);
+	rec->p_gen = cpu_to_be32(VFS_IC(dp)->i_generation);
+}
+
+extern struct kmem_cache *xfs_parent_args_cache;
+
+/*
+ * Parent pointer information needed to pass around the deferred xattr update
+ * machinery.
+ *
+ * Objects of this type are allocated from xfs_parent_args_cache by
+ * xfs_parent_start() and released by xfs_parent_finish().
+ */
+struct xfs_parent_args {
+ /* Parent record used as the xattr value for add/remove/replace. */
+ struct xfs_parent_rec rec;
+ /* Replacement parent record; only filled in by xfs_parent_replacename. */
+ struct xfs_parent_rec new_rec;
+ /* da args handed to xfs_attr_defer_add(); points at the records above. */
+ struct xfs_da_args args;
+};
+
+/*
+ * Begin a parent pointer update by allocating the zeroed context object that
+ * the update functions need.
+ *
+ * If the filesystem does not have parent pointers, *@ppargsp is set to NULL
+ * and 0 is returned.  Otherwise returns 0 with *@ppargsp pointing at the new
+ * object, or -ENOMEM (with *@ppargsp NULL) if the allocation failed.
+ */
+static inline int
+xfs_parent_start(
+	struct xfs_mount	*mp,
+	struct xfs_parent_args	**ppargsp)
+{
+	*ppargsp = NULL;
+
+	/* Nothing to track when the feature is off. */
+	if (!xfs_has_parent(mp))
+		return 0;
+
+	*ppargsp = kmem_cache_zalloc(xfs_parent_args_cache, GFP_KERNEL);
+	return *ppargsp ? 0 : -ENOMEM;
+}
+
+/*
+ * End a parent pointer update by returning the context object to its cache.
+ * A NULL @ppargs is accepted and ignored.
+ */
+static inline void
+xfs_parent_finish(
+	struct xfs_mount	*mp,
+	struct xfs_parent_args	*ppargs)
+{
+	if (!ppargs)
+		return;
+
+	kmem_cache_free(xfs_parent_args_cache, ppargs);
+}
+
+int xfs_parent_addname(struct xfs_trans *tp, struct xfs_parent_args *ppargs,
+ struct xfs_inode *dp, const struct xfs_name *parent_name,
+ struct xfs_inode *child);
+int xfs_parent_removename(struct xfs_trans *tp, struct xfs_parent_args *ppargs,
+ struct xfs_inode *dp, const struct xfs_name *parent_name,
+ struct xfs_inode *child);
+int xfs_parent_replacename(struct xfs_trans *tp,
+ struct xfs_parent_args *ppargs,
+ struct xfs_inode *old_dp, const struct xfs_name *old_name,
+ struct xfs_inode *new_dp, const struct xfs_name *new_name,
+ struct xfs_inode *child);
+
+int xfs_parent_from_attr(struct xfs_mount *mp, unsigned int attr_flags,
+ const unsigned char *name, unsigned int namelen,
+ const void *value, unsigned int valuelen,
+ xfs_ino_t *parent_ino, uint32_t *parent_gen);
+
+/* Repair functions */
+int xfs_parent_lookup(struct xfs_trans *tp, struct xfs_inode *ip,
+ const struct xfs_name *name, struct xfs_parent_rec *pptr,
+ struct xfs_da_args *scratch);
+int xfs_parent_set(struct xfs_inode *ip, xfs_ino_t owner,
+ const struct xfs_name *name, struct xfs_parent_rec *pptr,
+ struct xfs_da_args *scratch);
+int xfs_parent_unset(struct xfs_inode *ip, xfs_ino_t owner,
+ const struct xfs_name *name, struct xfs_parent_rec *pptr,
+ struct xfs_da_args *scratch);
+
+#endif /* __XFS_PARENT_H__ */
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index cb035da3f990..763d941a8420 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -56,7 +56,7 @@ typedef uint8_t xfs_dqtype_t;
* And, of course, we also need to take into account the dquot log format item
* used to describe each dquot.
*/
-#define XFS_DQUOT_LOGRES(mp) \
+#define XFS_DQUOT_LOGRES \
((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)
#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
@@ -143,4 +143,47 @@ time64_t xfs_dquot_from_disk_ts(struct xfs_disk_dquot *ddq,
__be32 dtimer);
__be32 xfs_dquot_to_disk_ts(struct xfs_dquot *ddq, time64_t timer);
+/*
+ * Map a quota type to its name string: "user", "group" or "project".
+ * Any other type trips an assertion and yields NULL.
+ * NOTE(review): presumably this is the path component of the quota inode in
+ * the metadata directory tree - confirm against the callers.
+ */
+static inline const char *
+xfs_dqinode_path(xfs_dqtype_t type)
+{
+	if (type == XFS_DQTYPE_USER)
+		return "user";
+	if (type == XFS_DQTYPE_GROUP)
+		return "group";
+	if (type == XFS_DQTYPE_PROJ)
+		return "project";
+
+	ASSERT(0);
+	return NULL;
+}
+
+/*
+ * Map a quota type to the matching metadata file type.  Any type other than
+ * user, group or project trips an assertion and yields
+ * XFS_METAFILE_UNKNOWN.
+ */
+static inline enum xfs_metafile_type
+xfs_dqinode_metafile_type(xfs_dqtype_t type)
+{
+	if (type == XFS_DQTYPE_USER)
+		return XFS_METAFILE_USRQUOTA;
+	if (type == XFS_DQTYPE_GROUP)
+		return XFS_METAFILE_GRPQUOTA;
+	if (type == XFS_DQTYPE_PROJ)
+		return XFS_METAFILE_PRJQUOTA;
+
+	ASSERT(0);
+	return XFS_METAFILE_UNKNOWN;
+}
+
+unsigned int xfs_dqinode_sick_mask(xfs_dqtype_t type);
+
+int xfs_dqinode_load(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dqtype_t type, struct xfs_inode **ipp);
+int xfs_dqinode_metadir_create(struct xfs_inode *dp, xfs_dqtype_t type,
+ struct xfs_inode **ipp);
+int xfs_dqinode_metadir_link(struct xfs_inode *dp, xfs_dqtype_t type,
+ struct xfs_inode *ip);
+int xfs_dqinode_mkdir_parent(struct xfs_mount *mp, struct xfs_inode **dpp);
+int xfs_dqinode_load_parent(struct xfs_trans *tp, struct xfs_inode **dpp);
+
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 511c912d515c..2dbab68b4fe6 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -24,6 +24,7 @@
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_health.h"
+#include "xfs_refcount_item.h"
struct kmem_cache *xfs_refcount_intent_cache;
@@ -51,7 +52,7 @@ xfs_refcount_lookup_le(
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ trace_xfs_refcount_lookup(cur,
xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
@@ -71,7 +72,7 @@ xfs_refcount_lookup_ge(
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ trace_xfs_refcount_lookup(cur,
xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_GE);
cur->bc_rec.rc.rc_startblock = bno;
@@ -91,7 +92,7 @@ xfs_refcount_lookup_eq(
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ trace_xfs_refcount_lookup(cur,
xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
@@ -153,7 +154,7 @@ xfs_refcount_complain_bad_rec(
xfs_warn(mp,
"Refcount BTree record corruption in AG %d detected at %pS!",
- cur->bc_ag.pag->pag_agno, fa);
+ cur->bc_group->xg_gno, fa);
xfs_warn(mp,
"Start block 0x%x, block count 0x%x, references 0x%x",
irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
@@ -179,11 +180,11 @@ xfs_refcount_get_rec(
return error;
xfs_refcount_btrec_to_irec(rec, irec);
- fa = xfs_refcount_check_irec(cur->bc_ag.pag, irec);
+ fa = xfs_refcount_check_irec(to_perag(cur->bc_group), irec);
if (fa)
return xfs_refcount_complain_bad_rec(cur, fa, irec);
- trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+ trace_xfs_refcount_get(cur, irec);
return 0;
}
@@ -201,7 +202,7 @@ xfs_refcount_update(
uint32_t start;
int error;
- trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+ trace_xfs_refcount_update(cur, irec);
start = xfs_refcount_encode_startblock(irec->rc_startblock,
irec->rc_domain);
@@ -211,8 +212,7 @@ xfs_refcount_update(
error = xfs_btree_update(cur, &rec);
if (error)
- trace_xfs_refcount_update_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_update_error(cur, error, _RET_IP_);
return error;
}
@@ -229,7 +229,7 @@ xfs_refcount_insert(
{
int error;
- trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+ trace_xfs_refcount_insert(cur, irec);
cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
@@ -247,8 +247,7 @@ xfs_refcount_insert(
out_error:
if (error)
- trace_xfs_refcount_insert_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_insert_error(cur, error, _RET_IP_);
return error;
}
@@ -275,7 +274,7 @@ xfs_refcount_delete(
error = -EFSCORRUPTED;
goto out_error;
}
- trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
+ trace_xfs_refcount_delete(cur, &irec);
error = xfs_btree_delete(cur, i);
if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
xfs_btree_mark_sick(cur);
@@ -288,8 +287,7 @@ xfs_refcount_delete(
&found_rec);
out_error:
if (error)
- trace_xfs_refcount_delete_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_delete_error(cur, error, _RET_IP_);
return error;
}
@@ -413,8 +411,7 @@ xfs_refcount_split_extent(
return 0;
*shape_changed = true;
- trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- &rcext, agbno);
+ trace_xfs_refcount_split_extent(cur, &rcext, agbno);
/* Establish the right extent. */
tmp = rcext;
@@ -438,8 +435,7 @@ xfs_refcount_split_extent(
return error;
out_error:
- trace_xfs_refcount_split_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_split_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -458,8 +454,7 @@ xfs_refcount_merge_center_extents(
int error;
int found_rec;
- trace_xfs_refcount_merge_center_extents(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, left, center, right);
+ trace_xfs_refcount_merge_center_extents(cur, left, center, right);
ASSERT(left->rc_domain == center->rc_domain);
ASSERT(right->rc_domain == center->rc_domain);
@@ -522,8 +517,7 @@ xfs_refcount_merge_center_extents(
return error;
out_error:
- trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_merge_center_extents_error(cur, error, _RET_IP_);
return error;
}
@@ -541,8 +535,7 @@ xfs_refcount_merge_left_extent(
int error;
int found_rec;
- trace_xfs_refcount_merge_left_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, left, cleft);
+ trace_xfs_refcount_merge_left_extent(cur, left, cleft);
ASSERT(left->rc_domain == cleft->rc_domain);
@@ -589,8 +582,7 @@ xfs_refcount_merge_left_extent(
return error;
out_error:
- trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_merge_left_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -607,8 +599,7 @@ xfs_refcount_merge_right_extent(
int error;
int found_rec;
- trace_xfs_refcount_merge_right_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, cright, right);
+ trace_xfs_refcount_merge_right_extent(cur, cright, right);
ASSERT(right->rc_domain == cright->rc_domain);
@@ -658,8 +649,7 @@ xfs_refcount_merge_right_extent(
return error;
out_error:
- trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_merge_right_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -748,13 +738,11 @@ not_found:
cleft->rc_refcount = 1;
cleft->rc_domain = domain;
}
- trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- left, cleft, agbno);
+ trace_xfs_refcount_find_left_extent(cur, left, cleft, agbno);
return error;
out_error:
- trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_find_left_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -843,13 +831,12 @@ not_found:
cright->rc_refcount = 1;
cright->rc_domain = domain;
}
- trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- cright, right, agbno + aglen);
+ trace_xfs_refcount_find_right_extent(cur, cright, right,
+ agbno + aglen);
return error;
out_error:
- trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_find_right_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -1148,8 +1135,7 @@ xfs_refcount_adjust_extents(
tmp.rc_refcount = 1 + adj;
tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;
- trace_xfs_refcount_modify_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, &tmp);
+ trace_xfs_refcount_modify_extent(cur, &tmp);
/*
* Either cover the hole (increment) or
@@ -1168,12 +1154,11 @@ xfs_refcount_adjust_extents(
goto out_error;
}
} else {
- fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
- cur->bc_ag.pag->pag_agno,
+ fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group),
tmp.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
tmp.rc_blockcount, NULL,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
goto out_error;
}
@@ -1214,8 +1199,7 @@ xfs_refcount_adjust_extents(
if (ext.rc_refcount == MAXREFCOUNT)
goto skip;
ext.rc_refcount += adj;
- trace_xfs_refcount_modify_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, &ext);
+ trace_xfs_refcount_modify_extent(cur, &ext);
cur->bc_refc.nr_ops++;
if (ext.rc_refcount > 1) {
error = xfs_refcount_update(cur, &ext);
@@ -1232,12 +1216,11 @@ xfs_refcount_adjust_extents(
}
goto advloop;
} else {
- fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
- cur->bc_ag.pag->pag_agno,
+ fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group),
ext.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
ext.rc_blockcount, NULL,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
goto out_error;
}
@@ -1254,8 +1237,7 @@ advloop:
return error;
out_error:
- trace_xfs_refcount_modify_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -1272,11 +1254,9 @@ xfs_refcount_adjust(
int error;
if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
- trace_xfs_refcount_increase(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, *agbno, *aglen);
+ trace_xfs_refcount_increase(cur, *agbno, *aglen);
else
- trace_xfs_refcount_decrease(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, *agbno, *aglen);
+ trace_xfs_refcount_decrease(cur, *agbno, *aglen);
/*
* Ensure that no rcextents cross the boundary of the adjustment range.
@@ -1315,28 +1295,10 @@ xfs_refcount_adjust(
return 0;
out_error:
- trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_refcount_adjust_error(cur, error, _RET_IP_);
return error;
}
-/* Clean up after calling xfs_refcount_finish_one. */
-void
-xfs_refcount_finish_one_cleanup(
- struct xfs_trans *tp,
- struct xfs_btree_cur *rcur,
- int error)
-{
- struct xfs_buf *agbp;
-
- if (rcur == NULL)
- return;
- agbp = rcur->bc_ag.agbp;
- xfs_btree_del_cursor(rcur, error);
- if (error)
- xfs_trans_brelse(tp, agbp);
-}
-
/*
* Set up a continuation a deferred refcount operation by updating the intent.
* Checks to make sure we're not going to run off the end of the AG.
@@ -1348,7 +1310,7 @@ xfs_refcount_continue_op(
xfs_agblock_t new_agbno)
{
struct xfs_mount *mp = cur->bc_mp;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
ri->ri_blockcount))) {
@@ -1356,10 +1318,10 @@ xfs_refcount_continue_op(
return -EFSCORRUPTED;
}
- ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+ ri->ri_startblock = xfs_agbno_to_fsb(pag, new_agbno);
ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
- ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
+ ASSERT(pag_agno(pag) == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
return 0;
}
@@ -1378,7 +1340,7 @@ xfs_refcount_finish_one(
struct xfs_btree_cur **pcur)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_btree_cur *rcur;
+ struct xfs_btree_cur *rcur = *pcur;
struct xfs_buf *agbp = NULL;
int error = 0;
xfs_agblock_t bno;
@@ -1387,9 +1349,7 @@ xfs_refcount_finish_one(
bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);
- trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
- ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
- ri->ri_blockcount);
+ trace_xfs_refcount_deferred(mp, ri);
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
return -EIO;
@@ -1398,25 +1358,25 @@ xfs_refcount_finish_one(
* If we haven't gotten a cursor or the cursor AG doesn't match
* the startblock, get one now.
*/
- rcur = *pcur;
- if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
+ if (rcur != NULL && rcur->bc_group != ri->ri_group) {
nr_ops = rcur->bc_refc.nr_ops;
shape_changes = rcur->bc_refc.shape_changes;
- xfs_refcount_finish_one_cleanup(tp, rcur, 0);
+ xfs_btree_del_cursor(rcur, 0);
rcur = NULL;
*pcur = NULL;
}
if (rcur == NULL) {
- error = xfs_alloc_read_agf(ri->ri_pag, tp,
+ struct xfs_perag *pag = to_perag(ri->ri_group);
+
+ error = xfs_alloc_read_agf(pag, tp,
XFS_ALLOC_FLAG_FREEING, &agbp);
if (error)
return error;
- rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag);
+ *pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
rcur->bc_refc.nr_ops = nr_ops;
rcur->bc_refc.shape_changes = shape_changes;
}
- *pcur = rcur;
switch (ri->ri_type) {
case XFS_REFCOUNT_INCREASE:
@@ -1452,8 +1412,7 @@ xfs_refcount_finish_one(
return -EFSCORRUPTED;
}
if (!error && ri->ri_blockcount > 0)
- trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno,
- ri->ri_type, bno, ri->ri_blockcount);
+ trace_xfs_refcount_finish_one_leftover(mp, ri);
return error;
}
@@ -1469,11 +1428,6 @@ __xfs_refcount_add(
{
struct xfs_refcount_intent *ri;
- trace_xfs_refcount_defer(tp->t_mountp,
- XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
- type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
- blockcount);
-
ri = kmem_cache_alloc(xfs_refcount_intent_cache,
GFP_KERNEL | __GFP_NOFAIL);
INIT_LIST_HEAD(&ri->ri_list);
@@ -1481,8 +1435,7 @@ __xfs_refcount_add(
ri->ri_startblock = startblock;
ri->ri_blockcount = blockcount;
- xfs_refcount_update_get_group(tp->t_mountp, ri);
- xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type);
+ xfs_refcount_defer_add(tp, ri);
}
/*
@@ -1537,8 +1490,7 @@ xfs_refcount_find_shared(
int have;
int error;
- trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- agbno, aglen);
+ trace_xfs_refcount_find_shared(cur, agbno, aglen);
/* By default, skip the whole range */
*fbno = NULLAGBLOCK;
@@ -1625,13 +1577,11 @@ xfs_refcount_find_shared(
}
done:
- trace_xfs_refcount_find_shared_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, *fbno, *flen);
+ trace_xfs_refcount_find_shared_result(cur, *fbno, *flen);
out_error:
if (error)
- trace_xfs_refcount_find_shared_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_find_shared_error(cur, error, _RET_IP_);
return error;
}
@@ -1737,8 +1687,7 @@ xfs_refcount_adjust_cow_extents(
tmp.rc_refcount = 1;
tmp.rc_domain = XFS_REFC_DOMAIN_COW;
- trace_xfs_refcount_modify_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, &tmp);
+ trace_xfs_refcount_modify_extent(cur, &tmp);
error = xfs_refcount_insert(cur, &tmp,
&found_tmp);
@@ -1769,8 +1718,7 @@ xfs_refcount_adjust_cow_extents(
}
ext.rc_refcount = 0;
- trace_xfs_refcount_modify_extent(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, &ext);
+ trace_xfs_refcount_modify_extent(cur, &ext);
error = xfs_refcount_delete(cur, &found_rec);
if (error)
goto out_error;
@@ -1786,8 +1734,7 @@ xfs_refcount_adjust_cow_extents(
return error;
out_error:
- trace_xfs_refcount_modify_extent_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_);
return error;
}
@@ -1833,8 +1780,7 @@ xfs_refcount_adjust_cow(
return 0;
out_error:
- trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_refcount_adjust_cow_error(cur, error, _RET_IP_);
return error;
}
@@ -1847,8 +1793,7 @@ __xfs_refcount_cow_alloc(
xfs_agblock_t agbno,
xfs_extlen_t aglen)
{
- trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
- agbno, aglen);
+ trace_xfs_refcount_cow_increase(rcur, agbno, aglen);
/* Add refcount btree reservation */
return xfs_refcount_adjust_cow(rcur, agbno, aglen,
@@ -1864,8 +1809,7 @@ __xfs_refcount_cow_free(
xfs_agblock_t agbno,
xfs_extlen_t aglen)
{
- trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
- agbno, aglen);
+ trace_xfs_refcount_cow_decrease(rcur, agbno, aglen);
/* Remove refcount btree reservation */
return xfs_refcount_adjust_cow(rcur, agbno, aglen,
@@ -1935,7 +1879,8 @@ xfs_refcount_recover_extent(
INIT_LIST_HEAD(&rr->rr_list);
xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
- if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL ||
+ if (xfs_refcount_check_irec(to_perag(cur->bc_group), &rr->rr_rrec) !=
+ NULL ||
XFS_IS_CORRUPT(cur->bc_mp,
rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
xfs_btree_mark_sick(cur);
@@ -2010,19 +1955,15 @@ xfs_refcount_recover_cow_leftovers(
if (error)
goto out_free;
- trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
- &rr->rr_rrec);
-
/* Free the orphan record */
- fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
- rr->rr_rrec.rc_startblock);
+ fsb = xfs_agbno_to_fsb(pag, rr->rr_rrec.rc_startblock);
xfs_refcount_free_cow_extent(tp, fsb,
rr->rr_rrec.rc_blockcount);
/* Free the block. */
error = xfs_free_extent_later(tp, fsb,
rr->rr_rrec.rc_blockcount, NULL,
- XFS_AG_RESV_NONE, false);
+ XFS_AG_RESV_NONE, 0);
if (error)
goto out_trans;
@@ -2087,7 +2028,7 @@ xfs_refcount_query_range_helper(
xfs_failaddr_t fa;
xfs_refcount_btrec_to_irec(rec, &irec);
- fa = xfs_refcount_check_irec(cur->bc_ag.pag, &irec);
+ fa = xfs_refcount_check_irec(to_perag(cur->bc_group), &irec);
if (fa)
return xfs_refcount_complain_bad_rec(cur, fa, &irec);
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 9b56768a590c..62d78afcf1f3 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -48,9 +48,15 @@ enum xfs_refcount_intent_type {
XFS_REFCOUNT_FREE_COW,
};
+#define XFS_REFCOUNT_INTENT_STRINGS \
+ { XFS_REFCOUNT_INCREASE, "incr" }, \
+ { XFS_REFCOUNT_DECREASE, "decr" }, \
+ { XFS_REFCOUNT_ALLOC_COW, "alloc_cow" }, \
+ { XFS_REFCOUNT_FREE_COW, "free_cow" }
+
struct xfs_refcount_intent {
struct list_head ri_list;
- struct xfs_perag *ri_pag;
+ struct xfs_group *ri_group;
enum xfs_refcount_intent_type ri_type;
xfs_extlen_t ri_blockcount;
xfs_fsblock_t ri_startblock;
@@ -68,16 +74,11 @@ xfs_refcount_check_domain(
return true;
}
-void xfs_refcount_update_get_group(struct xfs_mount *mp,
- struct xfs_refcount_intent *ri);
-
void xfs_refcount_increase_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
void xfs_refcount_decrease_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
-extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp,
- struct xfs_btree_cur *rcur, int error);
extern int xfs_refcount_finish_one(struct xfs_trans *tp,
struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index ca59f6c89f3e..54505fee1852 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -30,7 +30,7 @@ xfs_refcountbt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp,
- cur->bc_ag.agbp, cur->bc_ag.pag);
+ cur->bc_ag.agbp, to_perag(cur->bc_group));
}
STATIC void
@@ -68,21 +68,20 @@ xfs_refcountbt_alloc_block(
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
- args.pag = cur->bc_ag.pag;
+ args.pag = to_perag(cur->bc_group);
args.oinfo = XFS_RMAP_OINFO_REFC;
args.minlen = args.maxlen = args.prod = 1;
args.resv = XFS_AG_RESV_METADATA;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno,
- xfs_refc_block(args.mp)));
+ xfs_agbno_to_fsb(args.pag, xfs_refc_block(args.mp)));
if (error)
goto out_error;
if (args.fsbno == NULLFSBLOCK) {
*stat = 0;
return 0;
}
- ASSERT(args.agno == cur->bc_ag.pag->pag_agno);
+ ASSERT(args.agno == cur->bc_group->xg_gno);
ASSERT(args.len == 1);
new->s = cpu_to_be32(args.agbno);
@@ -109,7 +108,7 @@ xfs_refcountbt_free_block(
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
- &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, false);
+ &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, 0);
}
STATIC int
@@ -170,7 +169,7 @@ xfs_refcountbt_init_ptr_from_cur(
{
struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno));
ptr->s = agf->agf_refcount_root;
}
@@ -362,11 +361,11 @@ xfs_refcountbt_init_cursor(
{
struct xfs_btree_cur *cur;
- ASSERT(pag->pag_agno < mp->m_sb.sb_agcount);
+ ASSERT(pag_agno(pag) < mp->m_sb.sb_agcount);
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_refcountbt_ops,
mp->m_refc_maxlevels, xfs_refcountbt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_refc.nr_ops = 0;
cur->bc_refc.shape_changes = 0;
cur->bc_ag.agbp = agbp;
@@ -417,9 +416,10 @@ xfs_refcountbt_block_maxrecs(
/*
* Calculate the number of records in a refcount btree block.
*/
-int
+unsigned int
xfs_refcountbt_maxrecs(
- int blocklen,
+ struct xfs_mount *mp,
+ unsigned int blocklen,
bool leaf)
{
blocklen -= XFS_REFCOUNT_BLOCK_LEN;
@@ -514,7 +514,7 @@ xfs_refcountbt_calc_reserves(
* never be available for the kinds of things that would require btree
* expansion. We therefore can pretend the space isn't there.
*/
- if (xfs_ag_contains_log(mp, pag->pag_agno))
+ if (xfs_ag_contains_log(mp, pag_agno(pag)))
agblocks -= mp->m_sb.sb_logblocks;
*ask += xfs_refcountbt_max_size(mp, agblocks);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h
index 1e0ab25f6c68..beb93bef6a81 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.h
+++ b/fs/xfs/libxfs/xfs_refcount_btree.h
@@ -48,7 +48,8 @@ struct xbtree_afakeroot;
extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp,
struct xfs_trans *tp, struct xfs_buf *agbp,
struct xfs_perag *pag);
-extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf);
+unsigned int xfs_refcountbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen,
+ bool leaf);
extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp);
extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp,
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index ef16f6f9cef6..d0df68dc3131 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -24,6 +24,7 @@
#include "xfs_inode.h"
#include "xfs_ag.h"
#include "xfs_health.h"
+#include "xfs_rmap_item.h"
struct kmem_cache *xfs_rmap_intent_cache;
@@ -100,8 +101,7 @@ xfs_rmap_update(
union xfs_btree_rec rec;
int error;
- trace_xfs_rmap_update(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- irec->rm_startblock, irec->rm_blockcount,
+ trace_xfs_rmap_update(cur, irec->rm_startblock, irec->rm_blockcount,
irec->rm_owner, irec->rm_offset, irec->rm_flags);
rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
@@ -111,8 +111,7 @@ xfs_rmap_update(
xfs_rmap_irec_offset_pack(irec));
error = xfs_btree_update(cur, &rec);
if (error)
- trace_xfs_rmap_update_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_update_error(cur, error, _RET_IP_);
return error;
}
@@ -128,8 +127,7 @@ xfs_rmap_insert(
int i;
int error;
- trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno,
- len, owner, offset, flags);
+ trace_xfs_rmap_insert(rcur, agbno, len, owner, offset, flags);
error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
if (error)
@@ -155,8 +153,7 @@ xfs_rmap_insert(
}
done:
if (error)
- trace_xfs_rmap_insert_error(rcur->bc_mp,
- rcur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_insert_error(rcur, error, _RET_IP_);
return error;
}
@@ -172,8 +169,7 @@ xfs_rmap_delete(
int i;
int error;
- trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno,
- len, owner, offset, flags);
+ trace_xfs_rmap_delete(rcur, agbno, len, owner, offset, flags);
error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
if (error)
@@ -194,8 +190,7 @@ xfs_rmap_delete(
}
done:
if (error)
- trace_xfs_rmap_delete_error(rcur->bc_mp,
- rcur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_delete_error(rcur, error, _RET_IP_);
return error;
}
@@ -218,7 +213,7 @@ xfs_rmap_check_irec(
struct xfs_perag *pag,
const struct xfs_rmap_irec *irec)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
bool is_inode;
bool is_unwritten;
bool is_bmbt;
@@ -274,9 +269,7 @@ xfs_rmap_check_btrec(
struct xfs_btree_cur *cur,
const struct xfs_rmap_irec *irec)
{
- if (xfs_btree_is_mem_rmap(cur->bc_ops))
- return xfs_rmap_check_irec(cur->bc_mem.pag, irec);
- return xfs_rmap_check_irec(cur->bc_ag.pag, irec);
+ return xfs_rmap_check_irec(to_perag(cur->bc_group), irec);
}
static inline int
@@ -293,7 +286,7 @@ xfs_rmap_complain_bad_rec(
else
xfs_warn(mp,
"Reverse Mapping BTree record corruption in AG %d detected at %pS!",
- cur->bc_ag.pag->pag_agno, fa);
+ cur->bc_group->xg_gno, fa);
xfs_warn(mp,
"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
irec->rm_owner, irec->rm_flags, irec->rm_startblock,
@@ -342,8 +335,7 @@ xfs_rmap_find_left_neighbor_helper(
{
struct xfs_find_left_neighbor_info *info = priv;
- trace_xfs_rmap_find_left_neighbor_candidate(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, rec->rm_startblock,
+ trace_xfs_rmap_find_left_neighbor_candidate(cur, rec->rm_startblock,
rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
rec->rm_flags);
@@ -393,8 +385,8 @@ xfs_rmap_find_left_neighbor(
info.high.rm_blockcount = 0;
info.irec = irec;
- trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
+ trace_xfs_rmap_find_left_neighbor_query(cur, bno, 0, owner, offset,
+ flags);
/*
* Historically, we always used the range query to walk every reverse
@@ -425,8 +417,7 @@ xfs_rmap_find_left_neighbor(
return error;
*stat = 1;
- trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+ trace_xfs_rmap_find_left_neighbor_result(cur, irec->rm_startblock,
irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
irec->rm_flags);
return 0;
@@ -441,8 +432,7 @@ xfs_rmap_lookup_le_range_helper(
{
struct xfs_find_left_neighbor_info *info = priv;
- trace_xfs_rmap_lookup_le_range_candidate(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, rec->rm_startblock,
+ trace_xfs_rmap_lookup_le_range_candidate(cur, rec->rm_startblock,
rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
rec->rm_flags);
@@ -489,8 +479,7 @@ xfs_rmap_lookup_le_range(
*stat = 0;
info.irec = irec;
- trace_xfs_rmap_lookup_le_range(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- bno, 0, owner, offset, flags);
+ trace_xfs_rmap_lookup_le_range(cur, bno, 0, owner, offset, flags);
/*
* Historically, we always used the range query to walk every reverse
@@ -521,8 +510,7 @@ xfs_rmap_lookup_le_range(
return error;
*stat = 1;
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+ trace_xfs_rmap_lookup_le_range_result(cur, irec->rm_startblock,
irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
irec->rm_flags);
return 0;
@@ -634,8 +622,7 @@ xfs_rmap_unmap(
(flags & XFS_RMAP_BMBT_BLOCK);
if (unwritten)
flags |= XFS_RMAP_UNWRITTEN;
- trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo);
/*
* We should always have a left record because there's a static record
@@ -651,10 +638,9 @@ xfs_rmap_unmap(
goto out_error;
}
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
- ltrec.rm_blockcount, ltrec.rm_owner,
- ltrec.rm_offset, ltrec.rm_flags);
+ trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock,
+ ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset,
+ ltrec.rm_flags);
ltoff = ltrec.rm_offset;
/*
@@ -721,10 +707,9 @@ xfs_rmap_unmap(
if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
/* exact match, simply remove the record from rmap tree */
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- ltrec.rm_startblock, ltrec.rm_blockcount,
- ltrec.rm_owner, ltrec.rm_offset,
- ltrec.rm_flags);
+ trace_xfs_rmap_delete(cur, ltrec.rm_startblock,
+ ltrec.rm_blockcount, ltrec.rm_owner,
+ ltrec.rm_offset, ltrec.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto out_error;
@@ -800,8 +785,7 @@ xfs_rmap_unmap(
else
cur->bc_rec.r.rm_offset = offset + len;
cur->bc_rec.r.rm_flags = flags;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno,
- cur->bc_rec.r.rm_startblock,
+ trace_xfs_rmap_insert(cur, cur->bc_rec.r.rm_startblock,
cur->bc_rec.r.rm_blockcount,
cur->bc_rec.r.rm_owner,
cur->bc_rec.r.rm_offset,
@@ -812,12 +796,10 @@ xfs_rmap_unmap(
}
out_done:
- trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo);
out_error:
if (error)
- trace_xfs_rmap_unmap_error(mp, cur->bc_ag.pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_rmap_unmap_error(cur, error, _RET_IP_);
return error;
}
@@ -851,7 +833,7 @@ xfs_rmap_hook_enable(void)
static inline void
xfs_rmap_update_hook(
struct xfs_trans *tp,
- struct xfs_perag *pag,
+ struct xfs_group *xg,
enum xfs_rmap_intent_type op,
xfs_agblock_t startblock,
xfs_extlen_t blockcount,
@@ -866,27 +848,27 @@ xfs_rmap_update_hook(
.oinfo = *oinfo, /* struct copy */
};
- if (pag)
- xfs_hooks_call(&pag->pag_rmap_update_hooks, op, &p);
+ if (xg)
+ xfs_hooks_call(&xg->xg_rmap_update_hooks, op, &p);
}
}
/* Call the specified function during a reverse mapping update. */
int
xfs_rmap_hook_add(
- struct xfs_perag *pag,
+ struct xfs_group *xg,
struct xfs_rmap_hook *hook)
{
- return xfs_hooks_add(&pag->pag_rmap_update_hooks, &hook->rmap_hook);
+ return xfs_hooks_add(&xg->xg_rmap_update_hooks, &hook->rmap_hook);
}
/* Stop calling the specified function during a reverse mapping update. */
void
xfs_rmap_hook_del(
- struct xfs_perag *pag,
+ struct xfs_group *xg,
struct xfs_rmap_hook *hook)
{
- xfs_hooks_del(&pag->pag_rmap_update_hooks, &hook->rmap_hook);
+ xfs_hooks_del(&xg->xg_rmap_update_hooks, &hook->rmap_hook);
}
/* Configure rmap update hook functions. */
@@ -921,7 +903,8 @@ xfs_rmap_free(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
- xfs_rmap_update_hook(tp, pag, XFS_RMAP_UNMAP, bno, len, false, oinfo);
+ xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_UNMAP, bno, len,
+ false, oinfo);
error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -987,8 +970,7 @@ xfs_rmap_map(
(flags & XFS_RMAP_BMBT_BLOCK);
if (unwritten)
flags |= XFS_RMAP_UNWRITTEN;
- trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo);
ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
/*
@@ -1001,8 +983,7 @@ xfs_rmap_map(
if (error)
goto out_error;
if (have_lt) {
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
+ trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock,
ltrec.rm_blockcount, ltrec.rm_owner,
ltrec.rm_offset, ltrec.rm_flags);
@@ -1040,10 +1021,10 @@ xfs_rmap_map(
error = -EFSCORRUPTED;
goto out_error;
}
- trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, gtrec.rm_startblock,
- gtrec.rm_blockcount, gtrec.rm_owner,
- gtrec.rm_offset, gtrec.rm_flags);
+ trace_xfs_rmap_find_right_neighbor_result(cur,
+ gtrec.rm_startblock, gtrec.rm_blockcount,
+ gtrec.rm_owner, gtrec.rm_offset,
+ gtrec.rm_flags);
if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
have_gt = 0;
}
@@ -1080,12 +1061,9 @@ xfs_rmap_map(
* result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
*/
ltrec.rm_blockcount += gtrec.rm_blockcount;
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- gtrec.rm_startblock,
- gtrec.rm_blockcount,
- gtrec.rm_owner,
- gtrec.rm_offset,
- gtrec.rm_flags);
+ trace_xfs_rmap_delete(cur, gtrec.rm_startblock,
+ gtrec.rm_blockcount, gtrec.rm_owner,
+ gtrec.rm_offset, gtrec.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto out_error;
@@ -1132,8 +1110,7 @@ xfs_rmap_map(
cur->bc_rec.r.rm_owner = owner;
cur->bc_rec.r.rm_offset = offset;
cur->bc_rec.r.rm_flags = flags;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len,
- owner, offset, flags);
+ trace_xfs_rmap_insert(cur, bno, len, owner, offset, flags);
error = xfs_btree_insert(cur, &i);
if (error)
goto out_error;
@@ -1144,12 +1121,10 @@ xfs_rmap_map(
}
}
- trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo);
out_error:
if (error)
- trace_xfs_rmap_map_error(mp, cur->bc_ag.pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_rmap_map_error(cur, error, _RET_IP_);
return error;
}
@@ -1173,7 +1148,8 @@ xfs_rmap_alloc(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
- xfs_rmap_update_hook(tp, pag, XFS_RMAP_MAP, bno, len, false, oinfo);
+ xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_MAP, bno, len, false,
+ oinfo);
error = xfs_rmap_map(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -1223,8 +1199,7 @@ xfs_rmap_convert(
(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
new_endoff = offset + len;
- trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo);
/*
* For the initial lookup, look for an exact match or the left-adjacent
@@ -1240,10 +1215,9 @@ xfs_rmap_convert(
goto done;
}
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, PREV.rm_startblock,
- PREV.rm_blockcount, PREV.rm_owner,
- PREV.rm_offset, PREV.rm_flags);
+ trace_xfs_rmap_lookup_le_range_result(cur, PREV.rm_startblock,
+ PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset,
+ PREV.rm_flags);
ASSERT(PREV.rm_offset <= offset);
ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
@@ -1284,10 +1258,9 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, LEFT.rm_startblock,
- LEFT.rm_blockcount, LEFT.rm_owner,
- LEFT.rm_offset, LEFT.rm_flags);
+ trace_xfs_rmap_find_left_neighbor_result(cur,
+ LEFT.rm_startblock, LEFT.rm_blockcount,
+ LEFT.rm_owner, LEFT.rm_offset, LEFT.rm_flags);
if (LEFT.rm_startblock + LEFT.rm_blockcount == bno &&
LEFT.rm_offset + LEFT.rm_blockcount == offset &&
xfs_rmap_is_mergeable(&LEFT, owner, newext))
@@ -1325,10 +1298,10 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock,
- RIGHT.rm_blockcount, RIGHT.rm_owner,
- RIGHT.rm_offset, RIGHT.rm_flags);
+ trace_xfs_rmap_find_right_neighbor_result(cur,
+ RIGHT.rm_startblock, RIGHT.rm_blockcount,
+ RIGHT.rm_owner, RIGHT.rm_offset,
+ RIGHT.rm_flags);
if (bno + len == RIGHT.rm_startblock &&
offset + len == RIGHT.rm_offset &&
xfs_rmap_is_mergeable(&RIGHT, owner, newext))
@@ -1344,8 +1317,7 @@ xfs_rmap_convert(
RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
state &= ~RMAP_RIGHT_CONTIG;
- trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state,
- _RET_IP_);
+ trace_xfs_rmap_convert_state(cur, state, _RET_IP_);
/* reset the cursor back to PREV */
error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i);
@@ -1376,10 +1348,9 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- RIGHT.rm_startblock, RIGHT.rm_blockcount,
- RIGHT.rm_owner, RIGHT.rm_offset,
- RIGHT.rm_flags);
+ trace_xfs_rmap_delete(cur, RIGHT.rm_startblock,
+ RIGHT.rm_blockcount, RIGHT.rm_owner,
+ RIGHT.rm_offset, RIGHT.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
@@ -1396,10 +1367,9 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- PREV.rm_startblock, PREV.rm_blockcount,
- PREV.rm_owner, PREV.rm_offset,
- PREV.rm_flags);
+ trace_xfs_rmap_delete(cur, PREV.rm_startblock,
+ PREV.rm_blockcount, PREV.rm_owner,
+ PREV.rm_offset, PREV.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
@@ -1428,10 +1398,9 @@ xfs_rmap_convert(
* Setting all of a previous oldext extent to newext.
* The left neighbor is contiguous, the right is not.
*/
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- PREV.rm_startblock, PREV.rm_blockcount,
- PREV.rm_owner, PREV.rm_offset,
- PREV.rm_flags);
+ trace_xfs_rmap_delete(cur, PREV.rm_startblock,
+ PREV.rm_blockcount, PREV.rm_owner,
+ PREV.rm_offset, PREV.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
@@ -1468,10 +1437,9 @@ xfs_rmap_convert(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
- RIGHT.rm_startblock, RIGHT.rm_blockcount,
- RIGHT.rm_owner, RIGHT.rm_offset,
- RIGHT.rm_flags);
+ trace_xfs_rmap_delete(cur, RIGHT.rm_startblock,
+ RIGHT.rm_blockcount, RIGHT.rm_owner,
+ RIGHT.rm_offset, RIGHT.rm_flags);
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
@@ -1549,8 +1517,7 @@ xfs_rmap_convert(
NEW.rm_blockcount = len;
NEW.rm_flags = newext;
cur->bc_rec.r = NEW;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno,
- len, owner, offset, newext);
+ trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext);
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
@@ -1608,8 +1575,7 @@ xfs_rmap_convert(
NEW.rm_blockcount = len;
NEW.rm_flags = newext;
cur->bc_rec.r = NEW;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno,
- len, owner, offset, newext);
+ trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext);
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
@@ -1640,9 +1606,8 @@ xfs_rmap_convert(
NEW = PREV;
NEW.rm_blockcount = offset - PREV.rm_offset;
cur->bc_rec.r = NEW;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno,
- NEW.rm_startblock, NEW.rm_blockcount,
- NEW.rm_owner, NEW.rm_offset,
+ trace_xfs_rmap_insert(cur, NEW.rm_startblock,
+ NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset,
NEW.rm_flags);
error = xfs_btree_insert(cur, &i);
if (error)
@@ -1669,8 +1634,7 @@ xfs_rmap_convert(
/* new middle extent - newext */
cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN;
cur->bc_rec.r.rm_flags |= newext;
- trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len,
- owner, offset, newext);
+ trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext);
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
@@ -1694,12 +1658,10 @@ xfs_rmap_convert(
ASSERT(0);
}
- trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo);
done:
if (error)
- trace_xfs_rmap_convert_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_convert_error(cur, error, _RET_IP_);
return error;
}
@@ -1735,8 +1697,7 @@ xfs_rmap_convert_shared(
(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
new_endoff = offset + len;
- trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo);
/*
* For the initial lookup, look for and exact match or the left-adjacent
@@ -1805,10 +1766,10 @@ xfs_rmap_convert_shared(
error = -EFSCORRUPTED;
goto done;
}
- trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock,
- RIGHT.rm_blockcount, RIGHT.rm_owner,
- RIGHT.rm_offset, RIGHT.rm_flags);
+ trace_xfs_rmap_find_right_neighbor_result(cur,
+ RIGHT.rm_startblock, RIGHT.rm_blockcount,
+ RIGHT.rm_owner, RIGHT.rm_offset,
+ RIGHT.rm_flags);
if (xfs_rmap_is_mergeable(&RIGHT, owner, newext))
state |= RMAP_RIGHT_CONTIG;
}
@@ -1822,8 +1783,7 @@ xfs_rmap_convert_shared(
RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
state &= ~RMAP_RIGHT_CONTIG;
- trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state,
- _RET_IP_);
+ trace_xfs_rmap_convert_state(cur, state, _RET_IP_);
/*
* Switch out based on the FILLING and CONTIG state bits.
*/
@@ -2121,12 +2081,10 @@ xfs_rmap_convert_shared(
ASSERT(0);
}
- trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo);
done:
if (error)
- trace_xfs_rmap_convert_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_convert_error(cur, error, _RET_IP_);
return error;
}
@@ -2164,8 +2122,7 @@ xfs_rmap_unmap_shared(
xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
if (unwritten)
flags |= XFS_RMAP_UNWRITTEN;
- trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo);
/*
* We should always have a left record because there's a static record
@@ -2321,12 +2278,10 @@ xfs_rmap_unmap_shared(
goto out_error;
}
- trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo);
out_error:
if (error)
- trace_xfs_rmap_unmap_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_unmap_error(cur, error, _RET_IP_);
return error;
}
@@ -2361,8 +2316,7 @@ xfs_rmap_map_shared(
xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
if (unwritten)
flags |= XFS_RMAP_UNWRITTEN;
- trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo);
/* Is there a left record that abuts our range? */
error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, flags,
@@ -2387,10 +2341,10 @@ xfs_rmap_map_shared(
error = -EFSCORRUPTED;
goto out_error;
}
- trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, gtrec.rm_startblock,
- gtrec.rm_blockcount, gtrec.rm_owner,
- gtrec.rm_offset, gtrec.rm_flags);
+ trace_xfs_rmap_find_right_neighbor_result(cur,
+ gtrec.rm_startblock, gtrec.rm_blockcount,
+ gtrec.rm_owner, gtrec.rm_offset,
+ gtrec.rm_flags);
if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
have_gt = 0;
@@ -2482,12 +2436,10 @@ xfs_rmap_map_shared(
goto out_error;
}
- trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
- unwritten, oinfo);
+ trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo);
out_error:
if (error)
- trace_xfs_rmap_map_error(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, error, _RET_IP_);
+ trace_xfs_rmap_map_error(cur, error, _RET_IP_);
return error;
}
@@ -2572,23 +2524,6 @@ xfs_rmap_query_all(
return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query);
}
-/* Clean up after calling xfs_rmap_finish_one. */
-void
-xfs_rmap_finish_one_cleanup(
- struct xfs_trans *tp,
- struct xfs_btree_cur *rcur,
- int error)
-{
- struct xfs_buf *agbp;
-
- if (rcur == NULL)
- return;
- agbp = rcur->bc_ag.agbp;
- xfs_btree_del_cursor(rcur, error);
- if (error)
- xfs_trans_brelse(tp, agbp);
-}
-
/* Commit an rmap operation into the ondisk tree. */
int
__xfs_rmap_finish_intent(
@@ -2634,20 +2569,15 @@ xfs_rmap_finish_one(
struct xfs_rmap_intent *ri,
struct xfs_btree_cur **pcur)
{
+ struct xfs_owner_info oinfo;
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_btree_cur *rcur;
+ struct xfs_btree_cur *rcur = *pcur;
struct xfs_buf *agbp = NULL;
- int error = 0;
- struct xfs_owner_info oinfo;
xfs_agblock_t bno;
bool unwritten;
+ int error = 0;
- bno = XFS_FSB_TO_AGBNO(mp, ri->ri_bmap.br_startblock);
-
- trace_xfs_rmap_deferred(mp, ri->ri_pag->pag_agno, ri->ri_type, bno,
- ri->ri_owner, ri->ri_whichfork,
- ri->ri_bmap.br_startoff, ri->ri_bmap.br_blockcount,
- ri->ri_bmap.br_state);
+ trace_xfs_rmap_deferred(mp, ri);
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE))
return -EIO;
@@ -2656,31 +2586,31 @@ xfs_rmap_finish_one(
* If we haven't gotten a cursor or the cursor AG doesn't match
* the startblock, get one now.
*/
- rcur = *pcur;
- if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
- xfs_rmap_finish_one_cleanup(tp, rcur, 0);
+ if (rcur != NULL && rcur->bc_group != ri->ri_group) {
+ xfs_btree_del_cursor(rcur, 0);
rcur = NULL;
*pcur = NULL;
}
if (rcur == NULL) {
+ struct xfs_perag *pag = to_perag(ri->ri_group);
+
/*
* Refresh the freelist before we start changing the
* rmapbt, because a shape change could cause us to
* allocate blocks.
*/
- error = xfs_free_extent_fix_freelist(tp, ri->ri_pag, &agbp);
+ error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
if (error) {
- xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
return error;
}
if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) {
- xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
return -EFSCORRUPTED;
}
- rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag);
+ *pcur = rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
}
- *pcur = rcur;
xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork,
ri->ri_bmap.br_startoff);
@@ -2692,7 +2622,7 @@ xfs_rmap_finish_one(
if (error)
return error;
- xfs_rmap_update_hook(tp, ri->ri_pag, ri->ri_type, bno,
+ xfs_rmap_update_hook(tp, ri->ri_group, ri->ri_type, bno,
ri->ri_bmap.br_blockcount, unwritten, &oinfo);
return 0;
}
@@ -2722,15 +2652,6 @@ __xfs_rmap_add(
{
struct xfs_rmap_intent *ri;
- trace_xfs_rmap_defer(tp->t_mountp,
- XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
- type,
- XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
- owner, whichfork,
- bmap->br_startoff,
- bmap->br_blockcount,
- bmap->br_state);
-
ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
INIT_LIST_HEAD(&ri->ri_list);
ri->ri_type = type;
@@ -2738,8 +2659,7 @@ __xfs_rmap_add(
ri->ri_whichfork = whichfork;
ri->ri_bmap = *bmap;
- xfs_rmap_update_get_group(tp->t_mountp, ri);
- xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type);
+ xfs_rmap_defer_add(tp, ri);
}
/* Map an extent into a file. */
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 9d01fe689497..96b4321d8310 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -157,18 +157,25 @@ enum xfs_rmap_intent_type {
XFS_RMAP_FREE,
};
+#define XFS_RMAP_INTENT_STRINGS \
+ { XFS_RMAP_MAP, "map" }, \
+ { XFS_RMAP_MAP_SHARED, "map_shared" }, \
+ { XFS_RMAP_UNMAP, "unmap" }, \
+ { XFS_RMAP_UNMAP_SHARED, "unmap_shared" }, \
+ { XFS_RMAP_CONVERT, "cvt" }, \
+ { XFS_RMAP_CONVERT_SHARED, "cvt_shared" }, \
+ { XFS_RMAP_ALLOC, "alloc" }, \
+ { XFS_RMAP_FREE, "free" }
+
struct xfs_rmap_intent {
struct list_head ri_list;
enum xfs_rmap_intent_type ri_type;
int ri_whichfork;
uint64_t ri_owner;
struct xfs_bmbt_irec ri_bmap;
- struct xfs_perag *ri_pag;
+ struct xfs_group *ri_group;
};
-void xfs_rmap_update_get_group(struct xfs_mount *mp,
- struct xfs_rmap_intent *ri);
-
/* functions for updating the rmapbt based on bmbt map/unmap operations */
void xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, struct xfs_bmbt_irec *imap);
@@ -182,8 +189,6 @@ void xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
void xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner);
-void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
- struct xfs_btree_cur *rcur, int error);
int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri,
struct xfs_btree_cur **pcur);
int __xfs_rmap_finish_intent(struct xfs_btree_cur *rcur,
@@ -259,8 +264,8 @@ struct xfs_rmap_hook {
void xfs_rmap_hook_disable(void);
void xfs_rmap_hook_enable(void);
-int xfs_rmap_hook_add(struct xfs_perag *pag, struct xfs_rmap_hook *hook);
-void xfs_rmap_hook_del(struct xfs_perag *pag, struct xfs_rmap_hook *hook);
+int xfs_rmap_hook_add(struct xfs_group *xg, struct xfs_rmap_hook *hook);
+void xfs_rmap_hook_del(struct xfs_group *xg, struct xfs_rmap_hook *hook);
void xfs_rmap_hook_setup(struct xfs_rmap_hook *hook, notifier_fn_t mod_fn);
#endif
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 9e759efa81cc..2cab694ac58a 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -57,7 +57,7 @@ xfs_rmapbt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
- cur->bc_ag.agbp, cur->bc_ag.pag);
+ cur->bc_ag.agbp, to_perag(cur->bc_group));
}
STATIC void
@@ -66,14 +66,15 @@ xfs_rmapbt_set_root(
const union xfs_btree_ptr *ptr,
int inc)
{
- struct xfs_buf *agbp = cur->bc_ag.agbp;
- struct xfs_agf *agf = agbp->b_addr;
+ struct xfs_buf *agbp = cur->bc_ag.agbp;
+ struct xfs_agf *agf = agbp->b_addr;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
ASSERT(ptr->s != 0);
agf->agf_rmap_root = ptr->s;
be32_add_cpu(&agf->agf_rmap_level, inc);
- cur->bc_ag.pag->pagf_rmap_level += inc;
+ pag->pagf_rmap_level += inc;
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
@@ -87,7 +88,8 @@ xfs_rmapbt_alloc_block(
{
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
+ struct xfs_alloc_arg args = { .len = 1 };
int error;
xfs_agblock_t bno;
@@ -101,13 +103,17 @@ xfs_rmapbt_alloc_block(
return 0;
}
- xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);
+ xfs_extent_busy_reuse(pag_group(pag), bno, 1, false);
new->s = cpu_to_be32(bno);
be32_add_cpu(&agf->agf_rmap_blocks, 1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
- xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);
+ /*
+ * Since rmapbt blocks are sourced from the AGFL, they are allocated one
+ * at a time and the reservation updates don't require a transaction.
+ */
+ xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
*stat = 1;
return 0;
@@ -120,7 +126,7 @@ xfs_rmapbt_free_block(
{
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
xfs_agblock_t bno;
int error;
@@ -131,7 +137,7 @@ xfs_rmapbt_free_block(
if (error)
return error;
- xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
+ xfs_extent_busy_insert(cur->bc_tp, pag_group(pag), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
@@ -222,7 +228,7 @@ xfs_rmapbt_init_ptr_from_cur(
{
struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno));
ptr->s = agf->agf_rmap_root;
}
@@ -533,7 +539,7 @@ xfs_rmapbt_init_cursor(
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_ops,
mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agf *agf = agbp->b_addr;
@@ -642,14 +648,13 @@ xfs_rmapbt_mem_cursor(
struct xfbtree *xfbt)
{
struct xfs_btree_cur *cur;
- struct xfs_mount *mp = pag->pag_mount;
- cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_mem_ops,
+ cur = xfs_btree_alloc_cursor(pag_mount(pag), tp, &xfs_rmapbt_mem_ops,
xfs_rmapbt_maxlevels_ondisk(), xfs_rmapbt_cur_cache);
cur->bc_mem.xfbtree = xfbt;
cur->bc_nlevels = xfbt->nlevels;
- cur->bc_mem.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
return cur;
}
@@ -726,10 +731,11 @@ xfs_rmapbt_block_maxrecs(
/*
* Calculate number of records in an rmap btree block.
*/
-int
+unsigned int
xfs_rmapbt_maxrecs(
- int blocklen,
- int leaf)
+ struct xfs_mount *mp,
+ unsigned int blocklen,
+ bool leaf)
{
blocklen -= XFS_RMAP_BLOCK_LEN;
return xfs_rmapbt_block_maxrecs(blocklen, leaf);
@@ -857,7 +863,7 @@ xfs_rmapbt_calc_reserves(
* never be available for the kinds of things that would require btree
* expansion. We therefore can pretend the space isn't there.
*/
- if (xfs_ag_contains_log(mp, pag->pag_agno))
+ if (xfs_ag_contains_log(mp, pag_agno(pag)))
agblocks -= mp->m_sb.sb_logblocks;
/* Reserve 1% of the AG or enough for 1 block per record. */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index eb90d89e8086..119b1567cd0e 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -47,7 +47,8 @@ struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
struct xfs_perag *pag);
void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur,
struct xfs_trans *tp, struct xfs_buf *agbp);
-int xfs_rmapbt_maxrecs(int blocklen, int leaf);
+unsigned int xfs_rmapbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen,
+ bool leaf);
extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index f246d6dbf4ec..4ddfb7e395b3 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -13,33 +13,94 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rtbitmap.h"
#include "xfs_health.h"
+#include "xfs_sb.h"
+#include "xfs_errortag.h"
+#include "xfs_log.h"
+#include "xfs_buf_item.h"
+#include "xfs_extent_busy.h"
/*
* Realtime allocator bitmap functions shared with userspace.
*/
-/*
- * Real time buffers need verifiers to avoid runtime warnings during IO.
- * We don't have anything to verify, however, so these are just dummy
- * operations.
- */
+/*
+ * Structure verifier for the header of an rt bitmap or summary block.
+ * Returns the failing address if the magic number does not verify, if the
+ * filesystem lacks either the rtgroups or the CRC feature, if the header UUID
+ * differs from sb_meta_uuid, or if the recorded block number differs from the
+ * buffer's disk address.  Returns NULL when every check passes.
+ */
+static xfs_failaddr_t
+xfs_rtbuf_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ if (!xfs_verify_magic(bp, hdr->rt_magic))
+ return __this_address;
+ if (!xfs_has_rtgroups(mp))
+ return __this_address;
+ if (!xfs_has_crc(mp))
+ return __this_address;
+ if (!uuid_equal(&hdr->rt_uuid, &mp->m_sb.sb_meta_uuid))
+ return __this_address;
+ if (hdr->rt_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
+ return __this_address;
+ return NULL;
+}
+
static void
xfs_rtbuf_verify_read(
- struct xfs_buf *bp)
+ struct xfs_buf *bp)
{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+ xfs_failaddr_t fa;
+
+ if (!xfs_has_rtgroups(mp))
+ return;
+
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr->rt_lsn))) {
+ fa = __this_address;
+ goto fail;
+ }
+
+ if (!xfs_buf_verify_cksum(bp, XFS_RTBUF_CRC_OFF)) {
+ fa = __this_address;
+ goto fail;
+ }
+
+ fa = xfs_rtbuf_verify(bp);
+ if (fa)
+ goto fail;
+
return;
+fail:
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
static void
xfs_rtbuf_verify_write(
struct xfs_buf *bp)
{
- return;
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+ xfs_failaddr_t fa;
+
+ if (!xfs_has_rtgroups(mp))
+ return;
+
+ fa = xfs_rtbuf_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ return;
+ }
+
+ if (bip)
+ hdr->rt_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ xfs_buf_update_cksum(bp, XFS_RTBUF_CRC_OFF);
}
const struct xfs_buf_ops xfs_rtbuf_ops = {
@@ -48,6 +109,22 @@ const struct xfs_buf_ops xfs_rtbuf_ops = {
.verify_write = xfs_rtbuf_verify_write,
};
+/*
+ * Buffer ops for rt bitmap and rt summary blocks.  The two tables differ only
+ * in their name and magic number; both use the same read, write and structure
+ * verifiers.
+ */
+const struct xfs_buf_ops xfs_rtbitmap_buf_ops = {
+ .name = "xfs_rtbitmap",
+ .magic = { 0, cpu_to_be32(XFS_RTBITMAP_MAGIC) },
+ .verify_read = xfs_rtbuf_verify_read,
+ .verify_write = xfs_rtbuf_verify_write,
+ .verify_struct = xfs_rtbuf_verify,
+};
+
+const struct xfs_buf_ops xfs_rtsummary_buf_ops = {
+ .name = "xfs_rtsummary",
+ .magic = { 0, cpu_to_be32(XFS_RTSUMMARY_MAGIC) },
+ .verify_read = xfs_rtbuf_verify_read,
+ .verify_write = xfs_rtbuf_verify_write,
+ .verify_struct = xfs_rtbuf_verify,
+};
+
/* Release cached rt bitmap and summary buffers. */
void
xfs_rtbuf_cache_relse(
@@ -69,32 +146,35 @@ xfs_rtbuf_cache_relse(
* Get a buffer for the bitmap or summary file block specified.
* The buffer is returned read and locked.
*/
-int
+static int
xfs_rtbuf_get(
struct xfs_rtalloc_args *args,
xfs_fileoff_t block, /* block number in bitmap or summary */
- int issum) /* is summary not bitmap */
+ enum xfs_rtg_inodes type)
{
+ struct xfs_inode *ip = args->rtg->rtg_inodes[type];
struct xfs_mount *mp = args->mp;
struct xfs_buf **cbpp; /* cached block buffer */
xfs_fileoff_t *coffp; /* cached block number */
struct xfs_buf *bp; /* block buffer, result */
- struct xfs_inode *ip; /* bitmap or summary inode */
struct xfs_bmbt_irec map;
- enum xfs_blft type;
+ enum xfs_blft buf_type;
int nmap = 1;
int error;
- if (issum) {
+ switch (type) {
+ case XFS_RTGI_SUMMARY:
cbpp = &args->sumbp;
coffp = &args->sumoff;
- ip = mp->m_rsumip;
- type = XFS_BLFT_RTSUMMARY_BUF;
- } else {
+ buf_type = XFS_BLFT_RTSUMMARY_BUF;
+ break;
+ case XFS_RTGI_BITMAP:
cbpp = &args->rbmbp;
coffp = &args->rbmoff;
- ip = mp->m_rbmip;
- type = XFS_BLFT_RTBITMAP_BUF;
+ buf_type = XFS_BLFT_RTBITMAP_BUF;
+ break;
+ default:
+ return -EINVAL;
}
/*
@@ -117,36 +197,74 @@ xfs_rtbuf_get(
return error;
if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_written_extent(&map))) {
- xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY :
- XFS_SICK_RT_BITMAP);
+ xfs_rtginode_mark_sick(args->rtg, type);
return -EFSCORRUPTED;
}
ASSERT(map.br_startblock != NULLFSBLOCK);
error = xfs_trans_read_buf(mp, args->tp, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, map.br_startblock),
- mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
+ mp->m_bsize, 0, &bp,
+ xfs_rtblock_ops(mp, type));
if (xfs_metadata_is_sick(error))
- xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY :
- XFS_SICK_RT_BITMAP);
+ xfs_rtginode_mark_sick(args->rtg, type);
if (error)
return error;
- xfs_trans_buf_set_type(args->tp, bp, type);
+ if (xfs_has_rtgroups(mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ if (hdr->rt_owner != cpu_to_be64(ip->i_ino)) {
+ xfs_buf_mark_corrupt(bp);
+ xfs_trans_brelse(args->tp, bp);
+ xfs_rtginode_mark_sick(args->rtg, type);
+ return -EFSCORRUPTED;
+ }
+ }
+
+ xfs_trans_buf_set_type(args->tp, bp, buf_type);
*cbpp = bp;
*coffp = block;
return 0;
}
+/*
+ * Read the rt bitmap block at file offset @block via xfs_rtbuf_get().  An
+ * offset at or beyond sb_rbmblocks is treated as corruption: the bitmap inode
+ * of args->rtg is marked sick and -EFSCORRUPTED is returned.
+ */
+int
+xfs_rtbitmap_read_buf(
+ struct xfs_rtalloc_args *args,
+ xfs_fileoff_t block)
+{
+ struct xfs_mount *mp = args->mp;
+
+ if (XFS_IS_CORRUPT(mp, block >= mp->m_sb.sb_rbmblocks)) {
+ xfs_rtginode_mark_sick(args->rtg, XFS_RTGI_BITMAP);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_rtbuf_get(args, block, XFS_RTGI_BITMAP);
+}
+
+/*
+ * Read the rt summary block at file offset @block via xfs_rtbuf_get().  An
+ * offset at or beyond m_rsumblocks is treated as corruption: the summary
+ * inode of args->rtg is marked sick and -EFSCORRUPTED is returned.
+ */
+int
+xfs_rtsummary_read_buf(
+ struct xfs_rtalloc_args *args,
+ xfs_fileoff_t block)
+{
+ struct xfs_mount *mp = args->mp;
+
+ if (XFS_IS_CORRUPT(mp, block >= mp->m_rsumblocks)) {
+ xfs_rtginode_mark_sick(args->rtg, XFS_RTGI_SUMMARY);
+ return -EFSCORRUPTED;
+ }
+ return xfs_rtbuf_get(args, block, XFS_RTGI_SUMMARY);
+}
+
/*
- * Searching backward from start to limit, find the first block whose
- * allocated/free state is different from start's.
+ * Searching backward from start, find the first block whose allocated/free
+ * state is different from start's.
*/
int
xfs_rtfind_back(
struct xfs_rtalloc_args *args,
xfs_rtxnum_t start, /* starting rtext to look at */
- xfs_rtxnum_t limit, /* last rtext to look at */
xfs_rtxnum_t *rtx) /* out: start rtext found */
{
struct xfs_mount *mp = args->mp;
@@ -175,7 +293,7 @@ xfs_rtfind_back(
*/
word = xfs_rtx_to_rbmword(mp, start);
bit = (int)(start & (XFS_NBWORD - 1));
- len = start - limit + 1;
+ len = start + 1;
/*
* Compute match value, based on the bit at start: if 1 (free)
* then all-ones, else all-zeroes.
@@ -316,6 +434,8 @@ xfs_rtfind_forw(
xfs_rtword_t incore;
unsigned int word; /* word number in the buffer */
+ ASSERT(start <= limit);
+
/*
* Compute and read in starting bitmap block for starting block.
*/
@@ -471,6 +591,7 @@ xfs_rtmodify_summary(
{
struct xfs_mount *mp = args->mp;
xfs_rtsumoff_t so = xfs_rtsumoffs(mp, log, bbno);
+ uint8_t *rsum_cache = args->rtg->rtg_rsum_cache;
unsigned int infoword;
xfs_suminfo_t val;
int error;
@@ -482,11 +603,11 @@ xfs_rtmodify_summary(
infoword = xfs_rtsumoffs_to_infoword(mp, so);
val = xfs_suminfo_add(args, infoword, delta);
- if (mp->m_rsum_cache) {
- if (val == 0 && log + 1 == mp->m_rsum_cache[bbno])
- mp->m_rsum_cache[bbno] = log;
- if (val != 0 && log >= mp->m_rsum_cache[bbno])
- mp->m_rsum_cache[bbno] = log + 1;
+ if (rsum_cache) {
+ if (val == 0 && log + 1 == rsum_cache[bbno])
+ rsum_cache[bbno] = log;
+ if (val != 0 && log >= rsum_cache[bbno])
+ rsum_cache[bbno] = log + 1;
}
xfs_trans_log_rtsummary(args, infoword);
@@ -698,14 +819,14 @@ xfs_rtfree_range(
* We need to find the beginning and end of the extent so we can
* properly update the summary.
*/
- error = xfs_rtfind_back(args, start, 0, &preblock);
+ error = xfs_rtfind_back(args, start, &preblock);
if (error) {
return error;
}
/*
* Find the next allocated block (end of allocated extent).
*/
- error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1,
+ error = xfs_rtfind_forw(args, end, args->rtg->rtg_extents - 1,
&postblock);
if (error)
return error;
@@ -929,19 +1050,25 @@ xfs_rtcheck_alloc_range(
int
xfs_rtfree_extent(
struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_rtgroup *rtg,
xfs_rtxnum_t start, /* starting rtext number to free */
xfs_rtxlen_t len) /* length of extent freed */
{
struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
struct xfs_rtalloc_args args = {
.mp = mp,
.tp = tp,
+ .rtg = rtg,
};
int error;
struct timespec64 atime;
- ASSERT(mp->m_rbmip->i_itemp != NULL);
- xfs_assert_ilocked(mp->m_rbmip, XFS_ILOCK_EXCL);
+ ASSERT(rbmip->i_itemp != NULL);
+ xfs_assert_ilocked(rbmip, XFS_ILOCK_EXCL);
+
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FREE_EXTENT))
+ return -EIO;
error = xfs_rtcheck_alloc_range(&args, start, len);
if (error)
@@ -958,19 +1085,21 @@ xfs_rtfree_extent(
* Mark more blocks free in the superblock.
*/
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
+
/*
* If we've now freed all the blocks, reset the file sequence
- * number to 0.
+ * number to 0 for pre-RTG file systems.
*/
- if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
+ if (!xfs_has_rtgroups(mp) &&
+ tp->t_frextents_delta + mp->m_sb.sb_frextents ==
mp->m_sb.sb_rextents) {
- if (!(mp->m_rbmip->i_diflags & XFS_DIFLAG_NEWRTBM))
- mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
+ if (!(rbmip->i_diflags & XFS_DIFLAG_NEWRTBM))
+ rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
- atime = inode_get_atime(VFS_I(mp->m_rbmip));
+ atime = inode_get_atime(VFS_I(rbmip));
atime.tv_sec = 0;
- inode_set_atime_to_ts(VFS_I(mp->m_rbmip), atime);
- xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
+ inode_set_atime_to_ts(VFS_I(rbmip), atime);
+ xfs_trans_log_inode(tp, rbmip, XFS_ILOG_CORE);
}
error = 0;
out:
@@ -986,84 +1115,92 @@ out:
int
xfs_rtfree_blocks(
struct xfs_trans *tp,
+ struct xfs_rtgroup *rtg,
xfs_fsblock_t rtbno,
xfs_filblks_t rtlen)
{
struct xfs_mount *mp = tp->t_mountp;
- xfs_rtxnum_t start;
- xfs_filblks_t len;
xfs_extlen_t mod;
+ int error;
ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN);
- len = xfs_rtb_to_rtxrem(mp, rtlen, &mod);
+ mod = xfs_blen_to_rtxoff(mp, rtlen);
if (mod) {
ASSERT(mod == 0);
return -EIO;
}
- start = xfs_rtb_to_rtxrem(mp, rtbno, &mod);
+ mod = xfs_rtb_to_rtxoff(mp, rtbno);
if (mod) {
ASSERT(mod == 0);
return -EIO;
}
- return xfs_rtfree_extent(tp, start, len);
+ error = xfs_rtfree_extent(tp, rtg, xfs_rtb_to_rtx(mp, rtbno),
+ xfs_extlen_to_rtxlen(mp, rtlen));
+ if (error)
+ return error;
+
+ if (xfs_has_rtgroups(mp))
+ xfs_extent_busy_insert(tp, rtg_group(rtg),
+ xfs_rtb_to_rgbno(mp, rtbno), rtlen, 0);
+
+ return 0;
}
/* Find all the free records within a given range. */
int
xfs_rtalloc_query_range(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
- const struct xfs_rtalloc_rec *low_rec,
- const struct xfs_rtalloc_rec *high_rec,
+ xfs_rtxnum_t start,
+ xfs_rtxnum_t end,
xfs_rtalloc_query_range_fn fn,
void *priv)
{
+ struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_rtalloc_args args = {
+ .rtg = rtg,
.mp = mp,
.tp = tp,
};
- struct xfs_rtalloc_rec rec;
- xfs_rtxnum_t rtstart;
- xfs_rtxnum_t rtend;
- xfs_rtxnum_t high_key;
- int is_free;
int error = 0;
- if (low_rec->ar_startext > high_rec->ar_startext)
+ if (start > end)
return -EINVAL;
- if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
- low_rec->ar_startext == high_rec->ar_startext)
+ if (start == end || start >= rtg->rtg_extents)
return 0;
- high_key = min(high_rec->ar_startext, mp->m_sb.sb_rextents - 1);
+ end = min(end, rtg->rtg_extents - 1);
/* Iterate the bitmap, looking for discrepancies. */
- rtstart = low_rec->ar_startext;
- while (rtstart <= high_key) {
+ while (start <= end) {
+ struct xfs_rtalloc_rec rec;
+ int is_free;
+ xfs_rtxnum_t rtend;
+
/* Is the first block free? */
- error = xfs_rtcheck_range(&args, rtstart, 1, 1, &rtend,
+ error = xfs_rtcheck_range(&args, start, 1, 1, &rtend,
&is_free);
if (error)
break;
/* How long does the extent go for? */
- error = xfs_rtfind_forw(&args, rtstart, high_key, &rtend);
+ error = xfs_rtfind_forw(&args, start, end, &rtend);
if (error)
break;
if (is_free) {
- rec.ar_startext = rtstart;
- rec.ar_extcount = rtend - rtstart + 1;
+ rec.ar_startext = start;
+ rec.ar_extcount = rtend - start + 1;
- error = fn(mp, tp, &rec, priv);
+ error = fn(rtg, tp, &rec, priv);
if (error)
break;
}
- rtstart = rtend + 1;
+ start = rtend + 1;
}
xfs_rtbuf_cache_relse(&args);
@@ -1073,31 +1210,27 @@ xfs_rtalloc_query_range(
/* Find all the free records. */
int
xfs_rtalloc_query_all(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
xfs_rtalloc_query_range_fn fn,
void *priv)
{
- struct xfs_rtalloc_rec keys[2];
-
- keys[0].ar_startext = 0;
- keys[1].ar_startext = mp->m_sb.sb_rextents - 1;
- keys[0].ar_extcount = keys[1].ar_extcount = 0;
-
- return xfs_rtalloc_query_range(mp, tp, &keys[0], &keys[1], fn, priv);
+ return xfs_rtalloc_query_range(rtg, tp, 0, rtg->rtg_extents - 1, fn,
+ priv);
}
/* Is the given extent all free? */
int
xfs_rtalloc_extent_is_free(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
xfs_rtxnum_t start,
xfs_rtxlen_t len,
bool *is_free)
{
struct xfs_rtalloc_args args = {
- .mp = mp,
+ .mp = rtg_mount(rtg),
+ .rtg = rtg,
.tp = tp,
};
xfs_rtxnum_t end;
@@ -1113,58 +1246,248 @@ xfs_rtalloc_extent_is_free(
return 0;
}
+/*
+ * Compute the number of rt extents tracked by a single bitmap block: the
+ * usable bytes in the block multiplied by NBBY.  With rtgroups enabled,
+ * sizeof(struct xfs_rtbuf_blkinfo) bytes of each block are excluded from the
+ * usable space.
+ */
+xfs_rtxnum_t
+xfs_rtbitmap_rtx_per_rbmblock(
+ struct xfs_mount *mp)
+{
+ unsigned int rbmblock_bytes = mp->m_sb.sb_blocksize;
+
+ if (xfs_has_rtgroups(mp))
+ rbmblock_bytes -= sizeof(struct xfs_rtbuf_blkinfo);
+
+ return rbmblock_bytes * NBBY;
+}
+
/*
* Compute the number of rtbitmap blocks needed to track the given number of rt
* extents.
*/
xfs_filblks_t
-xfs_rtbitmap_blockcount(
+xfs_rtbitmap_blockcount_len(
struct xfs_mount *mp,
xfs_rtbxlen_t rtextents)
{
- return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize);
+ return howmany_64(rtextents, xfs_rtbitmap_rtx_per_rbmblock(mp));
+}
+
+/*
+ * How many rt extents does each rtbitmap file track?  Returns zero when
+ * sb_rextents is zero, sb_rgextents when rtgroups are enabled, and
+ * sb_rextents otherwise.
+ */
+static inline xfs_rtbxlen_t
+xfs_rtbitmap_bitcount(
+ struct xfs_mount *mp)
+{
+ if (!mp->m_sb.sb_rextents)
+ return 0;
+
+ /* rtgroup size can be nonzero even if rextents is zero */
+ if (xfs_has_rtgroups(mp))
+ return mp->m_sb.sb_rgextents;
+
+ return mp->m_sb.sb_rextents;
+}
/*
- * Compute the number of rtbitmap words needed to populate every block of a
- * bitmap that is large enough to track the given number of rt extents.
+ * Compute the number of rtbitmap blocks used for a given file system.
*/
-unsigned long long
-xfs_rtbitmap_wordcount(
- struct xfs_mount *mp,
- xfs_rtbxlen_t rtextents)
+xfs_filblks_t
+xfs_rtbitmap_blockcount(
+ struct xfs_mount *mp)
{
- xfs_filblks_t blocks;
-
- blocks = xfs_rtbitmap_blockcount(mp, rtextents);
- return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG;
+ return xfs_rtbitmap_blockcount_len(mp, xfs_rtbitmap_bitcount(mp));
}
-/* Compute the number of rtsummary blocks needed to track the given rt space. */
+/*
+ * Compute the geometry of the rtsummary file needed to track the given rt
+ * space.
+ */
xfs_filblks_t
xfs_rtsummary_blockcount(
struct xfs_mount *mp,
- unsigned int rsumlevels,
- xfs_extlen_t rbmblocks)
+ unsigned int *rsumlevels)
{
+ xfs_rtbxlen_t rextents = xfs_rtbitmap_bitcount(mp);
unsigned long long rsumwords;
- rsumwords = (unsigned long long)rsumlevels * rbmblocks;
- return XFS_B_TO_FSB(mp, rsumwords << XFS_WORDLOG);
+ *rsumlevels = xfs_compute_rextslog(rextents) + 1;
+ rsumwords = xfs_rtbitmap_blockcount_len(mp, rextents) * (*rsumlevels);
+ return howmany_64(rsumwords, mp->m_blockwsize);
+}
+
+/*
+ * Allocate blocks to the data fork of @ip starting at file offset
+ * @offset_fsb, asking for @count_fsb blocks, in one tr_growrtalloc
+ * transaction.  The inode is locked ILOCK_EXCL and joined to the transaction.
+ * The mapping that xfs_bmapi_write() produced is returned in @map.  The
+ * transaction is cancelled if extending the extent count or the allocation
+ * fails; otherwise the result of the commit is returned.
+ */
+static int
+xfs_rtfile_alloc_blocks(
+ struct xfs_inode *ip,
+ xfs_fileoff_t offset_fsb,
+ xfs_filblks_t count_fsb,
+ struct xfs_bmbt_irec *map)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int nmap = 1;
+ int error;
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc,
+ XFS_GROWFSRT_SPACE_RES(mp, count_fsb), 0, 0, &tp);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+ error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
+ XFS_IEXT_ADD_NOSPLIT_CNT);
+ if (error)
+ goto out_trans_cancel;
+
+ error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
+ XFS_BMAPI_METADATA, 0, map, &nmap);
+ if (error)
+ goto out_trans_cancel;
+
+ return xfs_trans_commit(tp);
+
+out_trans_cancel:
+ xfs_trans_cancel(tp);
+ return error;
+}
+
+/*
+ * Initialize one block of an rtgroup's bitmap or summary file in its own
+ * tr_growrtzero transaction.  Gets a buffer for @fsbno, sets its buffer type
+ * and ops for @type, and, when rtgroups are enabled, fills in the block header
+ * (magic, owner inode, disk address, zero LSN, metadata UUID).  The payload
+ * after the header is then copied from @data, or zeroed if @data is NULL, for
+ * m_blockwsize words, and the whole block is logged before commit.
+ *
+ * Returns -EINVAL if @type is neither XFS_RTGI_BITMAP nor XFS_RTGI_SUMMARY,
+ * or the error from transaction allocation, buffer lookup or commit.
+ */
+static int
+xfs_rtfile_initialize_block(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type,
+ xfs_fsblock_t fsbno,
+ void *data)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_inode *ip = rtg->rtg_inodes[type];
+ struct xfs_trans *tp;
+ struct xfs_buf *bp;
+ void *bufdata;
+ const size_t copylen = mp->m_blockwsize << XFS_WORDLOG;
+ enum xfs_blft buf_type;
+ int error;
+
+ if (type == XFS_RTGI_BITMAP)
+ buf_type = XFS_BLFT_RTBITMAP_BUF;
+ else if (type == XFS_RTGI_SUMMARY)
+ buf_type = XFS_BLFT_RTSUMMARY_BUF;
+ else
+ return -EINVAL;
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero, 0, 0, 0, &tp);
+ if (error)
+ return error;
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp);
+ if (error) {
+ xfs_trans_cancel(tp);
+ return error;
+ }
+ bufdata = bp->b_addr;
+
+ xfs_trans_buf_set_type(tp, bp, buf_type);
+ bp->b_ops = xfs_rtblock_ops(mp, type);
+
+ if (xfs_has_rtgroups(mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ if (type == XFS_RTGI_BITMAP)
+ hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
+ else
+ hdr->rt_magic = cpu_to_be32(XFS_RTSUMMARY_MAGIC);
+ hdr->rt_owner = cpu_to_be64(ip->i_ino);
+ hdr->rt_blkno = cpu_to_be64(XFS_FSB_TO_DADDR(mp, fsbno));
+ hdr->rt_lsn = 0;
+ uuid_copy(&hdr->rt_uuid, &mp->m_sb.sb_meta_uuid);
+
+ bufdata += sizeof(*hdr);
+ }
+
+ if (data)
+ memcpy(bufdata, data, copylen);
+ else
+ memset(bufdata, 0, copylen);
+ xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
+ return xfs_trans_commit(tp);
}
/*
- * Compute the number of rtsummary info words needed to populate every block of
- * a summary file that is large enough to track the given rt space.
+ * Allocate space to the bitmap or summary file, and zero it, for growfs.
+ * @data must be a contiguous buffer large enough to fill all blocks in the
+ * file; or NULL to initialize the contents to zeroes.
*/
-unsigned long long
-xfs_rtsummary_wordcount(
- struct xfs_mount *mp,
- unsigned int rsumlevels,
- xfs_extlen_t rbmblocks)
+int
+xfs_rtfile_initialize_blocks(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type,
+ xfs_fileoff_t offset_fsb, /* offset to start from */
+ xfs_fileoff_t end_fsb, /* offset to allocate to */
+ void *data) /* data to fill the blocks */
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ const size_t copylen = mp->m_blockwsize << XFS_WORDLOG;
+
+ while (offset_fsb < end_fsb) {
+ struct xfs_bmbt_irec map;
+ xfs_filblks_t i;
+ int error;
+
+ error = xfs_rtfile_alloc_blocks(rtg->rtg_inodes[type],
+ offset_fsb, end_fsb - offset_fsb, &map);
+ if (error)
+ return error;
+
+ /*
+ * Now we need to clear the allocated blocks.
+ *
+ * Do this one block per transaction, to keep it simple.
+ */
+ for (i = 0; i < map.br_blockcount; i++) {
+ error = xfs_rtfile_initialize_block(rtg, type,
+ map.br_startblock + i, data);
+ if (error)
+ return error;
+ if (data)
+ data += copylen;
+ }
+
+ offset_fsb = map.br_startoff + map.br_blockcount;
+ }
+
+ return 0;
+}
+
+int
+xfs_rtbitmap_create(
+ struct xfs_rtgroup *rtg,
+ struct xfs_inode *ip,
+ struct xfs_trans *tp,
+ bool init)
{
- xfs_filblks_t blocks;
+ struct xfs_mount *mp = rtg_mount(rtg);
- blocks = xfs_rtsummary_blockcount(mp, rsumlevels, rbmblocks);
- return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG;
+ ip->i_disk_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
+ if (init && !xfs_has_rtgroups(mp)) {
+ ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
+ inode_set_atime(VFS_I(ip), 0, 0);
+ }
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return 0;
+}
+
+/*
+ * Set up a newly created rt summary inode: set its on-disk size to
+ * m_rsumblocks filesystem blocks and log the inode core in @tp.  @init is not
+ * used by this function.  Always returns 0.
+ */
+int
+xfs_rtsummary_create(
+ struct xfs_rtgroup *rtg,
+ struct xfs_inode *ip,
+ struct xfs_trans *tp,
+ bool init)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+
+ ip->i_disk_size = mp->m_rsumblocks * mp->m_sb.sb_blocksize;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return 0;
}
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h
index 152a66750af5..16563a44bd13 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.h
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -6,7 +6,10 @@
#ifndef __XFS_RTBITMAP_H__
#define __XFS_RTBITMAP_H__
+#include "xfs_rtgroup.h"
+
struct xfs_rtalloc_args {
+ struct xfs_rtgroup *rtg;
struct xfs_mount *mp;
struct xfs_trans *tp;
@@ -19,13 +22,37 @@ struct xfs_rtalloc_args {
static inline xfs_rtblock_t
xfs_rtx_to_rtb(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
xfs_rtxnum_t rtx)
{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ xfs_rtblock_t start = xfs_group_start_fsb(rtg_group(rtg));
+
+ if (mp->m_rtxblklog >= 0)
+ return start + (rtx << mp->m_rtxblklog);
+ return start + (rtx * mp->m_sb.sb_rextsize);
+}
+
+/*
+ * Convert an rgbno into an rt extent number: shift right by m_rtxblklog when
+ * that value is non-negative, otherwise divide by sb_rextsize.
+ */
+static inline xfs_rtxnum_t
+xfs_rgbno_to_rtx(
+ struct xfs_mount *mp,
+ xfs_rgblock_t rgbno)
+{
+ if (likely(mp->m_rtxblklog >= 0))
+ return rgbno >> mp->m_rtxblklog;
+ return rgbno / mp->m_sb.sb_rextsize;
+}
+
+static inline uint64_t
+xfs_rtbxlen_to_blen(
+ struct xfs_mount *mp,
+ xfs_rtbxlen_t rtbxlen)
+{
if (mp->m_rtxblklog >= 0)
- return rtx << mp->m_rtxblklog;
+ return rtbxlen << mp->m_rtxblklog;
- return rtx * mp->m_sb.sb_rextsize;
+ return rtbxlen * mp->m_sb.sb_rextsize;
}
static inline xfs_extlen_t
@@ -62,84 +89,81 @@ xfs_extlen_to_rtxlen(
return len / mp->m_sb.sb_rextsize;
}
-/* Convert an rt block number into an rt extent number. */
-static inline xfs_rtxnum_t
-xfs_rtb_to_rtx(
+/* Convert an rt block count into an rt extent count. */
+static inline xfs_rtbxlen_t
+xfs_blen_to_rtbxlen(
struct xfs_mount *mp,
- xfs_rtblock_t rtbno)
+ uint64_t blen)
{
if (likely(mp->m_rtxblklog >= 0))
- return rtbno >> mp->m_rtxblklog;
+ return blen >> mp->m_rtxblklog;
- return div_u64(rtbno, mp->m_sb.sb_rextsize);
+ return div_u64(blen, mp->m_sb.sb_rextsize);
}
-/* Return the offset of an rt block number within an rt extent. */
+/* Return the offset of a file block length within an rt extent. */
static inline xfs_extlen_t
-xfs_rtb_to_rtxoff(
+xfs_blen_to_rtxoff(
struct xfs_mount *mp,
- xfs_rtblock_t rtbno)
+ xfs_filblks_t blen)
{
if (likely(mp->m_rtxblklog >= 0))
- return rtbno & mp->m_rtxblkmask;
+ return blen & mp->m_rtxblkmask;
- return do_div(rtbno, mp->m_sb.sb_rextsize);
+ return do_div(blen, mp->m_sb.sb_rextsize);
}
-/*
- * Crack an rt block number into an rt extent number and an offset within that
- * rt extent. Returns the rt extent number directly and the offset in @off.
- */
-static inline xfs_rtxnum_t
-xfs_rtb_to_rtxrem(
+/* Round this block count up to the nearest rt extent size. */
+static inline xfs_filblks_t
+xfs_blen_roundup_rtx(
struct xfs_mount *mp,
- xfs_rtblock_t rtbno,
- xfs_extlen_t *off)
+ xfs_filblks_t blen)
{
- if (likely(mp->m_rtxblklog >= 0)) {
- *off = rtbno & mp->m_rtxblkmask;
- return rtbno >> mp->m_rtxblklog;
- }
-
- return div_u64_rem(rtbno, mp->m_sb.sb_rextsize, off);
+ return roundup_64(blen, mp->m_sb.sb_rextsize);
}
-/*
- * Convert an rt block number into an rt extent number, rounding up to the next
- * rt extent if the rt block is not aligned to an rt extent boundary.
- */
+/* Convert an rt block number into an rt extent number. */
static inline xfs_rtxnum_t
-xfs_rtb_to_rtxup(
+xfs_rtb_to_rtx(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
- if (likely(mp->m_rtxblklog >= 0)) {
- if (rtbno & mp->m_rtxblkmask)
- return (rtbno >> mp->m_rtxblklog) + 1;
+ /* open-coded 64-bit masking operation */
+ rtbno &= mp->m_groups[XG_TYPE_RTG].blkmask;
+ if (likely(mp->m_rtxblklog >= 0))
return rtbno >> mp->m_rtxblklog;
- }
+ return div_u64(rtbno, mp->m_sb.sb_rextsize);
+}
- if (do_div(rtbno, mp->m_sb.sb_rextsize))
- rtbno++;
- return rtbno;
+/* Return the offset of an rt block number within an rt extent. */
+static inline xfs_extlen_t
+xfs_rtb_to_rtxoff(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ /* open-coded 64-bit masking operation */
+ rtbno &= mp->m_groups[XG_TYPE_RTG].blkmask;
+ if (likely(mp->m_rtxblklog >= 0))
+ return rtbno & mp->m_rtxblkmask;
+ return do_div(rtbno, mp->m_sb.sb_rextsize);
}
-/* Round this rtblock up to the nearest rt extent size. */
+/* Round this file block offset up to the nearest rt extent size. */
static inline xfs_rtblock_t
-xfs_rtb_roundup_rtx(
+xfs_fileoff_roundup_rtx(
struct xfs_mount *mp,
- xfs_rtblock_t rtbno)
+ xfs_fileoff_t off)
{
- return roundup_64(rtbno, mp->m_sb.sb_rextsize);
+ return roundup_64(off, mp->m_sb.sb_rextsize);
}
-/* Round this rtblock down to the nearest rt extent size. */
+/* Round this file block offset down to the nearest rt extent size. */
static inline xfs_rtblock_t
-xfs_rtb_rounddown_rtx(
+xfs_fileoff_rounddown_rtx(
struct xfs_mount *mp,
- xfs_rtblock_t rtbno)
+ xfs_fileoff_t off)
{
- return rounddown_64(rtbno, mp->m_sb.sb_rextsize);
+ return rounddown_64(off, mp->m_sb.sb_rextsize);
}
/* Convert an rt extent number to a file block offset in the rt bitmap file. */
@@ -148,6 +172,9 @@ xfs_rtx_to_rbmblock(
struct xfs_mount *mp,
xfs_rtxnum_t rtx)
{
+ if (xfs_has_rtgroups(mp))
+ return div_u64(rtx, mp->m_rtx_per_rbmblock);
+
return rtx >> mp->m_blkbit_log;
}
@@ -157,6 +184,13 @@ xfs_rtx_to_rbmword(
struct xfs_mount *mp,
xfs_rtxnum_t rtx)
{
+ if (xfs_has_rtgroups(mp)) {
+ unsigned int mod;
+
+ div_u64_rem(rtx >> XFS_NBWORDLOG, mp->m_blockwsize, &mod);
+ return mod;
+ }
+
return (rtx >> XFS_NBWORDLOG) & (mp->m_blockwsize - 1);
}
@@ -166,6 +200,9 @@ xfs_rbmblock_to_rtx(
struct xfs_mount *mp,
xfs_fileoff_t rbmoff)
{
+ if (xfs_has_rtgroups(mp))
+ return rbmoff * mp->m_rtx_per_rbmblock;
+
return rbmoff << mp->m_blkbit_log;
}
@@ -175,7 +212,14 @@ xfs_rbmblock_wordptr(
struct xfs_rtalloc_args *args,
unsigned int index)
{
- union xfs_rtword_raw *words = args->rbmbp->b_addr;
+ struct xfs_mount *mp = args->mp;
+ union xfs_rtword_raw *words;
+ struct xfs_rtbuf_blkinfo *hdr = args->rbmbp->b_addr;
+
+ if (xfs_has_rtgroups(mp))
+ words = (union xfs_rtword_raw *)(hdr + 1);
+ else
+ words = args->rbmbp->b_addr;
return words + index;
}
@@ -188,6 +232,8 @@ xfs_rtbitmap_getword(
{
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
+ if (xfs_has_rtgroups(args->mp))
+ return be32_to_cpu(word->rtg);
return word->old;
}
@@ -200,7 +246,10 @@ xfs_rtbitmap_setword(
{
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
- word->old = value;
+ if (xfs_has_rtgroups(args->mp))
+ word->rtg = cpu_to_be32(value);
+ else
+ word->old = value;
}
/*
@@ -225,6 +274,9 @@ xfs_rtsumoffs_to_block(
struct xfs_mount *mp,
xfs_rtsumoff_t rsumoff)
{
+ if (xfs_has_rtgroups(mp))
+ return rsumoff / mp->m_blockwsize;
+
return XFS_B_TO_FSBT(mp, rsumoff * sizeof(xfs_suminfo_t));
}
@@ -239,6 +291,9 @@ xfs_rtsumoffs_to_infoword(
{
unsigned int mask = mp->m_blockmask >> XFS_SUMINFOLOG;
+ if (xfs_has_rtgroups(mp))
+ return rsumoff % mp->m_blockwsize;
+
return rsumoff & mask;
}
@@ -248,7 +303,13 @@ xfs_rsumblock_infoptr(
struct xfs_rtalloc_args *args,
unsigned int index)
{
- union xfs_suminfo_raw *info = args->sumbp->b_addr;
+ union xfs_suminfo_raw *info;
+ struct xfs_rtbuf_blkinfo *hdr = args->sumbp->b_addr;
+
+ if (xfs_has_rtgroups(args->mp))
+ info = (union xfs_suminfo_raw *)(hdr + 1);
+ else
+ info = args->sumbp->b_addr;
return info + index;
}
@@ -261,6 +322,8 @@ xfs_suminfo_get(
{
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
+ if (xfs_has_rtgroups(args->mp))
+ return be32_to_cpu(info->rtg);
return info->old;
}
@@ -273,10 +336,28 @@ xfs_suminfo_add(
{
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
+ if (xfs_has_rtgroups(args->mp)) {
+ be32_add_cpu(&info->rtg, delta);
+ return be32_to_cpu(info->rtg);
+ }
+
info->old += delta;
return info->old;
}
+/*
+ * Pick the buffer ops for an rt bitmap or summary block.  With rtgroups
+ * enabled, XFS_RTGI_SUMMARY gets xfs_rtsummary_buf_ops and every other type
+ * gets xfs_rtbitmap_buf_ops; without rtgroups, xfs_rtbuf_ops is used for all
+ * types.
+ */
+static inline const struct xfs_buf_ops *
+xfs_rtblock_ops(
+ struct xfs_mount *mp,
+ enum xfs_rtg_inodes type)
+{
+ if (xfs_has_rtgroups(mp)) {
+ if (type == XFS_RTGI_SUMMARY)
+ return &xfs_rtsummary_buf_ops;
+ return &xfs_rtbitmap_buf_ops;
+ }
+ return &xfs_rtbuf_ops;
+}
/*
* Functions for walking free space rtextents in the realtime bitmap.
*/
@@ -286,37 +367,19 @@ struct xfs_rtalloc_rec {
};
typedef int (*xfs_rtalloc_query_range_fn)(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
const struct xfs_rtalloc_rec *rec,
void *priv);
#ifdef CONFIG_XFS_RT
void xfs_rtbuf_cache_relse(struct xfs_rtalloc_args *args);
-
-int xfs_rtbuf_get(struct xfs_rtalloc_args *args, xfs_fileoff_t block,
- int issum);
-
-static inline int
-xfs_rtbitmap_read_buf(
- struct xfs_rtalloc_args *args,
- xfs_fileoff_t block)
-{
- return xfs_rtbuf_get(args, block, 0);
-}
-
-static inline int
-xfs_rtsummary_read_buf(
- struct xfs_rtalloc_args *args,
- xfs_fileoff_t block)
-{
- return xfs_rtbuf_get(args, block, 1);
-}
-
+int xfs_rtbitmap_read_buf(struct xfs_rtalloc_args *args, xfs_fileoff_t block);
+int xfs_rtsummary_read_buf(struct xfs_rtalloc_args *args, xfs_fileoff_t block);
int xfs_rtcheck_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxlen_t len, int val, xfs_rtxnum_t *new, int *stat);
int xfs_rtfind_back(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
- xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock);
+ xfs_rtxnum_t *rtblock);
int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock);
int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
@@ -327,42 +390,43 @@ int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log,
xfs_fileoff_t bbno, int delta);
int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxlen_t len);
-int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
- const struct xfs_rtalloc_rec *low_rec,
- const struct xfs_rtalloc_rec *high_rec,
+int xfs_rtalloc_query_range(struct xfs_rtgroup *rtg, struct xfs_trans *tp,
+ xfs_rtxnum_t start, xfs_rtxnum_t end,
xfs_rtalloc_query_range_fn fn, void *priv);
-int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtalloc_query_range_fn fn,
- void *priv);
-int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtxnum_t start, xfs_rtxlen_t len,
- bool *is_free);
-/*
- * Free an extent in the realtime subvolume. Length is expressed in
- * realtime extents, as is the block number.
- */
-int /* error */
-xfs_rtfree_extent(
- struct xfs_trans *tp, /* transaction pointer */
- xfs_rtxnum_t start, /* starting rtext number to free */
- xfs_rtxlen_t len); /* length of extent freed */
-
+int xfs_rtalloc_query_all(struct xfs_rtgroup *rtg, struct xfs_trans *tp,
+ xfs_rtalloc_query_range_fn fn, void *priv);
+int xfs_rtalloc_extent_is_free(struct xfs_rtgroup *rtg, struct xfs_trans *tp,
+ xfs_rtxnum_t start, xfs_rtxlen_t len, bool *is_free);
+int xfs_rtfree_extent(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
+ xfs_rtxnum_t start, xfs_rtxlen_t len);
/* Same as above, but in units of rt blocks. */
-int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
- xfs_filblks_t rtlen);
+int xfs_rtfree_blocks(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
+ xfs_fsblock_t rtbno, xfs_filblks_t rtlen);
-xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t
- rtextents);
-unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp,
+xfs_rtxnum_t xfs_rtbitmap_rtx_per_rbmblock(struct xfs_mount *mp);
+xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp);
+xfs_filblks_t xfs_rtbitmap_blockcount_len(struct xfs_mount *mp,
xfs_rtbxlen_t rtextents);
-
xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp,
- unsigned int rsumlevels, xfs_extlen_t rbmblocks);
-unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp,
- unsigned int rsumlevels, xfs_extlen_t rbmblocks);
+ unsigned int *rsumlevels);
+
+int xfs_rtfile_initialize_blocks(struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type, xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t end_fsb, void *data);
+int xfs_rtbitmap_create(struct xfs_rtgroup *rtg, struct xfs_inode *ip,
+ struct xfs_trans *tp, bool init);
+int xfs_rtsummary_create(struct xfs_rtgroup *rtg, struct xfs_inode *ip,
+ struct xfs_trans *tp, bool init);
+
#else /* CONFIG_XFS_RT */
# define xfs_rtfree_extent(t,b,l) (-ENOSYS)
-# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
+
+static inline int xfs_rtfree_blocks(struct xfs_trans *tp,
+ struct xfs_rtgroup *rtg, xfs_fsblock_t rtbno,
+ xfs_filblks_t rtlen)
+{
+ return -ENOSYS;
+}
# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS)
# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
# define xfs_rtbitmap_read_buf(a,b) (-ENOSYS)
@@ -370,14 +434,11 @@ unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp,
# define xfs_rtbuf_cache_relse(a) (0)
# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
static inline xfs_filblks_t
-xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
+xfs_rtbitmap_blockcount_len(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
{
/* shut up gcc */
return 0;
}
-# define xfs_rtbitmap_wordcount(mp, r) (0)
-# define xfs_rtsummary_blockcount(mp, l, b) (0)
-# define xfs_rtsummary_wordcount(mp, l, b) (0)
#endif /* CONFIG_XFS_RT */
#endif /* __XFS_RTBITMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
new file mode 100644
index 000000000000..e74bb059f24f
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rtgroup.c
@@ -0,0 +1,697 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "xfs_ag.h"
+#include "xfs_ag_resv.h"
+#include "xfs_health.h"
+#include "xfs_error.h"
+#include "xfs_bmap.h"
+#include "xfs_defer.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_buf_item.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_metafile.h"
+#include "xfs_metadir.h"
+
+/* Find the first usable fsblock in this rtgroup. */
+static inline uint32_t
+xfs_rtgroup_min_block(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ if (xfs_has_rtsb(mp) && rgno == 0)
+ return mp->m_sb.sb_rextsize;
+
+ return 0;
+}
+
+/* Precompute this group's geometry */
+void
+xfs_rtgroup_calc_geometry(
+ struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
+ xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents)
+{
+ rtg->rtg_extents = __xfs_rtgroup_extents(mp, rgno, rgcount, rextents);
+ rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize;
+ rtg_group(rtg)->xg_min_gbno = xfs_rtgroup_min_block(mp, rgno);
+}
+
+int
+xfs_rtgroup_alloc(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents)
+{
+ struct xfs_rtgroup *rtg;
+ int error;
+
+ rtg = kzalloc(sizeof(struct xfs_rtgroup), GFP_KERNEL);
+ if (!rtg)
+ return -ENOMEM;
+
+ xfs_rtgroup_calc_geometry(mp, rtg, rgno, rgcount, rextents);
+
+ error = xfs_group_insert(mp, rtg_group(rtg), rgno, XG_TYPE_RTG);
+ if (error)
+ goto out_free_rtg;
+ return 0;
+
+out_free_rtg:
+ kfree(rtg);
+ return error;
+}
+
+void
+xfs_rtgroup_free(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ xfs_group_free(mp, rgno, XG_TYPE_RTG, NULL);
+}
+
+/* Free a range of incore rtgroup objects. */
+void
+xfs_free_rtgroups(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t first_rgno,
+ xfs_rgnumber_t end_rgno)
+{
+ xfs_rgnumber_t rgno;
+
+ for (rgno = first_rgno; rgno < end_rgno; rgno++)
+ xfs_rtgroup_free(mp, rgno);
+}
+
+/* Initialize some range of incore rtgroup objects. */
+int
+xfs_initialize_rtgroups(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t first_rgno,
+ xfs_rgnumber_t end_rgno,
+ xfs_rtbxlen_t rextents)
+{
+ xfs_rgnumber_t index;
+ int error;
+
+ if (first_rgno >= end_rgno)
+ return 0;
+
+ for (index = first_rgno; index < end_rgno; index++) {
+ error = xfs_rtgroup_alloc(mp, index, end_rgno, rextents);
+ if (error)
+ goto out_unwind_new_rtgs;
+ }
+
+ return 0;
+
+out_unwind_new_rtgs:
+ xfs_free_rtgroups(mp, first_rgno, index);
+ return error;
+}
+
+/* Compute the number of rt extents in this realtime group. */
+xfs_rtxnum_t
+__xfs_rtgroup_extents(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents)
+{
+ ASSERT(rgno < rgcount);
+ if (rgno == rgcount - 1)
+ return rextents - ((xfs_rtxnum_t)rgno * mp->m_sb.sb_rgextents);
+
+ ASSERT(xfs_has_rtgroups(mp));
+ return mp->m_sb.sb_rgextents;
+}
+
+xfs_rtxnum_t
+xfs_rtgroup_extents(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ return __xfs_rtgroup_extents(mp, rgno, mp->m_sb.sb_rgcount,
+ mp->m_sb.sb_rextents);
+}
+
+/*
+ * Update the rt extent count of the previous tail rtgroup if it changed during
+ * recovery (i.e. recovery of a growfs).
+ */
+int
+xfs_update_last_rtgroup_size(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t prev_rgcount)
+{
+ struct xfs_rtgroup *rtg;
+
+ ASSERT(prev_rgcount > 0);
+
+ rtg = xfs_rtgroup_grab(mp, prev_rgcount - 1);
+ if (!rtg)
+ return -EFSCORRUPTED;
+ rtg->rtg_extents = __xfs_rtgroup_extents(mp, prev_rgcount - 1,
+ mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents);
+ rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize;
+ xfs_rtgroup_rele(rtg);
+ return 0;
+}
+
+/* Lock metadata inodes associated with this rt group. */
+void
+xfs_rtgroup_lock(
+ struct xfs_rtgroup *rtg,
+ unsigned int rtglock_flags)
+{
+ ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
+ ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
+ !(rtglock_flags & XFS_RTGLOCK_BITMAP));
+
+ if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
+ /*
+ * Lock both realtime free space metadata inodes for a freespace
+ * update.
+ */
+ xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
+ xfs_ilock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
+ } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
+ xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
+ }
+}
+
+/* Unlock metadata inodes associated with this rt group. */
+void
+xfs_rtgroup_unlock(
+ struct xfs_rtgroup *rtg,
+ unsigned int rtglock_flags)
+{
+ ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
+ ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
+ !(rtglock_flags & XFS_RTGLOCK_BITMAP));
+
+ if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
+ xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
+ xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
+ } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
+ xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
+ }
+}
+
+/*
+ * Join realtime group metadata inodes to the transaction. The ILOCKs will be
+ * released on transaction commit.
+ */
+void
+xfs_rtgroup_trans_join(
+ struct xfs_trans *tp,
+ struct xfs_rtgroup *rtg,
+ unsigned int rtglock_flags)
+{
+ ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
+ ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED));
+
+ if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
+ xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_BITMAP],
+ XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_SUMMARY],
+ XFS_ILOCK_EXCL);
+ }
+}
+
+/* Retrieve rt group geometry. */
+int
+xfs_rtgroup_get_geometry(
+ struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo)
+{
+ /* Fill out form. */
+ memset(rgeo, 0, sizeof(*rgeo));
+ rgeo->rg_number = rtg_rgno(rtg);
+ rgeo->rg_length = rtg_group(rtg)->xg_block_count;
+ xfs_rtgroup_geom_health(rtg, rgeo);
+ return 0;
+}
+
+#ifdef CONFIG_PROVE_LOCKING
+static struct lock_class_key xfs_rtginode_lock_class;
+
+static int
+xfs_rtginode_ilock_cmp_fn(
+ const struct lockdep_map *m1,
+ const struct lockdep_map *m2)
+{
+ const struct xfs_inode *ip1 =
+ container_of(m1, struct xfs_inode, i_lock.dep_map);
+ const struct xfs_inode *ip2 =
+ container_of(m2, struct xfs_inode, i_lock.dep_map);
+
+ if (ip1->i_projid < ip2->i_projid)
+ return -1;
+ if (ip1->i_projid > ip2->i_projid)
+ return 1;
+ return 0;
+}
+
+static inline void
+xfs_rtginode_ilock_print_fn(
+ const struct lockdep_map *m)
+{
+ const struct xfs_inode *ip =
+ container_of(m, struct xfs_inode, i_lock.dep_map);
+
+ printk(KERN_CONT " rgno=%u", ip->i_projid);
+}
+
+/*
+ * Most of the time each of the RTG inode locks are only taken one at a time.
+ * But when committing deferred ops, more than one of a kind can be taken.
+ * However, deferred rt ops will be committed in rgno order so there is no
+ * potential for deadlocks. The code here is needed to tell lockdep about this
+ * order.
+ */
+static inline void
+xfs_rtginode_lockdep_setup(
+ struct xfs_inode *ip,
+ xfs_rgnumber_t rgno,
+ enum xfs_rtg_inodes type)
+{
+ lockdep_set_class_and_subclass(&ip->i_lock, &xfs_rtginode_lock_class,
+ type);
+ lock_set_cmp_fn(&ip->i_lock, xfs_rtginode_ilock_cmp_fn,
+ xfs_rtginode_ilock_print_fn);
+}
+#else
+#define xfs_rtginode_lockdep_setup(ip, rgno, type) do { } while (0)
+#endif /* CONFIG_PROVE_LOCKING */
+
+struct xfs_rtginode_ops {
+ const char *name; /* short name */
+
+ enum xfs_metafile_type metafile_type;
+
+ unsigned int sick; /* rtgroup sickness flag */
+
+ /* Does the fs have this feature? */
+ bool (*enabled)(struct xfs_mount *mp);
+
+ /* Create this rtgroup metadata inode and initialize it. */
+ int (*create)(struct xfs_rtgroup *rtg,
+ struct xfs_inode *ip,
+ struct xfs_trans *tp,
+ bool init);
+};
+
+static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
+ [XFS_RTGI_BITMAP] = {
+ .name = "bitmap",
+ .metafile_type = XFS_METAFILE_RTBITMAP,
+ .sick = XFS_SICK_RG_BITMAP,
+ .create = xfs_rtbitmap_create,
+ },
+ [XFS_RTGI_SUMMARY] = {
+ .name = "summary",
+ .metafile_type = XFS_METAFILE_RTSUMMARY,
+ .sick = XFS_SICK_RG_SUMMARY,
+ .create = xfs_rtsummary_create,
+ },
+};
+
+/* Return the shortname of this rtgroup inode. */
+const char *
+xfs_rtginode_name(
+ enum xfs_rtg_inodes type)
+{
+ return xfs_rtginode_ops[type].name;
+}
+
+/* Return the metafile type of this rtgroup inode. */
+enum xfs_metafile_type
+xfs_rtginode_metafile_type(
+ enum xfs_rtg_inodes type)
+{
+ return xfs_rtginode_ops[type].metafile_type;
+}
+
+/* Should this rtgroup inode be present? */
+bool
+xfs_rtginode_enabled(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type)
+{
+ const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+
+ if (!ops->enabled)
+ return true;
+ return ops->enabled(rtg_mount(rtg));
+}
+
+/* Mark an rtgroup inode sick */
+void
+xfs_rtginode_mark_sick(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type)
+{
+ const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+
+ xfs_group_mark_sick(rtg_group(rtg), ops->sick);
+}
+
+/* Load an existing rtgroup inode into the rtgroup structure. */
+int
+xfs_rtginode_load(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type,
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *ip;
+ const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+ int error;
+
+ if (!xfs_rtginode_enabled(rtg, type))
+ return 0;
+
+ if (!xfs_has_rtgroups(mp)) {
+ xfs_ino_t ino;
+
+ switch (type) {
+ case XFS_RTGI_BITMAP:
+ ino = mp->m_sb.sb_rbmino;
+ break;
+ case XFS_RTGI_SUMMARY:
+ ino = mp->m_sb.sb_rsumino;
+ break;
+ default:
+ /* None of the other types exist on !rtgroups */
+ return 0;
+ }
+
+ error = xfs_trans_metafile_iget(tp, ino, ops->metafile_type,
+ &ip);
+ } else {
+ const char *path;
+
+ if (!mp->m_rtdirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ path = xfs_rtginode_path(rtg_rgno(rtg), type);
+ if (!path)
+ return -ENOMEM;
+ error = xfs_metadir_load(tp, mp->m_rtdirip, path,
+ ops->metafile_type, &ip);
+ kfree(path);
+ }
+
+ if (error) {
+ if (xfs_metadata_is_sick(error))
+ xfs_rtginode_mark_sick(rtg, type);
+ return error;
+ }
+
+ if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+ ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
+ xfs_irele(ip);
+ xfs_rtginode_mark_sick(rtg, type);
+ return -EFSCORRUPTED;
+ }
+
+ if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg_rgno(rtg))) {
+ xfs_irele(ip);
+ xfs_rtginode_mark_sick(rtg, type);
+ return -EFSCORRUPTED;
+ }
+
+ xfs_rtginode_lockdep_setup(ip, rtg_rgno(rtg), type);
+ rtg->rtg_inodes[type] = ip;
+ return 0;
+}
+
+/* Release an rtgroup metadata inode. */
+void
+xfs_rtginode_irele(
+ struct xfs_inode **ipp)
+{
+ if (*ipp)
+ xfs_irele(*ipp);
+ *ipp = NULL;
+}
+
+/* Create the rtgroup metadata inode of the given type and attach it to @rtg. */
+int
+xfs_rtginode_create(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type,
+ bool init)
+{
+ const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_metadir_update upd = {
+ .dp = mp->m_rtdirip,
+ .metafile_type = ops->metafile_type,
+ };
+ int error;
+
+ if (!xfs_rtginode_enabled(rtg, type))
+ return 0;
+
+ if (!mp->m_rtdirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ upd.path = xfs_rtginode_path(rtg_rgno(rtg), type);
+ if (!upd.path)
+ return -ENOMEM;
+
+ error = xfs_metadir_start_create(&upd);
+ if (error)
+ goto out_path;
+
+ error = xfs_metadir_create(&upd, S_IFREG);
+ if (error)
+ goto out_cancel; /* cancel the started update and free upd.path */
+
+ xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type);
+
+ upd.ip->i_projid = rtg_rgno(rtg);
+ error = ops->create(rtg, upd.ip, upd.tp, init);
+ if (error)
+ goto out_cancel;
+
+ error = xfs_metadir_commit(&upd);
+ if (error)
+ goto out_path;
+
+ kfree(upd.path);
+ xfs_finish_inode_setup(upd.ip);
+ rtg->rtg_inodes[type] = upd.ip;
+ return 0;
+
+out_cancel:
+ xfs_metadir_cancel(&upd, error);
+ /* Have to finish setting up the inode to ensure it's deleted. */
+ if (upd.ip) {
+ xfs_finish_inode_setup(upd.ip);
+ xfs_irele(upd.ip);
+ }
+out_path:
+ kfree(upd.path);
+ return error;
+}
+
+/* Create the parent directory for all rtgroup inodes and load it. */
+int
+xfs_rtginode_mkdir_parent(
+ struct xfs_mount *mp)
+{
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip);
+}
+
+/* Load the parent directory of all rtgroup inodes. */
+int
+xfs_rtginode_load_parent(
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups",
+ XFS_METAFILE_DIR, &mp->m_rtdirip);
+}
+
+/* Check superblock fields for a read or a write. */
+static xfs_failaddr_t
+xfs_rtsb_verify_common(
+ struct xfs_buf *bp)
+{
+ struct xfs_rtsb *rsb = bp->b_addr;
+
+ if (!xfs_verify_magic(bp, rsb->rsb_magicnum))
+ return __this_address;
+ if (rsb->rsb_pad)
+ return __this_address;
+
+ /* Everything to the end of the fs block must be zero */
+ if (memchr_inv(rsb + 1, 0, BBTOB(bp->b_length) - sizeof(*rsb)))
+ return __this_address;
+
+ return NULL;
+}
+
+/* Check superblock fields for a read or revalidation. */
+static inline xfs_failaddr_t
+xfs_rtsb_verify_all(
+ struct xfs_buf *bp)
+{
+ struct xfs_rtsb *rsb = bp->b_addr;
+ struct xfs_mount *mp = bp->b_mount;
+ xfs_failaddr_t fa;
+
+ fa = xfs_rtsb_verify_common(bp);
+ if (fa)
+ return fa;
+
+ if (memcmp(&rsb->rsb_fname, &mp->m_sb.sb_fname, XFSLABEL_MAX))
+ return __this_address;
+ if (!uuid_equal(&rsb->rsb_uuid, &mp->m_sb.sb_uuid))
+ return __this_address;
+ if (!uuid_equal(&rsb->rsb_meta_uuid, &mp->m_sb.sb_meta_uuid))
+ return __this_address;
+
+ return NULL;
+}
+
+static void
+xfs_rtsb_read_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ if (!xfs_buf_verify_cksum(bp, XFS_RTSB_CRC_OFF)) {
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ return;
+ }
+
+ fa = xfs_rtsb_verify_all(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+}
+
+static void
+xfs_rtsb_write_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ fa = xfs_rtsb_verify_common(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ return;
+ }
+
+ xfs_buf_update_cksum(bp, XFS_RTSB_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_rtsb_buf_ops = {
+ .name = "xfs_rtsb",
+ .magic = { 0, cpu_to_be32(XFS_RTSB_MAGIC) },
+ .verify_read = xfs_rtsb_read_verify,
+ .verify_write = xfs_rtsb_write_verify,
+ .verify_struct = xfs_rtsb_verify_all,
+};
+
+/* Update a realtime superblock from the primary fs super */
+void
+xfs_update_rtsb(
+ struct xfs_buf *rtsb_bp,
+ const struct xfs_buf *sb_bp)
+{
+ const struct xfs_dsb *dsb = sb_bp->b_addr;
+ struct xfs_rtsb *rsb = rtsb_bp->b_addr;
+ const uuid_t *meta_uuid;
+
+ rsb->rsb_magicnum = cpu_to_be32(XFS_RTSB_MAGIC);
+
+ rsb->rsb_pad = 0;
+ memcpy(&rsb->rsb_fname, &dsb->sb_fname, XFSLABEL_MAX);
+
+ memcpy(&rsb->rsb_uuid, &dsb->sb_uuid, sizeof(rsb->rsb_uuid));
+
+ /*
+ * The metadata uuid is the fs uuid if the metauuid feature is not
+ * enabled.
+ */
+ if (dsb->sb_features_incompat &
+ cpu_to_be32(XFS_SB_FEAT_INCOMPAT_META_UUID))
+ meta_uuid = &dsb->sb_meta_uuid;
+ else
+ meta_uuid = &dsb->sb_uuid;
+ memcpy(&rsb->rsb_meta_uuid, meta_uuid, sizeof(rsb->rsb_meta_uuid));
+}
+
+/*
+ * Update the realtime superblock from a filesystem superblock and log it to
+ * the given transaction.
+ */
+struct xfs_buf *
+xfs_log_rtsb(
+ struct xfs_trans *tp,
+ const struct xfs_buf *sb_bp)
+{
+ struct xfs_buf *rtsb_bp;
+
+ if (!xfs_has_rtsb(tp->t_mountp))
+ return NULL;
+
+ rtsb_bp = xfs_trans_getrtsb(tp);
+ if (!rtsb_bp) {
+ /*
+ * It's possible for the rtgroups feature to be enabled but
+ * there is no incore rt superblock buffer if the rt geometry
+ * was specified at mkfs time but the rt section has not yet
+ * been attached. In this case, rblocks must be zero.
+ */
+ ASSERT(tp->t_mountp->m_sb.sb_rblocks == 0);
+ return NULL;
+ }
+
+ xfs_update_rtsb(rtsb_bp, sb_bp);
+ xfs_trans_ordered_buf(tp, rtsb_bp);
+ return rtsb_bp;
+}
diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h
new file mode 100644
index 000000000000..7e7e491ff06f
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rtgroup.h
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __LIBXFS_RTGROUP_H
+#define __LIBXFS_RTGROUP_H 1
+
+#include "xfs_group.h"
+
+struct xfs_mount;
+struct xfs_trans;
+
+enum xfs_rtg_inodes {
+ XFS_RTGI_BITMAP, /* allocation bitmap */
+ XFS_RTGI_SUMMARY, /* allocation summary */
+
+ XFS_RTGI_MAX,
+};
+
+#ifdef MAX_LOCKDEP_SUBCLASSES
+static_assert(XFS_RTGI_MAX <= MAX_LOCKDEP_SUBCLASSES);
+#endif
+
+/*
+ * Realtime group incore structure, similar to the per-AG structure.
+ */
+struct xfs_rtgroup {
+ struct xfs_group rtg_group;
+
+ /* per-rtgroup metadata inodes */
+ struct xfs_inode *rtg_inodes[XFS_RTGI_MAX];
+
+ /* Number of rt extents in this group */
+ xfs_rtxnum_t rtg_extents;
+
+ /*
+ * Cache of rt summary level per bitmap block with the invariant that
+ * rtg_rsum_cache[bbno] > the maximum i for which rsum[i][bbno] != 0,
+ * or 0 if rsum[i][bbno] == 0 for all i.
+ *
+ * Reads and writes are serialized by the rsumip inode lock.
+ */
+ uint8_t *rtg_rsum_cache;
+};
+
+static inline struct xfs_rtgroup *to_rtg(struct xfs_group *xg)
+{
+ return container_of(xg, struct xfs_rtgroup, rtg_group);
+}
+
+static inline struct xfs_group *rtg_group(struct xfs_rtgroup *rtg)
+{
+ return &rtg->rtg_group;
+}
+
+static inline struct xfs_mount *rtg_mount(const struct xfs_rtgroup *rtg)
+{
+ return rtg->rtg_group.xg_mount;
+}
+
+static inline xfs_rgnumber_t rtg_rgno(const struct xfs_rtgroup *rtg)
+{
+ return rtg->rtg_group.xg_gno;
+}
+
+/* Passive rtgroup references */
+static inline struct xfs_rtgroup *
+xfs_rtgroup_get(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ return to_rtg(xfs_group_get(mp, rgno, XG_TYPE_RTG));
+}
+
+static inline struct xfs_rtgroup *
+xfs_rtgroup_hold(
+ struct xfs_rtgroup *rtg)
+{
+ return to_rtg(xfs_group_hold(rtg_group(rtg)));
+}
+
+static inline void
+xfs_rtgroup_put(
+ struct xfs_rtgroup *rtg)
+{
+ xfs_group_put(rtg_group(rtg));
+}
+
+/* Active rtgroup references */
+static inline struct xfs_rtgroup *
+xfs_rtgroup_grab(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ return to_rtg(xfs_group_grab(mp, rgno, XG_TYPE_RTG));
+}
+
+static inline void
+xfs_rtgroup_rele(
+ struct xfs_rtgroup *rtg)
+{
+ xfs_group_rele(rtg_group(rtg));
+}
+
+static inline struct xfs_rtgroup *
+xfs_rtgroup_next_range(
+ struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
+ xfs_rgnumber_t start_rgno,
+ xfs_rgnumber_t end_rgno)
+{
+ return to_rtg(xfs_group_next_range(mp, rtg ? rtg_group(rtg) : NULL,
+ start_rgno, end_rgno, XG_TYPE_RTG));
+}
+
+static inline struct xfs_rtgroup *
+xfs_rtgroup_next(
+ struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg)
+{
+ return xfs_rtgroup_next_range(mp, rtg, 0, mp->m_sb.sb_rgcount - 1);
+}
+
+static inline xfs_rtblock_t
+xfs_rgbno_to_rtb(
+ struct xfs_rtgroup *rtg,
+ xfs_rgblock_t rgbno)
+{
+ return xfs_gbno_to_fsb(rtg_group(rtg), rgbno);
+}
+
+static inline xfs_rgnumber_t
+xfs_rtb_to_rgno(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ return xfs_fsb_to_gno(mp, rtbno, XG_TYPE_RTG);
+}
+
+static inline xfs_rgblock_t
+xfs_rtb_to_rgbno(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ return xfs_fsb_to_gbno(mp, rtbno, XG_TYPE_RTG);
+}
+
+/* Is rtbno the start of a RT group? */
+static inline bool
+xfs_rtbno_is_group_start(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ return (rtbno & mp->m_groups[XG_TYPE_RTG].blkmask) == 0;
+}
+
+/* Convert an rtgroups rt extent number into an rgbno. */
+static inline xfs_rgblock_t
+xfs_rtx_to_rgbno(
+ struct xfs_rtgroup *rtg,
+ xfs_rtxnum_t rtx)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+
+ if (likely(mp->m_rtxblklog >= 0))
+ return rtx << mp->m_rtxblklog;
+ return rtx * mp->m_sb.sb_rextsize;
+}
+
+static inline xfs_daddr_t
+xfs_rtb_to_daddr(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
+ xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno);
+ uint64_t start_bno = (xfs_rtblock_t)rgno * g->blocks;
+
+ return XFS_FSB_TO_BB(mp, start_bno + (rtbno & g->blkmask));
+}
+
+static inline xfs_rtblock_t
+xfs_daddr_to_rtb(
+ struct xfs_mount *mp,
+ xfs_daddr_t daddr)
+{
+ xfs_rfsblock_t bno = XFS_BB_TO_FSBT(mp, daddr);
+
+ if (xfs_has_rtgroups(mp)) {
+ struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
+ xfs_rgnumber_t rgno;
+ uint32_t rgbno;
+
+ rgno = div_u64_rem(bno, g->blocks, &rgbno);
+ return ((xfs_rtblock_t)rgno << g->blklog) + rgbno;
+ }
+
+ return bno;
+}
+
+#ifdef CONFIG_XFS_RT
+int xfs_rtgroup_alloc(struct xfs_mount *mp, xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents);
+void xfs_rtgroup_free(struct xfs_mount *mp, xfs_rgnumber_t rgno);
+
+void xfs_free_rtgroups(struct xfs_mount *mp, xfs_rgnumber_t first_rgno,
+ xfs_rgnumber_t end_rgno);
+int xfs_initialize_rtgroups(struct xfs_mount *mp, xfs_rgnumber_t first_rgno,
+ xfs_rgnumber_t end_rgno, xfs_rtbxlen_t rextents);
+
+xfs_rtxnum_t __xfs_rtgroup_extents(struct xfs_mount *mp, xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents);
+xfs_rtxnum_t xfs_rtgroup_extents(struct xfs_mount *mp, xfs_rgnumber_t rgno);
+void xfs_rtgroup_calc_geometry(struct xfs_mount *mp, struct xfs_rtgroup *rtg,
+ xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents);
+
+int xfs_update_last_rtgroup_size(struct xfs_mount *mp,
+ xfs_rgnumber_t prev_rgcount);
+
+/* Lock the rt bitmap and summary inodes in exclusive mode */
+#define XFS_RTGLOCK_BITMAP (1U << 0)
+/* Lock the rt bitmap inode in shared mode */
+#define XFS_RTGLOCK_BITMAP_SHARED (1U << 1)
+
+#define XFS_RTGLOCK_ALL_FLAGS (XFS_RTGLOCK_BITMAP | \
+ XFS_RTGLOCK_BITMAP_SHARED)
+
+void xfs_rtgroup_lock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags);
+void xfs_rtgroup_unlock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags);
+void xfs_rtgroup_trans_join(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
+ unsigned int rtglock_flags);
+
+int xfs_rtgroup_get_geometry(struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo);
+
+int xfs_rtginode_mkdir_parent(struct xfs_mount *mp);
+int xfs_rtginode_load_parent(struct xfs_trans *tp);
+
+const char *xfs_rtginode_name(enum xfs_rtg_inodes type);
+enum xfs_metafile_type xfs_rtginode_metafile_type(enum xfs_rtg_inodes type);
+bool xfs_rtginode_enabled(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type);
+void xfs_rtginode_mark_sick(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type);
+int xfs_rtginode_load(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type,
+ struct xfs_trans *tp);
+int xfs_rtginode_create(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type,
+ bool init);
+void xfs_rtginode_irele(struct xfs_inode **ipp);
+
+static inline const char *xfs_rtginode_path(xfs_rgnumber_t rgno,
+ enum xfs_rtg_inodes type)
+{
+ return kasprintf(GFP_KERNEL, "%u.%s", rgno, xfs_rtginode_name(type));
+}
+
+void xfs_update_rtsb(struct xfs_buf *rtsb_bp,
+ const struct xfs_buf *sb_bp);
+struct xfs_buf *xfs_log_rtsb(struct xfs_trans *tp,
+ const struct xfs_buf *sb_bp);
+#else
+static inline void xfs_free_rtgroups(struct xfs_mount *mp,
+ xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno)
+{
+}
+
+static inline int xfs_initialize_rtgroups(struct xfs_mount *mp,
+ xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno,
+ xfs_rtbxlen_t rextents)
+{
+ return 0;
+}
+
+# define xfs_rtgroup_extents(mp, rgno) (0)
+# define xfs_update_last_rtgroup_size(mp, rgno) (-EOPNOTSUPP)
+# define xfs_rtgroup_lock(rtg, gf) ((void)0)
+# define xfs_rtgroup_unlock(rtg, gf) ((void)0)
+# define xfs_rtgroup_trans_join(tp, rtg, gf) ((void)0)
+# define xfs_update_rtsb(bp, sb_bp) ((void)0)
+# define xfs_log_rtsb(tp, sb_bp) (NULL)
+# define xfs_rtgroup_get_geometry(rtg, rgeo) (-EOPNOTSUPP)
+#endif /* CONFIG_XFS_RT */
+
+#endif /* __LIBXFS_RTGROUP_H */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 73a4b895de67..a809513a290c 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -26,6 +26,8 @@
#include "xfs_health.h"
#include "xfs_ag.h"
#include "xfs_rtbitmap.h"
+#include "xfs_exchrange.h"
+#include "xfs_rtgroup.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -175,6 +177,12 @@ xfs_sb_version_to_features(
features |= XFS_FEAT_NEEDSREPAIR;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NREXT64)
features |= XFS_FEAT_NREXT64;
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE)
+ features |= XFS_FEAT_EXCHANGE_RANGE;
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT)
+ features |= XFS_FEAT_PARENT;
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)
+ features |= XFS_FEAT_METADIR;
return features;
}
@@ -227,6 +235,64 @@ xfs_validate_sb_read(
return 0;
}
+/* Return the number of extents covered by a single rt bitmap file */
+static xfs_rtbxlen_t
+xfs_extents_per_rbm(
+ struct xfs_sb *sbp)
+{
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR))
+ return sbp->sb_rgextents;
+ return sbp->sb_rextents;
+}
+
+/*
+ * Return the payload size of a single rt bitmap block (without the metadata
+ * header if any).
+ */
+static inline unsigned int
+xfs_rtbmblock_size(
+ struct xfs_sb *sbp)
+{
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR))
+ return sbp->sb_blocksize - sizeof(struct xfs_rtbuf_blkinfo);
+ return sbp->sb_blocksize;
+}
+
+static uint64_t
+xfs_expected_rbmblocks(
+ struct xfs_sb *sbp)
+{
+ return howmany_64(xfs_extents_per_rbm(sbp),
+ NBBY * xfs_rtbmblock_size(sbp));
+}
+
+/* Validate the realtime geometry */
+bool
+xfs_validate_rt_geometry(
+ struct xfs_sb *sbp)
+{
+ if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
+ sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)
+ return false;
+
+ if (sbp->sb_rblocks == 0) {
+ if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 ||
+ sbp->sb_rextslog != 0 || sbp->sb_frextents != 0)
+ return false;
+ return true;
+ }
+
+ if (sbp->sb_rextents == 0 ||
+ sbp->sb_rextents != div_u64(sbp->sb_rblocks, sbp->sb_rextsize) ||
+ sbp->sb_rextslog != xfs_compute_rextslog(sbp->sb_rextents) ||
+ sbp->sb_rbmblocks != xfs_expected_rbmblocks(sbp))
+ return false;
+
+ return true;
+}
+
/* Check all the superblock fields we care about when writing one out. */
STATIC int
xfs_validate_sb_write(
@@ -260,13 +326,6 @@ xfs_validate_sb_write(
* the kernel cannot support since we checked for unsupported bits in
* the read verifier, which means that memory is corrupt.
*/
- if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) {
- xfs_warn(mp,
-"Corruption detected in superblock compatible features (0x%x)!",
- (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN));
- return -EFSCORRUPTED;
- }
-
if (!xfs_is_readonly(mp) &&
xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
xfs_alert(mp,
@@ -302,6 +361,78 @@ xfs_validate_sb_write(
return 0;
}
+/*
+ * Compute the rt group block log for a group of @rgextents rt extents of
+ * @rextsize blocks each.  The block count is formed in 64 bits so the
+ * multiplication cannot wrap; the result is xfs_highbit64() of the last block
+ * offset, plus one -- presumably the number of bits needed to index any block
+ * in the group (TODO confirm against xfs_highbit64()).
+ *
+ * NOTE(review): a zero block count makes "rgblocks - 1" wrap.
+ * xfs_validate_sb_rtgroups() rejects a zero sb_rextsize and an sb_rgextents
+ * below XFS_MIN_RGEXTENTS before calling this; other callers must ensure
+ * sane inputs themselves.
+ */
+int
+xfs_compute_rgblklog(
+ xfs_rtxlen_t rgextents,
+ xfs_rgblock_t rextsize)
+{
+ uint64_t rgblocks = (uint64_t)rgextents * rextsize;
+
+ return xfs_highbit64(rgblocks - 1) + 1;
+}
+
+/*
+ * Validate the realtime group fields of a superblock.
+ *
+ * Checks, in order: non-zero rt extent size; group size within
+ * [XFS_MIN_RGEXTENTS, XFS_MAX_RGBLOCKS / sb_rextsize] rt extents; group count
+ * not above XFS_MAX_RGNUMBER; group count equal to what is needed to cover
+ * sb_rextents; the EXCHRANGE incompat feature being set; and sb_rgblklog
+ * matching the recomputed value.
+ *
+ * Returns 0 if everything checks out, or -EINVAL after logging a warning
+ * against @mp for the first failed check.
+ */
+static int
+xfs_validate_sb_rtgroups(
+ struct xfs_mount *mp,
+ struct xfs_sb *sbp)
+{
+ uint64_t groups;
+ int rgblklog;
+
+ /* Must come first: sb_rextsize is used as a divisor below. */
+ if (sbp->sb_rextsize == 0) {
+ xfs_warn(mp,
+"Realtime extent size must not be zero.");
+ return -EINVAL;
+ }
+
+ if (sbp->sb_rgextents > XFS_MAX_RGBLOCKS / sbp->sb_rextsize) {
+ xfs_warn(mp,
+"Realtime group size (%u) must be less than %u rt extents.",
+ sbp->sb_rgextents,
+ XFS_MAX_RGBLOCKS / sbp->sb_rextsize);
+ return -EINVAL;
+ }
+
+ if (sbp->sb_rgextents < XFS_MIN_RGEXTENTS) {
+ xfs_warn(mp,
+"Realtime group size (%u) must be at least %u rt extents.",
+ sbp->sb_rgextents, XFS_MIN_RGEXTENTS);
+ return -EINVAL;
+ }
+
+ if (sbp->sb_rgcount > XFS_MAX_RGNUMBER) {
+ xfs_warn(mp,
+"Realtime groups (%u) must be less than %u.",
+ sbp->sb_rgcount, XFS_MAX_RGNUMBER);
+ return -EINVAL;
+ }
+
+ /* The groups must cover exactly the rt extents recorded in the sb. */
+ groups = howmany_64(sbp->sb_rextents, sbp->sb_rgextents);
+ if (groups != sbp->sb_rgcount) {
+ xfs_warn(mp,
+"Realtime groups (%u) do not cover the entire rt section; need (%llu) groups.",
+ sbp->sb_rgcount, groups);
+ return -EINVAL;
+ }
+
+ /* Exchange-range is required for fsr to work on realtime files */
+ if (!(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE)) {
+ xfs_warn(mp,
+"Realtime groups feature requires exchange-range support.");
+ return -EINVAL;
+ }
+
+ /* The stored log must agree with the one derived from the group size. */
+ rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents, sbp->sb_rextsize);
+ if (sbp->sb_rgblklog != rgblklog) {
+ xfs_warn(mp,
+"Realtime group log (%d) does not match expected value (%d).",
+ sbp->sb_rgblklog, rgblklog);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/* Check the validity of the SB. */
STATIC int
xfs_validate_sb_common(
@@ -313,6 +444,7 @@ xfs_validate_sb_common(
uint32_t agcount = 0;
uint32_t rem;
bool has_dalign;
+ int error;
if (!xfs_verify_magic(bp, dsb->sb_magicnum)) {
xfs_warn(mp,
@@ -361,6 +493,32 @@ xfs_validate_sb_common(
sbp->sb_inoalignmt, align);
return -EINVAL;
}
+
+ if (!sbp->sb_spino_align ||
+ sbp->sb_spino_align > sbp->sb_inoalignmt ||
+ (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) {
+ xfs_warn(mp,
+ "Sparse inode alignment (%u) is invalid.",
+ sbp->sb_spino_align);
+ return -EINVAL;
+ }
+ } else if (sbp->sb_spino_align) {
+ xfs_warn(mp,
+ "Sparse inode alignment (%u) should be zero.",
+ sbp->sb_spino_align);
+ return -EINVAL;
+ }
+
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) {
+ if (memchr_inv(sbp->sb_pad, 0, sizeof(sbp->sb_pad))) {
+ xfs_warn(mp,
+"Metadir superblock padding fields must be zero.");
+ return -EINVAL;
+ }
+
+ error = xfs_validate_sb_rtgroups(mp, sbp);
+ if (error)
+ return error;
}
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
@@ -486,39 +644,13 @@ xfs_validate_sb_common(
}
}
- /* Validate the realtime geometry; stolen from xfs_repair */
- if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
- sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) {
+ if (!xfs_validate_rt_geometry(sbp)) {
xfs_notice(mp,
- "realtime extent sanity check failed");
+ "realtime %sgeometry check failed",
+ sbp->sb_rblocks ? "" : "zeroed ");
return -EFSCORRUPTED;
}
- if (sbp->sb_rblocks == 0) {
- if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 ||
- sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) {
- xfs_notice(mp,
- "realtime zeroed geometry check failed");
- return -EFSCORRUPTED;
- }
- } else {
- uint64_t rexts;
- uint64_t rbmblocks;
-
- rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize);
- rbmblocks = howmany_64(sbp->sb_rextents,
- NBBY * sbp->sb_blocksize);
-
- if (!xfs_validate_rtextents(rexts) ||
- sbp->sb_rextents != rexts ||
- sbp->sb_rextslog != xfs_compute_rextslog(rexts) ||
- sbp->sb_rbmblocks != rbmblocks) {
- xfs_notice(mp,
- "realtime geometry sanity check failed");
- return -EFSCORRUPTED;
- }
- }
-
/*
* Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign)
* would imply the image is corrupted.
@@ -555,6 +687,14 @@ xfs_validate_sb_common(
void
xfs_sb_quota_from_disk(struct xfs_sb *sbp)
{
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) {
+ sbp->sb_uquotino = NULLFSINO;
+ sbp->sb_gquotino = NULLFSINO;
+ sbp->sb_pquotino = NULLFSINO;
+ return;
+ }
+
/*
* older mkfs doesn't initialize quota inodes to NULLFSINO. This
* leads to in-core values having two different values for a quota
@@ -678,6 +818,20 @@ __xfs_sb_from_disk(
/* Convert on-disk flags to in-memory flags? */
if (convert_xquota)
xfs_sb_quota_from_disk(to);
+
+ if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) {
+ to->sb_metadirino = be64_to_cpu(from->sb_metadirino);
+ to->sb_rgblklog = from->sb_rgblklog;
+ memcpy(to->sb_pad, from->sb_pad, sizeof(to->sb_pad));
+ to->sb_rgcount = be32_to_cpu(from->sb_rgcount);
+ to->sb_rgextents = be32_to_cpu(from->sb_rgextents);
+ to->sb_rbmino = NULLFSINO;
+ to->sb_rsumino = NULLFSINO;
+ } else {
+ to->sb_metadirino = NULLFSINO;
+ to->sb_rgcount = 1;
+ to->sb_rgextents = 0;
+ }
}
void
@@ -695,6 +849,15 @@ xfs_sb_quota_to_disk(
{
uint16_t qflags = from->sb_qflags;
+ if (xfs_sb_is_v5(from) &&
+ (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) {
+ to->sb_qflags = cpu_to_be16(from->sb_qflags);
+ to->sb_uquotino = cpu_to_be64(0);
+ to->sb_gquotino = cpu_to_be64(0);
+ to->sb_pquotino = cpu_to_be64(0);
+ return;
+ }
+
to->sb_uquotino = cpu_to_be64(from->sb_uquotino);
/*
@@ -825,6 +988,16 @@ xfs_sb_to_disk(
to->sb_lsn = cpu_to_be64(from->sb_lsn);
if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)
uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid);
+
+ if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) {
+ to->sb_metadirino = cpu_to_be64(from->sb_metadirino);
+ to->sb_rgblklog = from->sb_rgblklog;
+ memset(to->sb_pad, 0, sizeof(to->sb_pad));
+ to->sb_rgcount = cpu_to_be32(from->sb_rgcount);
+ to->sb_rgextents = cpu_to_be32(from->sb_rgextents);
+ to->sb_rbmino = cpu_to_be64(0);
+ to->sb_rsumino = cpu_to_be64(0);
+ }
}
/*
@@ -954,6 +1127,45 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
.verify_write = xfs_sb_write_verify,
};
+/*
+ * Compute cached rt geometry from the incore sb.
+ *
+ * Fills in the rt extent block log and mask on @mp, and the XG_TYPE_RTG entry
+ * of mp->m_groups.  Every input is read from @sbp, so a caller that passes a
+ * superblock other than mp->m_sb gets a cache that matches that superblock.
+ * Unless @sbp is V5 with the METADIR incompat feature, the group geometry is
+ * reset to blocks = 0, blklog = 0 and an all-ones blkmask.
+ */
+void
+xfs_sb_mount_rextsize(
+ struct xfs_mount *mp,
+ struct xfs_sb *sbp)
+{
+ struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
+
+ mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize);
+ mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize);
+
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) {
+ rgs->blocks = sbp->sb_rgextents * sbp->sb_rextsize;
+ /* Take the log from @sbp too so it agrees with rgs->blocks. */
+ rgs->blklog = sbp->sb_rgblklog;
+ rgs->blkmask = xfs_mask32lo(sbp->sb_rgblklog);
+ } else {
+ rgs->blocks = 0;
+ rgs->blklog = 0;
+ rgs->blkmask = (uint64_t)-1;
+ }
+}
+
+/*
+ * Update incore sb rt extent size, then recompute the cached rt geometry.
+ *
+ * Stores @rextsize in @sbp; on a V5 superblock with the METADIR incompat
+ * feature it also recomputes sbp->sb_rgblklog from the existing sb_rgextents
+ * and the new extent size.  Finally refreshes the values cached in @mp via
+ * xfs_sb_mount_rextsize().
+ */
+void
+xfs_mount_sb_set_rextsize(
+ struct xfs_mount *mp,
+ struct xfs_sb *sbp,
+ xfs_agblock_t rextsize)
+{
+ sbp->sb_rextsize = rextsize;
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR))
+ sbp->sb_rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents,
+ rextsize);
+
+ xfs_sb_mount_rextsize(mp, sbp);
+}
+
/*
* xfs_mount_common
*
@@ -968,6 +1180,8 @@ xfs_sb_mount_common(
struct xfs_mount *mp,
struct xfs_sb *sbp)
{
+ struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG];
+
mp->m_agfrotor = 0;
atomic_set(&mp->m_agirotor, 0);
mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -976,28 +1190,32 @@ xfs_sb_mount_common(
mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
mp->m_blockmask = sbp->sb_blocksize - 1;
- mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
- mp->m_blockwmask = mp->m_blockwsize - 1;
- mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize);
- mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize);
+ mp->m_blockwsize = xfs_rtbmblock_size(sbp) >> XFS_WORDLOG;
+ mp->m_rtx_per_rbmblock = mp->m_blockwsize << XFS_NBWORDLOG;
+
+ ags->blocks = mp->m_sb.sb_agblocks;
+ ags->blklog = mp->m_sb.sb_agblklog;
+ ags->blkmask = xfs_mask32lo(mp->m_sb.sb_agblklog);
+
+ xfs_sb_mount_rextsize(mp, sbp);
- mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
- mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
+ mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, true);
+ mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, false);
mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
- mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
- mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
+ mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, true);
+ mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, false);
mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
- mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 1);
- mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 0);
+ mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, true);
+ mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, false);
mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
- mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, true);
- mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, false);
+ mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, true);
+ mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, false);
mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2;
mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
@@ -1026,20 +1244,26 @@ xfs_log_sb(
* reservations that have been taken out percpu counters. If we have an
* unclean shutdown, this will be corrected by log recovery rebuilding
* the counters from the AGF block counts.
- *
- * Do not update sb_frextents here because it is not part of the lazy
- * sb counters, despite having a percpu counter. It is always kept
- * consistent with the ondisk rtbitmap by xfs_trans_apply_sb_deltas()
- * and hence we don't need have to update it here.
*/
if (xfs_has_lazysbcount(mp)) {
- mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+ mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount);
mp->m_sb.sb_ifree = min_t(uint64_t,
- percpu_counter_sum(&mp->m_ifree),
+ percpu_counter_sum_positive(&mp->m_ifree),
mp->m_sb.sb_icount);
- mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ mp->m_sb.sb_fdblocks =
+ percpu_counter_sum_positive(&mp->m_fdblocks);
}
+ /*
+ * sb_frextents was added to the lazy sb counters when the rt groups
+ * feature was introduced. This counter can go negative due to the way
+ * we handle nearly-lockless reservations, so we must use the _positive
+ * variant here to avoid writing out nonsense frextents.
+ */
+ if (xfs_has_rtgroups(mp))
+ mp->m_sb.sb_frextents =
+ percpu_counter_sum_positive(&mp->m_frextents);
+
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
@@ -1089,18 +1313,17 @@ int
xfs_update_secondary_sbs(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno = 1;
+ struct xfs_perag *pag = NULL;
int saved_error = 0;
int error = 0;
LIST_HEAD (buffer_list);
/* update secondary superblocks. */
- for_each_perag_from(mp, agno, pag) {
+ while ((pag = xfs_perag_next_from(mp, pag, 1))) {
struct xfs_buf *bp;
error = xfs_buf_get(mp->m_ddev_targp,
- XFS_AG_DADDR(mp, pag->pag_agno, XFS_SB_DADDR),
+ XFS_AG_DADDR(mp, pag_agno(pag), XFS_SB_DADDR),
XFS_FSS_TO_BB(mp, 1), &bp);
/*
* If we get an error reading or writing alternate superblocks,
@@ -1112,7 +1335,7 @@ xfs_update_secondary_sbs(
if (error) {
xfs_warn(mp,
"error allocating secondary superblock for ag %d",
- pag->pag_agno);
+ pag_agno(pag));
if (!saved_error)
saved_error = error;
continue;
@@ -1126,26 +1349,22 @@ xfs_update_secondary_sbs(
xfs_buf_relse(bp);
/* don't hold too many buffers at once */
- if (agno % 16)
+ if (pag_agno(pag) % 16)
continue;
error = xfs_buf_delwri_submit(&buffer_list);
if (error) {
xfs_warn(mp,
"write error %d updating a secondary superblock near ag %d",
- error, pag->pag_agno);
+ error, pag_agno(pag));
if (!saved_error)
saved_error = error;
continue;
}
}
error = xfs_buf_delwri_submit(&buffer_list);
- if (error) {
- xfs_warn(mp,
- "write error %d updating a secondary superblock near ag %d",
- error, agno);
- }
-
+ if (error)
+ xfs_warn(mp, "error %d writing secondary superblocks", error);
return saved_error ? saved_error : error;
}
@@ -1155,10 +1374,12 @@ xfs_update_secondary_sbs(
*/
int
xfs_sync_sb_buf(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ bool update_rtsb)
{
struct xfs_trans *tp;
struct xfs_buf *bp;
+ struct xfs_buf *rtsb_bp = NULL;
int error;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp);
@@ -1168,6 +1389,11 @@ xfs_sync_sb_buf(
bp = xfs_trans_getsb(tp);
xfs_log_sb(tp);
xfs_trans_bhold(tp, bp);
+ if (update_rtsb) {
+ rtsb_bp = xfs_log_rtsb(tp, bp);
+ if (rtsb_bp)
+ xfs_trans_bhold(tp, rtsb_bp);
+ }
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp);
if (error)
@@ -1176,7 +1402,11 @@ xfs_sync_sb_buf(
* write out the sb buffer to get the changes to disk
*/
error = xfs_bwrite(bp);
+ if (!error && rtsb_bp)
+ error = xfs_bwrite(rtsb_bp);
out:
+ if (rtsb_bp)
+ xfs_buf_relse(rtsb_bp);
xfs_buf_relse(bp);
return error;
}
@@ -1251,6 +1481,8 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME;
if (xfs_has_inobtcounts(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT;
+ if (xfs_has_parent(mp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_PARENT;
if (xfs_has_sector(mp)) {
geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
geo->logsectsize = sbp->sb_logsectsize;
@@ -1259,6 +1491,10 @@ xfs_fs_geometry(
}
if (xfs_has_large_extent_counts(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64;
+ if (xfs_has_exchange_range(mp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE;
+ if (xfs_has_metadir(mp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR;
geo->rtsectsize = sbp->sb_blocksize;
geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
@@ -1274,6 +1510,11 @@ xfs_fs_geometry(
return;
geo->version = XFS_FSOP_GEOM_VERSION_V5;
+
+ if (xfs_has_rtgroups(mp)) {
+ geo->rgcount = sbp->sb_rgcount;
+ geo->rgextents = sbp->sb_rgextents;
+ }
}
/* Read a secondary superblock. */
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 37b1ed1bc209..34d0dd374e9b 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -15,8 +15,11 @@ struct xfs_perag;
extern void xfs_log_sb(struct xfs_trans *tp);
extern int xfs_sync_sb(struct xfs_mount *mp, bool wait);
-extern int xfs_sync_sb_buf(struct xfs_mount *mp);
+extern int xfs_sync_sb_buf(struct xfs_mount *mp, bool update_rtsb);
extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
+void xfs_sb_mount_rextsize(struct xfs_mount *mp, struct xfs_sb *sbp);
+void xfs_mount_sb_set_rextsize(struct xfs_mount *mp,
+ struct xfs_sb *sbp, xfs_agblock_t rextsize);
extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from);
extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from);
extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp);
@@ -38,7 +41,9 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp,
bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
__s64 sunit, __s64 swidth, int sectorsize, bool may_repair,
bool silent);
+bool xfs_validate_rt_geometry(struct xfs_sb *sbp);
uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
+int xfs_compute_rgblklog(xfs_rtxlen_t rgextents, xfs_rgblock_t rextsize);
#endif /* __XFS_SB_H__ */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index dfd61fa8332e..e7efdb9ceaf3 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -38,7 +38,10 @@ extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_refcountbt_buf_ops;
extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
+extern const struct xfs_buf_ops xfs_rtbitmap_buf_ops;
+extern const struct xfs_buf_ops xfs_rtsummary_buf_ops;
extern const struct xfs_buf_ops xfs_rtbuf_ops;
+extern const struct xfs_buf_ops xfs_rtsb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
@@ -124,7 +127,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_TRANS_RES_FDBLKS (1u << 6)
/* Transaction contains an intent done log item */
#define XFS_TRANS_HAS_INTENT_DONE (1u << 7)
-
/*
* LOWMODE is used by the allocator to activate the lowspace algorithm - when
* free space is running low the extent allocator may choose to allocate an
@@ -136,7 +138,10 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
* for free space from AG 0. If the correct transaction reservations have been
* made then this algorithm will eventually find all the space it needs.
*/
-#define XFS_TRANS_LOWMODE 0x100 /* allocate in low space mode */
+#define XFS_TRANS_LOWMODE (1u << 8)
+
+/* Transaction has locked the rtbitmap and rtsum inodes */
+#define XFS_TRANS_RTBITMAP_LOCKED (1u << 9)
/*
* Field values for xfs_trans_mod_sb.
@@ -155,6 +160,7 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_TRANS_SB_RBLOCKS 0x00000800
#define XFS_TRANS_SB_REXTENTS 0x00001000
#define XFS_TRANS_SB_REXTSLOG 0x00002000
+#define XFS_TRANS_SB_RGCOUNT 0x00004000
/*
* Here we centralize the specification of XFS meta-data buffer reference count
@@ -175,13 +181,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_REFC_BTREE_REF 1
#define XFS_SSB_REF 0
-/*
- * Flags for xfs_trans_ichgtime().
- */
-#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
-#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
-#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
-
/* Computed inode geometry for the filesystem. */
struct xfs_ino_geometry {
/* Maximum inode count in this filesystem. */
@@ -229,6 +228,9 @@ struct xfs_ino_geometry {
/* precomputed value for di_flags2 */
uint64_t new_diflags2;
+ /* minimum folio order of a page cache allocation */
+ unsigned int min_folio_order;
+
};
#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index ffb1317a9212..f228127a88ff 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -169,7 +169,8 @@ xfs_symlink_local_to_remote(
struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
- struct xfs_ifork *ifp)
+ struct xfs_ifork *ifp,
+ void *priv)
{
struct xfs_mount *mp = ip->i_mount;
char *buf;
@@ -310,6 +311,7 @@ int
xfs_symlink_write_target(
struct xfs_trans *tp,
struct xfs_inode *ip,
+ xfs_ino_t owner,
const char *target_path,
int pathlen,
xfs_fsblock_t fs_blocks,
@@ -364,8 +366,7 @@ xfs_symlink_write_target(
byte_cnt = min(byte_cnt, pathlen);
buf = bp->b_addr;
- buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset, byte_cnt,
- bp);
+ buf += xfs_symlink_hdr_set(mp, owner, offset, byte_cnt, bp);
memcpy(buf, cur_chunk, byte_cnt);
@@ -380,3 +381,50 @@ xfs_symlink_write_target(
ASSERT(pathlen == 0);
return 0;
}
+
+/*
+ * Remove all the blocks from a symlink and invalidate buffers.
+ *
+ * Reads up to XFS_SYMLINK_MAPS mappings of @ip, invalidates the buffer
+ * backing each real extent within @tp, then unmaps the whole file range.
+ * The inode core is logged on success.
+ *
+ * Returns 0 on success, a negative errno from the mapping, buffer or unmap
+ * calls, or -EFSCORRUPTED (after marking the inode's symlink sick) if the
+ * unmap did not complete in one call.
+ */
+int
+xfs_symlink_remote_truncate(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS];
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_buf *bp;
+ int nmaps = XFS_SYMLINK_MAPS;
+ int done = 0;
+ int i;
+ int error;
+
+ /* Read mappings and invalidate buffers. */
+ error = xfs_bmapi_read(ip, 0, XFS_MAX_FILEOFF, mval, &nmaps, 0);
+ if (error)
+ return error;
+
+ for (i = 0; i < nmaps; i++) {
+ /* Stop at the first mapping that is not a real extent. */
+ if (!xfs_bmap_is_real_extent(&mval[i]))
+ break;
+
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
+ XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0,
+ &bp);
+ if (error)
+ return error;
+
+ xfs_trans_binval(tp, bp);
+ }
+
+ /* Unmap the remote blocks. */
+ error = xfs_bunmapi(tp, ip, 0, XFS_MAX_FILEOFF, 0, nmaps, &done);
+ if (error)
+ return error;
+ if (!done) {
+ /* Trips on debug builds; otherwise report the corruption. */
+ ASSERT(done);
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_SYMLINK);
+ return -EFSCORRUPTED;
+ }
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.h b/fs/xfs/libxfs/xfs_symlink_remote.h
index a63bd38ae4fa..c1672fe1f17b 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.h
+++ b/fs/xfs/libxfs/xfs_symlink_remote.h
@@ -16,11 +16,13 @@ int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
uint32_t size, struct xfs_buf *bp);
void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
- struct xfs_inode *ip, struct xfs_ifork *ifp);
+ struct xfs_inode *ip, struct xfs_ifork *ifp,
+ void *priv);
xfs_failaddr_t xfs_symlink_shortform_verify(void *sfp, int64_t size);
int xfs_symlink_remote_read(struct xfs_inode *ip, char *link);
int xfs_symlink_write_target(struct xfs_trans *tp, struct xfs_inode *ip,
- const char *target_path, int pathlen, xfs_fsblock_t fs_blocks,
- uint resblks);
+ xfs_ino_t owner, const char *target_path, int pathlen,
+ xfs_fsblock_t fs_blocks, uint resblks);
+int xfs_symlink_remote_truncate(struct xfs_trans *tp, struct xfs_inode *ip);
#endif /* __XFS_SYMLINK_REMOTE_H */
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 69fc5b981352..c962ad64b0c1 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -62,12 +62,14 @@ xfs_trans_ichgtime(
ASSERT(tp);
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
- tv = current_time(inode);
+ /* If the mtime changes, then ctime must also change */
+ ASSERT(flags & XFS_ICHGTIME_CHG);
+ tv = inode_set_ctime_current(inode);
if (flags & XFS_ICHGTIME_MOD)
inode_set_mtime_to_ts(inode, tv);
- if (flags & XFS_ICHGTIME_CHG)
- inode_set_ctime_to_ts(inode, tv);
+ if (flags & XFS_ICHGTIME_ACCESS)
+ inode_set_atime_to_ts(inode, tv);
if (flags & XFS_ICHGTIME_CREATE)
ip->i_crtime = tv;
}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 6cd45e8c118d..bab402340b5d 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -20,6 +20,8 @@
#include "xfs_qm.h"
#include "xfs_trans_space.h"
#include "xfs_rtbitmap.h"
+#include "xfs_attr_item.h"
+#include "xfs_log.h"
#define _ALLOC true
#define _FREE false
@@ -128,7 +130,7 @@ xfs_calc_inode_res(
(4 * sizeof(struct xlog_op_header) +
sizeof(struct xfs_inode_log_format) +
mp->m_sb.sb_inodesize +
- 2 * XFS_BMBT_BLOCK_LEN(mp));
+ 2 * xfs_bmbt_block_len(mp));
}
/*
@@ -222,7 +224,7 @@ xfs_rtalloc_block_count(
xfs_rtxlen_t rtxlen;
rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
- rtbmp_blocks = xfs_rtbitmap_blockcount(mp, rtxlen);
+ rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen);
return (rtbmp_blocks + 1) * num_ops;
}
@@ -336,11 +338,11 @@ xfs_calc_write_reservation(
blksz);
t1 += adj;
t3 += adj;
- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+ return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 1);
- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
unsigned int
@@ -408,11 +410,11 @@ xfs_calc_itruncate_reservation(
xfs_refcountbt_block_count(mp, 4),
blksz);
- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+ return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 2);
- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
unsigned int
@@ -422,29 +424,110 @@ xfs_calc_itruncate_reservation_minlogsize(
return xfs_calc_itruncate_reservation(mp, true);
}
+/*
+ * Log overhead of the xattr intent item used to add one parent pointer: the
+ * attri log format structure plus xlog_calc_iovec_len() of one
+ * struct xfs_parent_rec and of a name of MAXNAMELEN - 1.
+ */
+static inline unsigned int xfs_calc_pptr_link_overhead(void)
+{
+ return sizeof(struct xfs_attri_log_format) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1);
+}
+/*
+ * Log overhead of the xattr intent item used to remove one parent pointer;
+ * computed from the same terms as the link case (format structure, one
+ * parent record, one name of MAXNAMELEN - 1).
+ */
+static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
+{
+ return sizeof(struct xfs_attri_log_format) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1);
+}
+/*
+ * Log overhead of the xattr intent item used to replace one parent pointer:
+ * the format structure plus two parent records and two names of
+ * MAXNAMELEN - 1 (presumably the old and the new pointer -- confirm).
+ */
+static inline unsigned int xfs_calc_pptr_replace_overhead(void)
+{
+ return sizeof(struct xfs_attri_log_format) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1);
+}
+
/*
* In renaming a files we can modify:
* the five inodes involved: 5 * inode size
* the two directory btrees: 2 * (max depth + v2) * dir block size
* the two directory bmap btrees: 2 * max depth * block size
* And the bmap_finish transaction can free dir and bmap blocks (two sets
- * of bmap blocks) giving:
+ * of bmap blocks) giving (t2):
* the agf for the ags in which the blocks live: 3 * sector size
* the agfl for the ags in which the blocks live: 3 * sector size
* the superblock for the free block count: sector size
* the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ * If parent pointers are enabled (t3), then each transaction in the chain
+ * must be capable of setting or removing the extended attribute
+ * containing the parent information. It must also be able to handle
+ * the three xattr intent items that track the progress of the parent
+ * pointer update.
*/
STATIC uint
xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 5) +
- xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int overhead = XFS_DQUOT_LOGRES;
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ t1 = xfs_calc_inode_res(mp, 5) +
+ xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
+ XFS_FSB_TO_B(mp, 1));
+
+ t2 = xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
+ XFS_FSB_TO_B(mp, 1));
+
+ if (xfs_has_parent(mp)) {
+ unsigned int rename_overhead, exchange_overhead;
+
+ t3 = max(resp->tr_attrsetm.tr_logres,
+ resp->tr_attrrm.tr_logres);
+
+ /*
+ * For a standard rename, the three xattr intent log items
+ * are (1) replacing the pptr for the source file; (2)
+ * removing the pptr on the dest file; and (3) adding a
+ * pptr for the whiteout file in the src dir.
+ *
+ * For a RENAME_EXCHANGE, there are two xattr intent
+ * items to replace the pptr for both src and dest
+ * files. Link counts don't change and there is no
+ * whiteout.
+ *
+ * In the worst case we can end up relogging all log
+ * intent items to allow the log tail to move ahead, so
+ * they become overhead added to each transaction in a
+ * processing chain.
+ */
+ rename_overhead = xfs_calc_pptr_replace_overhead() +
+ xfs_calc_pptr_unlink_overhead() +
+ xfs_calc_pptr_link_overhead();
+ exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();
+
+ overhead += max(rename_overhead, exchange_overhead);
+ }
+
+ return overhead + max3(t1, t2, t3);
+}
+
+/*
+ * Log count for a rename transaction: XFS_RENAME_LOG_COUNT, plus the larger
+ * of the attr set and attr remove log counts in @resp when parent pointers
+ * are enabled on @mp.
+ */
+static inline unsigned int
+xfs_rename_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ /* One for the rename, one more for freeing blocks */
+ unsigned int ret = XFS_RENAME_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to remove or add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += max(resp->tr_attrsetm.tr_logcount,
+ resp->tr_attrrm.tr_logcount);
+
+ return ret;
}
/*
@@ -461,6 +544,23 @@ xfs_calc_iunlink_remove_reservation(
2 * M_IGEO(mp)->inode_cluster_size;
}
+/*
+ * Log count for a link transaction: XFS_LINK_LOG_COUNT, plus the attr set
+ * log count in @resp when parent pointers are enabled on @mp.
+ */
+static inline unsigned int
+xfs_link_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_LINK_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
+
/*
* For creating a link to an inode:
* the parent directory inode: inode size
@@ -477,14 +577,23 @@ STATIC uint
xfs_calc_link_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- xfs_calc_iunlink_remove_reservation(mp) +
- max((xfs_calc_inode_res(mp, 2) +
- xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int overhead = XFS_DQUOT_LOGRES;
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ overhead += xfs_calc_iunlink_remove_reservation(mp);
+ t1 = xfs_calc_inode_res(mp, 2) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
+
+ if (xfs_has_parent(mp)) {
+ t3 = resp->tr_attrsetm.tr_logres;
+ overhead += xfs_calc_pptr_link_overhead();
+ }
+
+ return overhead + max3(t1, t2, t3);
}
/*
@@ -499,6 +608,23 @@ xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
M_IGEO(mp)->inode_cluster_size;
}
+/*
+ * Log count for a remove transaction: XFS_REMOVE_LOG_COUNT, plus the attr
+ * remove log count in @resp when parent pointers are enabled on @mp.
+ */
+static inline unsigned int
+xfs_remove_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_REMOVE_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to remove one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrrm.tr_logcount;
+
+ return ret;
+}
+
/*
* For removing a directory entry we can modify:
* the parent directory inode: inode size
@@ -515,14 +641,24 @@ STATIC uint
xfs_calc_remove_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- xfs_calc_iunlink_add_reservation(mp) +
- max((xfs_calc_inode_res(mp, 2) +
- xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int overhead = XFS_DQUOT_LOGRES;
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ overhead += xfs_calc_iunlink_add_reservation(mp);
+
+ t1 = xfs_calc_inode_res(mp, 2) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
+ XFS_FSB_TO_B(mp, 1));
+
+ if (xfs_has_parent(mp)) {
+ t3 = resp->tr_attrrm.tr_logres;
+ overhead += xfs_calc_pptr_unlink_overhead();
+ }
+
+ return overhead + max3(t1, t2, t3);
}
/*
@@ -571,24 +707,69 @@ xfs_calc_icreate_resv_alloc(
xfs_calc_finobt_res(mp);
}
+/*
+ * Log count for an inode create transaction: XFS_CREATE_LOG_COUNT, plus the
+ * attr set log count in @resp when parent pointers are enabled on @mp.
+ */
+static inline unsigned int
+xfs_icreate_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_CREATE_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
+
STATIC uint
-xfs_calc_icreate_reservation(xfs_mount_t *mp)
+xfs_calc_icreate_reservation(
+ struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max(xfs_calc_icreate_resv_alloc(mp),
- xfs_calc_create_resv_modify(mp));
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int overhead = XFS_DQUOT_LOGRES;
+ unsigned int t1, t2, t3 = 0;
+
+ t1 = xfs_calc_icreate_resv_alloc(mp);
+ t2 = xfs_calc_create_resv_modify(mp);
+
+ if (xfs_has_parent(mp)) {
+ t3 = resp->tr_attrsetm.tr_logres;
+ overhead += xfs_calc_pptr_link_overhead();
+ }
+
+ return overhead + max3(t1, t2, t3);
}
STATIC uint
xfs_calc_create_tmpfile_reservation(
struct xfs_mount *mp)
{
- uint res = XFS_DQUOT_LOGRES(mp);
+ uint res = XFS_DQUOT_LOGRES;
res += xfs_calc_icreate_resv_alloc(mp);
return res + xfs_calc_iunlink_add_reservation(mp);
}
+static inline unsigned int
+xfs_mkdir_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_MKDIR_LOG_COUNT;
+
+ /*
+ * tr_mkdir log count: with parent pointers enabled, also pre-reserve
+ * tr_attrsetm's log count for the rolls that add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
+
/*
* Making a new directory is the same as creating a new file.
*/
@@ -599,6 +780,22 @@ xfs_calc_mkdir_reservation(
return xfs_calc_icreate_reservation(mp);
}
+static inline unsigned int
+xfs_symlink_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_SYMLINK_LOG_COUNT;
+
+ /*
+ * tr_symlink log count: with parent pointers enabled, also pre-reserve
+ * tr_attrsetm's log count for the rolls that add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
/*
* Making a new symlink is the same as creating a new file, but
@@ -632,7 +829,7 @@ STATIC uint
xfs_calc_ifree_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_iunlink_remove_reservation(mp) +
@@ -649,7 +846,7 @@ STATIC uint
xfs_calc_ichange_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
@@ -721,7 +918,7 @@ xfs_calc_growrtfree_reservation(
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
- xfs_calc_buf_res(1, mp->m_rsumsize);
+ xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks));
}
/*
@@ -758,7 +955,7 @@ STATIC uint
xfs_calc_addafork_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
@@ -806,7 +1003,7 @@ STATIC uint
xfs_calc_attrsetm_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
@@ -846,7 +1043,7 @@ STATIC uint
xfs_calc_attrrm_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
max((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
XFS_FSB_TO_B(mp, 1)) +
@@ -911,54 +1108,76 @@ xfs_calc_sb_reservation(
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
-void
-xfs_trans_resv_calc(
+/*
+ * Namespace reservations.
+ *
+ * These get tricky when parent pointers are enabled as we have attribute
+ * modifications occurring from within these transactions. Rather than confuse
+ * each of these reservation calculations with the conditional attribute
+ * reservations, add them here in a clear and concise manner. This requires that
+ * the attribute reservations have already been calculated.
+ *
+ * Note that we only include the static attribute reservation here; the runtime
+ * reservation will have to be modified by the size of the attributes being
+ * added/removed/modified. See the comments on the attribute reservation
+ * calculations for more details.
+ */
+STATIC void
+xfs_calc_namespace_reservations(
struct xfs_mount *mp,
struct xfs_trans_resv *resp)
{
- int logcount_adj = 0;
-
- /*
- * The following transactions are logged in physical format and
- * require a permanent reservation on space.
- */
- resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
- resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
- resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
- resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
- resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
- resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ ASSERT(resp->tr_attrsetm.tr_logres > 0);
resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
- resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
+ resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp);
resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
- resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
+ resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp);
resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
- resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
+ resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp);
resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
- resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
+ resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp);
resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
- resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
+ resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp);
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
+ resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp);
+ resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+}
+
+void
+xfs_trans_resv_calc(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ int logcount_adj = 0;
+
+ /*
+ * The following transactions are logged in physical format and
+ * require a permanent reservation on space.
+ */
+ resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
+ resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
+ resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+ resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
+ resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
+ resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
resp->tr_create_tmpfile.tr_logres =
xfs_calc_create_tmpfile_reservation(mp);
resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
- resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
- resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
- resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -988,6 +1207,8 @@ xfs_trans_resv_calc(
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ xfs_calc_namespace_reservations(mp, resp);
+
/*
* The following transactions are logged in logical format with
* a default log count.
diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c
new file mode 100644
index 000000000000..b9dc3752f702
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_trans_space.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+
+/* Calculate the disk space required to add one parent pointer of name length @namelen. */
+unsigned int
+xfs_parent_calc_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ /*
+ * Parent pointers are always the first attr in an attr tree, and are
+ * never larger than a block.
+ */
+ return XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) +
+ XFS_NEXTENTADD_SPACE_RES(mp, namelen, XFS_ATTR_FORK);
+}
+/* Create: XFS_IALLOC_SPACE_RES + dir entry for @namelen, plus a parent pointer if enabled. */
+unsigned int
+xfs_create_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ unsigned int ret;
+
+ ret = XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp, namelen);
+ if (xfs_has_parent(mp))
+ ret += xfs_parent_calc_space_res(mp, namelen);
+
+ return ret;
+}
+/* Mkdir reserves exactly the same space as xfs_create_space_res() for @namelen. */
+unsigned int
+xfs_mkdir_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ return xfs_create_space_res(mp, namelen);
+}
+/* Link: XFS_DIRENTER_SPACE_RES for @namelen, plus a parent pointer if enabled. */
+unsigned int
+xfs_link_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ unsigned int ret;
+
+ ret = XFS_DIRENTER_SPACE_RES(mp, namelen);
+ if (xfs_has_parent(mp))
+ ret += xfs_parent_calc_space_res(mp, namelen);
+
+ return ret;
+}
+/* Symlink: ialloc + dir entry for @namelen + caller's @fsblocks, plus a parent pointer if enabled. */
+unsigned int
+xfs_symlink_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen,
+ unsigned int fsblocks)
+{
+ unsigned int ret;
+
+ ret = XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp, namelen) +
+ fsblocks;
+
+ if (xfs_has_parent(mp))
+ ret += xfs_parent_calc_space_res(mp, namelen);
+
+ return ret;
+}
+/* Remove: XFS_DIRREMOVE_SPACE_RES, plus parent pointer space for @namelen if enabled. */
+unsigned int
+xfs_remove_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ unsigned int ret = XFS_DIRREMOVE_SPACE_RES(mp);
+
+ if (xfs_has_parent(mp))
+ ret += xfs_parent_calc_space_res(mp, namelen);
+
+ return ret;
+}
+/* Rename: dir remove + dir enter for @target_namelen, plus parent pointer space (see below). */
+unsigned int
+xfs_rename_space_res(
+ struct xfs_mount *mp,
+ unsigned int src_namelen,
+ bool target_exists,
+ unsigned int target_namelen,
+ bool has_whiteout)
+{
+ unsigned int ret;
+
+ ret = XFS_DIRREMOVE_SPACE_RES(mp) +
+ XFS_DIRENTER_SPACE_RES(mp, target_namelen);
+
+ if (xfs_has_parent(mp)) {
+ if (has_whiteout)
+ ret += xfs_parent_calc_space_res(mp, src_namelen);
+ ret += 2 * xfs_parent_calc_space_res(mp, target_namelen);
+ }
+ /* NOTE(review): added even when xfs_has_parent() is false - confirm this is intended. */
+ if (target_exists)
+ ret += xfs_parent_calc_space_res(mp, target_namelen);
+
+ return ret;
+}
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 87b31c69a773..1155ff2d37e2 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -10,6 +10,10 @@
* Components of space reservations.
*/
+/* Worst case number of bmaps that can be held in a block. */
+#define XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp) \
+ (((mp)->m_bmap_dmxr[0]) - ((mp)->m_bmap_dmnr[0]))
+
/* Worst case number of rmaps that can be held in a block. */
#define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \
(((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0]))
@@ -76,31 +80,32 @@
/* This macro is not used - see inline code in xfs_attr_set */
#define XFS_ATTRSET_SPACE_RES(mp, v) \
(XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + XFS_B_TO_FSB(mp, v))
-#define XFS_CREATE_SPACE_RES(mp,nl) \
- (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_DIOSTRAT_SPACE_RES(mp, v) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
#define XFS_GROWFS_SPACE_RES(mp) \
(2 * (mp)->m_alloc_maxlevels)
#define XFS_GROWFSRT_SPACE_RES(mp,b) \
((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
-#define XFS_LINK_SPACE_RES(mp,nl) \
- XFS_DIRENTER_SPACE_RES(mp,nl)
-#define XFS_MKDIR_SPACE_RES(mp,nl) \
- (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_QM_DQALLOC_SPACE_RES(mp) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \
XFS_DQUOT_CLUSTER_SIZE_FSB)
#define XFS_QM_QINOCREATE_SPACE_RES(mp) \
XFS_IALLOC_SPACE_RES(mp)
-#define XFS_REMOVE_SPACE_RES(mp) \
- XFS_DIRREMOVE_SPACE_RES(mp)
-#define XFS_RENAME_SPACE_RES(mp,nl) \
- (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
-#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
- (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
#define XFS_IFREE_SPACE_RES(mp) \
(xfs_has_finobt(mp) ? M_IGEO(mp)->inobt_maxlevels : 0)
+unsigned int xfs_parent_calc_space_res(struct xfs_mount *mp,
+ unsigned int namelen);
+
+unsigned int xfs_create_space_res(struct xfs_mount *mp, unsigned int namelen);
+unsigned int xfs_mkdir_space_res(struct xfs_mount *mp, unsigned int namelen);
+unsigned int xfs_link_space_res(struct xfs_mount *mp, unsigned int namelen);
+unsigned int xfs_symlink_space_res(struct xfs_mount *mp, unsigned int namelen,
+ unsigned int fsblocks);
+unsigned int xfs_remove_space_res(struct xfs_mount *mp, unsigned int namelen);
+
+unsigned int xfs_rename_space_res(struct xfs_mount *mp,
+ unsigned int src_namelen, bool target_exists,
+ unsigned int target_namelen, bool has_whiteout);
#endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index c299b16c9365..1faf04204c5d 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -12,6 +12,8 @@
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_ag.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
/*
@@ -111,7 +113,7 @@ xfs_verify_ino(
/* Is this an internal inode number? */
inline bool
-xfs_internal_inum(
+xfs_is_sb_inum(
struct xfs_mount *mp,
xfs_ino_t ino)
{
@@ -129,24 +131,42 @@ xfs_verify_dir_ino(
struct xfs_mount *mp,
xfs_ino_t ino)
{
- if (xfs_internal_inum(mp, ino))
+ if (xfs_is_sb_inum(mp, ino))
return false;
return xfs_verify_ino(mp, ino);
}
/*
- * Verify that an realtime block number pointer doesn't point off the
- * end of the realtime device.
+ * Verify that a realtime block number pointer neither points outside the
+ * allocatable areas of the rtgroup nor off the end of the realtime
+ * device.
*/
inline bool
xfs_verify_rtbno(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
+ if (xfs_has_rtgroups(mp)) {
+ xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno);
+ xfs_rtxnum_t rtx = xfs_rtb_to_rtx(mp, rtbno);
+
+ if (rgno >= mp->m_sb.sb_rgcount)
+ return false;
+ if (rtx >= xfs_rtgroup_extents(mp, rgno))
+ return false;
+ if (xfs_has_rtsb(mp) && rgno == 0 && rtx == 0)
+ return false;
+ return true;
+ }
+
return rtbno < mp->m_sb.sb_rblocks;
}
-/* Verify that a realtime device extent is fully contained inside the volume. */
+/*
+ * Verify that an allocated realtime device extent neither points outside
+ * allocatable areas of the rtgroup, across an rtgroup boundary, nor off the
+ * end of the realtime device.
+ */
bool
xfs_verify_rtbext(
struct xfs_mount *mp,
@@ -159,7 +179,14 @@ xfs_verify_rtbext(
if (!xfs_verify_rtbno(mp, rtbno))
return false;
- return xfs_verify_rtbno(mp, rtbno + len - 1);
+ if (!xfs_verify_rtbno(mp, rtbno + len - 1))
+ return false;
+
+ if (xfs_has_rtgroups(mp) &&
+ xfs_rtb_to_rgno(mp, rtbno) != xfs_rtb_to_rgno(mp, rtbno + len - 1))
+ return false;
+
+ return true;
}
/* Calculate the range of valid icount values. */
@@ -170,13 +197,12 @@ xfs_icount_range(
unsigned long long *max)
{
unsigned long long nr_inos = 0;
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
/* root, rtbitmap, rtsum all live in the first chunk */
*min = XFS_INODES_PER_CHUNK;
- for_each_perag(mp, agno, pag)
+ while ((pag = xfs_perag_next(mp, pag)))
nr_inos += pag->agino_max - pag->agino_min + 1;
*max = nr_inos;
}
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 76eb9e328835..bf33c2b1e43e 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -9,10 +9,12 @@
typedef uint32_t prid_t; /* project ID */
typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */
+typedef uint32_t xfs_rgblock_t; /* blockno in realtime group */
typedef uint32_t xfs_agino_t; /* inode # within allocation grp */
typedef uint32_t xfs_extlen_t; /* extent length in blocks */
typedef uint32_t xfs_rtxlen_t; /* file extent length in rtextents */
typedef uint32_t xfs_agnumber_t; /* allocation group number */
+typedef uint32_t xfs_rgnumber_t; /* realtime group number */
typedef uint64_t xfs_extnum_t; /* # of extents in a file */
typedef uint32_t xfs_aextnum_t; /* # extents in an attribute fork */
typedef int64_t xfs_fsize_t; /* bytes in a file */
@@ -53,7 +55,9 @@ typedef void * xfs_failaddr_t;
#define NULLFILEOFF ((xfs_fileoff_t)-1)
#define NULLAGBLOCK ((xfs_agblock_t)-1)
+#define NULLRGBLOCK ((xfs_rgblock_t)-1)
#define NULLAGNUMBER ((xfs_agnumber_t)-1)
+#define NULLRGNUMBER ((xfs_rgnumber_t)-1)
#define NULLCOMMITLSN ((xfs_lsn_t)-1)
@@ -212,6 +216,16 @@ enum xbtree_recpacking {
XBTREE_RECPACKING_FULL,
};
+enum xfs_group_type {
+ XG_TYPE_AG,
+ XG_TYPE_RTG,
+ XG_TYPE_MAX,
+} __packed;
+/* { type, name } pairs for XG_TYPE_AG and XG_TYPE_RTG; XG_TYPE_MAX has no entry. */
+#define XG_TYPE_STRINGS \
+ { XG_TYPE_AG, "ag" }, \
+ { XG_TYPE_RTG, "rtg" }
+
/*
* Type verifier functions
*/
@@ -222,7 +236,7 @@ bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno,
xfs_fsblock_t len);
bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
-bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
+bool xfs_is_sb_inum(struct xfs_mount *mp, xfs_ino_t ino);
bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
bool xfs_verify_rtbext(struct xfs_mount *mp, xfs_rtblock_t rtbno,
@@ -235,16 +249,4 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off);
bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off,
xfs_fileoff_t len);
-/* Do we support an rt volume having this number of rtextents? */
-static inline bool
-xfs_validate_rtextents(
- xfs_rtbxlen_t rtextents)
-{
- /* No runt rt volumes */
- if (rtextents == 0)
- return false;
-
- return true;
-}
-
#endif /* __XFS_TYPES_H__ */