summary | refs | log | tree | commit | diff
path: root/fs/xfs/xfs_extfree_item.c
diff options
context:
space:
mode:
author: Carlos Maiolino <cem@kernel.org>, 2024-11-12 11:00:42 +0100
committer: Carlos Maiolino <cem@kernel.org>, 2024-11-12 11:00:42 +0100
commit: b939bcdca3756db877aa084edd70901624faf26a (patch)
tree: 89e070904515052ed6741928bf6626e8c3b60fce /fs/xfs/xfs_extfree_item.c
parent: cb288c9fb2aba9a5d71b8191dfcb6f2cced37f7a (diff)
parent: a3315d11305f5c2d82fcb00e3df34775adff4084 (diff)
Merge tag 'realtime-groups-6.13_2024-11-05' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into staging-merge
xfs: shard the realtime section [v5.5 06/10]

Right now, the realtime section uses a single pair of metadata inodes to store the free space information. This presents a scalability problem since every thread trying to allocate or free rt extents has to lock these files. Solve this problem by sharding the realtime section into separate realtime allocation groups.

While we're at it, define a superblock to be stamped into the start of the rt section. This enables utilities such as blkid to identify block devices containing realtime sections, and avoids the situation where anything written into block 0 of the realtime extent can be misinterpreted as file data.

The biggest advantage of rtgroups will become evident later when we get to adding rmap and reflink to the realtime volume, since the geometry constraints are the same for rt groups and AGs. Hence we can reuse all that code directly.

This is a very large patchset, but it catches us up with 20 years of technical debt that has accumulated.

With a bit of luck, this should all go splendidly.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Diffstat (limited to 'fs/xfs/xfs_extfree_item.c')
-rw-r--r-- fs/xfs/xfs_extfree_item.c | 270
1 file changed, 239 insertions, 31 deletions
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index e469510986e8..a25c713ff888 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -25,6 +25,10 @@
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
struct kmem_cache *xfs_efi_cache;
struct kmem_cache *xfs_efd_cache;
@@ -95,16 +99,15 @@ xfs_efi_item_format(
ASSERT(atomic_read(&efip->efi_next_extent) ==
efip->efi_format.efi_nextents);
+ ASSERT(lip->li_type == XFS_LI_EFI || lip->li_type == XFS_LI_EFI_RT);
- efip->efi_format.efi_type = XFS_LI_EFI;
+ efip->efi_format.efi_type = lip->li_type;
efip->efi_format.efi_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT,
- &efip->efi_format,
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format,
xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents));
}
-
/*
* The unpin operation is the last place an EFI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -140,12 +143,14 @@ xfs_efi_item_release(
STATIC struct xfs_efi_log_item *
xfs_efi_init(
struct xfs_mount *mp,
+ unsigned short item_type,
uint nextents)
-
{
struct xfs_efi_log_item *efip;
+ ASSERT(item_type == XFS_LI_EFI || item_type == XFS_LI_EFI_RT);
ASSERT(nextents > 0);
+
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
efip = kzalloc(xfs_efi_log_item_sizeof(nextents),
GFP_KERNEL | __GFP_NOFAIL);
@@ -154,7 +159,7 @@ xfs_efi_init(
GFP_KERNEL | __GFP_NOFAIL);
}
- xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
+ xfs_log_item_init(mp, &efip->efi_item, item_type, &xfs_efi_item_ops);
efip->efi_format.efi_nextents = nextents;
efip->efi_format.efi_id = (uintptr_t)(void *)efip;
atomic_set(&efip->efi_next_extent, 0);
@@ -264,12 +269,12 @@ xfs_efd_item_format(
struct xfs_log_iovec *vecp = NULL;
ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
+ ASSERT(lip->li_type == XFS_LI_EFD || lip->li_type == XFS_LI_EFD_RT);
- efdp->efd_format.efd_type = XFS_LI_EFD;
+ efdp->efd_format.efd_type = lip->li_type;
efdp->efd_format.efd_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT,
- &efdp->efd_format,
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format,
xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents));
}
@@ -308,6 +313,14 @@ static inline struct xfs_extent_free_item *xefi_entry(const struct list_head *e)
return list_entry(e, struct xfs_extent_free_item, xefi_list);
}
+static inline bool
+xfs_efi_item_isrt(const struct xfs_log_item *lip)
+{
+ ASSERT(lip->li_type == XFS_LI_EFI || lip->li_type == XFS_LI_EFI_RT);
+
+ return lip->li_type == XFS_LI_EFI_RT;
+}
+
/*
* Fill the EFD with all extents from the EFI when we need to roll the
* transaction and continue with a new EFI.
@@ -388,18 +401,20 @@ xfs_extent_free_log_item(
}
static struct xfs_log_item *
-xfs_extent_free_create_intent(
+__xfs_extent_free_create_intent(
struct xfs_trans *tp,
struct list_head *items,
unsigned int count,
- bool sort)
+ bool sort,
+ unsigned short item_type)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_efi_log_item *efip = xfs_efi_init(mp, count);
+ struct xfs_efi_log_item *efip;
struct xfs_extent_free_item *xefi;
ASSERT(count > 0);
+ efip = xfs_efi_init(mp, item_type, count);
if (sort)
list_sort(mp, items, xfs_extent_free_diff_items);
list_for_each_entry(xefi, items, xefi_list)
@@ -407,6 +422,23 @@ xfs_extent_free_create_intent(
return &efip->efi_item;
}
+static struct xfs_log_item *
+xfs_extent_free_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ return __xfs_extent_free_create_intent(tp, items, count, sort,
+ XFS_LI_EFI);
+}
+
+static inline unsigned short
+xfs_efd_type_from_efi(const struct xfs_efi_log_item *efip)
+{
+ return xfs_efi_item_isrt(&efip->efi_item) ? XFS_LI_EFD_RT : XFS_LI_EFD;
+}
+
/* Get an EFD so we can process all the free extents. */
static struct xfs_log_item *
xfs_extent_free_create_done(
@@ -427,8 +459,8 @@ xfs_extent_free_create_done(
GFP_KERNEL | __GFP_NOFAIL);
}
- xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD,
- &xfs_efd_item_ops);
+ xfs_log_item_init(tp->t_mountp, &efdp->efd_item,
+ xfs_efd_type_from_efi(efip), &xfs_efd_item_ops);
efdp->efd_efip = efip;
efdp->efd_format.efd_nextents = count;
efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
@@ -436,6 +468,17 @@ xfs_extent_free_create_done(
return &efdp->efd_item;
}
+static inline const struct xfs_defer_op_type *
+xefi_ops(
+ struct xfs_extent_free_item *xefi)
+{
+ if (xfs_efi_is_realtime(xefi))
+ return &xfs_rtextent_free_defer_type;
+ if (xefi->xefi_agresv == XFS_AG_RESV_AGFL)
+ return &xfs_agfl_free_defer_type;
+ return &xfs_extent_free_defer_type;
+}
+
/* Add this deferred EFI to the transaction. */
void
xfs_extent_free_defer_add(
@@ -445,16 +488,11 @@ xfs_extent_free_defer_add(
{
struct xfs_mount *mp = tp->t_mountp;
- trace_xfs_extent_free_defer(mp, xefi);
-
xefi->xefi_group = xfs_group_intent_get(mp, xefi->xefi_startblock,
- XG_TYPE_AG);
- if (xefi->xefi_agresv == XFS_AG_RESV_AGFL)
- *dfpp = xfs_defer_add(tp, &xefi->xefi_list,
- &xfs_agfl_free_defer_type);
- else
- *dfpp = xfs_defer_add(tp, &xefi->xefi_list,
- &xfs_extent_free_defer_type);
+ xfs_efi_is_realtime(xefi) ? XG_TYPE_RTG : XG_TYPE_AG);
+
+ trace_xfs_extent_free_defer(mp, xefi);
+ *dfpp = xfs_defer_add(tp, &xefi->xefi_list, xefi_ops(xefi));
}
/* Cancel a free extent. */
@@ -560,8 +598,12 @@ xfs_agfl_free_finish_item(
static inline bool
xfs_efi_validate_ext(
struct xfs_mount *mp,
+ bool isrt,
struct xfs_extent *extp)
{
+ if (isrt)
+ return xfs_verify_rtbext(mp, extp->ext_start, extp->ext_len);
+
return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len);
}
@@ -569,6 +611,7 @@ static inline void
xfs_efi_recover_work(
struct xfs_mount *mp,
struct xfs_defer_pending *dfp,
+ bool isrt,
struct xfs_extent *extp)
{
struct xfs_extent_free_item *xefi;
@@ -580,7 +623,9 @@ xfs_efi_recover_work(
xefi->xefi_agresv = XFS_AG_RESV_NONE;
xefi->xefi_owner = XFS_RMAP_OWN_UNKNOWN;
xefi->xefi_group = xfs_group_intent_get(mp, extp->ext_start,
- XG_TYPE_AG);
+ isrt ? XG_TYPE_RTG : XG_TYPE_AG);
+ if (isrt)
+ xefi->xefi_flags |= XFS_EFI_REALTIME;
xfs_defer_add_item(dfp, &xefi->xefi_list);
}
@@ -601,14 +646,15 @@ xfs_extent_free_recover_work(
struct xfs_trans *tp;
int i;
int error = 0;
+ bool isrt = xfs_efi_item_isrt(lip);
/*
- * First check the validity of the extents described by the
- * EFI. If any are bad, then assume that all are bad and
- * just toss the EFI.
+ * First check the validity of the extents described by the EFI. If
+ * any are bad, then assume that all are bad and just toss the EFI.
+ * Mixing RT and non-RT extents in the same EFI item is not allowed.
*/
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
- if (!xfs_efi_validate_ext(mp,
+ if (!xfs_efi_validate_ext(mp, isrt,
&efip->efi_format.efi_extents[i])) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
&efip->efi_format,
@@ -616,7 +662,8 @@ xfs_extent_free_recover_work(
return -EFSCORRUPTED;
}
- xfs_efi_recover_work(mp, dfp, &efip->efi_format.efi_extents[i]);
+ xfs_efi_recover_work(mp, dfp, isrt,
+ &efip->efi_format.efi_extents[i]);
}
resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
@@ -654,10 +701,12 @@ xfs_extent_free_relog_intent(
count = EFI_ITEM(intent)->efi_format.efi_nextents;
extp = EFI_ITEM(intent)->efi_format.efi_extents;
+ ASSERT(intent->li_type == XFS_LI_EFI || intent->li_type == XFS_LI_EFI_RT);
+
efdp->efd_next_extent = count;
memcpy(efdp->efd_format.efd_extents, extp, count * sizeof(*extp));
- efip = xfs_efi_init(tp->t_mountp, count);
+ efip = xfs_efi_init(tp->t_mountp, intent->li_type, count);
memcpy(efip->efi_format.efi_extents, extp, count * sizeof(*extp));
atomic_set(&efip->efi_next_extent, count);
@@ -689,6 +738,72 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
.relog_intent = xfs_extent_free_relog_intent,
};
+#ifdef CONFIG_XFS_RT
+/* Create a realtime extent freeing intent. */
+static struct xfs_log_item *
+xfs_rtextent_free_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ return __xfs_extent_free_create_intent(tp, items, count, sort,
+ XFS_LI_EFI_RT);
+}
+
+/* Process a free realtime extent. */
+STATIC int
+xfs_rtextent_free_finish_item(
+ struct xfs_trans *tp,
+ struct xfs_log_item *done,
+ struct list_head *item,
+ struct xfs_btree_cur **state)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent_free_item *xefi = xefi_entry(item);
+ struct xfs_efd_log_item *efdp = EFD_ITEM(done);
+ struct xfs_rtgroup **rtgp = (struct xfs_rtgroup **)state;
+ int error = 0;
+
+ trace_xfs_extent_free_deferred(mp, xefi);
+
+ if (!(xefi->xefi_flags & XFS_EFI_CANCELLED)) {
+ if (*rtgp != to_rtg(xefi->xefi_group)) {
+ *rtgp = to_rtg(xefi->xefi_group);
+ xfs_rtgroup_lock(*rtgp, XFS_RTGLOCK_BITMAP);
+ xfs_rtgroup_trans_join(tp, *rtgp,
+ XFS_RTGLOCK_BITMAP);
+ }
+ error = xfs_rtfree_blocks(tp, *rtgp,
+ xefi->xefi_startblock, xefi->xefi_blockcount);
+ }
+ if (error == -EAGAIN) {
+ xfs_efd_from_efi(efdp);
+ return error;
+ }
+
+ xfs_efd_add_extent(efdp, xefi);
+ xfs_extent_free_cancel_item(item);
+ return error;
+}
+
+const struct xfs_defer_op_type xfs_rtextent_free_defer_type = {
+ .name = "rtextent_free",
+ .max_items = XFS_EFI_MAX_FAST_EXTENTS,
+ .create_intent = xfs_rtextent_free_create_intent,
+ .abort_intent = xfs_extent_free_abort_intent,
+ .create_done = xfs_extent_free_create_done,
+ .finish_item = xfs_rtextent_free_finish_item,
+ .cancel_item = xfs_extent_free_cancel_item,
+ .recover_work = xfs_extent_free_recover_work,
+ .relog_intent = xfs_extent_free_relog_intent,
+};
+#else
+const struct xfs_defer_op_type xfs_rtextent_free_defer_type = {
+ .name = "rtextent_free",
+};
+#endif /* CONFIG_XFS_RT */
+
STATIC bool
xfs_efi_item_match(
struct xfs_log_item *lip,
@@ -733,7 +848,7 @@ xlog_recover_efi_commit_pass2(
return -EFSCORRUPTED;
}
- efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
+ efip = xfs_efi_init(mp, ITEM_TYPE(item), efi_formatp->efi_nextents);
error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
if (error) {
xfs_efi_item_free(efip);
@@ -751,6 +866,58 @@ const struct xlog_recover_item_ops xlog_efi_item_ops = {
.commit_pass2 = xlog_recover_efi_commit_pass2,
};
+#ifdef CONFIG_XFS_RT
+STATIC int
+xlog_recover_rtefi_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_efi_log_item *efip;
+ struct xfs_efi_log_format *efi_formatp;
+ int error;
+
+ efi_formatp = item->ri_buf[0].i_addr;
+
+ if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ efip = xfs_efi_init(mp, ITEM_TYPE(item), efi_formatp->efi_nextents);
+ error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
+ if (error) {
+ xfs_efi_item_free(efip);
+ return error;
+ }
+ atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
+
+ xlog_recover_intent_item(log, &efip->efi_item, lsn,
+ &xfs_rtextent_free_defer_type);
+ return 0;
+}
+#else
+STATIC int
+xlog_recover_rtefi_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+}
+#endif
+
+const struct xlog_recover_item_ops xlog_rtefi_item_ops = {
+ .item_type = XFS_LI_EFI_RT,
+ .commit_pass2 = xlog_recover_rtefi_commit_pass2,
+};
+
/*
* This routine is called when an EFD format structure is found in a committed
* transaction in the log. Its purpose is to cancel the corresponding EFI if it
@@ -793,3 +960,44 @@ const struct xlog_recover_item_ops xlog_efd_item_ops = {
.item_type = XFS_LI_EFD,
.commit_pass2 = xlog_recover_efd_commit_pass2,
};
+
+#ifdef CONFIG_XFS_RT
+STATIC int
+xlog_recover_rtefd_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_efd_log_format *efd_formatp;
+ int buflen = item->ri_buf[0].i_len;
+
+ efd_formatp = item->ri_buf[0].i_addr;
+
+ if (buflen < sizeof(struct xfs_efd_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
+
+ if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof(
+ efd_formatp->efd_nextents) &&
+ item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof(
+ efd_formatp->efd_nextents)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
+
+ xlog_recover_release_intent(log, XFS_LI_EFI_RT,
+ efd_formatp->efd_efi_id);
+ return 0;
+}
+#else
+# define xlog_recover_rtefd_commit_pass2 xlog_recover_rtefi_commit_pass2
+#endif
+
+const struct xlog_recover_item_ops xlog_rtefd_item_ops = {
+ .item_type = XFS_LI_EFD_RT,
+ .commit_pass2 = xlog_recover_rtefd_commit_pass2,
+};