From ee1293308e01d359688243d665138f35a6f1f9b8 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 14 Sep 2023 09:06:56 -0700 Subject: btrfs: add raid stripe tree definitions Add definitions for the raid stripe tree. This tree will hold information about the on-disk layout of the stripes in a RAID set. Each stripe extent has a 1:1 relationship with an on-disk extent item and is doing the logical to per-drive physical address translation for the extent item in question. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index fc3c32186d7e..ca65d7b7a6ca 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -73,6 +73,9 @@ /* Holds the block group items for extent tree v2. */ #define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL +/* Tracks RAID stripes in block groups. */ +#define BTRFS_RAID_STRIPE_TREE_OBJECTID 12ULL + /* device stats in the device tree */ #define BTRFS_DEV_STATS_OBJECTID 0ULL @@ -261,6 +264,8 @@ #define BTRFS_DEV_ITEM_KEY 216 #define BTRFS_CHUNK_ITEM_KEY 228 +#define BTRFS_RAID_STRIPE_KEY 230 + /* * Records the overall state of the qgroups. * There's only one instance of this key present, @@ -719,6 +724,30 @@ struct btrfs_free_space_header { __le64 num_bitmaps; } __attribute__ ((__packed__)); +struct btrfs_raid_stride { + /* The id of device this raid extent lives on. */ + __le64 devid; + /* The physical location on disk. */ + __le64 physical; +} __attribute__ ((__packed__)); + +/* The stripe_extent::encoding, 1:1 mapping of enum btrfs_raid_types. */ +#define BTRFS_STRIPE_RAID0 1 +#define BTRFS_STRIPE_RAID1 2 +#define BTRFS_STRIPE_DUP 3 +#define BTRFS_STRIPE_RAID10 4 +#define BTRFS_STRIPE_RAID5 5 +#define BTRFS_STRIPE_RAID6 6 +#define BTRFS_STRIPE_RAID1C3 7 +#define BTRFS_STRIPE_RAID1C4 8 + +struct btrfs_stripe_extent { + __u8 encoding; + __u8 reserved[7]; + /* An array of raid strides this stripe is composed of. */ + struct btrfs_raid_stride strides[]; +} __attribute__ ((__packed__)); + #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) -- cgit v1.2.3 From 515020900d447796bc2f0f57064663617a11b65d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 14 Sep 2023 09:06:57 -0700 Subject: btrfs: read raid stripe tree from disk If we find the raid-stripe-tree on mount, read it from disk. This is a backward incompatible feature. The rescue=ignorebadroots mount option will skip this tree. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index dbb8b96da50d..b9a1d9af8ae8 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -333,6 +333,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11) #define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12) #define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13) +#define BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE (1ULL << 14) struct btrfs_ioctl_feature_flags { __u64 compat_flags; -- cgit v1.2.3 From 182940f4f4dbd932776414744c8de64333957725 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 16 May 2023 16:35:45 -0700 Subject: btrfs: qgroup: add new quota mode for simple quotas Add a new quota mode called "simple quotas". It can be enabled by the existing quota enable ioctl via a new command, and sets an incompat bit, as the implementation of simple quotas will make backwards incompatible changes to the disk format of the extent tree. Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 2 ++ include/uapi/linux/btrfs_tree.h | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index b9a1d9af8ae8..7c29d82db9ee 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -334,6 +334,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12) #define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13) #define BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE (1ULL << 14) +#define BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA (1ULL << 16) struct btrfs_ioctl_feature_flags { __u64 compat_flags; @@ -754,6 +755,7 @@ struct btrfs_ioctl_get_dev_stats { #define BTRFS_QUOTA_CTL_ENABLE 1 #define BTRFS_QUOTA_CTL_DISABLE 2 #define BTRFS_QUOTA_CTL_RESCAN__NOTUSED 3 +#define BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA 4 struct btrfs_ioctl_quota_ctl_args { __u64 cmd; __u64 status; diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index ca65d7b7a6ca..a1d5347f6adb 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -1233,9 +1233,17 @@ static inline __u16 btrfs_qgroup_level(__u64 qgroupid) */ #define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) +/* + * Whether or not this filesystem is using simple quotas. Not exactly the + * incompat bit, because we support using simple quotas, disabling it, then + * going back to full qgroup quotas. + */ +#define BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE (1ULL << 3) + #define BTRFS_QGROUP_STATUS_FLAGS_MASK (BTRFS_QGROUP_STATUS_FLAG_ON | \ BTRFS_QGROUP_STATUS_FLAG_RESCAN | \ - BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT | \ + BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE) #define BTRFS_QGROUP_STATUS_VERSION 1 -- cgit v1.2.3 From d9a620f77e33f2b0e9a5f131f3ee3c66d3285c57 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Mon, 30 Jan 2023 14:45:55 -0800 Subject: btrfs: new inline ref storing owning subvol of data extents In order to implement simple quota groups, we need to be able to associate a data extent with the subvolume that created it. Once you account for reflink, this information cannot be recovered without explicitly storing it. Options for storing it are: - a new key/item - a new extent inline ref item The former is backwards compatible, but wastes space, the latter is incompat, but is efficient in space and reuses the existing inline ref machinery, while only abusing it a tiny amount -- specifically, the new item is not a ref, per-se. Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index a1d5347f6adb..8b4def6aee52 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -233,6 +233,14 @@ #define BTRFS_SHARED_DATA_REF_KEY 184 +/* + * Special inline ref key which stores the id of the subvolume which originally + * created the extent. This subvolume owns the extent permanently from the + * perspective of simple quotas. Needed to know which subvolume to free quota + * usage from when the extent is deleted. + */ +#define BTRFS_EXTENT_OWNER_REF_KEY 188 + /* * block groups give us hints into the extent allocation trees. Which * blocks are free etc etc @@ -816,6 +824,10 @@ struct btrfs_shared_data_ref { __le32 count; } __attribute__ ((__packed__)); +struct btrfs_extent_owner_ref { + __le64 root_id; +} __attribute__ ((__packed__)); + struct btrfs_extent_inline_ref { __u8 type; __le64 offset; -- cgit v1.2.3 From bd7c1ea3a302aba727a1ced9937ec84c6407724e Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 28 Mar 2023 15:45:20 -0700 Subject: btrfs: qgroup: check generation when recording simple quota delta Simple quotas count extents only from the moment the feature is enabled. Therefore, if we do something like: 1. create subvol S 2. write F in S 3. enable quotas 4. remove F 5. write G in S then after 3. and 4. we would expect the simple quota usage of S to be 0 (putting aside some metadata extents that might be written) and after 5., it should be the size of G plus metadata. Therefore, we need to be able to determine whether a particular quota delta we are processing predates simple quota enablement. To do this, store the transaction id when quotas were enabled. In fs_info for immediate use and in the quota status item to make it recoverable on mount. When we see a delta, check if the generation of the extent item is less than that of quota enablement. If so, we should ignore the delta from this extent. Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 8b4def6aee52..c25fc9614594 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -1277,6 +1277,15 @@ struct btrfs_qgroup_status_item { * of the scan. It contains a logical address */ __le64 rescan; + + /* + * The generation when quotas were last enabled. Used by simple quotas to + * avoid decrementing when freeing an extent that was written before + * enable. + * + * Set only if flags contain BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE. + */ + __le64 enable_gen; } __attribute__ ((__packed__)); struct btrfs_qgroup_info_item { -- cgit v1.2.3