From 65adc27375a85beb57c3869cedb2a410fad9c288 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Tue, 12 Dec 2017 11:37:15 -0600 Subject: gfs2: Add a next-resource-group pointer to resource groups Add a new rg_skip field to struct gfs2_rgrp, replacing __pad. The rg_skip field has the following meaning: - If rg_skip is zero, it is considered unset and not useful. - If rg_skip is non-zero, its value will be the number of blocks between this rgrp's address and the next rgrp's address. This can be used as a hint by fsck.gfs2 when rebuilding a bad rindex, for example. This will provide less dependency on the rindex in future, and allow tools such as fsck.gfs2 to iterate the resource groups without keeping the rindex around. The field is updated in gfs2_rgrp_out() so that existing file systems will have it set. This means that any resource groups that aren't ever written will not be updated. The final rgrp is a special case as there is no next rgrp, so it will always have a rg_skip of 0 (unless the fs is extended). Before this patch, gfs2_rgrp_out() zeroes the __pad field explicitly, so the rg_skip field can get set back to 0 in cases where nodes with and without this patch are mixed in a cluster. In some cases, the field may bounce between being set by one node and then zeroed by another which may harm performance slightly, e.g. when two nodes create many small files. In testing this situation is rare but it becomes more likely as the filesystem fills up and there are fewer resource groups to choose from. The problem goes away when all nodes are running with this patch. Dipping into the space currently occupied by the rg_reserved field would have resulted in the same problem as it is also explicitly zeroed, so unfortunately there is no other way around it. Signed-off-by: Andrew Price Signed-off-by: Bob Peterson --- include/uapi/linux/gfs2_ondisk.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h index 5156bad77b47..da7a30ddef72 100644 --- a/include/uapi/linux/gfs2_ondisk.h +++ b/include/uapi/linux/gfs2_ondisk.h @@ -187,7 +187,10 @@ struct gfs2_rgrp { __be32 rg_flags; __be32 rg_free; __be32 rg_dinodes; - __be32 __pad; + union { + __be32 __pad; + __be32 rg_skip; /* Distance to the next rgrp in fs blocks */ + }; __be64 rg_igeneration; __u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */ -- cgit v1.2.3 From 166725d96322473305e35f9d580591a01697ab29 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Tue, 12 Dec 2017 11:40:05 -0600 Subject: gfs2: Add rindex fields to rgrp headers Add rg_data0, rg_data and rg_bitbytes to struct gfs2_rgrp. The fields are identical to their counterparts in struct gfs2_rindex and are intended to reduce the use of the rindex. For now the fields are only written back as the in-memory equivalents in struct gfs2_rgrpd are set using values from the rindex. However, they are needed at this point so that userspace can make use of them, allowing a migration away from the rindex over time. The new fields take up previously reserved space which was explicitly zeroed on write so, in clusters with mixed kernels, these fields could get zeroed after being set and this should not be treated as an error. Signed-off-by: Andrew Price Signed-off-by: Bob Peterson --- include/uapi/linux/gfs2_ondisk.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h index da7a30ddef72..648e0cbca574 100644 --- a/include/uapi/linux/gfs2_ondisk.h +++ b/include/uapi/linux/gfs2_ondisk.h @@ -192,8 +192,13 @@ struct gfs2_rgrp { __be32 rg_skip; /* Distance to the next rgrp in fs blocks */ }; __be64 rg_igeneration; + /* The following 3 fields are duplicated from gfs2_rindex to reduce + reliance on the rindex */ + __be64 rg_data0; /* First data location */ + __be32 rg_data; /* Number of data blocks in rgrp */ + __be32 rg_bitbytes; /* Number of bytes in data bitmaps */ - __u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */ + __u8 rg_reserved[64]; /* Several fields from gfs1 now reserved */ }; /* -- cgit v1.2.3 From 850d2d915fa69011bef9bd668499cce889fdd8b3 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Tue, 12 Dec 2017 11:42:30 -0600 Subject: gfs2: Add a crc field to resource group headers Add the rg_crc field to store a crc32 of the gfs2_rgrp structure. This allows us to check resource group headers' integrity and removes the requirement to check them against the rindex entries in fsck. If this field is found to be zero, it should be ignored (or updated with an accurate value). Signed-off-by: Andrew Price Signed-off-by: Bob Peterson --- include/uapi/linux/gfs2_ondisk.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h index 648e0cbca574..09f0920f07e9 100644 --- a/include/uapi/linux/gfs2_ondisk.h +++ b/include/uapi/linux/gfs2_ondisk.h @@ -197,8 +197,9 @@ struct gfs2_rgrp { __be64 rg_data0; /* First data location */ __be32 rg_data; /* Number of data blocks in rgrp */ __be32 rg_bitbytes; /* Number of bytes in data bitmaps */ + __be32 rg_crc; /* crc32 of the structure with this field 0 */ - __u8 rg_reserved[64]; /* Several fields from gfs1 now reserved */ + __u8 rg_reserved[60]; /* Several fields from gfs1 now reserved */ }; /* -- cgit v1.2.3 From c1696fb85d33194cf65c7ebfc82a75696299c3a3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 17 Jan 2018 00:01:33 +0100 Subject: GFS2: Introduce new gfs2_log_header_v2 This patch adds a new structure called gfs2_log_header_v2 which is used to store expanded fields into previously unused areas of the log headers (i.e., this change is backwards compatible). Some of these are used for debug purposes so we can backtrack when problems occur. Others are reserved for future expansion. This patch is based on a prototype from Steve Whitehouse. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- include/uapi/linux/gfs2_ondisk.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h index 09f0920f07e9..9a81d520f54a 100644 --- a/include/uapi/linux/gfs2_ondisk.h +++ b/include/uapi/linux/gfs2_ondisk.h @@ -403,7 +403,15 @@ struct gfs2_ea_header { * Log header structure */ -#define GFS2_LOG_HEAD_UNMOUNT 0x00000001 /* log is clean */ +#define GFS2_LOG_HEAD_UNMOUNT 0x00000001 /* log is clean */ +#define GFS2_LOG_HEAD_FLUSH_NORMAL 0x00000002 /* normal log flush */ +#define GFS2_LOG_HEAD_FLUSH_SYNC 0x00000004 /* Sync log flush */ +#define GFS2_LOG_HEAD_FLUSH_SHUTDOWN 0x00000008 /* Shutdown log flush */ +#define GFS2_LOG_HEAD_FLUSH_FREEZE 0x00000010 /* Freeze flush */ +#define GFS2_LOG_HEAD_RECOVERY 0x00000020 /* Journal recovery */ +#define GFS2_LOG_HEAD_USERSPACE 0x80000000 /* Written by gfs2-utils */ + +#define LH_V1_SIZE (offsetofend(struct gfs2_log_header, lh_hash)) struct gfs2_log_header { struct gfs2_meta_header lh_header; @@ -412,7 +420,21 @@ struct gfs2_log_header { __be32 lh_flags; /* GFS2_LOG_HEAD_... */ __be32 lh_tail; /* Block number of log tail */ __be32 lh_blkno; - __be32 lh_hash; + __be32 lh_hash; /* crc up to here with this field 0 */ + + /* Version 2 additional fields start here */ + __be32 lh_crc; /* crc32c from lh_nsec to end of block */ + __be32 lh_nsec; /* Nanoseconds of timestamp */ + __be64 lh_sec; /* Seconds of timestamp */ + __be64 lh_addr; /* Block addr of this log header (absolute) */ + __be64 lh_jinode; /* Journal inode number */ + __be64 lh_statfs_addr; /* Local statfs inode number */ + __be64 lh_quota_addr; /* Local quota change inode number */ + + /* Statfs local changes (i.e. diff from global statfs) */ + __be64 lh_local_total; + __be64 lh_local_free; + __be64 lh_local_dinodes; }; /* -- cgit v1.2.3 From 805c090750a315c5443c14e06304e19a01c697a0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 8 Jan 2018 10:34:17 -0500 Subject: GFS2: Log the reason for log flushes in every log header This patch just adds the capability for GFS2 to track which function called gfs2_log_flush. This should make it easier to diagnose problems based on the sequence of events found in the journals. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- include/uapi/linux/gfs2_ondisk.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h index 9a81d520f54a..2dc10a034de1 100644 --- a/include/uapi/linux/gfs2_ondisk.h +++ b/include/uapi/linux/gfs2_ondisk.h @@ -411,6 +411,27 @@ struct gfs2_ea_header { #define GFS2_LOG_HEAD_RECOVERY 0x00000020 /* Journal recovery */ #define GFS2_LOG_HEAD_USERSPACE 0x80000000 /* Written by gfs2-utils */ +/* Log flush callers */ +#define GFS2_LFC_SHUTDOWN 0x00000100 +#define GFS2_LFC_JDATA_WPAGES 0x00000200 +#define GFS2_LFC_SET_FLAGS 0x00000400 +#define GFS2_LFC_AIL_EMPTY_GL 0x00000800 +#define GFS2_LFC_AIL_FLUSH 0x00001000 +#define GFS2_LFC_RGRP_GO_SYNC 0x00002000 +#define GFS2_LFC_INODE_GO_SYNC 0x00004000 +#define GFS2_LFC_INODE_GO_INVAL 0x00008000 +#define GFS2_LFC_FREEZE_GO_SYNC 0x00010000 +#define GFS2_LFC_KILL_SB 0x00020000 +#define GFS2_LFC_DO_SYNC 0x00040000 +#define GFS2_LFC_INPLACE_RESERVE 0x00080000 +#define GFS2_LFC_WRITE_INODE 0x00100000 +#define GFS2_LFC_MAKE_FS_RO 0x00200000 +#define GFS2_LFC_SYNC_FS 0x00400000 +#define GFS2_LFC_EVICT_INODE 0x00800000 +#define GFS2_LFC_TRANS_END 0x01000000 +#define GFS2_LFC_LOGD_JFLUSH_REQD 0x02000000 +#define GFS2_LFC_LOGD_AIL_FLUSH_REQD 0x04000000 + #define LH_V1_SIZE (offsetofend(struct gfs2_log_header, lh_hash)) struct gfs2_log_header { -- cgit v1.2.3