From afc7cbca5bfd556c3e12d3acefbee5ab0cbd4670 Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Support large blocksize up to PAGESIZE This patch set supports large block size(>4k, <=64k) in ext4, just enlarging the block size limit. But it is NOT possible to have 64kB blocksize on ext4 without some changes to the directory handling code. The reason is that an empty 64kB directory block would have a rec_len == (__u16)2^16 == 0, and this would cause an error to be hit in the filesystem. The proposed solution is treat 64k rec_len with a an impossible value like rec_len = 0xffff to handle this. The Patch-set consists of the following 2 patches. [1/2] ext4: enlarge blocksize - Allow blocksize up to pagesize [2/2] ext4: fix rec_len overflow - prevent rec_len from overflow with 64KB blocksize Now on 64k page ppc64 box runs with this patch set we could create a 64k block size ext4dev, and able to handle empty directory block. Signed-off-by: Takashi Sato Signed-off-by: Mingming Cao --- include/linux/ext4_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 97dd409d5f4a..dfe4487fc739 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -73,8 +73,8 @@ * Macro-instructions used to manage several block sizes */ #define EXT4_MIN_BLOCK_SIZE 1024 -#define EXT4_MAX_BLOCK_SIZE 4096 -#define EXT4_MIN_BLOCK_LOG_SIZE 10 +#define EXT4_MAX_BLOCK_SIZE 65536 +#define EXT4_MIN_BLOCK_LOG_SIZE 10 #ifdef __KERNEL__ # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) #else -- cgit v1.2.3 From a72d7f834e1afa08421938d7eb06bd8e56b0e58c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Avoid rec_len overflow with 64KB block size With 64KB blocksize, a directory entry can have size 64KB which does not fit into 16 bits we have for entry lenght. So we store 0xffff instead and convert value when read from / written to disk. The patch also converts some places to use ext4_next_entry() when we are changing them anyway. Signed-off-by: Jan Kara Signed-off-by: Mingming Cao --- include/linux/ext4_fs.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index dfe4487fc739..fb31c1aba989 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -767,6 +767,26 @@ struct ext4_dir_entry_2 { #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) #define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ ~EXT4_DIR_ROUND) +#define EXT4_MAX_REC_LEN ((1<<16)-1) + +static inline unsigned ext4_rec_len_from_disk(__le16 dlen) +{ + unsigned len = le16_to_cpu(dlen); + + if (len == EXT4_MAX_REC_LEN) + return 1 << 16; + return len; +} + +static inline __le16 ext4_rec_len_to_disk(unsigned len) +{ + if (len == (1 << 16)) + return cpu_to_le16(EXT4_MAX_REC_LEN); + else if (len > (1 << 16)) + BUG(); + return cpu_to_le16(len); +} + /* * Hash Tree Directory indexing * (c) Daniel Phillips, 2001 -- cgit v1.2.3 From 725d26d3f09ccb5bac4b4293096b985a312a0d67 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Introduce ext4_lblk_t This patch adds a new data type ext4_lblk_t to represent the logical file blocks. This is the preparatory patch to support large files in ext4 The follow up patch with convert the ext4_inode i_blocks to represent the number of blocks in file system block size. This changes makes it possible to have a block number 2**32 -1 which will result in overflow if the block number is represented by signed long. This patch convert all the block number to type ext4_lblk_t which is typedef to __u32 Also remove dead code ext4_ext_walk_space Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: Eric Sandeen --- include/linux/ext4_fs.h | 29 +++++++++++++++++++---------- include/linux/ext4_fs_extents.h | 19 ++----------------- include/linux/ext4_fs_i.h | 9 ++++++--- 3 files changed, 27 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index fb31c1aba989..5e2da0974d16 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -935,11 +935,14 @@ extern unsigned long ext4_count_free (struct buffer_head *, unsigned); /* inode.c */ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr); -struct buffer_head * ext4_getblk (handle_t *, struct inode *, long, int, int *); -struct buffer_head * ext4_bread (handle_t *, struct inode *, int, int, int *); +struct buffer_head *ext4_getblk(handle_t *, struct inode *, + ext4_lblk_t, int, int *); +struct buffer_head *ext4_bread(handle_t *, struct inode *, + ext4_lblk_t, int, int *); int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, - sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, - int create, int extend_disksize); + ext4_lblk_t iblock, unsigned long maxblocks, + struct buffer_head *bh_result, + int create, int extend_disksize); extern void ext4_read_inode (struct inode *); extern int ext4_write_inode (struct inode *, int); @@ -1068,7 +1071,7 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_writepage_trans_blocks(struct inode *, int); extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t iblock, + ext4_lblk_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, int create, int extend_disksize); extern void ext4_ext_truncate(struct inode *, struct page *); @@ -1081,11 +1084,17 @@ ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, unsigned long max_blocks, struct buffer_head *bh, int create, int extend_disksize) { - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, create, extend_disksize); - return ext4_get_blocks_handle(handle, inode, block, max_blocks, bh, - create, extend_disksize); + int retval; + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { + retval = ext4_ext_get_blocks(handle, inode, + (ext4_lblk_t)block, max_blocks, + bh, create, extend_disksize); + } else { + retval = ext4_get_blocks_handle(handle, inode, + (ext4_lblk_t)block, max_blocks, + bh, create, extend_disksize); + } + return retval; } diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index d2045a26195d..023683b9d883 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -124,20 +124,6 @@ struct ext4_ext_path { #define EXT4_EXT_CACHE_GAP 1 #define EXT4_EXT_CACHE_EXTENT 2 -/* - * to be called by ext4_ext_walk_space() - * negative retcode - error - * positive retcode - signal for ext4_ext_walk_space(), see below - * callback must return valid extent (passed or newly created) - */ -typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, - struct ext4_ext_cache *, - void *); - -#define EXT_CONTINUE 0 -#define EXT_BREAK 1 -#define EXT_REPEAT 2 - #define EXT_MAX_BLOCK 0xffffffff @@ -233,8 +219,7 @@ extern int ext4_ext_try_to_merge(struct inode *inode, struct ext4_extent *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); -extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); -extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *); - +extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, + struct ext4_ext_path *); #endif /* _LINUX_EXT4_EXTENTS */ diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 86ddfe2089f3..6c610b67b047 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -27,6 +27,9 @@ typedef int ext4_grpblk_t; /* data type for filesystem-wide blocks number */ typedef unsigned long long ext4_fsblk_t; +/* data type for file logical block number */ +typedef __u32 ext4_lblk_t; + struct ext4_reserve_window { ext4_fsblk_t _rsv_start; /* First byte reserved */ ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ @@ -48,7 +51,7 @@ struct ext4_block_alloc_info { * most-recently-allocated block in this file. * We use this for detecting linearly ascending allocation requests. */ - __u32 last_alloc_logical_block; + ext4_lblk_t last_alloc_logical_block; /* * Was i_next_alloc_goal in ext4_inode_info * is the *physical* companion to i_next_alloc_block. @@ -67,7 +70,7 @@ struct ext4_block_alloc_info { */ struct ext4_ext_cache { ext4_fsblk_t ec_start; - __u32 ec_block; + ext4_lblk_t ec_block; __u32 ec_len; /* must be 32bit to return holes */ __u32 ec_type; }; @@ -95,7 +98,7 @@ struct ext4_inode_info { /* block reservation info */ struct ext4_block_alloc_info *i_block_alloc_info; - __u32 i_dir_start_lookup; + ext4_lblk_t i_dir_start_lookup; #ifdef CONFIG_EXT4DEV_FS_XATTR /* * Extended attributes can be read independently of the main file -- cgit v1.2.3 From fd2d42912f9f09e5250cb3b024ee0625704e9cb7 Mon Sep 17 00:00:00 2001 From: Avantika Mathur Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: add ext4_group_t, and change all group variables to this type. In many places variables for block group are of type int, which limits the maximum number of block groups to 2^31. Each block group can have up to 2^15 blocks, with a 4K block size, and the max filesystem size is limited to 2^31 * (2^15 * 2^12) = 2^58 -- or 256 PB This patch introduces a new type ext4_group_t, of type unsigned long, to represent block group numbers in ext4. All occurrences of block group variables are converted to type ext4_group_t. Signed-off-by: Avantika Mathur --- include/linux/ext4_fs.h | 11 ++++++----- include/linux/ext4_fs_i.h | 5 ++++- include/linux/ext4_fs_sb.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 5e2da0974d16..e1103c2a0d42 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -830,7 +830,7 @@ struct ext4_iloc { struct buffer_head *bh; unsigned long offset; - unsigned long block_group; + ext4_group_t block_group; }; static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) @@ -855,7 +855,7 @@ struct dir_private_info { /* calculate the first block number of the group */ static inline ext4_fsblk_t -ext4_group_first_block_no(struct super_block *sb, unsigned long group_no) +ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) { return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); @@ -886,8 +886,9 @@ extern unsigned int ext4_block_group(struct super_block *sb, ext4_fsblk_t blocknr); extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, ext4_fsblk_t blocknr); -extern int ext4_bg_has_super(struct super_block *sb, int group); -extern unsigned long ext4_bg_num_gdb(struct super_block *sb, int group); +extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); +extern unsigned long ext4_bg_num_gdb(struct super_block *sb, + ext4_group_t group); extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, ext4_fsblk_t goal, int *errp); extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, @@ -900,7 +901,7 @@ extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); extern void ext4_check_blocks_bitmap (struct super_block *); extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, - unsigned int block_group, + ext4_group_t block_group, struct buffer_head ** bh); extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); extern void ext4_init_block_alloc_info(struct inode *); diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 6c610b67b047..2b4e3700c725 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -30,6 +30,9 @@ typedef unsigned long long ext4_fsblk_t; /* data type for file logical block number */ typedef __u32 ext4_lblk_t; +/* data type for block group number */ +typedef unsigned long ext4_group_t; + struct ext4_reserve_window { ext4_fsblk_t _rsv_start; /* First byte reserved */ ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ @@ -92,7 +95,7 @@ struct ext4_inode_info { * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ - __u32 i_block_group; + ext4_group_t i_block_group; __u32 i_state; /* Dynamic state flags for ext4 */ /* block reservation info */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index b40e827cd495..f15821c5e4c4 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -35,7 +35,7 @@ struct ext4_sb_info { unsigned long s_itb_per_group; /* Number of inode table blocks per group */ unsigned long s_gdb_count; /* Number of group descriptor blocks */ unsigned long s_desc_per_block; /* Number of group descriptors per block */ - unsigned long s_groups_count; /* Number of groups in the fs */ + ext4_group_t s_groups_count; /* Number of groups in the fs */ unsigned long s_overhead_last; /* Last calculated overhead */ unsigned long s_blocks_last; /* Last seen block count */ struct buffer_head * s_sbh; /* Buffer containing the super block */ -- cgit v1.2.3 From 99e6f829a854daa6d56006cad51156e98863e73a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Introduce ext4_update_*_feature Introduce ext4_update_*_feature and use them instead of opencoding. Signed-off-by: Aneesh Kumar K.V --- include/linux/ext4_fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index e1103c2a0d42..429dbfc851ea 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -989,6 +989,12 @@ extern void ext4_abort (struct super_block *, const char *, const char *, ...) extern void ext4_warning (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern void ext4_update_dynamic_rev (struct super_block *sb); +extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, + __u32 compat); +extern int ext4_update_rocompat_feature(handle_t *handle, + struct super_block *sb, __u32 rocompat); +extern int ext4_update_incompat_feature(handle_t *handle, + struct super_block *sb, __u32 incompat); extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg); extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, -- cgit v1.2.3 From 1d03ec984ca41ba184822d1101babb3fa3e26c77 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Fix sparse warnings. Fix sparse warnings related to static functions and local variables. Signed-off-by: Aneesh Kumar K.V --- include/linux/ext4_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 429dbfc851ea..1a27433b159a 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -893,6 +893,8 @@ extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, ext4_fsblk_t goal, int *errp); extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, ext4_fsblk_t goal, unsigned long *count, int *errp); +extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); extern void ext4_free_blocks (handle_t *handle, struct inode *inode, ext4_fsblk_t block, unsigned long count); extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, -- cgit v1.2.3 From 7973c0c19ecba92f113488045005f8e7ce1cd7c8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Rename i_file_acl to i_file_acl_lo Rename i_file_acl to i_file_acl_lo. This helps in finding bugs where we use i_file_acl instead of the combined i_file_acl_lo and i_file_acl_high Signed-off-by: Aneesh Kumar K.V --- include/linux/ext4_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 1a27433b159a..6894f361d01d 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -297,7 +297,7 @@ struct ext4_inode { } osd1; /* OS dependent 1 */ __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ __le32 i_generation; /* File version (for NFS) */ - __le32 i_file_acl; /* File ACL */ + __le32 i_file_acl_lo; /* File ACL */ __le32 i_dir_acl; /* Directory ACL */ __le32 i_obso_faddr; /* Obsoleted fragment address */ union { -- cgit v1.2.3 From a48380f769dfed6163fb82a68b13bd562ea1e027 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Rename i_dir_acl to i_size_high Rename ext4_inode.i_dir_acl to i_size_high drop ext4_inode_info.i_dir_acl as it is not used Rename ext4_inode.i_size to ext4_inode.i_size_lo Add helper function for accessing the ext4_inode combined i_size. Signed-off-by: Aneesh Kumar K.V --- include/linux/ext4_fs.h | 15 ++++++++++++--- include/linux/ext4_fs_i.h | 1 - 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 6894f361d01d..a8f3faea8eff 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -275,7 +275,7 @@ struct ext4_mount_options { struct ext4_inode { __le16 i_mode; /* File mode */ __le16 i_uid; /* Low 16 bits of Owner Uid */ - __le32 i_size; /* Size in bytes */ + __le32 i_size_lo; /* Size in bytes */ __le32 i_atime; /* Access time */ __le32 i_ctime; /* Inode Change time */ __le32 i_mtime; /* Modification time */ @@ -298,7 +298,7 @@ struct ext4_inode { __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ __le32 i_generation; /* File version (for NFS) */ __le32 i_file_acl_lo; /* File ACL */ - __le32 i_dir_acl; /* Directory ACL */ + __le32 i_size_high; __le32 i_obso_faddr; /* Obsoleted fragment address */ union { struct { @@ -330,7 +330,6 @@ struct ext4_inode { __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ }; -#define i_size_high i_dir_acl #define EXT4_EPOCH_BITS 2 #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) @@ -1049,7 +1048,17 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); } +static inline loff_t ext4_isize(struct ext4_inode *raw_inode) +{ + return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | + le32_to_cpu(raw_inode->i_size_lo); +} +static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) +{ + raw_inode->i_size_lo = cpu_to_le32(i_size); + raw_inode->i_size_high = cpu_to_le32(i_size >> 32); +} #define ext4_std_error(sb, errno) \ do { \ diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 2b4e3700c725..f1cd4934e46f 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -85,7 +85,6 @@ struct ext4_inode_info { __le32 i_data[15]; /* unconverted */ __u32 i_flags; ext4_fsblk_t i_file_acl; - __u32 i_dir_acl; __u32 i_dtime; /* -- cgit v1.2.3 From 0fc1b451471dfc3cabd6e99ef441df9804616e63 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:26 -0500 Subject: ext4: Add support for 48 bit inode i_blocks. Use the __le16 l_i_reserved1 field of the linux2 struct of ext4_inode to represet the higher 16 bits for i_blocks. With this change max_file size becomes (2**48 -1 )* 512 bytes. We add a RO_COMPAT feature to the super block to indicate that inode have i_blocks represented as a split 48 bits. Super block with this feature set cannot be mounted read write on a kernel with CONFIG_LSF disabled. Super block flag EXT4_FEATURE_RO_COMPAT_HUGE_FILE Signed-off-by: Aneesh Kumar K.V --- include/linux/ext4_fs.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index a8f3faea8eff..be25eca9c040 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -282,7 +282,7 @@ struct ext4_inode { __le32 i_dtime; /* Deletion Time */ __le16 i_gid; /* Low 16 bits of Group Id */ __le16 i_links_count; /* Links count */ - __le32 i_blocks; /* Blocks count */ + __le32 i_blocks_lo; /* Blocks count */ __le32 i_flags; /* File flags */ union { struct { @@ -302,7 +302,7 @@ struct ext4_inode { __le32 i_obso_faddr; /* Obsoleted fragment address */ union { struct { - __le16 l_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ + __le16 l_i_blocks_high; /* were l_i_reserved1 */ __le16 l_i_file_acl_high; __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ @@ -404,6 +404,7 @@ do { \ #if defined(__KERNEL__) || defined(__linux__) #define i_reserved1 osd1.linux1.l_i_reserved1 #define i_file_acl_high osd2.linux2.l_i_file_acl_high +#define i_blocks_high osd2.linux2.l_i_blocks_high #define i_uid_low i_uid #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high @@ -670,6 +671,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 @@ -681,6 +683,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 #define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 +#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR @@ -695,7 +698,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ - EXT4_FEATURE_RO_COMPAT_BTREE_DIR) + EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ + EXT4_FEATURE_RO_COMPAT_HUGE_FILE) /* * Default values for user and/or group using reserved blocks -- cgit v1.2.3 From 8180a5627d126362c2f64e4fa886d6f608d9632a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Support large files This patch converts ext4_inode i_blocks to represent total blocks occupied by the inode in file system block size. Earlier the variable used to represent this in 512 byte block size. This actually limited the total size of the file. The feature is enabled transparently when we write an inode whose i_blocks cannot be represnted as 512 byte units in a 48 bit variable. inode flag EXT4_HUGE_FILE_FL Signed-off-by: Aneesh Kumar K.V --- include/linux/ext4_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index be25eca9c040..6ae91f40aaa2 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -178,8 +178,9 @@ struct ext4_group_desc #define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ +#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ -- cgit v1.2.3 From e2b4657453c0d5571bd3c7256585c486ed42d364 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: store maxbytes for bitmapped files and return EFBIG as appropriate Calculate & store the max offset for bitmapped files, and catch too-large seeks, truncates, and writes in ext4, shortening or rejecting as appropriate. Signed-off-by: Eric Sandeen --- include/linux/ext4_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index f15821c5e4c4..38a47ec06df9 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -38,6 +38,7 @@ struct ext4_sb_info { ext4_group_t s_groups_count; /* Number of groups in the fs */ unsigned long s_overhead_last; /* Last calculated overhead */ unsigned long s_blocks_last; /* Last seen block count */ + loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ struct buffer_head ** s_group_desc; -- cgit v1.2.3 From 91b51a018d7711b20e9e0bb14c3d790de4e310d4 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: sync up block group descriptor with e2fsprogs. This patch extends bg_itable_unused of ext4 group descriptor from 16bit into 32bit. In order to add bg_itable_unused_hi into struct ext4_group_desc, some extra fields which are already introduced into e2fsprogs are also added in for consistency. Signed-off-by: Coly Li Cc: Andreas Dilger Signed-off-by: Mingming Cao --- include/linux/ext4_fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 6ae91f40aaa2..55a376e7bca0 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -118,6 +118,11 @@ struct ext4_group_desc __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ __le32 bg_inode_table_hi; /* Inodes table block MSB */ + __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ + __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ + __le16 bg_used_dirs_count_hi; /* Directories count MSB */ + __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ + __u32 bg_reserved2[3]; }; #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ -- cgit v1.2.3 From 389d1b083c767a360ec84b27a95da06244becec8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:26 -0500 Subject: Add buffer head related helper functions Add buffer head related helper function bh_uptodate_or_lock and bh_submit_read which can be used by file system Signed-off-by: Aneesh Kumar K.V --- include/linux/buffer_head.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index da0d83fbadc0..e98801f06dcc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -192,6 +192,8 @@ int sync_dirty_buffer(struct buffer_head *bh); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); +int bh_uptodate_or_lock(struct buffer_head *bh); +int bh_submit_read(struct buffer_head *bh); extern int buffer_heads_over_limit; -- cgit v1.2.3 From 36df53f4a3e445175fc1e9d7f433599482ec6d7f Mon Sep 17 00:00:00 2001 From: Chris Snook Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: jbd2: Remove printk from J_ASSERT to preserve registers during BUG Signed-off-by: Chris Snook Cc: "Stephen C. Tweedie" Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 06ef11457051..d5f7cff4cb28 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -256,17 +256,7 @@ typedef struct journal_superblock_s #include #include -#define JBD2_ASSERTIONS -#ifdef JBD2_ASSERTIONS -#define J_ASSERT(assert) \ -do { \ - if (!(assert)) { \ - printk (KERN_EMERG \ - "Assertion failure in %s() at %s:%d: \"%s\"\n", \ - __FUNCTION__, __FILE__, __LINE__, # assert); \ - BUG(); \ - } \ -} while (0) +#define J_ASSERT(assert) BUG_ON(!(assert)) #if defined(CONFIG_BUFFER_DEBUG) void buffer_assertion_failure(struct buffer_head *bh); @@ -282,10 +272,6 @@ void buffer_assertion_failure(struct buffer_head *bh); #define J_ASSERT_JH(jh, expr) J_ASSERT(expr) #endif -#else -#define J_ASSERT(assert) do { } while (0) -#endif /* JBD2_ASSERTIONS */ - #if defined(JBD2_PARANOID_IOFAIL) #define J_EXPECT(expr, why...) J_ASSERT(expr) #define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) -- cgit v1.2.3 From f5a7a6b0d9b6af7d46124ed3f6b3995225cb62d0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: jbd2: Fix assertion failure in fs/jbd2/checkpoint.c Before we start committing a transaction, we call __journal_clean_checkpoint_list() to cleanup transaction's written-back buffers. If this call happens to remove all of them (and there were already some buffers), __journal_remove_checkpoint() will decide to free the transaction because it isn't (yet) a committing transaction and soon we fail some assertion - the transaction really isn't ready to be freed :). We change the check in __journal_remove_checkpoint() to free only a transaction in T_FINISHED state. The locking there is subtle though (as everywhere in JBD ;(). We use j_list_lock to protect the check and a subsequent call to __journal_drop_transaction() and do the same in the end of journal_commit_transaction() which is the only place where a transaction can get to T_FINISHED state. Probably I'm too paranoid here and such locking is not really necessary - checkpoint lists are processed only from log_do_checkpoint() where a transaction must be already committed to be processed or from __journal_clean_checkpoint_list() where kjournald itself calls it and thus transaction cannot change state either. Better be safe if something changes in future... Signed-off-by: Jan Kara Cc: Signed-off-by: Andrew Morton --- include/linux/jbd2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d5f7cff4cb28..d861ffd49821 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -442,6 +442,8 @@ struct transaction_s /* * Transaction's current state * [no locking - only kjournald2 alters this] + * [j_list_lock] guards transition of a transaction into T_FINISHED + * state and subsequent call of __jbd2_journal_drop_transaction() * FIXME: needs barriers * KLUDGE: [use j_state_lock] */ -- cgit v1.2.3 From c278bfecebfb1ed67c326ef472660878baa745cd Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Make ext4_get_blocks_wrap take the truncate_mutex early. When doing a migrate from ext3 to ext4 inode we need to make sure the test for inode type and walking inode data happens inside lock. To make this happen move truncate_mutex early before checking the i_flags. This actually should enable us to remove the verify_chain(). Signed-off-by: Aneesh Kumar K.V --- include/linux/ext4_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 55a376e7bca0..583049c1d366 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -1113,6 +1113,7 @@ ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, int create, int extend_disksize) { int retval; + mutex_lock(&EXT4_I(inode)->truncate_mutex); if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { retval = ext4_ext_get_blocks(handle, inode, (ext4_lblk_t)block, max_blocks, @@ -1122,6 +1123,7 @@ ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, (ext4_lblk_t)block, max_blocks, bh, create, extend_disksize); } + mutex_unlock(&EXT4_I(inode)->truncate_mutex); return retval; } -- cgit v1.2.3 From 0e855ac8b103ef579052936b59fe7c599ac422a4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:26 -0500 Subject: ext4: Convert truncate_mutex to read write semaphore. We are currently taking the truncate_mutex for every read. This would have performance impact on large CPU configuration. Convert the lock to read write semaphore and take read lock when we are trying to read the file. Signed-off-by: Aneesh Kumar K.V --- include/linux/ext4_fs.h | 25 ++++--------------------- include/linux/ext4_fs_i.h | 6 +++--- 2 files changed, 7 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 583049c1d366..300cc5a5adb9 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -1107,27 +1107,10 @@ extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len); -static inline int -ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, - unsigned long max_blocks, struct buffer_head *bh, - int create, int extend_disksize) -{ - int retval; - mutex_lock(&EXT4_I(inode)->truncate_mutex); - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { - retval = ext4_ext_get_blocks(handle, inode, - (ext4_lblk_t)block, max_blocks, - bh, create, extend_disksize); - } else { - retval = ext4_get_blocks_handle(handle, inode, - (ext4_lblk_t)block, max_blocks, - bh, create, extend_disksize); - } - mutex_unlock(&EXT4_I(inode)->truncate_mutex); - return retval; -} - - +extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, + sector_t block, unsigned long max_blocks, + struct buffer_head *bh, int create, + int extend_disksize); #endif /* __KERNEL__ */ #endif /* _LINUX_EXT4_FS_H */ diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index f1cd4934e46f..4377d249d378 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -139,16 +139,16 @@ struct ext4_inode_info { __u16 i_extra_isize; /* - * truncate_mutex is for serialising ext4_truncate() against + * i_data_sem is for serialising ext4_truncate() against * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's * data tree are chopped off during truncate. We can't do that in * ext4 because whenever we perform intermediate commits during * truncate, the inode and all the metadata blocks *must* be in a * consistent state which allows truncation of the orphans to restart * during recovery. Hence we must fix the get_block-vs-truncate race - * by other means, so we have truncate_mutex. + * by other means, so we have i_data_sem. */ - struct mutex truncate_mutex; + struct rw_semaphore i_data_sem; struct inode vfs_inode; unsigned long i_ext_generation; -- cgit v1.2.3 From 8e85fb3f305b24b79c6d9cb7a56d22b062335ad3 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: jbd2: jbd2 stats through procfs The patch below updates the jbd stats patch to 2.6.20/jbd2. The initial patch was posted by Alex Tomas in December 2005 (http://marc.info/?l=linux-ext4&m=113538565128617&w=2). It provides statistics via procfs such as transaction lifetime and size. Sometimes, investigating performance problems, i find useful to have stats from jbd about transaction's lifetime, size, etc. here is a patch for review and inclusion probably. for example, stats after creation of 3M files in htree directory: [root@bob ~]# cat /proc/fs/jbd/sda/history R/C tid wait run lock flush log hndls block inlog ctime write drop close R 261 8260 2720 0 0 750 9892 8170 8187 C 259 750 0 4885 1 R 262 20 2200 10 0 770 9836 8170 8187 R 263 30 2200 10 0 3070 9812 8170 8187 R 264 0 5000 10 0 1340 0 0 0 C 261 8240 3212 4957 0 R 265 8260 1470 0 0 4640 9854 8170 8187 R 266 0 5000 10 0 1460 0 0 0 C 262 8210 2989 4868 0 R 267 8230 1490 10 0 4440 9875 8171 8188 R 268 0 5000 10 0 1260 0 0 0 C 263 7710 2937 4908 0 R 269 7730 1470 10 0 3330 9841 8170 8187 R 270 0 5000 10 0 830 0 0 0 C 265 8140 3234 4898 0 C 267 720 0 4849 1 R 271 8630 2740 20 0 740 9819 8170 8187 C 269 800 0 4214 1 R 272 40 2170 10 0 830 9716 8170 8187 R 273 40 2280 0 0 3530 9799 8170 8187 R 274 0 5000 10 0 990 0 0 0 where, R - line for transaction's life from T_RUNNING to T_FINISHED C - line for transaction's checkpointing tid - transaction's id wait - for how long we were waiting for new transaction to start (the longest period journal_start() took in this transaction) run - real transaction's lifetime (from T_RUNNING to T_LOCKED lock - how long we were waiting for all handles to close (time the transaction was in T_LOCKED) flush - how long it took to flush all data (data=ordered) log - how long it took to write the transaction to the log hndls - how many handles got to the transaction block - how many blocks got to the transaction inlog - how many blocks are written to the log (block + descriptors) ctime - how long it took to checkpoint the transaction write - how many blocks have been written during checkpointing drop - how many blocks have been dropped during checkpointing close - how many running transactions have been closed to checkpoint this one all times are in msec. [root@bob ~]# cat /proc/fs/jbd/sda/info 280 transaction, each upto 8192 blocks average: 1633ms waiting for transaction 3616ms running transaction 5ms transaction was being locked 1ms flushing data (in ordered mode) 1799ms logging transaction 11781 handles per transaction 5629 blocks per transaction 5641 logged blocks per transaction Signed-off-by: Johann Lombardi Signed-off-by: Mariusz Kozlowski Signed-off-by: Mingming Cao Signed-off-by: Eric Sandeen --- include/linux/jbd2.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d861ffd49821..685640036e81 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -395,6 +395,16 @@ struct handle_s }; +/* + * Some stats for checkpoint phase + */ +struct transaction_chp_stats_s { + unsigned long cs_chp_time; + unsigned long cs_forced_to_close; + unsigned long cs_written; + unsigned long cs_dropped; +}; + /* The transaction_t type is the guts of the journaling mechanism. It * tracks a compound transaction through its various states: * @@ -531,6 +541,21 @@ struct transaction_s */ spinlock_t t_handle_lock; + /* + * Longest time some handle had to wait for running transaction + */ + unsigned long t_max_wait; + + /* + * When transaction started + */ + unsigned long t_start; + + /* + * Checkpointing stats [j_checkpoint_sem] + */ + struct transaction_chp_stats_s t_chp_stats; + /* * Number of outstanding updates running on this transaction * [t_handle_lock] @@ -562,6 +587,39 @@ struct transaction_s }; +struct transaction_run_stats_s { + unsigned long rs_wait; + unsigned long rs_running; + unsigned long rs_locked; + unsigned long rs_flushing; + unsigned long rs_logging; + + unsigned long rs_handle_count; + unsigned long rs_blocks; + unsigned long rs_blocks_logged; +}; + +struct transaction_stats_s { + int ts_type; + unsigned long ts_tid; + union { + struct transaction_run_stats_s run; + struct transaction_chp_stats_s chp; + } u; +}; + +#define JBD2_STATS_RUN 1 +#define JBD2_STATS_CHECKPOINT 2 + +static inline unsigned long +jbd2_time_diff(unsigned long start, unsigned long end) +{ + if (end >= start) + return end - start; + + return end + (MAX_JIFFY_OFFSET - start); +} + /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. @@ -623,6 +681,12 @@ struct transaction_s * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the * number that will fit in j_blocksize * @j_last_sync_writer: most recent pid which did a synchronous write + * @j_history: Buffer storing the transactions statistics history + * @j_history_max: Maximum number of transactions in the statistics history + * @j_history_cur: Current number of transactions in the statistics history + * @j_history_lock: Protect the transactions statistics history + * @j_proc_entry: procfs entry for the jbd statistics directory + * @j_stats: Overall statistics * @j_private: An opaque pointer to fs-private information. */ @@ -814,6 +878,19 @@ struct journal_s pid_t j_last_sync_writer; + /* + * Journal statistics + */ + struct transaction_stats_s *j_history; + int j_history_max; + int j_history_cur; + /* + * Protect the transactions statistics history + */ + spinlock_t j_history_lock; + struct proc_dir_entry *j_proc_entry; + struct transaction_stats_s j_stats; + /* * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here -- cgit v1.2.3 From 818d276ceb83aa9fdebb5e0a53188290312de987 Mon Sep 17 00:00:00 2001 From: Girish Shilamkar Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Add the journal checksum feature The journal checksum feature adds two new flags i.e JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT and JBD2_FEATURE_COMPAT_CHECKSUM. JBD2_FEATURE_CHECKSUM flag indicates that the commit block contains the checksum for the blocks described by the descriptor blocks. Due to checksums, writing of the commit record no longer needs to be synchronous. Now commit record can be sent to disk without waiting for descriptor blocks to be written to disk. This behavior is controlled using JBD2_FEATURE_ASYNC_COMMIT flag. Older kernels/e2fsck should not be able to recover the journal with _ASYNC_COMMIT hence it is made incompat. The commit header has been extended to hold the checksum along with the type of the checksum. For recovery in pass scan checksums are verified to ensure the sanity and completeness(in case of _ASYNC_COMMIT) of every transaction. Signed-off-by: Andreas Dilger Signed-off-by: Girish Shilamkar Signed-off-by: Dave Kleikamp Signed-off-by: Mingming Cao --- include/linux/ext4_fs.h | 3 ++- include/linux/jbd2.h | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 300cc5a5adb9..cd406dba0e64 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -467,7 +467,8 @@ do { \ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ #define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ - +#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ +#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 685640036e81..98a2bc5d3e3f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -149,6 +149,28 @@ typedef struct journal_header_s __be32 h_sequence; } journal_header_t; +/* + * Checksum types. + */ +#define JBD2_CRC32_CHKSUM 1 +#define JBD2_MD5_CHKSUM 2 +#define JBD2_SHA1_CHKSUM 3 + +#define JBD2_CRC32_CHKSUM_SIZE 4 + +#define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) +/* + * Commit block header for storing transactional checksums: + */ +struct commit_header { + __be32 h_magic; + __be32 h_blocktype; + __be32 h_sequence; + unsigned char h_chksum_type; + unsigned char h_chksum_size; + unsigned char h_padding[2]; + __be32 h_chksum[JBD2_CHECKSUM_BYTES]; +}; /* * The block tag: used to describe a single buffer in the journal. @@ -242,14 +264,18 @@ typedef struct journal_superblock_s ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) -#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 -#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 +#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 + +#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 +#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 +#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 /* Features known to this kernel version: */ -#define JBD2_KNOWN_COMPAT_FEATURES 0 +#define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ - JBD2_FEATURE_INCOMPAT_64BIT) + JBD2_FEATURE_INCOMPAT_64BIT | \ + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) #ifdef __KERNEL__ @@ -997,6 +1023,8 @@ extern int jbd2_journal_check_available_features (journal_t *, unsigned long, unsigned long, unsigned long); extern int jbd2_journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); +extern void jbd2_journal_clear_features + (journal_t *, unsigned long, unsigned long, unsigned long); extern int jbd2_journal_create (journal_t *); extern int jbd2_journal_load (journal_t *journal); extern void jbd2_journal_destroy (journal_t *); -- cgit v1.2.3 From 7a224228ed79d587ece2304869000aad1b8e97dd Mon Sep 17 00:00:00 2001 From: Jean Noel Cordenner Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: vfs: Add 64 bit i_version support The i_version field of the inode is changed to be a 64-bit counter that is set on every inode creation and that is incremented every time the inode data is modified (similarly to the "ctime" time-stamp). The aim is to fulfill a NFSv4 requirement for rfc3530. This first part concerns the vfs, it converts the 32-bit i_version in the generic inode to a 64-bit, a flag is added in the super block in order to check if the feature is enabled and the i_version is incremented in the vfs. Signed-off-by: Mingming Cao Signed-off-by: Jean Noel Cordenner Signed-off-by: Kalpak Shah --- include/linux/fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 21398a5d688d..9608839552b3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -124,6 +124,7 @@ extern int dir_notify_enable; #define MS_SHARED (1<<20) /* change to shared */ #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -173,6 +174,7 @@ extern int dir_notify_enable; ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) +#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) @@ -599,7 +601,7 @@ struct inode { uid_t i_uid; gid_t i_gid; dev_t i_rdev; - unsigned long i_version; + u64 i_version; loff_t i_size; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; @@ -1394,6 +1396,7 @@ static inline void inode_dec_link_count(struct inode *inode) mark_inode_dirty(inode); } +extern void inode_inc_iversion(struct inode *inode); extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); static inline void file_accessed(struct file *file) { -- cgit v1.2.3 From 25ec56b518257a56d2ff41a941d288e4b5ff9488 Mon Sep 17 00:00:00 2001 From: Jean Noel Cordenner Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Add inode version support in ext4 This patch adds 64-bit inode version support to ext4. The lower 32 bits are stored in the osd1.linux1.l_i_version field while the high 32 bits are stored in the i_version_hi field newly created in the ext4_inode. This field is incremented in case the ext4_inode is large enough. A i_version mount option has been added to enable the feature. Signed-off-by: Mingming Cao Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: Aneesh Kumar K.V Signed-off-by: Jean Noel Cordenner --- include/linux/ext4_fs.h | 6 +++++- include/linux/fs.h | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index cd406dba0e64..b6092940c8b8 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -292,7 +292,7 @@ struct ext4_inode { __le32 i_flags; /* File flags */ union { struct { - __u32 l_i_reserved1; + __le32 l_i_version; } linux1; struct { __u32 h_i_translator; @@ -334,6 +334,7 @@ struct ext4_inode { __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ __le32 i_crtime; /* File Creation time */ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ + __le32 i_version_hi; /* high 32 bits for 64-bit version */ }; @@ -407,6 +408,8 @@ do { \ raw_inode->xtime ## _extra); \ } while (0) +#define i_disk_version osd1.linux1.l_i_version + #if defined(__KERNEL__) || defined(__linux__) #define i_reserved1 osd1.linux1.l_i_reserved1 #define i_file_acl_high osd2.linux2.l_i_file_acl_high @@ -469,6 +472,7 @@ do { \ #define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ +#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt diff --git a/include/linux/fs.h b/include/linux/fs.h index 9608839552b3..a516b6716870 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1396,7 +1396,21 @@ static inline void inode_dec_link_count(struct inode *inode) mark_inode_dirty(inode); } -extern void inode_inc_iversion(struct inode *inode); +/** + * inode_inc_iversion - increments i_version + * @inode: inode that need to be updated + * + * Every time the inode is modified, the i_version field will be incremented. + * The filesystem has to be mounted with i_version flag + */ + +static inline void inode_inc_iversion(struct inode *inode) +{ + spin_lock(&inode->i_lock); + inode->i_version++; + spin_unlock(&inode->i_lock); +} + extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); static inline void file_accessed(struct file *file) { -- cgit v1.2.3 From c14c6fd5c56a0d0495d8a7c0f2bc330be658663e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:26 -0500 Subject: ext4: Add EXT4_IOC_MIGRATE ioctl The below patch add ioctl for migrating ext3 indirect block mapped inode to ext4 extent mapped inode. Signed-off-by: Aneesh Kumar K.V --- include/linux/ext4_fs.h | 4 ++++ include/linux/ext4_fs_extents.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index b6092940c8b8..213974f7c5d8 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -243,6 +243,7 @@ struct ext4_new_group_data { #endif #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) +#define EXT4_IOC_MIGRATE _IO('f', 7) /* * ioctl commands in 32 bit emulation @@ -983,6 +984,9 @@ extern int ext4_ioctl (struct inode *, struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); +/* migrate.c */ +extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, + unsigned long); /* namei.c */ extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index 023683b9d883..1cfb4854c964 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -212,6 +212,8 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); } +extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); +extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); extern int ext4_ext_try_to_merge(struct inode *inode, -- cgit v1.2.3 From aa02ad67d9b308290fde390682cd039b29f7ab85 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Add ext4_find_next_bit() This function is used by the ext4 multi block allocator patches. Also add generic_find_next_le_bit Signed-off-by: Aneesh Kumar K.V Cc: Signed-off-by: Andrew Morton --- include/asm-arm/bitops.h | 2 ++ include/asm-generic/bitops/ext2-non-atomic.h | 2 ++ include/asm-generic/bitops/le.h | 4 ++++ include/asm-m68k/bitops.h | 2 ++ include/asm-m68knommu/bitops.h | 2 ++ include/asm-powerpc/bitops.h | 4 ++++ include/asm-s390/bitops.h | 2 ++ include/linux/ext4_fs.h | 1 + 8 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/asm-arm/bitops.h b/include/asm-arm/bitops.h index 47a6b086eee2..5c60bfc1a84d 100644 --- a/include/asm-arm/bitops.h +++ b/include/asm-arm/bitops.h @@ -310,6 +310,8 @@ static inline int constant_fls(int x) _find_first_zero_bit_le(p,sz) #define ext2_find_next_zero_bit(p,sz,off) \ _find_next_zero_bit_le(p,sz,off) +#define ext2_find_next_bit(p, sz, off) \ + _find_next_bit_le(p, sz, off) /* * Minix is defined to use little-endian byte ordering. diff --git a/include/asm-generic/bitops/ext2-non-atomic.h b/include/asm-generic/bitops/ext2-non-atomic.h index 1697404afa05..63cf822431a2 100644 --- a/include/asm-generic/bitops/ext2-non-atomic.h +++ b/include/asm-generic/bitops/ext2-non-atomic.h @@ -14,5 +14,7 @@ generic_find_first_zero_le_bit((unsigned long *)(addr), (size)) #define ext2_find_next_zero_bit(addr, size, off) \ generic_find_next_zero_le_bit((unsigned long *)(addr), (size), (off)) +#define ext2_find_next_bit(addr, size, off) \ + generic_find_next_le_bit((unsigned long *)(addr), (size), (off)) #endif /* _ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_ */ diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h index b9c7e5d2d2ad..80e3bf13b2b9 100644 --- a/include/asm-generic/bitops/le.h +++ b/include/asm-generic/bitops/le.h @@ -20,6 +20,8 @@ #define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr) #define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset) +#define generic_find_next_le_bit(addr, size, offset) \ + find_next_bit(addr, size, offset) #elif defined(__BIG_ENDIAN) @@ -42,6 +44,8 @@ extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned long size, unsigned long offset); +extern unsigned long generic_find_next_le_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); #else #error "Please fix " diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h index 2976b5d68e96..83d1f286230b 100644 --- a/include/asm-m68k/bitops.h +++ b/include/asm-m68k/bitops.h @@ -410,6 +410,8 @@ static inline int ext2_find_next_zero_bit(const void *vaddr, unsigned size, res = ext2_find_first_zero_bit (p, size - 32 * (p - addr)); return (p - addr) * 32 + res; } +#define ext2_find_next_bit(addr, size, off) \ + generic_find_next_le_bit((unsigned long *)(addr), (size), (off)) #endif /* __KERNEL__ */ diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h index f8dfb7ba2e25..f43afe1fc3b3 100644 --- a/include/asm-m68knommu/bitops.h +++ b/include/asm-m68knommu/bitops.h @@ -294,6 +294,8 @@ found_middle: return result + ffz(__swab32(tmp)); } +#define ext2_find_next_bit(addr, size, off) \ + generic_find_next_le_bit((unsigned long *)(addr), (size), (off)) #include #endif /* __KERNEL__ */ diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h index 733b4af7f4f1..220d9a781ab9 100644 --- a/include/asm-powerpc/bitops.h +++ b/include/asm-powerpc/bitops.h @@ -359,6 +359,8 @@ static __inline__ int test_le_bit(unsigned long nr, unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned long size, unsigned long offset); +unsigned long generic_find_next_le_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); /* Bitmap functions for the ext2 filesystem */ #define ext2_set_bit(nr,addr) \ @@ -378,6 +380,8 @@ unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, #define ext2_find_next_zero_bit(addr, size, off) \ generic_find_next_zero_le_bit((unsigned long*)addr, size, off) +#define ext2_find_next_bit(addr, size, off) \ + generic_find_next_le_bit((unsigned long *)addr, size, off) /* Bitmap functions for the minix filesystem. */ #define minix_test_and_set_bit(nr,addr) \ diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h index 34d9a6357c38..dba6fecad0be 100644 --- a/include/asm-s390/bitops.h +++ b/include/asm-s390/bitops.h @@ -772,6 +772,8 @@ static inline int sched_find_first_bit(unsigned long *b) test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) #define ext2_test_bit(nr, addr) \ test_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) +#define ext2_find_next_bit(addr, size, off) \ + generic_find_next_le_bit((unsigned long *)(addr), (size), (off)) #ifndef __s390x__ diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 213974f7c5d8..d0b7ca99b91f 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -493,6 +493,7 @@ do { \ #define ext4_test_bit ext2_test_bit #define ext4_find_first_zero_bit ext2_find_first_zero_bit #define ext4_find_next_zero_bit ext2_find_next_zero_bit +#define ext4_find_next_bit ext2_find_next_bit /* * Maximal mount counts between two filesystem checks -- cgit v1.2.3 From 1988b51e476bd097d910c9245b53f2e38aedaf0d Mon Sep 17 00:00:00 2001 From: Alex Tomas Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: ext4: Add new functions for searching extent tree Add the functions ext4_ext_search_left() and ext4_ext_search_right(), which are used by mballoc during ext4_ext_get_blocks to decided whether to merge extent information. Signed-off-by: Alex Tomas Signed-off-by: Andreas Dilger Signed-off-by: Johann Lombardi Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- include/linux/ext4_fs_extents.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index 1cfb4854c964..697da4bce6c5 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -223,5 +223,9 @@ extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, struct ext4_ext_path *); +extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, + ext4_lblk_t *, ext4_fsblk_t *); +extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, + ext4_lblk_t *, ext4_fsblk_t *); #endif /* _LINUX_EXT4_EXTENTS */ -- cgit v1.2.3 From c9de560ded61faa5b754137b7753da252391c55a Mon Sep 17 00:00:00 2001 From: Alex Tomas Date: Tue, 29 Jan 2008 00:19:52 -0500 Subject: ext4: Add multi block allocator for ext4 Signed-off-by: Alex Tomas Signed-off-by: Andreas Dilger Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- include/linux/ext4_fs.h | 76 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/ext4_fs_i.h | 4 +++ include/linux/ext4_fs_sb.h | 52 +++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index d0b7ca99b91f..1852313fc7c7 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -20,6 +20,8 @@ #include #include +#include + /* * The second extended filesystem constants/structures */ @@ -51,6 +53,50 @@ #define ext4_debug(f, a...) do {} while (0) #endif +#define EXT4_MULTIBLOCK_ALLOCATOR 1 + +/* prefer goal again. length */ +#define EXT4_MB_HINT_MERGE 1 +/* blocks already reserved */ +#define EXT4_MB_HINT_RESERVED 2 +/* metadata is being allocated */ +#define EXT4_MB_HINT_METADATA 4 +/* first blocks in the file */ +#define EXT4_MB_HINT_FIRST 8 +/* search for the best chunk */ +#define EXT4_MB_HINT_BEST 16 +/* data is being allocated */ +#define EXT4_MB_HINT_DATA 32 +/* don't preallocate (for tails) */ +#define EXT4_MB_HINT_NOPREALLOC 64 +/* allocate for locality group */ +#define EXT4_MB_HINT_GROUP_ALLOC 128 +/* allocate goal blocks or none */ +#define EXT4_MB_HINT_GOAL_ONLY 256 +/* goal is meaningful */ +#define EXT4_MB_HINT_TRY_GOAL 512 + +struct ext4_allocation_request { + /* target inode for block we're allocating */ + struct inode *inode; + /* logical block in target inode */ + ext4_lblk_t logical; + /* phys. target (a hint) */ + ext4_fsblk_t goal; + /* the closest logical allocated block to the left */ + ext4_lblk_t lleft; + /* phys. block for ^^^ */ + ext4_fsblk_t pleft; + /* the closest logical allocated block to the right */ + ext4_lblk_t lright; + /* phys. block for ^^^ */ + ext4_fsblk_t pright; + /* how many blocks we want to allocate */ + unsigned long len; + /* flags. see above EXT4_MB_HINT_* */ + unsigned long flags; +}; + /* * Special inodes numbers */ @@ -474,6 +520,7 @@ do { \ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ +#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt @@ -912,7 +959,7 @@ extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, ext4_fsblk_t goal, unsigned long *count, int *errp); extern void ext4_free_blocks (handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count); + ext4_fsblk_t block, unsigned long count, int metadata); extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count, unsigned long *pdquot_freed_blocks); @@ -950,6 +997,20 @@ extern unsigned long ext4_count_dirs (struct super_block *); extern void ext4_check_inodes_bitmap (struct super_block *); extern unsigned long ext4_count_free (struct buffer_head *, unsigned); +/* mballoc.c */ +extern long ext4_mb_stats; +extern long ext4_mb_max_to_scan; +extern int ext4_mb_init(struct super_block *, int); +extern int ext4_mb_release(struct super_block *); +extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, + struct ext4_allocation_request *, int *); +extern int ext4_mb_reserve_blocks(struct super_block *, int); +extern void ext4_mb_discard_inode_preallocations(struct inode *); +extern int __init init_ext4_mballoc(void); +extern void exit_ext4_mballoc(void); +extern void ext4_mb_free_blocks(handle_t *, struct inode *, + unsigned long, unsigned long, int, unsigned long *); + /* inode.c */ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, @@ -1080,6 +1141,19 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) raw_inode->i_size_high = cpu_to_le32(i_size >> 32); } +static inline +struct ext4_group_info *ext4_get_group_info(struct super_block *sb, + ext4_group_t group) +{ + struct ext4_group_info ***grp_info; + long indexv, indexh; + grp_info = EXT4_SB(sb)->s_group_info; + indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); + indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); + return grp_info[indexv][indexh]; +} + + #define ext4_std_error(sb, errno) \ do { \ if ((errno)) \ diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 4377d249d378..d5508d3cf290 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -158,6 +158,10 @@ struct ext4_inode_info { * struct timespec i_{a,c,m}time in the generic inode. */ struct timespec i_crtime; + + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; }; #endif /* _LINUX_EXT4_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index 38a47ec06df9..abaae2c8cccf 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -91,6 +91,58 @@ struct ext4_sb_info { unsigned long s_ext_blocks; unsigned long s_ext_extents; #endif + + /* for buddy allocator */ + struct ext4_group_info ***s_group_info; + struct inode *s_buddy_cache; + long s_blocks_reserved; + spinlock_t s_reserve_lock; + struct list_head s_active_transaction; + struct list_head s_closed_transaction; + struct list_head s_committed_transaction; + spinlock_t s_md_lock; + tid_t s_last_transaction; + unsigned short *s_mb_offsets, *s_mb_maxs; + + /* tunables */ + unsigned long s_stripe; + unsigned long s_mb_stream_request; + unsigned long s_mb_max_to_scan; + unsigned long s_mb_min_to_scan; + unsigned long s_mb_stats; + unsigned long s_mb_order2_reqs; + unsigned long s_mb_group_prealloc; + /* where last allocation was done - for stream allocation */ + unsigned long s_mb_last_group; + unsigned long s_mb_last_start; + + /* history to debug policy */ + struct ext4_mb_history *s_mb_history; + int s_mb_history_cur; + int s_mb_history_max; + int s_mb_history_num; + struct proc_dir_entry *s_mb_proc; + spinlock_t s_mb_history_lock; + int s_mb_history_filter; + + /* stats for buddy allocator */ + spinlock_t s_mb_pa_lock; + atomic_t s_bal_reqs; /* number of reqs with len > 1 */ + atomic_t s_bal_success; /* we found long enough chunks */ + atomic_t s_bal_allocated; /* in blocks */ + atomic_t s_bal_ex_scanned; /* total extents scanned */ + atomic_t s_bal_goals; /* goal hits */ + atomic_t s_bal_breaks; /* too long searches */ + atomic_t s_bal_2orders; /* 2^order hits */ + spinlock_t s_bal_lock; + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; + atomic_t s_mb_lost_chunks; + atomic_t s_mb_preallocated; + atomic_t s_mb_discarded; + + /* locality groups */ + struct ext4_locality_group *s_locality_groups; }; #endif /* _LINUX_EXT4_FS_SB */ -- cgit v1.2.3 From 7b7510662f4d05ddcc45d435769860e73e6aa20e Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Mon, 28 Jan 2008 23:58:27 -0500 Subject: jbd2: add lockdep support Ported from similar patch for the jbd layer. Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 98a2bc5d3e3f..2cbf6fdb1799 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -418,6 +418,10 @@ struct handle_s unsigned int h_sync: 1; /* sync-on-close */ unsigned int h_jdata: 1; /* force data journaling */ unsigned int h_aborted: 1; /* fatal error on handle */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map h_lockdep_map; +#endif }; -- cgit v1.2.3