diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/balloc.c | 14 | ||||
-rw-r--r-- | fs/ext4/dir.c | 38 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 44 | ||||
-rw-r--r-- | fs/ext4/extents.c | 29 | ||||
-rw-r--r-- | fs/ext4/file.c | 7 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 23 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 47 | ||||
-rw-r--r-- | fs/ext4/inline.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 64 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 9 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 61 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 27 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 12 | ||||
-rw-r--r-- | fs/ext4/namei.c | 96 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 19 | ||||
-rw-r--r-- | fs/ext4/resize.c | 77 | ||||
-rw-r--r-- | fs/ext4/super.c | 194 |
17 files changed, 525 insertions, 238 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 2455fe1446d6..de601f3c023d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -279,6 +279,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh_p; if (block_group >= ngroups) { ext4_error(sb, "block_group >= groups_count - block_group = %u," @@ -289,7 +290,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - if (!sbi->s_group_desc[group_desc]) { + bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc); + /* + * sbi_array_rcu_deref returns with rcu unlocked, this is ok since + * the pointer being dereferenced won't be dereferenced again. By + * looking at the usage in add_new_gdb() the value isn't modified, + * just the pointer, and so it remains valid. + */ + if (!bh_p) { ext4_error(sb, "Group descriptor not loaded - " "block_group = %u, group_desc = %u, desc = %u", block_group, group_desc, offset); @@ -297,10 +305,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, } desc = (struct ext4_group_desc *)( - (__u8 *)sbi->s_group_desc[group_desc]->b_data + + (__u8 *)bh_p->b_data + offset * EXT4_DESC_SIZE(sb)); if (bh) - *bh = sbi->s_group_desc[group_desc]; + *bh = bh_p; return desc; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index e16bc4cec62e..6b3a32f75dad 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -75,6 +75,11 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, error_msg = "rec_len is too small for name_len"; else if (unlikely(((char *) de - buf) + rlen > size)) error_msg = "directory entry overrun"; + else if (unlikely(((char *) de - buf) + rlen > + size - EXT4_DIR_REC_LEN(1) && + ((char *) de - buf) + rlen != size)) { + error_msg = "directory entry too close to block end"; + } else if (unlikely(le32_to_cpu(de->inode) > le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))) error_msg = "inode out of bounds"; @@ -106,7 +111,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; - int dir_has_error = 0; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); if (ext4_encrypted_inode(inode)) { @@ -120,12 +124,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (err != ERR_BAD_DX_DIR) { return err; } - /* - * We don't set the inode dirty flag since it's not - * critical that it get flushed back to the disk. - */ - ext4_clear_inode_flag(file_inode(file), - EXT4_INODE_INDEX); + /* Can we just clear INDEX flag to ignore htree information? */ + if (!ext4_has_metadata_csum(sb)) { + /* + * We don't set the inode dirty flag since it's not + * critical that it gets flushed back to the disk. + */ + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } if (ext4_has_inline_data(inode)) { @@ -142,8 +148,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) return err; } - offset = ctx->pos & (sb->s_blocksize - 1); - while (ctx->pos < inode->i_size) { struct ext4_map_blocks map; @@ -152,9 +156,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) goto errout; } cond_resched(); + offset = ctx->pos & (sb->s_blocksize - 1); map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); map.m_len = 1; err = ext4_map_blocks(NULL, inode, &map, 0); + if (err == 0) { + /* m_len should never be zero but let's avoid + * an infinite loop if it somehow is */ + if (map.m_len == 0) + map.m_len = 1; + ctx->pos += map.m_len * sb->s_blocksize; + continue; + } if (err > 0) { pgoff_t index = map.m_pblk >> (PAGE_SHIFT - inode->i_blkbits); @@ -173,13 +186,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) } if (!bh) { - if (!dir_has_error) { - EXT4_ERROR_FILE(file, 0, - "directory contains a " - "hole at offset %llu", - (unsigned long long) ctx->pos); - dir_has_error = 1; - } /* corrupt size? Maybe no more blocks to read */ if (ctx->pos > inode->i_blocks << 9) break; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 567a6c7af677..eb0ec5068423 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1367,7 +1367,7 @@ struct ext4_sb_info { loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; + struct buffer_head * __rcu *s_group_desc; unsigned int s_mount_opt; unsigned int s_mount_opt2; unsigned int s_mount_flags; @@ -1427,7 +1427,7 @@ struct ext4_sb_info { #endif /* for buddy allocator */ - struct ext4_group_info ***s_group_info; + struct ext4_group_info ** __rcu *s_group_info; struct inode *s_buddy_cache; spinlock_t s_md_lock; unsigned short *s_mb_offsets; @@ -1475,7 +1475,7 @@ struct ext4_sb_info { unsigned int s_extent_max_zeroout_kb; unsigned int s_log_groups_per_flex; - struct flex_groups *s_flex_groups; + struct flex_groups * __rcu *s_flex_groups; ext4_group_t s_flex_groups_allocated; /* workqueue for reserved extent conversions (buffered io) */ @@ -1514,8 +1514,11 @@ struct ext4_sb_info { struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; - /* Barrier between changing inodes' journal flags and writepages ops. */ - struct percpu_rw_semaphore s_journal_flag_rwsem; + /* + * Barrier between writepages ops and changing any inode's JOURNAL_DATA + * or EXTENTS flag. + */ + struct percpu_rw_semaphore s_writepages_rwsem; /* Encryption support */ #ifdef CONFIG_EXT4_FS_ENCRYPTION @@ -1547,6 +1550,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) } /* + * Returns: sbi->field[index] + * Used to access an array element from the following sbi fields which require + * rcu protection to avoid dereferencing an invalid pointer due to reassignment + * - s_group_desc + * - s_group_info + * - s_flex_group + */ +#define sbi_array_rcu_deref(sbi, field, index) \ +({ \ + typeof(*((sbi)->field)) _v; \ + rcu_read_lock(); \ + _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \ + rcu_read_unlock(); \ + _v; \ +}) + +/* * Inode dynamic state flags */ enum { @@ -2375,8 +2395,11 @@ int ext4_insert_dentry(struct inode *dir, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) + if (!ext4_has_feature_dir_index(inode->i_sb)) { + /* ext4_iget() should have caught this... */ + WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } static unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK @@ -2552,6 +2575,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, extern bool ext4_empty_dir(struct inode *inode); /* resize.c */ +extern void ext4_kvfree_array_rcu(void *to_free); extern int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input); extern int ext4_group_extend(struct super_block *sb, @@ -2792,13 +2816,13 @@ static inline struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info ***grp_info; + struct ext4_group_info **grp_info; long indexv, indexh; BUG_ON(group >= EXT4_SB(sb)->s_groups_count); - grp_info = EXT4_SB(sb)->s_group_info; indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - return grp_info[indexv][indexh]; + grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); + return grp_info[indexh]; } /* @@ -2848,7 +2872,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) !inode_is_locked(inode)); down_write(&EXT4_I(inode)->i_data_sem); if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize); up_write(&EXT4_I(inode)->i_data_sem); } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 106a5bb3ae68..999d2a54297d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1047,6 +1047,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, __le32 border; ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ int err = 0; + size_t ext_size = 0; /* make decision: where to split? */ /* FIXME: now decision is simplest: at current extent */ @@ -1138,6 +1139,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, le16_add_cpu(&neh->eh_entries, m); } + /* zero out unused area in the extent block */ + ext_size = sizeof(struct ext4_extent_header) + + sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries); + memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1217,6 +1222,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } + /* zero out unused area in the extent block */ + ext_size = sizeof(struct ext4_extent_header) + + (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries)); + memset(bh->b_data + ext_size, 0, + inode->i_sb->s_blocksize - ext_size); ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1282,6 +1292,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock, goal = 0; struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; int err = 0; + size_t ext_size = 0; /* Try to prepend new index to old one */ if (ext_depth(inode)) @@ -1307,9 +1318,11 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, goto out; } + ext_size = sizeof(EXT4_I(inode)->i_data); /* move top-level index/leaf into new block */ - memmove(bh->b_data, EXT4_I(inode)->i_data, - sizeof(EXT4_I(inode)->i_data)); + memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size); + /* zero out unused area in the extent block */ + memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size); /* set size of new block */ neh = ext_block_hdr(bh); @@ -3432,8 +3445,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, (unsigned long long)map->m_lblk, map_len); sbi = EXT4_SB(inode->i_sb); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map_len) eof_block = map->m_lblk + map_len; @@ -3688,8 +3701,8 @@ static int ext4_split_convert_extents(handle_t *handle, __func__, inode->i_ino, (unsigned long long)map->m_lblk, map->m_len); - eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> - inode->i_sb->s_blocksize_bits; + eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1) + >> inode->i_sb->s_blocksize_bits; if (eof_block < map->m_lblk + map->m_len) eof_block = map->m_lblk + map->m_len; /* @@ -3742,8 +3755,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, * illegal. */ if (ee_block != map->m_lblk || ee_len > map->m_len) { -#ifdef EXT4_DEBUG - ext4_warning("Inode (%ld) finished: extent logical block %llu," +#ifdef CONFIG_EXT4_DEBUG + ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu," " len %u; IO logical block %llu, len %u", inode->i_ino, (unsigned long long)ee_block, ee_len, (unsigned long long)map->m_lblk, map->m_len); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index fe76d0957a1f..59d3ea7094a0 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -163,6 +163,13 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } ret = __generic_file_write_iter(iocb, from); + /* + * Unaligned direct AIO must be the only IO in flight. Otherwise + * overlapping aligned IO after unaligned might result in data + * corruption. + */ + if (ret == -EIOCBQUEUED && unaligned_aio) + ext4_unwritten_wait(inode); inode_unlock(inode); if (ret > 0) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 4f78e099de1d..c5af7bbf906f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -331,11 +331,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) percpu_counter_inc(&sbi->s_freeinodes_counter); if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, block_group); + struct flex_groups *fg; - atomic_inc(&sbi->s_flex_groups[f].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, + ext4_flex_group(sbi, block_group)); + atomic_inc(&fg->free_inodes); if (is_directory) - atomic_dec(&sbi->s_flex_groups[f].used_dirs); + atomic_dec(&fg->used_dirs); } BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); @@ -376,12 +378,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, int flex_size, struct orlov_stats *stats) { struct ext4_group_desc *desc; - struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; if (flex_size > 1) { - stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); - stats->used_dirs = atomic_read(&flex_group[g].used_dirs); + struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb), + s_flex_groups, g); + stats->free_inodes = atomic_read(&fg->free_inodes); + stats->free_clusters = atomic64_read(&fg->free_clusters); + stats->used_dirs = atomic_read(&fg->used_dirs); return; } @@ -988,7 +991,8 @@ got: if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); - atomic_inc(&sbi->s_flex_groups[f].used_dirs); + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, + f)->used_dirs); } } if (ext4_has_group_desc_csum(sb)) { @@ -1011,7 +1015,8 @@ got: if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); - atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); + atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_inodes); } inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 14007e621d2a..d2844fe9040d 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1217,6 +1217,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ext4_lblk_t offsets[4], offsets2[4]; Indirect chain[4], chain2[4]; Indirect *partial, *partial2; + Indirect *p = NULL, *p2 = NULL; ext4_lblk_t max_block; __le32 nr = 0, nr2 = 0; int n = 0, n2 = 0; @@ -1258,7 +1259,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, } - partial = ext4_find_shared(inode, n, offsets, chain, &nr); + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); if (nr) { if (partial == chain) { /* Shared branch grows from the inode */ @@ -1283,13 +1284,11 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, partial->p + 1, (__le32 *)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); partial--; } end_range: - partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); if (nr2) { if (partial2 == chain2) { /* @@ -1319,16 +1318,14 @@ end_range: (__le32 *)partial2->bh->b_data, partial2->p, (chain2+n2-1) - partial2); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); partial2--; } goto do_indirects; } /* Punch happened within the same level (n == n2) */ - partial = ext4_find_shared(inode, n, offsets, chain, &nr); - partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); /* Free top, but only if partial2 isn't its subtree. */ if (nr) { @@ -1385,15 +1382,7 @@ end_range: partial->p + 1, partial2->p, (chain+n-1) - partial); - while (partial > chain) { - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - } - while (partial2 > chain2) { - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); - } - return 0; + goto cleanup; } /* @@ -1408,8 +1397,6 @@ end_range: partial->p + 1, (__le32 *)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); partial--; } if (partial2 > chain2 && depth2 <= depth) { @@ -1417,11 +1404,21 @@ end_range: (__le32 *)partial2->bh->b_data, partial2->p, (chain2+n2-1) - partial2); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); partial2--; } } + +cleanup: + while (p && p > chain) { + BUFFER_TRACE(p->bh, "call brelse"); + brelse(p->bh); + p--; + } + while (p2 && p2 > chain2) { + BUFFER_TRACE(p2->bh, "call brelse"); + brelse(p2->bh); + p2--; + } return 0; do_indirects: @@ -1429,7 +1426,7 @@ do_indirects: switch (offsets[0]) { default: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_IND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); @@ -1437,7 +1434,7 @@ do_indirects: } case EXT4_IND_BLOCK: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_DIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); @@ -1445,7 +1442,7 @@ do_indirects: } case EXT4_DIND_BLOCK: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_TIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); @@ -1454,5 +1451,5 @@ do_indirects: case EXT4_TIND_BLOCK: ; } - return 0; + goto cleanup; } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 9a13f86fed62..4df4d31057b3 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1417,7 +1417,7 @@ int htree_inlinedir_to_tree(struct file *dir_file, err = ext4_htree_store_dirent(dir_file, hinfo->hash, hinfo->minor_hash, de, &tmp_str); if (err) { - count = err; + ret = err; goto out; } count++; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4815be26b15f..45bcde1969e3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2475,7 +2475,7 @@ update_disksize: * truncate are avoided by checking i_size under i_data_sem. */ disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; - if (disksize > EXT4_I(inode)->i_disksize) { + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { int err2; loff_t i_size; @@ -2652,7 +2652,7 @@ static int ext4_writepages(struct address_space *mapping, struct blk_plug plug; bool give_up_on_write = false; - percpu_down_read(&sbi->s_journal_flag_rwsem); + percpu_down_read(&sbi->s_writepages_rwsem); trace_ext4_writepages(inode, wbc); if (dax_mapping(mapping)) { @@ -2853,7 +2853,7 @@ retry: out_writepages: trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); + percpu_up_read(&sbi->s_writepages_rwsem); return ret; } @@ -3957,6 +3957,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) trace_ext4_punch_hole(inode, offset, length, 0); + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + if (ext4_has_inline_data(inode)) { + down_write(&EXT4_I(inode)->i_mmap_sem); + ret = ext4_convert_inline_data(inode); + up_write(&EXT4_I(inode)->i_mmap_sem); + if (ret) + return ret; + } + /* * Write out all dirty pages to avoid race conditions * Then release them. @@ -4585,6 +4594,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = -EFSCORRUPTED; goto bad_inode; } + /* + * If dir_index is not enabled but there's dir with INDEX flag set, + * we'd normally treat htree data as empty space. But with metadata + * checksumming that corrupts checksums so forbid that. + */ + if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { + EXT4_ERROR_INODE(inode, + "iget: Dir with htree data on filesystem without dir_index feature."); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; @@ -4733,7 +4754,7 @@ static int ext4_inode_blocks_set(handle_t *handle, struct ext4_inode_info *ei) { struct inode *inode = &(ei->vfs_inode); - u64 i_blocks = inode->i_blocks; + u64 i_blocks = READ_ONCE(inode->i_blocks); struct super_block *sb = inode->i_sb; if (i_blocks <= ~0U) { @@ -5071,11 +5092,15 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) offset = inode->i_size & (PAGE_SIZE - 1); /* - * All buffers in the last page remain valid? Then there's nothing to - * do. We do the check mainly to optimize the common PAGE_SIZE == - * blocksize case + * If the page is fully truncated, we don't need to wait for any commit + * (and we even should not as __ext4_journalled_invalidatepage() may + * strip all buffers from the page but keep the page dirty which can then + * confuse e.g. concurrent ext4_writepage() seeing dirty page without + * buffers). Also we don't need to wait for any commit if all buffers in + * the page remain valid. This is most beneficial for the common case of + * blocksize == PAGESIZE. */ - if (offset > PAGE_SIZE - i_blocksize(inode)) + if (!offset || offset > (PAGE_SIZE - i_blocksize(inode))) return; while (1) { page = find_lock_page(inode->i_mapping, @@ -5223,7 +5248,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); if (error) { - if (orphan) + if (orphan && inode->i_nlink) ext4_orphan_del(NULL, inode); goto err_out; } @@ -5459,10 +5484,25 @@ static int ext4_expand_extra_isize(struct inode *inode, { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; + unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb); + struct ext4_inode_info *ei = EXT4_I(inode); if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) return 0; + /* this was checked at iget time, but double check for good measure */ + if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) || + (ei->i_extra_isize & 3)) { + EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)", + ei->i_extra_isize, + EXT4_INODE_SIZE(inode->i_sb)); + return -EFSCORRUPTED; + } + if ((new_extra_isize < ei->i_extra_isize) || + (new_extra_isize < 4) || + (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE)) + return -EINVAL; /* Should never happen */ + raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); @@ -5648,7 +5688,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) } } - percpu_down_write(&sbi->s_journal_flag_rwsem); + percpu_down_write(&sbi->s_writepages_rwsem); jbd2_journal_lock_updates(journal); /* @@ -5665,7 +5705,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = jbd2_journal_flush(journal); if (err < 0) { jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); ext4_inode_resume_unlocked_dio(inode); return err; } @@ -5674,7 +5714,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); if (val) up_write(&EXT4_I(inode)->i_mmap_sem); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 2880e017cd0a..baa2f6375226 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -727,7 +727,7 @@ group_add_out: if (err == 0) err = err2; mnt_drop_write_file(filp); - if (!err && (o_group > EXT4_SB(sb)->s_groups_count) && + if (!err && (o_group < EXT4_SB(sb)->s_groups_count) && ext4_has_group_desc_csum(sb) && test_opt(sb, INIT_INODE_TABLE)) err = ext4_register_li_request(sb, o_group); @@ -749,6 +749,13 @@ resizefs_out: if (!blk_queue_discard(q)) return -EOPNOTSUPP; + /* + * We haven't replayed the journal, so we cannot use our + * block-bitmap-guided storage zapping commands. + */ + if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) + return -EROFS; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a49d0e5d7baf..c18668e3135e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2377,7 +2377,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned size; - struct ext4_group_info ***new_groupinfo; + struct ext4_group_info ***old_groupinfo, ***new_groupinfo; size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); @@ -2390,13 +2390,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } - if (sbi->s_group_info) { - memcpy(new_groupinfo, sbi->s_group_info, + rcu_read_lock(); + old_groupinfo = rcu_dereference(sbi->s_group_info); + if (old_groupinfo) + memcpy(new_groupinfo, old_groupinfo, sbi->s_group_info_size * sizeof(*sbi->s_group_info)); - kvfree(sbi->s_group_info); - } - sbi->s_group_info = new_groupinfo; + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_group_info, new_groupinfo); sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + if (old_groupinfo) + ext4_kvfree_array_rcu(old_groupinfo); ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", sbi->s_group_info_size); return 0; @@ -2408,6 +2411,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, { int i; int metalen = 0; + int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info **meta_group_info; struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2426,12 +2430,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, "for a buddy group"); goto exit_meta_group_info; } - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = - meta_group_info; + rcu_read_lock(); + rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; + rcu_read_unlock(); } - meta_group_info = - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; + meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); @@ -2479,8 +2483,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { - kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; + struct ext4_group_info ***group_info; + + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); + kfree(group_info[idx]); + group_info[idx] = NULL; + rcu_read_unlock(); } exit_meta_group_info: return -ENOMEM; @@ -2493,6 +2502,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int err; struct ext4_group_desc *desc; + struct ext4_group_info ***group_info; struct kmem_cache *cachep; err = ext4_mb_alloc_groupinfo(sb, ngroups); @@ -2527,11 +2537,16 @@ err_freebuddy: while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); while (i-- > 0) - kfree(sbi->s_group_info[i]); + kfree(group_info[i]); + rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - kvfree(sbi->s_group_info); + rcu_read_lock(); + kvfree(rcu_dereference(sbi->s_group_info)); + rcu_read_unlock(); return -ENOMEM; } @@ -2720,7 +2735,7 @@ int ext4_mb_release(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; - struct ext4_group_info *grinfo; + struct ext4_group_info *grinfo, ***group_info; struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2738,9 +2753,12 @@ int ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) - kfree(sbi->s_group_info[i]); - kvfree(sbi->s_group_info); + kfree(group_info[i]); + kvfree(group_info); + rcu_read_unlock(); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); @@ -2998,7 +3016,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); atomic64_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4888,7 +4907,8 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) @@ -5033,7 +5053,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 364ea4d4a943..bce2d696d6b9 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -434,6 +434,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) int ext4_ext_migrate(struct inode *inode) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); handle_t *handle; int retval = 0, i; __le32 *i_data; @@ -458,6 +459,8 @@ int ext4_ext_migrate(struct inode *inode) */ return retval; + percpu_down_write(&sbi->s_writepages_rwsem); + /* * Worst case we can touch the allocation bitmaps, a bgd * block, and a block to link in the orphan list. We do need @@ -468,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(handle)) { retval = PTR_ERR(handle); - return retval; + goto out_unlock; } goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; @@ -479,7 +482,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(tmp_inode)) { retval = PTR_ERR(tmp_inode); ext4_journal_stop(handle); - return retval; + goto out_unlock; } i_size_write(tmp_inode, i_size_read(inode)); /* @@ -521,7 +524,7 @@ int ext4_ext_migrate(struct inode *inode) */ ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); - goto out; + goto out_tmp_inode; } ei = EXT4_I(inode); @@ -602,10 +605,11 @@ err_out: /* Reset the extent details */ ext4_ext_tree_init(handle, tmp_inode); ext4_journal_stop(handle); -out: +out_tmp_inode: unlock_new_inode(tmp_inode); iput(tmp_inode); - +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return retval; } @@ -615,7 +619,8 @@ out: int ext4_ind_migrate(struct inode *inode) { struct ext4_extent_header *eh; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_super_block *es = sbi->s_es; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; @@ -639,9 +644,13 @@ int ext4_ind_migrate(struct inode *inode) if (test_opt(inode->i_sb, DELALLOC)) ext4_alloc_da_blocks(inode); + percpu_down_write(&sbi->s_writepages_rwsem); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_unlock; + } down_write(&EXT4_I(inode)->i_data_sem); ret = ext4_ext_check_inode(inode); @@ -676,5 +685,7 @@ int ext4_ind_migrate(struct inode *inode) errout: ext4_journal_stop(handle); up_write(&EXT4_I(inode)->i_data_sem); +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return ret; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index c2e830a6206d..fb1ad9510c5f 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -119,10 +119,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, { __ext4_warning(sb, function, line, "%s", msg); __ext4_warning(sb, function, line, - "MMP failure info: last update time: %llu, last update " - "node: %s, last update device: %s", - (long long unsigned int) le64_to_cpu(mmp->mmp_time), - mmp->mmp_nodename, mmp->mmp_bdevname); + "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", + (unsigned long long)le64_to_cpu(mmp->mmp_time), + (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, + (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); } /* @@ -153,6 +153,7 @@ static int kmmpd(void *data) mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, EXT4_MMP_MIN_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); + BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); bdevname(bh->b_bdev, mmp->mmp_bdevname); memcpy(mmp->mmp_nodename, init_utsname()->nodename, @@ -377,7 +378,8 @@ skip: /* * Start a kernel thread to update the MMP block periodically. */ - EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", + EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s", + (int)sizeof(mmp->mmp_bdevname), bdevname(bh->b_bdev, mmp->mmp_bdevname)); if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 3c3757ee11f0..339ede11896a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -79,8 +79,18 @@ static struct buffer_head *ext4_append(handle_t *handle, static int ext4_dx_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent); +/* + * Hints to ext4_read_dirblock regarding whether we expect a directory + * block being read to be an index block, or a block containing + * directory entries (and if the latter, whether it was found via a + * logical block in an htree index block). This is used to control + * what sort of sanity checkinig ext4_read_dirblock() will do on the + * directory block read from the storage device. EITHER will means + * the caller doesn't know what kind of directory block will be read, + * so no specific verification will be done. + */ typedef enum { - EITHER, INDEX, DIRENT + EITHER, INDEX, DIRENT, DIRENT_HTREE } dirblock_type_t; #define ext4_read_dirblock(inode, block, type) \ @@ -106,11 +116,14 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } - if (!bh) { + if (!bh && (type == INDEX || type == DIRENT_HTREE)) { ext4_error_inode(inode, func, line, block, - "Directory hole found"); + "Directory hole found for htree %s block", + (type == INDEX) ? "index" : "leaf"); return ERR_PTR(-EFSCORRUPTED); } + if (!bh) + return NULL; dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ if (is_dx(inode)) { @@ -960,7 +973,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -1432,6 +1445,7 @@ restart: /* * We deal with the read-ahead logic here. */ + cond_resched(); if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; @@ -1537,7 +1551,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, return (struct buffer_head *) frame; do { block = dx_get_block(frame->at); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) goto errout; @@ -2135,6 +2149,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; + /* Can we just ignore htree data? */ + if (ext4_has_metadata_csum(sb)) { + EXT4_ERROR_INODE(dir, + "Directory has corrupted htree index."); + retval = -EFSCORRUPTED; + goto out; + } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; ext4_mark_inode_dirty(handle, dir); @@ -2142,6 +2163,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { bh = ext4_read_dirblock(dir, block, DIRENT); + if (bh == NULL) { + bh = ext4_bread(handle, dir, block, + EXT4_GET_BLOCKS_CREATE); + goto add_to_new_block; + } if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2162,6 +2188,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, brelse(bh); } bh = ext4_append(handle, dir, &block); +add_to_new_block: if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2203,7 +2230,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, return PTR_ERR(frame); entries = frame->entries; at = frame->at; - bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); + bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE); if (IS_ERR(bh)) { err = PTR_ERR(bh); bh = NULL; @@ -2702,7 +2729,7 @@ bool ext4_empty_dir(struct inode *inode) { unsigned int offset; struct buffer_head *bh; - struct ext4_dir_entry_2 *de, *de1; + struct ext4_dir_entry_2 *de; struct super_block *sb; if (ext4_has_inline_data(inode)) { @@ -2719,36 +2746,48 @@ bool ext4_empty_dir(struct inode *inode) EXT4_ERROR_INODE(inode, "invalid size"); return true; } - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, + * so treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) return true; de = (struct ext4_dir_entry_2 *) bh->b_data; - de1 = ext4_next_entry(de, sb->s_blocksize); - if (le32_to_cpu(de->inode) != inode->i_ino || - le32_to_cpu(de1->inode) == 0 || - strcmp(".", de->name) || strcmp("..", de1->name)) { - ext4_warning_inode(inode, "directory missing '.' and/or '..'"); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, + 0) || + le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { + ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); return true; } - offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) + - ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); - de = ext4_next_entry(de1, sb->s_blocksize); + offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); + de = ext4_next_entry(de, sb->s_blocksize); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, + offset) || + le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { + ext4_warning_inode(inode, "directory missing '..'"); + brelse(bh); + return true; + } + offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { - if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + if (!(offset & (sb->s_blocksize - 1))) { unsigned int lblock; brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); bh = ext4_read_dirblock(inode, lblock, EITHER); + if (bh == NULL) { + offset += sb->s_blocksize; + continue; + } if (IS_ERR(bh)) return true; - de = (struct ext4_dir_entry_2 *) bh->b_data; } + de = (struct ext4_dir_entry_2 *) (bh->b_data + + (offset & (sb->s_blocksize - 1))); if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, offset)) { - de = (struct ext4_dir_entry_2 *)(bh->b_data + - sb->s_blocksize); offset = (offset | (sb->s_blocksize - 1)) + 1; continue; } @@ -2757,7 +2796,6 @@ bool ext4_empty_dir(struct inode *inode) return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); - de = ext4_next_entry(de, sb->s_blocksize); } brelse(bh); return true; @@ -3052,18 +3090,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - if (inode->i_nlink == 0) { - ext4_warning_inode(inode, "Deleting file '%.*s' with no links", - dentry->d_name.len, dentry->d_name.name); - set_nlink(inode, 1); - } retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_unlink; dir->i_ctime = dir->i_mtime = ext4_current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); - drop_nlink(inode); + if (inode->i_nlink == 0) + ext4_warning_inode(inode, "Deleting file '%.*s' with no links", + dentry->d_name.len, dentry->d_name.name); + else + drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); inode->i_ctime = ext4_current_time(inode); @@ -3302,7 +3339,10 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, so + * treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) { *retval = PTR_ERR(bh); return NULL; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 0094923e5ebf..94f60f9d57fd 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -469,16 +469,25 @@ int ext4_bio_write_page(struct ext4_io_submit *io, nr_to_submit) { gfp_t gfp_flags = GFP_NOFS; + /* + * Since bounce page allocation uses a mempool, we can only use + * a waiting mask (i.e. request guaranteed allocation) on the + * first page of the bio. Otherwise it can deadlock. + */ + if (io->io_bio) + gfp_flags = GFP_NOWAIT | __GFP_NOWARN; retry_encrypt: data_page = fscrypt_encrypt_page(inode, page, gfp_flags); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); - if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { - if (io->io_bio) { + if (ret == -ENOMEM && + (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { + gfp_flags = GFP_NOFS; + if (io->io_bio) ext4_io_submit(io); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } - gfp_flags |= __GFP_NOFAIL; + else + gfp_flags |= __GFP_NOFAIL; + congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry_encrypt; } data_page = NULL; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 67b359629a66..845d9841c91c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -16,6 +16,33 @@ #include "ext4_jbd2.h" +struct ext4_rcu_ptr { + struct rcu_head rcu; + void *ptr; +}; + +static void ext4_rcu_ptr_callback(struct rcu_head *head) +{ + struct ext4_rcu_ptr *ptr; + + ptr = container_of(head, struct ext4_rcu_ptr, rcu); + kvfree(ptr->ptr); + kfree(ptr); +} + +void ext4_kvfree_array_rcu(void *to_free) +{ + struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + + if (ptr) { + ptr->ptr = to_free; + call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); + return; + } + synchronize_rcu(); + kvfree(to_free); +} + int ext4_resize_begin(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -541,8 +568,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, brelse(gdb); goto out; } - memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, - gdb->b_size); + memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, + s_group_desc, j)->b_data, gdb->b_size); set_buffer_uptodate(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); @@ -849,13 +876,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } brelse(dind); - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_super(handle, sb); @@ -903,15 +932,24 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; - EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); + if (err) { + kvfree(n_group_desc); + brelse(gdb_bh); + return err; + } + + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); + EXT4_SB(sb)->s_gdb_count++; + ext4_kvfree_array_rcu(o_group_desc); return err; } @@ -1173,7 +1211,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, * use non-sparse filesystems anymore. This is already checked above. */ if (gdb_off) { - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); @@ -1255,7 +1294,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, /* * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). */ - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); @@ -1383,11 +1422,14 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_read(&sbi->s_freeclusters_counter)); if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; + struct flex_groups *fg; + flex_group = ext4_flex_group(sbi, group_data[0].group); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + &fg->free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, - &sbi->s_flex_groups[flex_group].free_inodes); + &fg->free_inodes); } /* @@ -1482,7 +1524,8 @@ exit_journal: for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); if (old_gdb == gdb_bh->b_blocknr) continue; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, @@ -2040,6 +2083,10 @@ out: free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); - ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); + if (err) + ext4_warning(sb, "error (%d) occurred during " + "file system resize", err); + ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", + ext4_blocks_count(es)); return err; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a6c7ace9cfd1..ed0520fe4dad 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -344,7 +344,8 @@ static void save_error_info(struct super_block *sb, const char *func, unsigned int line) { __save_error_info(sb, func, line); - ext4_commit_super(sb, 1); + if (!bdev_read_only(sb->s_bdev)) + ext4_commit_super(sb, 1); } /* @@ -826,6 +827,8 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + struct buffer_head **group_desc; + struct flex_groups **flex_groups; int aborted = 0; int i, err; @@ -857,15 +860,23 @@ static void ext4_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) ext4_commit_super(sb, 1); + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < sbi->s_gdb_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); - kvfree(sbi->s_flex_groups); + brelse(group_desc[i]); + kvfree(group_desc); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) @@ -2109,8 +2120,8 @@ done: int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct flex_groups *new_groups; - int size; + struct flex_groups **old_groups, **new_groups; + int size, i, j; if (!sbi->s_log_groups_per_flex) return 0; @@ -2119,22 +2130,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) if (size <= sbi->s_flex_groups_allocated) return 0; - size = roundup_pow_of_two(size * sizeof(struct flex_groups)); - new_groups = ext4_kvzalloc(size, GFP_KERNEL); + new_groups = ext4_kvzalloc(roundup_pow_of_two(size * + sizeof(*sbi->s_flex_groups)), GFP_KERNEL); if (!new_groups) { - ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", - size / (int) sizeof(struct flex_groups)); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex group pointers", size); return -ENOMEM; } - - if (sbi->s_flex_groups) { - memcpy(new_groups, sbi->s_flex_groups, - (sbi->s_flex_groups_allocated * - sizeof(struct flex_groups))); - kvfree(sbi->s_flex_groups); + for (i = sbi->s_flex_groups_allocated; i < size; i++) { + new_groups[i] = ext4_kvzalloc(roundup_pow_of_two( + sizeof(struct flex_groups)), + GFP_KERNEL); + if (!new_groups[i]) { + for (j = sbi->s_flex_groups_allocated; j < i; j++) + kvfree(new_groups[j]); + kvfree(new_groups); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex groups", size); + return -ENOMEM; + } } - sbi->s_flex_groups = new_groups; - sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); + rcu_read_lock(); + old_groups = rcu_dereference(sbi->s_flex_groups); + if (old_groups) + memcpy(new_groups, old_groups, + (sbi->s_flex_groups_allocated * + sizeof(struct flex_groups *))); + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_flex_groups, new_groups); + sbi->s_flex_groups_allocated = size; + if (old_groups) + ext4_kvfree_array_rcu(old_groups); return 0; } @@ -2142,6 +2168,7 @@ static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; + struct flex_groups *fg; ext4_group_t flex_group; int i, err; @@ -2159,12 +2186,11 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); - atomic_add(ext4_free_inodes_count(sb, gdp), - &sbi->s_flex_groups[flex_group].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); + atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes); atomic64_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); - atomic_add(ext4_used_dirs_count(sb, gdp), - &sbi->s_flex_groups[flex_group].used_dirs); + &fg->free_clusters); + atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs); } return 1; @@ -2743,17 +2769,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_QUOTA - if (ext4_has_feature_quota(sb) && !readonly) { - ext4_msg(sb, KERN_ERR, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); - return 0; - } - if (ext4_has_feature_project(sb) && !readonly) { +#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) + if (!readonly && (ext4_has_feature_quota(sb) || + ext4_has_feature_project(sb))) { ext4_msg(sb, KERN_ERR, - "Filesystem with project quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); + "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2"); return 0; } #endif /* CONFIG_QUOTA */ @@ -3342,6 +3362,40 @@ int ext4_calculate_overhead(struct super_block *sb) return 0; } +static void ext4_clamp_want_extra_isize(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + unsigned def_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + + if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = 0; + return; + } + if (sbi->s_want_extra_isize < 4) { + sbi->s_want_extra_isize = def_extra_isize; + if (ext4_has_feature_extra_isize(sb)) { + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_want_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_want_extra_isize); + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_min_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_min_extra_isize); + } + } + /* Check if enough inode space is available */ + if ((sbi->s_want_extra_isize > sbi->s_inode_size) || + (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size)) { + sbi->s_want_extra_isize = def_extra_isize; + ext4_msg(sb, KERN_INFO, + "required extra inode space not available"); + } +} + static void ext4_set_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; @@ -3375,9 +3429,10 @@ static void ext4_set_resv_clusters(struct super_block *sb) static int ext4_fill_super(struct super_block *sb, void *data, int silent) { char *orig_data = kstrdup(data, GFP_KERNEL); - struct buffer_head *bh; + struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + struct flex_groups **flex_groups; ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -3766,7 +3821,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || sbi->s_inodes_per_group > blocksize * 8) { ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", - sbi->s_blocks_per_group); + sbi->s_inodes_per_group); goto failed_mount; } sbi->s_itb_per_group = sbi->s_inodes_per_group / @@ -3897,9 +3952,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_BLOCKS_PER_GROUP(sb) - 1); do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { - ext4_msg(sb, KERN_WARNING, "groups count too large: %u " + ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " "(block count %llu, first data block %u, " - "blocks per group %lu)", sbi->s_groups_count, + "blocks per group %lu)", blocks_count, ext4_blocks_count(es), le32_to_cpu(es->s_first_data_block), EXT4_BLOCKS_PER_GROUP(sb)); @@ -3927,9 +3982,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } } - sbi->s_group_desc = ext4_kvmalloc(db_count * + rcu_assign_pointer(sbi->s_group_desc, + ext4_kvmalloc(db_count * sizeof(struct buffer_head *), - GFP_KERNEL); + GFP_KERNEL)); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); ret = -ENOMEM; @@ -3939,14 +3995,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) bgl_lock_init(sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { + struct buffer_head *bh; + block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); - if (!sbi->s_group_desc[i]) { + bh = sb_bread_unmovable(sb, block); + if (!bh) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } + rcu_read_lock(); + rcu_dereference(sbi->s_group_desc)[i] = bh; + rcu_read_unlock(); } sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { @@ -4034,7 +4095,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "data=, fs mounted w/o journal"); goto failed_mount_wq; } - sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM; + sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; clear_opt(sb, JOURNAL_CHECKSUM); clear_opt(sb, DATA_FLAGS); sbi->s_journal = NULL; @@ -4156,29 +4217,7 @@ no_journal: if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) sb->s_flags |= MS_RDONLY; - /* determine the minimum size of new large inodes, if present */ - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - if (ext4_has_feature_extra_isize(sb)) { - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_want_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_want_extra_isize); - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_min_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_min_extra_isize); - } - } - /* Check if enough inode space is available */ - if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > - sbi->s_inode_size) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - ext4_msg(sb, KERN_INFO, "required extra inode space not" - "available"); - } + ext4_clamp_want_extra_isize(sb); ext4_set_resv_clusters(sb); @@ -4217,7 +4256,7 @@ no_journal: err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); if (!err) - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); + err = percpu_init_rwsem(&sbi->s_writepages_rwsem); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); @@ -4310,13 +4349,19 @@ failed_mount7: ext4_unregister_li_request(sb); failed_mount6: ext4_mb_release(sb); - if (sbi->s_flex_groups) - kvfree(sbi->s_flex_groups); + rcu_read_lock(); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); @@ -4343,9 +4388,12 @@ failed_mount3: if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); @@ -4959,6 +5007,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + ext4_clamp_want_extra_isize(sb); + if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ test_opt(sb, JOURNAL_CHECKSUM)) { ext4_msg(sb, KERN_ERR, "changing journal_checksum " |