diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 11 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 3 | ||||
-rw-r--r-- | fs/ext4/extents.c | 70 | ||||
-rw-r--r-- | fs/ext4/inode.c | 150 | ||||
-rw-r--r-- | fs/ext4/super.c | 1 |
5 files changed, 153 insertions, 82 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 56f9271ee8cc..874d169a193e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -361,14 +361,11 @@ struct ext4_new_group_data { so set the magic i_delalloc_reserve_flag after taking the inode allocation semaphore for */ #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 - /* Call ext4_da_update_reserve_space() after successfully - allocating the blocks */ -#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 /* caller is from the direct IO path, request to creation of an unitialized extents if not allocated, split the uninitialized extent if blocks has been preallocated already*/ -#define EXT4_GET_BLOCKS_DIO 0x0010 -#define EXT4_GET_BLOCKS_CONVERT 0x0020 +#define EXT4_GET_BLOCKS_DIO 0x0008 +#define EXT4_GET_BLOCKS_CONVERT 0x0010 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) /* Convert extent to initialized after direct IO complete */ @@ -699,6 +696,8 @@ struct ext4_inode_info { unsigned int i_reserved_meta_blocks; unsigned int i_allocated_meta_blocks; unsigned short i_delalloc_reserved_flag; + sector_t i_da_metadata_calc_last_lblock; + int i_da_metadata_calc_len; /* on-disk additional length */ __u16 i_extra_isize; @@ -1441,6 +1440,8 @@ extern int ext4_block_truncate_page(handle_t *handle, extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern int flush_aio_dio_completed_IO(struct inode *inode); +extern void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 2ca686454e87..bdb6ce7e2eb4 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); } -extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); +extern int ext4_ext_calc_metadata_amount(struct inode *inode, + sector_t lblocks); extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 91ae46098ea4..765a4826b118 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) * to allocate @blocks * Worse case is one block per extent */ -int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) +int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) { - int lcap, icap, rcap, leafs, idxs, num; - int newextents = blocks; - - rcap = ext4_ext_space_root_idx(inode, 0); - lcap = ext4_ext_space_block(inode, 0); - icap = ext4_ext_space_block_idx(inode, 0); + struct ext4_inode_info *ei = EXT4_I(inode); + int idxs, num = 0; - /* number of new leaf blocks needed */ - num = leafs = (newextents + lcap - 1) / lcap; + idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent_idx)); /* - * Worse case, we need separate index block(s) - * to link all new leaf blocks + * If the new delayed allocation block is contiguous with the + * previous da block, it can share index blocks with the + * previous block, so we only need to allocate a new index + * block every idxs leaf blocks. At ldxs**2 blocks, we need + * an additional index block, and at ldxs**3 blocks, yet + * another index blocks. */ - idxs = (leafs + icap - 1) / icap; - do { - num += idxs; - idxs = (idxs + icap - 1) / icap; - } while (idxs > rcap); + if (ei->i_da_metadata_calc_len && + ei->i_da_metadata_calc_last_lblock+1 == lblock) { + if ((ei->i_da_metadata_calc_len % idxs) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { + num++; + ei->i_da_metadata_calc_len = 0; + } else + ei->i_da_metadata_calc_len++; + ei->i_da_metadata_calc_last_lblock++; + return num; + } - return num; + /* + * In the worst case we need a new set of index blocks at + * every level of the inode's extent tree. + */ + ei->i_da_metadata_calc_len = 1; + ei->i_da_metadata_calc_last_lblock = lblock; + return ext_depth(inode) + 1; } static int @@ -3117,7 +3132,19 @@ out: unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, newblock + max_blocks, allocated - max_blocks); + allocated = max_blocks; } + + /* + * If we have done fallocate with the offset that is already + * delayed allocated, we would have block reservation + * and quota reservation done in the delayed write path. + * But fallocate would have already updated quota and block + * count for this offset. So cancel these reservation + */ + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ext4_da_update_reserve_space(inode, allocated, 0); + map_out: set_buffer_mapped(bh_result); out1: @@ -3353,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); + if (allocated > max_blocks) + allocated = max_blocks; set_buffer_new(bh_result); /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. + */ + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ext4_da_update_reserve_space(inode, allocated, 1); + + /* * Cache the extent and update transaction to commit on fdatasync only * when it is _not_ an uninitialized extent. */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 84eeb8f515a3..e11952404e02 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1009,49 +1009,56 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) return &EXT4_I(inode)->i_reserved_quota; } #endif + /* * Calculate the number of metadata blocks need to reserve - * to allocate @blocks for non extent file based file + * to allocate a new block at @lblocks for non extent file based file */ -static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) +static int ext4_indirect_calc_metadata_amount(struct inode *inode, + sector_t lblock) { - int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ind_blks, dind_blks, tind_blks; - - /* number of new indirect blocks needed */ - ind_blks = (blocks + icap - 1) / icap; + struct ext4_inode_info *ei = EXT4_I(inode); + int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1; + int blk_bits; - dind_blks = (ind_blks + icap - 1) / icap; + if (lblock < EXT4_NDIR_BLOCKS) + return 0; - tind_blks = 1; + lblock -= EXT4_NDIR_BLOCKS; - return ind_blks + dind_blks + tind_blks; + if (ei->i_da_metadata_calc_len && + (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { + ei->i_da_metadata_calc_len++; + return 0; + } + ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; + ei->i_da_metadata_calc_len = 1; + blk_bits = roundup_pow_of_two(lblock + 1); + return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; } /* * Calculate the number of metadata blocks need to reserve - * to allocate given number of blocks + * to allocate a block located at @lblock */ -static int ext4_calc_metadata_amount(struct inode *inode, int blocks) +static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) { - if (!blocks) - return 0; - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_calc_metadata_amount(inode, blocks); + return ext4_ext_calc_metadata_amount(inode, lblock); - return ext4_indirect_calc_metadata_amount(inode, blocks); + return ext4_indirect_calc_metadata_amount(inode, lblock); } /* * Called with i_data_sem down, which is important since we can call * ext4_discard_preallocations() from here. */ -static void ext4_da_update_reserve_space(struct inode *inode, int used) +void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - int mdb_free = 0; + int mdb_free = 0, allocated_meta_blocks = 0; spin_lock(&ei->i_block_reservation_lock); if (unlikely(used > ei->i_reserved_data_blocks)) { @@ -1067,6 +1074,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) ei->i_reserved_data_blocks -= used; used += ei->i_allocated_meta_blocks; ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; + allocated_meta_blocks = ei->i_allocated_meta_blocks; ei->i_allocated_meta_blocks = 0; percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); @@ -1076,16 +1084,31 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) * only when we have written all of the delayed * allocation blocks. */ - mdb_free = ei->i_allocated_meta_blocks; + mdb_free = ei->i_reserved_meta_blocks; + ei->i_reserved_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); - ei->i_allocated_meta_blocks = 0; } spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); /* Update quota subsystem */ - vfs_dq_claim_block(inode, used); - if (mdb_free) - vfs_dq_release_reservation_block(inode, mdb_free); + if (quota_claim) { + vfs_dq_claim_block(inode, used); + if (mdb_free) + vfs_dq_release_reservation_block(inode, mdb_free); + } else { + /* + * We did fallocate with an offset that is already delayed + * allocated. So on delayed allocated writeback we should + * not update the quota for allocated blocks. But then + * converting an fallocate region to initialized region would + * have caused a metadata allocation. So claim quota for + * that + */ + if (allocated_meta_blocks) + vfs_dq_claim_block(inode, allocated_meta_blocks); + vfs_dq_release_reservation_block(inode, mdb_free + used); + } /* * If we have done all the pending block allocations and if @@ -1285,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, */ EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; } - } + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. We don't + * support fallocate for non extent files. So we can update + * reserve space here. + */ + if ((retval > 0) && + (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) + ext4_da_update_reserve_space(inode, retval, 1); + } if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) EXT4_I(inode)->i_delalloc_reserved_flag = 0; - /* - * Update reserved blocks/metadata blocks after successful - * block allocation which had been deferred till now. - */ - if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) - ext4_da_update_reserve_space(inode, retval); - up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && buffer_mapped(bh)) { int ret = check_block_validity(inode, "file system " @@ -1802,12 +1827,15 @@ static int ext4_journalled_write_end(struct file *file, return ret ? ret : copied; } -static int ext4_da_reserve_space(struct inode *inode, int nrblocks) +/* + * Reserve a single block located at lblock + */ +static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) { int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - unsigned long md_needed, md_reserved, total = 0; + unsigned long md_needed, md_reserved; /* * recalculate the amount of metadata blocks to reserve @@ -1817,8 +1845,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) repeat: spin_lock(&ei->i_block_reservation_lock); md_reserved = ei->i_reserved_meta_blocks; - md_needed = ext4_calc_metadata_amount(inode, nrblocks); - total = md_needed + nrblocks; + md_needed = ext4_calc_metadata_amount(inode, lblock); spin_unlock(&ei->i_block_reservation_lock); /* @@ -1826,31 +1853,19 @@ repeat: * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, total)) { - /* - * We tend to badly over-estimate the amount of - * metadata blocks which are needed, so if we have - * reserved any metadata blocks, try to force out the - * inode and see if we have any better luck. - */ - if (md_reserved && retries++ <= 3) - goto retry; + if (vfs_dq_reserve_block(inode, md_needed + 1)) return -EDQUOT; - } - if (ext4_claim_free_blocks(sbi, total)) { - vfs_dq_release_reservation_block(inode, total); + if (ext4_claim_free_blocks(sbi, md_needed + 1)) { + vfs_dq_release_reservation_block(inode, md_needed + 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - retry: - if (md_reserved) - write_inode_now(inode, (retries == 3)); yield(); goto repeat; } return -ENOSPC; } spin_lock(&ei->i_block_reservation_lock); - ei->i_reserved_data_blocks += nrblocks; + ei->i_reserved_data_blocks++; ei->i_reserved_meta_blocks += md_needed; spin_unlock(&ei->i_block_reservation_lock); @@ -1889,8 +1904,9 @@ static void ext4_da_release_space(struct inode *inode, int to_free) * only when we have written all of the delayed * allocation blocks. */ - to_free += ei->i_allocated_meta_blocks; - ei->i_allocated_meta_blocks = 0; + to_free += ei->i_reserved_meta_blocks; + ei->i_reserved_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; } /* update fs dirty blocks counter */ @@ -2203,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * variables are updated after the blocks have been allocated. */ new.b_state = 0; - get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | - EXT4_GET_BLOCKS_DELALLOC_RESERVE); + get_blocks_flags = EXT4_GET_BLOCKS_CREATE; if (mpd->b_state & (1 << BH_Delay)) - get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; + get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; + blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, &new, get_blocks_flags); if (blks < 0) { @@ -2504,7 +2520,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, * XXX: __block_prepare_write() unmaps passed block, * is it OK? */ - ret = ext4_da_reserve_space(inode, 1); + ret = ext4_da_reserve_space(inode, iblock); if (ret) /* not enough space to reserve */ return ret; @@ -3022,7 +3038,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - int ret, retries = 0; + int ret, retries = 0, quota_retries = 0; struct page *page; pgoff_t index; unsigned from, to; @@ -3081,6 +3097,22 @@ retry: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; + + if ((ret == -EDQUOT) && + EXT4_I(inode)->i_reserved_meta_blocks && + (quota_retries++ < 3)) { + /* + * Since we often over-estimate the number of meta + * data blocks required, we may sometimes get a + * spurios out of quota error even though there would + * be enough space once we write the data blocks and + * find out how many meta data blocks were _really_ + * required. So try forcing the inode write to see if + * that helps. + */ + write_inode_now(inode, (quota_retries == 3)); + goto retry; + } out: return ret; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7cccb35c0f4d..735c20d5fd56 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_reserved_data_blocks = 0; ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; ei->i_delalloc_reserved_flag = 0; spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA |