diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-28 10:44:50 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-28 10:44:50 -0700 |
commit | f793f2961170c0b49c1650e69e7825484159ce62 (patch) | |
tree | 06d27973f9db1080c1460f32155ce2baf610c3d4 /fs/gfs2/file.c | |
parent | dabcbb1bae0f55378060b285062b20f6ec648c6a (diff) | |
parent | b99b98dc2673a123a73068f16720232d7be7e669 (diff) |
Merge http://sucs.org/~rohan/git/gfs2-3.0-nmw
* http://sucs.org/~rohan/git/gfs2-3.0-nmw: (24 commits)
GFS2: Move readahead of metadata during deallocation into its own function
GFS2: Remove two unused variables
GFS2: Misc fixes
GFS2: rewrite fallocate code to write blocks directly
GFS2: speed up delete/unlink performance for large files
GFS2: Fix off-by-one in gfs2_blk2rgrpd
GFS2: Clean up ->page_mkwrite
GFS2: Correctly set goal block after allocation
GFS2: Fix AIL flush issue during fsync
GFS2: Use cached rgrp in gfs2_rlist_add()
GFS2: Call do_strip() directly from recursive_scan()
GFS2: Remove obsolete assert
GFS2: Cache the most recently used resource group in the inode
GFS2: Make resource groups "append only" during life of fs
GFS2: Use rbtree for resource groups and clean up bitmap buffer ref count scheme
GFS2: Fix lseek after SEEK_DATA, SEEK_HOLE have been added
GFS2: Clean up gfs2_create
GFS2: Use ->dirty_inode()
GFS2: Fix bug trap and journaled data fsync
GFS2: Fix inode allocation error path
...
Diffstat (limited to 'fs/gfs2/file.c')
-rw-r--r-- | fs/gfs2/file.c | 295 |
1 file changed, 110 insertions, 185 deletions
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index edeb9e802903..5002408dabea 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -59,15 +59,24 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) struct gfs2_holder i_gh; loff_t error; - if (origin == 2) { + switch (origin) { + case SEEK_END: /* These reference inode->i_size */ + case SEEK_DATA: + case SEEK_HOLE: error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (!error) { error = generic_file_llseek_unlocked(file, offset, origin); gfs2_glock_dq_uninit(&i_gh); } - } else + break; + case SEEK_CUR: + case SEEK_SET: error = generic_file_llseek_unlocked(file, offset, origin); + break; + default: + error = -EINVAL; + } return error; } @@ -357,8 +366,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; struct gfs2_alloc *al; + loff_t size; int ret; + /* Wait if fs is frozen. This is racy so we check again later on + * and retry if the fs has been frozen after the page lock has + * been acquired + */ + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) @@ -367,8 +383,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); - if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) + if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) { + lock_page(page); + if (!PageUptodate(page) || page->mapping != inode->i_mapping) { + ret = -EAGAIN; + unlock_page(page); + } goto out_unlock; + } + ret = -ENOMEM; al = gfs2_alloc_get(ip); if (al == NULL) @@ -388,7 +411,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) rblocks += data_blocks ? 
data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; - rblocks += gfs2_rg_blocks(al); + rblocks += gfs2_rg_blocks(ip); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) @@ -396,21 +419,29 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) lock_page(page); ret = -EINVAL; - last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; - if (page->index > last_index) - goto out_unlock_page; + size = i_size_read(inode); + last_index = (size - 1) >> PAGE_CACHE_SHIFT; + /* Check page index against inode size */ + if (size == 0 || (page->index > last_index)) + goto out_trans_end; + + ret = -EAGAIN; + /* If truncated, we must retry the operation, we may have raced + * with the glock demotion code. + */ + if (!PageUptodate(page) || page->mapping != inode->i_mapping) + goto out_trans_end; + + /* Unstuff, if required, and allocate backing blocks for page */ ret = 0; - if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) - goto out_unlock_page; - if (gfs2_is_stuffed(ip)) { + if (gfs2_is_stuffed(ip)) ret = gfs2_unstuff_dinode(ip, page); - if (ret) - goto out_unlock_page; - } - ret = gfs2_allocate_page_backing(page); + if (ret == 0) + ret = gfs2_allocate_page_backing(page); -out_unlock_page: - unlock_page(page); +out_trans_end: + if (ret) + unlock_page(page); gfs2_trans_end(sdp); out_trans_fail: gfs2_inplace_release(ip); @@ -422,11 +453,17 @@ out_unlock: gfs2_glock_dq(&gh); out: gfs2_holder_uninit(&gh); - if (ret == -ENOMEM) - ret = VM_FAULT_OOM; - else if (ret) - ret = VM_FAULT_SIGBUS; - return ret; + if (ret == 0) { + set_page_dirty(page); + /* This check must be post dropping of transaction lock */ + if (inode->i_sb->s_frozen == SB_UNFROZEN) { + wait_on_page_writeback(page); + } else { + ret = -EAGAIN; + unlock_page(page); + } + } + return block_page_mkwrite_return(ret); } static const struct vm_operations_struct gfs2_vm_ops = { @@ -551,8 +588,16 @@ static int gfs2_close(struct inode *inode, struct file 
*file) * @end: the end position in the file to sync * @datasync: set if we can ignore timestamp changes * - * The VFS will flush data for us. We only need to worry - * about metadata here. + * We split the data flushing here so that we don't wait for the data + * until after we've also sent the metadata to disk. Note that for + * data=ordered, we will write & wait for the data at the log flush + * stage anyway, so this is unlikely to make much of a difference + * except in the data=writeback case. + * + * If the fdatawrite fails due to any reason except -EIO, we will + * continue the remainder of the fsync, although we'll still report + * the error at the end. This is to match filemap_write_and_wait_range() + * behaviour. * * Returns: errno */ @@ -560,30 +605,34 @@ static int gfs2_close(struct inode *inode, struct file *file) static int gfs2_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct inode *inode = file->f_mapping->host; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); struct gfs2_inode *ip = GFS2_I(inode); - int ret; + int ret, ret1 = 0; - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) - return ret; - mutex_lock(&inode->i_mutex); + if (mapping->nrpages) { + ret1 = filemap_fdatawrite_range(mapping, start, end); + if (ret1 == -EIO) + return ret1; + } if (datasync) sync_state &= ~I_DIRTY_SYNC; if (sync_state) { ret = sync_inode_metadata(inode, 1); - if (ret) { - mutex_unlock(&inode->i_mutex); + if (ret) return ret; - } - gfs2_ail_flush(ip->i_gl); + if (gfs2_is_jdata(ip)) + filemap_write_and_wait(mapping); + gfs2_ail_flush(ip->i_gl, 1); } - mutex_unlock(&inode->i_mutex); - return 0; + if (mapping->nrpages) + ret = filemap_fdatawait_range(mapping, start, end); + + return ret ? 
ret : ret1; } /** @@ -620,135 +669,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return generic_file_aio_write(iocb, iov, nr_segs, pos); } -static int empty_write_end(struct page *page, unsigned from, - unsigned to, int mode) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct buffer_head *bh; - unsigned offset, blksize = 1 << inode->i_blkbits; - pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; - - zero_user(page, from, to-from); - mark_page_accessed(page); - - if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) { - if (!gfs2_is_writeback(ip)) - gfs2_page_add_databufs(ip, page, from, to); - - block_commit_write(page, from, to); - return 0; - } - - offset = 0; - bh = page_buffers(page); - while (offset < to) { - if (offset >= from) { - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - clear_buffer_new(bh); - write_dirty_buffer(bh, WRITE); - } - offset += blksize; - bh = bh->b_this_page; - } - - offset = 0; - bh = page_buffers(page); - while (offset < to) { - if (offset >= from) { - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - return -EIO; - } - offset += blksize; - bh = bh->b_this_page; - } - return 0; -} - -static int needs_empty_write(sector_t block, struct inode *inode) -{ - int error; - struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; - - bh_map.b_size = 1 << inode->i_blkbits; - error = gfs2_block_map(inode, block, &bh_map, 0); - if (unlikely(error)) - return error; - return !buffer_mapped(&bh_map); -} - -static int write_empty_blocks(struct page *page, unsigned from, unsigned to, - int mode) -{ - struct inode *inode = page->mapping->host; - unsigned start, end, next, blksize; - sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - int ret; - - blksize = 1 << inode->i_blkbits; - next = end = 0; - while (next < from) { - next += blksize; - block++; - } - start = next; - do { - next += blksize; - ret = 
needs_empty_write(block, inode); - if (unlikely(ret < 0)) - return ret; - if (ret == 0) { - if (end) { - ret = __block_write_begin(page, start, end - start, - gfs2_block_map); - if (unlikely(ret)) - return ret; - ret = empty_write_end(page, start, end, mode); - if (unlikely(ret)) - return ret; - end = 0; - } - start = next; - } - else - end = next; - block++; - } while (next < to); - - if (end) { - ret = __block_write_begin(page, start, end - start, gfs2_block_map); - if (unlikely(ret)) - return ret; - ret = empty_write_end(page, start, end, mode); - if (unlikely(ret)) - return ret; - } - - return 0; -} - static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, int mode) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *dibh; int error; - u64 start = offset >> PAGE_CACHE_SHIFT; - unsigned int start_offset = offset & ~PAGE_CACHE_MASK; - u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; - pgoff_t curr; - struct page *page; - unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; - unsigned int from, to; - - if (!end_offset) - end_offset = PAGE_CACHE_SIZE; + unsigned int nr_blks; + sector_t lblock = offset >> inode->i_blkbits; error = gfs2_meta_inode_buffer(ip, &dibh); if (unlikely(error)) - goto out; + return error; gfs2_trans_add_bh(ip->i_gl, dibh, 1); @@ -758,40 +690,31 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, goto out; } - curr = start; - offset = start << PAGE_CACHE_SHIFT; - from = start_offset; - to = PAGE_CACHE_SIZE; - while (curr <= end) { - page = grab_cache_page_write_begin(inode->i_mapping, curr, - AOP_FLAG_NOFS); - if (unlikely(!page)) { - error = -ENOMEM; - goto out; - } + while (len) { + struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; + bh_map.b_size = len; + set_buffer_zeronew(&bh_map); - if (curr == end) - to = end_offset; - error = write_empty_blocks(page, from, to, mode); - if (!error && offset + to > inode->i_size && - !(mode & FALLOC_FL_KEEP_SIZE)) { - 
i_size_write(inode, offset + to); - } - unlock_page(page); - page_cache_release(page); - if (error) + error = gfs2_block_map(inode, lblock, &bh_map, 1); + if (unlikely(error)) goto out; - curr++; - offset += PAGE_CACHE_SIZE; - from = 0; + len -= bh_map.b_size; + nr_blks = bh_map.b_size >> inode->i_blkbits; + lblock += nr_blks; + if (!buffer_new(&bh_map)) + continue; + if (unlikely(!buffer_zeronew(&bh_map))) { + error = -EIO; + goto out; + } } + if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) + i_size_write(inode, offset + len); - gfs2_dinode_out(ip, dibh->b_data); mark_inode_dirty(inode); - brelse(dibh); - out: + brelse(dibh); return error; } @@ -799,7 +722,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, unsigned int *data_blocks, unsigned int *ind_blocks) { const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; + unsigned int max_blocks = ip->i_rgd->rd_free_clone; unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); for (tmp = max_data; tmp > sdp->sd_diptrs;) { @@ -831,6 +754,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, int error; loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; + loff_t max_chunk_size = UINT_MAX & bsize_mask; next = (next + 1) << sdp->sd_sb.sb_bsize_shift; /* We only support the FALLOC_FL_KEEP_SIZE mode */ @@ -884,11 +808,12 @@ retry: goto out_qunlock; } max_bytes = bytes; - calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); + calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, + &max_bytes, &data_blocks, &ind_blocks); al->al_requested = data_blocks + ind_blocks; rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(al); + RES_RG_HDR + gfs2_rg_blocks(ip); if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; |