diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-09 19:11:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-09 19:11:44 -0700 |
commit | 64b2d1fbbfda07765dae3f601862796a61b2c451 (patch) | |
tree | 67947ede8fc007a9f0925e697a302a02bd087032 /fs/f2fs/node.c | |
parent | b1cce8032f6abe900b078d24f3c3938726528f97 (diff) | |
parent | 9ab701349247368f9d57a993b95a5bb05bb37e10 (diff) |
Merge tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"In this round, there is no special interesting feature, but we've
investigated a couple of tuning points with respect to the I/O flow.
Several major bug fixes and a bunch of clean-ups also have been made.
This patch-set includes the following major enhancement patches:
- enhance wait_on_page_writeback
- support SEEK_DATA and SEEK_HOLE
- enhance readahead flows
- enhance IO flushes
- support fiemap
- add some tracepoints
The other bug fixes are as follows:
- fix to support a large volume > 2TB correctly
- recovery bug fix wrt fallocated space
- fix recursive lock on xattr operations
- fix some cases on the remount flow
And, there are a bunch of cleanups"
* tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (52 commits)
f2fs: support f2fs_fiemap
f2fs: avoid not to call remove_dirty_inode
f2fs: recover fallocated space
f2fs: fix to recover data written by dio
f2fs: large volume support
f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages
f2fs: avoid overflow when large directory feathure is enabled
f2fs: fix recursive lock by f2fs_setxattr
MAINTAINERS: add a co-maintainer from samsung for F2FS
MAINTAINERS: change the email address for f2fs
f2fs: use inode_init_owner() to simplify codes
f2fs: avoid to use slab memory in f2fs_issue_flush for efficiency
f2fs: add a tracepoint for f2fs_read_data_page
f2fs: add a tracepoint for f2fs_write_{meta,node,data}_pages
f2fs: add a tracepoint for f2fs_write_{meta,node,data}_page
f2fs: add a tracepoint for f2fs_write_end
f2fs: add a tracepoint for f2fs_write_begin
f2fs: fix checkpatch warning
f2fs: deactivate inode page if the inode is evicted
f2fs: decrease the lock granularity during write_begin
...
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r-- | fs/f2fs/node.c | 154 |
1 files changed, 84 insertions, 70 deletions
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 57caa6eaf47b..9dfb9a042fd2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -26,20 +26,26 @@ static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; -static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type) +bool available_free_memory(struct f2fs_sb_info *sbi, int type) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; unsigned long mem_size = 0; + bool res = false; si_meminfo(&val); - if (type == FREE_NIDS) - mem_size = nm_i->fcnt * sizeof(struct free_nid); - else if (type == NAT_ENTRIES) - mem_size += nm_i->nat_cnt * sizeof(struct nat_entry); - mem_size >>= 12; - - /* give 50:50 memory for free nids and nat caches respectively */ - return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11)); + /* give 25%, 25%, 50% memory for each components respectively */ + if (type == FREE_NIDS) { + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + } else if (type == NAT_ENTRIES) { + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + } else if (type == DIRTY_DENTS) { + mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + } + return res; } static void clear_node_page_dirty(struct page *page) @@ -147,6 +153,18 @@ bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) return fsync_done; } +void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; + + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e) + e->fsync_done = false; + write_unlock(&nm_i->nat_tree_lock); +} + static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; @@ -179,9 +197,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); goto retry; } - nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); - nat_set_ino(e, le32_to_cpu(ne->ino)); - nat_set_version(e, ne->version); + node_info_from_raw_nat(&e->ni, ne); } write_unlock(&nm_i->nat_tree_lock); } @@ -243,7 +259,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); - if (available_free_memory(nm_i, NAT_ENTRIES)) + if (available_free_memory(sbi, NAT_ENTRIES)) return 0; write_lock(&nm_i->nat_tree_lock); @@ -849,8 +865,7 @@ struct page *new_node_page(struct dnode_of_data *dn, if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return ERR_PTR(-EPERM); - page = grab_cache_page_write_begin(NODE_MAPPING(sbi), - dn->nid, AOP_FLAG_NOFS); + page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); if (!page) return ERR_PTR(-ENOMEM); @@ -867,6 +882,7 @@ struct page *new_node_page(struct dnode_of_data *dn, new_ni.ino = dn->inode->i_ino; set_node_addr(sbi, &new_ni, NEW_ADDR, false); + f2fs_wait_on_page_writeback(page, NODE); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); SetPageUptodate(page); @@ -946,8 +962,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) struct page *page; int err; repeat: - page = grab_cache_page_write_begin(NODE_MAPPING(sbi), - nid, AOP_FLAG_NOFS); + page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) return ERR_PTR(-ENOMEM); @@ -1194,6 +1209,8 @@ static int f2fs_write_node_page(struct page *page, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; + trace_f2fs_writepage(page, NODE); + if (unlikely(sbi->por_doing)) goto redirty_out; @@ -1225,10 +1242,7 @@ static int f2fs_write_node_page(struct page *page, return 0; redirty_out: - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } @@ -1238,6 +1252,8 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long diff; + trace_f2fs_writepages(mapping->host, wbc, NODE); + /* balancing f2fs's metadata in background */ f2fs_balance_fs_bg(sbi); @@ -1313,13 +1329,14 @@ static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, radix_tree_delete(&nm_i->free_nid_root, i->nid); } -static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) +static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; struct nat_entry *ne; bool allocated = false; - if (!available_free_memory(nm_i, FREE_NIDS)) + if (!available_free_memory(sbi, FREE_NIDS)) return -1; /* 0 nid should not be used */ @@ -1372,9 +1389,10 @@ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) kmem_cache_free(free_nid_slab, i); } -static void scan_nat_page(struct f2fs_nm_info *nm_i, +static void scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; int i; @@ -1389,7 +1407,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); f2fs_bug_on(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) { - if (add_free_nid(nm_i, start_nid, true) < 0) + if (add_free_nid(sbi, start_nid, true) < 0) break; } } @@ -1413,7 +1431,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) while (1) { struct page *page = get_current_nat_page(sbi, nid); - scan_nat_page(nm_i, page, nid); + scan_nat_page(sbi, page, nid); f2fs_put_page(page, 1); nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); @@ -1433,7 +1451,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); nid = le32_to_cpu(nid_in_journal(sum, i)); if (addr == NULL_ADDR) - add_free_nid(nm_i, nid, true); + add_free_nid(sbi, nid, true); else remove_free_nid(nm_i, nid); } @@ -1450,7 +1468,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i = NULL; retry: - if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) + if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) return false; spin_lock(&nm_i->free_nid_list_lock); @@ -1510,7 +1528,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); f2fs_bug_on(!i || i->state != NID_ALLOC); - if (!available_free_memory(nm_i, FREE_NIDS)) { + if (!available_free_memory(sbi, FREE_NIDS)) { __del_from_free_nid_list(nm_i, i); need_free = true; } else { @@ -1532,7 +1550,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, clear_node_page_dirty(page); } -void recover_inline_xattr(struct inode *inode, struct page *page) +static void recover_inline_xattr(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); void *src_addr, *dst_addr; @@ -1557,6 +1575,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page) src_addr = inline_xattr_addr(page); inline_size = inline_xattr_size(inode); + f2fs_wait_on_page_writeback(ipage, NODE); memcpy(dst_addr, src_addr, inline_size); update_inode(inode, ipage); @@ -1612,6 +1631,11 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) struct node_info old_ni, new_ni; struct page *ipage; + get_node_info(sbi, ino, &old_ni); + + if (unlikely(old_ni.blk_addr != NULL_ADDR)) + return -EINVAL; + ipage = grab_cache_page(NODE_MAPPING(sbi), ino); if (!ipage) return -ENOMEM; @@ -1619,7 +1643,6 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); - get_node_info(sbi, ino, &old_ni); SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); @@ -1645,35 +1668,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are linked in pages list. + * these pre-readed pages are alloced in bd_inode's mapping tree. */ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) { - struct page *page; - int page_idx = start; + struct inode *inode = sbi->sb->s_bdev->bd_inode; + struct address_space *mapping = inode->i_mapping; + int i, page_idx = start; struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; - for (; page_idx < start + nrpages; page_idx++) { - /* alloc temporal page for read node summary info*/ - page = alloc_page(GFP_F2FS_ZERO); - if (!page) + for (i = 0; page_idx < start + nrpages; page_idx++, i++) { + /* alloc page in bd_inode for reading node summary info */ + pages[i] = grab_cache_page(mapping, page_idx); + if (!pages[i]) break; - - lock_page(page); - page->index = page_idx; - list_add_tail(&page->lru, pages); + f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio); } - list_for_each_entry(page, pages, lru) - f2fs_submit_page_mbio(sbi, page, page->index, &fio); - f2fs_submit_merged_bio(sbi, META, READ); - - return page_idx - start; + return i; } int restore_node_summary(struct f2fs_sb_info *sbi, @@ -1681,11 +1698,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi, { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct page *page, *tmp; + struct inode *inode = sbi->sb->s_bdev->bd_inode; block_t addr; int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - int i, last_offset, nrpages, err = 0; - LIST_HEAD(page_list); + struct page *pages[bio_blocks]; + int i, idx, last_offset, nrpages, err = 0; /* scan the node segment */ last_offset = sbi->blocks_per_seg; @@ -1696,29 +1713,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi, nrpages = min(last_offset - i, bio_blocks); /* read ahead node pages */ - nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages); + nrpages = ra_sum_pages(sbi, pages, addr, nrpages); if (!nrpages) return -ENOMEM; - list_for_each_entry_safe(page, tmp, &page_list, lru) { + for (idx = 0; idx < nrpages; idx++) { if (err) goto skip; - lock_page(page); - if (unlikely(!PageUptodate(page))) { + lock_page(pages[idx]); + if (unlikely(!PageUptodate(pages[idx]))) { err = -EIO; } else { - rn = F2FS_NODE(page); + rn = F2FS_NODE(pages[idx]); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; sum_entry++; } - unlock_page(page); + unlock_page(pages[idx]); skip: - list_del(&page->lru); - __free_pages(page, 0); + page_cache_release(pages[idx]); } + + invalidate_mapping_pages(inode->i_mapping, addr, + addr + nrpages); } return err; } @@ -1756,9 +1775,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); goto retry; } - nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); - nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); - nat_set_version(ne, raw_ne.version); + node_info_from_raw_nat(&ne->ni, &raw_ne); __set_nat_cache_dirty(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); } @@ -1791,7 +1808,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t nid; struct f2fs_nat_entry raw_ne; int offset = -1; - block_t new_blkaddr; if (nat_get_blkaddr(ne) == NEW_ADDR) continue; @@ -1827,11 +1843,7 @@ to_nat_page: f2fs_bug_on(!nat_blk); raw_ne = nat_blk->entries[nid - start_nid]; flush_now: - new_blkaddr = nat_get_blkaddr(ne); - - raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); - raw_ne.block_addr = cpu_to_le32(new_blkaddr); - raw_ne.version = nat_get_version(ne); + raw_nat_from_node_info(&raw_ne, &ne->ni); if (offset < 0) { nat_blk->entries[nid - start_nid] = raw_ne; @@ -1841,7 +1853,7 @@ flush_now: } if (nat_get_blkaddr(ne) == NULL_ADDR && - add_free_nid(NM_I(sbi), nid, false) <= 0) { + add_free_nid(sbi, nid, false) <= 0) { write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); @@ -1869,8 +1881,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3; + nm_i->available_nids = nm_i->max_nid - 3; nm_i->fcnt = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; |