From 91942321e4c9f8460f260cdfcf0a7a48a73a84a4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 10:13:22 -0700 Subject: f2fs: use inode pointer for {set, clear}_inode_flag This patch refactors to use inode pointer for set_inode_flag and clear_inode_flag. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2e6f537a0e7d..77dc929715cf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -241,7 +241,7 @@ void drop_inmem_pages(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_FILE); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); -- cgit v1.2.3 From 0a87f664d1ad29cc4712303d2142fe386368f07d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 23 May 2016 12:04:56 -0700 Subject: f2fs: detect congestion of flush command issues If flush commands do not incur any congestion, we don't need to throw that to dispatching queue which causes unnecessary latency. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 77dc929715cf..34a9159cf5ac 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -433,24 +433,28 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE)) { + if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { struct bio *bio = f2fs_bio_alloc(0); int ret; + atomic_inc(&fcc->submit_flush); bio->bi_bdev = sbi->sb->s_bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); + atomic_dec(&fcc->submit_flush); bio_put(bio); return ret; } init_completion(&cmd.wait); + atomic_inc(&fcc->submit_flush); llist_add(&cmd.llnode, &fcc->issue_list); if (!fcc->dispatch_list) wake_up(&fcc->flush_wait_queue); wait_for_completion(&cmd.wait); + atomic_dec(&fcc->submit_flush); return cmd.ret; } @@ -464,6 +468,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; + atomic_set(&fcc->submit_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->cmd_control_info = fcc; -- cgit v1.2.3 From e589c2c477b44e06754508a4e8b883e5ae7294aa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Jun 2016 15:24:24 -0700 Subject: f2fs: control not to exceed # of cached nat entries This is to avoid cache entry management overhead including radix tree. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 34a9159cf5ac..9011bffd1dd0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -345,6 +345,11 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (!need) return; + + /* balance_fs_bg is able to be pending */ + if (excess_cached_nats(sbi)) + f2fs_balance_fs_bg(sbi); + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. -- cgit v1.2.3 From 7dfeaa32204841aade36ba243a1cb45c54f42c15 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 4 Jun 2016 14:21:28 -0700 Subject: f2fs: avoid reverse IO order for NODE and DATA There is a data race between allocate_data_block() and f2fs_sbumit_page_mbio(), which incur unnecessary reversed bio submission. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9011bffd1dd0..7b58bfbd84a3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1399,11 +1399,17 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio->page, fio->type); + if (fio->type == NODE || fio->type == DATA) + mutex_lock(&fio->sbi->wio_mutex[fio->type]); + allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type); /* writeout dirty page into bdev */ f2fs_submit_page_mbio(fio); + + if (fio->type == NODE || fio->type == DATA) + mutex_unlock(&fio->sbi->wio_mutex[fio->type]); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) -- cgit v1.2.3 From 19a5f5e2ef37f032efd840ada257bce2e91c8066 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 4 Jun 2016 14:25:24 -0700 Subject: f2fs: drop any block plugging In f2fs, we don't need to keep block plugging for NODE and DATA writes, since we already merged bios as much as possible. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7b58bfbd84a3..eff046a792ad 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -379,13 +379,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { - if (test_opt(sbi, DATA_FLUSH)) { - struct blk_plug plug; - - blk_start_plug(&plug); + if (test_opt(sbi, DATA_FLUSH)) sync_dirty_inodes(sbi, FILE_INODE); - blk_finish_plug(&plug); - } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); } -- cgit v1.2.3 From 36abef4e796d382e81a0c2d21ea5327481dd7154 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 3 Jun 2016 19:29:38 -0700 Subject: f2fs: introduce mode=lfs mount option This mount option is to enable original log-structured filesystem forcefully. So, there should be no random writes for main area. Especially, this supports host-managed SMR device. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index eff046a792ad..4792f94089f7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -707,6 +707,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; + unsigned int secno, start_segno; mutex_lock(&dirty_i->seglist_lock); @@ -726,8 +727,22 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (!test_opt(sbi, DISCARD)) continue; - f2fs_issue_discard(sbi, START_BLOCK(sbi, start), + if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { + f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); + continue; + } +next: + secno = GET_SECNO(sbi, start); + start_segno = secno * sbi->segs_per_sec; + if (!IS_CURSEC(sbi, secno) && + !get_valid_blocks(sbi, start, sbi->segs_per_sec)) + f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), + sbi->segs_per_sec << sbi->log_blocks_per_seg); + + start = start_segno + sbi->segs_per_sec; + if (start < end) + goto next; } mutex_unlock(&dirty_i->seglist_lock); @@ -1221,6 +1236,9 @@ void allocate_new_segments(struct f2fs_sb_info *sbi) { int i; + if (test_opt(sbi, LFS)) + return; + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) __allocate_new_segments(sbi, i); } -- cgit v1.2.3 From 52763a4b7a2112743745c5bbfe43fe6f54d4b39a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Jun 2016 09:47:48 -0700 Subject: f2fs: detect host-managed SMR by feature flag If mkfs.f2fs gives a feature flag for host-managed SMR, we can set mode=lfs by default. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4792f94089f7..782975e791f1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2402,7 +2402,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100; - sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; + if (!test_opt(sbi, LFS)) + sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; -- cgit v1.2.3 From ad4edb83143fdeef9e6fdd9daaa735b59476565b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Jun 2016 16:41:49 -0700 Subject: f2fs: produce more nids and reduce readahead nats The readahead nat pages are more likely to be reclaimed quickly, so it'd better to gather more free nids in advance. And, let's keep some free nids as much as possible. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 782975e791f1..6d16ecf9d29e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -371,7 +371,9 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); if (!available_free_memory(sbi, FREE_NIDS)) - try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES); + try_to_free_nids(sbi, MAX_FREE_NIDS); + else + build_free_nids(sbi); /* checkpoint is the only way to shrink partial cached entries */ if (!available_free_memory(sbi, NAT_ENTRIES) || -- cgit v1.2.3 From c7b41e161368388487238d71986a65290f83d74a Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 7 Jul 2016 12:13:33 +0800 Subject: f2fs: avoid mismatching block range for discard This patch skip discard block range smaller than trim_minlen, and can not be merged by neighbour Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6d16ecf9d29e..9e13db05e3f0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -672,6 +672,10 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); + if (force && start && end != max_blocks + && (end - start) < cpc->trim_minlen) + continue; + __add_discard_entry(sbi, cpc, se, start, end); } } -- cgit v1.2.3 From c24a0fd655431e9f14c8a8754d0a6cc247f9e9e5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Jul 2016 22:46:55 +0800 Subject: f2fs: fix to avoid redundant discard during fstrim With below test steps, f2fs will issue redundant discard when doing fstrim, the reason is that we issue discards for both prefree segments and consecutive freed region user wants to trim, part regions they covered are overlapped, here, we change to do not to issue any discards for prefree segments in trimmed range. 1. mount -t f2fs -o discard /dev/zram0 /mnt/f2fs 2. fstrim -o 0 -l 3221225472 -m 2097152 -v /mnt/f2fs/ 3. dd if=/dev/zero of=/mnt/f2fs/a bs=2M count=1 4. dd if=/dev/zero of=/mnt/f2fs/b bs=1M count=1 5. sync 6. rm /mnt/f2fs/a /mnt/f2fs/b 7. fstrim -o 0 -l 3221225472 -m 2097152 -v /mnt/f2fs/ Before: <...>-5428 [001] ...1 9511.052125: f2fs_issue_discard: dev = (251,0), blkstart = 0x2200, blklen = 0x200 <...>-5428 [001] ...1 9511.052787: f2fs_issue_discard: dev = (251,0), blkstart = 0x2200, blklen = 0x300 After: <...>-6764 [000] ...1 9720.382504: f2fs_issue_discard: dev = (251,0), blkstart = 0x2200, blklen = 0x300 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9e13db05e3f0..08f6c0be20cf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -714,6 +714,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; + bool force = (cpc->reason == CP_DISCARD); mutex_lock(&dirty_i->seglist_lock); @@ -730,7 +731,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) dirty_i->nr_dirty[PRE] -= end - start; - if (!test_opt(sbi, DISCARD)) + if (force || !test_opt(sbi, DISCARD)) continue; if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { @@ -754,7 +755,7 @@ next: /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { - if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) + if (force && entry->len < cpc->trim_minlen) goto skip; f2fs_issue_discard(sbi, entry->blkaddr, entry->len); cpc->trimmed += entry->len; -- cgit v1.2.3 From 44a83499dda714d9262a9bf4fdac8c077893c9e6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Jul 2016 18:23:35 -0700 Subject: f2fs: add maximum prefree segments In 1TB storage, we need to admit 22841 prefree segments, which can consume too much segments. This patch sets 8GB in max. prefree segments in that case. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 08f6c0be20cf..e87aa058f57a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2409,6 +2409,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100; + if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) + sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; + if (!test_opt(sbi, LFS)) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; -- cgit v1.2.3 From 9dfa1baff76d08843aaf5e3c78f6da6950957702 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Jul 2016 19:33:19 -0700 Subject: f2fs: use blk_plug in all the possible paths This patch reverts 19a5f5e2ef37 (f2fs: drop any block plugging), and adds blk_plug in write paths additionally. The main reason is that blk_start_plug can be used to wake up from low-power mode before submitting further bios. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e87aa058f57a..d45e6bbf8493 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -381,8 +381,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { - if (test_opt(sbi, DATA_FLUSH)) + if (test_opt(sbi, DATA_FLUSH)) { + struct blk_plug plug; + + blk_start_plug(&plug); sync_dirty_inodes(sbi, FILE_INODE); + blk_finish_plug(&plug); + } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); } -- cgit v1.2.3