diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-21 14:50:04 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-21 14:50:04 -0700 |
| commit | d46dd0d88341e45f8e0226fdef5462f5270898fc (patch) | |
| tree | e7413796e3ed09bf8c060470d7b71348e9e036de | |
| parent | bb0bc49a1cef574646eb25d74709c5ff200903a8 (diff) | |
| parent | cb8ff3ead9a3fc43727980be58c7099506f65261 (diff) | |
Merge tag 'f2fs-for-7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"In this round, the changes primarily focus on resolving race
conditions and memory safety issues (UAF), and on improving the
robustness of garbage collection (GC) and folio management.
Enhancements:
- add page-order information for large folio reads in iostat
- add defrag_blocks sysfs node
Bug fixes:
- fix uninitialized kobject put in f2fs_init_sysfs()
- disallow setting an extension to both cold and hot
- fix node_cnt race between extent node destroy and writeback
- preserve previous reserve_{blocks,node} value when remount
- freeze GC and discard threads quickly
- fix false alarm of lockdep on cp_global_sem lock
- fix data loss caused by incorrect use of nat_entry flag
- skip empty sections in f2fs_get_victim
- fix inline data not being written to disk in writeback path
- fix fsck inconsistency caused by FGGC of node block
- fix fsck inconsistency caused by incorrect nat_entry flag usage
- call f2fs_handle_critical_error() to set cp_error flag
- fix fiemap boundary handling when read extent cache is incomplete
- fix use-after-free of sbi in f2fs_compress_write_end_io()
- fix UAF caused by decrementing sbi->nr_pages[] in f2fs_write_end_io()
- fix incorrect file address mapping when inline inode is unwritten
- fix incomplete search range in f2fs_get_victim when f2fs_need_rand_seg is enabled
- avoid memory leak in f2fs_rename()"
* tag 'f2fs-for-7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (35 commits)
f2fs: add page-order information for large folio reads in iostat
f2fs: do not support mmap write for large folio
f2fs: fix uninitialized kobject put in f2fs_init_sysfs()
f2fs: protect extension_list reading with sb_lock in f2fs_sbi_show()
f2fs: disallow setting an extension to both cold and hot
f2fs: fix node_cnt race between extent node destroy and writeback
f2fs: allow empty mount string for Opt_usr|grp|projjquota
f2fs: fix to preserve previous reserve_{blocks,node} value when remount
f2fs: invalidate block device page cache on umount
f2fs: fix to freeze GC and discard threads quickly
f2fs: fix to avoid uninit-value access in f2fs_sanity_check_node_footer
f2fs: fix false alarm of lockdep on cp_global_sem lock
f2fs: fix data loss caused by incorrect use of nat_entry flag
f2fs: fix to skip empty sections in f2fs_get_victim
f2fs: fix inline data not being written to disk in writeback path
f2fs: fix fsck inconsistency caused by FGGC of node block
f2fs: fix fsck inconsistency caused by incorrect nat_entry flag usage
f2fs: fix to do sanity check on dcc->discard_cmd_cnt conditionally
f2fs: refactor node footer flag setting related code
f2fs: refactor f2fs_move_node_folio function
...
| -rw-r--r-- | Documentation/ABI/testing/sysfs-fs-f2fs | 6 | ||||
| -rw-r--r-- | fs/f2fs/checkpoint.c | 9 | ||||
| -rw-r--r-- | fs/f2fs/compress.c | 14 | ||||
| -rw-r--r-- | fs/f2fs/data.c | 53 | ||||
| -rw-r--r-- | fs/f2fs/debug.c | 1 | ||||
| -rw-r--r-- | fs/f2fs/extent_cache.c | 17 | ||||
| -rw-r--r-- | fs/f2fs/f2fs.h | 41 | ||||
| -rw-r--r-- | fs/f2fs/file.c | 15 | ||||
| -rw-r--r-- | fs/f2fs/gc.c | 23 | ||||
| -rw-r--r-- | fs/f2fs/inline.c | 22 | ||||
| -rw-r--r-- | fs/f2fs/inode.c | 2 | ||||
| -rw-r--r-- | fs/f2fs/iostat.c | 38 | ||||
| -rw-r--r-- | fs/f2fs/iostat.h | 4 | ||||
| -rw-r--r-- | fs/f2fs/namei.c | 16 | ||||
| -rw-r--r-- | fs/f2fs/node.c | 112 | ||||
| -rw-r--r-- | fs/f2fs/node.h | 23 | ||||
| -rw-r--r-- | fs/f2fs/segment.c | 20 | ||||
| -rw-r--r-- | fs/f2fs/super.c | 70 | ||||
| -rw-r--r-- | fs/f2fs/sysfs.c | 27 | ||||
| -rw-r--r-- | include/linux/f2fs_fs.h | 3 | ||||
| -rw-r--r-- | include/trace/events/f2fs.h | 21 |
21 files changed, 386 insertions, 151 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index c1d2b3fd9c65..423ec40e2e4e 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -407,6 +407,12 @@ Contact: "Hridya Valsaraju" <hridya@google.com> Description: Average number of valid blocks. Available when CONFIG_F2FS_STAT_FS=y. +What: /sys/fs/f2fs/<disk>/defrag_blocks +Date: February 2026 +Contact: "Jinbao Liu" <liujinbao1@xiaomi.com> +Description: Number of blocks moved by defragment. + Available when CONFIG_F2FS_STAT_FS=y. + What: /sys/fs/f2fs/<disk>/mounted_time_sec Date: February 2020 Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0143365c07dc..c00a6b6ebcbd 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -232,15 +232,6 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) static struct kmem_cache *ino_entry_slab; struct kmem_cache *f2fs_inode_entry_slab; -void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, - unsigned char reason) -{ - f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL); - if (!end_io) - f2fs_flush_merged_writes(sbi); - f2fs_handle_critical_error(sbi, reason); -} - /* * We guarantee no failure on the returned page. 
*/ diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 50fac72734ac..881e76158b96 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1491,10 +1491,10 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) f2fs_compress_free_page(page); - dec_page_count(sbi, type); - - if (atomic_dec_return(&cic->pending_pages)) + if (atomic_dec_return(&cic->pending_pages)) { + dec_page_count(sbi, type); return; + } for (i = 0; i < cic->nr_rpages; i++) { WARN_ON(!cic->rpages[i]); @@ -1504,6 +1504,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) page_array_free(sbi, cic->rpages, cic->nr_rpages); kmem_cache_free(cic_entry_slab, cic); + + /* + * Make sure dec_page_count() is the last access to sbi. + * Once it drops the F2FS_WB_CP_DATA counter to zero, the + * unmount thread can proceed to destroy sbi and + * sbi->page_array_slab. + */ + dec_page_count(sbi, type); } static int f2fs_write_raw_pages(struct compress_ctx *cc, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cf05014fa5e3..8d4f1e75dee3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -173,7 +173,8 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) while (nr_pages--) dec_page_count(F2FS_F_SB(folio), __read_io_type(folio)); - if (F2FS_F_SB(folio)->node_inode && is_node_folio(folio) && + if (bio->bi_status == BLK_STS_OK && + F2FS_F_SB(folio)->node_inode && is_node_folio(folio) && f2fs_sanity_check_node_footer(F2FS_F_SB(folio), folio, folio->index, NODE_TYPE_REGULAR, true)) bio->bi_status = BLK_STS_IOERR; @@ -386,6 +387,8 @@ static void f2fs_write_end_io(struct bio *bio) folio->index, NODE_TYPE_REGULAR, true); f2fs_bug_on(sbi, folio->index != nid_of_node(folio)); } + if (f2fs_in_warm_node_list(folio)) + f2fs_del_fsync_node_entry(sbi, folio); dec_page_count(sbi, type); @@ -397,8 +400,6 @@ static void f2fs_write_end_io(struct bio *bio) wq_has_sleeper(&sbi->cp_wait)) wake_up(&sbi->cp_wait); - if (f2fs_in_warm_node_list(sbi, folio)) - 
f2fs_del_fsync_node_entry(sbi, folio); folio_clear_f2fs_gcing(folio); folio_end_writeback(folio); } @@ -1578,7 +1579,8 @@ static bool f2fs_map_blocks_cached(struct inode *inode, f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); - if (f2fs_allow_multi_device_dio(sbi, flag)) { + map->m_multidev_dio = f2fs_allow_multi_device_dio(sbi, flag); + if (map->m_multidev_dio) { int bidx = f2fs_target_device_index(sbi, map->m_pblk); struct f2fs_dev_info *dev = &sbi->devs[bidx]; @@ -1638,8 +1640,26 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && map->m_may_create); - if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) - goto out; + if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) { + struct extent_info ei; + + /* + * 1. If map->m_multidev_dio is true, map->m_pblk cannot be + * waitted by f2fs_wait_on_block_writeback_range() and are not + * mergeable. + * 2. If pgofs hits the read extent cache, it means the mapping + * is already cached in the extent cache, but it is not + * mergeable, and there is no need to query the mapping again + * via f2fs_get_dnode_of_data(). 
+ */ + pgofs = (pgoff_t)map->m_lblk + map->m_len; + if (map->m_len == maxblocks || + map->m_multidev_dio || + f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) + goto out; + ofs = map->m_len; + goto map_more; + } map->m_bdev = inode->i_sb->s_bdev; map->m_multidev_dio = @@ -1650,7 +1670,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) /* it only supports block size == page size */ pgofs = (pgoff_t)map->m_lblk; - end = pgofs + maxblocks; +map_more: + end = (pgoff_t)map->m_lblk + maxblocks; if (flag == F2FS_GET_BLOCK_PRECACHE) mode = LOOKUP_NODE_RA; @@ -2490,6 +2511,8 @@ next_folio: if (!folio) goto out; + f2fs_update_read_folio_count(F2FS_I_SB(inode), folio); + folio_in_bio = false; index = folio->index; offset = 0; @@ -2664,6 +2687,8 @@ static int f2fs_mpage_readpages(struct inode *inode, struct fsverity_info *vi, prefetchw(&folio->flags); } + f2fs_update_read_folio_count(F2FS_I_SB(inode), folio); + #ifdef CONFIG_F2FS_FS_COMPRESSION index = folio->index; @@ -2790,7 +2815,6 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) struct inode *inode = fio_inode(fio); struct folio *mfolio; struct page *page; - gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) return 0; @@ -2800,19 +2824,10 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) if (fscrypt_inode_uses_inline_crypto(inode)) return 0; -retry_encrypt: fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page_folio(page), - PAGE_SIZE, 0, gfp_flags); - if (IS_ERR(fio->encrypted_page)) { - /* flush pending IOs and wait for a while in the ENOMEM case */ - if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { - f2fs_flush_merged_writes(fio->sbi); - memalloc_retry_wait(GFP_NOFS); - gfp_flags |= __GFP_NOFAIL; - goto retry_encrypt; - } + PAGE_SIZE, 0, GFP_NOFS); + if (IS_ERR(fio->encrypted_page)) return PTR_ERR(fio->encrypted_page); - } mfolio = filemap_lock_folio(META_MAPPING(fio->sbi), fio->old_blkaddr); if (!IS_ERR(mfolio)) { diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c 
index 8e1040e375a7..af88db8fdb71 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -659,6 +659,7 @@ static int stat_show(struct seq_file *s, void *v) si->bg_node_blks); seq_printf(s, "BG skip : IO: %u, Other: %u\n", si->io_skip_bggc, si->other_skip_bggc); + seq_printf(s, "defrag blocks : %u\n", si->defrag_blks); seq_puts(s, "\nExtent Cache (Read):\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", si->hit_largest, si->hit_cached[EX_READ], diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index d73aeef333a2..d2e006420f04 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -119,9 +119,10 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type) if (!__init_may_extent_tree(inode, type)) return false; + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (type == EX_READ) { - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && !f2fs_sb_has_readonly(F2FS_I_SB(inode))) return false; @@ -644,6 +645,8 @@ static unsigned int __destroy_extent_node(struct inode *inode, while (atomic_read(&et->node_cnt)) { write_lock(&et->lock); + if (!is_inode_flag_set(inode, FI_NO_EXTENT)) + set_inode_flag(inode, FI_NO_EXTENT); node_cnt += __free_extent_tree(sbi, et, nr_shrink); write_unlock(&et->lock); } @@ -688,12 +691,12 @@ static void __update_extent_tree_range(struct inode *inode, write_lock(&et->lock); - if (type == EX_READ) { - if (is_inode_flag_set(inode, FI_NO_EXTENT)) { - write_unlock(&et->lock); - return; - } + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { + write_unlock(&et->lock); + return; + } + if (type == EX_READ) { prev = et->largest; dei.len = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7d0a467982d6..91f506e7c9cf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -10,6 +10,7 @@ #include <linux/uio.h> #include <linux/types.h> +#include <linux/mmzone.h> #include <linux/page-flags.h> #include <linux/slab.h> #include 
<linux/crc32.h> @@ -2032,6 +2033,8 @@ struct f2fs_sb_info { unsigned long long iostat_count[NR_IO_TYPE]; unsigned long long iostat_bytes[NR_IO_TYPE]; unsigned long long prev_iostat_bytes[NR_IO_TYPE]; + unsigned long long iostat_read_folio_count[NR_PAGE_ORDERS]; + unsigned long long prev_iostat_read_folio_count[NR_PAGE_ORDERS]; bool iostat_enable; unsigned long iostat_next_period; unsigned int iostat_period_ms; @@ -2040,6 +2043,9 @@ struct f2fs_sb_info { spinlock_t iostat_lat_lock; struct iostat_lat_info *iostat_io_lat; #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key cp_global_sem_key; +#endif }; /* Definitions to access f2fs_sb_info */ @@ -3900,7 +3906,6 @@ int f2fs_do_quota_sync(struct super_block *sb, int type); loff_t max_file_blocks(struct inode *inode); void f2fs_quota_off_umount(struct super_block *sb); void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag); -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason); void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); @@ -3919,11 +3924,11 @@ enum node_type; int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); -bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio); +bool f2fs_in_warm_node_list(struct folio *folio); void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio); void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); -int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t 
nid, @@ -3945,6 +3950,8 @@ int f2fs_sanity_check_node_footer(struct f2fs_sb_info *sbi, enum node_type ntype, bool in_irq); struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino); struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid); +int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, + bool mark_dirty, enum iostat_type io_type); int f2fs_move_node_folio(struct folio *node_folio, int gc_type); void f2fs_flush_inline_data(struct f2fs_sb_info *sbi); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, @@ -3987,7 +3994,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); int f2fs_start_discard_thread(struct f2fs_sb_info *sbi); void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); -bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi, bool need_check); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); @@ -4286,6 +4293,7 @@ struct f2fs_stat_info { int gc_secs[2][2]; int tot_blks, data_blks, node_blks; int bg_data_blks, bg_node_blks; + unsigned int defrag_blks; int blkoff[NR_CURSEG_TYPE]; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; @@ -4420,6 +4428,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) si->bg_node_blks += ((gc_type) == BG_GC) ? 
(blks) : 0; \ } while (0) +#define stat_inc_defrag_blk_count(sbi, blks) \ + (F2FS_STAT(sbi)->defrag_blks += (blks)) + int f2fs_build_stats(struct f2fs_sb_info *sbi); void f2fs_destroy_stats(struct f2fs_sb_info *sbi); void __init f2fs_create_root_stats(void); @@ -4461,6 +4472,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #define stat_inc_tot_blk_count(si, blks) do { } while (0) #define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0) #define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0) +#define stat_inc_defrag_blk_count(sbi, blks) do { } while (0) static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } @@ -5063,8 +5075,25 @@ static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, return; if (ofs == sbi->page_eio_ofs[type]) { - if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO) - set_ckpt_flags(sbi, CP_ERROR_FLAG); + if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO) { + enum stop_cp_reason stop_reason; + + switch (type) { + case META: + stop_reason = STOP_CP_REASON_READ_META; + break; + case NODE: + stop_reason = STOP_CP_REASON_READ_NODE; + break; + case DATA: + stop_reason = STOP_CP_REASON_READ_DATA; + break; + default: + f2fs_bug_on(sbi, 1); + return; + } + f2fs_stop_checkpoint(sbi, false, stop_reason); + } } else { sbi->page_eio_ofs[type] = ofs; sbi->page_eio_cnt[type] = 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 42f5832242b3..fb12c5c9affd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -81,8 +81,17 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) int err = 0; vm_fault_t ret; - if (unlikely(IS_IMMUTABLE(inode))) + /* + * We only support large folio on the read case. + * Don't make any dirty pages. 
+ */ + if (unlikely(IS_IMMUTABLE(inode)) || + mapping_large_folio_support(inode->i_mapping)) { + f2fs_err(sbi, "Not expected: immutable: %d large_folio: %d", + IS_IMMUTABLE(inode), + mapping_large_folio_support(inode->i_mapping)); return VM_FAULT_SIGBUS; + } if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { err = -EIO; @@ -3042,8 +3051,10 @@ out: clear_inode_flag(inode, FI_OPU_WRITE); unlock_out: inode_unlock(inode); - if (!err) + if (!err) { range->len = (u64)total << PAGE_SHIFT; + stat_inc_defrag_blk_count(sbi, total); + } return err; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c0c8a1056d6b..ba93010924c0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -316,10 +316,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->max_search = sbi->max_victim_search; /* let's select beginning hot/small space first. */ - if (f2fs_need_rand_seg(sbi)) + if (f2fs_need_rand_seg(sbi)) { p->offset = get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); - else if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + SIT_I(sbi)->last_victim[p->gc_mode] = p->offset; + } else if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) p->offset = 0; else p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; @@ -909,6 +910,9 @@ retry: if (!f2fs_segment_has_free_slot(sbi, segno)) goto next; } + + if (!get_valid_blocks(sbi, segno, true)) + goto next; } if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) @@ -1230,7 +1234,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) .encrypted_page = NULL, .in_list = 0, }; - int err; + int err = 0; folio = f2fs_grab_cache_folio(mapping, index, true); if (IS_ERR(folio)) @@ -1283,6 +1287,9 @@ got_it: fio.encrypted_page = &efolio->page; + if (folio_test_uptodate(efolio)) + goto put_encrypted_page; + err = f2fs_submit_page_bio(&fio); if (err) goto put_encrypted_page; @@ -1888,12 +1895,18 @@ freed: sbi->next_victim_seg[gc_type] = (cur_segno + 1 < sec_end_segno) ? 
cur_segno + 1 : NULL_SEGNO; + + if (unlikely(freezing(current))) { + folio_put_refs(sum_folio, 2); + goto stop; + } } next_block: folio_put_refs(sum_folio, 2); segno = block_end_segno; } +stop: if (submitted) f2fs_submit_merged_write(sbi, data_type); @@ -1967,6 +1980,10 @@ gc_more: goto stop; } retry: + if (unlikely(freezing(current))) { + ret = 0; + goto stop; + } ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time); if (ret) { /* allow to search victim from sections has pinned data */ diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 2669439b9413..7aabfc9b43cb 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -792,7 +792,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, int f2fs_inline_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { - __u64 byteaddr, ilen; + __u64 byteaddr = 0, ilen; __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | FIEMAP_EXTENT_LAST; struct node_info ni; @@ -814,6 +814,15 @@ int f2fs_inline_data_fiemap(struct inode *inode, goto out; } + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { + err = f2fs_write_single_node_folio(ifolio, true, false, FS_NODE_IO); + if (err) + return err; + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); + } ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode)); if (start >= ilen) goto out; @@ -825,9 +834,14 @@ int f2fs_inline_data_fiemap(struct inode *inode, if (err) goto out; - byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; - byteaddr += (char *)inline_data_addr(inode, ifolio) - - (char *)F2FS_INODE(ifolio); + if (__is_valid_data_blkaddr(ni.blk_addr)) { + byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; + byteaddr += (char *)inline_data_addr(inode, ifolio) - + (char *)F2FS_INODE(ifolio); + } else { + f2fs_bug_on(F2FS_I_SB(inode), ni.blk_addr != NEW_ADDR); 
+ flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN; + } err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f27198d6695b..c6dcda447882 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -687,7 +687,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) ri->i_uid = cpu_to_le32(i_uid_read(inode)); ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); - ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1); + ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(READ_ONCE(inode->i_blocks)) + 1); if (!f2fs_is_atomic_file(inode) || is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c index f8703038e1d8..ae265e3e9b2c 100644 --- a/fs/f2fs/iostat.c +++ b/fs/f2fs/iostat.c @@ -34,6 +34,7 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; if (!sbi->iostat_enable) return 0; @@ -76,6 +77,12 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) IOSTAT_INFO_SHOW("fs node", FS_NODE_READ_IO); IOSTAT_INFO_SHOW("fs meta", FS_META_READ_IO); + /* print read folio order stats */ + seq_printf(seq, "%-23s", "fs read folio order:"); + for (i = 0; i < NR_PAGE_ORDERS; i++) + seq_printf(seq, " %llu", sbi->iostat_read_folio_count[i]); + seq_putc(seq, '\n'); + /* print other IOs */ seq_puts(seq, "[OTHER]\n"); IOSTAT_INFO_SHOW("fs discard", FS_DISCARD_IO); @@ -113,6 +120,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi) static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi) { unsigned long long iostat_diff[NR_IO_TYPE]; + unsigned long long read_folio_count_diff[NR_PAGE_ORDERS]; int i; unsigned long flags; @@ -133,9 +141,15 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info 
*sbi) sbi->prev_iostat_bytes[i]; sbi->prev_iostat_bytes[i] = sbi->iostat_bytes[i]; } + + for (i = 0; i < NR_PAGE_ORDERS; i++) { + read_folio_count_diff[i] = sbi->iostat_read_folio_count[i] - + sbi->prev_iostat_read_folio_count[i]; + sbi->prev_iostat_read_folio_count[i] = sbi->iostat_read_folio_count[i]; + } spin_unlock_irqrestore(&sbi->iostat_lock, flags); - trace_f2fs_iostat(sbi, iostat_diff); + trace_f2fs_iostat(sbi, iostat_diff, read_folio_count_diff); __record_iostat_latency(sbi); } @@ -151,6 +165,10 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi) sbi->iostat_bytes[i] = 0; sbi->prev_iostat_bytes[i] = 0; } + for (i = 0; i < NR_PAGE_ORDERS; i++) { + sbi->iostat_read_folio_count[i] = 0; + sbi->prev_iostat_read_folio_count[i] = 0; + } spin_unlock_irq(&sbi->iostat_lock); spin_lock_irq(&sbi->iostat_lat_lock); @@ -165,6 +183,24 @@ static inline void __f2fs_update_iostat(struct f2fs_sb_info *sbi, sbi->iostat_count[type]++; } +void f2fs_update_read_folio_count(struct f2fs_sb_info *sbi, struct folio *folio) +{ + unsigned int order = folio_order(folio); + unsigned long flags; + + if (!sbi->iostat_enable) + return; + + if (order >= NR_PAGE_ORDERS) + order = NR_PAGE_ORDERS - 1; + + spin_lock_irqsave(&sbi->iostat_lock, flags); + sbi->iostat_read_folio_count[order]++; + spin_unlock_irqrestore(&sbi->iostat_lock, flags); + + f2fs_record_iostat(sbi); +} + void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode, enum iostat_type type, unsigned long long io_bytes) { diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h index eb99d05cf272..2025225b5bed 100644 --- a/fs/f2fs/iostat.h +++ b/fs/f2fs/iostat.h @@ -34,6 +34,8 @@ extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq, extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi); extern void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode, enum iostat_type type, unsigned long long io_bytes); +extern void f2fs_update_read_folio_count(struct f2fs_sb_info *sbi, + struct folio *folio); 
struct bio_iostat_ctx { struct f2fs_sb_info *sbi; @@ -68,6 +70,8 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi); #else static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode, enum iostat_type type, unsigned long long io_bytes) {} +static inline void f2fs_update_read_folio_count(struct f2fs_sb_info *sbi, + struct folio *folio) {} static inline void iostat_update_and_unbind_ctx(struct bio *bio) {} static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi, struct bio *bio, struct bio_post_read_ctx *ctx) {} diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index efbb0732d420..cac03b8e91a1 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -83,6 +83,21 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, if (set) { if (total_count == F2FS_MAX_EXTENSION) return -EINVAL; + + if (hot) { + start = 0; + count = cold_count; + } else { + start = cold_count; + count = total_count; + } + for (i = start; i < count; i++) { + if (!strcmp(name, extlist[i])) { + f2fs_warn(sbi, "extension '%s' already exists in %s list", + name, hot ? 
"cold" : "hot"); + return -EINVAL; + } + } } else { if (!hot && !cold_count) return -EINVAL; @@ -964,6 +979,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, return err; err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname); + f2fs_free_filename(&fname); if (err) return err; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e5b4a5b97b57..4e5bd9e4cfc3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -325,7 +325,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } -bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) +bool f2fs_in_warm_node_list(struct folio *folio) { return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio); } @@ -391,7 +391,7 @@ void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi) spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); } -int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) +bool f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; @@ -427,7 +427,9 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; bool need_update = true; + struct f2fs_lock_context lc; + f2fs_down_read_trace(&sbi->node_write, &lc); f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino, false); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && @@ -435,6 +437,7 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; f2fs_up_read(&nm_i->nat_tree_lock); + f2fs_up_read_trace(&sbi->node_write, &lc); return need_update; } @@ -1113,7 +1116,7 @@ out_err: } static int truncate_partial_nodes(struct dnode_of_data *dn, - struct f2fs_inode *ri, int *offset, int depth) + int *offset, int depth) { struct folio *folios[2]; nid_t nid[3]; @@ -1184,7 +1187,6 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) int 
err = 0, cont = 1; int level, offset[4], noffset[4]; unsigned int nofs = 0; - struct f2fs_inode *ri; struct dnode_of_data dn; struct folio *folio; @@ -1212,7 +1214,6 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) set_new_dnode(&dn, inode, folio, NULL, 0); folio_unlock(folio); - ri = F2FS_INODE(folio); switch (level) { case 0: case 1: @@ -1222,7 +1223,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) nofs = noffset[1]; if (!offset[level - 1]) goto skip_partial; - err = truncate_partial_nodes(&dn, ri, offset, level); + err = truncate_partial_nodes(&dn, offset, level); if (err < 0 && err != -ENOENT) goto fail; nofs += 1 + NIDS_PER_BLOCK; @@ -1231,7 +1232,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) nofs = 5 + 2 * NIDS_PER_BLOCK; if (!offset[level - 1]) goto skip_partial; - err = truncate_partial_nodes(&dn, ri, offset, level); + err = truncate_partial_nodes(&dn, offset, level); if (err < 0 && err != -ENOENT) goto fail; break; @@ -1729,9 +1730,10 @@ continue_unlock: return last_folio; } -static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted, - struct writeback_control *wbc, bool do_balance, - enum iostat_type io_type, unsigned int *seq_id) +static bool __write_node_folio(struct folio *folio, bool atomic, bool do_fsync, + bool *submitted, struct writeback_control *wbc, + bool do_balance, enum iostat_type io_type, + unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_F_SB(folio); nid_t nid; @@ -1776,7 +1778,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted if (f2fs_sanity_check_node_footer(sbi, folio, nid, NODE_TYPE_REGULAR, false)) { - f2fs_handle_critical_error(sbi, STOP_CP_REASON_CORRUPTED_NID); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_NID); goto redirty_out; } @@ -1801,16 +1803,17 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted goto redirty_out; } - if (atomic) { - if 
(!test_opt(sbi, NOBARRIER)) - fio.op_flags |= REQ_PREFLUSH | REQ_FUA; - if (IS_INODE(folio)) - set_dentry_mark(folio, + if (atomic && !test_opt(sbi, NOBARRIER)) + fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + + set_dentry_mark(folio, false); + set_fsync_mark(folio, do_fsync); + if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio))) + set_dentry_mark(folio, f2fs_need_dentry_mark(sbi, ino_of_node(folio))); - } /* should add to global list before clearing PAGECACHE status */ - if (f2fs_in_warm_node_list(sbi, folio)) { + if (f2fs_in_warm_node_list(folio)) { seq = f2fs_add_fsync_node_entry(sbi, folio); if (seq_id) *seq_id = seq; @@ -1843,41 +1846,51 @@ redirty_out: return false; } -int f2fs_move_node_folio(struct folio *node_folio, int gc_type) +int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, + bool mark_dirty, enum iostat_type io_type) { int err = 0; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 1, + }; - if (gc_type == FG_GC) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, - }; + if (!sync_mode) { + /* set page dirty and write it */ + if (!folio_test_writeback(node_folio)) + folio_mark_dirty(node_folio); + goto out_folio; + } - f2fs_folio_wait_writeback(node_folio, NODE, true, true); + f2fs_folio_wait_writeback(node_folio, NODE, true, true); + if (mark_dirty) folio_mark_dirty(node_folio); + else if (!folio_test_dirty(node_folio)) + goto out_folio; - if (!folio_clear_dirty_for_io(node_folio)) { - err = -EAGAIN; - goto out_page; - } - - if (!__write_node_folio(node_folio, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) - err = -EAGAIN; - goto release_page; - } else { - /* set page dirty and write it */ - if (!folio_test_writeback(node_folio)) - folio_mark_dirty(node_folio); + if (!folio_clear_dirty_for_io(node_folio)) { + err = -EAGAIN; + goto out_folio; } -out_page: + + if (!__write_node_folio(node_folio, false, false, NULL, + &wbc, false, FS_GC_NODE_IO, NULL)) + err = 
-EAGAIN; + goto release_folio; +out_folio: folio_unlock(node_folio); -release_page: +release_folio: f2fs_folio_put(node_folio, false); return err; } +int f2fs_move_node_folio(struct folio *node_folio, int gc_type) +{ + return f2fs_write_single_node_folio(node_folio, gc_type == FG_GC, + true, FS_GC_NODE_IO); +} + int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id) @@ -1908,6 +1921,7 @@ retry: for (i = 0; i < nr_folios; i++) { struct folio *folio = fbatch.folios[i]; bool submitted = false; + bool do_fsync = false; if (unlikely(f2fs_cp_error(sbi))) { f2fs_folio_put(last_folio, false); @@ -1938,19 +1952,13 @@ continue_unlock: f2fs_folio_wait_writeback(folio, NODE, true, true); - set_fsync_mark(folio, 0); - set_dentry_mark(folio, 0); - if (!atomic || folio == last_folio) { - set_fsync_mark(folio, 1); + do_fsync = true; percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(folio)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) f2fs_update_inode(inode, folio); - if (!atomic) - set_dentry_mark(folio, - f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ if (!folio_test_dirty(folio)) @@ -1962,8 +1970,9 @@ continue_unlock: if (!__write_node_folio(folio, atomic && folio == last_folio, - &submitted, wbc, true, - FS_NODE_IO, seq_id)) { + do_fsync, &submitted, + wbc, true, FS_NODE_IO, + seq_id)) { f2fs_folio_put(last_folio, false); folio_batch_release(&fbatch); ret = -EIO; @@ -2163,10 +2172,7 @@ write_node: if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - set_fsync_mark(folio, 0); - set_dentry_mark(folio, 0); - - if (!__write_node_folio(folio, false, &submitted, + if (!__write_node_folio(folio, false, false, &submitted, wbc, do_balance, io_type, NULL)) { folio_batch_release(&fbatch); ret = -EIO; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 824ac9f0e6e4..bcf2034e4263 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -400,27 +400,26 @@ static 
inline int is_node(const struct folio *folio, int type) #define is_fsync_dnode(folio) is_node(folio, FSYNC_BIT_SHIFT) #define is_dent_dnode(folio) is_node(folio, DENT_BIT_SHIFT) -static inline void set_cold_node(const struct folio *folio, bool is_dir) +static inline void __set_mark(const struct folio *folio, bool mark, int type) { struct f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); - if (is_dir) - flag &= ~BIT(COLD_BIT_SHIFT); - else - flag |= BIT(COLD_BIT_SHIFT); - rn->footer.flag = cpu_to_le32(flag); -} - -static inline void set_mark(struct folio *folio, int mark, int type) -{ - struct f2fs_node *rn = F2FS_NODE(folio); - unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= BIT(type); else flag &= ~BIT(type); rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_cold_node(const struct folio *folio, bool is_dir) +{ + __set_mark(folio, !is_dir, COLD_BIT_SHIFT); +} + +static inline void set_mark(struct folio *folio, bool mark, int type) +{ + __set_mark(folio, mark, type); #ifdef CONFIG_F2FS_CHECK_FS f2fs_inode_chksum_set(F2FS_F_SB(folio), folio); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6a97fe76712b..788f8b050249 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1606,6 +1606,9 @@ static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, if (dc->state != D_PREP) goto next; + if (*issued > 0 && unlikely(freezing(current))) + break; + if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; @@ -1645,6 +1648,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct blk_plug plug; int i, issued; bool io_interrupted = false; + bool suspended = false; if (dpolicy->timeout) f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); @@ -1675,6 +1679,11 @@ retry: list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); + if (issued > 0 && unlikely(freezing(current))) { + suspended = true; + break; + } + if 
(dpolicy->timeout && f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; @@ -1694,7 +1703,8 @@ retry: next: mutex_unlock(&dcc->cmd_lock); - if (issued >= dpolicy->max_requests || io_interrupted) + if (issued >= dpolicy->max_requests || io_interrupted || + suspended) break; } @@ -1880,7 +1890,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi) * * Return true if issued all discard cmd or no discard cmd need issue, otherwise return false. */ -bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi, bool need_check) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_policy dpolicy; @@ -1897,7 +1907,7 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) /* just to make sure there is no pending discard commands */ __wait_all_discard_cmd(sbi, NULL); - f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt)); + f2fs_bug_on(sbi, need_check && atomic_read(&dcc->discard_cmd_cnt)); return !dropped; } @@ -2367,7 +2377,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) * Recovery can cache discard commands, so in error path of * fill_super(), it needs to give a chance to handle them. 
*/ - f2fs_issue_discard_timeout(sbi); + f2fs_issue_discard_timeout(sbi, true); kfree(dcc); SM_I(sbi)->dcc_info = NULL; @@ -3980,7 +3990,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host)) fscrypt_finalize_bounce_page(&fio->encrypted_page); folio_end_writeback(folio); - if (f2fs_in_warm_node_list(fio->sbi, folio)) + if (f2fs_in_warm_node_list(folio)) f2fs_del_fsync_node_entry(fio->sbi, folio); f2fs_bug_on(fio->sbi, !is_set_ckpt_flags(fio->sbi, CP_ERROR_FLAG)); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8774c60b4be4..ccf806b676f5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -336,9 +336,12 @@ static const struct fs_parameter_spec f2fs_param_specs[] = { fsparam_flag("usrquota", Opt_usrquota), fsparam_flag("grpquota", Opt_grpquota), fsparam_flag("prjquota", Opt_prjquota), - fsparam_string_empty("usrjquota", Opt_usrjquota), - fsparam_string_empty("grpjquota", Opt_grpjquota), - fsparam_string_empty("prjjquota", Opt_prjjquota), + fsparam_string("usrjquota", Opt_usrjquota), + fsparam_flag("usrjquota", Opt_usrjquota), + fsparam_string("grpjquota", Opt_grpjquota), + fsparam_flag("grpjquota", Opt_grpjquota), + fsparam_string("prjjquota", Opt_prjjquota), + fsparam_flag("prjjquota", Opt_prjjquota), fsparam_flag("nat_bits", Opt_nat_bits), fsparam_enum("jqfmt", Opt_jqfmt, f2fs_param_jqfmt), fsparam_enum("alloc_mode", Opt_alloc, f2fs_param_alloc_mode), @@ -979,26 +982,26 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx_set_opt(ctx, F2FS_MOUNT_PRJQUOTA); break; case Opt_usrjquota: - if (!*param->string) - ret = f2fs_unnote_qf_name(fc, USRQUOTA); - else + if (param->type == fs_value_is_string && *param->string) ret = f2fs_note_qf_name(fc, USRQUOTA, param); + else + ret = f2fs_unnote_qf_name(fc, USRQUOTA); if (ret) return ret; break; case Opt_grpjquota: - if (!*param->string) - ret = f2fs_unnote_qf_name(fc, GRPQUOTA); - else + if 
(param->type == fs_value_is_string && *param->string) ret = f2fs_note_qf_name(fc, GRPQUOTA, param); + else + ret = f2fs_unnote_qf_name(fc, GRPQUOTA); if (ret) return ret; break; case Opt_prjjquota: - if (!*param->string) - ret = f2fs_unnote_qf_name(fc, PRJQUOTA); - else + if (param->type == fs_value_is_string && *param->string) ret = f2fs_note_qf_name(fc, PRJQUOTA, param); + else + ret = f2fs_unnote_qf_name(fc, PRJQUOTA); if (ret) return ret; break; @@ -1515,6 +1518,7 @@ static int f2fs_check_opt_consistency(struct fs_context *fc, F2FS_OPTION(sbi).root_reserved_blocks); ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_ROOT); + ctx->spec_mask &= ~F2FS_SPEC_reserve_root; } if (test_opt(sbi, RESERVE_NODE) && (ctx->opt_mask & BIT(F2FS_MOUNT_RESERVE_NODE)) && @@ -1523,6 +1527,7 @@ static int f2fs_check_opt_consistency(struct fs_context *fc, F2FS_OPTION(sbi).root_reserved_nodes); ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_NODE); ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_NODE); + ctx->spec_mask &= ~F2FS_SPEC_reserve_node; } err = f2fs_check_test_dummy_encryption(fc, sb); @@ -2009,7 +2014,7 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - done = f2fs_issue_discard_timeout(sbi); + done = f2fs_issue_discard_timeout(sbi, true); if (f2fs_realtime_discard_enable(sbi) && !sbi->discard_blks && done) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, @@ -2088,6 +2093,12 @@ static void f2fs_put_super(struct super_block *sb) #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif + sync_blockdev(sb->s_bdev); + invalidate_bdev(sb->s_bdev); + for (i = 1; i < sbi->s_ndevs; i++) { + sync_blockdev(FDEV(i).bdev); + invalidate_bdev(FDEV(i).bdev); + } } int f2fs_sync_fs(struct super_block *sb, int sync) @@ -2152,7 +2163,7 @@ static int f2fs_unfreeze(struct super_block *sb) * will recover after removal of snapshot. 
*/ if (test_opt(sbi, DISCARD) && !f2fs_hw_support_discard(sbi)) - f2fs_issue_discard_timeout(sbi); + f2fs_issue_discard_timeout(sbi, true); clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; @@ -2957,7 +2968,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb) need_stop_discard = true; } else { f2fs_stop_discard_thread(sbi); - f2fs_issue_discard_timeout(sbi); + /* + * f2fs_ioc_fitrim() won't race w/ "remount ro" + * so it's safe to check discard_cmd_cnt in + * f2fs_issue_discard_timeout(). + */ + f2fs_issue_discard_timeout(sbi, flags & SB_RDONLY); need_restart_discard = true; } } @@ -4650,7 +4666,8 @@ static bool system_going_down(void) || system_state == SYSTEM_RESTART; } -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason) +static void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, + unsigned char reason) { struct super_block *sb = sbi->sb; bool shutdown = reason == STOP_CP_REASON_SHUTDOWN; @@ -4707,6 +4724,16 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason) */ } +void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, + unsigned char reason) +{ + f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL); + if (!end_io) + f2fs_flush_merged_writes(sbi); + f2fs_handle_critical_error(sbi, reason); +} + + static void f2fs_record_error_work(struct work_struct *work) { struct f2fs_sb_info *sbi = container_of(work, @@ -4948,6 +4975,11 @@ try_onemore: init_f2fs_rwsem_trace(&sbi->gc_lock, sbi, LOCK_NAME_GC_LOCK); mutex_init(&sbi->writepages); init_f2fs_rwsem_trace(&sbi->cp_global_sem, sbi, LOCK_NAME_CP_GLOBAL); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_register_key(&sbi->cp_global_sem_key); + lockdep_set_class(&sbi->cp_global_sem.internal_rwsem, + &sbi->cp_global_sem_key); +#endif init_f2fs_rwsem_trace(&sbi->node_write, sbi, LOCK_NAME_NODE_WRITE); init_f2fs_rwsem_trace(&sbi->node_change, sbi, LOCK_NAME_NODE_CHANGE); spin_lock_init(&sbi->stat_lock); @@ -5419,6 +5451,9 @@ 
free_options: free_sb_buf: kfree(raw_super); free_sbi: +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_unregister_key(&sbi->cp_global_sem_key); +#endif kfree(sbi); sb->s_fs_info = NULL; @@ -5500,6 +5535,9 @@ static void kill_f2fs_super(struct super_block *sb) /* Release block devices last, after fscrypt_destroy_keyring(). */ if (sbi) { destroy_device_list(sbi); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_unregister_key(&sbi->cp_global_sem_key); +#endif kfree(sbi); sb->s_fs_info = NULL; } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 5fbfdc96e502..352e96ad5c3a 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -338,6 +338,14 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a, f2fs_update_sit_info(sbi); return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->avg_vblocks)); } + +static ssize_t defrag_blocks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->defrag_blks)); +} #endif static ssize_t main_blkaddr_show(struct f2fs_attr *a, @@ -379,10 +387,12 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!strcmp(a->attr.name, "extension_list")) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - int cold_count = le32_to_cpu(sbi->raw_super->extension_count); - int hot_count = sbi->raw_super->hot_ext_count; + int cold_count, hot_count; int len = 0, i; + f2fs_down_read(&sbi->sb_lock); + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; len += sysfs_emit_at(buf, len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); @@ -390,6 +400,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, len += sysfs_emit_at(buf, len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); + f2fs_up_read(&sbi->sb_lock); return len; } @@ -1351,6 
+1362,7 @@ F2FS_GENERAL_RO_ATTR(gc_mode); F2FS_GENERAL_RO_ATTR(moved_blocks_background); F2FS_GENERAL_RO_ATTR(moved_blocks_foreground); F2FS_GENERAL_RO_ATTR(avg_vblocks); +F2FS_GENERAL_RO_ATTR(defrag_blocks); #endif #ifdef CONFIG_FS_ENCRYPTION @@ -1473,6 +1485,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(moved_blocks_foreground), ATTR_LIST(moved_blocks_background), ATTR_LIST(avg_vblocks), + ATTR_LIST(defrag_blocks), #endif #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(unusable_blocks_per_sec), @@ -1984,24 +1997,26 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); if (ret) - goto put_kobject; + goto unregister_kset; ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype, NULL, "tuning"); if (ret) - goto put_kobject; + goto put_feat; f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); if (!f2fs_proc_root) { ret = -ENOMEM; - goto put_kobject; + goto put_tune; } return 0; -put_kobject: +put_tune: kobject_put(&f2fs_tune); +put_feat: kobject_put(&f2fs_feat); +unregister_kset: kset_unregister(&f2fs_kset); return ret; } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index dc41722fcc9d..829a59399dac 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -80,6 +80,9 @@ enum stop_cp_reason { STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_CORRUPTED_FREE_BITMAP, STOP_CP_REASON_CORRUPTED_NID, + STOP_CP_REASON_READ_META, + STOP_CP_REASON_READ_NODE, + STOP_CP_REASON_READ_DATA, STOP_CP_REASON_MAX, }; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 05a46908acd9..b5188d2671d7 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -2116,9 +2116,10 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, #ifdef CONFIG_F2FS_IOSTAT TRACE_EVENT(f2fs_iostat, - TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat), + TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat, + unsigned long long *read_folio_count), - TP_ARGS(sbi, iostat), + 
TP_ARGS(sbi, iostat, read_folio_count), TP_STRUCT__entry( __field(dev_t, dev) @@ -2150,6 +2151,7 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, fs_mrio) __field(unsigned long long, fs_discard) __field(unsigned long long, fs_reset_zone) + __array(unsigned long long, read_folio_count, 11) ), TP_fast_assign( @@ -2182,6 +2184,9 @@ TRACE_EVENT(f2fs_iostat, __entry->fs_mrio = iostat[FS_META_READ_IO]; __entry->fs_discard = iostat[FS_DISCARD_IO]; __entry->fs_reset_zone = iostat[FS_ZONE_RESET_IO]; + memset(__entry->read_folio_count, 0, sizeof(__entry->read_folio_count)); + memcpy(__entry->read_folio_count, read_folio_count, + sizeof(unsigned long long) * min_t(int, NR_PAGE_ORDERS, 11)); ), TP_printk("dev = (%d,%d), " @@ -2194,7 +2199,9 @@ TRACE_EVENT(f2fs_iostat, "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " "compr(buffered=%llu, mapped=%llu)], " "fs [data=%llu, (gc_data=%llu, cdata=%llu), " - "node=%llu, meta=%llu]", + "node=%llu, meta=%llu], " + "read_folio_count [0=%llu, 1=%llu, 2=%llu, 3=%llu, 4=%llu, " + "5=%llu, 6=%llu, 7=%llu, 8=%llu, 9=%llu, 10=%llu]", show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, __entry->app_bio, __entry->app_mio, __entry->app_bcdio, __entry->app_mcdio, __entry->fs_dio, __entry->fs_cdio, @@ -2205,7 +2212,13 @@ TRACE_EVENT(f2fs_iostat, __entry->app_rio, __entry->app_drio, __entry->app_brio, __entry->app_mrio, __entry->app_bcrio, __entry->app_mcrio, __entry->fs_drio, __entry->fs_gdrio, - __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio) + __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio, + __entry->read_folio_count[0], __entry->read_folio_count[1], + __entry->read_folio_count[2], __entry->read_folio_count[3], + __entry->read_folio_count[4], __entry->read_folio_count[5], + __entry->read_folio_count[6], __entry->read_folio_count[7], + __entry->read_folio_count[8], __entry->read_folio_count[9], + __entry->read_folio_count[10]) ); #ifndef __F2FS_IOSTAT_LATENCY_TYPE |
