summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-04-21 14:50:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-04-21 14:50:04 -0700
commitd46dd0d88341e45f8e0226fdef5462f5270898fc (patch)
treee7413796e3ed09bf8c060470d7b71348e9e036de
parentbb0bc49a1cef574646eb25d74709c5ff200903a8 (diff)
parentcb8ff3ead9a3fc43727980be58c7099506f65261 (diff)
Merge tag 'f2fs-for-7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this round, the changes primarily focus on resolving race conditions, memory safety issues (UAF), and improving the robustness of garbage collection (GC), and folio management. Enhancements: - add page-order information for large folio reads in iostat - add defrag_blocks sysfs node Bug fixes: - fix uninitialized kobject put in f2fs_init_sysfs() - disallow setting an extension to both cold and hot - fix node_cnt race between extent node destroy and writeback - preserve previous reserve_{blocks,node} value when remount - freeze GC and discard threads quickly - fix false alarm of lockdep on cp_global_sem lock - fix data loss caused by incorrect use of nat_entry flag - skip empty sections in f2fs_get_victim - fix inline data not being written to disk in writeback path - fix fsck inconsistency caused by FGGC of node block - fix fsck inconsistency caused by incorrect nat_entry flag usage - call f2fs_handle_critical_error() to set cp_error flag - fix fiemap boundary handling when read extent cache is incomplete - fix use-after-free of sbi in f2fs_compress_write_end_io() - fix UAF caused by decrementing sbi->nr_pages[] in f2fs_write_end_io() - fix incorrect file address mapping when inline inode is unwritten - fix incomplete search range in f2fs_get_victim when f2fs_need_rand_seg is enabled - avoid memory leak in f2fs_rename()" * tag 'f2fs-for-7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (35 commits) f2fs: add page-order information for large folio reads in iostat f2fs: do not support mmap write for large folio f2fs: fix uninitialized kobject put in f2fs_init_sysfs() f2fs: protect extension_list reading with sb_lock in f2fs_sbi_show() f2fs: disallow setting an extension to both cold and hot f2fs: fix node_cnt race between extent node destroy and writeback f2fs: allow empty mount string for Opt_usr|grp|projjquota f2fs: fix to preserve previous reserve_{blocks,node} value when remount f2fs: invalidate block 
device page cache on umount f2fs: fix to freeze GC and discard threads quickly f2fs: fix to avoid uninit-value access in f2fs_sanity_check_node_footer f2fs: fix false alarm of lockdep on cp_global_sem lock f2fs: fix data loss caused by incorrect use of nat_entry flag f2fs: fix to skip empty sections in f2fs_get_victim f2fs: fix inline data not being written to disk in writeback path f2fs: fix fsck inconsistency caused by FGGC of node block f2fs: fix fsck inconsistency caused by incorrect nat_entry flag usage f2fs: fix to do sanity check on dcc->discard_cmd_cnt conditionally f2fs: refactor node footer flag setting related code f2fs: refactor f2fs_move_node_folio function ...
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs6
-rw-r--r--fs/f2fs/checkpoint.c9
-rw-r--r--fs/f2fs/compress.c14
-rw-r--r--fs/f2fs/data.c53
-rw-r--r--fs/f2fs/debug.c1
-rw-r--r--fs/f2fs/extent_cache.c17
-rw-r--r--fs/f2fs/f2fs.h41
-rw-r--r--fs/f2fs/file.c15
-rw-r--r--fs/f2fs/gc.c23
-rw-r--r--fs/f2fs/inline.c22
-rw-r--r--fs/f2fs/inode.c2
-rw-r--r--fs/f2fs/iostat.c38
-rw-r--r--fs/f2fs/iostat.h4
-rw-r--r--fs/f2fs/namei.c16
-rw-r--r--fs/f2fs/node.c112
-rw-r--r--fs/f2fs/node.h23
-rw-r--r--fs/f2fs/segment.c20
-rw-r--r--fs/f2fs/super.c70
-rw-r--r--fs/f2fs/sysfs.c27
-rw-r--r--include/linux/f2fs_fs.h3
-rw-r--r--include/trace/events/f2fs.h21
21 files changed, 386 insertions, 151 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index c1d2b3fd9c65..423ec40e2e4e 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -407,6 +407,12 @@ Contact: "Hridya Valsaraju" <hridya@google.com>
Description: Average number of valid blocks.
Available when CONFIG_F2FS_STAT_FS=y.
+What: /sys/fs/f2fs/<disk>/defrag_blocks
+Date: February 2026
+Contact: "Jinbao Liu" <liujinbao1@xiaomi.com>
+Description: Number of blocks moved by defragment.
+ Available when CONFIG_F2FS_STAT_FS=y.
+
What: /sys/fs/f2fs/<disk>/mounted_time_sec
Date: February 2020
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0143365c07dc..c00a6b6ebcbd 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -232,15 +232,6 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
static struct kmem_cache *ino_entry_slab;
struct kmem_cache *f2fs_inode_entry_slab;
-void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
- unsigned char reason)
-{
- f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL);
- if (!end_io)
- f2fs_flush_merged_writes(sbi);
- f2fs_handle_critical_error(sbi, reason);
-}
-
/*
* We guarantee no failure on the returned page.
*/
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 50fac72734ac..881e76158b96 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1491,10 +1491,10 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio)
f2fs_compress_free_page(page);
- dec_page_count(sbi, type);
-
- if (atomic_dec_return(&cic->pending_pages))
+ if (atomic_dec_return(&cic->pending_pages)) {
+ dec_page_count(sbi, type);
return;
+ }
for (i = 0; i < cic->nr_rpages; i++) {
WARN_ON(!cic->rpages[i]);
@@ -1504,6 +1504,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio)
page_array_free(sbi, cic->rpages, cic->nr_rpages);
kmem_cache_free(cic_entry_slab, cic);
+
+ /*
+ * Make sure dec_page_count() is the last access to sbi.
+ * Once it drops the F2FS_WB_CP_DATA counter to zero, the
+ * unmount thread can proceed to destroy sbi and
+ * sbi->page_array_slab.
+ */
+ dec_page_count(sbi, type);
}
static int f2fs_write_raw_pages(struct compress_ctx *cc,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cf05014fa5e3..8d4f1e75dee3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -173,7 +173,8 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
while (nr_pages--)
dec_page_count(F2FS_F_SB(folio), __read_io_type(folio));
- if (F2FS_F_SB(folio)->node_inode && is_node_folio(folio) &&
+ if (bio->bi_status == BLK_STS_OK &&
+ F2FS_F_SB(folio)->node_inode && is_node_folio(folio) &&
f2fs_sanity_check_node_footer(F2FS_F_SB(folio),
folio, folio->index, NODE_TYPE_REGULAR, true))
bio->bi_status = BLK_STS_IOERR;
@@ -386,6 +387,8 @@ static void f2fs_write_end_io(struct bio *bio)
folio->index, NODE_TYPE_REGULAR, true);
f2fs_bug_on(sbi, folio->index != nid_of_node(folio));
}
+ if (f2fs_in_warm_node_list(folio))
+ f2fs_del_fsync_node_entry(sbi, folio);
dec_page_count(sbi, type);
@@ -397,8 +400,6 @@ static void f2fs_write_end_io(struct bio *bio)
wq_has_sleeper(&sbi->cp_wait))
wake_up(&sbi->cp_wait);
- if (f2fs_in_warm_node_list(sbi, folio))
- f2fs_del_fsync_node_entry(sbi, folio);
folio_clear_f2fs_gcing(folio);
folio_end_writeback(folio);
}
@@ -1578,7 +1579,8 @@ static bool f2fs_map_blocks_cached(struct inode *inode,
f2fs_wait_on_block_writeback_range(inode,
map->m_pblk, map->m_len);
- if (f2fs_allow_multi_device_dio(sbi, flag)) {
+ map->m_multidev_dio = f2fs_allow_multi_device_dio(sbi, flag);
+ if (map->m_multidev_dio) {
int bidx = f2fs_target_device_index(sbi, map->m_pblk);
struct f2fs_dev_info *dev = &sbi->devs[bidx];
@@ -1638,8 +1640,26 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
map->m_may_create);
- if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
- goto out;
+ if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) {
+ struct extent_info ei;
+
+ /*
+ * 1. If map->m_multidev_dio is true, map->m_pblk cannot be
+ * waited on by f2fs_wait_on_block_writeback_range() and is not
+ * mergeable.
+ * 2. If pgofs hits the read extent cache, it means the mapping
+ * is already cached in the extent cache, but it is not
+ * mergeable, and there is no need to query the mapping again
+ * via f2fs_get_dnode_of_data().
+ */
+ pgofs = (pgoff_t)map->m_lblk + map->m_len;
+ if (map->m_len == maxblocks ||
+ map->m_multidev_dio ||
+ f2fs_lookup_read_extent_cache(inode, pgofs, &ei))
+ goto out;
+ ofs = map->m_len;
+ goto map_more;
+ }
map->m_bdev = inode->i_sb->s_bdev;
map->m_multidev_dio =
@@ -1650,7 +1670,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
/* it only supports block size == page size */
pgofs = (pgoff_t)map->m_lblk;
- end = pgofs + maxblocks;
+map_more:
+ end = (pgoff_t)map->m_lblk + maxblocks;
if (flag == F2FS_GET_BLOCK_PRECACHE)
mode = LOOKUP_NODE_RA;
@@ -2490,6 +2511,8 @@ next_folio:
if (!folio)
goto out;
+ f2fs_update_read_folio_count(F2FS_I_SB(inode), folio);
+
folio_in_bio = false;
index = folio->index;
offset = 0;
@@ -2664,6 +2687,8 @@ static int f2fs_mpage_readpages(struct inode *inode, struct fsverity_info *vi,
prefetchw(&folio->flags);
}
+ f2fs_update_read_folio_count(F2FS_I_SB(inode), folio);
+
#ifdef CONFIG_F2FS_FS_COMPRESSION
index = folio->index;
@@ -2790,7 +2815,6 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
struct inode *inode = fio_inode(fio);
struct folio *mfolio;
struct page *page;
- gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_file(inode))
return 0;
@@ -2800,19 +2824,10 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
if (fscrypt_inode_uses_inline_crypto(inode))
return 0;
-retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page_folio(page),
- PAGE_SIZE, 0, gfp_flags);
- if (IS_ERR(fio->encrypted_page)) {
- /* flush pending IOs and wait for a while in the ENOMEM case */
- if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
- f2fs_flush_merged_writes(fio->sbi);
- memalloc_retry_wait(GFP_NOFS);
- gfp_flags |= __GFP_NOFAIL;
- goto retry_encrypt;
- }
+ PAGE_SIZE, 0, GFP_NOFS);
+ if (IS_ERR(fio->encrypted_page))
return PTR_ERR(fio->encrypted_page);
- }
mfolio = filemap_lock_folio(META_MAPPING(fio->sbi), fio->old_blkaddr);
if (!IS_ERR(mfolio)) {
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 8e1040e375a7..af88db8fdb71 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -659,6 +659,7 @@ static int stat_show(struct seq_file *s, void *v)
si->bg_node_blks);
seq_printf(s, "BG skip : IO: %u, Other: %u\n",
si->io_skip_bggc, si->other_skip_bggc);
+ seq_printf(s, "defrag blocks : %u\n", si->defrag_blks);
seq_puts(s, "\nExtent Cache (Read):\n");
seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
si->hit_largest, si->hit_cached[EX_READ],
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index d73aeef333a2..d2e006420f04 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -119,9 +119,10 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type)
if (!__init_may_extent_tree(inode, type))
return false;
+ if (is_inode_flag_set(inode, FI_NO_EXTENT))
+ return false;
+
if (type == EX_READ) {
- if (is_inode_flag_set(inode, FI_NO_EXTENT))
- return false;
if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
!f2fs_sb_has_readonly(F2FS_I_SB(inode)))
return false;
@@ -644,6 +645,8 @@ static unsigned int __destroy_extent_node(struct inode *inode,
while (atomic_read(&et->node_cnt)) {
write_lock(&et->lock);
+ if (!is_inode_flag_set(inode, FI_NO_EXTENT))
+ set_inode_flag(inode, FI_NO_EXTENT);
node_cnt += __free_extent_tree(sbi, et, nr_shrink);
write_unlock(&et->lock);
}
@@ -688,12 +691,12 @@ static void __update_extent_tree_range(struct inode *inode,
write_lock(&et->lock);
- if (type == EX_READ) {
- if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
- write_unlock(&et->lock);
- return;
- }
+ if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
+ write_unlock(&et->lock);
+ return;
+ }
+ if (type == EX_READ) {
prev = et->largest;
dei.len = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7d0a467982d6..91f506e7c9cf 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -10,6 +10,7 @@
#include <linux/uio.h>
#include <linux/types.h>
+#include <linux/mmzone.h>
#include <linux/page-flags.h>
#include <linux/slab.h>
#include <linux/crc32.h>
@@ -2032,6 +2033,8 @@ struct f2fs_sb_info {
unsigned long long iostat_count[NR_IO_TYPE];
unsigned long long iostat_bytes[NR_IO_TYPE];
unsigned long long prev_iostat_bytes[NR_IO_TYPE];
+ unsigned long long iostat_read_folio_count[NR_PAGE_ORDERS];
+ unsigned long long prev_iostat_read_folio_count[NR_PAGE_ORDERS];
bool iostat_enable;
unsigned long iostat_next_period;
unsigned int iostat_period_ms;
@@ -2040,6 +2043,9 @@ struct f2fs_sb_info {
spinlock_t iostat_lat_lock;
struct iostat_lat_info *iostat_io_lat;
#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lock_class_key cp_global_sem_key;
+#endif
};
/* Definitions to access f2fs_sb_info */
@@ -3900,7 +3906,6 @@ int f2fs_do_quota_sync(struct super_block *sb, int type);
loff_t max_file_blocks(struct inode *inode);
void f2fs_quota_off_umount(struct super_block *sb);
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
-void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason);
void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -3919,11 +3924,11 @@ enum node_type;
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
-bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio);
+bool f2fs_in_warm_node_list(struct folio *folio);
void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi);
void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio);
void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi);
-int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
+bool f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
@@ -3945,6 +3950,8 @@ int f2fs_sanity_check_node_footer(struct f2fs_sb_info *sbi,
enum node_type ntype, bool in_irq);
struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino);
struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid);
+int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode,
+ bool mark_dirty, enum iostat_type io_type);
int f2fs_move_node_folio(struct folio *node_folio, int gc_type);
void f2fs_flush_inline_data(struct f2fs_sb_info *sbi);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
@@ -3987,7 +3994,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
int f2fs_start_discard_thread(struct f2fs_sb_info *sbi);
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
-bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi, bool need_check);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
@@ -4286,6 +4293,7 @@ struct f2fs_stat_info {
int gc_secs[2][2];
int tot_blks, data_blks, node_blks;
int bg_data_blks, bg_node_blks;
+ unsigned int defrag_blks;
int blkoff[NR_CURSEG_TYPE];
int curseg[NR_CURSEG_TYPE];
int cursec[NR_CURSEG_TYPE];
@@ -4420,6 +4428,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
si->bg_node_blks += ((gc_type) == BG_GC) ? (blks) : 0; \
} while (0)
+#define stat_inc_defrag_blk_count(sbi, blks) \
+ (F2FS_STAT(sbi)->defrag_blks += (blks))
+
int f2fs_build_stats(struct f2fs_sb_info *sbi);
void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
void __init f2fs_create_root_stats(void);
@@ -4461,6 +4472,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi);
#define stat_inc_tot_blk_count(si, blks) do { } while (0)
#define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0)
#define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0)
+#define stat_inc_defrag_blk_count(sbi, blks) do { } while (0)
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
@@ -5063,8 +5075,25 @@ static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi,
return;
if (ofs == sbi->page_eio_ofs[type]) {
- if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO)
- set_ckpt_flags(sbi, CP_ERROR_FLAG);
+ if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO) {
+ enum stop_cp_reason stop_reason;
+
+ switch (type) {
+ case META:
+ stop_reason = STOP_CP_REASON_READ_META;
+ break;
+ case NODE:
+ stop_reason = STOP_CP_REASON_READ_NODE;
+ break;
+ case DATA:
+ stop_reason = STOP_CP_REASON_READ_DATA;
+ break;
+ default:
+ f2fs_bug_on(sbi, 1);
+ return;
+ }
+ f2fs_stop_checkpoint(sbi, false, stop_reason);
+ }
} else {
sbi->page_eio_ofs[type] = ofs;
sbi->page_eio_cnt[type] = 0;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 42f5832242b3..fb12c5c9affd 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -81,8 +81,17 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
int err = 0;
vm_fault_t ret;
- if (unlikely(IS_IMMUTABLE(inode)))
+ /*
+ * We only support large folio on the read case.
+ * Don't make any dirty pages.
+ */
+ if (unlikely(IS_IMMUTABLE(inode)) ||
+ mapping_large_folio_support(inode->i_mapping)) {
+ f2fs_err(sbi, "Not expected: immutable: %d large_folio: %d",
+ IS_IMMUTABLE(inode),
+ mapping_large_folio_support(inode->i_mapping));
return VM_FAULT_SIGBUS;
+ }
if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
err = -EIO;
@@ -3042,8 +3051,10 @@ out:
clear_inode_flag(inode, FI_OPU_WRITE);
unlock_out:
inode_unlock(inode);
- if (!err)
+ if (!err) {
range->len = (u64)total << PAGE_SHIFT;
+ stat_inc_defrag_blk_count(sbi, total);
+ }
return err;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index c0c8a1056d6b..ba93010924c0 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -316,10 +316,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->max_search = sbi->max_victim_search;
/* let's select beginning hot/small space first. */
- if (f2fs_need_rand_seg(sbi))
+ if (f2fs_need_rand_seg(sbi)) {
p->offset = get_random_u32_below(MAIN_SECS(sbi) *
SEGS_PER_SEC(sbi));
- else if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
+ SIT_I(sbi)->last_victim[p->gc_mode] = p->offset;
+ } else if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
p->offset = 0;
else
p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
@@ -909,6 +910,9 @@ retry:
if (!f2fs_segment_has_free_slot(sbi, segno))
goto next;
}
+
+ if (!get_valid_blocks(sbi, segno, true))
+ goto next;
}
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
@@ -1230,7 +1234,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
.encrypted_page = NULL,
.in_list = 0,
};
- int err;
+ int err = 0;
folio = f2fs_grab_cache_folio(mapping, index, true);
if (IS_ERR(folio))
@@ -1283,6 +1287,9 @@ got_it:
fio.encrypted_page = &efolio->page;
+ if (folio_test_uptodate(efolio))
+ goto put_encrypted_page;
+
err = f2fs_submit_page_bio(&fio);
if (err)
goto put_encrypted_page;
@@ -1888,12 +1895,18 @@ freed:
sbi->next_victim_seg[gc_type] =
(cur_segno + 1 < sec_end_segno) ?
cur_segno + 1 : NULL_SEGNO;
+
+ if (unlikely(freezing(current))) {
+ folio_put_refs(sum_folio, 2);
+ goto stop;
+ }
}
next_block:
folio_put_refs(sum_folio, 2);
segno = block_end_segno;
}
+stop:
if (submitted)
f2fs_submit_merged_write(sbi, data_type);
@@ -1967,6 +1980,10 @@ gc_more:
goto stop;
}
retry:
+ if (unlikely(freezing(current))) {
+ ret = 0;
+ goto stop;
+ }
ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time);
if (ret) {
/* allow to search victim from sections has pinned data */
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 2669439b9413..7aabfc9b43cb 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -792,7 +792,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
int f2fs_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, __u64 start, __u64 len)
{
- __u64 byteaddr, ilen;
+ __u64 byteaddr = 0, ilen;
__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
FIEMAP_EXTENT_LAST;
struct node_info ni;
@@ -814,6 +814,15 @@ int f2fs_inline_data_fiemap(struct inode *inode,
goto out;
}
+ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
+ err = f2fs_write_single_node_folio(ifolio, true, false, FS_NODE_IO);
+ if (err)
+ return err;
+ ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino);
+ if (IS_ERR(ifolio))
+ return PTR_ERR(ifolio);
+ f2fs_folio_wait_writeback(ifolio, NODE, true, true);
+ }
ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode));
if (start >= ilen)
goto out;
@@ -825,9 +834,14 @@ int f2fs_inline_data_fiemap(struct inode *inode,
if (err)
goto out;
- byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
- byteaddr += (char *)inline_data_addr(inode, ifolio) -
- (char *)F2FS_INODE(ifolio);
+ if (__is_valid_data_blkaddr(ni.blk_addr)) {
+ byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
+ byteaddr += (char *)inline_data_addr(inode, ifolio) -
+ (char *)F2FS_INODE(ifolio);
+ } else {
+ f2fs_bug_on(F2FS_I_SB(inode), ni.blk_addr != NEW_ADDR);
+ flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN;
+ }
err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err);
out:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index f27198d6695b..c6dcda447882 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -687,7 +687,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio)
ri->i_uid = cpu_to_le32(i_uid_read(inode));
ri->i_gid = cpu_to_le32(i_gid_read(inode));
ri->i_links = cpu_to_le32(inode->i_nlink);
- ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
+ ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(READ_ONCE(inode->i_blocks)) + 1);
if (!f2fs_is_atomic_file(inode) ||
is_inode_flag_set(inode, FI_ATOMIC_COMMITTED))
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index f8703038e1d8..ae265e3e9b2c 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -34,6 +34,7 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int i;
if (!sbi->iostat_enable)
return 0;
@@ -76,6 +77,12 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
IOSTAT_INFO_SHOW("fs node", FS_NODE_READ_IO);
IOSTAT_INFO_SHOW("fs meta", FS_META_READ_IO);
+ /* print read folio order stats */
+ seq_printf(seq, "%-23s", "fs read folio order:");
+ for (i = 0; i < NR_PAGE_ORDERS; i++)
+ seq_printf(seq, " %llu", sbi->iostat_read_folio_count[i]);
+ seq_putc(seq, '\n');
+
/* print other IOs */
seq_puts(seq, "[OTHER]\n");
IOSTAT_INFO_SHOW("fs discard", FS_DISCARD_IO);
@@ -113,6 +120,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
{
unsigned long long iostat_diff[NR_IO_TYPE];
+ unsigned long long read_folio_count_diff[NR_PAGE_ORDERS];
int i;
unsigned long flags;
@@ -133,9 +141,15 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
sbi->prev_iostat_bytes[i];
sbi->prev_iostat_bytes[i] = sbi->iostat_bytes[i];
}
+
+ for (i = 0; i < NR_PAGE_ORDERS; i++) {
+ read_folio_count_diff[i] = sbi->iostat_read_folio_count[i] -
+ sbi->prev_iostat_read_folio_count[i];
+ sbi->prev_iostat_read_folio_count[i] = sbi->iostat_read_folio_count[i];
+ }
spin_unlock_irqrestore(&sbi->iostat_lock, flags);
- trace_f2fs_iostat(sbi, iostat_diff);
+ trace_f2fs_iostat(sbi, iostat_diff, read_folio_count_diff);
__record_iostat_latency(sbi);
}
@@ -151,6 +165,10 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
sbi->iostat_bytes[i] = 0;
sbi->prev_iostat_bytes[i] = 0;
}
+ for (i = 0; i < NR_PAGE_ORDERS; i++) {
+ sbi->iostat_read_folio_count[i] = 0;
+ sbi->prev_iostat_read_folio_count[i] = 0;
+ }
spin_unlock_irq(&sbi->iostat_lock);
spin_lock_irq(&sbi->iostat_lat_lock);
@@ -165,6 +183,24 @@ static inline void __f2fs_update_iostat(struct f2fs_sb_info *sbi,
sbi->iostat_count[type]++;
}
+void f2fs_update_read_folio_count(struct f2fs_sb_info *sbi, struct folio *folio)
+{
+ unsigned int order = folio_order(folio);
+ unsigned long flags;
+
+ if (!sbi->iostat_enable)
+ return;
+
+ if (order >= NR_PAGE_ORDERS)
+ order = NR_PAGE_ORDERS - 1;
+
+ spin_lock_irqsave(&sbi->iostat_lock, flags);
+ sbi->iostat_read_folio_count[order]++;
+ spin_unlock_irqrestore(&sbi->iostat_lock, flags);
+
+ f2fs_record_iostat(sbi);
+}
+
void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
enum iostat_type type, unsigned long long io_bytes)
{
diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
index eb99d05cf272..2025225b5bed 100644
--- a/fs/f2fs/iostat.h
+++ b/fs/f2fs/iostat.h
@@ -34,6 +34,8 @@ extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi);
extern void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
enum iostat_type type, unsigned long long io_bytes);
+extern void f2fs_update_read_folio_count(struct f2fs_sb_info *sbi,
+ struct folio *folio);
struct bio_iostat_ctx {
struct f2fs_sb_info *sbi;
@@ -68,6 +70,8 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
#else
static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
enum iostat_type type, unsigned long long io_bytes) {}
+static inline void f2fs_update_read_folio_count(struct f2fs_sb_info *sbi,
+ struct folio *folio) {}
static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
struct bio *bio, struct bio_post_read_ctx *ctx) {}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index efbb0732d420..cac03b8e91a1 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -83,6 +83,21 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
if (set) {
if (total_count == F2FS_MAX_EXTENSION)
return -EINVAL;
+
+ if (hot) {
+ start = 0;
+ count = cold_count;
+ } else {
+ start = cold_count;
+ count = total_count;
+ }
+ for (i = start; i < count; i++) {
+ if (!strcmp(name, extlist[i])) {
+ f2fs_warn(sbi, "extension '%s' already exists in %s list",
+ name, hot ? "cold" : "hot");
+ return -EINVAL;
+ }
+ }
} else {
if (!hot && !cold_count)
return -EINVAL;
@@ -964,6 +979,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
return err;
err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname);
+ f2fs_free_filename(&fname);
if (err)
return err;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index e5b4a5b97b57..4e5bd9e4cfc3 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -325,7 +325,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
start, nr);
}
-bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio)
+bool f2fs_in_warm_node_list(struct folio *folio)
{
return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio);
}
@@ -391,7 +391,7 @@ void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
}
-int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
+bool f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -427,7 +427,9 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
bool need_update = true;
+ struct f2fs_lock_context lc;
+ f2fs_down_read_trace(&sbi->node_write, &lc);
f2fs_down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ino, false);
if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
@@ -435,6 +437,7 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
get_nat_flag(e, HAS_FSYNCED_INODE)))
need_update = false;
f2fs_up_read(&nm_i->nat_tree_lock);
+ f2fs_up_read_trace(&sbi->node_write, &lc);
return need_update;
}
@@ -1113,7 +1116,7 @@ out_err:
}
static int truncate_partial_nodes(struct dnode_of_data *dn,
- struct f2fs_inode *ri, int *offset, int depth)
+ int *offset, int depth)
{
struct folio *folios[2];
nid_t nid[3];
@@ -1184,7 +1187,6 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
int err = 0, cont = 1;
int level, offset[4], noffset[4];
unsigned int nofs = 0;
- struct f2fs_inode *ri;
struct dnode_of_data dn;
struct folio *folio;
@@ -1212,7 +1214,6 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
set_new_dnode(&dn, inode, folio, NULL, 0);
folio_unlock(folio);
- ri = F2FS_INODE(folio);
switch (level) {
case 0:
case 1:
@@ -1222,7 +1223,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
nofs = noffset[1];
if (!offset[level - 1])
goto skip_partial;
- err = truncate_partial_nodes(&dn, ri, offset, level);
+ err = truncate_partial_nodes(&dn, offset, level);
if (err < 0 && err != -ENOENT)
goto fail;
nofs += 1 + NIDS_PER_BLOCK;
@@ -1231,7 +1232,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
nofs = 5 + 2 * NIDS_PER_BLOCK;
if (!offset[level - 1])
goto skip_partial;
- err = truncate_partial_nodes(&dn, ri, offset, level);
+ err = truncate_partial_nodes(&dn, offset, level);
if (err < 0 && err != -ENOENT)
goto fail;
break;
@@ -1729,9 +1730,10 @@ continue_unlock:
return last_folio;
}
-static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted,
- struct writeback_control *wbc, bool do_balance,
- enum iostat_type io_type, unsigned int *seq_id)
+static bool __write_node_folio(struct folio *folio, bool atomic, bool do_fsync,
+ bool *submitted, struct writeback_control *wbc,
+ bool do_balance, enum iostat_type io_type,
+ unsigned int *seq_id)
{
struct f2fs_sb_info *sbi = F2FS_F_SB(folio);
nid_t nid;
@@ -1776,7 +1778,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted
if (f2fs_sanity_check_node_footer(sbi, folio, nid,
NODE_TYPE_REGULAR, false)) {
- f2fs_handle_critical_error(sbi, STOP_CP_REASON_CORRUPTED_NID);
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_NID);
goto redirty_out;
}
@@ -1801,16 +1803,17 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted
goto redirty_out;
}
- if (atomic) {
- if (!test_opt(sbi, NOBARRIER))
- fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
- if (IS_INODE(folio))
- set_dentry_mark(folio,
+ if (atomic && !test_opt(sbi, NOBARRIER))
+ fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
+
+ set_dentry_mark(folio, false);
+ set_fsync_mark(folio, do_fsync);
+ if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio)))
+ set_dentry_mark(folio,
f2fs_need_dentry_mark(sbi, ino_of_node(folio)));
- }
/* should add to global list before clearing PAGECACHE status */
- if (f2fs_in_warm_node_list(sbi, folio)) {
+ if (f2fs_in_warm_node_list(folio)) {
seq = f2fs_add_fsync_node_entry(sbi, folio);
if (seq_id)
*seq_id = seq;
@@ -1843,41 +1846,51 @@ redirty_out:
return false;
}
-int f2fs_move_node_folio(struct folio *node_folio, int gc_type)
+int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode,
+ bool mark_dirty, enum iostat_type io_type)
{
int err = 0;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 1,
+ };
- if (gc_type == FG_GC) {
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 1,
- };
+ if (!sync_mode) {
+ /* set page dirty and write it */
+ if (!folio_test_writeback(node_folio))
+ folio_mark_dirty(node_folio);
+ goto out_folio;
+ }
- f2fs_folio_wait_writeback(node_folio, NODE, true, true);
+ f2fs_folio_wait_writeback(node_folio, NODE, true, true);
+ if (mark_dirty)
folio_mark_dirty(node_folio);
+ else if (!folio_test_dirty(node_folio))
+ goto out_folio;
- if (!folio_clear_dirty_for_io(node_folio)) {
- err = -EAGAIN;
- goto out_page;
- }
-
- if (!__write_node_folio(node_folio, false, NULL,
- &wbc, false, FS_GC_NODE_IO, NULL))
- err = -EAGAIN;
- goto release_page;
- } else {
- /* set page dirty and write it */
- if (!folio_test_writeback(node_folio))
- folio_mark_dirty(node_folio);
+ if (!folio_clear_dirty_for_io(node_folio)) {
+ err = -EAGAIN;
+ goto out_folio;
}
-out_page:
+
+ if (!__write_node_folio(node_folio, false, false, NULL,
+ &wbc, false, FS_GC_NODE_IO, NULL))
+ err = -EAGAIN;
+ goto release_folio;
+out_folio:
folio_unlock(node_folio);
-release_page:
+release_folio:
f2fs_folio_put(node_folio, false);
return err;
}
+int f2fs_move_node_folio(struct folio *node_folio, int gc_type)
+{
+ return f2fs_write_single_node_folio(node_folio, gc_type == FG_GC,
+ true, FS_GC_NODE_IO);
+}
+
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic,
unsigned int *seq_id)
@@ -1908,6 +1921,7 @@ retry:
for (i = 0; i < nr_folios; i++) {
struct folio *folio = fbatch.folios[i];
bool submitted = false;
+ bool do_fsync = false;
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_folio_put(last_folio, false);
@@ -1938,19 +1952,13 @@ continue_unlock:
f2fs_folio_wait_writeback(folio, NODE, true, true);
- set_fsync_mark(folio, 0);
- set_dentry_mark(folio, 0);
-
if (!atomic || folio == last_folio) {
- set_fsync_mark(folio, 1);
+ do_fsync = true;
percpu_counter_inc(&sbi->rf_node_block_count);
if (IS_INODE(folio)) {
if (is_inode_flag_set(inode,
FI_DIRTY_INODE))
f2fs_update_inode(inode, folio);
- if (!atomic)
- set_dentry_mark(folio,
- f2fs_need_dentry_mark(sbi, ino));
}
/* may be written by other thread */
if (!folio_test_dirty(folio))
@@ -1962,8 +1970,9 @@ continue_unlock:
if (!__write_node_folio(folio, atomic &&
folio == last_folio,
- &submitted, wbc, true,
- FS_NODE_IO, seq_id)) {
+ do_fsync, &submitted,
+ wbc, true, FS_NODE_IO,
+ seq_id)) {
f2fs_folio_put(last_folio, false);
folio_batch_release(&fbatch);
ret = -EIO;
@@ -2163,10 +2172,7 @@ write_node:
if (!folio_clear_dirty_for_io(folio))
goto continue_unlock;
- set_fsync_mark(folio, 0);
- set_dentry_mark(folio, 0);
-
- if (!__write_node_folio(folio, false, &submitted,
+ if (!__write_node_folio(folio, false, false, &submitted,
wbc, do_balance, io_type, NULL)) {
folio_batch_release(&fbatch);
ret = -EIO;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 824ac9f0e6e4..bcf2034e4263 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -400,27 +400,26 @@ static inline int is_node(const struct folio *folio, int type)
#define is_fsync_dnode(folio) is_node(folio, FSYNC_BIT_SHIFT)
#define is_dent_dnode(folio) is_node(folio, DENT_BIT_SHIFT)
-static inline void set_cold_node(const struct folio *folio, bool is_dir)
+static inline void __set_mark(const struct folio *folio, bool mark, int type)
{
struct f2fs_node *rn = F2FS_NODE(folio);
unsigned int flag = le32_to_cpu(rn->footer.flag);
- if (is_dir)
- flag &= ~BIT(COLD_BIT_SHIFT);
- else
- flag |= BIT(COLD_BIT_SHIFT);
- rn->footer.flag = cpu_to_le32(flag);
-}
-
-static inline void set_mark(struct folio *folio, int mark, int type)
-{
- struct f2fs_node *rn = F2FS_NODE(folio);
- unsigned int flag = le32_to_cpu(rn->footer.flag);
if (mark)
flag |= BIT(type);
else
flag &= ~BIT(type);
rn->footer.flag = cpu_to_le32(flag);
+}
+
+static inline void set_cold_node(const struct folio *folio, bool is_dir)
+{
+ __set_mark(folio, !is_dir, COLD_BIT_SHIFT);
+}
+
+static inline void set_mark(struct folio *folio, bool mark, int type)
+{
+ __set_mark(folio, mark, type);
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_inode_chksum_set(F2FS_F_SB(folio), folio);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 6a97fe76712b..788f8b050249 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1606,6 +1606,9 @@ static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
if (dc->state != D_PREP)
goto next;
+ if (*issued > 0 && unlikely(freezing(current)))
+ break;
+
if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
io_interrupted = true;
break;
@@ -1645,6 +1648,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct blk_plug plug;
int i, issued;
bool io_interrupted = false;
+ bool suspended = false;
if (dpolicy->timeout)
f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
@@ -1675,6 +1679,11 @@ retry:
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
+ if (issued > 0 && unlikely(freezing(current))) {
+ suspended = true;
+ break;
+ }
+
if (dpolicy->timeout &&
f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
break;
@@ -1694,7 +1703,8 @@ retry:
next:
mutex_unlock(&dcc->cmd_lock);
- if (issued >= dpolicy->max_requests || io_interrupted)
+ if (issued >= dpolicy->max_requests || io_interrupted ||
+ suspended)
break;
}
@@ -1880,7 +1890,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
*
* Return true if issued all discard cmd or no discard cmd need issue, otherwise return false.
*/
-bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi, bool need_check)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_policy dpolicy;
@@ -1897,7 +1907,7 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
/* just to make sure there is no pending discard commands */
__wait_all_discard_cmd(sbi, NULL);
- f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
+ f2fs_bug_on(sbi, need_check && atomic_read(&dcc->discard_cmd_cnt));
return !dropped;
}
@@ -2367,7 +2377,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
* Recovery can cache discard commands, so in error path of
* fill_super(), it needs to give a chance to handle them.
*/
- f2fs_issue_discard_timeout(sbi);
+ f2fs_issue_discard_timeout(sbi, true);
kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
@@ -3980,7 +3990,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host))
fscrypt_finalize_bounce_page(&fio->encrypted_page);
folio_end_writeback(folio);
- if (f2fs_in_warm_node_list(fio->sbi, folio))
+ if (f2fs_in_warm_node_list(folio))
f2fs_del_fsync_node_entry(fio->sbi, folio);
f2fs_bug_on(fio->sbi, !is_set_ckpt_flags(fio->sbi,
CP_ERROR_FLAG));
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8774c60b4be4..ccf806b676f5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -336,9 +336,12 @@ static const struct fs_parameter_spec f2fs_param_specs[] = {
fsparam_flag("usrquota", Opt_usrquota),
fsparam_flag("grpquota", Opt_grpquota),
fsparam_flag("prjquota", Opt_prjquota),
- fsparam_string_empty("usrjquota", Opt_usrjquota),
- fsparam_string_empty("grpjquota", Opt_grpjquota),
- fsparam_string_empty("prjjquota", Opt_prjjquota),
+ fsparam_string("usrjquota", Opt_usrjquota),
+ fsparam_flag("usrjquota", Opt_usrjquota),
+ fsparam_string("grpjquota", Opt_grpjquota),
+ fsparam_flag("grpjquota", Opt_grpjquota),
+ fsparam_string("prjjquota", Opt_prjjquota),
+ fsparam_flag("prjjquota", Opt_prjjquota),
fsparam_flag("nat_bits", Opt_nat_bits),
fsparam_enum("jqfmt", Opt_jqfmt, f2fs_param_jqfmt),
fsparam_enum("alloc_mode", Opt_alloc, f2fs_param_alloc_mode),
@@ -979,26 +982,26 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx_set_opt(ctx, F2FS_MOUNT_PRJQUOTA);
break;
case Opt_usrjquota:
- if (!*param->string)
- ret = f2fs_unnote_qf_name(fc, USRQUOTA);
- else
+ if (param->type == fs_value_is_string && *param->string)
ret = f2fs_note_qf_name(fc, USRQUOTA, param);
+ else
+ ret = f2fs_unnote_qf_name(fc, USRQUOTA);
if (ret)
return ret;
break;
case Opt_grpjquota:
- if (!*param->string)
- ret = f2fs_unnote_qf_name(fc, GRPQUOTA);
- else
+ if (param->type == fs_value_is_string && *param->string)
ret = f2fs_note_qf_name(fc, GRPQUOTA, param);
+ else
+ ret = f2fs_unnote_qf_name(fc, GRPQUOTA);
if (ret)
return ret;
break;
case Opt_prjjquota:
- if (!*param->string)
- ret = f2fs_unnote_qf_name(fc, PRJQUOTA);
- else
+ if (param->type == fs_value_is_string && *param->string)
ret = f2fs_note_qf_name(fc, PRJQUOTA, param);
+ else
+ ret = f2fs_unnote_qf_name(fc, PRJQUOTA);
if (ret)
return ret;
break;
@@ -1515,6 +1518,7 @@ static int f2fs_check_opt_consistency(struct fs_context *fc,
F2FS_OPTION(sbi).root_reserved_blocks);
ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT);
ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_ROOT);
+ ctx->spec_mask &= ~F2FS_SPEC_reserve_root;
}
if (test_opt(sbi, RESERVE_NODE) &&
(ctx->opt_mask & BIT(F2FS_MOUNT_RESERVE_NODE)) &&
@@ -1523,6 +1527,7 @@ static int f2fs_check_opt_consistency(struct fs_context *fc,
F2FS_OPTION(sbi).root_reserved_nodes);
ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_NODE);
ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_NODE);
+ ctx->spec_mask &= ~F2FS_SPEC_reserve_node;
}
err = f2fs_check_test_dummy_encryption(fc, sb);
@@ -2009,7 +2014,7 @@ static void f2fs_put_super(struct super_block *sb)
}
/* be sure to wait for any on-going discard commands */
- done = f2fs_issue_discard_timeout(sbi);
+ done = f2fs_issue_discard_timeout(sbi, true);
if (f2fs_realtime_discard_enable(sbi) && !sbi->discard_blks && done) {
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
@@ -2088,6 +2093,12 @@ static void f2fs_put_super(struct super_block *sb)
#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
#endif
+ sync_blockdev(sb->s_bdev);
+ invalidate_bdev(sb->s_bdev);
+ for (i = 1; i < sbi->s_ndevs; i++) {
+ sync_blockdev(FDEV(i).bdev);
+ invalidate_bdev(FDEV(i).bdev);
+ }
}
int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -2152,7 +2163,7 @@ static int f2fs_unfreeze(struct super_block *sb)
* will recover after removal of snapshot.
*/
if (test_opt(sbi, DISCARD) && !f2fs_hw_support_discard(sbi))
- f2fs_issue_discard_timeout(sbi);
+ f2fs_issue_discard_timeout(sbi, true);
clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
return 0;
@@ -2957,7 +2968,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
need_stop_discard = true;
} else {
f2fs_stop_discard_thread(sbi);
- f2fs_issue_discard_timeout(sbi);
+ /*
+ * f2fs_ioc_fitrim() won't race w/ "remount ro"
+ * so it's safe to check discard_cmd_cnt in
+ * f2fs_issue_discard_timeout().
+ */
+ f2fs_issue_discard_timeout(sbi, flags & SB_RDONLY);
need_restart_discard = true;
}
}
@@ -4650,7 +4666,8 @@ static bool system_going_down(void)
|| system_state == SYSTEM_RESTART;
}
-void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason)
+static void f2fs_handle_critical_error(struct f2fs_sb_info *sbi,
+ unsigned char reason)
{
struct super_block *sb = sbi->sb;
bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
@@ -4707,6 +4724,16 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason)
*/
}
+void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
+ unsigned char reason)
+{
+ f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL);
+ if (!end_io)
+ f2fs_flush_merged_writes(sbi);
+ f2fs_handle_critical_error(sbi, reason);
+}
+
+
static void f2fs_record_error_work(struct work_struct *work)
{
struct f2fs_sb_info *sbi = container_of(work,
@@ -4948,6 +4975,11 @@ try_onemore:
init_f2fs_rwsem_trace(&sbi->gc_lock, sbi, LOCK_NAME_GC_LOCK);
mutex_init(&sbi->writepages);
init_f2fs_rwsem_trace(&sbi->cp_global_sem, sbi, LOCK_NAME_CP_GLOBAL);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_register_key(&sbi->cp_global_sem_key);
+ lockdep_set_class(&sbi->cp_global_sem.internal_rwsem,
+ &sbi->cp_global_sem_key);
+#endif
init_f2fs_rwsem_trace(&sbi->node_write, sbi, LOCK_NAME_NODE_WRITE);
init_f2fs_rwsem_trace(&sbi->node_change, sbi, LOCK_NAME_NODE_CHANGE);
spin_lock_init(&sbi->stat_lock);
@@ -5419,6 +5451,9 @@ free_options:
free_sb_buf:
kfree(raw_super);
free_sbi:
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_unregister_key(&sbi->cp_global_sem_key);
+#endif
kfree(sbi);
sb->s_fs_info = NULL;
@@ -5500,6 +5535,9 @@ static void kill_f2fs_super(struct super_block *sb)
/* Release block devices last, after fscrypt_destroy_keyring(). */
if (sbi) {
destroy_device_list(sbi);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_unregister_key(&sbi->cp_global_sem_key);
+#endif
kfree(sbi);
sb->s_fs_info = NULL;
}
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 5fbfdc96e502..352e96ad5c3a 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -338,6 +338,14 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a,
f2fs_update_sit_info(sbi);
return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->avg_vblocks));
}
+
+static ssize_t defrag_blocks_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
+
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->defrag_blks));
+}
#endif
static ssize_t main_blkaddr_show(struct f2fs_attr *a,
@@ -379,10 +387,12 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
if (!strcmp(a->attr.name, "extension_list")) {
__u8 (*extlist)[F2FS_EXTENSION_LEN] =
sbi->raw_super->extension_list;
- int cold_count = le32_to_cpu(sbi->raw_super->extension_count);
- int hot_count = sbi->raw_super->hot_ext_count;
+ int cold_count, hot_count;
int len = 0, i;
+ f2fs_down_read(&sbi->sb_lock);
+ cold_count = le32_to_cpu(sbi->raw_super->extension_count);
+ hot_count = sbi->raw_super->hot_ext_count;
len += sysfs_emit_at(buf, len, "cold file extension:\n");
for (i = 0; i < cold_count; i++)
len += sysfs_emit_at(buf, len, "%s\n", extlist[i]);
@@ -390,6 +400,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
len += sysfs_emit_at(buf, len, "hot file extension:\n");
for (i = cold_count; i < cold_count + hot_count; i++)
len += sysfs_emit_at(buf, len, "%s\n", extlist[i]);
+ f2fs_up_read(&sbi->sb_lock);
return len;
}
@@ -1351,6 +1362,7 @@ F2FS_GENERAL_RO_ATTR(gc_mode);
F2FS_GENERAL_RO_ATTR(moved_blocks_background);
F2FS_GENERAL_RO_ATTR(moved_blocks_foreground);
F2FS_GENERAL_RO_ATTR(avg_vblocks);
+F2FS_GENERAL_RO_ATTR(defrag_blocks);
#endif
#ifdef CONFIG_FS_ENCRYPTION
@@ -1473,6 +1485,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(moved_blocks_foreground),
ATTR_LIST(moved_blocks_background),
ATTR_LIST(avg_vblocks),
+ ATTR_LIST(defrag_blocks),
#endif
#ifdef CONFIG_BLK_DEV_ZONED
ATTR_LIST(unusable_blocks_per_sec),
@@ -1984,24 +1997,26 @@ int __init f2fs_init_sysfs(void)
ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype,
NULL, "features");
if (ret)
- goto put_kobject;
+ goto unregister_kset;
ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype,
NULL, "tuning");
if (ret)
- goto put_kobject;
+ goto put_feat;
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
if (!f2fs_proc_root) {
ret = -ENOMEM;
- goto put_kobject;
+ goto put_tune;
}
return 0;
-put_kobject:
+put_tune:
kobject_put(&f2fs_tune);
+put_feat:
kobject_put(&f2fs_feat);
+unregister_kset:
kset_unregister(&f2fs_kset);
return ret;
}
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index dc41722fcc9d..829a59399dac 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -80,6 +80,9 @@ enum stop_cp_reason {
STOP_CP_REASON_NO_SEGMENT,
STOP_CP_REASON_CORRUPTED_FREE_BITMAP,
STOP_CP_REASON_CORRUPTED_NID,
+ STOP_CP_REASON_READ_META,
+ STOP_CP_REASON_READ_NODE,
+ STOP_CP_REASON_READ_DATA,
STOP_CP_REASON_MAX,
};
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 05a46908acd9..b5188d2671d7 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -2116,9 +2116,10 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
#ifdef CONFIG_F2FS_IOSTAT
TRACE_EVENT(f2fs_iostat,
- TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat),
+ TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat,
+ unsigned long long *read_folio_count),
- TP_ARGS(sbi, iostat),
+ TP_ARGS(sbi, iostat, read_folio_count),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -2150,6 +2151,7 @@ TRACE_EVENT(f2fs_iostat,
__field(unsigned long long, fs_mrio)
__field(unsigned long long, fs_discard)
__field(unsigned long long, fs_reset_zone)
+ __array(unsigned long long, read_folio_count, 11)
),
TP_fast_assign(
@@ -2182,6 +2184,9 @@ TRACE_EVENT(f2fs_iostat,
__entry->fs_mrio = iostat[FS_META_READ_IO];
__entry->fs_discard = iostat[FS_DISCARD_IO];
__entry->fs_reset_zone = iostat[FS_ZONE_RESET_IO];
+ memset(__entry->read_folio_count, 0, sizeof(__entry->read_folio_count));
+ memcpy(__entry->read_folio_count, read_folio_count,
+ sizeof(unsigned long long) * min_t(int, NR_PAGE_ORDERS, 11));
),
TP_printk("dev = (%d,%d), "
@@ -2194,7 +2199,9 @@ TRACE_EVENT(f2fs_iostat,
"app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], "
"compr(buffered=%llu, mapped=%llu)], "
"fs [data=%llu, (gc_data=%llu, cdata=%llu), "
- "node=%llu, meta=%llu]",
+ "node=%llu, meta=%llu], "
+ "read_folio_count [0=%llu, 1=%llu, 2=%llu, 3=%llu, 4=%llu, "
+ "5=%llu, 6=%llu, 7=%llu, 8=%llu, 9=%llu, 10=%llu]",
show_dev(__entry->dev), __entry->app_wio, __entry->app_dio,
__entry->app_bio, __entry->app_mio, __entry->app_bcdio,
__entry->app_mcdio, __entry->fs_dio, __entry->fs_cdio,
@@ -2205,7 +2212,13 @@ TRACE_EVENT(f2fs_iostat,
__entry->app_rio, __entry->app_drio, __entry->app_brio,
__entry->app_mrio, __entry->app_bcrio, __entry->app_mcrio,
__entry->fs_drio, __entry->fs_gdrio,
- __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio)
+ __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio,
+ __entry->read_folio_count[0], __entry->read_folio_count[1],
+ __entry->read_folio_count[2], __entry->read_folio_count[3],
+ __entry->read_folio_count[4], __entry->read_folio_count[5],
+ __entry->read_folio_count[6], __entry->read_folio_count[7],
+ __entry->read_folio_count[8], __entry->read_folio_count[9],
+ __entry->read_folio_count[10])
);
#ifndef __F2FS_IOSTAT_LATENCY_TYPE