diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-14 09:48:10 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-14 09:48:10 -0800 |
| commit | 3e48a11675c50698374d4ac596fb506736eb1c53 (patch) | |
| tree | 19784102302960f534344f97950dd3230a0163f5 /include | |
| parent | 770aaedb461a055f79b971d538678942b6607894 (diff) | |
| parent | 52190933c37a96164b271f3f30c16099d9eb8c09 (diff) | |
Merge tag 'f2fs-for-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"In this development cycle, we focused on several key performance
optimizations:
- introducing large folio support to enhance read speeds for
immutable files
- reducing checkpoint=enable latency by flushing only committed dirty
pages
- implementing tracepoints to diagnose and resolve lock priority
inversion.
Additionally, we introduced the packed_ssa feature to optimize the SSA
footprint when utilizing large block sizes.
Detail summary:
Enhancements:
- support large folio for immutable non-compressed case
- support non-4KB block size without packed_ssa feature
- optimize f2fs_enable_checkpoint() to avoid long delay
- optimize f2fs_overwrite_io() for f2fs_iomap_begin
- optimize NAT block loading during checkpoint write
- add write latency stats for NAT and SIT blocks in
f2fs_write_checkpoint
- pin files do not require sbi->writepages lock for ordering
- avoid f2fs_map_blocks() for consecutive holes in readpages
- flush plug periodically during GC to maximize readahead effect
- add tracepoints to catch lock overheads
- add several sysfs entries to tune internal lock priorities
Fixes:
- fix lock priority inversion issue
- fix incomplete block usage in compact SSA summaries
- fix to show simulate_lock_timeout correctly
- fix to avoid mapping wrong physical block for swapfile
- fix IS_CHECKPOINTED flag inconsistency issue caused by
concurrent atomic commit and checkpoint writes
- fix to avoid UAF in f2fs_write_end_io()"
* tag 'f2fs-for-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (61 commits)
f2fs: sysfs: introduce critical_task_priority
f2fs: introduce trace_f2fs_priority_update
f2fs: fix lock priority inversion issue
f2fs: optimize f2fs_overwrite_io() for f2fs_iomap_begin
f2fs: fix incomplete block usage in compact SSA summaries
f2fs: decrease maximum flush retry count in f2fs_enable_checkpoint()
f2fs: optimize NAT block loading during checkpoint write
f2fs: change size parameter of __has_cursum_space() to unsigned int
f2fs: add write latency stats for NAT and SIT blocks in f2fs_write_checkpoint
f2fs: pin files do not require sbi->writepages lock for ordering
f2fs: fix to show simulate_lock_timeout correctly
f2fs: introduce FAULT_SKIP_WRITE
f2fs: check skipped write in f2fs_enable_checkpoint()
Revert "f2fs: add timeout in f2fs_enable_checkpoint()"
f2fs: fix to unlock folio in f2fs_read_data_large_folio()
f2fs: fix error path handling in f2fs_read_data_large_folio()
f2fs: use folio_end_read
f2fs: fix to avoid mapping wrong physical block for swapfile
f2fs: avoid f2fs_map_blocks() for consecutive holes in readpages
f2fs: advance index and offset after zeroing in large folio read
...
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/f2fs_fs.h | 73 | ||||
| -rw-r--r-- | include/trace/events/f2fs.h | 142 |
2 files changed, 186 insertions, 29 deletions
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a7880787cad3..dc41722fcc9d 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -17,7 +17,6 @@ #define F2FS_LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) /* log number for sector/blk */ #define F2FS_BLKSIZE PAGE_SIZE /* support only block == page */ #define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ -#define F2FS_SUM_BLKSIZE 4096 /* only support 4096 byte sum block */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ @@ -442,10 +441,8 @@ struct f2fs_sit_block { * from node's page's beginning to get a data block address. * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) */ -#define ENTRIES_IN_SUM (F2FS_SUM_BLKSIZE / 8) #define SUMMARY_SIZE (7) /* sizeof(struct f2fs_summary) */ #define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ -#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) /* a summary entry for a block in a segment */ struct f2fs_summary { @@ -468,22 +465,6 @@ struct summary_footer { __le32 check_sum; /* summary checksum */ } __packed; -#define SUM_JOURNAL_SIZE (F2FS_SUM_BLKSIZE - SUM_FOOTER_SIZE -\ - SUM_ENTRY_SIZE) -#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ - sizeof(struct nat_journal_entry)) -#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ - sizeof(struct nat_journal_entry)) -#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ - sizeof(struct sit_journal_entry)) -#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ - sizeof(struct sit_journal_entry)) - -/* Reserved area should make size of f2fs_extra_info equals to - * that of nat_journal and sit_journal. - */ -#define EXTRA_INFO_RESERVED (SUM_JOURNAL_SIZE - 2 - 8) - /* * frequently updated NAT/SIT entries can be stored in the spare area in * summary blocks @@ -498,9 +479,16 @@ struct nat_journal_entry { struct f2fs_nat_entry ne; } __packed; +/* + * The nat_journal structure is a placeholder whose actual size varies depending + * on the use of packed_ssa. Therefore, it must always be accessed only through + * specific sets of macros and fields, and size calculations should use + * size-related macros instead of sizeof(). + * Relevant macros: sbi->nat_journal_entries, nat_in_journal(), + * nid_in_journal(), MAX_NAT_JENTRIES(). + */ struct nat_journal { - struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES]; - __u8 reserved[NAT_JOURNAL_RESERVED]; + struct nat_journal_entry entries[0]; } __packed; struct sit_journal_entry { @@ -508,14 +496,21 @@ struct sit_journal_entry { struct f2fs_sit_entry se; } __packed; +/* + * The sit_journal structure is a placeholder whose actual size varies depending + * on the use of packed_ssa. Therefore, it must always be accessed only through + * specific sets of macros and fields, and size calculations should use + * size-related macros instead of sizeof(). + * Relevant macros: sbi->sit_journal_entries, sit_in_journal(), + * segno_in_journal(), MAX_SIT_JENTRIES(). + */ struct sit_journal { - struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES]; - __u8 reserved[SIT_JOURNAL_RESERVED]; + struct sit_journal_entry entries[0]; } __packed; struct f2fs_extra_info { __le64 kbytes_written; - __u8 reserved[EXTRA_INFO_RESERVED]; + __u8 reserved[]; } __packed; struct f2fs_journal { @@ -531,11 +526,33 @@ struct f2fs_journal { }; } __packed; -/* Block-sized summary block structure */ +/* + * Block-sized summary block structure + * + * The f2fs_summary_block structure is a placeholder whose actual size varies + * depending on the use of packed_ssa. Therefore, it must always be accessed + * only through specific sets of macros and fields, and size calculations should + * use size-related macros instead of sizeof(). + * Relevant macros: sbi->sum_blocksize, sbi->entries_in_sum, + * sbi->sum_entry_size, sum_entries(), sum_journal(), sum_footer(). + * + * Summary Block Layout + * + * +-----------------------+ <--- Block Start + * | struct f2fs_summary | + * | entries[0] | + * | ... | + * | entries[N-1] | + * +-----------------------+ + * | struct f2fs_journal | + * +-----------------------+ + * | struct summary_footer | + * +-----------------------+ <--- Block End + */ struct f2fs_summary_block { - struct f2fs_summary entries[ENTRIES_IN_SUM]; - struct f2fs_journal journal; - struct summary_footer footer; + struct f2fs_summary entries[0]; + // struct f2fs_journal journal; + // struct summary_footer footer; } __packed; /* diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index df4017dcc701..9364e6775562 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -184,6 +184,15 @@ TRACE_DEFINE_ENUM(CP_PHASE_FINISH_CHECKPOINT); { CP_PHASE_FINISH_BLOCK_OPS, "finish block_ops" }, \ { CP_PHASE_FINISH_CHECKPOINT, "finish checkpoint" }) +#define show_lock_name(lock) \ + __print_symbolic(lock, \ + { LOCK_NAME_CP_RWSEM, "cp_rwsem" }, \ + { LOCK_NAME_NODE_CHANGE, "node_change" }, \ + { LOCK_NAME_NODE_WRITE, "node_write" }, \ + { LOCK_NAME_GC_LOCK, "gc_lock" }, \ + { LOCK_NAME_CP_GLOBAL, "cp_global" }, \ + { LOCK_NAME_IO_RWSEM, "io_rwsem" }) + struct f2fs_sb_info; struct f2fs_io_info; struct extent_info; @@ -1358,6 +1367,7 @@ DECLARE_EVENT_CLASS(f2fs__folio, __field(int, type) __field(int, dir) __field(pgoff_t, index) + __field(pgoff_t, nrpages) __field(int, dirty) __field(int, uptodate) ), @@ -1368,16 +1378,18 @@ DECLARE_EVENT_CLASS(f2fs__folio, __entry->type = type; __entry->dir = S_ISDIR(folio->mapping->host->i_mode); __entry->index = folio->index; + __entry->nrpages= folio_nr_pages(folio); __entry->dirty = folio_test_dirty(folio); __entry->uptodate = folio_test_uptodate(folio); ), - TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, " + TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, nr_pages = %lu, " "dirty = %d, uptodate = %d", show_dev_ino(__entry), show_block_type(__entry->type), show_file_type(__entry->dir), (unsigned long)__entry->index, + (unsigned long)__entry->nrpages, __entry->dirty, __entry->uptodate) ); @@ -1403,6 +1415,13 @@ DEFINE_EVENT(f2fs__folio, f2fs_readpage, TP_ARGS(folio, type) ); +DEFINE_EVENT(f2fs__folio, f2fs_read_folio, + + TP_PROTO(struct folio *folio, int type), + + TP_ARGS(folio, type) +); + DEFINE_EVENT(f2fs__folio, f2fs_set_page_dirty, TP_PROTO(struct folio *folio, int type), @@ -2442,6 +2461,127 @@ DEFINE_EVENT(f2fs__rw_end, f2fs_datawrite_end, TP_ARGS(inode, offset, bytes) ); +TRACE_EVENT(f2fs_lock_elapsed_time, + + TP_PROTO(struct f2fs_sb_info *sbi, enum f2fs_lock_name lock_name, + bool is_write, struct task_struct *p, int ioprio, + unsigned long long total_time, + unsigned long long running_time, + unsigned long long runnable_time, + unsigned long long io_sleep_time, + unsigned long long other_time), + + TP_ARGS(sbi, lock_name, is_write, p, ioprio, total_time, running_time, + runnable_time, io_sleep_time, other_time), + + TP_STRUCT__entry( + __field(dev_t, dev) + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(int, prio) + __field(int, ioprio_class) + __field(int, ioprio_data) + __field(unsigned int, lock_name) + __field(bool, is_write) + __field(unsigned long long, total_time) + __field(unsigned long long, running_time) + __field(unsigned long long, runnable_time) + __field(unsigned long long, io_sleep_time) + __field(unsigned long long, other_time) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->ioprio_class = IOPRIO_PRIO_CLASS(ioprio); + __entry->ioprio_data = IOPRIO_PRIO_DATA(ioprio); + __entry->lock_name = lock_name; + __entry->is_write = is_write; + __entry->total_time = total_time; + __entry->running_time = running_time; + __entry->runnable_time = runnable_time; + __entry->io_sleep_time = io_sleep_time; + __entry->other_time = other_time; + ), + + TP_printk("dev = (%d,%d), comm: %s, pid: %d, prio: %d, " + "ioprio_class: %d, ioprio_data: %d, lock_name: %s, " + "lock_type: %s, total: %llu, running: %llu, " + "runnable: %llu, io_sleep: %llu, other: %llu", + show_dev(__entry->dev), + __entry->comm, + __entry->pid, + __entry->prio, + __entry->ioprio_class, + __entry->ioprio_data, + show_lock_name(__entry->lock_name), + __entry->is_write ? "wlock" : "rlock", + __entry->total_time, + __entry->running_time, + __entry->runnable_time, + __entry->io_sleep_time, + __entry->other_time) +); + +DECLARE_EVENT_CLASS(f2fs_priority_update, + + TP_PROTO(struct f2fs_sb_info *sbi, enum f2fs_lock_name lock_name, + bool is_write, struct task_struct *p, int orig_prio, + int new_prio), + + TP_ARGS(sbi, lock_name, is_write, p, orig_prio, new_prio), + + TP_STRUCT__entry( + __field(dev_t, dev) + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(unsigned int, lock_name) + __field(bool, is_write) + __field(int, orig_prio) + __field(int, new_prio) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->lock_name = lock_name; + __entry->is_write = is_write; + __entry->orig_prio = orig_prio; + __entry->new_prio = new_prio; + ), + + TP_printk("dev = (%d,%d), comm: %s, pid: %d, lock_name: %s, " + "lock_type: %s, orig_prio: %d, new_prio: %d", + show_dev(__entry->dev), + __entry->comm, + __entry->pid, + show_lock_name(__entry->lock_name), + __entry->is_write ? "wlock" : "rlock", + __entry->orig_prio, + __entry->new_prio) +); + +DEFINE_EVENT(f2fs_priority_update, f2fs_priority_uplift, + + TP_PROTO(struct f2fs_sb_info *sbi, enum f2fs_lock_name lock_name, + bool is_write, struct task_struct *p, int orig_prio, + int new_prio), + + TP_ARGS(sbi, lock_name, is_write, p, orig_prio, new_prio) +); + +DEFINE_EVENT(f2fs_priority_update, f2fs_priority_restore, + + TP_PROTO(struct f2fs_sb_info *sbi, enum f2fs_lock_name lock_name, + bool is_write, struct task_struct *p, int orig_prio, + int new_prio), + + TP_ARGS(sbi, lock_name, is_write, p, orig_prio, new_prio) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ |
