From b7a2441f9966fe3e1be960a876ab52e6029ea005 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 21 Jul 2010 22:19:51 -0600 Subject: writeback: remove writeback_control.more_io When wbc.more_io was first introduced, it indicates whether there are at least one superblock whose s_more_io contains more IO work. Now with the per-bdi writeback, it can be replaced with a simple b_more_io test. Acked-by: Jan Kara Acked-by: Mel Gorman Reviewed-by: Minchan Kim Signed-off-by: Wu Fengguang --- include/trace/events/ext4.h | 6 ++---- include/trace/events/writeback.h | 5 +---- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index e09592d2f916..b225d0d8c87f 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -404,7 +404,6 @@ TRACE_EVENT(ext4_da_writepages_result, __field( int, pages_written ) __field( long, pages_skipped ) __field( int, sync_mode ) - __field( char, more_io ) __field( pgoff_t, writeback_index ) ), @@ -415,16 +414,15 @@ TRACE_EVENT(ext4_da_writepages_result, __entry->pages_written = pages_written; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; - __entry->more_io = wbc->more_io; __entry->writeback_index = inode->i_mapping->writeback_index; ), TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld " - " more_io %d sync_mode %d writeback_index %lu", + "sync_mode %d writeback_index %lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->ret, __entry->pages_written, __entry->pages_skipped, - __entry->more_io, __entry->sync_mode, + __entry->sync_mode, (unsigned long) __entry->writeback_index) ); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 4e249b927eaa..b2cfac5f3313 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -101,7 +101,6 @@ DECLARE_EVENT_CLASS(wbc_class, __field(int, for_background) __field(int, for_reclaim) __field(int, range_cyclic) - __field(int, more_io) __field(unsigned long, older_than_this) __field(long, range_start) __field(long, range_end) @@ -116,7 +115,6 @@ DECLARE_EVENT_CLASS(wbc_class, __entry->for_background = wbc->for_background; __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; - __entry->more_io = wbc->more_io; __entry->older_than_this = wbc->older_than_this ? *wbc->older_than_this : 0; __entry->range_start = (long)wbc->range_start; @@ -124,7 +122,7 @@ DECLARE_EVENT_CLASS(wbc_class, ), TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " - "bgrd=%d reclm=%d cyclic=%d more=%d older=0x%lx " + "bgrd=%d reclm=%d cyclic=%d older=0x%lx " "start=0x%lx end=0x%lx", __entry->name, __entry->nr_to_write, @@ -134,7 +132,6 @@ DECLARE_EVENT_CLASS(wbc_class, __entry->for_background, __entry->for_reclaim, __entry->range_cyclic, - __entry->more_io, __entry->older_than_this, __entry->range_start, __entry->range_end) -- cgit v1.2.3 From 846d5a091b0506b75489577cde27f39b37a192a4 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 5 May 2011 21:10:38 -0600 Subject: writeback: remove .nonblocking and .encountered_congestion Remove two unused struct writeback_control fields: .encountered_congestion (completely unused) .nonblocking (never set, checked/showed in XFS,NFS/btrfs) The .for_background check in nfs_write_inode() is also removed btw, as .for_background implies WB_SYNC_NONE. Reviewed-by: Jan Kara Proposed-by: Christoph Hellwig Signed-off-by: Wu Fengguang --- include/trace/events/btrfs.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 4114129f0794..b31702ac15be 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -284,7 +284,6 @@ DECLARE_EVENT_CLASS(btrfs__writepage, __field( long, pages_skipped ) __field( loff_t, range_start ) __field( loff_t, range_end ) - __field( char, nonblocking ) __field( char, for_kupdate ) __field( char, for_reclaim ) __field( char, range_cyclic ) @@ -299,7 +298,6 @@ DECLARE_EVENT_CLASS(btrfs__writepage, __entry->pages_skipped = wbc->pages_skipped; __entry->range_start = wbc->range_start; __entry->range_end = wbc->range_end; - __entry->nonblocking = wbc->nonblocking; __entry->for_kupdate = wbc->for_kupdate; __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; @@ -310,13 +308,13 @@ DECLARE_EVENT_CLASS(btrfs__writepage, TP_printk("root = %llu(%s), ino = %lu, page_index = %lu, " "nr_to_write = %ld, pages_skipped = %ld, range_start = %llu, " - "range_end = %llu, nonblocking = %d, for_kupdate = %d, " + "range_end = %llu, for_kupdate = %d, " "for_reclaim = %d, range_cyclic = %d, writeback_index = %lu", show_root_type(__entry->root_objectid), (unsigned long)__entry->ino, __entry->index, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, - __entry->nonblocking, __entry->for_kupdate, + __entry->for_kupdate, __entry->for_reclaim, __entry->range_cyclic, (unsigned long)__entry->writeback_index) ); -- cgit v1.2.3 From 251d6a471c831e22880b3c146bb4556ddfb1dc82 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 1 Dec 2010 17:33:37 -0600 Subject: writeback: trace event writeback_single_inode It is valuable to know how the dirty inodes are iterated and their IO size. "writeback_single_inode: bdi 8:0: ino=134246746 state=I_DIRTY_SYNC|I_SYNC age=414 index=0 to_write=1024 wrote=0" - "state" reflects inode->i_state at the end of writeback_single_inode() - "index" reflects mapping->writeback_index after the ->writepages() call - "to_write" is the wbc->nr_to_write at entrance of writeback_single_inode() - "wrote" is the number of pages actually written v2: add trace event writeback_single_inode_requeue as proposed by Dave. CC: Dave Chinner Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 70 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index b2cfac5f3313..898277bc89b4 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -8,6 +8,19 @@ #include #include +#define show_inode_state(state) \ + __print_flags(state, "|", \ + {I_DIRTY_SYNC, "I_DIRTY_SYNC"}, \ + {I_DIRTY_DATASYNC, "I_DIRTY_DATASYNC"}, \ + {I_DIRTY_PAGES, "I_DIRTY_PAGES"}, \ + {I_NEW, "I_NEW"}, \ + {I_WILL_FREE, "I_WILL_FREE"}, \ + {I_FREEING, "I_FREEING"}, \ + {I_CLEAR, "I_CLEAR"}, \ + {I_SYNC, "I_SYNC"}, \ + {I_REFERENCED, "I_REFERENCED"} \ + ) + struct wb_writeback_work; DECLARE_EVENT_CLASS(writeback_work_class, @@ -184,6 +197,63 @@ DEFINE_EVENT(writeback_congest_waited_template, writeback_wait_iff_congested, TP_ARGS(usec_timeout, usec_delayed) ); +DECLARE_EVENT_CLASS(writeback_single_inode_template, + + TP_PROTO(struct inode *inode, + struct writeback_control *wbc, + unsigned long nr_to_write + ), + + TP_ARGS(inode, wbc, nr_to_write), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(unsigned long, ino) + __field(unsigned long, state) + __field(unsigned long, age) + __field(unsigned long, writeback_index) + __field(long, nr_to_write) + __field(unsigned long, wrote) + ), + + TP_fast_assign( + strncpy(__entry->name, + dev_name(inode->i_mapping->backing_dev_info->dev), 32); + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->age = (jiffies - inode->dirtied_when) * + 1000 / HZ; + __entry->writeback_index = inode->i_mapping->writeback_index; + __entry->nr_to_write = nr_to_write; + __entry->wrote = nr_to_write - wbc->nr_to_write; + ), + + TP_printk("bdi %s: ino=%lu state=%s age=%lu " + "index=%lu to_write=%ld wrote=%lu", + __entry->name, + __entry->ino, + show_inode_state(__entry->state), + __entry->age, + __entry->writeback_index, + __entry->nr_to_write, + __entry->wrote + ) +); + +DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_requeue, + TP_PROTO(struct inode *inode, + struct writeback_control *wbc, + unsigned long nr_to_write), + TP_ARGS(inode, wbc, nr_to_write) +); + +DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, + TP_PROTO(struct inode *inode, + struct writeback_control *wbc, + unsigned long nr_to_write), + TP_ARGS(inode, wbc, nr_to_write) +); + #endif /* _TRACE_WRITEBACK_H */ /* This part must be outside protection */ -- cgit v1.2.3 From e84d0a4f8e39a73003a6ec9a11b07702745f4c1f Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 23 Apr 2011 12:27:27 -0600 Subject: writeback: trace event writeback_queue_io Note that it adds a little overheads to account the moved/enqueued inodes from b_dirty to b_io. The "moved" accounting may be later used to limit the number of inodes that can be moved in one shot, in order to keep spinlock hold time under control. Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 898277bc89b4..205d14919ef2 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -162,6 +162,31 @@ DEFINE_WBC_EVENT(wbc_balance_dirty_written); DEFINE_WBC_EVENT(wbc_balance_dirty_wait); DEFINE_WBC_EVENT(wbc_writepage); +TRACE_EVENT(writeback_queue_io, + TP_PROTO(struct bdi_writeback *wb, + unsigned long *older_than_this, + int moved), + TP_ARGS(wb, older_than_this, moved), + TP_STRUCT__entry( + __array(char, name, 32) + __field(unsigned long, older) + __field(long, age) + __field(int, moved) + ), + TP_fast_assign( + strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + __entry->older = older_than_this ? *older_than_this : 0; + __entry->age = older_than_this ? + (jiffies - *older_than_this) * 1000 / HZ : -1; + __entry->moved = moved; + ), + TP_printk("bdi %s: older=%lu age=%ld enqueue=%d", + __entry->name, + __entry->older, /* older_than_this in jiffies */ + __entry->age, /* older_than_this in relative milliseconds */ + __entry->moved) +); + DECLARE_EVENT_CLASS(writeback_congest_waited_template, TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), -- cgit v1.2.3 From d46db3d58233be4be980eb1e42eebe7808bcabab Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 4 May 2011 19:54:37 -0600 Subject: writeback: make writeback_control.nr_to_write straight Pass struct wb_writeback_work all the way down to writeback_sb_inodes(), and initialize the struct writeback_control there. struct writeback_control is basically designed to control writeback of a single file, but we keep abuse it for writing multiple files in writeback_sb_inodes() and its callers. It immediately clean things up, e.g. suddenly wbc.nr_to_write vs work->nr_pages starts to make sense, and instead of saving and restoring pages_skipped in writeback_sb_inodes it can always start with a clean zero value. It also makes a neat IO pattern change: large dirty files are now written in the full 4MB writeback chunk size, rather than whatever remained quota in wbc->nr_to_write. Acked-by: Jan Kara Proposed-by: Christoph Hellwig Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 205d14919ef2..3e7662a0cfa3 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -62,6 +62,9 @@ DEFINE_EVENT(writeback_work_class, name, \ DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread); DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); +DEFINE_WRITEBACK_WORK_EVENT(writeback_start); +DEFINE_WRITEBACK_WORK_EVENT(writeback_written); +DEFINE_WRITEBACK_WORK_EVENT(writeback_wait); TRACE_EVENT(writeback_pages_written, TP_PROTO(long pages_written), @@ -101,6 +104,30 @@ DEFINE_WRITEBACK_EVENT(writeback_bdi_register); DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); DEFINE_WRITEBACK_EVENT(writeback_thread_start); DEFINE_WRITEBACK_EVENT(writeback_thread_stop); +DEFINE_WRITEBACK_EVENT(balance_dirty_start); +DEFINE_WRITEBACK_EVENT(balance_dirty_wait); + +TRACE_EVENT(balance_dirty_written, + + TP_PROTO(struct backing_dev_info *bdi, int written), + + TP_ARGS(bdi, written), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(int, written) + ), + + TP_fast_assign( + strncpy(__entry->name, dev_name(bdi->dev), 32); + __entry->written = written; + ), + + TP_printk("bdi %s written %d", + __entry->name, + __entry->written + ) +); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), @@ -114,7 +141,6 @@ DECLARE_EVENT_CLASS(wbc_class, __field(int, for_background) __field(int, for_reclaim) __field(int, range_cyclic) - __field(unsigned long, older_than_this) __field(long, range_start) __field(long, range_end) ), @@ -128,14 +154,12 @@ DECLARE_EVENT_CLASS(wbc_class, __entry->for_background = wbc->for_background; __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; - __entry->older_than_this = wbc->older_than_this ? - *wbc->older_than_this : 0; __entry->range_start = (long)wbc->range_start; __entry->range_end = (long)wbc->range_end; ), TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " - "bgrd=%d reclm=%d cyclic=%d older=0x%lx " + "bgrd=%d reclm=%d cyclic=%d " "start=0x%lx end=0x%lx", __entry->name, __entry->nr_to_write, @@ -145,7 +169,6 @@ DECLARE_EVENT_CLASS(wbc_class, __entry->for_background, __entry->for_reclaim, __entry->range_cyclic, - __entry->older_than_this, __entry->range_start, __entry->range_end) ) @@ -154,12 +177,6 @@ DECLARE_EVENT_CLASS(wbc_class, DEFINE_EVENT(wbc_class, name, \ TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \ TP_ARGS(wbc, bdi)) -DEFINE_WBC_EVENT(wbc_writeback_start); -DEFINE_WBC_EVENT(wbc_writeback_written); -DEFINE_WBC_EVENT(wbc_writeback_wait); -DEFINE_WBC_EVENT(wbc_balance_dirty_start); -DEFINE_WBC_EVENT(wbc_balance_dirty_written); -DEFINE_WBC_EVENT(wbc_balance_dirty_wait); DEFINE_WBC_EVENT(wbc_writepage); TRACE_EVENT(writeback_queue_io, -- cgit v1.2.3 From e1cbe236013c82bcf9a156e98d7b47efb89d2674 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 6 Dec 2010 22:34:29 -0600 Subject: writeback: trace global_dirty_state Add trace event balance_dirty_state for showing the global dirty page counts and thresholds at each global_dirty_limits() invocation. This will cover the callers throttle_vm_writeout(), over_bground_thresh() and each balance_dirty_pages() loop. Signed-off-by: Wu Fengguang --- include/trace/events/writeback.h | 46 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 3e7662a0cfa3..6bca4cc0063c 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -204,6 +204,52 @@ TRACE_EVENT(writeback_queue_io, __entry->moved) ); +TRACE_EVENT(global_dirty_state, + + TP_PROTO(unsigned long background_thresh, + unsigned long dirty_thresh + ), + + TP_ARGS(background_thresh, + dirty_thresh + ), + + TP_STRUCT__entry( + __field(unsigned long, nr_dirty) + __field(unsigned long, nr_writeback) + __field(unsigned long, nr_unstable) + __field(unsigned long, background_thresh) + __field(unsigned long, dirty_thresh) + __field(unsigned long, dirty_limit) + __field(unsigned long, nr_dirtied) + __field(unsigned long, nr_written) + ), + + TP_fast_assign( + __entry->nr_dirty = global_page_state(NR_FILE_DIRTY); + __entry->nr_writeback = global_page_state(NR_WRITEBACK); + __entry->nr_unstable = global_page_state(NR_UNSTABLE_NFS); + __entry->nr_dirtied = global_page_state(NR_DIRTIED); + __entry->nr_written = global_page_state(NR_WRITTEN); + __entry->background_thresh = background_thresh; + __entry->dirty_thresh = dirty_thresh; + __entry->dirty_limit = global_dirty_limit; + ), + + TP_printk("dirty=%lu writeback=%lu unstable=%lu " + "bg_thresh=%lu thresh=%lu limit=%lu " + "dirtied=%lu written=%lu", + __entry->nr_dirty, + __entry->nr_writeback, + __entry->nr_unstable, + __entry->background_thresh, + __entry->dirty_thresh, + __entry->dirty_limit, + __entry->nr_dirtied, + __entry->nr_written + ) +); + DECLARE_EVENT_CLASS(writeback_congest_waited_template, TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), -- cgit v1.2.3