summaryrefslogtreecommitdiff
path: root/fs/sync.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2013-11-12 15:07:51 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2013-11-13 12:09:07 +0900
commitc4a391b53a72d2df4ee97f96f78c1d5971b47489 (patch)
treeedaedbc54345e62211d36273caea8fa9f60d8916 /fs/sync.c
parent46c77e2bb07eba3b38edfec76873f12942c49dd3 (diff)
writeback: do not sync data dirtied after sync start
When there are processes heavily creating small files while sync(2) is running, it can easily happen that quite some new files are created between WB_SYNC_NONE and WB_SYNC_ALL pass of sync(2). That can happen especially if there are several busy filesystems (remember that sync traverses filesystems sequentially and waits in WB_SYNC_ALL phase on one fs before starting it on another fs). Because WB_SYNC_ALL pass is slow (e.g. causes a transaction commit and cache flush for each inode in ext3), resulting sync(2) times are rather large. The following script reproduces the problem: function run_writers { for (( i = 0; i < 10; i++ )); do mkdir $1/dir$i for (( j = 0; j < 40000; j++ )); do dd if=/dev/zero of=$1/dir$i/$j bs=4k count=4 &>/dev/null done & done } for dir in "$@"; do run_writers $dir done sleep 40 time sync Fix the problem by disregarding inodes dirtied after sync(2) was called in the WB_SYNC_ALL pass. To allow for this, sync_inodes_sb() now takes a time stamp when sync has started which is used for setting up work for flusher threads. To give some numbers, when above script is run on two ext4 filesystems on simple SATA drive, the average sync time from 10 runs is 267.549 seconds with standard deviation 104.799426. With the patched kernel, the average sync time from 10 runs is 2.995 seconds with standard deviation 0.096. Signed-off-by: Jan Kara <jack@suse.cz> Reviewed-by: Fengguang Wu <fengguang.wu@intel.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/sync.c')
-rw-r--r--fs/sync.c15
1 files changed, 9 insertions, 6 deletions
diff --git a/fs/sync.c b/fs/sync.c
index 905f3f6b3d85..ff96f99fef64 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -27,10 +27,11 @@
* wait == 1 case since in that case write_inode() functions do
* sync_dirty_buffer() and thus effectively write one block at a time.
*/
-static int __sync_filesystem(struct super_block *sb, int wait)
+static int __sync_filesystem(struct super_block *sb, int wait,
+ unsigned long start)
{
if (wait)
- sync_inodes_sb(sb);
+ sync_inodes_sb(sb, start);
else
writeback_inodes_sb(sb, WB_REASON_SYNC);
@@ -47,6 +48,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
int sync_filesystem(struct super_block *sb)
{
int ret;
+ unsigned long start = jiffies;
/*
* We need to be protected against the filesystem going from
@@ -60,17 +62,17 @@ int sync_filesystem(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return 0;
- ret = __sync_filesystem(sb, 0);
+ ret = __sync_filesystem(sb, 0, start);
if (ret < 0)
return ret;
- return __sync_filesystem(sb, 1);
+ return __sync_filesystem(sb, 1, start);
}
EXPORT_SYMBOL_GPL(sync_filesystem);
static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
if (!(sb->s_flags & MS_RDONLY))
- sync_inodes_sb(sb);
+ sync_inodes_sb(sb, *((unsigned long *)arg));
}
static void sync_fs_one_sb(struct super_block *sb, void *arg)
@@ -102,9 +104,10 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
SYSCALL_DEFINE0(sync)
{
int nowait = 0, wait = 1;
+ unsigned long start = jiffies;
wakeup_flusher_threads(0, WB_REASON_SYNC);
- iterate_supers(sync_inodes_one_sb, NULL);
+ iterate_supers(sync_inodes_one_sb, &start);
iterate_supers(sync_fs_one_sb, &nowait);
iterate_supers(sync_fs_one_sb, &wait);
iterate_bdevs(fdatawrite_one_bdev, NULL);