summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/space-info.c40
2 files changed, 40 insertions, 4 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 73b6413cc9af..25eb214f56ac 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9879,10 +9879,6 @@ static int start_delalloc_inodes(struct btrfs_root *root,
&work->work);
} else {
ret = sync_inode(inode, wbc);
- if (!ret &&
- test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- ret = sync_inode(inode, wbc);
btrfs_add_delayed_iput(inode);
if (ret || wbc->nr_to_write <= 0)
goto out;
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index eb90a262563f..d9c8d738678f 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -532,9 +532,49 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
while ((delalloc_bytes || ordered_bytes) && loops < 3) {
u64 temp = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
long nr_pages = min_t(u64, temp, LONG_MAX);
+ int async_pages;
btrfs_start_delalloc_roots(fs_info, nr_pages, true);
+ /*
+ * We need to make sure any outstanding async pages are now
+ * processed before we continue. This is because things like
+ * sync_inode() try to be smart and skip writing if the inode is
+ * marked clean. We don't use filemap_fwrite for flushing
+ * because we want to control how many pages we write out at a
+ * time, thus this is the only safe way to make sure we've
+ * waited for outstanding compressed workers to have started
+ * their jobs and thus have ordered extents set up properly.
+ *
+ * This exists because we do not want to wait for each
+ * individual inode to finish its async work, we simply want to
+ * start the IO on everybody, and then come back here and wait
+ * for all of the async work to catch up. Once we're done with
+ * that we know we'll have ordered extents for everything and we
+ * can decide if we wait for that or not.
+ *
+ * If we choose to replace this in the future, make absolutely
+ * sure that the proper waiting is being done in the async case,
+ * as there have been bugs in that area before.
+ */
+ async_pages = atomic_read(&fs_info->async_delalloc_pages);
+ if (!async_pages)
+ goto skip_async;
+
+ /*
+ * We don't want to wait forever, if we wrote less pages in this
+ * loop than we have outstanding, only wait for that number of
+ * pages, otherwise we can wait for all async pages to finish
+ * before continuing.
+ */
+ if (async_pages > nr_pages)
+ async_pages -= nr_pages;
+ else
+ async_pages = 0;
+ wait_event(fs_info->async_submit_wait,
+ atomic_read(&fs_info->async_delalloc_pages) <=
+ async_pages);
+skip_async:
loops++;
if (wait_ordered && !trans) {
btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);