From d84275c938e1a5e2dc5b89eb9b878e0ddb2c55e0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 9 Jun 2009 15:39:08 -0400 Subject: Btrfs: don't allow WRITE_SYNC bios to starve out regular writes Btrfs uses dedicated threads to submit bios when checksumming is on, which allows us to make sure the threads dedicated to checksumming don't get stuck waiting for requests. For each btrfs device, there are two lists of bios. One list is for WRITE_SYNC bios and the other is for regular priority bios. The IO submission threads used to process all of the WRITE_SYNC bios first and then switch to the regular bios. This commit makes sure we don't completely starve the regular bios by rotating between the two lists. WRITE_SYNC bios are still favored 2:1 over the regular bios, and this tries to run in batches to avoid seeking. Benchmarking shows this eliminates stalls during streaming buffered writes on both multi-device and single device filesystems. If the regular bios starve, the system can end up with a large amount of ram pinned down in writeback pages. If we are a little more fair between the two classes, we're able to keep throughput up and make progress on the bulk of our dirty ram. Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8bc6a8807482..9d3618192009 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -163,6 +163,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) unsigned long num_sync_run; unsigned long limit; unsigned long last_waited = 0; + int force_reg = 0; bdi = blk_get_backing_dev_info(device->bdev); fs_info = device->dev_root->fs_info; @@ -176,19 +177,22 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) loop: spin_lock(&device->io_lock); - num_run = 0; loop_lock: + num_run = 0; /* take all the bios off the list at once and process them * later on (without the lock held). But, remember the * tail and other pointers so the bios can be properly reinserted * into the list if we hit congestion */ - if (device->pending_sync_bios.head) + if (!force_reg && device->pending_sync_bios.head) { pending_bios = &device->pending_sync_bios; - else + force_reg = 1; + } else { pending_bios = &device->pending_bios; + force_reg = 0; + } pending = pending_bios->head; tail = pending_bios->tail; @@ -228,10 +232,14 @@ loop_lock: while (pending) { rmb(); - if (pending_bios != &device->pending_sync_bios && - device->pending_sync_bios.head && - num_run > 16) { - cond_resched(); + /* we want to work on both lists, but do more bios on the + * sync list than the regular list + */ + if ((num_run > 32 && + pending_bios != &device->pending_sync_bios && + device->pending_sync_bios.head) || + (num_run > 64 && pending_bios == &device->pending_sync_bios && + device->pending_bios.head)) { spin_lock(&device->io_lock); requeue_list(pending_bios, pending, tail); goto loop_lock; -- cgit v1.2.3