summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-05-11 14:34:56 +1000
committerNeilBrown <neilb@suse.de>2011-05-11 14:34:56 +1000
commit76073054c95b12af6bd0cc9b9462a265b45ba38f (patch)
tree78f830289dd8bb5337a7d3efa442ae44abd4dbab
parent56d9912106b0974ffb6dd264c80c7e816677e998 (diff)
md/raid1: clean up read_balance.
read_balance has two loops which both look for a 'best' device based on slightly different criteria. This is clumsy and makes is hard to add extra criteria. So replace it all with a single loop that combines everything. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid1.c83
1 files changed, 34 insertions, 49 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2b7a7ff401dc..f0b0c79b3899 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
{
const sector_t this_sector = r1_bio->sector;
const int sectors = r1_bio->sectors;
- int new_disk = -1;
int start_disk;
+ int best_disk;
int i;
- sector_t new_distance, current_distance;
+ sector_t best_dist;
mdk_rdev_t *rdev;
int choose_first;
@@ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
* We take the first readable disk when above the resync window.
*/
retry:
+ best_disk = -1;
+ best_dist = MaxSector;
if (conf->mddev->recovery_cp < MaxSector &&
(this_sector + sectors >= conf->next_resync)) {
choose_first = 1;
@@ -434,8 +436,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
start_disk = conf->last_used;
}
- /* make sure the disk is operational */
for (i = 0 ; i < conf->raid_disks ; i++) {
+ sector_t dist;
int disk = start_disk + i;
if (disk >= conf->raid_disks)
disk -= conf->raid_disks;
@@ -443,60 +445,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
|| rdev == NULL
- || !test_bit(In_sync, &rdev->flags))
+ || test_bit(Faulty, &rdev->flags))
continue;
-
- new_disk = disk;
- if (!test_bit(WriteMostly, &rdev->flags))
- break;
- }
-
- if (new_disk < 0 || choose_first)
- goto rb_out;
-
- /*
- * Don't change to another disk for sequential reads:
- */
- if (conf->next_seq_sect == this_sector)
- goto rb_out;
- if (this_sector == conf->mirrors[new_disk].head_position)
- goto rb_out;
-
- current_distance = abs(this_sector
- - conf->mirrors[new_disk].head_position);
-
- /* look for a better disk - i.e. head is closer */
- start_disk = new_disk;
- for (i = 1; i < conf->raid_disks; i++) {
- int disk = start_disk + 1;
- if (disk >= conf->raid_disks)
- disk -= conf->raid_disks;
-
- rdev = rcu_dereference(conf->mirrors[disk].rdev);
- if (r1_bio->bios[disk] == IO_BLOCKED
- || rdev == NULL
- || !test_bit(In_sync, &rdev->flags)
- || test_bit(WriteMostly, &rdev->flags))
+ if (!test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < this_sector + sectors)
continue;
-
- if (!atomic_read(&rdev->nr_pending)) {
- new_disk = disk;
+ if (test_bit(WriteMostly, &rdev->flags)) {
+ /* Don't balance among write-mostly, just
+ * use the first as a last resort */
+ if (best_disk < 0)
+ best_disk = disk;
+ continue;
+ }
+ /* This is a reasonable device to use. It might
+ * even be best.
+ */
+ dist = abs(this_sector - conf->mirrors[disk].head_position);
+ if (choose_first
+ /* Don't change to another disk for sequential reads */
+ || conf->next_seq_sect == this_sector
+ || dist == 0
+ /* If device is idle, use it */
+ || atomic_read(&rdev->nr_pending) == 0) {
+ best_disk = disk;
break;
}
- new_distance = abs(this_sector - conf->mirrors[disk].head_position);
- if (new_distance < current_distance) {
- current_distance = new_distance;
- new_disk = disk;
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_disk = disk;
}
}
- rb_out:
- if (new_disk >= 0) {
- rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
+ if (best_disk >= 0) {
+ rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
if (!rdev)
goto retry;
atomic_inc(&rdev->nr_pending);
- if (!test_bit(In_sync, &rdev->flags)) {
+ if (test_bit(Faulty, &rdev->flags)) {
/* cannot risk returning a device that failed
* before we inc'ed nr_pending
*/
@@ -504,11 +489,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
goto retry;
}
conf->next_seq_sect = this_sector + sectors;
- conf->last_used = new_disk;
+ conf->last_used = best_disk;
}
rcu_read_unlock();
- return new_disk;
+ return best_disk;
}
static int raid1_congested(void *data, int bits)