summaryrefslogtreecommitdiff
path: root/drivers/md/dm-raid1.c
diff options
context:
space:
mode:
authorMikulas Patocka <mpatocka@redhat.com>2009-12-10 23:52:05 +0000
committerAlasdair G Kergon <agk@redhat.com>2009-12-10 23:52:05 +0000
commit60f355ead31e2be8d06ac8acb163df91a1c64e3b (patch)
tree11a011cb6bcbf4adc27b871556a6d475b6826dd3 /drivers/md/dm-raid1.c
parentc58098be979509a54021e837a47fcad08db31f94 (diff)
dm raid1: hold write bios when errors are handled
Hold all write bios when errors are handled. Previously the failures list was used only when handling errors with a userspace daemon such as dmeventd. Now, it is always used for all bios. The regions where some writes failed must be marked as nosync. This can only be done in process context (i.e. in raid1 workqueue), not in the write_callback function. Previously the write would succeed if writing to at least one leg succeeded. This is wrong because data from the failed leg may be replicated to the correct leg. Now, if using a userspace daemon, the write with some failures will be held until the daemon has done its job and reconfigured the array. If not using a daemon, the write still succeeds if at least one leg succeeds. This is bad, but it is consistent with current behavior. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Reviewed-by: Takahiro Yasui <tyasui@redhat.com> Tested-by: Takahiro Yasui <tyasui@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md/dm-raid1.c')
-rw-r--r--drivers/md/dm-raid1.c63
1 files changed, 32 insertions, 31 deletions
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4f466ad75680..e363335e8d81 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -578,7 +578,6 @@ static void write_callback(unsigned long error, void *context)
unsigned i, ret = 0;
struct bio *bio = (struct bio *) context;
struct mirror_set *ms;
- int uptodate = 0;
int should_wake = 0;
unsigned long flags;
@@ -591,36 +590,27 @@ static void write_callback(unsigned long error, void *context)
* This way we handle both writes to SYNC and NOSYNC
* regions with the same code.
*/
- if (likely(!error))
- goto out;
+ if (likely(!error)) {
+ bio_endio(bio, ret);
+ return;
+ }
for (i = 0; i < ms->nr_mirrors; i++)
if (test_bit(i, &error))
fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);
- else
- uptodate = 1;
- if (unlikely(!uptodate)) {
- DMERR("All replicated volumes dead, failing I/O");
- /* None of the writes succeeded, fail the I/O. */
- ret = -EIO;
- } else if (errors_handled(ms)) {
- /*
- * Need to raise event. Since raising
- * events can block, we need to do it in
- * the main thread.
- */
- spin_lock_irqsave(&ms->lock, flags);
- if (!ms->failures.head)
- should_wake = 1;
- bio_list_add(&ms->failures, bio);
- spin_unlock_irqrestore(&ms->lock, flags);
- if (should_wake)
- wakeup_mirrord(ms);
- return;
- }
-out:
- bio_endio(bio, ret);
+ /*
+ * Need to raise event. Since raising
+ * events can block, we need to do it in
+ * the main thread.
+ */
+ spin_lock_irqsave(&ms->lock, flags);
+ if (!ms->failures.head)
+ should_wake = 1;
+ bio_list_add(&ms->failures, bio);
+ spin_unlock_irqrestore(&ms->lock, flags);
+ if (should_wake)
+ wakeup_mirrord(ms);
}
static void do_write(struct mirror_set *ms, struct bio *bio)
@@ -773,15 +763,26 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
* for us to treat them the same and requeue them
* as well.
*/
-
while ((bio = bio_list_pop(failures))) {
- if (ms->log_failure)
- hold_bio(ms, bio);
- else {
+ if (!ms->log_failure) {
ms->in_sync = 0;
dm_rh_mark_nosync(ms->rh, bio);
- bio_endio(bio, 0);
}
+
+ /*
+ * If all the legs are dead, fail the I/O.
+ * If we have been told to handle errors, hold the bio
+ * and wait for userspace to deal with the problem.
+ * Otherwise pretend that the I/O succeeded. (This would
+ * be wrong if the failed leg returned after reboot and
+ * got replicated back to the good legs.)
+ */
+ if (!get_valid_mirror(ms))
+ bio_endio(bio, -EIO);
+ else if (errors_handled(ms))
+ hold_bio(ms, bio);
+ else
+ bio_endio(bio, 0);
}
}