Merge tag 'md/4.3' of git://neil.brown.name/md

Pull md updates from Neil Brown: - an assortment of little fixes, several for minor races only likely to be hit during testing - further cluster-md-raid1 development, not ready for real use yet. - new RAID6 syndrome code for ARM NEON - fix a race where a write can return before failure of one device is properly recorded in metadata, so an immediate crash might result in that write being lost. * tag 'md/4.3' of git://neil.brown.name/md: (33 commits) md/raid5: ensure device failure recorded before write request returns. md/raid5: use bio_list for the list of bios to return. md/raid10: ensure device failure recorded before write request returns. md/raid1: ensure device failure recorded before write request returns. md-cluster: remove inappropriate try_module_get from join() md: extend spinlock protection in register_md_cluster_operations md-cluster: Read the disk bitmap sb and check if it needs recovery md-cluster: only call complete(&cinfo->completion) when node join cluster md-cluster: add missed lockres_free md-cluster: remove the unused sb_lock md-cluster: init suspend_list and suspend_lock early in join md-cluster: add the error check if failed to get dlm lock md-cluster: init completion within lockres_init md-cluster: fix deadlock issue on message lock md-cluster: transfer the resync ownership to another node md-cluster: split recover_slot for future code reuse md-cluster: use %pU to print UUIDs md: setup safemode_timer before it's being used md/raid5: handle possible race as reshape completes. md: sync sync_completed has correct value as recovery finishes. ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-09-05 17:52:22 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-09-05 17:52:22 -0700
commit: 2a013e37ce691a7c072df27b35e9790fc8f5a82f (patch)
tree: 0d81e512ebb93484602a0802396ed7c1f465d3b4 /drivers/md/raid10.c
parent: 17447717a3266965e257d3eae79d89539ce3ec0a (diff)
parent: e89c6fdf9e0eb1b5a03574d4ca73e83eae8deb91 (diff)
1 files changed, 30 insertions, 3 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b0fce2ebf7ad..0fc33eb88855 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1589,6 +1589,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	set_bit(MD_CHANGE_PENDING, &mddev->flags);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 	printk(KERN_ALERT
 	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
@@ -2623,6 +2624,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 		}
 		put_buf(r10_bio);
 	} else {
+		bool fail = false;
 		for (m = 0; m < conf->copies; m++) {
 			int dev = r10_bio->devs[m].devnum;
 			struct bio *bio = r10_bio->devs[m].bio;
@@ -2634,6 +2636,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 					r10_bio->sectors, 0);
 				rdev_dec_pending(rdev, conf->mddev);
 			} else if (bio != NULL && bio->bi_error) {
+				fail = true;
 				if (!narrow_write_error(r10_bio, m)) {
 					md_error(conf->mddev, rdev);
 					set_bit(R10BIO_Degraded,
@@ -2654,7 +2657,13 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 		if (test_bit(R10BIO_WriteError,
 			     &r10_bio->state))
 			close_write(r10_bio);
-		raid_end_bio_io(r10_bio);
+		if (fail) {
+			spin_lock_irq(&conf->device_lock);
+			list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
+			spin_unlock_irq(&conf->device_lock);
+			md_wakeup_thread(conf->mddev->thread);
+		} else
+			raid_end_bio_io(r10_bio);
 	}
 }
 
@@ -2669,6 +2678,23 @@ static void raid10d(struct md_thread *thread)
 
 	md_check_recovery(mddev);
 
+	if (!list_empty_careful(&conf->bio_end_io_list) &&
+	    !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+		LIST_HEAD(tmp);
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
+			list_add(&tmp, &conf->bio_end_io_list);
+			list_del_init(&conf->bio_end_io_list);
+		}
+		spin_unlock_irqrestore(&conf->device_lock, flags);
+		while (!list_empty(&tmp)) {
+			r10_bio = list_first_entry(&conf->bio_end_io_list,
+						  struct r10bio, retry_list);
+			list_del(&r10_bio->retry_list);
+			raid_end_bio_io(r10_bio);
+		}
+	}
+
 	blk_start_plug(&plug);
 	for (;;) {
 
@@ -3443,6 +3469,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	conf->reshape_safe = conf->reshape_progress;
 	spin_lock_init(&conf->device_lock);
 	INIT_LIST_HEAD(&conf->retry_list);
+	INIT_LIST_HEAD(&conf->bio_end_io_list);
 
 	spin_lock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
@@ -4097,7 +4124,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 	 * at a time, possibly less if that exceeds RESYNC_PAGES,
 	 * or we hit a bad block or something.
 	 * This might mean we pause for normal IO in the middle of
-	 * a chunk, but that is not a problem was mddev->reshape_position
+	 * a chunk, but that is not a problem as mddev->reshape_position
 	 * can record any location.
 	 *
 	 * If we will want to write to a location that isn't
@@ -4121,7 +4148,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 	 *
 	 * In all this the minimum difference in data offsets
 	 * (conf->offset_diff - always positive) allows a bit of slack,
-	 * so next can be after 'safe', but not by more than offset_disk
+	 * so next can be after 'safe', but not by more than offset_diff
 	 *
 	 * We need to prepare all the bios here before we start any IO
 	 * to ensure the size we choose is acceptable to all devices.
author	Linus Torvalds <torvalds@linux-foundation.org>	2015-09-05 17:52:22 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-09-05 17:52:22 -0700
commit	2a013e37ce691a7c072df27b35e9790fc8f5a82f (patch)
tree	0d81e512ebb93484602a0802396ed7c1f465d3b4 /drivers/md/raid10.c
parent	17447717a3266965e257d3eae79d89539ce3ec0a (diff)
parent	e89c6fdf9e0eb1b5a03574d4ca73e83eae8deb91 (diff)