diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-05 17:52:22 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-05 17:52:22 -0700 |
commit | 2a013e37ce691a7c072df27b35e9790fc8f5a82f (patch) | |
tree | 0d81e512ebb93484602a0802396ed7c1f465d3b4 /drivers/md/raid10.c | |
parent | 17447717a3266965e257d3eae79d89539ce3ec0a (diff) | |
parent | e89c6fdf9e0eb1b5a03574d4ca73e83eae8deb91 (diff) |
Merge tag 'md/4.3' of git://neil.brown.name/md
Pull md updates from Neil Brown:
- an assortment of little fixes, several for minor races only likely to
be hit during testing
- further cluster-md-raid1 development, not ready for real use yet.
- new RAID6 syndrome code for ARM NEON
- fix a race where a write can return before failure of one device is
properly recorded in metadata, so an immediate crash might result in
that write being lost.
* tag 'md/4.3' of git://neil.brown.name/md: (33 commits)
md/raid5: ensure device failure recorded before write request returns.
md/raid5: use bio_list for the list of bios to return.
md/raid10: ensure device failure recorded before write request returns.
md/raid1: ensure device failure recorded before write request returns.
md-cluster: remove inappropriate try_module_get from join()
md: extend spinlock protection in register_md_cluster_operations
md-cluster: Read the disk bitmap sb and check if it needs recovery
md-cluster: only call complete(&cinfo->completion) when node join cluster
md-cluster: add missed lockres_free
md-cluster: remove the unused sb_lock
md-cluster: init suspend_list and suspend_lock early in join
md-cluster: add the error check if failed to get dlm lock
md-cluster: init completion within lockres_init
md-cluster: fix deadlock issue on message lock
md-cluster: transfer the resync ownership to another node
md-cluster: split recover_slot for future code reuse
md-cluster: use %pU to print UUIDs
md: setup safemode_timer before it's being used
md/raid5: handle possible race as reshape completes.
md: sync sync_completed has correct value as recovery finishes.
...
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 33 |
1 files changed, 30 insertions, 3 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index b0fce2ebf7ad..0fc33eb88855 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1589,6 +1589,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); spin_unlock_irqrestore(&conf->device_lock, flags); printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n" @@ -2623,6 +2624,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) } put_buf(r10_bio); } else { + bool fail = false; for (m = 0; m < conf->copies; m++) { int dev = r10_bio->devs[m].devnum; struct bio *bio = r10_bio->devs[m].bio; @@ -2634,6 +2636,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) r10_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); } else if (bio != NULL && bio->bi_error) { + fail = true; if (!narrow_write_error(r10_bio, m)) { md_error(conf->mddev, rdev); set_bit(R10BIO_Degraded, @@ -2654,7 +2657,13 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) if (test_bit(R10BIO_WriteError, &r10_bio->state)) close_write(r10_bio); - raid_end_bio_io(r10_bio); + if (fail) { + spin_lock_irq(&conf->device_lock); + list_add(&r10_bio->retry_list, &conf->bio_end_io_list); + spin_unlock_irq(&conf->device_lock); + md_wakeup_thread(conf->mddev->thread); + } else + raid_end_bio_io(r10_bio); } } @@ -2669,6 +2678,23 @@ static void raid10d(struct md_thread *thread) md_check_recovery(mddev); + if (!list_empty_careful(&conf->bio_end_io_list) && + !test_bit(MD_CHANGE_PENDING, &mddev->flags)) { + LIST_HEAD(tmp); + spin_lock_irqsave(&conf->device_lock, flags); + if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { + list_add(&tmp, &conf->bio_end_io_list); + list_del_init(&conf->bio_end_io_list); + } + spin_unlock_irqrestore(&conf->device_lock, flags); + while (!list_empty(&tmp)) { + r10_bio = list_first_entry(&conf->bio_end_io_list, + struct r10bio, retry_list); + list_del(&r10_bio->retry_list); + raid_end_bio_io(r10_bio); + } + } + blk_start_plug(&plug); for (;;) { @@ -3443,6 +3469,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) conf->reshape_safe = conf->reshape_progress; spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); + INIT_LIST_HEAD(&conf->bio_end_io_list); spin_lock_init(&conf->resync_lock); init_waitqueue_head(&conf->wait_barrier); @@ -4097,7 +4124,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, * at a time, possibly less if that exceeds RESYNC_PAGES, * or we hit a bad block or something. * This might mean we pause for normal IO in the middle of - * a chunk, but that is not a problem was mddev->reshape_position + * a chunk, but that is not a problem as mddev->reshape_position * can record any location. * * If we will want to write to a location that isn't @@ -4121,7 +4148,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, * * In all this the minimum difference in data offsets * (conf->offset_diff - always positive) allows a bit of slack, - * so next can be after 'safe', but not by more than offset_disk + * so next can be after 'safe', but not by more than offset_diff * * We need to prepare all the bios here before we start any IO * to ensure the size we choose is acceptable to all devices. |