diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-30 17:16:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-30 17:16:17 -0700 |
commit | f1e9a236e5ddab6c349611ee86f54291916f226c (patch) | |
tree | be3a2ede1b6c09793403f50a02729563b512ab45 /drivers | |
parent | 2e1deaad1e48453cea782854ab87df3f78c121c2 (diff) | |
parent | 32f9f570d04461a41bdcd5c1d93b41ebc5ce182a (diff) |
Merge tag 'md-3.10' of git://neil.brown.name/md
Pull md fixes from NeilBrown:
"A mixed bag of little fixes. No real new functionality here. Several
patches are tagged for -stable."
* tag 'md-3.10' of git://neil.brown.name/md:
MD: ignore discard request for hard disks of hybid raid1/raid10 array
md: bad block list should default to disabled.
md: raid1/raid10 md devices leak memory when stopping
DM RAID: Add message/status support for changing sync action
MD: Export 'md_reap_sync_thread' function
md: don't update metadata when stopping a read-only array.
md: Allow devices to be re-added to a read-only array.
md/raid10: Allow skipping recovery when clean arrays are assembled
MD: Fix typos in MD documentation
md/raid5: avoid an extra write when writing to a known-bad-block.
md/raid5: Change or of some order to improve efficiency.
md: use set_bit_le and clear_bit_le
md: HOT_DISK_REMOVE shouldn't make a read-auto device active.
md: use common code for all calls to ->hot_remove_disk()
md: never update metadata when array is read-only.
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/bitmap.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 111 | ||||
-rw-r--r-- | drivers/md/md.c | 235 | ||||
-rw-r--r-- | drivers/md/md.h | 1 | ||||
-rw-r--r-- | drivers/md/raid1.c | 8 | ||||
-rw-r--r-- | drivers/md/raid10.c | 24 | ||||
-rw-r--r-- | drivers/md/raid5.c | 16 |
7 files changed, 293 insertions, 106 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 4fd9d6aeff6a..5a2c75499824 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -846,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) set_bit(bit, kaddr); else - test_and_set_bit_le(bit, kaddr); + set_bit_le(bit, kaddr); kunmap_atomic(kaddr); pr_debug("set file bit %lu page %lu\n", bit, page->index); /* record page number so it gets flushed to disk when unplug occurs */ @@ -868,7 +868,7 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) clear_bit(bit, paddr); else - test_and_clear_bit_le(bit, paddr); + clear_bit_le(bit, paddr); kunmap_atomic(paddr); if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) { set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 311e3d35b272..1d3fe1a40a9b 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1279,6 +1279,31 @@ static int raid_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } +static const char *decipher_sync_action(struct mddev *mddev) +{ + if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) + return "frozen"; + + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) { + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) + return "reshape"; + + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { + if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) + return "resync"; + else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) + return "check"; + return "repair"; + } + + if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) + return "recover"; + } + + return "idle"; +} + static void raid_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { @@ -1298,8 +1323,18 @@ static void raid_status(struct dm_target *ti, status_type_t type, sync = rs->md.recovery_cp; if (sync >= rs->md.resync_max_sectors) { + /* + * Sync complete. + */ array_in_sync = 1; sync = rs->md.resync_max_sectors; + } else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) { + /* + * If "check" or "repair" is occurring, the array has + * undergone and initial sync and the health characters + * should not be 'a' anymore. + */ + array_in_sync = 1; } else { /* * The array may be doing an initial sync, or it may @@ -1311,6 +1346,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) array_in_sync = 1; } + /* * Status characters: * 'D' = Dead/Failed device @@ -1339,6 +1375,21 @@ static void raid_status(struct dm_target *ti, status_type_t type, (unsigned long long) sync, (unsigned long long) rs->md.resync_max_sectors); + /* + * Sync action: + * See Documentation/device-mapper/dm-raid.c for + * information on each of these states. + */ + DMEMIT(" %s", decipher_sync_action(&rs->md)); + + /* + * resync_mismatches/mismatch_cnt + * This field shows the number of discrepancies found when + * performing a "check" of the array. + */ + DMEMIT(" %llu", + (unsigned long long) + atomic64_read(&rs->md.resync_mismatches)); break; case STATUSTYPE_TABLE: /* The string you would use to construct this array */ @@ -1425,7 +1476,62 @@ static void raid_status(struct dm_target *ti, status_type_t type, } } -static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) +static int raid_message(struct dm_target *ti, unsigned argc, char **argv) +{ + struct raid_set *rs = ti->private; + struct mddev *mddev = &rs->md; + + if (!strcasecmp(argv[0], "reshape")) { + DMERR("Reshape not supported."); + return -EINVAL; + } + + if (!mddev->pers || !mddev->pers->sync_request) + return -EINVAL; + + if (!strcasecmp(argv[0], "frozen")) + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + else + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + + if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) { + if (mddev->sync_thread) { + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + md_reap_sync_thread(mddev); + } + } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) + return -EBUSY; + else if (!strcasecmp(argv[0], "resync")) + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + else if (!strcasecmp(argv[0], "recover")) { + set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + } else { + if (!strcasecmp(argv[0], "check")) + set_bit(MD_RECOVERY_CHECK, &mddev->recovery); + else if (!!strcasecmp(argv[0], "repair")) + return -EINVAL; + set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); + set_bit(MD_RECOVERY_SYNC, &mddev->recovery); + } + if (mddev->ro == 2) { + /* A write to sync_action is enough to justify + * canceling read-auto mode + */ + mddev->ro = 0; + if (!mddev->suspended) + md_wakeup_thread(mddev->sync_thread); + } + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + if (!mddev->suspended) + md_wakeup_thread(mddev->thread); + + return 0; +} + +static int raid_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) { struct raid_set *rs = ti->private; unsigned i; @@ -1482,12 +1588,13 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 4, 2}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, .map = raid_map, .status = raid_status, + .message = raid_message, .iterate_devices = raid_iterate_devices, .io_hints = raid_io_hints, .presuspend = raid_presuspend, diff --git a/drivers/md/md.c b/drivers/md/md.c index aeceedfc530b..4c74424c78b0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -72,6 +72,9 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); static struct workqueue_struct *md_wq; static struct workqueue_struct *md_misc_wq; +static int remove_and_add_spares(struct mddev *mddev, + struct md_rdev *this); + #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } /* @@ -1564,8 +1567,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ sector, count, 1) == 0) return -EINVAL; } - } else if (sb->bblog_offset == 0) - rdev->badblocks.shift = -1; + } else if (sb->bblog_offset != 0) + rdev->badblocks.shift = 0; if (!refdev) { ret = 1; @@ -2411,6 +2414,11 @@ static void md_update_sb(struct mddev * mddev, int force_change) int nospares = 0; int any_badblocks_changed = 0; + if (mddev->ro) { + if (force_change) + set_bit(MD_CHANGE_DEVS, &mddev->flags); + return; + } repeat: /* First make sure individual recovery_offsets are correct */ rdev_for_each(rdev, mddev) { @@ -2800,12 +2808,10 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) /* personality does all needed checks */ if (rdev->mddev->pers->hot_remove_disk == NULL) return -EINVAL; - err = rdev->mddev->pers-> - hot_remove_disk(rdev->mddev, rdev); - if (err) - return err; - sysfs_unlink_rdev(rdev->mddev, rdev); - rdev->raid_disk = -1; + clear_bit(Blocked, &rdev->flags); + remove_and_add_spares(rdev->mddev, rdev); + if (rdev->raid_disk >= 0) + return -EBUSY; set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); md_wakeup_thread(rdev->mddev->thread); } else if (rdev->mddev->pers) { @@ -3221,7 +3227,7 @@ int md_rdev_init(struct md_rdev *rdev) * be used - I wonder if that matters */ rdev->badblocks.count = 0; - rdev->badblocks.shift = 0; + rdev->badblocks.shift = -1; /* disabled until explicitly enabled */ rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL); seqlock_init(&rdev->badblocks.lock); if (rdev->badblocks.page == NULL) @@ -3293,9 +3299,6 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe goto abort_free; } } - if (super_format == -1) - /* hot-add for 0.90, or non-persistent: so no badblocks */ - rdev->badblocks.shift = -1; return rdev; @@ -4225,8 +4228,6 @@ action_show(struct mddev *mddev, char *page) return sprintf(page, "%s\n", type); } -static void reap_sync_thread(struct mddev *mddev); - static ssize_t action_store(struct mddev *mddev, const char *page, size_t len) { @@ -4241,7 +4242,7 @@ action_store(struct mddev *mddev, const char *page, size_t len) if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); - reap_sync_thread(mddev); + md_reap_sync_thread(mddev); } } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) @@ -5279,7 +5280,7 @@ static void __md_stop_writes(struct mddev *mddev) if (mddev->sync_thread) { set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery); - reap_sync_thread(mddev); + md_reap_sync_thread(mddev); } del_timer_sync(&mddev->safemode_timer); @@ -5287,7 +5288,8 @@ static void __md_stop_writes(struct mddev *mddev) bitmap_flush(mddev); md_super_wait(mddev); - if (!mddev->in_sync || mddev->flags) { + if (mddev->ro == 0 && + (!mddev->in_sync || mddev->flags)) { /* mark array as shutdown cleanly */ mddev->in_sync = 1; md_update_sb(mddev, 1); @@ -5810,7 +5812,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) else sysfs_notify_dirent_safe(rdev->sysfs_state); - md_update_sb(mddev, 1); + set_bit(MD_CHANGE_DEVS, &mddev->flags); if (mddev->degraded) set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -5877,6 +5879,9 @@ static int hot_remove_disk(struct mddev * mddev, dev_t dev) if (!rdev) return -ENXIO; + clear_bit(Blocked, &rdev->flags); + remove_and_add_spares(mddev, rdev); + if (rdev->raid_disk >= 0) goto busy; @@ -6490,6 +6495,28 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = md_set_readonly(mddev, bdev); goto done_unlock; + case HOT_REMOVE_DISK: + err = hot_remove_disk(mddev, new_decode_dev(arg)); + goto done_unlock; + + case ADD_NEW_DISK: + /* We can support ADD_NEW_DISK on read-only arrays + * on if we are re-adding a preexisting device. + * So require mddev->pers and MD_DISK_SYNC. + */ + if (mddev->pers) { + mdu_disk_info_t info; + if (copy_from_user(&info, argp, sizeof(info))) + err = -EFAULT; + else if (!(info.state & (1<<MD_DISK_SYNC))) + /* Need to clear read-only for this */ + break; + else + err = add_new_disk(mddev, &info); + goto done_unlock; + } + break; + case BLKROSET: if (get_user(ro, (int __user *)(arg))) { err = -EFAULT; @@ -6560,10 +6587,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, goto done_unlock; } - case HOT_REMOVE_DISK: - err = hot_remove_disk(mddev, new_decode_dev(arg)); - goto done_unlock; - case HOT_ADD_DISK: err = hot_add_disk(mddev, new_decode_dev(arg)); goto done_unlock; @@ -7644,14 +7667,16 @@ void md_do_sync(struct md_thread *thread) } EXPORT_SYMBOL_GPL(md_do_sync); -static int remove_and_add_spares(struct mddev *mddev) +static int remove_and_add_spares(struct mddev *mddev, + struct md_rdev *this) { struct md_rdev *rdev; int spares = 0; int removed = 0; rdev_for_each(rdev, mddev) - if (rdev->raid_disk >= 0 && + if ((this == NULL || rdev == this) && + rdev->raid_disk >= 0 && !test_bit(Blocked, &rdev->flags) && (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) && @@ -7666,74 +7691,52 @@ static int remove_and_add_spares(struct mddev *mddev) if (removed && mddev->kobj.sd) sysfs_notify(&mddev->kobj, NULL, "degraded"); + if (this) + goto no_add; + rdev_for_each(rdev, mddev) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags)) spares++; - if (rdev->raid_disk < 0 - && !test_bit(Faulty, &rdev->flags)) { - rdev->recovery_offset = 0; - if (mddev->pers-> - hot_add_disk(mddev, rdev) == 0) { - if (sysfs_link_rdev(mddev, rdev)) - /* failure here is OK */; - spares++; - md_new_event(mddev); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - } + if (rdev->raid_disk >= 0) + continue; + if (test_bit(Faulty, &rdev->flags)) + continue; + if (mddev->ro && + rdev->saved_raid_disk < 0) + continue; + + rdev->recovery_offset = 0; + if (rdev->saved_raid_disk >= 0 && mddev->in_sync) { + spin_lock_irq(&mddev->write_lock); + if (mddev->in_sync) + /* OK, this device, which is in_sync, + * will definitely be noticed before + * the next write, so recovery isn't + * needed. + */ + rdev->recovery_offset = mddev->recovery_cp; + spin_unlock_irq(&mddev->write_lock); + } + if (mddev->ro && rdev->recovery_offset != MaxSector) + /* not safe to add this disk now */ + continue; + if (mddev->pers-> + hot_add_disk(mddev, rdev) == 0) { + if (sysfs_link_rdev(mddev, rdev)) + /* failure here is OK */; + spares++; + md_new_event(mddev); + set_bit(MD_CHANGE_DEVS, &mddev->flags); } } +no_add: if (removed) set_bit(MD_CHANGE_DEVS, &mddev->flags); return spares; } -static void reap_sync_thread(struct mddev *mddev) -{ - struct md_rdev *rdev; - - /* resync has finished, collect result */ - md_unregister_thread(&mddev->sync_thread); - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && - !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { - /* success...*/ - /* activate any spares */ - if (mddev->pers->spare_active(mddev)) { - sysfs_notify(&mddev->kobj, NULL, - "degraded"); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - } - } - if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && - mddev->pers->finish_reshape) - mddev->pers->finish_reshape(mddev); - - /* If array is no-longer degraded, then any saved_raid_disk - * information must be scrapped. Also if any device is now - * In_sync we must scrape the saved_raid_disk for that device - * do the superblock for an incrementally recovered device - * written out. - */ - rdev_for_each(rdev, mddev) - if (!mddev->degraded || - test_bit(In_sync, &rdev->flags)) - rdev->saved_raid_disk = -1; - - md_update_sb(mddev, 1); - clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); - clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); - /* flag recovery needed just to double check */ - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - sysfs_notify_dirent_safe(mddev->sysfs_action); - md_new_event(mddev); - if (mddev->event_work.func) - queue_work(md_misc_wq, &mddev->event_work); -} - /* * This routine is regularly called by all per-raid-array threads to * deal with generic issues like resync and super-block update. @@ -7789,22 +7792,16 @@ void md_check_recovery(struct mddev *mddev) int spares = 0; if (mddev->ro) { - /* Only thing we do on a ro array is remove - * failed devices. + /* On a read-only array we can: + * - remove failed devices + * - add already-in_sync devices if the array itself + * is in-sync. + * As we only add devices that are already in-sync, + * we can activate the spares immediately. */ - struct md_rdev *rdev; - rdev_for_each(rdev, mddev) - if (rdev->raid_disk >= 0 && - !test_bit(Blocked, &rdev->flags) && - test_bit(Faulty, &rdev->flags) && - atomic_read(&rdev->nr_pending)==0) { - if (mddev->pers->hot_remove_disk( - mddev, rdev) == 0) { - sysfs_unlink_rdev(mddev, rdev); - rdev->raid_disk = -1; - } - } clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + remove_and_add_spares(mddev, NULL); + mddev->pers->spare_active(mddev); goto unlock; } @@ -7836,7 +7833,7 @@ void md_check_recovery(struct mddev *mddev) goto unlock; } if (mddev->sync_thread) { - reap_sync_thread(mddev); + md_reap_sync_thread(mddev); goto unlock; } /* Set RUNNING before clearing NEEDED to avoid @@ -7867,7 +7864,7 @@ void md_check_recovery(struct mddev *mddev) goto unlock; set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - } else if ((spares = remove_and_add_spares(mddev))) { + } else if ((spares = remove_and_add_spares(mddev, NULL))) { clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); @@ -7917,6 +7914,51 @@ void md_check_recovery(struct mddev *mddev) } } +void md_reap_sync_thread(struct mddev *mddev) +{ + struct md_rdev *rdev; + + /* resync has finished, collect result */ + md_unregister_thread(&mddev->sync_thread); + if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && + !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + /* success...*/ + /* activate any spares */ + if (mddev->pers->spare_active(mddev)) { + sysfs_notify(&mddev->kobj, NULL, + "degraded"); + set_bit(MD_CHANGE_DEVS, &mddev->flags); + } + } + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + mddev->pers->finish_reshape) + mddev->pers->finish_reshape(mddev); + + /* If array is no-longer degraded, then any saved_raid_disk + * information must be scrapped. Also if any device is now + * In_sync we must scrape the saved_raid_disk for that device + * do the superblock for an incrementally recovered device + * written out. + */ + rdev_for_each(rdev, mddev) + if (!mddev->degraded || + test_bit(In_sync, &rdev->flags)) + rdev->saved_raid_disk = -1; + + md_update_sb(mddev, 1); + clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); + clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); + clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); + clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + /* flag recovery needed just to double check */ + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + sysfs_notify_dirent_safe(mddev->sysfs_action); + md_new_event(mddev); + if (mddev->event_work.func) + queue_work(md_misc_wq, &mddev->event_work); +} + void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) { sysfs_notify_dirent_safe(rdev->sysfs_state); @@ -8642,6 +8684,7 @@ EXPORT_SYMBOL(md_register_thread); EXPORT_SYMBOL(md_unregister_thread); EXPORT_SYMBOL(md_wakeup_thread); EXPORT_SYMBOL(md_check_recovery); +EXPORT_SYMBOL(md_reap_sync_thread); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD RAID framework"); MODULE_ALIAS("md"); diff --git a/drivers/md/md.h b/drivers/md/md.h index d90fb1a879e1..653f992b687a 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -567,6 +567,7 @@ extern struct md_thread *md_register_thread( extern void md_unregister_thread(struct md_thread **threadp); extern void md_wakeup_thread(struct md_thread *thread); extern void md_check_recovery(struct mddev *mddev); +extern void md_reap_sync_thread(struct mddev *mddev); extern void md_write_start(struct mddev *mddev, struct bio *bi); extern void md_write_end(struct mddev *mddev); extern void md_done_sync(struct mddev *mddev, int blocks, int ok); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fd86b372692d..851023e2ba5d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -981,7 +981,12 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; bio->bi_next = NULL; - generic_make_request(bio); + if (unlikely((bio->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) + /* Just ignore it */ + bio_endio(bio, 0); + else + generic_make_request(bio); bio = next; } kfree(plug); @@ -2901,6 +2906,7 @@ static int stop(struct mddev *mddev) if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); + safe_put_page(conf->tmppage); kfree(conf->poolinfo); kfree(conf); mddev->private = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 77b562d18a90..018741ba9310 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1133,7 +1133,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; bio->bi_next = NULL; - generic_make_request(bio); + if (unlikely((bio->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) + /* Just ignore it */ + bio_endio(bio, 0); + else + generic_make_request(bio); bio = next; } kfree(plug); @@ -2913,6 +2918,22 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (init_resync(conf)) return 0; + /* + * Allow skipping a full rebuild for incremental assembly + * of a clean array, like RAID1 does. + */ + if (mddev->bitmap == NULL && + mddev->recovery_cp == MaxSector && + !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && + conf->fullsync == 0) { + *skipped = 1; + max_sector = mddev->dev_sectors; + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || + test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) + max_sector = mddev->resync_max_sectors; + return max_sector - sector_nr; + } + skipped: max_sector = mddev->dev_sectors; if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || @@ -3810,6 +3831,7 @@ static int stop(struct mddev *mddev) if (conf->r10bio_pool) mempool_destroy(conf->r10bio_pool); + safe_put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf); mddev->private = NULL; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f4e87bfc7567..4a7be455d6d8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1887,8 +1887,15 @@ static void raid5_end_write_request(struct bio *bi, int error) &rdev->mddev->recovery); } else if (is_badblock(rdev, sh->sector, STRIPE_SECTORS, - &first_bad, &bad_sectors)) + &first_bad, &bad_sectors)) { set_bit(R5_MadeGood, &sh->dev[i].flags); + if (test_bit(R5_ReadError, &sh->dev[i].flags)) + /* That was a successful write so make + * sure it looks like we already did + * a re-write. + */ + set_bit(R5_ReWrite, &sh->dev[i].flags); + } } rdev_dec_pending(rdev, conf->mddev); @@ -4672,9 +4679,10 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped = 1; return rv; } - if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && - !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && - !conf->fullsync && sync_blocks >= STRIPE_SECTORS) { + if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && + !conf->fullsync && + !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && + sync_blocks >= STRIPE_SECTORS) { /* we can skip this block, and probably more */ sync_blocks /= STRIPE_SECTORS; *skipped = 1; |