From 3a3a5ddb7a0f43c3dd0f98673f3d930a456725f8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 16 Aug 2010 18:09:31 +1000 Subject: Update recovery_offset even when external metadata is used. The update of ->recovery_offset in sync_sbs is appropriate even then external metadata is in use. However sync_sbs is only called when native metadata is used. So move that update in to the top of md_update_sb (which is the only caller of sync_sbs) before the test on ->external. This moves the update out of ->write_lock protection, but those fields only need ->reconfig_mutex protection which they still have. Also move the test on ->persistent up to where ->external is set as for metadata update purposes they are the same. Clear MD_CHANGE_DEVS and MD_CHANGE_CLEAN as they can only be confusing if ->external is set or ->persistent isn't. Finally move the update of ->utime down as it is only relevent (like the ->events update) for native metadata. Signed-off-by: NeilBrown Reported-by: "Kwolek, Adam" --- drivers/md/md.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 11567c7999a2..c148b6302154 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2136,16 +2136,6 @@ static void sync_sbs(mddev_t * mddev, int nospares) * with the rest of the array) */ mdk_rdev_t *rdev; - - /* First make sure individual recovery_offsets are correct */ - list_for_each_entry(rdev, &mddev->disks, same_set) { - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(In_sync, &rdev->flags) && - mddev->curr_resync_completed > rdev->recovery_offset) - rdev->recovery_offset = mddev->curr_resync_completed; - - } list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->sb_events == mddev->events || (nospares && @@ -2167,12 +2157,27 @@ static void md_update_sb(mddev_t * mddev, int force_change) int sync_req; int nospares = 0; - mddev->utime = get_seconds(); - if (mddev->external) - return; repeat: + /* First make sure individual recovery_offsets are correct */ + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->raid_disk >= 0 && + mddev->delta_disks >= 0 && + !test_bit(In_sync, &rdev->flags) && + mddev->curr_resync_completed > rdev->recovery_offset) + rdev->recovery_offset = mddev->curr_resync_completed; + + } + if (mddev->external || !mddev->persistent) { + clear_bit(MD_CHANGE_DEVS, &mddev->flags); + clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + wake_up(&mddev->sb_wait); + return; + } + spin_lock_irq(&mddev->write_lock); + mddev->utime = get_seconds(); + set_bit(MD_CHANGE_PENDING, &mddev->flags); if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) force_change = 1; @@ -2221,19 +2226,6 @@ repeat: MD_BUG(); mddev->events --; } - - /* - * do not write anything to disk if using - * nonpersistent superblocks - */ - if (!mddev->persistent) { - if (!mddev->external) - clear_bit(MD_CHANGE_PENDING, &mddev->flags); - - spin_unlock_irq(&mddev->write_lock); - wake_up(&mddev->sb_wait); - return; - } sync_sbs(mddev, nospares); spin_unlock_irq(&mddev->write_lock); -- cgit v1.2.3 From bd52b746262c8d77e73903d6608014fb2fcdcd9d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 30 Aug 2010 17:33:33 +1000 Subject: md: don't clear MD_CHANGE_CLEAN in md_update_sb() for external arrays If this bit is cleared in md_update_sb() the kernel will allow writes to the array if userspace triggers md_allow_write(), e.g. through stripe_cache_size, when mdmon is not active. When mdmon is active the array transitions to active-idle bypassing write-pending, setting up a race for mdmon to set the array clean before a write arrives. Signed-off-by: Dan Williams Signed-off-by: NeilBrown --- drivers/md/md.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index c148b6302154..a1f6b59b8b37 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2167,9 +2167,10 @@ repeat: rdev->recovery_offset = mddev->curr_resync_completed; } - if (mddev->external || !mddev->persistent) { + if (!mddev->persistent) { + if (!mddev->external) + clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); wake_up(&mddev->sb_wait); return; } -- cgit v1.2.3 From 070dc6dd7103b6b3f7e4d46e754354a5c15f366e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Aug 2010 17:33:34 +1000 Subject: md: resolve confusion of MD_CHANGE_CLEAN MD_CHANGE_CLEAN is used for two different purposes and this leads to confusion. One of the purposes is largely mirrored by MD_CHANGE_PENDING which is not used for anything else, so have MD_CHANGE_PENDING take over that purpose fully. The two purposes are: 1/ tell md_update_sb that an update is needed and that it is just a clean/dirty transition. 2/ tell user-space that an transition from clean to dirty is pending (something wants to write), and tell te kernel (by clearin the flag) that the transition is OK. The first purpose remains wit MD_CHANGE_CLEAN, the second is moved fully to MD_CHANGE_PENDING. This means that various places which conditionally set or cleared MD_CHANGE_CLEAN no longer need to be conditional. Signed-off-by: NeilBrown --- drivers/md/md.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index a1f6b59b8b37..43cf9cc9c1df 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2168,8 +2168,7 @@ repeat: } if (!mddev->persistent) { - if (!mddev->external) - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); wake_up(&mddev->sb_wait); return; @@ -2179,7 +2178,6 @@ repeat: mddev->utime = get_seconds(); - set_bit(MD_CHANGE_PENDING, &mddev->flags); if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) force_change = 1; if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags)) @@ -3372,7 +3370,7 @@ array_state_show(mddev_t *mddev, char *page) case 0: if (mddev->in_sync) st = clean; - else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) + else if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) st = write_pending; else if (mddev->safemode) st = active_idle; @@ -3453,9 +3451,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) mddev->in_sync = 1; if (mddev->safemode == 1) mddev->safemode = 0; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, - &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); } err = 0; } else @@ -3467,8 +3463,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) case active: if (mddev->pers) { restart_array(mddev); - if (mddev->external) - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + clear_bit(MD_CHANGE_PENDING, &mddev->flags); wake_up(&mddev->sb_wait); err = 0; } else { @@ -6573,6 +6568,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) if (mddev->in_sync) { mddev->in_sync = 0; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); md_wakeup_thread(mddev->thread); did_change = 1; } @@ -6581,7 +6577,6 @@ void md_write_start(mddev_t *mddev, struct bio *bi) if (did_change) sysfs_notify_dirent_safe(mddev->sysfs_state); wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && !test_bit(MD_CHANGE_PENDING, &mddev->flags)); } @@ -6617,6 +6612,7 @@ int md_allow_write(mddev_t *mddev) if (mddev->in_sync) { mddev->in_sync = 0; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); if (mddev->safemode_delay && mddev->safemode == 0) mddev->safemode = 1; @@ -6626,7 +6622,7 @@ int md_allow_write(mddev_t *mddev) } else spin_unlock_irq(&mddev->write_lock); - if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) + if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) return -EAGAIN; else return 0; @@ -6824,8 +6820,7 @@ void md_do_sync(mddev_t *mddev) atomic_read(&mddev->recovery_active) == 0); mddev->curr_resync_completed = mddev->curr_resync; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } @@ -7104,8 +7099,7 @@ void md_check_recovery(mddev_t *mddev) mddev->recovery_cp == MaxSector) { mddev->in_sync = 1; did_change = 1; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); } if (mddev->safemode == 1) mddev->safemode = 0; -- cgit v1.2.3