summary | refs | log | tree | commit | diff
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 207
1 files changed, 114 insertions, 93 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 89a270d29369..aae9ec78c0e8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -92,6 +92,18 @@ static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);
+enum md_ro_state {
+ MD_RDWR, /* read-write; replaces the bare 0 previously stored in mddev->ro */
+ MD_RDONLY, /* read-only; replaces the bare 1 */
+ MD_AUTO_READ, /* auto-read-only; replaces the bare 2, switched to MD_RDWR on first write */
+ MD_MAX_STATE /* NOTE(review): not referenced in the visible hunks; presumably an end marker - confirm */
+};
+
+static bool md_is_rdwr(struct mddev *mddev)
+{
+ return (mddev->ro == MD_RDWR); /* false for both MD_RDONLY and MD_AUTO_READ */
+}
+
/*
* Default number of read corrections we'll attempt on an rdev
* before ejecting it from the array. We divide the read error
@@ -461,7 +473,7 @@ static blk_qc_t md_submit_bio(struct bio *bio)
if (!bio)
return BLK_QC_T_NONE;
- if (mddev->ro == 1 && unlikely(rw == WRITE)) {
+ if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
@@ -2680,7 +2692,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
int any_badblocks_changed = 0;
int ret = -1;
- if (mddev->ro) {
+ if (!md_is_rdwr(mddev)) {
if (force_change)
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
return;
@@ -2992,10 +3004,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
md_error(rdev->mddev, rdev);
- if (test_bit(Faulty, &rdev->flags))
- err = 0;
- else
+
+ if (test_bit(MD_BROKEN, &rdev->mddev->flags))
err = -EBUSY;
+ else
+ err = 0;
} else if (cmd_match(buf, "remove")) {
if (rdev->mddev->pers) {
clear_bit(Blocked, &rdev->flags);
@@ -3952,7 +3965,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
goto out_unlock;
}
rv = -EROFS;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
goto out_unlock;
/* request to change the personality. Need to ensure:
@@ -4158,7 +4171,7 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers) {
if (mddev->pers->check_reshape == NULL)
err = -EBUSY;
- else if (mddev->ro)
+ else if (!md_is_rdwr(mddev))
err = -EROFS;
else {
mddev->new_layout = n;
@@ -4267,7 +4280,7 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers) {
if (mddev->pers->check_reshape == NULL)
err = -EBUSY;
- else if (mddev->ro)
+ else if (!md_is_rdwr(mddev))
err = -EROFS;
else {
mddev->new_chunk_sectors = n >> 9;
@@ -4364,10 +4377,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
* like active, but no writes have been seen for a while (100msec).
*
* broken
- * RAID0/LINEAR-only: same as clean, but array is missing a member.
- * It's useful because RAID0/LINEAR mounted-arrays aren't stopped
- * when a member is gone, so this state will at least alert the
- * user that something is wrong.
+ * The array has failed. This state is useful because mounted arrays
+ * are not stopped when the array fails, so it will at least alert the
+ * user that something is wrong.
*/
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
write_pending, active_idle, broken, bad_word};
@@ -4391,13 +4403,13 @@ array_state_show(struct mddev *mddev, char *page)
if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
switch(mddev->ro) {
- case 1:
+ case MD_RDONLY:
st = readonly;
break;
- case 2:
+ case MD_AUTO_READ:
st = read_auto;
break;
- case 0:
+ case MD_RDWR:
spin_lock(&mddev->lock);
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
st = write_pending;
@@ -4433,7 +4445,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
int err = 0;
enum array_state st = match_word(buf, array_states);
- if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
+ if (mddev->pers && (st == active || st == clean) &&
+ mddev->ro != MD_RDONLY) {
/* don't take reconfig_mutex when toggling between
* clean and active
*/
@@ -4477,23 +4490,23 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers)
err = md_set_readonly(mddev, NULL);
else {
- mddev->ro = 1;
+ mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
err = do_md_run(mddev);
}
break;
case read_auto:
if (mddev->pers) {
- if (mddev->ro == 0)
+ if (md_is_rdwr(mddev))
err = md_set_readonly(mddev, NULL);
- else if (mddev->ro == 1)
+ else if (mddev->ro == MD_RDONLY)
err = restart_array(mddev);
if (err == 0) {
- mddev->ro = 2;
+ mddev->ro = MD_AUTO_READ;
set_disk_ro(mddev->gendisk, 0);
}
} else {
- mddev->ro = 2;
+ mddev->ro = MD_AUTO_READ;
err = do_md_run(mddev);
}
break;
@@ -4518,7 +4531,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
wake_up(&mddev->sb_wait);
err = 0;
} else {
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
set_disk_ro(mddev->gendisk, 0);
err = do_md_run(mddev);
}
@@ -4819,7 +4832,7 @@ action_show(struct mddev *mddev, char *page)
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
type = "frozen";
else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
- (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
+ (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
type = "reshape";
else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
@@ -4892,11 +4905,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
}
- if (mddev->ro == 2) {
+ if (mddev->ro == MD_AUTO_READ) {
/* A write to sync_action is enough to justify
* canceling read-auto mode
*/
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
md_wakeup_thread(mddev->sync_thread);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5124,8 +5137,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len)
goto out_unlock;
err = -EBUSY;
- if (max < mddev->resync_max &&
- mddev->ro == 0 &&
+ if (max < mddev->resync_max && md_is_rdwr(mddev) &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
goto out_unlock;
@@ -5841,8 +5853,8 @@ int md_run(struct mddev *mddev)
continue;
sync_blockdev(rdev->bdev);
invalidate_bdev(rdev->bdev);
- if (mddev->ro != 1 && rdev_read_only(rdev)) {
- mddev->ro = 1;
+ if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
+ mddev->ro = MD_RDONLY;
if (mddev->gendisk)
set_disk_ro(mddev->gendisk, 1);
}
@@ -5945,8 +5957,8 @@ int md_run(struct mddev *mddev)
mddev->ok_start_degraded = start_dirty_degraded;
- if (start_readonly && mddev->ro == 0)
- mddev->ro = 2; /* read-only, but switch on first write */
+ if (start_readonly && md_is_rdwr(mddev))
+ mddev->ro = MD_AUTO_READ; /* read-only, but switch on first write */
err = pers->run(mddev);
if (err)
@@ -6021,8 +6033,8 @@ int md_run(struct mddev *mddev)
mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
- } else if (mddev->ro == 2) /* auto-readonly not meaningful */
- mddev->ro = 0;
+ } else if (mddev->ro == MD_AUTO_READ)
+ mddev->ro = MD_RDWR;
atomic_set(&mddev->max_corr_read_errors,
MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
@@ -6040,7 +6052,7 @@ int md_run(struct mddev *mddev)
if (rdev->raid_disk >= 0)
sysfs_link_rdev(mddev, rdev); /* failure here is OK */
- if (mddev->degraded && !mddev->ro)
+ if (mddev->degraded && md_is_rdwr(mddev))
/* This ensures that recovering status is reported immediately
* via sysfs - until a lack of spares is confirmed.
*/
@@ -6130,7 +6142,7 @@ static int restart_array(struct mddev *mddev)
return -ENXIO;
if (!mddev->pers)
return -EINVAL;
- if (!mddev->ro)
+ if (md_is_rdwr(mddev))
return -EBUSY;
rcu_read_lock();
@@ -6149,7 +6161,7 @@ static int restart_array(struct mddev *mddev)
return -EROFS;
mddev->safemode = 0;
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
set_disk_ro(disk, 0);
pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
/* Kick recovery or resync if necessary */
@@ -6176,7 +6188,7 @@ static void md_clean(struct mddev *mddev)
mddev->clevel[0] = 0;
mddev->flags = 0;
mddev->sb_flags = 0;
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
mddev->metadata_type[0] = 0;
mddev->chunk_sectors = 0;
mddev->ctime = mddev->utime = 0;
@@ -6227,7 +6239,7 @@ static void __md_stop_writes(struct mddev *mddev)
}
md_bitmap_flush(mddev);
- if (mddev->ro == 0 &&
+ if (md_is_rdwr(mddev) &&
((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
mddev->sb_flags)) {
/* mark array as shutdown cleanly */
@@ -6299,6 +6311,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
int err = 0;
int did_freeze = 0;
+ if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+ return -EBUSY;
+
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -6311,8 +6326,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
* which will now never happen */
wake_up_process(mddev->sync_thread->tsk);
- if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
- return -EBUSY;
mddev_unlock(mddev);
wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
&mddev->recovery));
@@ -6325,29 +6338,30 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
mddev->sync_thread ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
pr_warn("md: %s still in use.\n",mdname(mddev));
- if (did_freeze) {
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
- }
err = -EBUSY;
goto out;
}
+
if (mddev->pers) {
__md_stop_writes(mddev);
- err = -ENXIO;
- if (mddev->ro==1)
+ if (mddev->ro == MD_RDONLY) {
+ err = -ENXIO;
goto out;
- mddev->ro = 1;
+ }
+
+ mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
+ }
+
+out:
+ if ((mddev->pers && !err) || did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
sysfs_notify_dirent_safe(mddev->sysfs_state);
- err = 0;
}
-out:
+
mutex_unlock(&mddev->open_mutex);
return err;
}
@@ -6396,7 +6410,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
return -EBUSY;
}
if (mddev->pers) {
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
set_disk_ro(disk, 0);
__md_stop_writes(mddev);
@@ -6413,8 +6427,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
mutex_unlock(&mddev->open_mutex);
mddev->changed = 1;
- if (mddev->ro)
- mddev->ro = 0;
+ if (!md_is_rdwr(mddev))
+ mddev->ro = MD_RDWR;
} else
mutex_unlock(&mddev->open_mutex);
/*
@@ -7226,7 +7240,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
mddev->sync_thread)
return -EBUSY;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
return -EROFS;
rdev_for_each(rdev, mddev) {
@@ -7256,7 +7270,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
/* change the number of raid disks */
if (mddev->pers->check_reshape == NULL)
return -EINVAL;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
return -EROFS;
if (raid_disks <= 0 ||
(mddev->max_disks && raid_disks >= mddev->max_disks))
@@ -7439,7 +7453,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
err = -ENODEV;
else {
md_error(mddev, rdev);
- if (!test_bit(Faulty, &rdev->flags))
+ if (test_bit(MD_BROKEN, &mddev->flags))
err = -EBUSY;
}
rcu_read_unlock();
@@ -7680,26 +7694,25 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
* The remaining ioctls are changing the state of the
* superblock, so we do not allow them on read-only arrays.
*/
- if (mddev->ro && mddev->pers) {
- if (mddev->ro == 2) {
- mddev->ro = 0;
- sysfs_notify_dirent_safe(mddev->sysfs_state);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- /* mddev_unlock will wake thread */
- /* If a device failed while we were read-only, we
- * need to make sure the metadata is updated now.
- */
- if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
- mddev_unlock(mddev);
- wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
- mddev_lock_nointr(mddev);
- }
- } else {
+ if (!md_is_rdwr(mddev) && mddev->pers) {
+ if (mddev->ro != MD_AUTO_READ) {
err = -EROFS;
goto unlock;
}
+ mddev->ro = MD_RDWR;
+ sysfs_notify_dirent_safe(mddev->sysfs_state);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ /* mddev_unlock will wake thread */
+ /* If a device failed while we were read-only, we
+ * need to make sure the metadata is updated now.
+ */
+ if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
+ mddev_unlock(mddev);
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+ mddev_lock_nointr(mddev);
+ }
}
switch (cmd) {
@@ -7785,11 +7798,11 @@ static int md_set_read_only(struct block_device *bdev, bool ro)
* Transitioning to read-auto need only happen for arrays that call
* md_write_start and which are not ready for writes yet.
*/
- if (!ro && mddev->ro == 1 && mddev->pers) {
+ if (!ro && mddev->ro == MD_RDONLY && mddev->pers) {
err = restart_array(mddev);
if (err)
goto out_unlock;
- mddev->ro = 2;
+ mddev->ro = MD_AUTO_READ;
}
out_unlock:
@@ -7985,13 +7998,19 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
if (!mddev->pers || !mddev->pers->error_handler)
return;
- mddev->pers->error_handler(mddev,rdev);
- if (mddev->degraded)
+ mddev->pers->error_handler(mddev, rdev);
+
+ if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
+ return;
+
+ if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ if (!test_bit(MD_BROKEN, &mddev->flags)) {
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ }
if (mddev->event_work.func)
queue_work(md_misc_wq, &mddev->event_work);
md_new_event(mddev);
@@ -8241,9 +8260,9 @@ static int md_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%s : %sactive", mdname(mddev),
mddev->pers ? "" : "in");
if (mddev->pers) {
- if (mddev->ro==1)
+ if (mddev->ro == MD_RDONLY)
seq_printf(seq, " (read-only)");
- if (mddev->ro==2)
+ if (mddev->ro == MD_AUTO_READ)
seq_printf(seq, " (auto-read-only)");
seq_printf(seq, " %s", mddev->pers->name);
}
@@ -8503,10 +8522,10 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
if (bio_data_dir(bi) != WRITE)
return true;
- BUG_ON(mddev->ro == 1);
- if (mddev->ro == 2) {
+ BUG_ON(mddev->ro == MD_RDONLY);
+ if (mddev->ro == MD_AUTO_READ) {
/* need to switch to read/write */
- mddev->ro = 0;
+ mddev->ro = MD_RDWR;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread);
@@ -8557,7 +8576,7 @@ void md_write_inc(struct mddev *mddev, struct bio *bi)
{
if (bio_data_dir(bi) != WRITE)
return;
- WARN_ON_ONCE(mddev->in_sync || mddev->ro);
+ WARN_ON_ONCE(mddev->in_sync || !md_is_rdwr(mddev));
percpu_ref_get(&mddev->writes_pending);
}
EXPORT_SYMBOL(md_write_inc);
@@ -8621,7 +8640,8 @@ static void md_end_io_acct(struct bio *bio)
struct md_io_acct *md_io_acct = bio->bi_private;
struct bio *orig_bio = md_io_acct->orig_bio;
- orig_bio->bi_status = bio->bi_status;
+ if (bio->bi_status && !orig_bio->bi_status)
+ orig_bio->bi_status = bio->bi_status;
bio_end_io_acct(orig_bio, md_io_acct->start_time);
bio_put(bio);
@@ -8661,7 +8681,7 @@ void md_allow_write(struct mddev *mddev)
{
if (!mddev->pers)
return;
- if (mddev->ro)
+ if (!md_is_rdwr(mddev))
return;
if (!mddev->pers->sync_request)
return;
@@ -8710,7 +8730,7 @@ void md_do_sync(struct md_thread *thread)
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
return;
- if (mddev->ro) {/* never try to sync a read-only array */
+ if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
return;
}
@@ -9178,9 +9198,9 @@ static int remove_and_add_spares(struct mddev *mddev,
if (test_bit(Faulty, &rdev->flags))
continue;
if (!test_bit(Journal, &rdev->flags)) {
- if (mddev->ro &&
- ! (rdev->saved_raid_disk >= 0 &&
- !test_bit(Bitmap_sync, &rdev->flags)))
+ if (!md_is_rdwr(mddev) &&
+ !(rdev->saved_raid_disk >= 0 &&
+ !test_bit(Bitmap_sync, &rdev->flags)))
continue;
rdev->recovery_offset = 0;
@@ -9278,7 +9298,8 @@ void md_check_recovery(struct mddev *mddev)
flush_signals(current);
}
- if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+ if (!md_is_rdwr(mddev) &&
+ !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return;
if ( ! (
(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
@@ -9297,7 +9318,7 @@ void md_check_recovery(struct mddev *mddev)
if (!mddev->external && mddev->safemode == 1)
mddev->safemode = 0;
- if (mddev->ro) {
+ if (!md_is_rdwr(mddev)) {
struct md_rdev *rdev;
if (!mddev->external && mddev->in_sync)
/* 'Blocked' flag not needed as failed devices