diff options
author | NeilBrown <neilb@suse.de> | 2006-03-27 01:18:14 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-27 08:45:02 -0800 |
commit | e464eafdb4400c6d6576ba3840d8bd40340f8a96 (patch) | |
tree | 0c3f4003c883264ee08300c02007f06e4d1ebb91 | |
parent | 16484bf59634e25d1299761e5ed8bacf22bc6368 (diff) |
[PATCH] md: Support suspending of IO to regions of an md array
This allows user-space to access data safely. This is needed for raid5
reshape as user-space needs to take a backup of the first few stripes before
allowing reshape to commence.
It will also be useful in cluster-aware raid1 configurations so that all
cluster members can leave a section of the array untouched while a
resync/recovery happens.
A 'start' and 'end' of the suspended range are written to 2 sysfs attributes.
Note that only one range can be suspended at a time.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/md/md.c | 59 | ||||
-rw-r--r-- | drivers/md/raid5.c | 14 | ||||
-rw-r--r-- | include/linux/raid/md_k.h | 4 |
3 files changed, 77 insertions, 0 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index a79dd33d343d..92fd0104fa04 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2365,6 +2365,63 @@ sync_completed_show(mddev_t *mddev, char *page) static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); +static ssize_t +suspend_lo_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); +} + +static ssize_t +suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) +{ + char *e; + unsigned long long new = simple_strtoull(buf, &e, 10); + + if (mddev->pers->quiesce == NULL) + return -EINVAL; + if (buf == e || (*e && *e != '\n')) + return -EINVAL; + if (new >= mddev->suspend_hi || + (new > mddev->suspend_lo && new < mddev->suspend_hi)) { + mddev->suspend_lo = new; + mddev->pers->quiesce(mddev, 2); + return len; + } else + return -EINVAL; +} +static struct md_sysfs_entry md_suspend_lo = +__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); + + +static ssize_t +suspend_hi_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi); +} + +static ssize_t +suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) +{ + char *e; + unsigned long long new = simple_strtoull(buf, &e, 10); + + if (mddev->pers->quiesce == NULL) + return -EINVAL; + if (buf == e || (*e && *e != '\n')) + return -EINVAL; + if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) || + (new > mddev->suspend_lo && new > mddev->suspend_hi)) { + mddev->suspend_hi = new; + mddev->pers->quiesce(mddev, 1); + mddev->pers->quiesce(mddev, 0); + return len; + } else + return -EINVAL; +} +static struct md_sysfs_entry md_suspend_hi = +__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); + + static struct attribute *md_default_attrs[] = { &md_level.attr, &md_raid_disks.attr, @@ -2382,6 +2439,8 @@ static struct attribute *md_redundancy_attrs[] = { &md_sync_max.attr, &md_sync_speed.attr, &md_sync_completed.attr, + &md_suspend_lo.attr, + &md_suspend_hi.attr, NULL, }; static struct attribute_group md_redundancy_group = { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 355dafb98aac..bb16ac231a40 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1805,6 +1805,15 @@ static int make_request(request_queue_t *q, struct bio * bi) goto retry; } } + /* FIXME what if we get a false positive because these + * are being updated. + */ + if (logical_sector >= mddev->suspend_lo && + logical_sector < mddev->suspend_hi) { + release_stripe(sh); + schedule(); + goto retry; + } if (test_bit(STRIPE_EXPANDING, &sh->state) || !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { @@ -2725,6 +2734,10 @@ static void raid5_quiesce(mddev_t *mddev, int state) raid5_conf_t *conf = mddev_to_conf(mddev); switch(state) { + case 2: /* resume for a suspend */ + wake_up(&conf->wait_for_overlap); + break; + case 1: /* stop all writes */ spin_lock_irq(&conf->device_lock); conf->quiesce = 1; @@ -2738,6 +2751,7 @@ static void raid5_quiesce(mddev_t *mddev, int state) spin_lock_irq(&conf->device_lock); conf->quiesce = 0; wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_overlap); spin_unlock_irq(&conf->device_lock); break; } diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 002ee631fabb..c0d3097846a7 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -151,6 +151,10 @@ struct mddev_s sector_t resync_mismatches; /* count of sectors where * parity/replica mismatch found */ + + /* allow user-space to request suspension of IO to regions of the array */ + sector_t suspend_lo; + sector_t suspend_hi; /* if zero, use the system-wide default */ int sync_speed_min; int sync_speed_max; |