diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 23 | ||||
-rw-r--r-- | drivers/md/md.c | 50 | ||||
-rw-r--r-- | drivers/md/raid1.c | 73 | ||||
-rw-r--r-- | drivers/md/raid10.c | 87 |
4 files changed, 167 insertions, 66 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7aeceedcf7d4..c14dacdacfac 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1045,8 +1045,14 @@ void bitmap_daemon_work(struct bitmap *bitmap) if (bitmap == NULL) return; if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) - return; + goto done; + bitmap->daemon_lastrun = jiffies; + if (bitmap->allclean) { + bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + return; + } + bitmap->allclean = 1; for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; @@ -1068,8 +1074,10 @@ void bitmap_daemon_work(struct bitmap *bitmap) clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - if (need_write) + if (need_write) { write_page(bitmap, page, 0); + bitmap->allclean = 0; + } continue; } @@ -1098,6 +1106,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) /* if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); */ + if (*bmc) + bitmap->allclean = 0; + if (*bmc == 2) { *bmc=1; /* maybe clear the bit next time */ set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); @@ -1132,6 +1143,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) } } + done: + if (bitmap->allclean == 0) + bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; } static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, @@ -1226,6 +1240,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect sectors -= blocks; else sectors = 0; } + bitmap->allclean = 0; return 0; } @@ -1296,6 +1311,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, } } spin_unlock_irq(&bitmap->lock); + bitmap->allclean = 0; return rv; } @@ -1332,6 +1348,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab } unlock: spin_unlock_irqrestore(&bitmap->lock, flags); + bitmap->allclean = 0; } void bitmap_close_sync(struct bitmap *bitmap) @@ -1399,7 +1416,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } spin_unlock_irq(&bitmap->lock); - + bitmap->allclean = 0; } /* dirty the memory and file bits for bitmap chunks "s" to "e" */ diff --git a/drivers/md/md.c b/drivers/md/md.c index 7da6ec244e15..ccbbf63727cc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1105,7 +1105,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; if (rdev->sb_size & bmask) - rdev-> sb_size = (rdev->sb_size | bmask)+1; + rdev->sb_size = (rdev->sb_size | bmask) + 1; + + if (minor_version + && rdev->data_offset < sb_offset + (rdev->sb_size/512)) + return -EINVAL; if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) rdev->desc_nr = -1; @@ -1137,7 +1141,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) else ret = 0; } - if (minor_version) + if (minor_version) rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2; else rdev->size = rdev->sb_offset; @@ -1499,7 +1503,8 @@ static void export_rdev(mdk_rdev_t * rdev) free_disk_sb(rdev); list_del_init(&rdev->same_set); #ifndef MODULE - md_autodetect_dev(rdev->bdev->bd_dev); + if (test_bit(AutoDetected, &rdev->flags)) + md_autodetect_dev(rdev->bdev->bd_dev); #endif unlock_rdev(rdev); kobject_put(&rdev->kobj); @@ -1996,9 +2001,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) char *e; unsigned long long size = simple_strtoull(buf, &e, 10); unsigned long long oldsize = rdev->size; + mddev_t *my_mddev = rdev->mddev; + if (e==buf || (*e && *e != '\n')) return -EINVAL; - if (rdev->mddev->pers) + if (my_mddev->pers) return -EBUSY; rdev->size = size; if (size > oldsize && rdev->mddev->external) { @@ -2011,7 +2018,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) int overlap = 0; struct list_head *tmp, *tmp2; - mddev_unlock(rdev->mddev); + mddev_unlock(my_mddev); for_each_mddev(mddev, tmp) { mdk_rdev_t *rdev2; @@ -2031,7 +2038,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) break; } } - mddev_lock(rdev->mddev); + mddev_lock(my_mddev); if (overlap) { /* Someone else could have slipped in a size * change here, but doing so is just silly. @@ -2043,8 +2050,8 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) return -EBUSY; } } - if (size < rdev->mddev->size || rdev->mddev->size == 0) - rdev->mddev->size = size; + if (size < my_mddev->size || my_mddev->size == 0) + my_mddev->size = size; return len; } @@ -2065,10 +2072,21 @@ rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); + mddev_t *mddev = rdev->mddev; + ssize_t rv; if (!entry->show) return -EIO; - return entry->show(rdev, page); + + rv = mddev ? mddev_lock(mddev) : -EBUSY; + if (!rv) { + if (rdev->mddev == NULL) + rv = -EBUSY; + else + rv = entry->show(rdev, page); + mddev_unlock(mddev); + } + return rv; } static ssize_t @@ -2077,15 +2095,19 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, { struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); - int rv; + ssize_t rv; + mddev_t *mddev = rdev->mddev; if (!entry->store) return -EIO; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - rv = mddev_lock(rdev->mddev); + rv = mddev ? mddev_lock(mddev): -EBUSY; if (!rv) { - rv = entry->store(rdev, page, length); + if (rdev->mddev == NULL) + rv = -EBUSY; + else + rv = entry->store(rdev, page, length); mddev_unlock(rdev->mddev); } return rv; @@ -5127,7 +5149,7 @@ static int md_seq_show(struct seq_file *seq, void *v) if (mddev->ro==1) seq_printf(seq, " (read-only)"); if (mddev->ro==2) - seq_printf(seq, "(auto-read-only)"); + seq_printf(seq, " (auto-read-only)"); seq_printf(seq, " %s", mddev->pers->name); } @@ -5351,6 +5373,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) mddev->ro = 0; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); + md_wakeup_thread(mddev->sync_thread); } atomic_inc(&mddev->writes_pending); if (mddev->in_sync) { @@ -6021,6 +6044,7 @@ static void autostart_arrays(int part) MD_BUG(); continue; } + set_bit(AutoDetected, &rdev->flags); list_add(&rdev->same_set, &pending_raid_disks); i_passed++; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5c7fef091cec..ff61b309129a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -592,6 +592,37 @@ static int raid1_congested(void *data, int bits) } +static int flush_pending_writes(conf_t *conf) +{ + /* Any writes that have been queued but are awaiting + * bitmap updates get flushed here. + * We return 1 if any requests were actually submitted. + */ + int rv = 0; + + spin_lock_irq(&conf->device_lock); + + if (conf->pending_bio_list.head) { + struct bio *bio; + bio = bio_list_get(&conf->pending_bio_list); + blk_remove_plug(conf->mddev->queue); + spin_unlock_irq(&conf->device_lock); + /* flush any pending bitmap writes to + * disk before proceeding w/ I/O */ + bitmap_unplug(conf->mddev->bitmap); + + while (bio) { /* submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + rv = 1; + } else + spin_unlock_irq(&conf->device_lock); + return rv; +} + /* Barriers.... * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. @@ -673,15 +704,23 @@ static void freeze_array(conf_t *conf) /* stop syncio and normal IO and wait for everything to * go quite. * We increment barrier and nr_waiting, and then - * wait until barrier+nr_pending match nr_queued+2 + * wait until nr_pending match nr_queued+1 + * This is called in the context of one normal IO request + * that has failed. Thus any sync request that might be pending + * will be blocked by nr_pending, and we need to wait for + * pending IO requests to complete or be queued for re-try. + * Thus the number queued (nr_queued) plus this request (1) + * must match the number of pending IOs (nr_pending) before + * we continue. */ spin_lock_irq(&conf->resync_lock); conf->barrier++; conf->nr_waiting++; wait_event_lock_irq(conf->wait_barrier, - conf->barrier+conf->nr_pending == conf->nr_queued+2, + conf->nr_pending == conf->nr_queued+1, conf->resync_lock, - raid1_unplug(conf->mddev->queue)); + ({ flush_pending_writes(conf); + raid1_unplug(conf->mddev->queue); })); spin_unlock_irq(&conf->resync_lock); } static void unfreeze_array(conf_t *conf) @@ -907,6 +946,9 @@ static int make_request(struct request_queue *q, struct bio * bio) blk_plug_device(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); + /* In case raid1d snuck into freeze_array */ + wake_up(&conf->wait_barrier); + if (do_sync) md_wakeup_thread(mddev->thread); #if 0 @@ -1473,28 +1515,14 @@ static void raid1d(mddev_t *mddev) for (;;) { char b[BDEVNAME_SIZE]; - spin_lock_irqsave(&conf->device_lock, flags); - - if (conf->pending_bio_list.head) { - bio = bio_list_get(&conf->pending_bio_list); - blk_remove_plug(mddev->queue); - spin_unlock_irqrestore(&conf->device_lock, flags); - /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - bitmap_unplug(mddev->bitmap); - while (bio) { /* submit pending writes */ - struct bio *next = bio->bi_next; - bio->bi_next = NULL; - generic_make_request(bio); - bio = next; - } - unplug = 1; + unplug += flush_pending_writes(conf); - continue; - } - - if (list_empty(head)) + spin_lock_irqsave(&conf->device_lock, flags); + if (list_empty(head)) { + spin_unlock_irqrestore(&conf->device_lock, flags); break; + } r1_bio = list_entry(head->prev, r1bio_t, retry_list); list_del(head->prev); conf->nr_queued--; @@ -1590,7 +1618,6 @@ static void raid1d(mddev_t *mddev) } } } - spin_unlock_irqrestore(&conf->device_lock, flags); if (unplug) unplug_slaves(mddev); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 017f58113c33..32389d2f18fc 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -537,7 +537,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) current_distance = abs(r10_bio->devs[slot].addr - conf->mirrors[disk].head_position); - /* Find the disk whose head is closest */ + /* Find the disk whose head is closest, + * or - for far > 1 - find the closest to partition beginning */ for (nslot = slot; nslot < conf->copies; nslot++) { int ndisk = r10_bio->devs[nslot].devnum; @@ -557,8 +558,13 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) slot = nslot; break; } - new_distance = abs(r10_bio->devs[nslot].addr - - conf->mirrors[ndisk].head_position); + + /* for far > 1 always use the lowest address */ + if (conf->far_copies > 1) + new_distance = r10_bio->devs[nslot].addr; + else + new_distance = abs(r10_bio->devs[nslot].addr - + conf->mirrors[ndisk].head_position); if (new_distance < current_distance) { current_distance = new_distance; disk = ndisk; @@ -629,7 +635,36 @@ static int raid10_congested(void *data, int bits) return ret; } - +static int flush_pending_writes(conf_t *conf) +{ + /* Any writes that have been queued but are awaiting + * bitmap updates get flushed here. + * We return 1 if any requests were actually submitted. + */ + int rv = 0; + + spin_lock_irq(&conf->device_lock); + + if (conf->pending_bio_list.head) { + struct bio *bio; + bio = bio_list_get(&conf->pending_bio_list); + blk_remove_plug(conf->mddev->queue); + spin_unlock_irq(&conf->device_lock); + /* flush any pending bitmap writes to disk + * before proceeding w/ I/O */ + bitmap_unplug(conf->mddev->bitmap); + + while (bio) { /* submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + rv = 1; + } else + spin_unlock_irq(&conf->device_lock); + return rv; +} /* Barriers.... * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. @@ -712,15 +747,23 @@ static void freeze_array(conf_t *conf) /* stop syncio and normal IO and wait for everything to * go quiet. * We increment barrier and nr_waiting, and then - * wait until barrier+nr_pending match nr_queued+2 + * wait until nr_pending match nr_queued+1 + * This is called in the context of one normal IO request + * that has failed. Thus any sync request that might be pending + * will be blocked by nr_pending, and we need to wait for + * pending IO requests to complete or be queued for re-try. + * Thus the number queued (nr_queued) plus this request (1) + * must match the number of pending IOs (nr_pending) before + * we continue. */ spin_lock_irq(&conf->resync_lock); conf->barrier++; conf->nr_waiting++; wait_event_lock_irq(conf->wait_barrier, - conf->barrier+conf->nr_pending == conf->nr_queued+2, + conf->nr_pending == conf->nr_queued+1, conf->resync_lock, - raid10_unplug(conf->mddev->queue)); + ({ flush_pending_writes(conf); + raid10_unplug(conf->mddev->queue); })); spin_unlock_irq(&conf->resync_lock); } @@ -892,6 +935,9 @@ static int make_request(struct request_queue *q, struct bio * bio) blk_plug_device(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); + /* In case raid10d snuck in to freeze_array */ + wake_up(&conf->wait_barrier); + if (do_sync) md_wakeup_thread(mddev->thread); @@ -1464,28 +1510,14 @@ static void raid10d(mddev_t *mddev) for (;;) { char b[BDEVNAME_SIZE]; - spin_lock_irqsave(&conf->device_lock, flags); - if (conf->pending_bio_list.head) { - bio = bio_list_get(&conf->pending_bio_list); - blk_remove_plug(mddev->queue); - spin_unlock_irqrestore(&conf->device_lock, flags); - /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - bitmap_unplug(mddev->bitmap); - - while (bio) { /* submit pending writes */ - struct bio *next = bio->bi_next; - bio->bi_next = NULL; - generic_make_request(bio); - bio = next; - } - unplug = 1; - - continue; - } + unplug += flush_pending_writes(conf); - if (list_empty(head)) + spin_lock_irqsave(&conf->device_lock, flags); + if (list_empty(head)) { + spin_unlock_irqrestore(&conf->device_lock, flags); break; + } r10_bio = list_entry(head->prev, r10bio_t, retry_list); list_del(head->prev); conf->nr_queued--; @@ -1548,7 +1580,6 @@ static void raid10d(mddev_t *mddev) } } } - spin_unlock_irqrestore(&conf->device_lock, flags); if (unplug) unplug_slaves(mddev); } @@ -1787,6 +1818,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (j == conf->copies) { /* Cannot recover, so abort the recovery */ put_buf(r10_bio); + if (rb2) + atomic_dec(&rb2->remaining); r10_bio = rb2; if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", |