diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 28 | ||||
-rw-r--r-- | drivers/md/bitmap.h | 5 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 170 | ||||
-rw-r--r-- | drivers/md/dm-exception-store.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-exception-store.h | 2 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 61 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-transfer.c | 10 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 12 | ||||
-rw-r--r-- | drivers/md/dm-stripe.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 20 | ||||
-rw-r--r-- | drivers/md/dm-uevent.c | 9 | ||||
-rw-r--r-- | drivers/md/dm.c | 25 | ||||
-rw-r--r-- | drivers/md/linear.c | 12 | ||||
-rw-r--r-- | drivers/md/md.c | 52 | ||||
-rw-r--r-- | drivers/md/md.h | 1 | ||||
-rw-r--r-- | drivers/md/multipath.c | 20 | ||||
-rw-r--r-- | drivers/md/raid0.c | 13 | ||||
-rw-r--r-- | drivers/md/raid1.c | 34 | ||||
-rw-r--r-- | drivers/md/raid10.c | 48 | ||||
-rw-r--r-- | drivers/md/raid5.c | 65 |
21 files changed, 390 insertions, 203 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 60e2b322db11..47831536deb1 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1078,23 +1078,31 @@ static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, * out to disk */ -void bitmap_daemon_work(struct bitmap *bitmap) +void bitmap_daemon_work(mddev_t *mddev) { + struct bitmap *bitmap; unsigned long j; unsigned long flags; struct page *page = NULL, *lastpage = NULL; int blocks; void *paddr; - if (bitmap == NULL) + /* Use a mutex to guard daemon_work against + * bitmap_destroy. + */ + mutex_lock(&mddev->bitmap_mutex); + bitmap = mddev->bitmap; + if (bitmap == NULL) { + mutex_unlock(&mddev->bitmap_mutex); return; + } if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) goto done; bitmap->daemon_lastrun = jiffies; if (bitmap->allclean) { bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - return; + goto done; } bitmap->allclean = 1; @@ -1203,6 +1211,7 @@ void bitmap_daemon_work(struct bitmap *bitmap) done: if (bitmap->allclean == 0) bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; + mutex_unlock(&mddev->bitmap_mutex); } static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, @@ -1308,7 +1317,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto { if (!bitmap) return; if (behind) { - atomic_dec(&bitmap->behind_writes); + if (atomic_dec_and_test(&bitmap->behind_writes)) + wake_up(&bitmap->behind_wait); PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n", atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); } @@ -1541,9 +1551,9 @@ void bitmap_flush(mddev_t *mddev) */ sleep = bitmap->daemon_sleep; bitmap->daemon_sleep = 0; - bitmap_daemon_work(bitmap); - bitmap_daemon_work(bitmap); - bitmap_daemon_work(bitmap); + bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); bitmap->daemon_sleep = sleep; bitmap_update_sb(bitmap); } @@ -1574,6 +1584,7 @@ static void bitmap_free(struct bitmap *bitmap) kfree(bp); kfree(bitmap); } + void bitmap_destroy(mddev_t *mddev) { struct bitmap *bitmap = mddev->bitmap; @@ -1581,7 +1592,9 @@ void bitmap_destroy(mddev_t *mddev) if (!bitmap) /* there was no bitmap */ return; + mutex_lock(&mddev->bitmap_mutex); mddev->bitmap = NULL; /* disconnect from the md device */ + mutex_unlock(&mddev->bitmap_mutex); if (mddev->thread) mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; @@ -1617,6 +1630,7 @@ int bitmap_create(mddev_t *mddev) atomic_set(&bitmap->pending_writes, 0); init_waitqueue_head(&bitmap->write_wait); init_waitqueue_head(&bitmap->overflow_wait); + init_waitqueue_head(&bitmap->behind_wait); bitmap->mddev = mddev; diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index e98900671ca9..86950bc527af 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h @@ -254,6 +254,9 @@ struct bitmap { wait_queue_head_t write_wait; wait_queue_head_t overflow_wait; +#ifndef __GENKSYMS__ + wait_queue_head_t behind_wait; +#endif }; /* the bitmap API */ @@ -282,7 +285,7 @@ void bitmap_close_sync(struct bitmap *bitmap); void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); void bitmap_unplug(struct bitmap *bitmap); -void bitmap_daemon_work(struct bitmap *bitmap); +void bitmap_daemon_work(mddev_t *mddev); #endif #endif diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ed1038164019..959d6d14cee0 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2003 Christophe Saout <christophe@saout.de> * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org> - * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. + * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -71,10 +71,21 @@ struct crypt_iv_operations { int (*ctr)(struct crypt_config *cc, struct dm_target *ti, const char *opts); void (*dtr)(struct crypt_config *cc); - const char *(*status)(struct crypt_config *cc); + int (*init)(struct crypt_config *cc); + int (*wipe)(struct crypt_config *cc); int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector); }; +struct iv_essiv_private { + struct crypto_cipher *tfm; + struct crypto_hash *hash_tfm; + u8 *salt; +}; + +struct iv_benbi_private { + int shift; +}; + /* * Crypt: maps a linear range of a block device * and encrypts / decrypts at the same time. @@ -102,8 +113,8 @@ struct crypt_config { struct crypt_iv_operations *iv_gen_ops; char *iv_mode; union { - struct crypto_cipher *essiv_tfm; - int benbi_shift; + struct iv_essiv_private essiv; + struct iv_benbi_private benbi; } iv_gen_private; sector_t iv_offset; unsigned int iv_size; @@ -169,88 +180,114 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector) return 0; } -static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, - const char *opts) +/* Initialise ESSIV - compute salt but no local memory allocations */ +static int crypt_iv_essiv_init(struct crypt_config *cc) { - struct crypto_cipher *essiv_tfm; - struct crypto_hash *hash_tfm; + struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; struct hash_desc desc; struct scatterlist sg; - unsigned int saltsize; - u8 *salt; int err; - if (opts == NULL) { + sg_init_one(&sg, cc->key, cc->key_size); + desc.tfm = essiv->hash_tfm; + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + err = crypto_hash_digest(&desc, &sg, cc->key_size, essiv->salt); + if (err) + return err; + + return crypto_cipher_setkey(essiv->tfm, essiv->salt, + crypto_hash_digestsize(essiv->hash_tfm)); +} + +/* Wipe salt and reset key derived from volume key */ +static int crypt_iv_essiv_wipe(struct crypt_config *cc) +{ + struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; + unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); + + memset(essiv->salt, 0, salt_size); + + return crypto_cipher_setkey(essiv->tfm, essiv->salt, salt_size); +} + +static void crypt_iv_essiv_dtr(struct crypt_config *cc) +{ + struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; + + crypto_free_cipher(essiv->tfm); + essiv->tfm = NULL; + + crypto_free_hash(essiv->hash_tfm); + essiv->hash_tfm = NULL; + + kzfree(essiv->salt); + essiv->salt = NULL; +} + +static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, + const char *opts) +{ + struct crypto_cipher *essiv_tfm = NULL; + struct crypto_hash *hash_tfm = NULL; + u8 *salt = NULL; + int err; + + if (!opts) { ti->error = "Digest algorithm missing for ESSIV mode"; return -EINVAL; } - /* Hash the cipher key with the given hash algorithm */ + /* Allocate hash algorithm */ hash_tfm = crypto_alloc_hash(opts, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(hash_tfm)) { ti->error = "Error initializing ESSIV hash"; - return PTR_ERR(hash_tfm); + err = PTR_ERR(hash_tfm); + goto bad; } - saltsize = crypto_hash_digestsize(hash_tfm); - salt = kmalloc(saltsize, GFP_KERNEL); - if (salt == NULL) { + salt = kzalloc(crypto_hash_digestsize(hash_tfm), GFP_KERNEL); + if (!salt) { ti->error = "Error kmallocing salt storage in ESSIV"; - crypto_free_hash(hash_tfm); - return -ENOMEM; + err = -ENOMEM; + goto bad; } - sg_init_one(&sg, cc->key, cc->key_size); - desc.tfm = hash_tfm; - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - err = crypto_hash_digest(&desc, &sg, cc->key_size, salt); - crypto_free_hash(hash_tfm); - - if (err) { - ti->error = "Error calculating hash in ESSIV"; - kfree(salt); - return err; - } - - /* Setup the essiv_tfm with the given salt */ + /* Allocate essiv_tfm */ essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(essiv_tfm)) { ti->error = "Error allocating crypto tfm for ESSIV"; - kfree(salt); - return PTR_ERR(essiv_tfm); + err = PTR_ERR(essiv_tfm); + goto bad; } if (crypto_cipher_blocksize(essiv_tfm) != crypto_ablkcipher_ivsize(cc->tfm)) { ti->error = "Block size of ESSIV cipher does " "not match IV size of block cipher"; - crypto_free_cipher(essiv_tfm); - kfree(salt); - return -EINVAL; - } - err = crypto_cipher_setkey(essiv_tfm, salt, saltsize); - if (err) { - ti->error = "Failed to set key for ESSIV cipher"; - crypto_free_cipher(essiv_tfm); - kfree(salt); - return err; + err = -EINVAL; + goto bad; } - kfree(salt); - cc->iv_gen_private.essiv_tfm = essiv_tfm; + cc->iv_gen_private.essiv.salt = salt; + cc->iv_gen_private.essiv.tfm = essiv_tfm; + cc->iv_gen_private.essiv.hash_tfm = hash_tfm; + return 0; -} -static void crypt_iv_essiv_dtr(struct crypt_config *cc) -{ - crypto_free_cipher(cc->iv_gen_private.essiv_tfm); - cc->iv_gen_private.essiv_tfm = NULL; +bad: + if (essiv_tfm && !IS_ERR(essiv_tfm)) + crypto_free_cipher(essiv_tfm); + if (hash_tfm && !IS_ERR(hash_tfm)) + crypto_free_hash(hash_tfm); + kfree(salt); + return err; } static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector) { memset(iv, 0, cc->iv_size); *(u64 *)iv = cpu_to_le64(sector); - crypto_cipher_encrypt_one(cc->iv_gen_private.essiv_tfm, iv, iv); + crypto_cipher_encrypt_one(cc->iv_gen_private.essiv.tfm, iv, iv); return 0; } @@ -273,7 +310,7 @@ static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, return -EINVAL; } - cc->iv_gen_private.benbi_shift = 9 - log; + cc->iv_gen_private.benbi.shift = 9 - log; return 0; } @@ -288,7 +325,7 @@ static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, sector_t sector) memset(iv, 0, cc->iv_size - sizeof(u64)); /* rest is cleared below */ - val = cpu_to_be64(((u64)sector << cc->iv_gen_private.benbi_shift) + 1); + val = cpu_to_be64(((u64)sector << cc->iv_gen_private.benbi.shift) + 1); put_unaligned(val, (__be64 *)(iv + cc->iv_size - sizeof(u64))); return 0; @@ -308,6 +345,8 @@ static struct crypt_iv_operations crypt_iv_plain_ops = { static struct crypt_iv_operations crypt_iv_essiv_ops = { .ctr = crypt_iv_essiv_ctr, .dtr = crypt_iv_essiv_dtr, + .init = crypt_iv_essiv_init, + .wipe = crypt_iv_essiv_wipe, .generator = crypt_iv_essiv_gen }; @@ -1039,6 +1078,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0) goto bad_ivmode; + if (cc->iv_gen_ops && cc->iv_gen_ops->init && + cc->iv_gen_ops->init(cc) < 0) { + ti->error = "Error initialising IV"; + goto bad_slab_pool; + } + cc->iv_size = crypto_ablkcipher_ivsize(tfm); if (cc->iv_size) /* at least a 64 bit sector number should fit in our buffer */ @@ -1278,6 +1323,7 @@ static void crypt_resume(struct dm_target *ti) static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) { struct crypt_config *cc = ti->private; + int ret = -EINVAL; if (argc < 2) goto error; @@ -1287,10 +1333,22 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) DMWARN("not suspended during key manipulation."); return -EINVAL; } - if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) - return crypt_set_key(cc, argv[2]); - if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) + if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) { + ret = crypt_set_key(cc, argv[2]); + if (ret) + return ret; + if (cc->iv_gen_ops && cc->iv_gen_ops->init) + ret = cc->iv_gen_ops->init(cc); + return ret; + } + if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) { + if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { + ret = cc->iv_gen_ops->wipe(cc); + if (ret) + return ret; + } return crypt_wipe_key(cc); + } } error: diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 7dbe652efb5a..205215968ff1 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -216,7 +216,8 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, type = get_type("N"); else { ti->error = "Persistent flag is not P or N"; - return -EINVAL; + r = -EINVAL; + goto bad_type; } if (!type) { diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 8a223a48802c..5f9315b32a42 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -162,7 +162,7 @@ static inline sector_t get_dev_size(struct block_device *bdev) static inline chunk_t sector_to_chunk(struct dm_exception_store *store, sector_t sector) { - return (sector & ~store->chunk_mask) >> store->chunk_shift; + return sector >> store->chunk_shift; } int dm_exception_store_type_register(struct dm_exception_store_type *type); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a67942931582..818b617ab3b2 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -56,6 +56,11 @@ static void dm_hash_remove_all(int keep_open_devices); */ static DECLARE_RWSEM(_hash_lock); +/* + * Protects use of mdptr to obtain hash cell name and uuid from mapped device. + */ +static DEFINE_MUTEX(dm_hash_cells_mutex); + static void init_buckets(struct list_head *buckets) { unsigned int i; @@ -206,7 +211,9 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); } dm_get(md); + mutex_lock(&dm_hash_cells_mutex); dm_set_mdptr(md, cell); + mutex_unlock(&dm_hash_cells_mutex); up_write(&_hash_lock); return 0; @@ -224,7 +231,9 @@ static void __hash_remove(struct hash_cell *hc) /* remove from the dev hash */ list_del(&hc->uuid_list); list_del(&hc->name_list); + mutex_lock(&dm_hash_cells_mutex); dm_set_mdptr(hc->md, NULL); + mutex_unlock(&dm_hash_cells_mutex); table = dm_get_table(hc->md); if (table) { @@ -240,40 +249,46 @@ static void __hash_remove(struct hash_cell *hc) static void dm_hash_remove_all(int keep_open_devices) { - int i, dev_skipped, dev_removed; + int i, dev_skipped; struct hash_cell *hc; - struct list_head *tmp, *n; + struct mapped_device *md; + +retry: + dev_skipped = 0; down_write(&_hash_lock); -retry: - dev_skipped = dev_removed = 0; for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_safe (tmp, n, _name_buckets + i) { - hc = list_entry(tmp, struct hash_cell, name_list); + list_for_each_entry(hc, _name_buckets + i, name_list) { + md = hc->md; + dm_get(md); - if (keep_open_devices && - dm_lock_for_deletion(hc->md)) { + if (keep_open_devices && dm_lock_for_deletion(md)) { + dm_put(md); dev_skipped++; continue; } + __hash_remove(hc); - dev_removed = 1; - } - } - /* - * Some mapped devices may be using other mapped devices, so if any - * still exist, repeat until we make no further progress. - */ - if (dev_skipped) { - if (dev_removed) - goto retry; + up_write(&_hash_lock); - DMWARN("remove_all left %d open device(s)", dev_skipped); + dm_put(md); + + /* + * Some mapped devices may be using other mapped + * devices, so repeat until we make no further + * progress. If a new mapped device is created + * here it will also get removed. + */ + goto retry; + } } up_write(&_hash_lock); + + if (dev_skipped) + DMWARN("remove_all left %d open device(s)", dev_skipped); } static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) @@ -321,7 +336,9 @@ static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) */ list_del(&hc->name_list); old_name = hc->name; + mutex_lock(&dm_hash_cells_mutex); hc->name = new_name; + mutex_unlock(&dm_hash_cells_mutex); list_add(&hc->name_list, _name_buckets + hash_str(new_name)); /* @@ -1582,8 +1599,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid) if (!md) return -ENXIO; - dm_get(md); - down_read(&_hash_lock); + mutex_lock(&dm_hash_cells_mutex); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { r = -ENXIO; @@ -1596,8 +1612,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid) strcpy(uuid, hc->uuid ? : ""); out: - up_read(&_hash_lock); - dm_put(md); + mutex_unlock(&dm_hash_cells_mutex); return r; } diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c index 54abf9e303b7..f1c8cae70b4b 100644 --- a/drivers/md/dm-log-userspace-transfer.c +++ b/drivers/md/dm-log-userspace-transfer.c @@ -172,11 +172,15 @@ int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type, { int r = 0; size_t dummy = 0; - int overhead_size = - sizeof(struct dm_ulog_request *) + sizeof(struct cn_msg); + int overhead_size = sizeof(struct dm_ulog_request) + sizeof(struct cn_msg); struct dm_ulog_request *tfr = prealloced_ulog_tfr; struct receiving_pkg pkg; + /* + * Given the space needed to hold the 'struct cn_msg' and + * 'struct dm_ulog_request' - do we have enough payload + * space remaining? + */ if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) { DMINFO("Size of tfr exceeds preallocated size"); return -EINVAL; @@ -191,7 +195,7 @@ resend: */ mutex_lock(&dm_ulog_lock); - memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size); + memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - sizeof(struct cn_msg)); memcpy(tfr->uuid, uuid, DM_UUID_LEN); tfr->luid = luid; tfr->seq = dm_ulog_seq++; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 32d0b878eccc..f336c6959082 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -691,6 +691,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as, if (as->argc < nr_params) { ti->error = "not enough path parameters"; + r = -EINVAL; goto bad; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 3a3ba46e6d4b..8a4a9c838afd 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -553,6 +553,8 @@ static int init_hash_tables(struct dm_snapshot *s) hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; hash_size = min(hash_size, max_buckets); + if (hash_size < 64) + hash_size = 64; hash_size = rounddown_pow_of_two(hash_size); if (init_exception_table(&s->complete, hash_size, DM_CHUNK_CONSECUTIVE_BITS)) @@ -1152,10 +1154,11 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, unsigned sz = 0; struct dm_snapshot *snap = ti->private; - down_write(&snap->lock); - switch (type) { case STATUSTYPE_INFO: + + down_write(&snap->lock); + if (!snap->valid) DMEMIT("Invalid"); else { @@ -1171,6 +1174,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, else DMEMIT("Unknown"); } + + up_write(&snap->lock); + break; case STATUSTYPE_TABLE: @@ -1185,8 +1191,6 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, break; } - up_write(&snap->lock); - return 0; } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e0efc1adcaff..bd58703ee8f6 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -110,7 +110,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) } stripes = simple_strtoul(argv[0], &end, 10); - if (*end) { + if (!stripes || *end) { ti->error = "Invalid stripe count"; return -EINVAL; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 1a6cb3c7822e..e86912864e04 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -499,16 +499,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, return 0; } - if (blk_stack_limits(limits, &q->limits, start << 9) < 0) - DMWARN("%s: target device %s is misaligned: " + if (bdev_stack_limits(limits, bdev, start) < 0) + DMWARN("%s: adding target device %s caused an alignment inconsistency: " "physical_block_size=%u, logical_block_size=%u, " "alignment_offset=%u, start=%llu", dm_device_name(ti->table->md), bdevname(bdev, b), q->limits.physical_block_size, q->limits.logical_block_size, q->limits.alignment_offset, - (unsigned long long) start << 9); - + (unsigned long long) start << SECTOR_SHIFT); /* * Check if merge fn is supported. @@ -1025,9 +1024,9 @@ combine_limits: * for the table. */ if (blk_stack_limits(limits, &ti_limits, 0) < 0) - DMWARN("%s: target device " + DMWARN("%s: adding target device " "(start sect %llu len %llu) " - "is misaligned", + "caused an alignment inconsistency", dm_device_name(table->md), (unsigned long long) ti->begin, (unsigned long long) ti->len); @@ -1079,15 +1078,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { /* - * Each target device in the table has a data area that should normally - * be aligned such that the DM device's alignment_offset is 0. - * FIXME: Propagate alignment_offsets up the stack and warn of - * sub-optimal or inconsistent settings. - */ - limits->alignment_offset = 0; - limits->misaligned = 0; - - /* * Copy table's limits to the DM device's request_queue */ q->limits = *limits; diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c index 6f65883aef12..c7c555a8c7b2 100644 --- a/drivers/md/dm-uevent.c +++ b/drivers/md/dm-uevent.c @@ -139,14 +139,13 @@ void dm_send_uevents(struct list_head *events, struct kobject *kobj) list_del_init(&event->elist); /* - * Need to call dm_copy_name_and_uuid from here for now. - * Context of previous var adds and locking used for - * hash_cell not compatable. + * When a device is being removed this copy fails and we + * discard these unsent events. */ if (dm_copy_name_and_uuid(event->md, event->name, event->uuid)) { - DMERR("%s: dm_copy_name_and_uuid() failed", - __func__); + DMINFO("%s: skipping sending uevent for lost device", + __func__); goto uevent_free; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 724efc63904d..d7786e3514cd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -614,8 +614,10 @@ static void dec_pending(struct dm_io *io, int error) if (!md->barrier_error && io_error != -EOPNOTSUPP) md->barrier_error = io_error; end_io_acct(io); + free_io(md, io); } else { end_io_acct(io); + free_io(md, io); if (io_error != DM_ENDIO_REQUEUE) { trace_block_bio_complete(md->queue, bio); @@ -623,8 +625,6 @@ static void dec_pending(struct dm_io *io, int error) bio_endio(bio, io_error); } } - - free_io(md, io); } } @@ -1487,10 +1487,15 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) return BLKPREP_OK; } -static void map_request(struct dm_target *ti, struct request *rq, - struct mapped_device *md) +/* + * Returns: + * 0 : the request has been processed (not requeued) + * !0 : the request has been requeued + */ +static int map_request(struct dm_target *ti, struct request *rq, + struct mapped_device *md) { - int r; + int r, requeued = 0; struct request *clone = rq->special; struct dm_rq_target_io *tio = clone->end_io_data; @@ -1516,6 +1521,7 @@ static void map_request(struct dm_target *ti, struct request *rq, case DM_MAPIO_REQUEUE: /* The target wants to requeue the I/O */ dm_requeue_unmapped_request(clone); + requeued = 1; break; default: if (r > 0) { @@ -1527,6 +1533,8 @@ static void map_request(struct dm_target *ti, struct request *rq, dm_kill_unmapped_request(clone, r); break; } + + return requeued; } /* @@ -1568,12 +1576,17 @@ static void dm_request_fn(struct request_queue *q) blk_start_request(rq); spin_unlock(q->queue_lock); - map_request(ti, rq, md); + if (map_request(ti, rq, md)) + goto requeued; + spin_lock_irq(q->queue_lock); } goto out; +requeued: + spin_lock_irq(q->queue_lock); + plug_and_out: if (!elv_queue_empty(q)) /* Some requests still remain, retry later */ diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 1ceceb334d5e..dff9d2f449c3 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -172,12 +172,14 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + * violating it, so limit max_phys_segments to 1 lying within + * a single page. */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_phys_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } conf->array_sectors += rdev->sectors; cnt++; diff --git a/drivers/md/md.c b/drivers/md/md.c index b182f86a19dd..01470794d9c3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -282,7 +282,9 @@ static void mddev_put(mddev_t *mddev) if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) return; if (!mddev->raid_disks && list_empty(&mddev->disks) && - !mddev->hold_active) { + mddev->ctime == 0 && !mddev->hold_active) { + /* Array is not configured at all, and not held active, + * so destroy it */ list_del(&mddev->all_mddevs); if (mddev->gendisk) { /* we did a probe so need to clean up. @@ -367,6 +369,7 @@ static mddev_t * mddev_find(dev_t unit) mutex_init(&new->open_mutex); mutex_init(&new->reconfig_mutex); + mutex_init(&new->bitmap_mutex); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); init_timer(&new->safemode_timer); @@ -2008,12 +2011,18 @@ repeat: if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ /* .. if the array isn't clean, an 'even' event must also go * to spares. */ - if ((mddev->events&1)==0) + if ((mddev->events&1)==0) { nospares = 0; + sync_req = 2; /* force a second update to get the + * even/odd in sync */ + } } else { /* otherwise an 'odd' event must go to spares */ - if ((mddev->events&1)) + if ((mddev->events&1)) { nospares = 0; + sync_req = 2; /* force a second update to get the + * even/odd in sync */ + } } } @@ -4170,7 +4179,7 @@ static int do_md_run(mddev_t * mddev) mddev->barriers_work = 1; mddev->ok_start_degraded = start_dirty_degraded; - if (start_readonly) + if (start_readonly && mddev->ro == 0) mddev->ro = 2; /* read-only, but switch on first write */ err = mddev->pers->run(mddev); @@ -5070,6 +5079,10 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) mddev->minor_version = info->minor_version; mddev->patch_version = info->patch_version; mddev->persistent = !info->not_persistent; + /* ensure mddev_put doesn't delete this now that there + * is some minimal configuration. + */ + mddev->ctime = get_seconds(); return 0; } mddev->major_version = MD_MAJOR_VERSION; @@ -5321,6 +5334,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, int err = 0; void __user *argp = (void __user *)arg; mddev_t *mddev = NULL; + int ro; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -5456,6 +5470,34 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = do_md_stop(mddev, 1, 1); goto done_unlock; + case BLKROSET: + if (get_user(ro, (int __user *)(arg))) { + err = -EFAULT; + goto done_unlock; + } + err = -EINVAL; + + /* if the bdev is going readonly the value of mddev->ro + * does not matter, no writes are coming + */ + if (ro) + goto done_unlock; + + /* are we are already prepared for writes? */ + if (mddev->ro != 1) + goto done_unlock; + + /* transitioning to readauto need only happen for + * arrays that call md_write_start + */ + if (mddev->pers) { + err = restart_array(mddev); + if (err == 0) { + mddev->ro = 2; + set_disk_ro(mddev->gendisk, 0); + } + } + goto done_unlock; } /* @@ -6629,7 +6671,7 @@ void md_check_recovery(mddev_t *mddev) if (mddev->bitmap) - bitmap_daemon_work(mddev->bitmap); + bitmap_daemon_work(mddev); if (mddev->ro) return; diff --git a/drivers/md/md.h b/drivers/md/md.h index f184b69ef337..87430fea2875 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -289,6 +289,7 @@ struct mddev_s * hot-adding a bitmap. It should * eventually be settable by sysfs. */ + struct mutex bitmap_mutex; struct list_head all_mddevs; }; diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index ee7646f974a0..e4b11f18adc7 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -301,14 +301,16 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + * violating it, so limit ->max_phys_segments to one, lying + * within a single page. * (Note: it is very unlikely that a device with * merge_bvec_fn will be involved in multipath.) */ - if (q->merge_bvec_fn && - queue_max_sectors(q) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); + if (q->merge_bvec_fn) { + blk_queue_max_phys_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } conf->working_disks++; mddev->degraded--; @@ -476,9 +478,11 @@ static int multipath_run (mddev_t *mddev) /* as we don't honour merge_bvec_fn, we must never risk * violating it, not that we ever expect a device with * a merge_bvec_fn to be involved in multipath */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_phys_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } if (!test_bit(Faulty, &rdev->flags)) conf->working_disks++; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index d3a4ce06015a..3db857cdd33e 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -176,14 +176,15 @@ static int create_strip_zones(mddev_t *mddev) disk_stack_limits(mddev->gendisk, rdev1->bdev, rdev1->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + * violating it, so limit ->max_phys_segments to 1, lying within + * a single page. */ - if (rdev1->bdev->bd_disk->queue->merge_bvec_fn && - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); - + if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_phys_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } if (!smallest || (rdev1->sectors < smallest->sectors)) smallest = rdev1; cnt++; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e07ce2e033a9..791e1951430a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -417,7 +417,7 @@ static void raid1_end_write_request(struct bio *bio, int error) */ static int read_balance(conf_t *conf, r1bio_t *r1_bio) { - const unsigned long this_sector = r1_bio->sector; + const sector_t this_sector = r1_bio->sector; int new_disk = conf->last_used, disk = new_disk; int wonly_disk = -1; const int sectors = r1_bio->sectors; @@ -433,7 +433,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) retry: if (conf->mddev->recovery_cp < MaxSector && (this_sector + sectors >= conf->next_resync)) { - /* Choose the first operation device, for consistancy */ + /* Choose the first operational device, for consistancy */ new_disk = 0; for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev); @@ -845,6 +845,15 @@ static int make_request(struct request_queue *q, struct bio * bio) } mirror = conf->mirrors + rdisk; + if (test_bit(WriteMostly, &mirror->rdev->flags) && + bitmap) { + /* Reading from a write-mostly device must + * take care not to over-take any writes + * that are 'behind' + */ + wait_event(bitmap->behind_wait, + atomic_read(&bitmap->behind_writes) == 0); + } r1_bio->read_disk = rdisk; read_bio = bio_clone(bio, GFP_NOIO); @@ -891,9 +900,10 @@ static int make_request(struct request_queue *q, struct bio * bio) if (test_bit(Faulty, &rdev->flags)) { rdev_dec_pending(rdev, mddev); r1_bio->bios[i] = NULL; - } else + } else { r1_bio->bios[i] = bio; - targets++; + targets++; + } } else r1_bio->bios[i] = NULL; } @@ -921,9 +931,13 @@ static int make_request(struct request_queue *q, struct bio * bio) set_bit(R1BIO_Degraded, &r1_bio->state); } - /* do behind I/O ? */ + /* do behind I/O ? + * Not if there are too many, or cannot allocate memory, + * or a reader on WriteMostly is waiting for behind writes + * to flush */ if (bitmap && atomic_read(&bitmap->behind_writes) < bitmap->max_write_behind && + !waitqueue_active(&bitmap->behind_wait) && (behind_pages = alloc_behind_pages(bio)) != NULL) set_bit(R1BIO_BehindIO, &r1_bio->state); @@ -2104,15 +2118,13 @@ static int stop(mddev_t *mddev) { conf_t *conf = mddev->private; struct bitmap *bitmap = mddev->bitmap; - int behind_wait = 0; /* wait for behind writes to complete */ - while (bitmap && atomic_read(&bitmap->behind_writes) > 0) { - behind_wait++; - printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); /* wait a second */ + if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { + printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop.\n", mdname(mddev)); /* need to kick something here to make sure I/O goes? */ + wait_event(bitmap->behind_wait, + atomic_read(&bitmap->behind_writes) == 0); } raise_barrier(conf); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c2cb7b87b440..1b4e232bce3c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -493,7 +493,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, */ static int read_balance(conf_t *conf, r10bio_t *r10_bio) { - const unsigned long this_sector = r10_bio->sector; + const sector_t this_sector = r10_bio->sector; int disk, slot, nslot; const int sectors = r10_bio->sectors; sector_t new_distance, current_distance; @@ -824,11 +824,29 @@ static int make_request(struct request_queue *q, struct bio * bio) */ bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); + + /* Each of these 'make_request' calls will call 'wait_barrier'. + * If the first succeeds but the second blocks due to the resync + * thread raising the barrier, we will deadlock because the + * IO to the underlying device will be queued in generic_make_request + * and will never complete, so will never reduce nr_pending. + * So increment nr_waiting here so no new raise_barriers will + * succeed, and so the second wait_barrier cannot block. + */ + spin_lock_irq(&conf->resync_lock); + conf->nr_waiting++; + spin_unlock_irq(&conf->resync_lock); + if (make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (make_request(q, &bp->bio2)) generic_make_request(&bp->bio2); + spin_lock_irq(&conf->resync_lock); + conf->nr_waiting--; + wake_up(&conf->wait_barrier); + spin_unlock_irq(&conf->resync_lock); + bio_pair_release(bp); return 0; bad_map: @@ -1155,13 +1173,17 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + /* as we don't honour merge_bvec_fn, we must + * never risk violating it, so limit + * ->max_phys_segments to one lying with a single + * page, as a one page request is never in + * violation. */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_phys_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } p->head_position = 0; rdev->raid_disk = mirror; @@ -2155,12 +2177,14 @@ static int run(mddev_t *mddev) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_sector to one PAGE, as - * a one page request is never in violation. + * violating it, so limit max_phys_segments to 1 lying + * within a single page. */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) - blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_phys_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); + } disk->head_position = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d29215d966da..23949739fc25 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1526,7 +1526,7 @@ static void raid5_end_read_request(struct bio * bi, int error) clear_bit(R5_UPTODATE, &sh->dev[i].flags); atomic_inc(&rdev->read_errors); - if (conf->mddev->degraded) + if (conf->mddev->degraded >= conf->max_degraded) printk_rl(KERN_WARNING "raid5:%s: read error not correctable " "(sector %llu on %s).\n", @@ -1649,8 +1649,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, int previous, int *dd_idx, struct stripe_head *sh) { - long stripe; - unsigned long chunk_number; + sector_t stripe, stripe2; + sector_t chunk_number; unsigned int chunk_offset; int pd_idx, qd_idx; int ddf_layout = 0; @@ -1670,18 +1670,13 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, */ chunk_offset = sector_div(r_sector, sectors_per_chunk); chunk_number = r_sector; - BUG_ON(r_sector != chunk_number); /* * Compute the stripe number */ - stripe = chunk_number / data_disks; - - /* - * Compute the data disk and parity disk indexes inside the stripe - */ - *dd_idx = chunk_number % data_disks; - + stripe = chunk_number; + *dd_idx = sector_div(stripe, data_disks); + stripe2 = stripe; /* * Select the parity disk based on the user selected algorithm. */ @@ -1693,21 +1688,21 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, case 5: switch (algorithm) { case ALGORITHM_LEFT_ASYMMETRIC: - pd_idx = data_disks - stripe % raid_disks; + pd_idx = data_disks - sector_div(stripe2, raid_disks); if (*dd_idx >= pd_idx) (*dd_idx)++; break; case ALGORITHM_RIGHT_ASYMMETRIC: - pd_idx = stripe % raid_disks; + pd_idx = sector_div(stripe2, raid_disks); if (*dd_idx >= pd_idx) (*dd_idx)++; break; case ALGORITHM_LEFT_SYMMETRIC: - pd_idx = data_disks - stripe % raid_disks; + pd_idx = data_disks - sector_div(stripe2, raid_disks); *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; break; case ALGORITHM_RIGHT_SYMMETRIC: - pd_idx = stripe % raid_disks; + pd_idx = sector_div(stripe2, raid_disks); *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; break; case ALGORITHM_PARITY_0: @@ -1727,7 +1722,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, switch (algorithm) { case ALGORITHM_LEFT_ASYMMETRIC: - pd_idx = raid_disks - 1 - (stripe % raid_disks); + pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); qd_idx = pd_idx + 1; if (pd_idx == raid_disks-1) { (*dd_idx)++; /* Q D D D P */ @@ -1736,7 +1731,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, (*dd_idx) += 2; /* D D P Q D */ break; case ALGORITHM_RIGHT_ASYMMETRIC: - pd_idx = stripe % raid_disks; + pd_idx = sector_div(stripe2, raid_disks); qd_idx = pd_idx + 1; if (pd_idx == raid_disks-1) { (*dd_idx)++; /* Q D D D P */ @@ -1745,12 +1740,12 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, (*dd_idx) += 2; /* D D P Q D */ break; case ALGORITHM_LEFT_SYMMETRIC: - pd_idx = raid_disks - 1 - (stripe % raid_disks); + pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); qd_idx = (pd_idx + 1) % raid_disks; *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks; break; case ALGORITHM_RIGHT_SYMMETRIC: - pd_idx = stripe % raid_disks; + pd_idx = sector_div(stripe2, raid_disks); qd_idx = (pd_idx + 1) % raid_disks; *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks; break; @@ -1769,7 +1764,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, /* Exactly the same as RIGHT_ASYMMETRIC, but or * of blocks for computing Q is different. */ - pd_idx = stripe % raid_disks; + pd_idx = sector_div(stripe2, raid_disks); qd_idx = pd_idx + 1; if (pd_idx == raid_disks-1) { (*dd_idx)++; /* Q D D D P */ @@ -1784,7 +1779,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, * D D D P Q rather than * Q D D D P */ - pd_idx = raid_disks - 1 - ((stripe + 1) % raid_disks); + stripe2 += 1; + pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); qd_idx = pd_idx + 1; if (pd_idx == raid_disks-1) { (*dd_idx)++; /* Q D D D P */ @@ -1796,7 +1792,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, case ALGORITHM_ROTATING_N_CONTINUE: /* Same as left_symmetric but Q is before P */ - pd_idx = raid_disks - 1 - (stripe % raid_disks); + pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); qd_idx = (pd_idx + raid_disks - 1) % raid_disks; *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; ddf_layout = 1; @@ -1804,27 +1800,27 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, case ALGORITHM_LEFT_ASYMMETRIC_6: /* RAID5 left_asymmetric, with Q on last device */ - pd_idx = data_disks - stripe % (raid_disks-1); + pd_idx = data_disks - sector_div(stripe2, raid_disks-1); if (*dd_idx >= pd_idx) (*dd_idx)++; qd_idx = raid_disks - 1; break; case ALGORITHM_RIGHT_ASYMMETRIC_6: - pd_idx = stripe % (raid_disks-1); + pd_idx = sector_div(stripe2, raid_disks-1); if (*dd_idx >= pd_idx) (*dd_idx)++; qd_idx = raid_disks - 1; break; case ALGORITHM_LEFT_SYMMETRIC_6: - pd_idx = data_disks - stripe % (raid_disks-1); + pd_idx = data_disks - sector_div(stripe2, raid_disks-1); *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); qd_idx = raid_disks - 1; break; case ALGORITHM_RIGHT_SYMMETRIC_6: - pd_idx = stripe % (raid_disks-1); + pd_idx = sector_div(stripe2, raid_disks-1); *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); qd_idx = raid_disks - 1; break; @@ -1869,14 +1865,14 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) : conf->algorithm; sector_t stripe; int chunk_offset; - int chunk_number, dummy1, dd_idx = i; + sector_t chunk_number; + int dummy1, dd_idx = i; sector_t r_sector; struct stripe_head sh2; chunk_offset = sector_div(new_sector, sectors_per_chunk); stripe = new_sector; - BUG_ON(new_sector != stripe); if (i == sh->pd_idx) return 0; @@ -1969,7 +1965,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) } chunk_number = stripe * data_disks + i; - r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset; + r_sector = chunk_number * sectors_per_chunk + chunk_offset; check = raid5_compute_sector(conf, r_sector, previous, &dummy1, &sh2); @@ -5432,11 +5428,11 @@ static int raid5_start_reshape(mddev_t *mddev) !test_bit(Faulty, &rdev->flags)) { if (raid5_add_disk(mddev, rdev) == 0) { char nm[20]; - if (rdev->raid_disk >= conf->previous_raid_disks) + if (rdev->raid_disk >= conf->previous_raid_disks) { set_bit(In_sync, &rdev->flags); - else + added_devices++; + } else rdev->recovery_offset = 0; - added_devices++; sprintf(nm, "rd%d", rdev->raid_disk); if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) @@ -5448,9 +5444,12 @@ static int raid5_start_reshape(mddev_t *mddev) break; } + /* When a reshape changes the number of devices, ->degraded + * is measured against the large of the pre and post number of + * devices.*/ if (mddev->delta_disks > 0) { spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) + mddev->degraded += (conf->raid_disks - conf->previous_raid_disks) - added_devices; spin_unlock_irqrestore(&conf->device_lock, flags); } |