Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/bitmap.c				|   2
-rw-r--r--	drivers/md/dm-crypt.c				|  38
-rw-r--r--	drivers/md/dm-exception-store.c			|   2
-rw-r--r--	drivers/md/dm-thin.c				| 124
-rw-r--r--	drivers/md/md.c					|  40
-rw-r--r--	drivers/md/persistent-data/dm-btree-remove.c	| 174
-rw-r--r--	drivers/md/raid1.c				|  17
-rw-r--r--	drivers/md/raid10.c				|  17
8 files changed, 242 insertions, 172 deletions
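
A note on the dm-thin.c changes below: they rework the bio prison so that each cell tracks the first bio to claim a block (the "holder") separately from the bios queued behind it, and bio_detain() now returns 0 when the caller becomes the holder and 1 when the cell was already held. The following is a minimal user-space sketch of that contract, using simplified, hypothetical types — no hashing, locking or mempools — and is not the kernel implementation itself:

/*
 * Sketch of the bio-prison "holder" contract (hypothetical stand-in types;
 * the real code hashes keys into hlist buckets under a spinlock and
 * allocates cells from a mempool).
 */
#include <stddef.h>

struct bio;				/* opaque stand-in for struct bio */

struct bio_node {
	struct bio *bio;
	struct bio_node *next;
};

struct cell {
	int occupied;			/* does some bio already hold the block? */
	struct bio *holder;		/* first bio to claim the block */
	struct bio_node *bios;		/* later arrivals, parked behind it */
};

/* Returns 1 if the cell was already held, 0 if @inmate is the new holder. */
static int bio_detain(struct cell *cell, struct bio *inmate,
		      struct bio_node *node)
{
	if (cell->occupied) {
		node->bio = inmate;
		node->next = cell->bios;	/* queue behind the holder */
		cell->bios = node;
		return 1;
	}

	cell->occupied = 1;
	cell->holder = inmate;		/* replaces the old cell->count scheme */
	cell->bios = NULL;
	return 0;
}

Separating the holder from the other inmates is what lets the new cell_release_no_holder() hand only the waiters back to the pool, so cell_defer_except() no longer has to filter the already-processed bio out by hand.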
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 6d03774b176e..2a8722b9f91b 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1904,6 +1904,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
 			if (mddev->pers) {
 				mddev->pers->quiesce(mddev, 1);
 				rv = bitmap_create(mddev);
+				if (!rv)
+					rv = bitmap_load(mddev);
 				if (rv) {
 					bitmap_destroy(mddev);
 					mddev->bitmap_info.offset = 0;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 8c2a000cf3f5..58d8c6dfb085 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -176,7 +176,6 @@ struct crypt_config {
 
 #define MIN_IOS        16
 #define MIN_POOL_PAGES 32
-#define MIN_BIO_PAGES  8
 
 static struct kmem_cache *_crypt_io_pool;
 
@@ -848,12 +847,11 @@
 		}
 
 		/*
-		 * if additional pages cannot be allocated without waiting,
-		 * return a partially allocated bio, the caller will then try
-		 * to allocate additional bios while submitting this partial bio
+		 * If additional pages cannot be allocated without waiting,
+		 * return a partially-allocated bio. The caller will then try
+		 * to allocate more bios while submitting this partial bio.
 		 */
-		if (i == (MIN_BIO_PAGES - 1))
-			gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+		gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
 
 		len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
 
@@ -1046,16 +1044,14 @@ static void kcryptd_queue_io(struct dm_crypt_io *io)
 	queue_work(cc->io_queue, &io->work);
 }
 
-static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
-					  int error, int async)
+static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 {
 	struct bio *clone = io->ctx.bio_out;
 	struct crypt_config *cc = io->target->private;
 
-	if (unlikely(error < 0)) {
+	if (unlikely(io->error < 0)) {
 		crypt_free_buffer_pages(cc, clone);
 		bio_put(clone);
-		io->error = -EIO;
 		crypt_dec_pending(io);
 		return;
 	}
@@ -1106,12 +1102,16 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 		sector += bio_sectors(clone);
 
 		crypt_inc_pending(io);
+
 		r = crypt_convert(cc, &io->ctx);
+		if (r < 0)
+			io->error = -EIO;
+
 		crypt_finished = atomic_dec_and_test(&io->ctx.pending);
 
 		/* Encryption was already finished, submit io now */
 		if (crypt_finished) {
-			kcryptd_crypt_write_io_submit(io, r, 0);
+			kcryptd_crypt_write_io_submit(io, 0);
 
 			/*
 			 * If there was an error, do not try next fragments.
@@ -1162,11 +1162,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	crypt_dec_pending(io);
 }
 
-static void kcryptd_crypt_read_done(struct dm_crypt_io *io, int error)
+static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
 {
-	if (unlikely(error < 0))
-		io->error = -EIO;
-
 	crypt_dec_pending(io);
 }
 
@@ -1181,9 +1178,11 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 		       io->sector);
 
 	r = crypt_convert(cc, &io->ctx);
+	if (r < 0)
+		io->error = -EIO;
 
 	if (atomic_dec_and_test(&io->ctx.pending))
-		kcryptd_crypt_read_done(io, r);
+		kcryptd_crypt_read_done(io);
 
 	crypt_dec_pending(io);
 }
@@ -1204,15 +1203,18 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
 	if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post)
 		error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq);
 
+	if (error < 0)
+		io->error = -EIO;
+
 	mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool);
 
 	if (!atomic_dec_and_test(&ctx->pending))
 		return;
 
 	if (bio_data_dir(io->base_bio) == READ)
-		kcryptd_crypt_read_done(io, error);
+		kcryptd_crypt_read_done(io);
 	else
-		kcryptd_crypt_write_io_submit(io, error, 1);
+		kcryptd_crypt_write_io_submit(io, 1);
 }
 
 static void kcryptd_crypt(struct work_struct *work)
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 042e71996569..aa70f7d43a1a 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -283,7 +283,7 @@ int dm_exception_store_init(void)
 	return 0;
 
 persistent_fail:
-	dm_persistent_snapshot_exit();
+	dm_transient_snapshot_exit();
transient_fail:
 	return r;
 }
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c3087575fef0..da2f0217df66 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -124,7 +124,7 @@ struct cell {
 	struct hlist_node list;
 	struct bio_prison *prison;
 	struct cell_key key;
-	unsigned count;
+	struct bio *holder;
 	struct bio_list bios;
 };
 
@@ -220,55 +220,60 @@ static struct cell *__search_bucket(struct hlist_head *bucket,
  * This may block if a new cell needs allocating.  You must ensure that
  * cells will be unlocked even if the calling thread is blocked.
  *
- * Returns the number of entries in the cell prior to the new addition
- * or < 0 on failure.
+ * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
  */
 static int bio_detain(struct bio_prison *prison, struct cell_key *key,
 		      struct bio *inmate, struct cell **ref)
 {
-	int r;
+	int r = 1;
 	unsigned long flags;
 	uint32_t hash = hash_key(prison, key);
-	struct cell *uninitialized_var(cell), *cell2 = NULL;
+	struct cell *cell, *cell2;
 
 	BUG_ON(hash > prison->nr_buckets);
 
 	spin_lock_irqsave(&prison->lock, flags);
+
 	cell = __search_bucket(prison->cells + hash, key);
+	if (cell) {
+		bio_list_add(&cell->bios, inmate);
+		goto out;
+	}
 
-	if (!cell) {
-		/*
-		 * Allocate a new cell
-		 */
-		spin_unlock_irqrestore(&prison->lock, flags);
-		cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
-		spin_lock_irqsave(&prison->lock, flags);
+	/*
+	 * Allocate a new cell
+	 */
+	spin_unlock_irqrestore(&prison->lock, flags);
+	cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
+	spin_lock_irqsave(&prison->lock, flags);
 
-		/*
-		 * We've been unlocked, so we have to double check that
-		 * nobody else has inserted this cell in the meantime.
-		 */
-		cell = __search_bucket(prison->cells + hash, key);
+	/*
+	 * We've been unlocked, so we have to double check that
+	 * nobody else has inserted this cell in the meantime.
+	 */
+	cell = __search_bucket(prison->cells + hash, key);
+	if (cell) {
+		mempool_free(cell2, prison->cell_pool);
+		bio_list_add(&cell->bios, inmate);
+		goto out;
+	}
+
+	/*
+	 * Use new cell.
+	 */
+	cell = cell2;
 
-		if (!cell) {
-			cell = cell2;
-			cell2 = NULL;
+	cell->prison = prison;
+	memcpy(&cell->key, key, sizeof(cell->key));
+	cell->holder = inmate;
+	bio_list_init(&cell->bios);
+	hlist_add_head(&cell->list, prison->cells + hash);
 
-			cell->prison = prison;
-			memcpy(&cell->key, key, sizeof(cell->key));
-			cell->count = 0;
-			bio_list_init(&cell->bios);
-			hlist_add_head(&cell->list, prison->cells + hash);
-		}
-	}
+	r = 0;
 
-	r = cell->count++;
-	bio_list_add(&cell->bios, inmate);
+out:
 	spin_unlock_irqrestore(&prison->lock, flags);
 
-	if (cell2)
-		mempool_free(cell2, prison->cell_pool);
-
 	*ref = cell;
 
 	return r;
@@ -283,8 +288,8 @@ static void __cell_release(struct cell *cell, struct bio_list *inmates)
 
 	hlist_del(&cell->list);
 
-	if (inmates)
-		bio_list_merge(inmates, &cell->bios);
+	bio_list_add(inmates, cell->holder);
+	bio_list_merge(inmates, &cell->bios);
 
 	mempool_free(cell, prison->cell_pool);
 }
@@ -305,22 +310,44 @@ static void cell_release(struct cell *cell, struct bio_list *bios)
  * bio may be in the cell.  This function releases the cell, and also does
  * a sanity check.
  */
+static void __cell_release_singleton(struct cell *cell, struct bio *bio)
+{
+	hlist_del(&cell->list);
+	BUG_ON(cell->holder != bio);
+	BUG_ON(!bio_list_empty(&cell->bios));
+}
+
 static void cell_release_singleton(struct cell *cell, struct bio *bio)
 {
-	struct bio_prison *prison = cell->prison;
-	struct bio_list bios;
-	struct bio *b;
 	unsigned long flags;
-
-	bio_list_init(&bios);
+	struct bio_prison *prison = cell->prison;
 
 	spin_lock_irqsave(&prison->lock, flags);
-	__cell_release(cell, &bios);
+	__cell_release_singleton(cell, bio);
 	spin_unlock_irqrestore(&prison->lock, flags);
+}
+
+/*
+ * Sometimes we don't want the holder, just the additional bios.
+ */
+static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
+{
+	struct bio_prison *prison = cell->prison;
 
-	b = bio_list_pop(&bios);
-	BUG_ON(b != bio);
-	BUG_ON(!bio_list_empty(&bios));
+	hlist_del(&cell->list);
+	bio_list_merge(inmates, &cell->bios);
+
+	mempool_free(cell, prison->cell_pool);
+}
+
+static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
+{
+	unsigned long flags;
+	struct bio_prison *prison = cell->prison;
+
+	spin_lock_irqsave(&prison->lock, flags);
+	__cell_release_no_holder(cell, inmates);
+	spin_unlock_irqrestore(&prison->lock, flags);
 }
 
 static void cell_error(struct cell *cell)
@@ -800,21 +827,16 @@ static void cell_defer(struct thin_c *tc, struct cell *cell,
  * Same as cell_defer above, except it omits one particular detainee,
  * a write bio that covers the block and has already been processed.
  */
-static void cell_defer_except(struct thin_c *tc, struct cell *cell,
-			      struct bio *exception)
+static void cell_defer_except(struct thin_c *tc, struct cell *cell)
 {
 	struct bio_list bios;
-	struct bio *bio;
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
 	bio_list_init(&bios);
-	cell_release(cell, &bios);
 
 	spin_lock_irqsave(&pool->lock, flags);
-	while ((bio = bio_list_pop(&bios)))
-		if (bio != exception)
-			bio_list_add(&pool->deferred_bios, bio);
+	cell_release_no_holder(cell, &pool->deferred_bios);
 	spin_unlock_irqrestore(&pool->lock, flags);
 
 	wake_worker(pool);
@@ -854,7 +876,7 @@ static void process_prepared_mapping(struct new_mapping *m)
 	 * the bios in the cell.
 	 */
 	if (bio) {
-		cell_defer_except(tc, m->cell, bio);
+		cell_defer_except(tc, m->cell);
 		bio_endio(bio, 0);
 	} else
 		cell_defer(tc, m->cell, m->data_block);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f47f1f8ac44b..6f37aa48c075 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1801,13 +1801,13 @@ retry:
 					| BB_LEN(internal_bb));
 			*bbp++ = cpu_to_le64(store_bb);
 		}
+		bb->changed = 0;
 		if (read_seqretry(&bb->lock, seq))
 			goto retry;
 
 		bb->sector = (rdev->sb_start +
 			      (int)le32_to_cpu(sb->bblog_offset));
 		bb->size = le16_to_cpu(sb->bblog_size);
-		bb->changed = 0;
 	}
 }
 
@@ -2362,6 +2362,7 @@ repeat:
 	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
 	list_for_each_entry(rdev, &mddev->disks, same_set) {
 		if (rdev->badblocks.changed) {
+			rdev->badblocks.changed = 0;
 			md_ack_all_badblocks(&rdev->badblocks);
 			md_error(mddev, rdev);
 		}
@@ -8097,30 +8098,23 @@ static int md_notify_reboot(struct notifier_block *this,
 	struct list_head *tmp;
 	struct mddev *mddev;
 	int need_delay = 0;
 
-	if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
-
-		printk(KERN_INFO "md: stopping all md devices.\n");
-
-		for_each_mddev(mddev, tmp) {
-			if (mddev_trylock(mddev)) {
-				/* Force a switch to readonly even array
-				 * appears to still be in use. Hence
-				 * the '100'.
-				 */
-				md_set_readonly(mddev, 100);
-				mddev_unlock(mddev);
-			}
-			need_delay = 1;
+	for_each_mddev(mddev, tmp) {
+		if (mddev_trylock(mddev)) {
+			__md_stop_writes(mddev);
+			mddev->safemode = 2;
+			mddev_unlock(mddev);
 		}
-		/*
-		 * certain more exotic SCSI devices are known to be
-		 * volatile wrt too early system reboots. While the
-		 * right place to handle this issue is the given
-		 * driver, we do want to have a safe RAID driver ...
-		 */
-		if (need_delay)
-			mdelay(1000*1);
+		need_delay = 1;
 	}
+	/*
+	 * certain more exotic SCSI devices are known to be
+	 * volatile wrt too early system reboots. While the
+	 * right place to handle this issue is the given
+	 * driver, we do want to have a safe RAID driver ...
+	 */
+	if (need_delay)
+		mdelay(1000*1);
+
 	return NOTIFY_DONE;
 }
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index 023fbc2d389e..1a35caf2563d 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -128,18 +128,9 @@ static void delete_at(struct node *n, unsigned index)
 	n->header.nr_entries = cpu_to_le32(nr_entries - 1);
 }
 
-static unsigned del_threshold(struct node *n)
-{
-	return le32_to_cpu(n->header.max_entries) / 3;
-}
-
 static unsigned merge_threshold(struct node *n)
 {
-	/*
-	 * The extra one is because we know we're potentially going to
-	 * delete an entry.
-	 */
-	return 2 * (le32_to_cpu(n->header.max_entries) / 3) + 1;
+	return le32_to_cpu(n->header.max_entries) / 3;
 }
 
 struct child {
@@ -188,6 +179,15 @@ static int exit_child(struct dm_btree_info *info, struct child *c)
 
 static void shift(struct node *left, struct node *right, int count)
 {
+	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
+	uint32_t r_max_entries = le32_to_cpu(right->header.max_entries);
+
+	BUG_ON(max_entries != r_max_entries);
+	BUG_ON(nr_left - count > max_entries);
+	BUG_ON(nr_right + count > max_entries);
+
 	if (!count)
 		return;
 
@@ -199,13 +199,8 @@ static void shift(struct node *left, struct node *right, int count)
 		node_shift(right, count);
 	}
 
-	left->header.nr_entries =
-		cpu_to_le32(le32_to_cpu(left->header.nr_entries) - count);
-	BUG_ON(le32_to_cpu(left->header.nr_entries) > le32_to_cpu(left->header.max_entries));
-
-	right->header.nr_entries =
-		cpu_to_le32(le32_to_cpu(right->header.nr_entries) + count);
-	BUG_ON(le32_to_cpu(right->header.nr_entries) > le32_to_cpu(right->header.max_entries));
+	left->header.nr_entries = cpu_to_le32(nr_left - count);
+	right->header.nr_entries = cpu_to_le32(nr_right + count);
 }
 
 static void __rebalance2(struct dm_btree_info *info, struct node *parent,
@@ -215,8 +210,9 @@ static void __rebalance2(struct dm_btree_info *info, struct node *parent,
 	struct node *right = r->n;
 	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
 	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+	unsigned threshold = 2 * merge_threshold(left) + 1;
 
-	if (nr_left + nr_right <= merge_threshold(left)) {
+	if (nr_left + nr_right < threshold) {
 		/*
 		 * Merge
 		 */
@@ -234,9 +230,6 @@ static void __rebalance2(struct dm_btree_info *info, struct node *parent,
 		 * Rebalance.
 		 */
 		unsigned target_left = (nr_left + nr_right) / 2;
-		unsigned shift_ = nr_left - target_left;
-		BUG_ON(le32_to_cpu(left->header.max_entries) <= nr_left - shift_);
-		BUG_ON(le32_to_cpu(right->header.max_entries) <= nr_right + shift_);
 		shift(left, right, nr_left - target_left);
 		*key_ptr(parent, r->index) = right->keys[0];
 	}
@@ -272,6 +265,84 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
 	return exit_child(info, &right);
 }
 
+/*
+ * We dump as many entries from center as possible into left, then the rest
+ * in right, then rebalance2. This wastes some cpu, but I want something
+ * simple atm.
+ */
+static void delete_center_node(struct dm_btree_info *info, struct node *parent,
+			       struct child *l, struct child *c, struct child *r,
+			       struct node *left, struct node *center, struct node *right,
+			       uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
+{
+	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
+	unsigned shift = min(max_entries - nr_left, nr_center);
+
+	BUG_ON(nr_left + shift > max_entries);
+	node_copy(left, center, -shift);
+	left->header.nr_entries = cpu_to_le32(nr_left + shift);
+
+	if (shift != nr_center) {
+		shift = nr_center - shift;
+		BUG_ON((nr_right + shift) > max_entries);
+		node_shift(right, shift);
+		node_copy(center, right, shift);
+		right->header.nr_entries = cpu_to_le32(nr_right + shift);
+	}
+	*key_ptr(parent, r->index) = right->keys[0];
+
+	delete_at(parent, c->index);
+	r->index--;
+
+	dm_tm_dec(info->tm, dm_block_location(c->block));
+	__rebalance2(info, parent, l, r);
+}
+
+/*
+ * Redistributes entries among 3 sibling nodes.
+ */
+static void redistribute3(struct dm_btree_info *info, struct node *parent,
+			  struct child *l, struct child *c, struct child *r,
+			  struct node *left, struct node *center, struct node *right,
+			  uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
+{
+	int s;
+	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
+	unsigned target = (nr_left + nr_center + nr_right) / 3;
+	BUG_ON(target > max_entries);
+
+	if (nr_left < nr_right) {
+		s = nr_left - target;
+
+		if (s < 0 && nr_center < -s) {
+			/* not enough in central node */
+			shift(left, center, nr_center);
+			s = nr_center - target;
+			shift(left, right, s);
+			nr_right += s;
+		} else
+			shift(left, center, s);
+
+		shift(center, right, target - nr_right);
+
+	} else {
+		s = target - nr_right;
+		if (s > 0 && nr_center < s) {
+			/* not enough in central node */
+			shift(center, right, nr_center);
+			s = target - nr_center;
+			shift(left, right, s);
+			nr_left -= s;
+		} else
+			shift(center, right, s);
+
+		shift(left, center, nr_left - target);
+	}
+
+	*key_ptr(parent, c->index) = center->keys[0];
+	*key_ptr(parent, r->index) = right->keys[0];
+}
+
 static void __rebalance3(struct dm_btree_info *info, struct node *parent,
 			 struct child *l, struct child *c, struct child *r)
 {
@@ -282,62 +353,18 @@ static void __rebalance3(struct dm_btree_info *info, struct node *parent,
 	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
 	uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
 	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
-	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
-	unsigned target;
+	unsigned threshold = merge_threshold(left) * 4 + 1;
 
 	BUG_ON(left->header.max_entries != center->header.max_entries);
 	BUG_ON(center->header.max_entries != right->header.max_entries);
 
-	if (((nr_left + nr_center + nr_right) / 2) < merge_threshold(center)) {
-		/*
-		 * Delete center node:
-		 *
-		 * We dump as many entries from center as possible into
-		 * left, then the rest in right, then rebalance2. This
-		 * wastes some cpu, but I want something simple atm.
-		 */
-		unsigned shift = min(max_entries - nr_left, nr_center);
-
-		BUG_ON(nr_left + shift > max_entries);
-		node_copy(left, center, -shift);
-		left->header.nr_entries = cpu_to_le32(nr_left + shift);
-
-		if (shift != nr_center) {
-			shift = nr_center - shift;
-			BUG_ON((nr_right + shift) >= max_entries);
-			node_shift(right, shift);
-			node_copy(center, right, shift);
-			right->header.nr_entries = cpu_to_le32(nr_right + shift);
-		}
-		*key_ptr(parent, r->index) = right->keys[0];
-
-		delete_at(parent, c->index);
-		r->index--;
-
-		dm_tm_dec(info->tm, dm_block_location(c->block));
-		__rebalance2(info, parent, l, r);
-
-		return;
-	}
-
-	/*
-	 * Rebalance
-	 */
-	target = (nr_left + nr_center + nr_right) / 3;
-	BUG_ON(target > max_entries);
-
-	/*
-	 * Adjust the left node
-	 */
-	shift(left, center, nr_left - target);
-
-	/*
-	 * Adjust the right node
-	 */
-	shift(center, right, target - nr_right);
-	*key_ptr(parent, c->index) = center->keys[0];
-	*key_ptr(parent, r->index) = right->keys[0];
+	if ((nr_left + nr_center + nr_right) < threshold)
+		delete_center_node(info, parent, l, c, r, left, center, right,
+				   nr_left, nr_center, nr_right);
+	else
+		redistribute3(info, parent, l, c, r, left, center, right,
+			      nr_left, nr_center, nr_right);
 }
 
 static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
@@ -441,9 +468,6 @@ static int rebalance_children(struct shadow_spine *s,
 		if (r)
 			return r;
 
-		if (child_entries > del_threshold(n))
-			return 0;
-
 		has_left_sibling = i > 0;
 		has_right_sibling =
 			i < (le32_to_cpu(n->header.nr_entries) - 1);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7d9e071f2304..7af60ec98c60 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -731,9 +731,22 @@ static void wait_barrier(struct r1conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	if (conf->barrier) {
 		conf->nr_waiting++;
-		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+		/* Wait for the barrier to drop.
+		 * However if there are already pending
+		 * requests (preventing the barrier from
+		 * rising completely), and the
+		 * pre-process bio queue isn't empty,
+		 * then don't wait, as we need to empty
+		 * that queue to get the nr_pending
+		 * count down.
+		 */
+		wait_event_lock_irq(conf->wait_barrier,
+				    !conf->barrier ||
+				    (conf->nr_pending &&
+				     current->bio_list &&
+				     !bio_list_empty(current->bio_list)),
 				    conf->resync_lock,
-				    );
+			);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 685ddf325ee4..b2194494f5f3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -790,9 +790,22 @@ static void wait_barrier(struct r10conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	if (conf->barrier) {
 		conf->nr_waiting++;
-		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+		/* Wait for the barrier to drop.
+		 * However if there are already pending
+		 * requests (preventing the barrier from
+		 * rising completely), and the
+		 * pre-process bio queue isn't empty,
+		 * then don't wait, as we need to empty
+		 * that queue to get the nr_pending
+		 * count down.
+		 */
+		wait_event_lock_irq(conf->wait_barrier,
+				    !conf->barrier ||
+				    (conf->nr_pending &&
+				     current->bio_list &&
+				     !bio_list_empty(current->bio_list)),
 				    conf->resync_lock,
-				    );
+			);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
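
A note on the dm-btree-remove.c arithmetic above: del_threshold() and the old merge_threshold() collapse into a single threshold of max_entries / 3. __rebalance2() now merges two siblings when they hold fewer than 2 * threshold + 1 entries between them, and __rebalance3() deletes the center node when all three hold fewer than 4 * threshold + 1. A self-contained check of those cut-offs, assuming a hypothetical node capacity of 126 entries (not kernel code):

#include <assert.h>
#include <stdint.h>

/* The patched merge_threshold(), lifted out of struct node for the demo. */
static unsigned merge_threshold(uint32_t max_entries)
{
	return max_entries / 3;
}

int main(void)
{
	uint32_t max_entries = 126;		/* assumed per-node capacity */
	unsigned t = merge_threshold(max_entries);	/* 42 */

	/* __rebalance2(): merge when nr_left + nr_right < 2t + 1 (85) */
	assert(40 + 44 < 2 * t + 1);		/* 84  -> merge into one node */
	assert(!(40 + 45 < 2 * t + 1));		/* 85  -> redistribute instead */

	/* __rebalance3(): delete center when the total < 4t + 1 (169) */
	assert(56 + 56 + 56 < 4 * t + 1);	/* 168 -> delete_center_node() */

	return 0;
}

With a capacity of 126 the two-node cut-off lands at 85, so a merge produces at most 84 entries and always fits in a single node; the three-node cut-off of 169 similarly guarantees the two surviving siblings can absorb everything the deleted center held.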