summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorSachin Nikam <snikam@nvidia.com>2013-03-06 19:12:15 +0530
committerSachin Nikam <snikam@nvidia.com>2013-03-06 19:31:07 +0530
commitd3dd6b4227fe3e59df0a55f679ec196e4ccb1e3a (patch)
tree0137a07ecab729a19cea68b671c88def5bf83a2d /drivers/md
parentbb36790494c382ec26250f351c8cea76613f0f02 (diff)
parent2713e2797a78a0fdda765bc5ad7fed41e94818ba (diff)
Merge branch 'linux-3.4.35' into rel-17
Bug 1243631 Change-Id: I915826047b2e20f0ad0a7d75df295c6cbf6e5b0a
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-ioctl.c8
-rw-r--r--drivers/md/dm-table.c26
-rw-r--r--drivers/md/dm-verity.c8
-rw-r--r--drivers/md/dm.c64
-rw-r--r--drivers/md/md.c26
-rw-r--r--drivers/md/persistent-data/dm-btree-internal.h16
-rw-r--r--drivers/md/persistent-data/dm-btree-remove.c50
-rw-r--r--drivers/md/persistent-data/dm-btree-spine.c6
-rw-r--r--drivers/md/persistent-data/dm-btree.c22
-rw-r--r--drivers/md/raid1.c2
-rw-r--r--drivers/md/raid10.c120
-rw-r--r--drivers/md/raid10.h2
-rw-r--r--drivers/md/raid5.c4
13 files changed, 214 insertions, 140 deletions
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index a1a3e6df17b8..f011d4b3c139 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1563,6 +1563,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
if (copy_from_user(dmi, user, tmp.data_size))
goto bad;
+ /*
+ * Abort if something changed the ioctl data while it was being copied.
+ */
+ if (dmi->data_size != tmp.data_size) {
+ DMERR("rejecting ioctl: data size modified while processing parameters");
+ goto bad;
+ }
+
/* Wipe the user buffer so we do not return it to userspace */
if (secure_data && clear_user(user, tmp.data_size))
goto bad;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2e227fbf1622..f220a695a4b1 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1351,17 +1351,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
return q && blk_queue_nonrot(q);
}
-static bool dm_table_is_nonrot(struct dm_table *t)
+static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct request_queue *q = bdev_get_queue(dev->bdev);
+
+ return q && !blk_queue_add_random(q);
+}
+
+static bool dm_table_all_devices_attribute(struct dm_table *t,
+ iterate_devices_callout_fn func)
{
struct dm_target *ti;
unsigned i = 0;
- /* Ensure that all underlying device are non-rotational. */
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
if (!ti->type->iterate_devices ||
- !ti->type->iterate_devices(ti, device_is_nonrot, NULL))
+ !ti->type->iterate_devices(ti, func, NULL))
return 0;
}
@@ -1393,7 +1401,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_discard_zeroes_data(t))
q->limits.discard_zeroes_data = 0;
- if (dm_table_is_nonrot(t))
+ /* Ensure that all underlying devices are non-rotational. */
+ if (dm_table_all_devices_attribute(t, device_is_nonrot))
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
else
queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
@@ -1401,6 +1410,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
dm_table_set_integrity(t);
/*
+ * Determine whether or not this queue's I/O timings contribute
+ * to the entropy pool, Only request-based targets use this.
+ * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
+ * have it set.
+ */
+ if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
+ queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+
+ /*
* QUEUE_FLAG_STACKABLE must be set after all queue settings are
* visible to other CPUs because, once the flag is set, incoming bios
* are processed by request-based dm, which refers to the queue
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index fa365d39b612..68bf5c37c3fe 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
v->hash_dev_block_bits = ffs(num) - 1;
if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
- num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) !=
- (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) {
+ (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
+ >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
ti->error = "Invalid data blocks";
r = -EINVAL;
goto bad;
@@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
- num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) !=
- (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) {
+ (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
+ >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
ti->error = "Invalid hash start";
r = -EINVAL;
goto bad;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e24143cc2040..32370ea32e22 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -754,8 +754,14 @@ static void rq_completed(struct mapped_device *md, int rw, int run_queue)
if (!md_in_flight(md))
wake_up(&md->wait);
+ /*
+ * Run this off this callpath, as drivers could invoke end_io while
+ * inside their request_fn (and holding the queue lock). Calling
+ * back into ->request_fn() could deadlock attempting to grab the
+ * queue lock again.
+ */
if (run_queue)
- blk_run_queue(md->queue);
+ blk_run_queue_async(md->queue);
/*
* dm_put() must be at the end of this function. See the comment above
@@ -865,10 +871,14 @@ static void dm_done(struct request *clone, int error, bool mapped)
{
int r = error;
struct dm_rq_target_io *tio = clone->end_io_data;
- dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
+ dm_request_endio_fn rq_end_io = NULL;
- if (mapped && rq_end_io)
- r = rq_end_io(tio->ti, clone, error, &tio->info);
+ if (tio->ti) {
+ rq_end_io = tio->ti->type->rq_end_io;
+
+ if (mapped && rq_end_io)
+ r = rq_end_io(tio->ti, clone, error, &tio->info);
+ }
if (r <= 0)
/* The target wants to complete the I/O */
@@ -1566,15 +1576,6 @@ static int map_request(struct dm_target *ti, struct request *clone,
int r, requeued = 0;
struct dm_rq_target_io *tio = clone->end_io_data;
- /*
- * Hold the md reference here for the in-flight I/O.
- * We can't rely on the reference count by device opener,
- * because the device may be closed during the request completion
- * when all bios are completed.
- * See the comment in rq_completed() too.
- */
- dm_get(md);
-
tio->ti = ti;
r = ti->type->map_rq(ti, clone, &tio->info);
switch (r) {
@@ -1606,6 +1607,26 @@ static int map_request(struct dm_target *ti, struct request *clone,
return requeued;
}
+static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
+{
+ struct request *clone;
+
+ blk_start_request(orig);
+ clone = orig->special;
+ atomic_inc(&md->pending[rq_data_dir(clone)]);
+
+ /*
+ * Hold the md reference here for the in-flight I/O.
+ * We can't rely on the reference count by device opener,
+ * because the device may be closed during the request completion
+ * when all bios are completed.
+ * See the comment in rq_completed() too.
+ */
+ dm_get(md);
+
+ return clone;
+}
+
/*
* q->request_fn for request-based dm.
* Called with the queue lock held.
@@ -1635,14 +1656,21 @@ static void dm_request_fn(struct request_queue *q)
pos = blk_rq_pos(rq);
ti = dm_table_find_target(map, pos);
- BUG_ON(!dm_target_is_valid(ti));
+ if (!dm_target_is_valid(ti)) {
+ /*
+ * Must perform setup, that dm_done() requires,
+ * before calling dm_kill_unmapped_request
+ */
+ DMERR_LIMIT("request attempted access beyond the end of device");
+ clone = dm_start_request(md, rq);
+ dm_kill_unmapped_request(clone, -EIO);
+ continue;
+ }
if (ti->type->busy && ti->type->busy(ti))
goto delay_and_out;
- blk_start_request(rq);
- clone = rq->special;
- atomic_inc(&md->pending[rq_data_dir(clone)]);
+ clone = dm_start_request(md, rq);
spin_unlock(q->queue_lock);
if (map_request(ti, clone, md))
@@ -1662,8 +1690,6 @@ delay_and_out:
blk_delay_queue(q, HZ / 10);
out:
dm_table_put(map);
-
- return;
}
int dm_underlying_device_busy(struct request_queue *q)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9ee8ce3e9650..0a447a1be2ca 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1143,8 +1143,11 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
ret = 0;
}
rdev->sectors = rdev->sb_start;
- /* Limit to 4TB as metadata cannot record more than that */
- if (rdev->sectors >= (2ULL << 32))
+ /* Limit to 4TB as metadata cannot record more than that.
+ * (not needed for Linear and RAID0 as metadata doesn't
+ * record this size)
+ */
+ if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
rdev->sectors = (2ULL << 32) - 2;
if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
@@ -1426,7 +1429,7 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
/* Limit to 4TB as metadata cannot record more than that.
* 4TB == 2^32 KB, or 2*2^32 sectors.
*/
- if (num_sectors >= (2ULL << 32))
+ if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
num_sectors = (2ULL << 32) - 2;
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
@@ -1802,10 +1805,10 @@ retry:
memset(bbp, 0xff, PAGE_SIZE);
for (i = 0 ; i < bb->count ; i++) {
- u64 internal_bb = *p++;
+ u64 internal_bb = p[i];
u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
| BB_LEN(internal_bb));
- *bbp++ = cpu_to_le64(store_bb);
+ bbp[i] = cpu_to_le64(store_bb);
}
bb->changed = 0;
if (read_seqretry(&bb->lock, seq))
@@ -7417,6 +7420,8 @@ static int remove_and_add_spares(struct mddev *mddev)
}
}
}
+ if (removed)
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
return spares;
}
@@ -7430,9 +7435,11 @@ static void reap_sync_thread(struct mddev *mddev)
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
/* activate any spares */
- if (mddev->pers->spare_active(mddev))
+ if (mddev->pers->spare_active(mddev)) {
sysfs_notify(&mddev->kobj, NULL,
"degraded");
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ }
}
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
mddev->pers->finish_reshape)
@@ -7687,9 +7694,9 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
sector_t *first_bad, int *bad_sectors)
{
int hi;
- int lo = 0;
+ int lo;
u64 *p = bb->page;
- int rv = 0;
+ int rv;
sector_t target = s + sectors;
unsigned seq;
@@ -7704,7 +7711,8 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
retry:
seq = read_seqbegin(&bb->lock);
-
+ lo = 0;
+ rv = 0;
hi = bb->count;
/* Binary search between lo and hi for 'target'
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index 5709bfeab1e8..accbb05f17b6 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -36,13 +36,13 @@ struct node_header {
__le32 padding;
} __packed;
-struct node {
+struct btree_node {
struct node_header header;
__le64 keys[0];
} __packed;
-void inc_children(struct dm_transaction_manager *tm, struct node *n,
+void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
struct dm_btree_value_type *vt);
int new_block(struct dm_btree_info *info, struct dm_block **result);
@@ -64,7 +64,7 @@ struct ro_spine {
void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
int exit_ro_spine(struct ro_spine *s);
int ro_step(struct ro_spine *s, dm_block_t new_child);
-struct node *ro_node(struct ro_spine *s);
+struct btree_node *ro_node(struct ro_spine *s);
struct shadow_spine {
struct dm_btree_info *info;
@@ -98,17 +98,17 @@ int shadow_root(struct shadow_spine *s);
/*
* Some inlines.
*/
-static inline __le64 *key_ptr(struct node *n, uint32_t index)
+static inline __le64 *key_ptr(struct btree_node *n, uint32_t index)
{
return n->keys + index;
}
-static inline void *value_base(struct node *n)
+static inline void *value_base(struct btree_node *n)
{
return &n->keys[le32_to_cpu(n->header.max_entries)];
}
-static inline void *value_ptr(struct node *n, uint32_t index)
+static inline void *value_ptr(struct btree_node *n, uint32_t index)
{
uint32_t value_size = le32_to_cpu(n->header.value_size);
return value_base(n) + (value_size * index);
@@ -117,7 +117,7 @@ static inline void *value_ptr(struct node *n, uint32_t index)
/*
* Assumes the values are suitably-aligned and converts to core format.
*/
-static inline uint64_t value64(struct node *n, uint32_t index)
+static inline uint64_t value64(struct btree_node *n, uint32_t index)
{
__le64 *values_le = value_base(n);
@@ -127,7 +127,7 @@ static inline uint64_t value64(struct node *n, uint32_t index)
/*
* Searching for a key within a single node.
*/
-int lower_bound(struct node *n, uint64_t key);
+int lower_bound(struct btree_node *n, uint64_t key);
extern struct dm_block_validator btree_node_validator;
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index aa71e2359a07..c4f28133ef82 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -53,7 +53,7 @@
/*
* Some little utilities for moving node data around.
*/
-static void node_shift(struct node *n, int shift)
+static void node_shift(struct btree_node *n, int shift)
{
uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
uint32_t value_size = le32_to_cpu(n->header.value_size);
@@ -79,7 +79,7 @@ static void node_shift(struct node *n, int shift)
}
}
-static void node_copy(struct node *left, struct node *right, int shift)
+static void node_copy(struct btree_node *left, struct btree_node *right, int shift)
{
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t value_size = le32_to_cpu(left->header.value_size);
@@ -108,7 +108,7 @@ static void node_copy(struct node *left, struct node *right, int shift)
/*
* Delete a specific entry from a leaf node.
*/
-static void delete_at(struct node *n, unsigned index)
+static void delete_at(struct btree_node *n, unsigned index)
{
unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
unsigned nr_to_copy = nr_entries - (index + 1);
@@ -128,7 +128,7 @@ static void delete_at(struct node *n, unsigned index)
n->header.nr_entries = cpu_to_le32(nr_entries - 1);
}
-static unsigned merge_threshold(struct node *n)
+static unsigned merge_threshold(struct btree_node *n)
{
return le32_to_cpu(n->header.max_entries) / 3;
}
@@ -136,7 +136,7 @@ static unsigned merge_threshold(struct node *n)
struct child {
unsigned index;
struct dm_block *block;
- struct node *n;
+ struct btree_node *n;
};
static struct dm_btree_value_type le64_type = {
@@ -147,7 +147,7 @@ static struct dm_btree_value_type le64_type = {
.equal = NULL
};
-static int init_child(struct dm_btree_info *info, struct node *parent,
+static int init_child(struct dm_btree_info *info, struct btree_node *parent,
unsigned index, struct child *result)
{
int r, inc;
@@ -177,7 +177,7 @@ static int exit_child(struct dm_btree_info *info, struct child *c)
return dm_tm_unlock(info->tm, c->block);
}
-static void shift(struct node *left, struct node *right, int count)
+static void shift(struct btree_node *left, struct btree_node *right, int count)
{
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
@@ -203,11 +203,11 @@ static void shift(struct node *left, struct node *right, int count)
right->header.nr_entries = cpu_to_le32(nr_right + count);
}
-static void __rebalance2(struct dm_btree_info *info, struct node *parent,
+static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
struct child *l, struct child *r)
{
- struct node *left = l->n;
- struct node *right = r->n;
+ struct btree_node *left = l->n;
+ struct btree_node *right = r->n;
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
unsigned threshold = 2 * merge_threshold(left) + 1;
@@ -239,7 +239,7 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
unsigned left_index)
{
int r;
- struct node *parent;
+ struct btree_node *parent;
struct child left, right;
parent = dm_block_data(shadow_current(s));
@@ -270,9 +270,9 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
* in right, then rebalance2. This wastes some cpu, but I want something
* simple atm.
*/
-static void delete_center_node(struct dm_btree_info *info, struct node *parent,
+static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
struct child *l, struct child *c, struct child *r,
- struct node *left, struct node *center, struct node *right,
+ struct btree_node *left, struct btree_node *center, struct btree_node *right,
uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
uint32_t max_entries = le32_to_cpu(left->header.max_entries);
@@ -301,9 +301,9 @@ static void delete_center_node(struct dm_btree_info *info, struct node *parent,
/*
* Redistributes entries among 3 sibling nodes.
*/
-static void redistribute3(struct dm_btree_info *info, struct node *parent,
+static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
struct child *l, struct child *c, struct child *r,
- struct node *left, struct node *center, struct node *right,
+ struct btree_node *left, struct btree_node *center, struct btree_node *right,
uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
int s;
@@ -343,12 +343,12 @@ static void redistribute3(struct dm_btree_info *info, struct node *parent,
*key_ptr(parent, r->index) = right->keys[0];
}
-static void __rebalance3(struct dm_btree_info *info, struct node *parent,
+static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
struct child *l, struct child *c, struct child *r)
{
- struct node *left = l->n;
- struct node *center = c->n;
- struct node *right = r->n;
+ struct btree_node *left = l->n;
+ struct btree_node *center = c->n;
+ struct btree_node *right = r->n;
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
@@ -371,7 +371,7 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
unsigned left_index)
{
int r;
- struct node *parent = dm_block_data(shadow_current(s));
+ struct btree_node *parent = dm_block_data(shadow_current(s));
struct child left, center, right;
/*
@@ -421,7 +421,7 @@ static int get_nr_entries(struct dm_transaction_manager *tm,
{
int r;
struct dm_block *block;
- struct node *n;
+ struct btree_node *n;
r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
if (r)
@@ -438,7 +438,7 @@ static int rebalance_children(struct shadow_spine *s,
{
int i, r, has_left_sibling, has_right_sibling;
uint32_t child_entries;
- struct node *n;
+ struct btree_node *n;
n = dm_block_data(shadow_current(s));
@@ -483,7 +483,7 @@ static int rebalance_children(struct shadow_spine *s,
return r;
}
-static int do_leaf(struct node *n, uint64_t key, unsigned *index)
+static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index)
{
int i = lower_bound(n, key);
@@ -506,7 +506,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
uint64_t key, unsigned *index)
{
int i = *index, r;
- struct node *n;
+ struct btree_node *n;
for (;;) {
r = shadow_step(s, root, vt);
@@ -556,7 +556,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
unsigned level, last_level = info->levels - 1;
int index = 0, r = 0;
struct shadow_spine spine;
- struct node *n;
+ struct btree_node *n;
init_shadow_spine(&spine, info);
for (level = 0; level < info->levels; level++) {
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index d9a7912ee8ee..2f0805c3263e 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -23,7 +23,7 @@ static void node_prepare_for_write(struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
- struct node *n = dm_block_data(b);
+ struct btree_node *n = dm_block_data(b);
struct node_header *h = &n->header;
h->blocknr = cpu_to_le64(dm_block_location(b));
@@ -38,7 +38,7 @@ static int node_check(struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
{
- struct node *n = dm_block_data(b);
+ struct btree_node *n = dm_block_data(b);
struct node_header *h = &n->header;
size_t value_size;
__le32 csum_disk;
@@ -164,7 +164,7 @@ int ro_step(struct ro_spine *s, dm_block_t new_child)
return r;
}
-struct node *ro_node(struct ro_spine *s)
+struct btree_node *ro_node(struct ro_spine *s)
{
struct dm_block *block;
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index d12b2cc51f1a..371f3d49d18e 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
/*----------------------------------------------------------------*/
/* makes the assumption that no two keys are the same. */
-static int bsearch(struct node *n, uint64_t key, int want_hi)
+static int bsearch(struct btree_node *n, uint64_t key, int want_hi)
{
int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
@@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi)
return want_hi ? hi : lo;
}
-int lower_bound(struct node *n, uint64_t key)
+int lower_bound(struct btree_node *n, uint64_t key)
{
return bsearch(n, key, 0);
}
-void inc_children(struct dm_transaction_manager *tm, struct node *n,
+void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
struct dm_btree_value_type *vt)
{
unsigned i;
@@ -77,7 +77,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n,
vt->inc(vt->context, value_ptr(n, i));
}
-static int insert_at(size_t value_size, struct node *node, unsigned index,
+static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
uint64_t key, void *value)
__dm_written_to_disk(value)
{
@@ -122,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
{
int r;
struct dm_block *b;
- struct node *n;
+ struct btree_node *n;
size_t block_size;
uint32_t max_entries;
@@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(dm_btree_empty);
#define MAX_SPINE_DEPTH 64
struct frame {
struct dm_block *b;
- struct node *n;
+ struct btree_node *n;
unsigned level;
unsigned nr_children;
unsigned current_child;
@@ -295,7 +295,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del);
/*----------------------------------------------------------------*/
static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
- int (*search_fn)(struct node *, uint64_t),
+ int (*search_fn)(struct btree_node *, uint64_t),
uint64_t *result_key, void *v, size_t value_size)
{
int i, r;
@@ -406,7 +406,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
size_t size;
unsigned nr_left, nr_right;
struct dm_block *left, *right, *parent;
- struct node *ln, *rn, *pn;
+ struct btree_node *ln, *rn, *pn;
__le64 location;
left = shadow_current(s);
@@ -491,7 +491,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
size_t size;
unsigned nr_left, nr_right;
struct dm_block *left, *right, *new_parent;
- struct node *pn, *ln, *rn;
+ struct btree_node *pn, *ln, *rn;
__le64 val;
new_parent = shadow_current(s);
@@ -576,7 +576,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
uint64_t key, unsigned *index)
{
int r, i = *index, top = 1;
- struct node *node;
+ struct btree_node *node;
for (;;) {
r = shadow_step(s, root, vt);
@@ -643,7 +643,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
unsigned level, index = -1, last_level = info->levels - 1;
dm_block_t block = root;
struct shadow_spine spine;
- struct node *n;
+ struct btree_node *n;
struct dm_btree_value_type le64_type;
le64_type.context = NULL;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 23904d233735..df445091384a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2564,7 +2564,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|| disk_idx < 0)
continue;
if (test_bit(Replacement, &rdev->flags))
- disk = conf->mirrors + conf->raid_disks + disk_idx;
+ disk = conf->mirrors + mddev->raid_disks + disk_idx;
else
disk = conf->mirrors + disk_idx;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a954c95d7c92..6137d0041d4d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -476,7 +476,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
*/
one_write_done(r10_bio);
if (dec_rdev)
- rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+ rdev_dec_pending(rdev, conf->mddev);
}
/*
@@ -612,20 +612,24 @@ static int raid10_mergeable_bvec(struct request_queue *q,
max = biovec->bv_len;
if (mddev->merge_check_needed) {
- struct r10bio r10_bio;
+ struct {
+ struct r10bio r10_bio;
+ struct r10dev devs[conf->copies];
+ } on_stack;
+ struct r10bio *r10_bio = &on_stack.r10_bio;
int s;
- r10_bio.sector = sector;
- raid10_find_phys(conf, &r10_bio);
+ r10_bio->sector = sector;
+ raid10_find_phys(conf, r10_bio);
rcu_read_lock();
for (s = 0; s < conf->copies; s++) {
- int disk = r10_bio.devs[s].devnum;
+ int disk = r10_bio->devs[s].devnum;
struct md_rdev *rdev = rcu_dereference(
conf->mirrors[disk].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct request_queue *q =
bdev_get_queue(rdev->bdev);
if (q->merge_bvec_fn) {
- bvm->bi_sector = r10_bio.devs[s].addr
+ bvm->bi_sector = r10_bio->devs[s].addr
+ rdev->data_offset;
bvm->bi_bdev = rdev->bdev;
max = min(max, q->merge_bvec_fn(
@@ -637,7 +641,7 @@ static int raid10_mergeable_bvec(struct request_queue *q,
struct request_queue *q =
bdev_get_queue(rdev->bdev);
if (q->merge_bvec_fn) {
- bvm->bi_sector = r10_bio.devs[s].addr
+ bvm->bi_sector = r10_bio->devs[s].addr
+ rdev->data_offset;
bvm->bi_bdev = rdev->bdev;
max = min(max, q->merge_bvec_fn(
@@ -1178,18 +1182,21 @@ retry_write:
blocked_rdev = rrdev;
break;
}
+ if (rdev && (test_bit(Faulty, &rdev->flags)
+ || test_bit(Unmerged, &rdev->flags)))
+ rdev = NULL;
if (rrdev && (test_bit(Faulty, &rrdev->flags)
|| test_bit(Unmerged, &rrdev->flags)))
rrdev = NULL;
r10_bio->devs[i].bio = NULL;
r10_bio->devs[i].repl_bio = NULL;
- if (!rdev || test_bit(Faulty, &rdev->flags) ||
- test_bit(Unmerged, &rdev->flags)) {
+
+ if (!rdev && !rrdev) {
set_bit(R10BIO_Degraded, &r10_bio->state);
continue;
}
- if (test_bit(WriteErrorSeen, &rdev->flags)) {
+ if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
sector_t dev_sector = r10_bio->devs[i].addr;
int bad_sectors;
@@ -1231,8 +1238,10 @@ retry_write:
max_sectors = good_sectors;
}
}
- r10_bio->devs[i].bio = bio;
- atomic_inc(&rdev->nr_pending);
+ if (rdev) {
+ r10_bio->devs[i].bio = bio;
+ atomic_inc(&rdev->nr_pending);
+ }
if (rrdev) {
r10_bio->devs[i].repl_bio = bio;
atomic_inc(&rrdev->nr_pending);
@@ -1288,51 +1297,52 @@ retry_write:
for (i = 0; i < conf->copies; i++) {
struct bio *mbio;
int d = r10_bio->devs[i].devnum;
- if (!r10_bio->devs[i].bio)
- continue;
+ if (r10_bio->devs[i].bio) {
+ struct md_rdev *rdev = conf->mirrors[d].rdev;
+ mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+ max_sectors);
+ r10_bio->devs[i].bio = mbio;
+
+ mbio->bi_sector = (r10_bio->devs[i].addr+
+ rdev->data_offset);
+ mbio->bi_bdev = rdev->bdev;
+ mbio->bi_end_io = raid10_end_write_request;
+ mbio->bi_rw = WRITE | do_sync | do_fua;
+ mbio->bi_private = r10_bio;
- mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
- md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
- max_sectors);
- r10_bio->devs[i].bio = mbio;
-
- mbio->bi_sector = (r10_bio->devs[i].addr+
- conf->mirrors[d].rdev->data_offset);
- mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
- mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_rw = WRITE | do_sync | do_fua;
- mbio->bi_private = r10_bio;
-
- atomic_inc(&r10_bio->remaining);
- spin_lock_irqsave(&conf->device_lock, flags);
- bio_list_add(&conf->pending_bio_list, mbio);
- conf->pending_count++;
- spin_unlock_irqrestore(&conf->device_lock, flags);
-
- if (!r10_bio->devs[i].repl_bio)
- continue;
-
- mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
- md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
- max_sectors);
- r10_bio->devs[i].repl_bio = mbio;
+ atomic_inc(&r10_bio->remaining);
+ spin_lock_irqsave(&conf->device_lock, flags);
+ bio_list_add(&conf->pending_bio_list, mbio);
+ conf->pending_count++;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ }
- /* We are actively writing to the original device
- * so it cannot disappear, so the replacement cannot
- * become NULL here
- */
- mbio->bi_sector = (r10_bio->devs[i].addr+
- conf->mirrors[d].replacement->data_offset);
- mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
- mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_rw = WRITE | do_sync | do_fua;
- mbio->bi_private = r10_bio;
+ if (r10_bio->devs[i].repl_bio) {
+ struct md_rdev *rdev = conf->mirrors[d].replacement;
+ if (rdev == NULL) {
+ /* Replacement just got moved to main 'rdev' */
+ smp_mb();
+ rdev = conf->mirrors[d].rdev;
+ }
+ mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+ max_sectors);
+ r10_bio->devs[i].repl_bio = mbio;
+
+ mbio->bi_sector = (r10_bio->devs[i].addr+
+ rdev->data_offset);
+ mbio->bi_bdev = rdev->bdev;
+ mbio->bi_end_io = raid10_end_write_request;
+ mbio->bi_rw = WRITE | do_sync | do_fua;
+ mbio->bi_private = r10_bio;
- atomic_inc(&r10_bio->remaining);
- spin_lock_irqsave(&conf->device_lock, flags);
- bio_list_add(&conf->pending_bio_list, mbio);
- conf->pending_count++;
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ atomic_inc(&r10_bio->remaining);
+ spin_lock_irqsave(&conf->device_lock, flags);
+ bio_list_add(&conf->pending_bio_list, mbio);
+ conf->pending_count++;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ }
}
/* Don't remove the bias on 'remaining' (one_write_done) until
@@ -3015,7 +3025,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
else {
bad_sectors -= (sector - first_bad);
if (max_sync > bad_sectors)
- max_sync = max_sync;
+ max_sync = bad_sectors;
continue;
}
}
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 7c615613c381..24d45b8af5c9 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -104,7 +104,7 @@ struct r10bio {
* We choose the number when they are allocated.
* We sometimes need an extra bio to write to the replacement.
*/
- struct {
+ struct r10dev {
struct bio *bio;
union {
struct bio *repl_bio; /* used for resync and
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 73a58007eb99..0240576564dc 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -380,6 +380,8 @@ static int calc_degraded(struct r5conf *conf)
degraded = 0;
for (i = 0; i < conf->previous_raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev && test_bit(Faulty, &rdev->flags))
+ rdev = rcu_dereference(conf->disks[i].replacement);
if (!rdev || test_bit(Faulty, &rdev->flags))
degraded++;
else if (test_bit(In_sync, &rdev->flags))
@@ -404,6 +406,8 @@ static int calc_degraded(struct r5conf *conf)
degraded2 = 0;
for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev && test_bit(Faulty, &rdev->flags))
+ rdev = rcu_dereference(conf->disks[i].replacement);
if (!rdev || test_bit(Faulty, &rdev->flags))
degraded2++;
else if (test_bit(In_sync, &rdev->flags))