From ae03bf639a5027d27270123f5f6e3ee6a412781d Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:50 -0400 Subject: block: Use accessor functions for queue limits Convert all external users of queue limits to using wrapper functions instead of poking the request queue variables directly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/md/linear.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 7a36e38393a1..64f1f3e046e0 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->num_sectors = rdev->sectors; -- cgit v1.2.3 From 070ec55d07157a3041f92654135c3c6e2eaaf901 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:54:21 +1000 Subject: md: remove mddev_to_conf "helper" macro Having a macro just to cast a void* isn't really helpful. I would must rather see that we are simply de-referencing ->private, than have to know what the macro does. So open code the macro everywhere and remove the pointless cast. Signed-off-by: NeilBrown --- drivers/md/linear.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 64f1f3e046e0..31f8ec7131bd 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -28,7 +28,7 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) { dev_info_t *hash; - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; sector_t idx = sector >> conf->sector_shift; /* @@ -79,7 +79,7 @@ static int linear_mergeable_bvec(struct request_queue *q, static void linear_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; int i; for (i=0; i < mddev->raid_disks; i++) { @@ -91,7 +91,7 @@ static void linear_unplug(struct request_queue *q) static int linear_congested(void *data, int bits) { mddev_t *mddev = data; - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; int i, ret = 0; for (i = 0; i < mddev->raid_disks && !ret ; i++) { @@ -103,7 +103,7 @@ static int linear_congested(void *data, int bits) static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks) { - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; WARN_ONCE(sectors || raid_disks, "%s does not support generic reshape\n", __func__); @@ -294,7 +294,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) if (!newconf) return -ENOMEM; - newconf->prev = mddev_to_conf(mddev); + newconf->prev = mddev->private; mddev->private = newconf; mddev->raid_disks++; md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); @@ -304,7 +304,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) static int linear_stop (mddev_t *mddev) { - linear_conf_t *conf = mddev_to_conf(mddev); + linear_conf_t *conf = mddev->private; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ do { -- cgit v1.2.3 From 45d4582f219619e368ea91ea1189085e1c5f1969 Mon Sep 17 00:00:00 2001 From: Sandeep K Sinha Date: Tue, 16 Jun 2009 16:55:26 +1000 Subject: md: Removal of hash table in linear raid Get rid of sector_div and hash table for linear raid and replace with a linear search in which_dev. The hash table adds a lot of complexity for little if any gain. Ultimately a binary search will be used which will have smaller cache foot print, a similar number of memory access, and no divisions. Signed-off-by: Sandeep K Sinha Signed-off-by: NeilBrown --- drivers/md/linear.c | 93 ++--------------------------------------------------- 1 file changed, 3 insertions(+), 90 deletions(-) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 31f8ec7131bd..92bcd3dd52cc 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -29,13 +29,8 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) { dev_info_t *hash; linear_conf_t *conf = mddev->private; - sector_t idx = sector >> conf->sector_shift; - /* - * sector_div(a,b) returns the remainer and sets a to a/b - */ - (void)sector_div(idx, conf->spacing); - hash = conf->hash_table[idx]; + hash = conf->disks; while (sector >= hash->num_sectors + hash->start_sector) hash++; @@ -114,11 +109,8 @@ static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks) static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) { linear_conf_t *conf; - dev_info_t **table; mdk_rdev_t *rdev; - int i, nb_zone, cnt; - sector_t min_sectors; - sector_t curr_sector; + int i, cnt; conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t), GFP_KERNEL); @@ -159,63 +151,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) goto out; } - min_sectors = conf->array_sectors; - sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *)); - if (min_sectors == 0) - min_sectors = 1; - - /* min_sectors is the minimum spacing that will fit the hash - * table in one PAGE. This may be much smaller than needed. - * We find the smallest non-terminal set of consecutive devices - * that is larger than min_sectors and use the size of that as - * the actual spacing - */ - conf->spacing = conf->array_sectors; - for (i=0; i < cnt-1 ; i++) { - sector_t tmp = 0; - int j; - for (j = i; j < cnt - 1 && tmp < min_sectors; j++) - tmp += conf->disks[j].num_sectors; - if (tmp >= min_sectors && tmp < conf->spacing) - conf->spacing = tmp; - } - - /* spacing may be too large for sector_div to work with, - * so we might need to pre-shift - */ - conf->sector_shift = 0; - if (sizeof(sector_t) > sizeof(u32)) { - sector_t space = conf->spacing; - while (space > (sector_t)(~(u32)0)) { - space >>= 1; - conf->sector_shift++; - } - } /* - * This code was restructured to work around a gcc-2.95.3 internal - * compiler error. Alter it with care. - */ - { - sector_t sz; - unsigned round; - unsigned long base; - - sz = conf->array_sectors >> conf->sector_shift; - sz += 1; /* force round-up */ - base = conf->spacing >> conf->sector_shift; - round = sector_div(sz, base); - nb_zone = sz + (round ? 1 : 0); - } - BUG_ON(nb_zone > PAGE_SIZE / sizeof(struct dev_info *)); - - conf->hash_table = kmalloc (sizeof (struct dev_info *) * nb_zone, - GFP_KERNEL); - if (!conf->hash_table) - goto out; - - /* - * Here we generate the linear hash table - * First calculate the device offsets. + * Here we calculate the device offsets. */ conf->disks[0].start_sector = 0; for (i = 1; i < raid_disks; i++) @@ -223,29 +160,6 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) conf->disks[i-1].start_sector + conf->disks[i-1].num_sectors; - table = conf->hash_table; - i = 0; - for (curr_sector = 0; - curr_sector < conf->array_sectors; - curr_sector += conf->spacing) { - - while (i < raid_disks-1 && - curr_sector >= conf->disks[i+1].start_sector) - i++; - - *table ++ = conf->disks + i; - } - - if (conf->sector_shift) { - conf->spacing >>= conf->sector_shift; - /* round spacing up so that when we divide by it, - * we err on the side of "too-low", which is safest. - */ - conf->spacing++; - } - - BUG_ON(table - conf->hash_table > nb_zone); - return conf; out: @@ -309,7 +223,6 @@ static int linear_stop (mddev_t *mddev) blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ do { linear_conf_t *t = conf->prev; - kfree(conf->hash_table); kfree(conf); conf = t; } while (conf); -- cgit v1.2.3 From 4db7cdc859f56ecf0a186e0cfb238b5bb3af2efb Mon Sep 17 00:00:00 2001 From: Sandeep K Sinha Date: Tue, 16 Jun 2009 16:56:13 +1000 Subject: md: Removing num_sector and replacing start_sector with end_sector Remove num_sectors from dev_info and replace start_sector with end_sector. This makes a lot of comparisons much simpler. Signed-off-by: Sandeep K Sinha Signed-off-by: NeilBrown --- drivers/md/linear.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 92bcd3dd52cc..529a3d37e3fe 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -32,7 +32,7 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) hash = conf->disks; - while (sector >= hash->num_sectors + hash->start_sector) + while (sector >= hash->end_sector) hash++; return hash; } @@ -55,7 +55,7 @@ static int linear_mergeable_bvec(struct request_queue *q, sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); dev0 = which_dev(mddev, sector); - maxsectors = dev0->num_sectors - (sector - dev0->start_sector); + maxsectors = dev0->end_sector - sector; if (maxsectors < bio_sectors) maxsectors = 0; @@ -141,10 +141,9 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); - disk->num_sectors = rdev->sectors; conf->array_sectors += rdev->sectors; - cnt++; + } if (cnt != raid_disks) { printk("linear: not enough drives present. Aborting!\n"); @@ -154,11 +153,12 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) /* * Here we calculate the device offsets. */ - conf->disks[0].start_sector = 0; + conf->disks[0].end_sector = conf->disks[0].rdev->sectors; + for (i = 1; i < raid_disks; i++) - conf->disks[i].start_sector = - conf->disks[i-1].start_sector + - conf->disks[i-1].num_sectors; + conf->disks[i].end_sector = + conf->disks[i-1].end_sector + + conf->disks[i].rdev->sectors; return conf; @@ -235,6 +235,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) const int rw = bio_data_dir(bio); mddev_t *mddev = q->queuedata; dev_info_t *tmp_dev; + sector_t start_sector; int cpu; if (unlikely(bio_barrier(bio))) { @@ -249,32 +250,30 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) part_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); - - if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors + - tmp_dev->start_sector) - || (bio->bi_sector < - tmp_dev->start_sector))) { + start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; + + if (unlikely(bio->bi_sector >= (tmp_dev->end_sector) + || (bio->bi_sector < start_sector))) { char b[BDEVNAME_SIZE]; printk("linear_make_request: Sector %llu out of bounds on " "dev %s: %llu sectors, offset %llu\n", (unsigned long long)bio->bi_sector, bdevname(tmp_dev->rdev->bdev, b), - (unsigned long long)tmp_dev->num_sectors, - (unsigned long long)tmp_dev->start_sector); + (unsigned long long)tmp_dev->rdev->sectors, + (unsigned long long)start_sector); bio_io_error(bio); return 0; } if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > - tmp_dev->start_sector + tmp_dev->num_sectors)) { + tmp_dev->end_sector)) { /* This bio crosses a device boundary, so we have to * split it. */ struct bio_pair *bp; bp = bio_split(bio, - tmp_dev->start_sector + tmp_dev->num_sectors - - bio->bi_sector); + tmp_dev->end_sector - bio->bi_sector); if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); @@ -285,7 +284,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) } bio->bi_bdev = tmp_dev->rdev->bdev; - bio->bi_sector = bio->bi_sector - tmp_dev->start_sector + bio->bi_sector = bio->bi_sector - start_sector + tmp_dev->rdev->data_offset; return 1; -- cgit v1.2.3 From aece3d1f40879759f641dfbfdbb9e2593adeb43c Mon Sep 17 00:00:00 2001 From: Sandeep K Sinha Date: Tue, 16 Jun 2009 16:57:08 +1000 Subject: md: Binary search in linear raid Replace the linear search with binary search in which_dev. Signed-off-by: Sandeep K Sinha Signed-off-by: NeilBrown --- drivers/md/linear.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 529a3d37e3fe..9b02a73fbc6b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -27,14 +27,26 @@ */ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) { - dev_info_t *hash; + int lo, mid, hi; linear_conf_t *conf = mddev->private; - hash = conf->disks; + lo = 0; + hi = mddev->raid_disks - 1; - while (sector >= hash->end_sector) - hash++; - return hash; + /* + * Binary Search + */ + + while (hi > lo) { + + mid = (hi + lo) / 2; + if (sector < conf->disks[mid].end_sector) + hi = mid; + else + lo = mid + 1; + } + + return conf->disks + lo; } /** -- cgit v1.2.3 From 9d8f0363623b3da12c43007cf77f5e1a4e8a5964 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:45:01 +1000 Subject: md: Make mddev->chunk_size sector-based. This patch renames the chunk_size field to chunk_sectors with the implied change of semantics. Since is_power_of_2(chunk_size) = is_power_of_2(chunk_sectors << 9) = is_power_of_2(chunk_sectors) these bits don't need an adjustment for the shift. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 9b02a73fbc6b..9f7cec42dd8e 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -305,7 +305,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) static void linear_status (struct seq_file *seq, mddev_t *mddev) { - seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); + seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } -- cgit v1.2.3 From 13f2682b7216ebebd72b3d5868fe7fccec91a92d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 08:48:55 +1000 Subject: md: raid0/linear: ensure device sizes are rounded to chunk size. This is currently ensured by common code, but it is more reliable to ensure it where it is needed in personality code. All the other personalities that care already round the size to the chunk_size. raid0 and linear are the only hold-outs. Signed-off-by: NeilBrown --- drivers/md/linear.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 9f7cec42dd8e..dda2f1b64a6d 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -135,6 +135,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) list_for_each_entry(rdev, &mddev->disks, same_set) { int j = rdev->raid_disk; dev_info_t *disk = conf->disks + j; + sector_t sectors; if (j < 0 || j >= raid_disks || disk->rdev) { printk("linear: disk numbering problem. Aborting!\n"); @@ -142,6 +143,11 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) } disk->rdev = rdev; + if (mddev->chunk_sectors) { + sectors = rdev->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev->sectors = sectors * mddev->chunk_sectors; + } blk_queue_stack_limits(mddev->queue, rdev->bdev->bd_disk->queue); -- cgit v1.2.3 From 0894cc3066aaa3e75a99383c0d25feebf9b688ac Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:49:23 +1000 Subject: md: Move check for bitmap presence to personality code. If the superblock of a component device indicates the presence of a bitmap but the corresponding raid personality does not support bitmaps (raid0, linear, multipath, faulty), then something is seriously wrong and we'd better refuse to run such an array. Currently, this check is performed while the superblocks are examined, i.e. before entering personality code. Therefore the generic md layer must know which raid levels support bitmaps and which do not. This patch avoids this layer violation without adding identical code to various personalities. This is accomplished by introducing a new public function to md.c, md_check_no_bitmap(), which replaces the hard-coded checks in the superblock loading functions. A call to md_check_no_bitmap() is added to the ->run method of each personality which does not support bitmaps and assembly is aborted if at least one component device contains a bitmap. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/linear.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index dda2f1b64a6d..564c390f8a1b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -189,6 +189,8 @@ static int linear_run (mddev_t *mddev) { linear_conf_t *conf; + if (md_check_no_bitmap(mddev)) + return -EINVAL; mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = linear_conf(mddev, mddev->raid_disks); -- cgit v1.2.3 From af11c397fd8835c70ec0bb777104e4ab98b2d660 Mon Sep 17 00:00:00 2001 From: SandeepKsinha Date: Thu, 18 Jun 2009 08:49:35 +1000 Subject: md linear: Protecting mddev with rcu locks to avoid races Due to the lack of memory ordering guarantees, we may have races around mddev->conf. In particular, the correct contents of the structure we get from dereferencing ->private might not be visible to this CPU yet, and they might not be correct w.r.t mddev->raid_disks. This patch addresses the problem using rcu protection to avoid such race conditions. Signed-off-by: SandeepKsinha Signed-off-by: NeilBrown --- drivers/md/linear.c | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 564c390f8a1b..93f2b1d18398 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -28,10 +28,11 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) { int lo, mid, hi; - linear_conf_t *conf = mddev->private; + linear_conf_t *conf; lo = 0; hi = mddev->raid_disks - 1; + conf = rcu_dereference(mddev->private); /* * Binary Search @@ -66,8 +67,10 @@ static int linear_mergeable_bvec(struct request_queue *q, unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); + rcu_read_lock(); dev0 = which_dev(mddev, sector); maxsectors = dev0->end_sector - sector; + rcu_read_unlock(); if (maxsectors < bio_sectors) maxsectors = 0; @@ -86,36 +89,50 @@ static int linear_mergeable_bvec(struct request_queue *q, static void linear_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; - linear_conf_t *conf = mddev->private; + linear_conf_t *conf; int i; + rcu_read_lock(); + conf = rcu_dereference(mddev->private); + for (i=0; i < mddev->raid_disks; i++) { struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev); blk_unplug(r_queue); } + rcu_read_unlock(); } static int linear_congested(void *data, int bits) { mddev_t *mddev = data; - linear_conf_t *conf = mddev->private; + linear_conf_t *conf; int i, ret = 0; + rcu_read_lock(); + conf = rcu_dereference(mddev->private); + for (i = 0; i < mddev->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } + + rcu_read_unlock(); return ret; } static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks) { - linear_conf_t *conf = mddev->private; + linear_conf_t *conf; + sector_t array_sectors; + rcu_read_lock(); + conf = rcu_dereference(mddev->private); WARN_ONCE(sectors || raid_disks, "%s does not support generic reshape\n", __func__); + array_sectors = conf->array_sectors; + rcu_read_unlock(); - return conf->array_sectors; + return array_sectors; } static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) @@ -229,8 +246,8 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) return -ENOMEM; newconf->prev = mddev->private; - mddev->private = newconf; mddev->raid_disks++; + rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); return 0; @@ -239,7 +256,13 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) static int linear_stop (mddev_t *mddev) { linear_conf_t *conf = mddev->private; - + + /* + * We do not require rcu protection here since + * we hold reconfig_mutex for both linear_add and + * linear_stop, so they cannot race. + */ + blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ do { linear_conf_t *t = conf->prev; @@ -269,9 +292,11 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) bio_sectors(bio)); part_stat_unlock(); + rcu_read_lock(); tmp_dev = which_dev(mddev, bio->bi_sector); start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; + if (unlikely(bio->bi_sector >= (tmp_dev->end_sector) || (bio->bi_sector < start_sector))) { char b[BDEVNAME_SIZE]; @@ -282,6 +307,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) bdevname(tmp_dev->rdev->bdev, b), (unsigned long long)tmp_dev->rdev->sectors, (unsigned long long)start_sector); + rcu_read_unlock(); bio_io_error(bio); return 0; } @@ -291,9 +317,11 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) * split it. */ struct bio_pair *bp; + sector_t end_sector = tmp_dev->end_sector; + + rcu_read_unlock(); - bp = bio_split(bio, - tmp_dev->end_sector - bio->bi_sector); + bp = bio_split(bio, end_sector - bio->bi_sector); if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); @@ -306,6 +334,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) bio->bi_bdev = tmp_dev->rdev->bdev; bio->bi_sector = bio->bi_sector - start_sector + tmp_dev->rdev->data_offset; + rcu_read_unlock(); return 1; } -- cgit v1.2.3 From 495d357301e1de01fabe30ce9a555301fb4675c3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 08:49:42 +1000 Subject: md/linear: use call_rcu to free obsolete 'conf' structures. Current, when we update the 'conf' structure, when adding a drive to a linear array, we keep the old version around until the array is finally stopped, as it is not safe to free it immediately. Now that we have rcu protection on all accesses to 'conf', we can use call_rcu to free it more promptly. Signed-off-by: NeilBrown --- drivers/md/linear.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/md/linear.c') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 93f2b1d18398..15c8b7b25a9b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -223,6 +223,12 @@ static int linear_run (mddev_t *mddev) return 0; } +static void free_conf(struct rcu_head *head) +{ + linear_conf_t *conf = container_of(head, linear_conf_t, rcu); + kfree(conf); +} + static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) { /* Adding a drive to a linear array allows the array to grow. @@ -233,7 +239,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) * The current one is never freed until the array is stopped. * This avoids races. */ - linear_conf_t *newconf; + linear_conf_t *newconf, *oldconf; if (rdev->saved_raid_disk != mddev->raid_disks) return -EINVAL; @@ -245,11 +251,12 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) if (!newconf) return -ENOMEM; - newconf->prev = mddev->private; + oldconf = rcu_dereference(mddev->private); mddev->raid_disks++; rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); + call_rcu(&oldconf->rcu, free_conf); return 0; } @@ -261,14 +268,12 @@ static int linear_stop (mddev_t *mddev) * We do not require rcu protection here since * we hold reconfig_mutex for both linear_add and * linear_stop, so they cannot race. + * We should make sure any old 'conf's are properly + * freed though. */ - + rcu_barrier(); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - do { - linear_conf_t *t = conf->prev; - kfree(conf); - conf = t; - } while (conf); + kfree(conf); return 0; } -- cgit v1.2.3