From ae03bf639a5027d27270123f5f6e3ee6a412781d Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:50 -0400 Subject: block: Use accessor functions for queue limits Convert all external users of queue limits to using wrapper functions instead of poking the request queue variables directly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/md/raid0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index c08d7559be55..925507e7d673 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -144,7 +144,7 @@ static int create_strip_zones (mddev_t *mddev) */ if (rdev1->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); if (!smallest || (rdev1->sectors < smallest->sectors)) -- cgit v1.2.3 From dc58266385e51420298275c90a616c34f1473a73 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:18:43 +1000 Subject: md: raid0: Replace hash table lookup by looping over all strip_zones. The number of strip_zones of a raid0 array is bounded by the number of drives in the array and is in fact much smaller for typical setups. For example, any raid0 array containing identical disks will have only a single strip_zone. Therefore, the hash tables which are used for quickly finding the strip_zone that holds a particular sector are of questionable value and add quite a bit of unnecessary complexity. This patch replaces the hash table lookup by equivalent code which simply loops over all strip zones to find the zone that holds the given sector. In order to make this loop as fast as possible, the zone->start field of struct strip_zone has been renamed to zone_end, and it now stores the beginning of the next zone in sectors. This allows to save one addition in the loop. Subsequent cleanup patches will remove the hash table structure. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 925507e7d673..bb245a6d16c8 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -52,7 +52,6 @@ static int raid0_congested(void *data, int bits) return ret; } - static int create_strip_zones (mddev_t *mddev) { int i, c, j; @@ -158,7 +157,7 @@ static int create_strip_zones (mddev_t *mddev) } zone->nb_dev = cnt; zone->sectors = smallest->sectors * cnt; - zone->zone_start = 0; + zone->zone_end = zone->sectors; current_start = smallest->sectors; curr_zone_start = zone->sectors; @@ -198,14 +197,13 @@ static int create_strip_zones (mddev_t *mddev) printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", zone->nb_dev, (unsigned long long)zone->sectors); - zone->zone_start = curr_zone_start; + zone->zone_end = curr_zone_start + zone->sectors; curr_zone_start += zone->sectors; current_start = smallest->sectors; printk(KERN_INFO "raid0: current zone start: %llu\n", (unsigned long long)current_start); } - /* Now find appropriate hash spacing. * We want a number which causes most hash entries to cover * at most two strips, but the hash table must be at most @@ -398,6 +396,19 @@ static int raid0_stop (mddev_t *mddev) return 0; } +/* Find the zone which holds a particular offset */ +static struct strip_zone *find_zone(struct raid0_private_data *conf, + sector_t sector) +{ + int i; + struct strip_zone *z = conf->strip_zone; + + for (i = 0; i < conf->nr_strip_zones; i++) + if (sector < z[i].zone_end) + return z + i; + BUG(); +} + static int raid0_make_request (struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; @@ -443,22 +454,11 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_pair_release(bp); return 0; } - - - { - sector_t x = sector >> conf->sector_shift; - sector_div(x, (u32)conf->spacing); - zone = conf->hash_table[x]; - } - - while (sector >= zone->zone_start + zone->sectors) - zone++; - + zone = find_zone(conf, sector); sect_in_chunk = bio->bi_sector & (chunk_sects - 1); - - { - sector_t x = (sector - zone->zone_start) >> chunksect_bits; + sector_t x = (zone->sectors + sector - zone->zone_end) + >> chunksect_bits; sector_div(x, zone->nb_dev); chunk = x; @@ -503,8 +503,8 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) seq_printf(seq, "%s/", bdevname( conf->strip_zone[j].dev[k]->bdev,b)); - seq_printf(seq, "] zs=%d ds=%d s=%d\n", - conf->strip_zone[j].zone_start, + seq_printf(seq, "] ze=%d ds=%d s=%d\n", + conf->strip_zone[j].zone_end, conf->strip_zone[j].dev_start, conf->strip_zone[j].sectors); } -- cgit v1.2.3 From d27a43abd7be0ab4b2337e4587feca8c7340e5f9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:46:46 +1000 Subject: md/raid0: two cleanups in create_stripe_zones. 1/ remove current_start. The same value is available in zone->dev_start and storing it separately doesn't gain anything. 2/ rename curr_zone_start to curr_zone_end as we are now more focused on the 'end' of each zone. We end up storing the same number though - the old name was a little confusing (and what does 'current' mean in this context anyway). Signed-off-by: NeilBrown --- drivers/md/raid0.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index bb245a6d16c8..1afdfd120bba 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -55,7 +55,7 @@ static int raid0_congested(void *data, int bits) static int create_strip_zones (mddev_t *mddev) { int i, c, j; - sector_t current_start, curr_zone_start; + sector_t curr_zone_end; sector_t min_spacing; raid0_conf_t *conf = mddev_to_conf(mddev); mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; @@ -159,8 +159,7 @@ static int create_strip_zones (mddev_t *mddev) zone->sectors = smallest->sectors * cnt; zone->zone_end = zone->sectors; - current_start = smallest->sectors; - curr_zone_start = zone->sectors; + curr_zone_end = zone->sectors; /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) @@ -169,7 +168,7 @@ static int create_strip_zones (mddev_t *mddev) zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; printk(KERN_INFO "raid0: zone %d\n", i); - zone->dev_start = current_start; + zone->dev_start = smallest->sectors; smallest = NULL; c = 0; @@ -178,7 +177,7 @@ static int create_strip_zones (mddev_t *mddev) rdev = conf->strip_zone[0].dev[j]; printk(KERN_INFO "raid0: checking %s ...", bdevname(rdev->bdev, b)); - if (rdev->sectors <= current_start) { + if (rdev->sectors <= zone->dev_start) { printk(KERN_INFO " nope.\n"); continue; } @@ -193,16 +192,15 @@ static int create_strip_zones (mddev_t *mddev) } zone->nb_dev = c; - zone->sectors = (smallest->sectors - current_start) * c; + zone->sectors = (smallest->sectors - zone->dev_start) * c; printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", zone->nb_dev, (unsigned long long)zone->sectors); - zone->zone_end = curr_zone_start + zone->sectors; - curr_zone_start += zone->sectors; + curr_zone_end += zone->sectors; + zone->zone_end = curr_zone_end; - current_start = smallest->sectors; printk(KERN_INFO "raid0: current zone start: %llu\n", - (unsigned long long)current_start); + (unsigned long long)smallest->sectors); } /* Now find appropriate hash spacing. * We want a number which causes most hash entries to cover @@ -212,8 +210,8 @@ static int create_strip_zones (mddev_t *mddev) * strip though as it's size has no bearing on the efficacy of the hash * table. */ - conf->spacing = curr_zone_start; - min_spacing = curr_zone_start; + conf->spacing = curr_zone_end; + min_spacing = curr_zone_end; sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); for (i=0; i < conf->nr_strip_zones-1; i++) { sector_t s = 0; -- cgit v1.2.3 From 09770e0b6ee649313611a2d6a9b44f456072dbd6 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:46:48 +1000 Subject: md: raid0: Remove hash table. The raid0 hash table has become unused due to the changes in the previous patch. This patch removes the hash table allocation and setup code and kills the hash_table field of struct raid0_private_data. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 1afdfd120bba..d4c9c5d5d7f5 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -326,22 +326,14 @@ static int raid0_run (mddev_t *mddev) nb_zone = s + round; } printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone); - - printk(KERN_INFO "raid0 : Allocating %zu bytes for hash.\n", - nb_zone*sizeof(struct strip_zone*)); - conf->hash_table = kmalloc (sizeof (struct strip_zone *)*nb_zone, GFP_KERNEL); - if (!conf->hash_table) - goto out_free_conf; sectors = conf->strip_zone[cur].sectors; - conf->hash_table[0] = conf->strip_zone + cur; for (i=1; i< nb_zone; i++) { while (sectors <= conf->spacing) { cur++; sectors += conf->strip_zone[cur].sectors; } sectors -= conf->spacing; - conf->hash_table[i] = conf->strip_zone + cur; } if (conf->sector_shift) { conf->spacing >>= conf->sector_shift; @@ -384,8 +376,6 @@ static int raid0_stop (mddev_t *mddev) raid0_conf_t *conf = mddev_to_conf(mddev); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - kfree(conf->hash_table); - conf->hash_table = NULL; kfree(conf->strip_zone); conf->strip_zone = NULL; kfree(conf); @@ -494,8 +484,6 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) h = 0; for (j = 0; j < conf->nr_strip_zones; j++) { seq_printf(seq, " z%d", j); - if (conf->hash_table[h] == conf->strip_zone+j) - seq_printf(seq, "(h%d)", h++); seq_printf(seq, "=["); for (k = 0; k < conf->strip_zone[j].nb_dev; k++) seq_printf(seq, "%s/", bdevname( -- cgit v1.2.3 From 8f79cfcdb65472f1504ade2f53e5f2bfdaeb95da Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:47:10 +1000 Subject: md: raid0: Remove hash spacing and sector shift. The "sector_shift" and "spacing" fields of struct raid0_private_data were only used for the hash table lookups. So the removal of the hash table allows get rid of these fields as well which simplifies create_strip_zones() and raid0_run() quite a bit. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 63 +----------------------------------------------------- 1 file changed, 1 insertion(+), 62 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index d4c9c5d5d7f5..edffc4940b49 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -56,7 +56,6 @@ static int create_strip_zones (mddev_t *mddev) { int i, c, j; sector_t curr_zone_end; - sector_t min_spacing; raid0_conf_t *conf = mddev_to_conf(mddev); mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; struct strip_zone *zone; @@ -202,28 +201,7 @@ static int create_strip_zones (mddev_t *mddev) printk(KERN_INFO "raid0: current zone start: %llu\n", (unsigned long long)smallest->sectors); } - /* Now find appropriate hash spacing. - * We want a number which causes most hash entries to cover - * at most two strips, but the hash table must be at most - * 1 PAGE. We choose the smallest strip, or contiguous collection - * of strips, that has big enough size. We never consider the last - * strip though as it's size has no bearing on the efficacy of the hash - * table. - */ - conf->spacing = curr_zone_end; - min_spacing = curr_zone_end; - sector_div(min_spacing, PAGE_SIZE/sizeof(struct strip_zone*)); - for (i=0; i < conf->nr_strip_zones-1; i++) { - sector_t s = 0; - for (j = i; j < conf->nr_strip_zones - 1 && - s < min_spacing; j++) - s += conf->strip_zone[j].sectors; - if (s >= min_spacing && s < conf->spacing) - conf->spacing = s; - } - mddev->queue->unplug_fn = raid0_unplug; - mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_data = mddev; @@ -273,10 +251,8 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks) return array_sectors; } -static int raid0_run (mddev_t *mddev) +static int raid0_run(mddev_t *mddev) { - unsigned cur=0, i=0, nb_zone; - s64 sectors; raid0_conf_t *conf; if (mddev->chunk_size == 0) { @@ -306,43 +282,6 @@ static int raid0_run (mddev_t *mddev) printk(KERN_INFO "raid0 : md_size is %llu sectors.\n", (unsigned long long)mddev->array_sectors); - printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", - (unsigned long long)conf->spacing); - { - sector_t s = raid0_size(mddev, 0, 0); - sector_t space = conf->spacing; - int round; - conf->sector_shift = 0; - if (sizeof(sector_t) > sizeof(u32)) { - /*shift down space and s so that sector_div will work */ - while (space > (sector_t) (~(u32)0)) { - s >>= 1; - space >>= 1; - s += 1; /* force round-up */ - conf->sector_shift++; - } - } - round = sector_div(s, (u32)space) ? 1 : 0; - nb_zone = s + round; - } - printk(KERN_INFO "raid0 : nb_zone is %d.\n", nb_zone); - sectors = conf->strip_zone[cur].sectors; - - for (i=1; i< nb_zone; i++) { - while (sectors <= conf->spacing) { - cur++; - sectors += conf->strip_zone[cur].sectors; - } - sectors -= conf->spacing; - } - if (conf->sector_shift) { - conf->spacing >>= conf->sector_shift; - /* round spacing up so when we divide by it, we - * err on the side of too-low, which is safest - */ - conf->spacing++; - } - /* calculate the max read-ahead size. * For read-ahead of large files to be effective, we need to * readahead at least twice a whole stripe. i.e. number of devices -- cgit v1.2.3 From 5568a6035d9fca2cd8f1ef7005e215eae4e65fab Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:47:21 +1000 Subject: md: raid0: Make raid0_run() return a proper error code. Currently raid0_run() always returns -ENOMEM on errors. This is incorrect as running the array might fail for other reasons, for example because not all component devices were available. This patch changes create_strip_zones() so that it returns a proper error code (either -ENOMEM or -EINVAL) rather than 1 on errors and makes raid0_run(), its single caller, return that value instead of -ENOMEM. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index edffc4940b49..e5648b660e75 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -105,12 +105,12 @@ static int create_strip_zones (mddev_t *mddev) conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); if (!conf->strip_zone) - return 1; + return -ENOMEM; conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* conf->nr_strip_zones*mddev->raid_disks, GFP_KERNEL); if (!conf->devlist) - return 1; + return -ENOMEM; /* The first zone must contain all devices, so here we check that * there is a proper alignment of slots to devices and find them all @@ -207,8 +207,8 @@ static int create_strip_zones (mddev_t *mddev) printk(KERN_INFO "raid0: done.\n"); return 0; - abort: - return 1; +abort: + return -EINVAL; } /** @@ -254,6 +254,7 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks) static int raid0_run(mddev_t *mddev) { raid0_conf_t *conf; + int ret; if (mddev->chunk_size == 0) { printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); @@ -269,12 +270,13 @@ static int raid0_run(mddev_t *mddev) conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); if (!conf) - goto out; + return -ENOMEM; mddev->private = (void *)conf; conf->strip_zone = NULL; conf->devlist = NULL; - if (create_strip_zones (mddev)) + ret = create_strip_zones(mddev); + if (ret < 0) goto out_free_conf; /* calculate array device size */ @@ -306,8 +308,7 @@ out_free_conf: kfree(conf->devlist); kfree(conf); mddev->private = NULL; -out: - return -ENOMEM; + return ret; } static int raid0_stop (mddev_t *mddev) -- cgit v1.2.3 From ed7b00380d957ec770b5e90380d012c6062c13cc Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:47:36 +1000 Subject: md: raid0: Allocate all buffers for the raid0 configuration in one function. Currently the raid0 configuration is allocated in raid0_run() while the buffers for the strip_zone and the dev_list arrays are allocated in create_strip_zones(). On errors, all three buffers are freed in raid0_run(). It's easier and more readable to do the allocation and cleanup within a single function. So move that code into create_strip_zones(). Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 47 +++++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 30 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e5648b660e75..99cee51734e5 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -52,21 +52,18 @@ static int raid0_congested(void *data, int bits) return ret; } -static int create_strip_zones (mddev_t *mddev) +static int create_strip_zones(mddev_t *mddev) { - int i, c, j; + int i, c, j, err; sector_t curr_zone_end; - raid0_conf_t *conf = mddev_to_conf(mddev); mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; struct strip_zone *zone; int cnt; char b[BDEVNAME_SIZE]; - - /* - * The number of 'same size groups' - */ - conf->nr_strip_zones = 0; - + raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL); + + if (!conf) + return -ENOMEM; list_for_each_entry(rdev1, &mddev->disks, same_set) { printk(KERN_INFO "raid0: looking at %s\n", bdevname(rdev1->bdev,b)); @@ -101,16 +98,16 @@ static int create_strip_zones (mddev_t *mddev) } } printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones); - + err = -ENOMEM; conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); if (!conf->strip_zone) - return -ENOMEM; + goto abort; conf->devlist = kzalloc(sizeof(mdk_rdev_t*)* conf->nr_strip_zones*mddev->raid_disks, GFP_KERNEL); if (!conf->devlist) - return -ENOMEM; + goto abort; /* The first zone must contain all devices, so here we check that * there is a proper alignment of slots to devices and find them all @@ -119,6 +116,7 @@ static int create_strip_zones (mddev_t *mddev) cnt = 0; smallest = NULL; zone->dev = conf->devlist; + err = -EINVAL; list_for_each_entry(rdev1, &mddev->disks, same_set) { int j = rdev1->raid_disk; @@ -206,9 +204,14 @@ static int create_strip_zones (mddev_t *mddev) mddev->queue->backing_dev_info.congested_data = mddev; printk(KERN_INFO "raid0: done.\n"); + mddev->private = conf; return 0; abort: - return -EINVAL; + kfree(conf->strip_zone); + kfree(conf->devlist); + kfree(conf); + mddev->private = NULL; + return err; } /** @@ -253,7 +256,6 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks) static int raid0_run(mddev_t *mddev) { - raid0_conf_t *conf; int ret; if (mddev->chunk_size == 0) { @@ -268,16 +270,9 @@ static int raid0_run(mddev_t *mddev) blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); mddev->queue->queue_lock = &mddev->queue->__queue_lock; - conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); - if (!conf) - return -ENOMEM; - mddev->private = (void *)conf; - - conf->strip_zone = NULL; - conf->devlist = NULL; ret = create_strip_zones(mddev); if (ret < 0) - goto out_free_conf; + return ret; /* calculate array device size */ md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); @@ -299,16 +294,8 @@ static int raid0_run(mddev_t *mddev) mddev->queue->backing_dev_info.ra_pages = 2* stripe; } - blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); return 0; - -out_free_conf: - kfree(conf->strip_zone); - kfree(conf->devlist); - kfree(conf); - mddev->private = NULL; - return ret; } static int raid0_stop (mddev_t *mddev) -- cgit v1.2.3 From fb5ab4b5d6e16fd5006c9f800d0116f3547cb760 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Tue, 16 Jun 2009 16:48:19 +1000 Subject: md: raid0: Fix a memory leak when stopping a raid0 array. raid0_stop() removes all references to the raid0 configuration but misses to free the ->devlist buffer. This patch closes this leak, removes a pointless initialization and fixes a coding style issue in raid0_stop(). Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 99cee51734e5..0d62ad6df212 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -298,16 +298,15 @@ static int raid0_run(mddev_t *mddev) return 0; } -static int raid0_stop (mddev_t *mddev) +static int raid0_stop(mddev_t *mddev) { raid0_conf_t *conf = mddev_to_conf(mddev); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf->strip_zone); - conf->strip_zone = NULL; + kfree(conf->devlist); kfree(conf); mddev->private = NULL; - return 0; } -- cgit v1.2.3 From 49f357a22b3fa3eeac042dfa0a6cae920c174e48 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:50:35 +1000 Subject: md: raid0: remove ->sectors from the strip_zone structure. storing ->sectors is redundant as is can be computed from the difference z->zone_end - (z-1)->zone_end The one place where it is used, it is just as efficient to use a zone_end value instead. And removing it makes strip_zone smaller, so they array of these that is searched on every request has a better chance to say in cache. So discard the field and get the value from elsewhere. Signed-off-by: NeilBrown --- drivers/md/raid0.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 0d62ad6df212..07ef936afc71 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -55,7 +55,7 @@ static int raid0_congested(void *data, int bits) static int create_strip_zones(mddev_t *mddev) { int i, c, j, err; - sector_t curr_zone_end; + sector_t curr_zone_end, sectors; mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; struct strip_zone *zone; int cnt; @@ -153,10 +153,9 @@ static int create_strip_zones(mddev_t *mddev) goto abort; } zone->nb_dev = cnt; - zone->sectors = smallest->sectors * cnt; - zone->zone_end = zone->sectors; + zone->zone_end = smallest->sectors * cnt; - curr_zone_end = zone->sectors; + curr_zone_end = zone->zone_end; /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) @@ -189,11 +188,11 @@ static int create_strip_zones(mddev_t *mddev) } zone->nb_dev = c; - zone->sectors = (smallest->sectors - zone->dev_start) * c; + sectors = (smallest->sectors - zone->dev_start) * c; printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", - zone->nb_dev, (unsigned long long)zone->sectors); + zone->nb_dev, (unsigned long long)sectors); - curr_zone_end += zone->sectors; + curr_zone_end += sectors; zone->zone_end = curr_zone_end; printk(KERN_INFO "raid0: current zone start: %llu\n", @@ -310,16 +309,22 @@ static int raid0_stop(mddev_t *mddev) return 0; } -/* Find the zone which holds a particular offset */ +/* Find the zone which holds a particular offset + * Update *sectorp to be an offset in that zone + */ static struct strip_zone *find_zone(struct raid0_private_data *conf, - sector_t sector) + sector_t *sectorp) { int i; struct strip_zone *z = conf->strip_zone; + sector_t sector = *sectorp; for (i = 0; i < conf->nr_strip_zones; i++) - if (sector < z[i].zone_end) + if (sector < z[i].zone_end) { + if (i) + *sectorp = sector - z[i-1].zone_end; return z + i; + } BUG(); } @@ -331,7 +336,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) struct strip_zone *zone; mdk_rdev_t *tmp_dev; sector_t chunk; - sector_t sector, rsect; + sector_t sector, rsect, sector_offset; const int rw = bio_data_dir(bio); int cpu; @@ -368,11 +373,11 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_pair_release(bp); return 0; } - zone = find_zone(conf, sector); + sector_offset = sector; + zone = find_zone(conf, §or_offset); sect_in_chunk = bio->bi_sector & (chunk_sects - 1); { - sector_t x = (zone->sectors + sector - zone->zone_end) - >> chunksect_bits; + sector_t x = sector_offset >> chunksect_bits; sector_div(x, zone->nb_dev); chunk = x; -- cgit v1.2.3 From b414579f4573b6dc8583e31b01dcffd13f49fd62 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:50:52 +1000 Subject: md: raid0: remove ->dev pointer from strip_zone structure If we treat conf->devlist more like a 2 dimensional array, we can get the devlist for a particular zone simply by indexing that array, so we don't need to store the pointers to subarrays in strip_zone. This makes strip_zone smaller and so (hopefully) searches faster. Signed-of-by: NeilBrown --- drivers/md/raid0.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 07ef936afc71..af0df78223b1 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -27,7 +27,7 @@ static void raid0_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t **devlist = conf->strip_zone[0].dev; + mdk_rdev_t **devlist = conf->devlist; int i; for (i=0; iraid_disks; i++) { @@ -41,7 +41,7 @@ static int raid0_congested(void *data, int bits) { mddev_t *mddev = data; raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t **devlist = conf->strip_zone[0].dev; + mdk_rdev_t **devlist = conf->devlist; int i, ret = 0; for (i = 0; i < mddev->raid_disks && !ret ; i++) { @@ -56,7 +56,7 @@ static int create_strip_zones(mddev_t *mddev) { int i, c, j, err; sector_t curr_zone_end, sectors; - mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; + mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; struct strip_zone *zone; int cnt; char b[BDEVNAME_SIZE]; @@ -115,7 +115,7 @@ static int create_strip_zones(mddev_t *mddev) zone = &conf->strip_zone[0]; cnt = 0; smallest = NULL; - zone->dev = conf->devlist; + dev = conf->devlist; err = -EINVAL; list_for_each_entry(rdev1, &mddev->disks, same_set) { int j = rdev1->raid_disk; @@ -125,12 +125,12 @@ static int create_strip_zones(mddev_t *mddev) "aborting!\n", j); goto abort; } - if (zone->dev[j]) { + if (dev[j]) { printk(KERN_ERR "raid0: multiple devices for %d - " "aborting!\n", j); goto abort; } - zone->dev[j] = rdev1; + dev[j] = rdev1; blk_queue_stack_limits(mddev->queue, rdev1->bdev->bd_disk->queue); @@ -161,7 +161,7 @@ static int create_strip_zones(mddev_t *mddev) for (i = 1; i < conf->nr_strip_zones; i++) { zone = conf->strip_zone + i; - zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; + dev = conf->devlist + i * mddev->raid_disks; printk(KERN_INFO "raid0: zone %d\n", i); zone->dev_start = smallest->sectors; @@ -170,7 +170,7 @@ static int create_strip_zones(mddev_t *mddev) for (j=0; jstrip_zone[0].dev[j]; + rdev = conf->devlist[j]; printk(KERN_INFO "raid0: checking %s ...", bdevname(rdev->bdev, b)); if (rdev->sectors <= zone->dev_start) { @@ -178,7 +178,7 @@ static int create_strip_zones(mddev_t *mddev) continue; } printk(KERN_INFO " contained as device %d\n", c); - zone->dev[c] = rdev; + dev[c] = rdev; c++; if (!smallest || rdev->sectors < smallest->sectors) { smallest = rdev; @@ -383,7 +383,8 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) chunk = x; x = sector >> chunksect_bits; - tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; + tmp_dev = conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks + + sector_div(x, zone->nb_dev)]; } rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; -- cgit v1.2.3 From a6b3deafe0c50e3e873e8ed5cc8abfcb25c05eff Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:54:07 +1000 Subject: md: raid0: remove setting of segment boundary. This setting doesn't seem to make sense (half the chunk size??) and shouldn't be needed. The segment boundary exported by raid0 should simply be the minimum of the segment boundary of all component devices. And we already get that right. Signed-off-by: NeilBrown --- drivers/md/raid0.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index af0df78223b1..e2e9c1833336 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -261,12 +261,7 @@ static int raid0_run(mddev_t *mddev) printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); return -EINVAL; } - printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n", - mdname(mddev), - mddev->chunk_size >> 9, - (mddev->chunk_size>>1)-1); blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); - blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); mddev->queue->queue_lock = &mddev->queue->__queue_lock; ret = create_strip_zones(mddev); -- cgit v1.2.3 From 070ec55d07157a3041f92654135c3c6e2eaaf901 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Jun 2009 16:54:21 +1000 Subject: md: remove mddev_to_conf "helper" macro Having a macro just to cast a void* isn't really helpful. I would must rather see that we are simply de-referencing ->private, than have to know what the macro does. So open code the macro everywhere and remove the pointless cast. Signed-off-by: NeilBrown --- drivers/md/raid0.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e2e9c1833336..77764dad1bcb 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -26,7 +26,7 @@ static void raid0_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; mdk_rdev_t **devlist = conf->devlist; int i; @@ -40,7 +40,7 @@ static void raid0_unplug(struct request_queue *q) static int raid0_congested(void *data, int bits) { mddev_t *mddev = data; - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; mdk_rdev_t **devlist = conf->devlist; int i, ret = 0; @@ -294,7 +294,7 @@ static int raid0_run(mddev_t *mddev) static int raid0_stop(mddev_t *mddev) { - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf->strip_zone); @@ -327,7 +327,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; unsigned int sect_in_chunk, chunksect_bits, chunk_sects; - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; struct strip_zone *zone; mdk_rdev_t *tmp_dev; sector_t chunk; @@ -406,7 +406,7 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) #ifdef MD_DEBUG int j, k, h; char b[BDEVNAME_SIZE]; - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; h = 0; for (j = 0; j < conf->nr_strip_zones; j++) { -- cgit v1.2.3 From 1b9614291eb319fad96de45392eb4452ad39f0ee Mon Sep 17 00:00:00 2001 From: raz ben yehuda Date: Tue, 16 Jun 2009 16:57:40 +1000 Subject: md: have raid0 compile with MD_DEBUG on Because of the removal of the device list from the strips raid0 did not compile with MD_DEBUG flag on Signed-off-by: NeilBrown --- drivers/md/raid0.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 77764dad1bcb..d8692fc17963 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -400,7 +400,7 @@ bad_map: return 0; } -static void raid0_status (struct seq_file *seq, mddev_t *mddev) +static void raid0_status(struct seq_file *seq, mddev_t *mddev) { #undef MD_DEBUG #ifdef MD_DEBUG @@ -408,18 +408,24 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) char b[BDEVNAME_SIZE]; raid0_conf_t *conf = mddev->private; + sector_t zone_size; + sector_t zone_start = 0; h = 0; + for (j = 0; j < conf->nr_strip_zones; j++) { seq_printf(seq, " z%d", j); seq_printf(seq, "=["); for (k = 0; k < conf->strip_zone[j].nb_dev; k++) seq_printf(seq, "%s/", bdevname( - conf->strip_zone[j].dev[k]->bdev,b)); - - seq_printf(seq, "] ze=%d ds=%d s=%d\n", - conf->strip_zone[j].zone_end, - conf->strip_zone[j].dev_start, - conf->strip_zone[j].sectors); + conf->devlist[j*mddev->raid_disks + k] + ->bdev, b)); + + zone_size = conf->strip_zone[j].zone_end - zone_start; + seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n", + (unsigned long long)zone_start>>1, + (unsigned long long)conf->strip_zone[j].dev_start>>1, + (unsigned long long)zone_size>>1); + zone_start = conf->strip_zone[j].zone_end; } #endif seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); -- cgit v1.2.3 From 46994191ae8fdf1cbcc1f29282576b269a638c69 Mon Sep 17 00:00:00 2001 From: raz ben yehuda Date: Tue, 16 Jun 2009 17:00:54 +1000 Subject: md: have raid0 report its formation Report to the user what are the raid zones Signed-off-by: raziebe@gmail.com Signed-off-by: NeilBrown --- drivers/md/raid0.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index d8692fc17963..62fde23bf281 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -52,6 +52,38 @@ static int raid0_congested(void *data, int bits) return ret; } +/* + * inform the user of the raid configuration +*/ +static void dump_zones(mddev_t *mddev) +{ + int j, k, h; + sector_t zone_size = 0; + sector_t zone_start = 0; + char b[BDEVNAME_SIZE]; + raid0_conf_t *conf = mddev->private; + printk(KERN_INFO "******* %s configuration *********\n", + mdname(mddev)); + h = 0; + for (j = 0; j < conf->nr_strip_zones; j++) { + printk(KERN_INFO "zone%d=[", j); + for (k = 0; k < conf->strip_zone[j].nb_dev; k++) + printk("%s/", + bdevname(conf->devlist[j*mddev->raid_disks + + k]->bdev, b)); + printk("]\n"); + + zone_size = conf->strip_zone[j].zone_end - zone_start; + printk(KERN_INFO " zone offset=%llukb " + "device offset=%llukb size=%llukb\n", + (unsigned long long)zone_start>>1, + (unsigned long long)conf->strip_zone[j].dev_start>>1, + (unsigned long long)zone_size>>1); + zone_start = conf->strip_zone[j].zone_end; + } + printk(KERN_INFO "**********************************\n\n"); +} + static int create_strip_zones(mddev_t *mddev) { int i, c, j, err; @@ -289,6 +321,7 @@ static int raid0_run(mddev_t *mddev) } blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); + dump_zones(mddev); return 0; } -- cgit v1.2.3 From 92e59b6ba21845fadd2cce725010a9351740b76e Mon Sep 17 00:00:00 2001 From: raz ben yehuda Date: Tue, 16 Jun 2009 17:00:57 +1000 Subject: md: raid0: chunk size check in raid0_run have raid0 check chunk size in run method instead of in md. This is part of a series moving the checks from common code to the personalities where they belong. hardsect is short and chunksize is an int, so it is safe to use %. Signed-off-by: raziebe@gmail.com Signed-off-by: NeilBrown --- drivers/md/raid0.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 62fde23bf281..39936a217f95 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -234,6 +234,16 @@ static int create_strip_zones(mddev_t *mddev) mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_data = mddev; + /* + * now since we have the hard sector sizes, we can make sure + * chunk size is a multiple of that sector size + */ + if (mddev->chunk_size % queue_logical_block_size(mddev->queue)) { + printk(KERN_ERR "%s chunk_size of %d not valid\n", + mdname(mddev), + mddev->chunk_size); + goto abort; + } printk(KERN_INFO "raid0: done.\n"); mddev->private = conf; return 0; @@ -289,8 +299,9 @@ static int raid0_run(mddev_t *mddev) { int ret; - if (mddev->chunk_size == 0) { - printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); + if (mddev->chunk_size == 0 || + !is_power_of_2(mddev->chunk_size)) { + printk(KERN_ERR "md/raid0: chunk size must be a power of 2.\n"); return -EINVAL; } blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); -- cgit v1.2.3 From fbb704efb784e2c8418e34dc3013af76bdd58101 Mon Sep 17 00:00:00 2001 From: raz ben yehuda Date: Tue, 16 Jun 2009 17:02:05 +1000 Subject: md: raid0 :Enables chunk size other than powers of 2. Maintain two flows, one for pow2 chunk sizes (which uses masks and shift), and a flow for the general case (which uses sector_div). This is for the sake of performance. - introduce map_sector and is_io_in_chunk_boundary to encapsulate those two flows better for raid0_make_request - fix blk_mergeable to support the two flows. Signed-off-by: raziebe@gmail.com Signed-off-by: NeilBrown --- drivers/md/raid0.c | 107 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 30 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 39936a217f95..7cd2671cc794 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -273,7 +273,12 @@ static int raid0_mergeable_bvec(struct request_queue *q, unsigned int chunk_sectors = mddev->chunk_size >> 9; unsigned int bio_sectors = bvm->bi_size >> 9; - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; + if (is_power_of_2(mddev->chunk_size)) + max = (chunk_sectors - ((sector & (chunk_sectors-1)) + + bio_sectors)) << 9; + else + max = (chunk_sectors - (sector_div(sector, chunk_sectors) + + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ if (max <= biovec->bv_len && bio_sectors == 0) return biovec->bv_len; @@ -299,9 +304,8 @@ static int raid0_run(mddev_t *mddev) { int ret; - if (mddev->chunk_size == 0 || - !is_power_of_2(mddev->chunk_size)) { - printk(KERN_ERR "md/raid0: chunk size must be a power of 2.\n"); + if (mddev->chunk_size == 0) { + printk(KERN_ERR "md/raid0: chunk size must be set.\n"); return -EINVAL; } blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); @@ -367,15 +371,65 @@ static struct strip_zone *find_zone(struct raid0_private_data *conf, BUG(); } -static int raid0_make_request (struct request_queue *q, struct bio *bio) +/* + * remaps the bio to the target device. we separate two flows. + * power 2 flow and a general flow for the sake of perfromance +*/ +static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, + sector_t sector, sector_t *sector_offset) { - mddev_t *mddev = q->queuedata; - unsigned int sect_in_chunk, chunksect_bits, chunk_sects; + unsigned int sect_in_chunk; + sector_t chunk; raid0_conf_t *conf = mddev->private; + unsigned int chunk_sects = mddev->chunk_size >> 9; + + if (is_power_of_2(mddev->chunk_size)) { + int chunksect_bits = ffz(~chunk_sects); + /* find the sector offset inside the chunk */ + sect_in_chunk = sector & (chunk_sects - 1); + sector >>= chunksect_bits; + /* chunk in zone */ + chunk = *sector_offset; + /* quotient is the chunk in real device*/ + sector_div(chunk, zone->nb_dev << chunksect_bits); + } else{ + sect_in_chunk = sector_div(sector, chunk_sects); + chunk = *sector_offset; + sector_div(chunk, chunk_sects * zone->nb_dev); + } + /* + * position the bio over the real device + * real sector = chunk in device + starting of zone + * + the position in the chunk + */ + *sector_offset = (chunk * chunk_sects) + sect_in_chunk; + return conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks + + sector_div(sector, zone->nb_dev)]; +} + +/* + * Is io distribute over 1 or more chunks ? +*/ +static inline int is_io_in_chunk_boundary(mddev_t *mddev, + unsigned int chunk_sects, struct bio *bio) +{ + if (likely(is_power_of_2(mddev->chunk_size))) { + return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) + + (bio->bi_size >> 9)); + } else{ + sector_t sector = bio->bi_sector; + return chunk_sects >= (sector_div(sector, chunk_sects) + + (bio->bi_size >> 9)); + } +} + +static int raid0_make_request(struct request_queue *q, struct bio *bio) +{ + mddev_t *mddev = q->queuedata; + unsigned int chunk_sects; + sector_t sector_offset; struct strip_zone *zone; mdk_rdev_t *tmp_dev; - sector_t chunk; - sector_t sector, rsect, sector_offset; const int rw = bio_data_dir(bio); int cpu; @@ -391,10 +445,8 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) part_stat_unlock(); chunk_sects = mddev->chunk_size >> 9; - chunksect_bits = ffz(~chunk_sects); - sector = bio->bi_sector; - - if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { + if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { + sector_t sector = bio->bi_sector; struct bio_pair *bp; /* Sanity check -- queue functions should prevent this happening */ if (bio->bi_vcnt != 1 || @@ -403,7 +455,12 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); + if (likely(is_power_of_2(mddev->chunk_size))) + bp = bio_split(bio, chunk_sects - (sector & + (chunk_sects-1))); + else + bp = bio_split(bio, chunk_sects - + sector_div(sector, chunk_sects)); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) @@ -412,24 +469,14 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_pair_release(bp); return 0; } - sector_offset = sector; - zone = find_zone(conf, §or_offset); - sect_in_chunk = bio->bi_sector & (chunk_sects - 1); - { - sector_t x = sector_offset >> chunksect_bits; - - sector_div(x, zone->nb_dev); - chunk = x; - x = sector >> chunksect_bits; - tmp_dev = conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks - + sector_div(x, zone->nb_dev)]; - } - rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; - + sector_offset = bio->bi_sector; + zone = find_zone(mddev->private, §or_offset); + tmp_dev = map_sector(mddev, zone, bio->bi_sector, + §or_offset); bio->bi_bdev = tmp_dev->bdev; - bio->bi_sector = rsect + tmp_dev->data_offset; - + bio->bi_sector = sector_offset + zone->dev_start + + tmp_dev->data_offset; /* * Let the main block layer submit the IO and resolve recursion: */ -- cgit v1.2.3 From 9d8f0363623b3da12c43007cf77f5e1a4e8a5964 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:45:01 +1000 Subject: md: Make mddev->chunk_size sector-based. This patch renames the chunk_size field to chunk_sectors with the implied change of semantics. Since is_power_of_2(chunk_size) = is_power_of_2(chunk_sectors << 9) = is_power_of_2(chunk_sectors) these bits don't need an adjustment for the shift. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7cd2671cc794..f20b18ff7969 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -238,10 +238,10 @@ static int create_strip_zones(mddev_t *mddev) * now since we have the hard sector sizes, we can make sure * chunk size is a multiple of that sector size */ - if (mddev->chunk_size % queue_logical_block_size(mddev->queue)) { + if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { printk(KERN_ERR "%s chunk_size of %d not valid\n", mdname(mddev), - mddev->chunk_size); + mddev->chunk_sectors << 9); goto abort; } printk(KERN_INFO "raid0: done.\n"); @@ -270,10 +270,10 @@ static int raid0_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; - if (is_power_of_2(mddev->chunk_size)) + if (is_power_of_2(mddev->chunk_sectors)) max = (chunk_sectors - ((sector & (chunk_sectors-1)) + bio_sectors)) << 9; else @@ -304,11 +304,11 @@ static int raid0_run(mddev_t *mddev) { int ret; - if (mddev->chunk_size == 0) { + if (mddev->chunk_sectors == 0) { printk(KERN_ERR "md/raid0: chunk size must be set.\n"); return -EINVAL; } - blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); + blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors); mddev->queue->queue_lock = &mddev->queue->__queue_lock; ret = create_strip_zones(mddev); @@ -330,7 +330,8 @@ static int raid0_run(mddev_t *mddev) * chunksize should be used in that case. */ { - int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE; + int stripe = mddev->raid_disks * + (mddev->chunk_sectors << 9) / PAGE_SIZE; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; } @@ -381,9 +382,9 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, unsigned int sect_in_chunk; sector_t chunk; raid0_conf_t *conf = mddev->private; - unsigned int chunk_sects = mddev->chunk_size >> 9; + unsigned int chunk_sects = mddev->chunk_sectors; - if (is_power_of_2(mddev->chunk_size)) { + if (is_power_of_2(mddev->chunk_sectors)) { int chunksect_bits = ffz(~chunk_sects); /* find the sector offset inside the chunk */ sect_in_chunk = sector & (chunk_sects - 1); @@ -413,7 +414,7 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, static inline int is_io_in_chunk_boundary(mddev_t *mddev, unsigned int chunk_sects, struct bio *bio) { - if (likely(is_power_of_2(mddev->chunk_size))) { + if (likely(is_power_of_2(mddev->chunk_sectors))) { return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) + (bio->bi_size >> 9)); } else{ @@ -444,7 +445,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) bio_sectors(bio)); part_stat_unlock(); - chunk_sects = mddev->chunk_size >> 9; + chunk_sects = mddev->chunk_sectors; if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { sector_t sector = bio->bi_sector; struct bio_pair *bp; @@ -455,7 +456,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - if (likely(is_power_of_2(mddev->chunk_size))) + if (likely(is_power_of_2(mddev->chunk_sectors))) bp = bio_split(bio, chunk_sects - (sector & (chunk_sects-1))); else @@ -519,7 +520,7 @@ static void raid0_status(struct seq_file *seq, mddev_t *mddev) zone_start = conf->strip_zone[j].zone_end; } #endif - seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2); return; } -- cgit v1.2.3 From d6e412eaa52db82010f12ea7d2c9b9468e933c44 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 08:47:00 +1000 Subject: md: raid0: chunk_sectors cleanups. following the conversion to chunk_sectors, there is room for cleaning up a little. Signed-off-by: NeilBrown --- drivers/md/raid0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f20b18ff7969..11e384253718 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -273,7 +273,7 @@ static int raid0_mergeable_bvec(struct request_queue *q, unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; - if (is_power_of_2(mddev->chunk_sectors)) + if (is_power_of_2(chunk_sectors)) max = (chunk_sectors - ((sector & (chunk_sectors-1)) + bio_sectors)) << 9; else @@ -384,7 +384,7 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, raid0_conf_t *conf = mddev->private; unsigned int chunk_sects = mddev->chunk_sectors; - if (is_power_of_2(mddev->chunk_sectors)) { + if (is_power_of_2(chunk_sects)) { int chunksect_bits = ffz(~chunk_sects); /* find the sector offset inside the chunk */ sect_in_chunk = sector & (chunk_sects - 1); @@ -414,7 +414,7 @@ static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, static inline int is_io_in_chunk_boundary(mddev_t *mddev, unsigned int chunk_sects, struct bio *bio) { - if (likely(is_power_of_2(mddev->chunk_sectors))) { + if (likely(is_power_of_2(chunk_sects))) { return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) + (bio->bi_size >> 9)); } else{ @@ -456,7 +456,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - if (likely(is_power_of_2(mddev->chunk_sectors))) + if (likely(is_power_of_2(chunk_sects))) bp = bio_split(bio, chunk_sects - (sector & (chunk_sects-1))); else -- cgit v1.2.3 From 13f2682b7216ebebd72b3d5868fe7fccec91a92d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 18 Jun 2009 08:48:55 +1000 Subject: md: raid0/linear: ensure device sizes are rounded to chunk size. This is currently ensured by common code, but it is more reliable to ensure it where it is needed in personality code. All the other personalities that care already round the size to the chunk_size. raid0 and linear are the only hold-outs. Signed-off-by: NeilBrown --- drivers/md/raid0.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 11e384253718..717e64a4af9a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -100,6 +100,12 @@ static int create_strip_zones(mddev_t *mddev) printk(KERN_INFO "raid0: looking at %s\n", bdevname(rdev1->bdev,b)); c = 0; + + /* round size to chunk_size */ + sectors = rdev1->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev1->sectors = sectors * mddev->chunk_sectors; + list_for_each_entry(rdev2, &mddev->disks, same_set) { printk(KERN_INFO "raid0: comparing %s(%llu)", bdevname(rdev1->bdev,b), -- cgit v1.2.3 From 0894cc3066aaa3e75a99383c0d25feebf9b688ac Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 18 Jun 2009 08:49:23 +1000 Subject: md: Move check for bitmap presence to personality code. If the superblock of a component device indicates the presence of a bitmap but the corresponding raid personality does not support bitmaps (raid0, linear, multipath, faulty), then something is seriously wrong and we'd better refuse to run such an array. Currently, this check is performed while the superblocks are examined, i.e. before entering personality code. Therefore the generic md layer must know which raid levels support bitmaps and which do not. This patch avoids this layer violation without adding identical code to various personalities. This is accomplished by introducing a new public function to md.c, md_check_no_bitmap(), which replaces the hard-coded checks in the superblock loading functions. A call to md_check_no_bitmap() is added to the ->run method of each personality which does not support bitmaps and assembly is aborted if at least one component device contains a bitmap. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- drivers/md/raid0.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/raid0.c') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 717e64a4af9a..ab4a489d8695 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -314,6 +314,8 @@ static int raid0_run(mddev_t *mddev) printk(KERN_ERR "md/raid0: chunk size must be set.\n"); return -EINVAL; } + if (md_check_no_bitmap(mddev)) + return -EINVAL; blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors); mddev->queue->queue_lock = &mddev->queue->__queue_lock; -- cgit v1.2.3