From d15b774c2920d55e3d58275c97fbe3adc3afde38 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Tue, 2 Aug 2011 12:32:01 +0100 Subject: dm: fix idr leak on module removal Destroy _minor_idr when unloading the core dm module. (Found by kmemleak.) Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0cf68b478878..41abc6dd481b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -37,6 +37,8 @@ static const char *_name = DM_NAME; static unsigned int major = 0; static unsigned int _major = 0; +static DEFINE_IDR(_minor_idr); + static DEFINE_SPINLOCK(_minor_lock); /* * For bio-based dm. @@ -313,6 +315,12 @@ static void __exit dm_exit(void) while (i--) _exits[i](); + + /* + * Should be empty by this point. + */ + idr_remove_all(&_minor_idr); + idr_destroy(&_minor_idr); } /* @@ -1705,8 +1713,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) /*----------------------------------------------------------------- * An IDR is used to keep track of allocated minor numbers. *---------------------------------------------------------------*/ -static DEFINE_IDR(_minor_idr); - static void free_minor(int minor) { spin_lock(&_minor_lock); -- cgit v1.2.3 From 936688d7eb0f39be96c5791be1a04994cc8d6aa0 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:01 +0100 Subject: dm table: fix discard support Remove 'discards_supported' from the dm_table structure. The same information can be easily discovered from the table's target(s) in dm_table_supports_discards(). Before this fix dm_table_supports_discards() would skip checking the individual targets' 'discards_supported' flag if any one target in the table didn't set num_discard_requests > 0. Now the per-target 'discards_supported' flag is effective at ensuring the final DM device advertises discard support. 
But, to be clear, targets that don't support discards (!num_discard_requests) will not receive discard requests. Also DMWARN if a target sets 'discards_supported' override but forgets to set 'num_discard_requests'. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 41abc6dd481b..aeb0fa1ccfe4 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1179,7 +1179,8 @@ static int __clone_and_map_discard(struct clone_info *ci) /* * Even though the device advertised discard support, - * reconfiguration might have changed that since the + * that does not mean every target supports it, and + * reconfiguration might also have changed that since the * check was performed. */ if (!ti->num_discard_requests) -- cgit v1.2.3 From d5b9dd04bd74b774b8e8d93ced7a0d15ad403fa9 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:04 +0100 Subject: dm: ignore merge_bvec for snapshots when safe Add a new flag DMF_MERGE_IS_OPTIONAL to struct mapped_device to indicate whether the device can accept bios larger than the size its merge function returns. When set, use this to send large bios to snapshots which can split them if necessary. Snapshot I/O may be significantly fragmented and this approach seems to improve performance. Before the patch, dm_set_device_limits restricted bio size to page size if the underlying device had a merge function and the target didn't provide a merge function. After the patch, dm_set_device_limits restricts bio size to page size if the underlying device has a merge function, doesn't have DMF_MERGE_IS_OPTIONAL flag and the target doesn't provide a merge function. The snapshot target can't provide a merge function because when the merge function is called, it is impossible to determine where the bio will be remapped. 
Previously this led us to impose a 4k limit, which we can now remove if the snapshot store is located on a device without a merge function. Together with another patch for optimizing full chunk writes, it improves performance from 29MB/s to 40MB/s when writing to the filesystem on snapshot store. If the snapshot store is placed on a non-dm device with a merge function (such as md-raid), device mapper still limits all bios to page size. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index aeb0fa1ccfe4..1000eaf984ef 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -111,6 +111,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 +#define DMF_MERGE_IS_OPTIONAL 6 /* * Work processed by per-device workqueue. @@ -1992,6 +1993,59 @@ static void __set_size(struct mapped_device *md, sector_t size) i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); } +/* + * Return 1 if the queue has a compulsory merge_bvec_fn function. + * + * If this function returns 0, then the device is either a non-dm + * device without a merge_bvec_fn, or it is a dm device that is + * able to split any bios it receives that are too big. 
+ */ +int dm_queue_merge_is_compulsory(struct request_queue *q) +{ + struct mapped_device *dev_md; + + if (!q->merge_bvec_fn) + return 0; + + if (q->make_request_fn == dm_request) { + dev_md = q->queuedata; + if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) + return 0; + } + + return 1; +} + +static int dm_device_merge_is_compulsory(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct block_device *bdev = dev->bdev; + struct request_queue *q = bdev_get_queue(bdev); + + return dm_queue_merge_is_compulsory(q); +} + +/* + * Return 1 if it is acceptable to ignore merge_bvec_fn based + * on the properties of the underlying devices. + */ +static int dm_table_merge_is_optional(struct dm_table *table) +{ + unsigned i = 0; + struct dm_target *ti; + + while (i < dm_table_get_num_targets(table)) { + ti = dm_table_get_target(table, i++); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) + return 0; + } + + return 1; +} + /* * Returns old map, which caller must destroy. 
*/ @@ -2002,6 +2056,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct request_queue *q = md->queue; sector_t size; unsigned long flags; + int merge_is_optional; size = dm_table_get_size(t); @@ -2027,10 +2082,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, __bind_mempools(md, t); + merge_is_optional = dm_table_merge_is_optional(t); + write_lock_irqsave(&md->map_lock, flags); old_map = md->map; md->map = t; dm_table_set_restrictions(t, q, limits); + if (merge_is_optional) + set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); + else + clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); write_unlock_irqrestore(&md->map_lock, flags); return old_map; -- cgit v1.2.3 From ed8b752bccf2560e305e25125721d2f0ac759e88 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:08 +0100 Subject: dm table: set flush capability based on underlying devices DM has always advertised both REQ_FLUSH and REQ_FUA flush capabilities regardless of whether or not a given DM device's underlying devices also advertised a need for them. Block's flush-merge changes from 2.6.39 have proven to be more costly for DM devices. Performance regressions have been reported even when DM's underlying devices do not advertise that they have a write cache. Fix the performance regressions by configuring a DM device's flushing capabilities based on those of the underlying devices' capabilities. 
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1000eaf984ef..52b39f335bb3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1808,7 +1808,6 @@ static void dm_init_md_queue(struct mapped_device *md) blk_queue_make_request(md->queue, dm_request); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); blk_queue_merge_bvec(md->queue, dm_merge_bvec); - blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); } /* -- cgit v1.2.3