From b844fe691897221ad0d5e0279c8ea9e3e4a46982 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 5 Apr 2013 15:36:32 +0100 Subject: dm cache: fix writes to cache device in writethrough mode The dm-cache writethrough strategy introduced by commit e2e74d617eadc15 ("dm cache: fix race in writethrough implementation") issues a bio to the origin device, remaps and then issues the bio to the cache device. This more conservative in-series approach was selected to favor correctness over performance (of the previous parallel writethrough). However, this in-series implementation that reuses the same bio to write both the origin and cache device didn't take into account that the block layer's req_bio_endio() modifies a completing bio's bi_sector and bi_size. So the new writethrough strategy needs to preserve these bio fields, and restore them before submission to the cache device, otherwise nothing gets written to the cache (because bi_size is 0). This patch adds a struct dm_bio_details field to struct per_bio_data, and uses dm_bio_record() and dm_bio_restore() to ensure the bio is restored before reissuing to the cache device. Adding such a large structure to the per_bio_data is not ideal but we can improve this later, for now correctness is the important thing. This problem initially went unnoticed because the dm-cache test-suite uses a linear DM device for the dm-cache device's origin device. Writethrough worked as expected because DM submits a *clone* of the original bio, so the original bio which was reused for the cache was never touched. Signed-off-by: Darrick J. Wong Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 66120bd46d15..1ab122a75764 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -6,6 +6,7 @@ #include "dm.h" #include "dm-bio-prison.h" +#include "dm-bio-record.h" #include "dm-cache-metadata.h" #include @@ -205,6 +206,7 @@ struct per_bio_data { struct cache *cache; dm_cblock_t cblock; bio_end_io_t *saved_bi_end_io; + struct dm_bio_details bio_details; }; struct dm_cache_migration { @@ -643,6 +645,7 @@ static void writethrough_endio(struct bio *bio, int err) return; } + dm_bio_restore(&pb->bio_details, bio); remap_to_cache(pb->cache, bio, pb->cblock); /* @@ -667,6 +670,7 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, pb->cache = cache; pb->cblock = cblock; pb->saved_bi_end_io = bio->bi_end_io; + dm_bio_record(&pb->bio_details, bio); bio->bi_end_io = writethrough_endio; remap_to_origin_clear_discard(pb->cache, bio, oblock); -- cgit v1.2.3 From 19b0092e265fe9ab129902373c3127c0e0be3376 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 5 Apr 2013 15:36:34 +0100 Subject: dm cache: reduce bio front_pad size in writeback mode A recent patch to fix the dm cache target's writethrough mode extended the bio's front_pad to include a 1056-byte struct dm_bio_details. Writeback mode doesn't need this, so this patch reduces the per_bio_data_size to 16 bytes in this case instead of 1096. The dm_bio_details structure was added in "dm cache: fix writes to cache device in writethrough mode" which fixed commit e2e74d617e ("dm cache: fix race in writethrough implementation"). In writeback mode we avoid allocating the writethrough-specific members of the per_bio_data structure (the dm_bio_details structure included). Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 47 ++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1ab122a75764..10744091e6ca 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -202,7 +202,11 @@ struct per_bio_data { unsigned req_nr:2; struct dm_deferred_entry *all_io_entry; - /* writethrough fields */ + /* + * writethrough fields. These MUST remain at the end of this + * structure and the 'cache' member must be the first as it + * is used to determine the offsetof the writethrough fields. + */ struct cache *cache; dm_cblock_t cblock; bio_end_io_t *saved_bi_end_io; @@ -515,16 +519,28 @@ static void save_stats(struct cache *cache) /*---------------------------------------------------------------- * Per bio data *--------------------------------------------------------------*/ -static struct per_bio_data *get_per_bio_data(struct bio *bio) + +/* + * If using writeback, leave out struct per_bio_data's writethrough fields. + */ +#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) +#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) + +static size_t get_per_bio_data_size(struct cache *cache) +{ + return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; +} + +static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) { - struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + struct per_bio_data *pb = dm_per_bio_data(bio, data_size); BUG_ON(!pb); return pb; } -static struct per_bio_data *init_per_bio_data(struct bio *bio) +static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) { - struct per_bio_data *pb = get_per_bio_data(bio); + struct per_bio_data *pb = get_per_bio_data(bio, data_size); pb->tick = false; pb->req_nr = dm_bio_get_target_bio_nr(bio); @@ -558,7 +574,8 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) { unsigned long flags; - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); spin_lock_irqsave(&cache->lock, flags); if (cache->need_tick_bio && @@ -637,7 +654,7 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio) static void writethrough_endio(struct bio *bio, int err) { - struct per_bio_data *pb = get_per_bio_data(bio); + struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); bio->bi_end_io = pb->saved_bi_end_io; if (err) { @@ -665,7 +682,7 @@ static void writethrough_endio(struct bio *bio, int err) static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, dm_oblock_t oblock, dm_cblock_t cblock) { - struct per_bio_data *pb = get_per_bio_data(bio); + struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); pb->cache = cache; pb->cblock = cblock; @@ -1039,7 +1056,8 @@ static void defer_bio(struct cache *cache, struct bio *bio) static void process_flush_bio(struct cache *cache, struct bio *bio) { - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); BUG_ON(bio->bi_size); if (!pb->req_nr) @@ -1111,7 +1129,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, dm_oblock_t block = get_bio_block(cache, bio); struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; struct policy_result lookup_result; - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); bool discarded_block = is_discarded_oblock(cache, block); bool can_migrate = discarded_block || spare_migration_bandwidth(cache); @@ -1885,7 +1904,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) cache->ti = ca->ti; ti->private = cache; - ti->per_bio_data_size = sizeof(struct per_bio_data); ti->num_flush_bios = 2; ti->flush_supported = true; @@ -1894,6 +1912,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->discard_zeroes_data_unsupported = true; memcpy(&cache->features, &ca->features, sizeof(cache->features)); + ti->per_bio_data_size = get_per_bio_data_size(cache); cache->callbacks.congested_fn = cache_is_congested; dm_table_add_target_callbacks(ti->table, &cache->callbacks); @@ -2096,6 +2115,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio) int r; dm_oblock_t block = get_bio_block(cache, bio); + size_t pb_data_size = get_per_bio_data_size(cache); bool can_migrate = false; bool discarded_block; struct dm_bio_prison_cell *cell; @@ -2112,7 +2132,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } - pb = init_per_bio_data(bio); + pb = init_per_bio_data(bio, pb_data_size); if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { defer_bio(cache, bio); @@ -2197,7 +2217,8 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) { struct cache *cache = ti->private; unsigned long flags; - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); if (pb->tick) { policy_tick(cache->policy); -- cgit v1.2.3