author     Linus Torvalds <torvalds@linux-foundation.org>  2012-09-28 10:00:01 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-09-28 10:00:01 -0700
commit     c3a086e638e0cf76b26f6053a096354980af9396
tree       c5408d74db0ebc5a7a6138cf68f4aaeb41c67be5
parent     99a1300e1d84709f419182bb5189760e78234882
parent     1d55f6bcc0331d744cd5b56c4ee79e3809438161
Merge tag 'dm-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm
Pull dm fixes from Alasdair G Kergon:
 "A few fixes for problems discovered during the 3.6 cycle.
  Of particular note are fixes to the thin target's discard support,
  which I hope is finally working correctly; and fixes for multipath
  ioctls and device limits when there are no paths."
* tag 'dm-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
  dm verity: fix overflow check
  dm thin: fix discard support for data devices
  dm thin: tidy discard support
  dm: retain table limits when swapping to new table with no devices
  dm table: clear add_random unless all devices have it set
  dm: handle requests beyond end of device instead of using BUG_ON
  dm mpath: only retry ioctl when no paths if queue_if_no_path set
  dm thin: do not set discard_zeroes_data
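
The "dm verity: fix overflow check" entry tightens how verity_ctr() validates the user-supplied block counts: the 64-bit value is shifted into sector_t (which may be only 32 bits wide), shifted back, and compared with the original, so any truncation is detected. Below is a minimal userspace sketch of that round-trip pattern only, using a hypothetical 32-bit stand-in for sector_t; it is illustrative and not the kernel code itself:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for a 32-bit sector_t; illustrative only. */
typedef uint32_t sector32_t;

/* Returns 1 if converting (num_ll << shift) to sector32_t loses bits,
 * mirroring the round-trip comparison used by the dm-verity fix. */
static int shift_overflows(unsigned long long num_ll, unsigned shift)
{
	return ((sector32_t)(num_ll << shift) >> shift) != num_ll;
}

int main(void)
{
	printf("%d\n", shift_overflows(1ULL, 3));       /* 0: fits */
	printf("%d\n", shift_overflows(1ULL << 30, 3)); /* 1: truncated */
	return 0;
}

The patched verity_ctr() (see the dm-verity hunks in the diff below) applies the same round trip to both the data block count and the hash start argument.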
-rw-r--r--   drivers/md/dm-mpath.c   |  11
-rw-r--r--   drivers/md/dm-table.c   |  61
-rw-r--r--   drivers/md/dm-thin.c    | 135
-rw-r--r--   drivers/md/dm-verity.c  |   8
-rw-r--r--   drivers/md/dm.c         |  71
-rw-r--r--   drivers/md/dm.h         |   1

6 files changed, 209 insertions(+), 78 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d8abb90a6c2f..034233eefc82 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1555,6 +1555,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
 			   unsigned long arg)
 {
 	struct multipath *m = ti->private;
+	struct pgpath *pgpath;
 	struct block_device *bdev;
 	fmode_t mode;
 	unsigned long flags;
@@ -1570,12 +1571,14 @@ again:
 	if (!m->current_pgpath)
 		__choose_pgpath(m, 0);
 
-	if (m->current_pgpath) {
-		bdev = m->current_pgpath->path.dev->bdev;
-		mode = m->current_pgpath->path.dev->mode;
+	pgpath = m->current_pgpath;
+
+	if (pgpath) {
+		bdev = pgpath->path.dev->bdev;
+		mode = pgpath->path.dev->mode;
 	}
 
-	if (m->queue_io)
+	if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
 		r = -EAGAIN;
 	else if (!bdev)
 		r = -EIO;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index f90069029aae..100368eb7991 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1212,6 +1212,41 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
 	return &t->targets[(KEYS_PER_NODE * n) + k];
 }
 
+static int count_device(struct dm_target *ti, struct dm_dev *dev,
+			sector_t start, sector_t len, void *data)
+{
+	unsigned *num_devices = data;
+
+	(*num_devices)++;
+
+	return 0;
+}
+
+/*
+ * Check whether a table has no data devices attached using each
+ * target's iterate_devices method.
+ * Returns false if the result is unknown because a target doesn't
+ * support iterate_devices.
+ */
+bool dm_table_has_no_data_devices(struct dm_table *table)
+{
+	struct dm_target *uninitialized_var(ti);
+	unsigned i = 0, num_devices = 0;
+
+	while (i < dm_table_get_num_targets(table)) {
+		ti = dm_table_get_target(table, i++);
+
+		if (!ti->type->iterate_devices)
+			return false;
+
+		ti->type->iterate_devices(ti, count_device, &num_devices);
+		if (num_devices)
+			return false;
+	}
+
+	return true;
+}
+
 /*
  * Establish the new table's queue_limits and validate them.
  */
@@ -1354,17 +1389,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
 	return q && blk_queue_nonrot(q);
 }
 
-static bool dm_table_is_nonrot(struct dm_table *t)
+static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
+			     sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && !blk_queue_add_random(q);
+}
+
+static bool dm_table_all_devices_attribute(struct dm_table *t,
+					   iterate_devices_callout_fn func)
 {
 	struct dm_target *ti;
 	unsigned i = 0;
 
-	/* Ensure that all underlying device are non-rotational. */
 	while (i < dm_table_get_num_targets(t)) {
 		ti = dm_table_get_target(t, i++);
 
 		if (!ti->type->iterate_devices ||
-		    !ti->type->iterate_devices(ti, device_is_nonrot, NULL))
+		    !ti->type->iterate_devices(ti, func, NULL))
 			return 0;
 	}
 
@@ -1396,7 +1439,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	if (!dm_table_discard_zeroes_data(t))
 		q->limits.discard_zeroes_data = 0;
 
-	if (dm_table_is_nonrot(t))
+	/* Ensure that all underlying devices are non-rotational. */
+	if (dm_table_all_devices_attribute(t, device_is_nonrot))
 		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
 	else
 		queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
@@ -1404,6 +1448,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	dm_table_set_integrity(t);
 
 	/*
+	 * Determine whether or not this queue's I/O timings contribute
+	 * to the entropy pool, Only request-based targets use this.
+	 * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
+	 * have it set.
+	 */
+	if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
+		queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+
+	/*
 	 * QUEUE_FLAG_STACKABLE must be set after all queue settings are
 	 * visible to other CPUs because, once the flag is set, incoming bios
 	 * are processed by request-based dm, which refers to the queue
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index af1fc3b2c2ad..c29410af1e22 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -509,9 +509,9 @@ enum pool_mode {
 struct pool_features {
 	enum pool_mode mode;
 
-	unsigned zero_new_blocks:1;
-	unsigned discard_enabled:1;
-	unsigned discard_passdown:1;
+	bool zero_new_blocks:1;
+	bool discard_enabled:1;
+	bool discard_passdown:1;
 };
 
 struct thin_c;
@@ -580,7 +580,8 @@ struct pool_c {
 	struct dm_target_callbacks callbacks;
 
 	dm_block_t low_water_blocks;
-	struct pool_features pf;
+	struct pool_features requested_pf; /* Features requested during table load */
+	struct pool_features adjusted_pf;  /* Features used after adjusting for constituent devices */
 };
 
 /*
@@ -1839,6 +1840,47 @@ static void __requeue_bios(struct pool *pool)
 /*----------------------------------------------------------------
  * Binding of control targets to a pool object
  *--------------------------------------------------------------*/
+static bool data_dev_supports_discard(struct pool_c *pt)
+{
+	struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
+
+	return q && blk_queue_discard(q);
+}
+
+/*
+ * If discard_passdown was enabled verify that the data device
+ * supports discards.  Disable discard_passdown if not.
+ */
+static void disable_passdown_if_not_supported(struct pool_c *pt)
+{
+	struct pool *pool = pt->pool;
+	struct block_device *data_bdev = pt->data_dev->bdev;
+	struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
+	sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT;
+	const char *reason = NULL;
+	char buf[BDEVNAME_SIZE];
+
+	if (!pt->adjusted_pf.discard_passdown)
+		return;
+
+	if (!data_dev_supports_discard(pt))
+		reason = "discard unsupported";
+
+	else if (data_limits->max_discard_sectors < pool->sectors_per_block)
+		reason = "max discard sectors smaller than a block";
+
+	else if (data_limits->discard_granularity > block_size)
+		reason = "discard granularity larger than a block";
+
+	else if (block_size & (data_limits->discard_granularity - 1))
+		reason = "discard granularity not a factor of block size";
+
+	if (reason) {
+		DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason);
+		pt->adjusted_pf.discard_passdown = false;
+	}
+}
+
 static int bind_control_target(struct pool *pool, struct dm_target *ti)
 {
 	struct pool_c *pt = ti->private;
@@ -1847,31 +1889,16 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
 	 * We want to make sure that degraded pools are never upgraded.
 	 */
 	enum pool_mode old_mode = pool->pf.mode;
-	enum pool_mode new_mode = pt->pf.mode;
+	enum pool_mode new_mode = pt->adjusted_pf.mode;
 
 	if (old_mode > new_mode)
 		new_mode = old_mode;
 
 	pool->ti = ti;
 	pool->low_water_blocks = pt->low_water_blocks;
-	pool->pf = pt->pf;
-	set_pool_mode(pool, new_mode);
+	pool->pf = pt->adjusted_pf;
 
-	/*
-	 * If discard_passdown was enabled verify that the data device
-	 * supports discards.  Disable discard_passdown if not; otherwise
-	 * -EOPNOTSUPP will be returned.
-	 */
-	/* FIXME: pull this out into a sep fn. */
-	if (pt->pf.discard_passdown) {
-		struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
-		if (!q || !blk_queue_discard(q)) {
-			char buf[BDEVNAME_SIZE];
-			DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.",
-			       bdevname(pt->data_dev->bdev, buf));
-			pool->pf.discard_passdown = 0;
-		}
-	}
+	set_pool_mode(pool, new_mode);
 
 	return 0;
 }
@@ -1889,9 +1916,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti)
 static void pool_features_init(struct pool_features *pf)
 {
 	pf->mode = PM_WRITE;
-	pf->zero_new_blocks = 1;
-	pf->discard_enabled = 1;
-	pf->discard_passdown = 1;
+	pf->zero_new_blocks = true;
+	pf->discard_enabled = true;
+	pf->discard_passdown = true;
 }
 
 static void __pool_destroy(struct pool *pool)
@@ -2119,13 +2146,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
 		argc--;
 
 		if (!strcasecmp(arg_name, "skip_block_zeroing"))
-			pf->zero_new_blocks = 0;
+			pf->zero_new_blocks = false;
 
 		else if (!strcasecmp(arg_name, "ignore_discard"))
-			pf->discard_enabled = 0;
+			pf->discard_enabled = false;
 
 		else if (!strcasecmp(arg_name, "no_discard_passdown"))
-			pf->discard_passdown = 0;
+			pf->discard_passdown = false;
 
 		else if (!strcasecmp(arg_name, "read_only"))
 			pf->mode = PM_READ_ONLY;
@@ -2259,8 +2286,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	pt->metadata_dev = metadata_dev;
 	pt->data_dev = data_dev;
 	pt->low_water_blocks = low_water_blocks;
-	pt->pf = pf;
+	pt->adjusted_pf = pt->requested_pf = pf;
 	ti->num_flush_requests = 1;
+
 	/*
 	 * Only need to enable discards if the pool should pass
	 * them down to the data device.  The thin device's discard
@@ -2268,12 +2296,14 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	 */
 	if (pf.discard_enabled && pf.discard_passdown) {
 		ti->num_discard_requests = 1;
+
 		/*
 		 * Setting 'discards_supported' circumvents the normal
 		 * stacking of discard limits (this keeps the pool and
 		 * thin devices' discard limits consistent).
 		 */
 		ti->discards_supported = true;
+		ti->discard_zeroes_data_unsupported = true;
 	}
 
 	ti->private = pt;
@@ -2703,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
 		       format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
 		       (unsigned long)pool->sectors_per_block,
 		       (unsigned long long)pt->low_water_blocks);
-		emit_flags(&pt->pf, result, sz, maxlen);
+		emit_flags(&pt->requested_pf, result, sz, maxlen);
 		break;
 	}
 
@@ -2732,20 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
-static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
+static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
 {
-	/*
-	 * FIXME: these limits may be incompatible with the pool's data device
-	 */
+	struct pool *pool = pt->pool;
+	struct queue_limits *data_limits;
+
 	limits->max_discard_sectors = pool->sectors_per_block;
 
 	/*
-	 * This is just a hint, and not enforced.  We have to cope with
-	 * bios that cover a block partially.  A discard that spans a block
-	 * boundary is not sent to this target.
+	 * discard_granularity is just a hint, and not enforced.
 	 */
-	limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
-	limits->discard_zeroes_data = pool->pf.zero_new_blocks;
+	if (pt->adjusted_pf.discard_passdown) {
+		data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
+		limits->discard_granularity = data_limits->discard_granularity;
+	} else
+		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
 }
 
 static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2755,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 	blk_limits_io_min(limits, 0);
 	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
-	if (pool->pf.discard_enabled)
-		set_discard_limits(pool, limits);
+
+	/*
+	 * pt->adjusted_pf is a staging area for the actual features to use.
+	 * They get transferred to the live pool in bind_control_target()
+	 * called from pool_preresume().
+	 */
+	if (!pt->adjusted_pf.discard_enabled)
+		return;
+
+	disable_passdown_if_not_supported(pt);
+
+	set_discard_limits(pt, limits);
 }
 
 static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 3, 0},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -3042,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti,
 	return 0;
 }
 
+/*
+ * A thin device always inherits its queue limits from its pool.
+ */
 static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct thin_c *tc = ti->private;
-	struct pool *pool = tc->pool;
 
-	blk_limits_io_min(limits, 0);
-	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
-	set_discard_limits(pool, limits);
+	*limits = bdev_get_queue(tc->pool_dev->bdev)->limits;
 }
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 3, 0},
+	.version = {1, 4, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 254d19268ad2..892ae2766aa6 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	v->hash_dev_block_bits = ffs(num) - 1;
 
 	if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
-	    num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) !=
-	    (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) {
+	    (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
+	    >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
 		ti->error = "Invalid data blocks";
 		r = -EINVAL;
 		goto bad;
@@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	}
 
 	if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
-	    num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) !=
-	    (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) {
+	    (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
+	    >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
 		ti->error = "Invalid hash start";
 		r = -EINVAL;
 		goto bad;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4e09b6ff5b49..67ffa391edcf 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -865,10 +865,14 @@ static void dm_done(struct request *clone, int error, bool mapped)
 {
 	int r = error;
 	struct dm_rq_target_io *tio = clone->end_io_data;
-	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
+	dm_request_endio_fn rq_end_io = NULL;
 
-	if (mapped && rq_end_io)
-		r = rq_end_io(tio->ti, clone, error, &tio->info);
+	if (tio->ti) {
+		rq_end_io = tio->ti->type->rq_end_io;
+
+		if (mapped && rq_end_io)
+			r = rq_end_io(tio->ti, clone, error, &tio->info);
+	}
 
 	if (r <= 0)
 		/* The target wants to complete the I/O */
@@ -1588,15 +1592,6 @@ static int map_request(struct dm_target *ti, struct request *clone,
 	int r, requeued = 0;
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
-	/*
-	 * Hold the md reference here for the in-flight I/O.
-	 * We can't rely on the reference count by device opener,
-	 * because the device may be closed during the request completion
-	 * when all bios are completed.
-	 * See the comment in rq_completed() too.
-	 */
-	dm_get(md);
-
 	tio->ti = ti;
 	r = ti->type->map_rq(ti, clone, &tio->info);
 	switch (r) {
@@ -1628,6 +1623,26 @@ static int map_request(struct dm_target *ti, struct request *clone,
 	return requeued;
 }
 
+static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
+{
+	struct request *clone;
+
+	blk_start_request(orig);
+	clone = orig->special;
+	atomic_inc(&md->pending[rq_data_dir(clone)]);
+
+	/*
+	 * Hold the md reference here for the in-flight I/O.
+	 * We can't rely on the reference count by device opener,
+	 * because the device may be closed during the request completion
+	 * when all bios are completed.
+	 * See the comment in rq_completed() too.
+	 */
+	dm_get(md);
+
+	return clone;
+}
+
 /*
  * q->request_fn for request-based dm.
 * Called with the queue lock held.
@@ -1657,14 +1672,21 @@ static void dm_request_fn(struct request_queue *q)
 			pos = blk_rq_pos(rq);
 
 		ti = dm_table_find_target(map, pos);
-		BUG_ON(!dm_target_is_valid(ti));
+		if (!dm_target_is_valid(ti)) {
+			/*
+			 * Must perform setup, that dm_done() requires,
+			 * before calling dm_kill_unmapped_request
+			 */
+			DMERR_LIMIT("request attempted access beyond the end of device");
+			clone = dm_start_request(md, rq);
+			dm_kill_unmapped_request(clone, -EIO);
+			continue;
+		}
 
 		if (ti->type->busy && ti->type->busy(ti))
 			goto delay_and_out;
 
-		blk_start_request(rq);
-		clone = rq->special;
-		atomic_inc(&md->pending[rq_data_dir(clone)]);
+		clone = dm_start_request(md, rq);
 
 		spin_unlock(q->queue_lock);
 		if (map_request(ti, clone, md))
@@ -1684,8 +1706,6 @@ delay_and_out:
 	blk_delay_queue(q, HZ / 10);
 out:
 	dm_table_put(map);
-
-	return;
 }
 
 int dm_underlying_device_busy(struct request_queue *q)
@@ -2409,7 +2429,7 @@ static void dm_queue_flush(struct mapped_device *md)
  */
 struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
-	struct dm_table *map = ERR_PTR(-EINVAL);
+	struct dm_table *live_map, *map = ERR_PTR(-EINVAL);
 	struct queue_limits limits;
 	int r;
 
@@ -2419,6 +2439,19 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 	if (!dm_suspended_md(md))
 		goto out;
 
+	/*
+	 * If the new table has no data devices, retain the existing limits.
+	 * This helps multipath with queue_if_no_path if all paths disappear,
+	 * then new I/O is queued based on these limits, and then some paths
+	 * reappear.
+	 */
+	if (dm_table_has_no_data_devices(table)) {
+		live_map = dm_get_live_table(md);
+		if (live_map)
+			limits = md->queue->limits;
+		dm_table_put(live_map);
+	}
+
 	r = dm_calculate_queue_limits(table, &limits);
 	if (r) {
 		map = ERR_PTR(r);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 52eef493d266..6a99fefaa743 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -54,6 +54,7 @@ void dm_table_event_callback(struct dm_table *t,
 			     void (*fn)(void *), void *context);
 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
+bool dm_table_has_no_data_devices(struct dm_table *table);
 int dm_calculate_queue_limits(struct dm_table *table,
 			      struct queue_limits *limits);
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
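
For the multipath ioctl change in drivers/md/dm-mpath.c above, the patched test returns -EAGAIN (so the ioctl is retried) either while a path exists and m->queue_io is set, or while no path exists but m->queue_if_no_path is set; with no path and no queueing it still fails with -EIO. A minimal sketch of just that return-code decision, with plain booleans standing in for the multipath state fields (illustrative only, not the kernel implementation):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Models the return-code selection from the patched multipath_ioctl();
 * the real function goes on to issue the ioctl against the chosen
 * path's block device when 0 is returned. */
static int mpath_ioctl_disposition(bool have_path, bool queue_io,
				   bool queue_if_no_path)
{
	if ((have_path && queue_io) || (!have_path && queue_if_no_path))
		return -EAGAIN;		/* queued: caller should retry */
	if (!have_path)
		return -EIO;		/* no path and not queueing */
	return 0;			/* pass ioctl to the current path */
}

int main(void)
{
	/* All paths down but queue_if_no_path set: retry instead of failing. */
	printf("%d\n", mpath_ioctl_disposition(false, false, true));
	printf("%d\n", mpath_ioctl_disposition(false, false, false));
	printf("%d\n", mpath_ioctl_disposition(true, false, false));
	return 0;
}

Before the patch the decision consulted only m->queue_io, independent of whether any path existed, which is what the deleted line in the dm-mpath hunk shows.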
