summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/request.c5
-rw-r--r--drivers/md/dm-io.c7
-rw-r--r--drivers/md/dm-mpath.c18
-rw-r--r--drivers/md/dm-snap-persistent.c20
-rw-r--r--drivers/md/dm-snap.c5
-rw-r--r--drivers/md/dm-stats.c23
-rw-r--r--drivers/md/dm-thin.c14
-rw-r--r--drivers/md/dm.c71
-rw-r--r--drivers/md/dm.h3
-rw-r--r--drivers/md/md.c5
-rw-r--r--drivers/md/raid1.c1
-rw-r--r--drivers/md/raid10.c1
-rw-r--r--drivers/md/raid5.c20
13 files changed, 157 insertions, 36 deletions
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 71eb233b9ace..2a7f0dd6abab 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -996,10 +996,11 @@ static void request_write(struct cached_dev *dc, struct search *s)
closure_bio_submit(bio, cl, s->d);
} else {
bch_writeback_add(dc);
+ s->op.cache_bio = bio;
if (bio->bi_rw & REQ_FLUSH) {
/* Also need to send a flush to the backing device */
- struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
+ struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
dc->disk.bio_split);
flush->bi_rw = WRITE_FLUSH;
@@ -1008,8 +1009,6 @@ static void request_write(struct cached_dev *dc, struct search *s)
flush->bi_private = cl;
closure_bio_submit(flush, cl, s->d);
- } else {
- s->op.cache_bio = bio;
}
}
out:
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index ea49834377c8..2a20986a2fec 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -19,8 +19,6 @@
#define DM_MSG_PREFIX "io"
#define DM_IO_MAX_REGIONS BITS_PER_LONG
-#define MIN_IOS 16
-#define MIN_BIOS 16
struct dm_io_client {
mempool_t *pool;
@@ -50,16 +48,17 @@ static struct kmem_cache *_dm_io_cache;
struct dm_io_client *dm_io_client_create(void)
{
struct dm_io_client *client;
+ unsigned min_ios = dm_get_reserved_bio_based_ios();
client = kmalloc(sizeof(*client), GFP_KERNEL);
if (!client)
return ERR_PTR(-ENOMEM);
- client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache);
+ client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
if (!client->pool)
goto bad;
- client->bios = bioset_create(MIN_BIOS, 0);
+ client->bios = bioset_create(min_ios, 0);
if (!client->bios)
goto bad;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index b759a127f9c3..de570a558764 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -7,6 +7,7 @@
#include <linux/device-mapper.h>
+#include "dm.h"
#include "dm-path-selector.h"
#include "dm-uevent.h"
@@ -116,8 +117,6 @@ struct dm_mpath_io {
typedef int (*action_fn) (struct pgpath *pgpath);
-#define MIN_IOS 256 /* Mempool size */
-
static struct kmem_cache *_mpio_cache;
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
@@ -190,6 +189,7 @@ static void free_priority_group(struct priority_group *pg,
static struct multipath *alloc_multipath(struct dm_target *ti)
{
struct multipath *m;
+ unsigned min_ios = dm_get_reserved_rq_based_ios();
m = kzalloc(sizeof(*m), GFP_KERNEL);
if (m) {
@@ -202,7 +202,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
INIT_WORK(&m->trigger_event, trigger_event);
init_waitqueue_head(&m->pg_init_wait);
mutex_init(&m->work_mutex);
- m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
+ m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
if (!m->mpio_pool) {
kfree(m);
return NULL;
@@ -1268,6 +1268,7 @@ static int noretry_error(int error)
case -EREMOTEIO:
case -EILSEQ:
case -ENODATA:
+ case -ENOSPC:
return 1;
}
@@ -1298,8 +1299,17 @@ static int do_end_io(struct multipath *m, struct request *clone,
if (!error && !clone->errors)
return 0; /* I/O complete */
- if (noretry_error(error))
+ if (noretry_error(error)) {
+ if ((clone->cmd_flags & REQ_WRITE_SAME) &&
+ !clone->q->limits.max_write_same_sectors) {
+ struct queue_limits *limits;
+
+ /* device doesn't really support WRITE SAME, disable it */
+ limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
+ limits->max_write_same_sectors = 0;
+ }
return error;
+ }
if (mpio->pgpath)
fail_path(mpio->pgpath);
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 3ac415675b6c..2d2b1b7588d7 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
*/
INIT_WORK_ONSTACK(&req.work, do_metadata);
queue_work(ps->metadata_wq, &req.work);
- flush_work(&req.work);
+ flush_workqueue(ps->metadata_wq);
return req.result;
}
@@ -269,6 +269,14 @@ static chunk_t area_location(struct pstore *ps, chunk_t area)
return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
}
+static void skip_metadata(struct pstore *ps)
+{
+ uint32_t stride = ps->exceptions_per_area + 1;
+ chunk_t next_free = ps->next_free;
+ if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
+ ps->next_free++;
+}
+
/*
* Read or write a metadata area. Remembering to skip the first
* chunk which holds the header.
@@ -502,6 +510,8 @@ static int read_exceptions(struct pstore *ps,
ps->current_area--;
+ skip_metadata(ps);
+
return 0;
}
@@ -616,8 +626,6 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
struct dm_exception *e)
{
struct pstore *ps = get_info(store);
- uint32_t stride;
- chunk_t next_free;
sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
/* Is there enough room ? */
@@ -630,10 +638,8 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
* Move onto the next free pending, making sure to take
* into account the location of the metadata chunks.
*/
- stride = (ps->exceptions_per_area + 1);
- next_free = ++ps->next_free;
- if (sector_div(next_free, stride) == 1)
- ps->next_free++;
+ ps->next_free++;
+ skip_metadata(ps);
atomic_inc(&ps->pending_count);
return 0;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index c434e5aab2df..aec57d76db5d 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -725,17 +725,16 @@ static int calc_max_buckets(void)
*/
static int init_hash_tables(struct dm_snapshot *s)
{
- sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
+ sector_t hash_size, cow_dev_size, max_buckets;
/*
* Calculate based on the size of the original volume or
* the COW volume...
*/
cow_dev_size = get_dev_size(s->cow->bdev);
- origin_dev_size = get_dev_size(s->origin->bdev);
max_buckets = calc_max_buckets();
- hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
+ hash_size = cow_dev_size >> s->store->chunk_shift;
hash_size = min(hash_size, max_buckets);
if (hash_size < 64)
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 8ae31e8d3d64..3d404c1371ed 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -451,19 +451,26 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
struct dm_stat_percpu *p;
/*
- * For strict correctness we should use local_irq_disable/enable
+ * For strict correctness we should use local_irq_save/restore
* instead of preempt_disable/enable.
*
- * This is racy if the driver finishes bios from non-interrupt
- * context as well as from interrupt context or from more different
- * interrupts.
+ * preempt_disable/enable is racy if the driver finishes bios
+ * from non-interrupt context as well as from interrupt context
+ * or from more different interrupts.
*
- * However, the race only results in not counting some events,
- * so it is acceptable.
+ * On 64-bit architectures the race only results in not counting some
+ * events, so it is acceptable. On 32-bit architectures the race could
+ * cause the counter going off by 2^32, so we need to do proper locking
+ * there.
*
* part_stat_lock()/part_stat_unlock() have this race too.
*/
+#if BITS_PER_LONG == 32
+ unsigned long flags;
+ local_irq_save(flags);
+#else
preempt_disable();
+#endif
p = &s->stat_percpu[smp_processor_id()][entry];
if (!end) {
@@ -478,7 +485,11 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
p->ticks[idx] += duration;
}
+#if BITS_PER_LONG == 32
+ local_irq_restore(flags);
+#else
preempt_enable();
+#endif
}
static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ed063427d676..2c0cf511ec23 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2095,6 +2095,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
* them down to the data device. The thin device's discard
* processing will cause mappings to be removed from the btree.
*/
+ ti->discard_zeroes_data_unsupported = true;
if (pf.discard_enabled && pf.discard_passdown) {
ti->num_discard_bios = 1;
@@ -2104,7 +2105,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
* thin devices' discard limits consistent).
*/
ti->discards_supported = true;
- ti->discard_zeroes_data_unsupported = true;
}
ti->private = pt;
@@ -2689,8 +2689,16 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
* They get transferred to the live pool in bind_control_target()
* called from pool_preresume().
*/
- if (!pt->adjusted_pf.discard_enabled)
+ if (!pt->adjusted_pf.discard_enabled) {
+ /*
+ * Must explicitly disallow stacking discard limits otherwise the
+ * block layer will stack them if pool's data device has support.
+ * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
+ * user to see that, so make sure to set all discard limits to 0.
+ */
+ limits->discard_granularity = 0;
return;
+ }
disable_passdown_if_not_supported(pt);
@@ -2826,10 +2834,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
/* In case the pool supports discards, pass them on. */
+ ti->discard_zeroes_data_unsupported = true;
if (tc->pool->pf.discard_enabled) {
ti->discards_supported = true;
ti->num_discard_bios = 1;
- ti->discard_zeroes_data_unsupported = true;
/* Discard bios must be split on a block boundary */
ti->split_discard_bios = true;
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6a5e9ed2fcc3..b3e26c7d1417 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -211,10 +211,55 @@ struct dm_md_mempools {
struct bio_set *bs;
};
-#define MIN_IOS 256
+#define RESERVED_BIO_BASED_IOS 16
+#define RESERVED_REQUEST_BASED_IOS 256
+#define RESERVED_MAX_IOS 1024
static struct kmem_cache *_io_cache;
static struct kmem_cache *_rq_tio_cache;
+/*
+ * Bio-based DM's mempools' reserved IOs set by the user.
+ */
+static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
+
+/*
+ * Request-based DM's mempools' reserved IOs set by the user.
+ */
+static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
+
+static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
+ unsigned def, unsigned max)
+{
+ unsigned ios = ACCESS_ONCE(*reserved_ios);
+ unsigned modified_ios = 0;
+
+ if (!ios)
+ modified_ios = def;
+ else if (ios > max)
+ modified_ios = max;
+
+ if (modified_ios) {
+ (void)cmpxchg(reserved_ios, ios, modified_ios);
+ ios = modified_ios;
+ }
+
+ return ios;
+}
+
+unsigned dm_get_reserved_bio_based_ios(void)
+{
+ return __dm_get_reserved_ios(&reserved_bio_based_ios,
+ RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
+}
+EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
+
+unsigned dm_get_reserved_rq_based_ios(void)
+{
+ return __dm_get_reserved_ios(&reserved_rq_based_ios,
+ RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
+}
+EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
+
static int __init local_init(void)
{
int r = -ENOMEM;
@@ -2278,6 +2323,17 @@ struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
}
/*
+ * The queue_limits are only valid as long as you have a reference
+ * count on 'md'.
+ */
+struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
+{
+ BUG_ON(!atomic_read(&md->holders));
+ return &md->queue->limits;
+}
+EXPORT_SYMBOL_GPL(dm_get_queue_limits);
+
+/*
* Fully initialize a request-based queue (->elevator, ->request_fn, etc).
*/
static int dm_init_request_based_queue(struct mapped_device *md)
@@ -2862,18 +2918,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
if (type == DM_TYPE_BIO_BASED) {
cachep = _io_cache;
- pool_size = 16;
+ pool_size = dm_get_reserved_bio_based_ios();
front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
} else if (type == DM_TYPE_REQUEST_BASED) {
cachep = _rq_tio_cache;
- pool_size = MIN_IOS;
+ pool_size = dm_get_reserved_rq_based_ios();
front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
/* per_bio_data_size is not used. See __bind_mempools(). */
WARN_ON(per_bio_data_size != 0);
} else
goto out;
- pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep);
+ pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
if (!pools->io_pool)
goto out;
@@ -2924,6 +2980,13 @@ module_exit(dm_exit);
module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");
+
+module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
+
+module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
+
MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 5e604cc7b4aa..1d1ad7b7e527 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -184,6 +184,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools);
/*
* Helpers that are used by DM core
*/
+unsigned dm_get_reserved_bio_based_ios(void);
+unsigned dm_get_reserved_rq_based_ios(void);
+
static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
{
return !maxlen || strlen(result) + 1 >= maxlen;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index adf4d7e1d5e1..561a65f82e26 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8111,6 +8111,7 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
u64 *p;
int lo, hi;
int rv = 1;
+ unsigned long flags;
if (bb->shift < 0)
/* badblocks are disabled */
@@ -8125,7 +8126,7 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
sectors = next - s;
}
- write_seqlock_irq(&bb->lock);
+ write_seqlock_irqsave(&bb->lock, flags);
p = bb->page;
lo = 0;
@@ -8241,7 +8242,7 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
bb->changed = 1;
if (!acknowledged)
bb->unacked_exist = 1;
- write_sequnlock_irq(&bb->lock);
+ write_sequnlock_irqrestore(&bb->lock, flags);
return rv;
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index d60412c7f995..aacf6bf352d8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1479,6 +1479,7 @@ static int raid1_spare_active(struct mddev *mddev)
}
}
if (rdev
+ && rdev->recovery_offset == MaxSector
&& !test_bit(Faulty, &rdev->flags)
&& !test_and_set_bit(In_sync, &rdev->flags)) {
count++;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index df7b0a06b0ea..73dc8a377522 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1782,6 +1782,7 @@ static int raid10_spare_active(struct mddev *mddev)
}
sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
} else if (tmp->rdev
+ && tmp->rdev->recovery_offset == MaxSector
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
count++;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7ff4f252ca1a..f8b906843926 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -778,6 +778,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
+ /*
+ * If this is discard request, set bi_vcnt 0. We don't
+ * want to confuse SCSI because SCSI will replace payload
+ */
+ if (rw & REQ_DISCARD)
+ bi->bi_vcnt = 0;
if (rrdev)
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
@@ -816,6 +822,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_size = STRIPE_SIZE;
+ /*
+ * If this is discard request, set bi_vcnt 0. We don't
+ * want to confuse SCSI because SCSI will replace payload
+ */
+ if (rw & REQ_DISCARD)
+ rbi->bi_vcnt = 0;
if (conf->mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
rbi, disk_devt(conf->mddev->gendisk),
@@ -2910,6 +2922,14 @@ static void handle_stripe_clean_event(struct r5conf *conf,
}
/* now that discard is done we can proceed with any sync */
clear_bit(STRIPE_DISCARD, &sh->state);
+ /*
+ * SCSI discard will change some bio fields and the stripe has
+ * no updated data, so remove it from hash list and the stripe
+ * will be reinitialized
+ */
+ spin_lock_irq(&conf->device_lock);
+ remove_hash(sh);
+ spin_unlock_irq(&conf->device_lock);
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
set_bit(STRIPE_HANDLE, &sh->state);