diff options
author | NeilBrown <neilb@suse.com> | 2017-06-05 16:49:39 +1000 |
---|---|---|
committer | Shaohua Li <shli@fb.com> | 2017-06-13 10:18:01 -0700 |
commit | cc27b0c78c79680d128dbac79de0d40556d041bb (patch) | |
tree | 1f32d4acd29d825eecc1aef90d423cdacea8121c /drivers/md/raid5.c | |
parent | 63f700aab4c11d46626de3cd051dae56cf7e9056 (diff) |
md: fix deadlock between mddev_suspend() and md_write_start()
If mddev_suspend() races with md_write_start() we can deadlock
with mddev_suspend() waiting for the request that is currently
in md_write_start() to complete the ->make_request() call,
and md_write_start() waiting for the metadata to be updated
to mark the array as 'dirty'.
As metadata updates done by md_check_recovery() only happen when
the mddev_lock() can be claimed, and as mddev_suspend() is often
called with the lock held, these threads wait indefinitely for each
other.
We fix this by having md_write_start() abort if mddev_suspend()
is happening, and ->make_request() aborts if md_write_start()
aborted.
md_make_request() can detect this abort, decrease the ->active_io
count, and wait for mddev_suspend().
Reported-by: Nix <nix@esperi.org.uk>
Fixes: 68866e425be2 ("MD: no sync IO while suspended")
Cc: stable@vger.kernel.org
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 17 |
1 files changed, 9 insertions, 8 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ec0f951ae19f..b218a42fd702 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5479,7 +5479,6 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9); bi->bi_next = NULL; - md_write_start(mddev, bi); stripe_sectors = conf->chunk_sectors * (conf->raid_disks - conf->max_degraded); @@ -5549,11 +5548,10 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) release_stripe_plug(mddev, sh); } - md_write_end(mddev); bio_endio(bi); } -static void raid5_make_request(struct mddev *mddev, struct bio * bi) +static bool raid5_make_request(struct mddev *mddev, struct bio * bi) { struct r5conf *conf = mddev->private; int dd_idx; @@ -5569,10 +5567,10 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) int ret = r5l_handle_flush_request(conf->log, bi); if (ret == 0) - return; + return true; if (ret == -ENODEV) { md_flush_request(mddev, bi); - return; + return true; } /* ret == -EAGAIN, fallback */ /* @@ -5582,6 +5580,8 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) do_flush = bi->bi_opf & REQ_PREFLUSH; } + if (!md_write_start(mddev, bi)) + return false; /* * If array is degraded, better not do chunk aligned read because * later we might have to read it again in order to reconstruct @@ -5591,18 +5591,18 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) mddev->reshape_position == MaxSector) { bi = chunk_aligned_read(mddev, bi); if (!bi) - return; + return true; } if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) { make_discard_request(mddev, bi); - return; + md_write_end(mddev); + return true; } logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1); last_sector = bio_end_sector(bi); bi->bi_next = NULL; - md_write_start(mddev, bi); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); for (;logical_sector < last_sector; 
logical_sector += STRIPE_SECTORS) { @@ -5740,6 +5740,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) if (rw == WRITE) md_write_end(mddev); bio_endio(bi); + return true; } static sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks); |