summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/direct-io.c90
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c2
-rw-r--r--mm/filemap.c9
3 files changed, 39 insertions, 62 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f11f05dc9e61..71f4aeac7632 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -226,6 +226,15 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
{
ssize_t transferred = 0;
+ /*
+ * AIO submission can race with bio completion to get here while
+ * expecting to have the last io completed by bio completion.
+ * In that case -EIOCBQUEUED is in fact not an error we want
+ * to preserve through this call.
+ */
+ if (ret == -EIOCBQUEUED)
+ ret = 0;
+
if (dio->result) {
transferred = dio->result;
@@ -251,24 +260,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
return ret;
}
-/*
- * Called when a BIO has been processed. If the count goes to zero then IO is
- * complete and we can signal this to the AIO layer.
- */
-static void dio_complete_aio(struct dio *dio)
-{
- int ret;
-
- ret = dio_complete(dio, dio->iocb->ki_pos, 0);
-
- /* Complete AIO later if falling back to buffered i/o */
- if (dio->result == dio->size ||
- ((dio->rw == READ) && dio->result)) {
- aio_complete(dio->iocb, ret, 0);
- kfree(dio);
- }
-}
-
static int dio_bio_complete(struct dio *dio, struct bio *bio);
/*
* Asynchronous IO callback.
@@ -290,8 +281,11 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
if (remaining == 1 && waiter_holds_ref)
wake_up_process(dio->waiter);
- if (remaining == 0)
- dio_complete_aio(dio);
+ if (remaining == 0) {
+ int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
+ aio_complete(dio->iocb, ret, 0);
+ kfree(dio);
+ }
return 0;
}
@@ -1082,47 +1076,33 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
mutex_unlock(&dio->inode->i_mutex);
/*
- * OK, all BIOs are submitted, so we can decrement bio_count to truly
- * reflect the number of to-be-processed BIOs.
+ * The only time we want to leave bios in flight is when a successful
+ * partial aio read or full aio write have been setup. In that case
+ * bio completion will call aio_complete. The only time it's safe to
+ * call aio_complete is when we return -EIOCBQUEUED, so we key on that.
+ * This had *better* be the only place that raises -EIOCBQUEUED.
*/
- if (dio->is_async) {
- int should_wait = 0;
-
- if (dio->result < dio->size && (rw & WRITE)) {
- dio->waiter = current;
- should_wait = 1;
- }
- if (ret == 0)
- ret = dio->result;
-
- if (should_wait)
- dio_await_completion(dio);
-
- /* this can free the dio */
- if (atomic_dec_and_test(&dio->refcount))
- dio_complete_aio(dio);
+ BUG_ON(ret == -EIOCBQUEUED);
+ if (dio->is_async && ret == 0 && dio->result &&
+ ((rw & READ) || (dio->result == dio->size)))
+ ret = -EIOCBQUEUED;
- if (should_wait)
- kfree(dio);
- } else {
+ if (ret != -EIOCBQUEUED)
dio_await_completion(dio);
+ /*
+ * Sync will always be dropping the final ref and completing the
+ * operation. AIO can if it was a broken operation described above
+ * or in fact if all the bios race to complete before we get here.
+ * In that case dio_complete() translates the EIOCBQUEUED into
+ * the proper return code that the caller will hand to aio_complete().
+ */
+ if (atomic_dec_and_test(&dio->refcount)) {
ret = dio_complete(dio, offset, ret);
+ kfree(dio);
+ } else
+ BUG_ON(ret != -EIOCBQUEUED);
- /* We could have also come here on an AIO file extend */
- if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
- ret >= 0 && dio->result == dio->size)
- /*
- * For AIO writes where we have completed the
- * i/o, we have to mark the the aio complete.
- */
- aio_complete(iocb, ret, 0);
-
- if (atomic_dec_and_test(&dio->refcount))
- kfree(dio);
- else
- BUG();
- }
return ret;
}
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 8e6b56fc1cad..b56eb754e2d2 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1406,7 +1406,7 @@ xfs_vm_direct_IO(
xfs_end_io_direct);
}
- if (unlikely(ret <= 0 && iocb->private))
+ if (unlikely(ret != -EIOCBQUEUED && iocb->private))
xfs_destroy_ioend(iocb->private);
return ret;
}
diff --git a/mm/filemap.c b/mm/filemap.c
index 606432f71b3a..8332c77b1bd1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1181,8 +1181,6 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
if (pos < size) {
retval = generic_file_direct_IO(READ, iocb,
iov, pos, nr_segs);
- if (retval > 0 && !is_sync_kiocb(iocb))
- retval = -EIOCBQUEUED;
if (retval > 0)
*ppos = pos + retval;
}
@@ -2047,15 +2045,14 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
* Sync the fs metadata but not the minor inode changes and
* of course not the data as we did direct DMA for the IO.
* i_mutex is held, which protects generic_osync_inode() from
- * livelocking.
+ * livelocking. AIO O_DIRECT ops attempt to sync metadata here.
*/
- if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
+ if ((written >= 0 || written == -EIOCBQUEUED) &&
+ ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err = generic_osync_inode(inode, mapping, OSYNC_METADATA);
if (err < 0)
written = err;
}
- if (written == count && !is_sync_kiocb(iocb))
- written = -EIOCBQUEUED;
return written;
}
EXPORT_SYMBOL(generic_file_direct_write);