From 48e70bc18ac81881dedd3aa327c55b924fc41ecf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Apr 2009 08:19:27 +0200 Subject: Document and move the various READ/WRITE types It's a somewhat twisty maze of hints and behavioural modifiers, try and clear it up a bit with some documentation. Signed-off-by: Jens Axboe --- include/linux/fs.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 562d2855cf30..b535aec4406b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,6 +87,60 @@ struct inodes_stat_t { */ #define FMODE_NOCMTIME ((__force fmode_t)2048) +/* + * The below are the various read and write types that we support. Some of + * them include behavioral modifiers that send information down to the + * block layer and IO scheduler. Terminology: + * + * The block layer uses device plugging to defer IO a little bit, in + * the hope that we will see more IO very shortly. This increases + * coalescing of adjacent IO and thus reduces the number of IOs we + * have to send to the device. It also allows for better queuing, + * if the IO isn't mergeable. If the caller is going to be waiting + * for the IO, then he must ensure that the device is unplugged so + * that the IO is dispatched to the driver. + * + * All IO is handled async in Linux. This is fine for background + * writes, but for reads or writes that someone waits for completion + * on, we want to notify the block layer and IO scheduler so that they + * know about it. That allows them to make better scheduling + * decisions. So when the below references 'sync' and 'async', it + * is referencing this priority hint. + * + * With that in mind, the available types are: + * + * READ A normal read operation. Device will be plugged. + * READ_SYNC A synchronous read. Device is not plugged, caller can + * immediately wait on this read without caring about + * unplugging. + * READA Used for read-ahead operations. Lower priority, and the + * block layer could (in theory) choose to ignore this + * request if it runs into resource problems. + * WRITE A normal async write. Device will be plugged. + * SWRITE Like WRITE, but a special case for ll_rw_block() that + * tells it to lock the buffer first. Normally a buffer + * must be locked before doing IO. + * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down + * the hint that someone will be waiting on this IO + * shortly. The device must still be unplugged explicitly, + * WRITE_SYNC_PLUG does not do this as we could be + * submitting more writes before we actually wait on any + * of them. + * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device + * immediately after submission. The write equivalent + * of READ_SYNC. + * WRITE_ODIRECT Special case write for O_DIRECT only. + * SWRITE_SYNC + * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. + * See SWRITE. + * WRITE_BARRIER Like WRITE, but tells the block layer that all + * previously submitted writes must be safely on storage + * before this one is started. Also guarantees that when + * this write is complete, it itself is also safely on + * storage. Prevents reordering of writes on both sides + * of this IO. + * + */ #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 @@ -102,6 +156,11 @@ struct inodes_stat_t { (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) + +/* + * These aren't really reads or writes, they pass down information about + * parts of device that are now unused by the file system. + */ #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) -- cgit v1.2.3 From f8cc774ce4844811a55e2352f1443055e3994e28 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:40 +0200 Subject: splice: remove generic_file_splice_write_nolock() Remove the now unused generic_file_splice_write_nolock() function. It's conceptually broken anyway, because splice may need to wait for pipe events so holding locks across the whole operation is wrong. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index b535aec4406b..907d8f56c6fa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2209,8 +2209,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, - struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, -- cgit v1.2.3 From 61e0d47c33cc371f725bcda4a47ae0efe652dba8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:41 +0200 Subject: splice: add helpers for locking pipe inode There are lots of sequences like this, especially in splice code: if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); /* do something */ if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); so introduce helpers which do the conditional locking and unlocking. Also replace the inode_double_lock() call with a pipe_double_lock() helper to avoid spreading the use of this functionality beyond the pipe code. This patch is just a cleanup, and should cause no behavioral changes. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/fs.h') diff --git a/include/linux/fs.h b/include/linux/fs.h index 907d8f56c6fa..e766be0d4329 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -797,9 +797,6 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; -extern void inode_double_lock(struct inode *inode1, struct inode *inode2); -extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); - /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic -- cgit v1.2.3