summaryrefslogtreecommitdiff
path: root/fs/splice.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/splice.c')
-rw-r--r--fs/splice.c338
1 files changed, 318 insertions, 20 deletions
diff --git a/fs/splice.c b/fs/splice.c
index 666953d59a35..73766d24f97b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -507,9 +507,131 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
return ret;
}
-
EXPORT_SYMBOL(generic_file_splice_read);
+static const struct pipe_buf_operations default_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = generic_pipe_buf_release,
+ .steal = generic_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
+static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+ unsigned long vlen, loff_t offset)
+{
+ mm_segment_t old_fs;
+ loff_t pos = offset;
+ ssize_t res;
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* The cast to a user pointer is valid due to the set_fs() */
+ res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
+ set_fs(old_fs);
+
+ return res;
+}
+
+static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
+ loff_t pos)
+{
+ mm_segment_t old_fs;
+ ssize_t res;
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* The cast to a user pointer is valid due to the set_fs() */
+ res = vfs_write(file, (const char __user *)buf, count, &pos);
+ set_fs(old_fs);
+
+ return res;
+}
+
+ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ unsigned int nr_pages;
+ unsigned int nr_freed;
+ size_t offset;
+ struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_BUFFERS];
+ struct iovec vec[PIPE_BUFFERS];
+ pgoff_t index;
+ ssize_t res;
+ size_t this_len;
+ int error;
+ int i;
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .partial = partial,
+ .flags = flags,
+ .ops = &default_pipe_buf_ops,
+ .spd_release = spd_release_page,
+ };
+
+ index = *ppos >> PAGE_CACHE_SHIFT;
+ offset = *ppos & ~PAGE_CACHE_MASK;
+ nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+ struct page *page;
+
+ page = alloc_page(GFP_USER);
+ error = -ENOMEM;
+ if (!page)
+ goto err;
+
+ this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+ vec[i].iov_base = (void __user *) page_address(page);
+ vec[i].iov_len = this_len;
+ pages[i] = page;
+ spd.nr_pages++;
+ len -= this_len;
+ offset = 0;
+ }
+
+ res = kernel_readv(in, vec, spd.nr_pages, *ppos);
+ if (res < 0) {
+ error = res;
+ goto err;
+ }
+
+ error = 0;
+ if (!res)
+ goto err;
+
+ nr_freed = 0;
+ for (i = 0; i < spd.nr_pages; i++) {
+ this_len = min_t(size_t, vec[i].iov_len, res);
+ partial[i].offset = 0;
+ partial[i].len = this_len;
+ if (!this_len) {
+ __free_page(pages[i]);
+ pages[i] = NULL;
+ nr_freed++;
+ }
+ res -= this_len;
+ }
+ spd.nr_pages -= nr_freed;
+
+ res = splice_to_pipe(pipe, &spd);
+ if (res > 0)
+ *ppos += res;
+
+ return res;
+
+err:
+ for (i = 0; i < spd.nr_pages; i++)
+ __free_page(pages[i]);
+
+ return error;
+}
+EXPORT_SYMBOL(default_file_splice_read);
+
/*
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
* using sendpage(). Return the number of bytes sent.
@@ -881,6 +1003,36 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
EXPORT_SYMBOL(generic_file_splice_write);
+static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
+{
+ int ret;
+ void *data;
+
+ ret = buf->ops->confirm(pipe, buf);
+ if (ret)
+ return ret;
+
+ data = buf->ops->map(pipe, buf, 0);
+ ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
+ buf->ops->unmap(pipe, buf, data);
+
+ return ret;
+}
+
+static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos,
+ size_t len, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
+ if (ret > 0)
+ *ppos += ret;
+
+ return ret;
+}
+
/**
* generic_splice_sendpage - splice data from a pipe to a socket
* @pipe: pipe to splice from
@@ -908,11 +1060,10 @@ EXPORT_SYMBOL(generic_splice_sendpage);
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
{
+ ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
+ loff_t *, size_t, unsigned int);
int ret;
- if (unlikely(!out->f_op || !out->f_op->splice_write))
- return -EINVAL;
-
if (unlikely(!(out->f_mode & FMODE_WRITE)))
return -EBADF;
@@ -923,7 +1074,11 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
if (unlikely(ret < 0))
return ret;
- return out->f_op->splice_write(pipe, out, ppos, len, flags);
+ splice_write = out->f_op->splice_write;
+ if (!splice_write)
+ splice_write = default_file_splice_write;
+
+ return splice_write(pipe, out, ppos, len, flags);
}
/*
@@ -933,11 +1088,10 @@ static long do_splice_to(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
+ ssize_t (*splice_read)(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t, unsigned int);
int ret;
- if (unlikely(!in->f_op || !in->f_op->splice_read))
- return -EINVAL;
-
if (unlikely(!(in->f_mode & FMODE_READ)))
return -EBADF;
@@ -945,7 +1099,11 @@ static long do_splice_to(struct file *in, loff_t *ppos,
if (unlikely(ret < 0))
return ret;
- return in->f_op->splice_read(in, ppos, pipe, len, flags);
+ splice_read = in->f_op->splice_read;
+ if (!splice_read)
+ splice_read = default_file_splice_read;
+
+ return splice_read(in, ppos, pipe, len, flags);
}
/**
@@ -1112,6 +1270,9 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
return ret;
}
+static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
+ struct pipe_inode_info *opipe,
+ size_t len, unsigned int flags);
/*
* After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
* location, so checking ->i_pipe is not enough to verify that this is a
@@ -1132,12 +1293,32 @@ static long do_splice(struct file *in, loff_t __user *off_in,
struct file *out, loff_t __user *off_out,
size_t len, unsigned int flags)
{
- struct pipe_inode_info *pipe;
+ struct pipe_inode_info *ipipe;
+ struct pipe_inode_info *opipe;
loff_t offset, *off;
long ret;
- pipe = pipe_info(in->f_path.dentry->d_inode);
- if (pipe) {
+ ipipe = pipe_info(in->f_path.dentry->d_inode);
+ opipe = pipe_info(out->f_path.dentry->d_inode);
+
+ if (ipipe && opipe) {
+ if (off_in || off_out)
+ return -ESPIPE;
+
+ if (!(in->f_mode & FMODE_READ))
+ return -EBADF;
+
+ if (!(out->f_mode & FMODE_WRITE))
+ return -EBADF;
+
+ /* Splicing to self would be fun, but... */
+ if (ipipe == opipe)
+ return -EINVAL;
+
+ return splice_pipe_to_pipe(ipipe, opipe, len, flags);
+ }
+
+ if (ipipe) {
if (off_in)
return -ESPIPE;
if (off_out) {
@@ -1149,7 +1330,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
} else
off = &out->f_pos;
- ret = do_splice_from(pipe, out, off, len, flags);
+ ret = do_splice_from(ipipe, out, off, len, flags);
if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
ret = -EFAULT;
@@ -1157,8 +1338,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
return ret;
}
- pipe = pipe_info(out->f_path.dentry->d_inode);
- if (pipe) {
+ if (opipe) {
if (off_out)
return -ESPIPE;
if (off_in) {
@@ -1170,7 +1350,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
} else
off = &in->f_pos;
- ret = do_splice_to(in, off, pipe, len, flags);
+ ret = do_splice_to(in, off, opipe, len, flags);
if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
ret = -EFAULT;
@@ -1511,7 +1691,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
* Make sure there's data to read. Wait for input if we can, otherwise
* return an appropriate error.
*/
-static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
{
int ret;
@@ -1549,7 +1729,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
* Make sure there's writeable room. Wait for room if we can, otherwise
* return an appropriate error.
*/
-static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
{
int ret;
@@ -1587,6 +1767,124 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
}
/*
+ * Splice contents of ipipe to opipe.
+ */
+static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
+ struct pipe_inode_info *opipe,
+ size_t len, unsigned int flags)
+{
+ struct pipe_buffer *ibuf, *obuf;
+ int ret = 0, nbuf;
+ bool input_wakeup = false;
+
+
+retry:
+ ret = ipipe_prep(ipipe, flags);
+ if (ret)
+ return ret;
+
+ ret = opipe_prep(opipe, flags);
+ if (ret)
+ return ret;
+
+ /*
+ * Potential ABBA deadlock, work around it by ordering lock
+ * grabbing by pipe info address. Otherwise two different processes
+ * could deadlock (one doing tee from A -> B, the other from B -> A).
+ */
+ pipe_double_lock(ipipe, opipe);
+
+ do {
+ if (!opipe->readers) {
+ send_sig(SIGPIPE, current, 0);
+ if (!ret)
+ ret = -EPIPE;
+ break;
+ }
+
+ if (!ipipe->nrbufs && !ipipe->writers)
+ break;
+
+ /*
+ * Cannot make any progress, because either the input
+ * pipe is empty or the output pipe is full.
+ */
+ if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) {
+ /* Already processed some buffers, break */
+ if (ret)
+ break;
+
+ if (flags & SPLICE_F_NONBLOCK) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ /*
+ * We raced with another reader/writer and haven't
+ * managed to process any buffers. A zero return
+ * value means EOF, so retry instead.
+ */
+ pipe_unlock(ipipe);
+ pipe_unlock(opipe);
+ goto retry;
+ }
+
+ ibuf = ipipe->bufs + ipipe->curbuf;
+ nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS;
+ obuf = opipe->bufs + nbuf;
+
+ if (len >= ibuf->len) {
+ /*
+ * Simply move the whole buffer from ipipe to opipe
+ */
+ *obuf = *ibuf;
+ ibuf->ops = NULL;
+ opipe->nrbufs++;
+ ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS;
+ ipipe->nrbufs--;
+ input_wakeup = true;
+ } else {
+ /*
+ * Get a reference to this pipe buffer,
+ * so we can copy the contents over.
+ */
+ ibuf->ops->get(ipipe, ibuf);
+ *obuf = *ibuf;
+
+ /*
+ * Don't inherit the gift flag, we need to
+ * prevent multiple steals of this page.
+ */
+ obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+
+ obuf->len = len;
+ opipe->nrbufs++;
+ ibuf->offset += obuf->len;
+ ibuf->len -= obuf->len;
+ }
+ ret += obuf->len;
+ len -= obuf->len;
+ } while (len);
+
+ pipe_unlock(ipipe);
+ pipe_unlock(opipe);
+
+ /*
+ * If we put data in the output pipe, wakeup any potential readers.
+ */
+ if (ret > 0) {
+ smp_mb();
+ if (waitqueue_active(&opipe->wait))
+ wake_up_interruptible(&opipe->wait);
+ kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
+ }
+ if (input_wakeup)
+ wakeup_pipe_writers(ipipe);
+
+ return ret;
+}
+
+/*
* Link contents of ipipe to opipe.
*/
static int link_pipe(struct pipe_inode_info *ipipe,
@@ -1690,9 +1988,9 @@ static long do_tee(struct file *in, struct file *out, size_t len,
* Keep going, unless we encounter an error. The ipipe/opipe
* ordering doesn't really matter.
*/
- ret = link_ipipe_prep(ipipe, flags);
+ ret = ipipe_prep(ipipe, flags);
if (!ret) {
- ret = link_opipe_prep(opipe, flags);
+ ret = opipe_prep(opipe, flags);
if (!ret)
ret = link_pipe(ipipe, opipe, len, flags);
}