diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2023-06-08 19:40:32 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2023-06-08 19:40:33 -0700 |
| commit | fd5f4d7da29218485153fd8b4c08da7fc130c79f (patch) | |
| tree | 59302809bba3f3bd7661440cdf57e02ab9ba4d1d /include | |
| parent | 736013292e3ca5ec2aabb32daf72a73b1256ac57 (diff) | |
| parent | 3dc8976c7ad6ec46954eb99076551e2a2c4114da (diff) | |
Merge branch 'splice-net-rewrite-splice-to-socket-fix-splice_f_more-and-handle-msg_splice_pages-in-af_tls'
David Howells says:
====================
splice, net: Rewrite splice-to-socket, fix SPLICE_F_MORE and handle MSG_SPLICE_PAGES in AF_TLS
Here are patches to do the following:
(1) Block MSG_SENDPAGE_* flags from leaking into ->sendmsg() from
userspace, whilst allowing splice_to_socket() to pass them in.
(2) Allow MSG_SPLICE_PAGES to be passed into tls_*_sendmsg(). Until
support is added, it will be ignored and a splice-driven sendmsg()
will be treated like a normal sendmsg(). TCP, UDP, AF_UNIX and
Chelsio-TLS already handle the flag in net-next.
(3) Replace a chain of functions to splice-to-sendpage with a single
function to splice via sendmsg() with MSG_SPLICE_PAGES. This allows a
bunch of pages to be spliced from a pipe in a single call using a
bio_vec[] and pushes the main processing loop down into the bowels of
the protocol driver rather than repeatedly calling in with a page at a
time.
(4) Provide a ->splice_eof() op[2] that allows splice to signal to its
output that the input observed a premature EOF and that the caller
didn't flag SPLICE_F_MORE, thereby allowing a corked socket to be
flushed. This attempts to maintain the current behaviour. It is also
not called if we didn't manage to read any data and so didn't called
the actor function.
This needs routing though several layers to get it down to the network
protocol.
[!] Note that I chose not to pass in any flags - I'm not sure it's
particularly useful to pass in the splice flags; I also elected
not to return any error code - though we might actually want to do
that.
(5) Provide tls_{device,sw}_splice_eof() to flush a pending TLS record if
there is one.
(6) Provide splice_eof() for UDP, TCP, Chelsio-TLS and AF_KCM. AF_UNIX
doesn't seem to pay attention to the MSG_MORE or MSG_SENDPAGE_NOTLAST
flags.
(7) Alter the behaviour of sendfile() and fix SPLICE_F_MORE/MSG_MORE
signalling[1] such SPLICE_F_MORE is always signalled until we have
read sufficient data to finish the request. If we get a zero-length
before we've managed to splice sufficient data, we now leave the
socket expecting more data and leave it to userspace to deal with it.
(8) Make AF_TLS handle the MSG_SPLICE_PAGES internal sendmsg flag.
MSG_SPLICE_PAGES is an internal hint that tells the protocol that it
should splice the pages supplied if it can. Its sendpage
implementations are then turned into wrappers around that.
Link: https://lore.kernel.org/r/499791.1685485603@warthog.procyon.org.uk/ [1]
Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/ [2]
Link: https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=51c78a4d532efe9543a4df019ff405f05c6157f6 # part 1
Link: https://lore.kernel.org/r/20230524153311.3625329-1-dhowells@redhat.com/ # v1
====================
Link: https://lore.kernel.org/r/20230607181920.2294972-1-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/fs.h | 3 | ||||
| -rw-r--r-- | include/linux/net.h | 1 | ||||
| -rw-r--r-- | include/linux/socket.h | 4 | ||||
| -rw-r--r-- | include/linux/splice.h | 3 | ||||
| -rw-r--r-- | include/net/inet_common.h | 1 | ||||
| -rw-r--r-- | include/net/sock.h | 1 | ||||
| -rw-r--r-- | include/net/tcp.h | 1 | ||||
| -rw-r--r-- | include/net/udp.h | 1 |
8 files changed, 12 insertions, 3 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index 133f0640fb24..de2cb1132f07 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1796,6 +1796,7 @@ struct file_operations { int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); + void (*splice_eof)(struct file *file); int (*setlease)(struct file *, long, struct file_lock **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); @@ -2759,8 +2760,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, - struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, loff_t *opos, size_t len, unsigned int flags); diff --git a/include/linux/net.h b/include/linux/net.h index b73ad8e3c212..8defc8f1d82e 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -210,6 +210,7 @@ struct proto_ops { int offset, size_t size, int flags); ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); + void (*splice_eof)(struct socket *sock); int (*set_peek_off)(struct sock *sk, int val); int (*peek_len)(struct socket *sock); diff --git a/include/linux/socket.h b/include/linux/socket.h index bd1cc3238851..3fd3436bc09f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -339,7 +339,9 @@ struct ucred { #endif /* Flags to be cleared on entry by sendmsg and sendmmsg syscalls */ -#define MSG_INTERNAL_SENDMSG_FLAGS (MSG_SPLICE_PAGES) +#define MSG_INTERNAL_SENDMSG_FLAGS \ + (MSG_SPLICE_PAGES | MSG_SENDPAGE_NOPOLICY | MSG_SENDPAGE_NOTLAST | \ + MSG_SENDPAGE_DECRYPTED) /* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */ #define SOL_IP 0 diff --git a/include/linux/splice.h b/include/linux/splice.h index a55179fd60fc..4fab18a6e371 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -38,6 +38,7 @@ struct splice_desc { struct file *file; /* file to read/write */ void *data; /* cookie */ } u; + void (*splice_eof)(struct splice_desc *sd); /* Unexpected EOF handler */ loff_t pos; /* file position */ loff_t *opos; /* sendfile: output position */ size_t num_spliced; /* number of bytes already spliced */ @@ -84,6 +85,8 @@ extern long do_splice(struct file *in, loff_t *off_in, extern long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags); +extern ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags); /* * for dynamic pipe sizing diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 77f4b0ef5b92..a75333342c4e 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -35,6 +35,7 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk); int inet_send_prepare(struct sock *sk); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); +void inet_splice_eof(struct socket *sock); ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, diff --git a/include/net/sock.h b/include/net/sock.h index 6f428a7f3567..2790133b4b76 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1279,6 +1279,7 @@ struct proto { size_t len, int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, int offset, size_t size, int flags); + void (*splice_eof)(struct socket *sock); int (*bind)(struct sock *sk, struct sockaddr *addr, int addr_len); int (*bind_add)(struct sock *sk, diff --git a/include/net/tcp.h b/include/net/tcp.h index 68990a8f556a..49611af31bb7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -327,6 +327,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, size_t size, struct ubuf_info *uarg); +void tcp_splice_eof(struct socket *sock); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, diff --git a/include/net/udp.h b/include/net/udp.h index 5cad44318d71..4ed0b47c5582 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -278,6 +278,7 @@ int udp_get_port(struct sock *sk, unsigned short snum, int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +void udp_splice_eof(struct socket *sock); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); |
