From de2ea4b64b75a79ed9cdf9bf30e0e197901084e4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Oct 2019 14:41:29 -0600 Subject: net: add __sys_accept4_file() helper This is identical to __sys_accept4(), except it takes a struct file instead of an fd, and it also allows passing in extra file->f_flags flags. The latter is done to support masking in O_NONBLOCK without manipulating the original file flags. No functional changes in this patch. Cc: netdev@vger.kernel.org Acked-by: David S. Miller Signed-off-by: Jens Axboe --- net/socket.c | 65 ++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 24 deletions(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 6a9ab7a8b1d2..40ab39f6c5d8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1690,24 +1690,13 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) return __sys_listen(fd, backlog); } -/* - * For accept, we attempt to create a new socket, set up the link - * with the client, wake up the client, then return the new - * connected fd. We collect the address of the connector in kernel - * space and move it to user at the very end. This is unclean because - * we open the socket then return an error. - * - * 1003.1g adds the ability to recvmsg() to query connection pending - * status to recvmsg. We need to add that support in a way thats - * clean when we restructure accept also. - */ - -int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags) +int __sys_accept4_file(struct file *file, unsigned file_flags, + struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; - int err, len, newfd, fput_needed; + int err, len, newfd; struct sockaddr_storage address; if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) @@ -1716,14 +1705,14 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - sock = sockfd_lookup_light(fd, &err, &fput_needed); + sock = sock_from_file(file, &err); if (!sock) goto out; err = -ENFILE; newsock = sock_alloc(); if (!newsock) - goto out_put; + goto out; newsock->type = sock->type; newsock->ops = sock->ops; @@ -1738,20 +1727,21 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); - goto out_put; + goto out; } newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); if (IS_ERR(newfile)) { err = PTR_ERR(newfile); put_unused_fd(newfd); - goto out_put; + goto out; } err = security_socket_accept(sock, newsock); if (err) goto out_fd; - err = sock->ops->accept(sock, newsock, sock->file->f_flags, false); + err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags, + false); if (err < 0) goto out_fd; @@ -1772,15 +1762,42 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, fd_install(newfd, newfile); err = newfd; - -out_put: - fput_light(sock->file, fput_needed); out: return err; out_fd: fput(newfile); put_unused_fd(newfd); - goto out_put; + goto out; + +} + +/* + * For accept, we attempt to create a new socket, set up the link + * with the client, wake up the client, then return the new + * connected fd. We collect the address of the connector in kernel + * space and move it to user at the very end. This is unclean because + * we open the socket then return an error. + * + * 1003.1g adds the ability to recvmsg() to query connection pending + * status to recvmsg. We need to add that support in a way thats + * clean when we restructure accept also. + */ + +int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) +{ + int ret = -EBADF; + struct fd f; + + f = fdget(fd); + if (f.file) { + ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, + upeer_addrlen, flags); + if (f.flags) + fput(f.file); + } + + return ret; } SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, -- cgit v1.2.3 From d8e464ecc17b4444e9a3e148a9748c4828c6328c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Nov 2019 11:20:48 -0800 Subject: vfs: mark pipes and sockets as stream-like file descriptors In commit 3975b097e577 ("convert stream-like files -> stream_open, even if they use noop_llseek") Kirill used a coccinelle script to change "nonseekable_open()" to "stream_open()", which changed the trivial cases of stream-like file descriptors to the new model with FMODE_STREAM. However, the two big cases - sockets and pipes - don't actually have that trivial pattern at all, and were thus never converted to FMODE_STREAM even though it makes lots of sense to do so. That's particularly true when looking forward to the next change: getting rid of FMODE_ATOMIC_POS entirely, and just using FMODE_STREAM to decide whether f_pos updates are needed or not. And if they are, we'll always do them atomically. This came up because KCSAN (correctly) noted that the non-locked f_pos updates are data races: they are clearly benign for the case where we don't care, but it would be good to just not have that issue exist at all. Note that the reason we used FMODE_ATOMIC_POS originally is that only doing it for the minimal required case is "safer" in that it's possible that the f_pos locking can cause unnecessary serialization across the whole write() call. And in the worst case, that kind of serialization can cause deadlock issues: think writers that need readers to empty the state using the same file descriptor. [ Note that the locking is per-file descriptor - because it protects "f_pos", which is obviously per-file descriptor - so it only affects cases where you literally use the same file descriptor to both read and write. So a regular pipe that has separate reading and writing file descriptors doesn't really have this situation even though it's the obvious case of "reader empties what a bit writer concurrently fills" But we want to make pipes as being stream-line anyway, because we don't want the unnecessary overhead of locking, and because a named pipe can be (ab-)used by reading and writing to the same file descriptor. ] There are likely a lot of other cases that might want FMODE_STREAM, and looking for ".llseek = no_llseek" users and other cases that don't have an lseek file operation at all and making them use "stream_open()" might be a good idea. But pipes and sockets are likely to be the two main cases. Cc: Kirill Smelkov Cc: Eic Dumazet Cc: Al Viro Cc: Alan Stern Cc: Marco Elver Cc: Andrea Parri Cc: Paul McKenney Signed-off-by: Linus Torvalds --- net/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 6a9ab7a8b1d2..3c6d60eadf7a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -404,6 +404,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) sock->file = file; file->private_data = sock; + stream_open(SOCK_INODE(sock), file); return file; } EXPORT_SYMBOL(sock_alloc_file); -- cgit v1.2.3