From 8f0b3cc9a4c102c24808c87f1bc943659d7a7f9f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:53 +0000 Subject: tcp: RX path for devmem TCP In tcp_recvmsg_locked(), detect if the skb being received by the user is a devmem skb. In this case - if the user provided the MSG_SOCK_DEVMEM flag - pass it to tcp_recvmsg_devmem() for custom handling. tcp_recvmsg_devmem() copies any data in the skb header to the linear buffer, and returns a cmsg to the user indicating the number of bytes returned in the linear buffer. tcp_recvmsg_devmem() then loops over the unaccessible devmem skb frags, and returns to the user a cmsg_devmem indicating the location of the data in the dmabuf device memory. cmsg_devmem contains this information: 1. the offset into the dmabuf where the payload starts. 'frag_offset'. 2. the size of the frag. 'frag_size'. 3. an opaque token 'frag_token' to return to the kernel when the buffer is to be released. The pages awaiting freeing are stored in the newly added sk->sk_user_frags, and each page passed to userspace is get_page()'d. This reference is dropped once the userspace indicates that it is done reading this page. All pages are released when the socket is destroyed. Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240910171458.219195-10-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 1 + include/net/sock.h | 2 ++ include/uapi/asm-generic/socket.h | 5 +++++ include/uapi/linux/uio.h | 13 +++++++++++++ 4 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index df9cdb8bbfb8..d18cc47e89bd 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -327,6 +327,7 @@ struct ucred { * plain text and require encryption */ +#define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/include/net/sock.h b/include/net/sock.h index f51d61fab059..c58ca8dd561b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -337,6 +337,7 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference + * @sk_user_frags: xarray of pages the user is holding a reference on. */ struct sock { /* @@ -542,6 +543,7 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; + struct xarray sk_user_frags; }; struct sock_bh_locked { diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8ce8a39a1e5f..e993edc9c0ee 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -135,6 +135,11 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SO_DEVMEM_LINEAR 78 +#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR +#define SO_DEVMEM_DMABUF 79 +#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/uio.h b/include/uapi/linux/uio.h index 059b1a9147f4..3a22ddae376a 100644 --- a/include/uapi/linux/uio.h +++ b/include/uapi/linux/uio.h @@ -20,6 +20,19 @@ struct iovec __kernel_size_t iov_len; /* Must be size_t (1003.1g) */ }; +struct dmabuf_cmsg { + __u64 frag_offset; /* offset into the dmabuf where the frag starts. + */ + __u32 frag_size; /* size of the frag. */ + __u32 frag_token; /* token representing this frag for + * DEVMEM_DONTNEED. + */ + __u32 dmabuf_id; /* dmabuf id this frag belongs to. */ + __u32 flags; /* Currently unused. Reserved for future + * uses. + */ +}; + /* * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) */ -- cgit v1.2.3