author		Paolo Abeni <pabeni@redhat.com>	2026-01-22 15:41:35 +0100
committer	Paolo Abeni <pabeni@redhat.com>	2026-01-22 15:41:36 +0100
commit		5778d65d4b85d4929d30998863e08e20af4b6113 (patch)
tree		22a6c93948b01538ac61945aa81be61fe5247536
parent		ca1bb3fedf26a08ed31974131bc0064d4fe33649 (diff)
parent		2a689f76edd04a53137bd01d4618343f4cdd7e23 (diff)
Merge branch 'vsock-virtio-fix-tx-credit-handling'
Stefano Garzarella says:

====================
vsock/virtio: fix TX credit handling

The original series was posted by Melbin K Mathew <mlbnkm1@gmail.com>
up to v4. Since it's a real issue and the original author seems busy,
I'm sending the new version fixing my comments but keeping the
authorship (and restoring mine on patch 2, as reported on v4).

v5: https://lore.kernel.org/netdev/20260116201517.273302-1-sgarzare@redhat.com/
v4: https://lore.kernel.org/netdev/20251217181206.3681159-1-mlbnkm1@gmail.com/

From Melbin K Mathew <mlbnkm1@gmail.com>:

This series fixes TX credit handling in virtio-vsock:

Patch 1: Fix potential underflow in get_credit() using s64 arithmetic
Patch 2: Fix vsock_test seqpacket bounds test
Patch 3: Cap TX credit to local buffer size (security hardening)
Patch 4: Add stream TX credit bounds regression test

The core issue is that a malicious guest can advertise a huge buffer
size via SO_VM_SOCKETS_BUFFER_SIZE, causing the host to allocate
excessive sk_buff memory when sending data to that guest.

On an unpatched Ubuntu 22.04 host (~64 GiB RAM), running a PoC with 32
guest vsock connections advertising 2 GiB each and reading slowly drove
Slab/SUnreclaim from ~0.5 GiB to ~57 GiB; the system only recovered
after killing the QEMU process. With this series applied, the same PoC
shows only a ~35 MiB increase in Slab/SUnreclaim, no host OOM, and the
guest remains responsive.
====================

Link: https://patch.msgid.link/20260121093628.9941-1-sgarzare@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
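As a minimal userspace sketch of the arithmetic patch 1 fixes (not part
of the series; all values are illustrative): if the peer shrinks
peer_buf_alloc while bytes are still in flight, the plain u32
subtraction wraps to a huge credit, while widening to 64-bit and
clamping at zero does not.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Peer shrank its advertised buffer to 4 KiB while 60 KiB of
	 * our data is still in flight (tx_cnt - peer_fwd_cnt).
	 */
	uint32_t peer_buf_alloc = 4096;
	uint32_t tx_cnt = 128 * 1024, peer_fwd_cnt = 68 * 1024;

	/* Pre-patch u32 arithmetic: wraps to ~4 GiB of "credit". */
	uint32_t wrapped = peer_buf_alloc - (tx_cnt - peer_fwd_cnt);

	/* Patched arithmetic: widen to 64-bit, then clamp at zero. */
	int64_t bytes = (int64_t)peer_buf_alloc - (tx_cnt - peer_fwd_cnt);
	if (bytes < 0)
		bytes = 0;

	printf("wrapped: %u, clamped: %lld\n", wrapped, (long long)bytes);
	return 0;
}

Built with any C toolchain, this prints a wrapped value near 4 GiB
against a clamped value of 0, which is the gap the reworked
virtio_transport_has_space() closes.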
-rw-r--r--	net/vmw_vsock/virtio_transport_common.c	 30
-rw-r--r--	tools/testing/vsock/vsock_test.c	112
2 files changed, 133 insertions(+), 9 deletions(-)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 26b979ad71f0..d3e26025ef58 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -28,6 +28,7 @@
static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
bool cancel_timeout);
+static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs);
static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
@@ -499,9 +500,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
return 0;
spin_lock_bh(&vvs->tx_lock);
- ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
- if (ret > credit)
- ret = credit;
+ ret = min_t(u32, credit, virtio_transport_has_space(vvs));
vvs->tx_cnt += ret;
vvs->bytes_unsent += ret;
spin_unlock_bh(&vvs->tx_lock);
@@ -822,6 +821,15 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
+static u32 virtio_transport_tx_buf_size(struct virtio_vsock_sock *vvs)
+{
+ /* The peer advertises its receive buffer via peer_buf_alloc, but we
+ * cap it to our local buf_alloc so a remote peer cannot force us to
+ * queue more data than our own buffer configuration allows.
+ */
+ return min(vvs->peer_buf_alloc, vvs->buf_alloc);
+}
+
int
virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
struct msghdr *msg,
@@ -831,7 +839,7 @@ virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
spin_lock_bh(&vvs->tx_lock);
- if (len > vvs->peer_buf_alloc) {
+ if (len > virtio_transport_tx_buf_size(vvs)) {
spin_unlock_bh(&vvs->tx_lock);
return -EMSGSIZE;
}
@@ -877,12 +885,16 @@ u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);
-static s64 virtio_transport_has_space(struct vsock_sock *vsk)
+static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs)
{
- struct virtio_vsock_sock *vvs = vsk->trans;
s64 bytes;
- bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
+ /* Use s64 arithmetic so if the peer shrinks peer_buf_alloc while
+ * we have bytes in flight (tx_cnt - peer_fwd_cnt), the subtraction
+ * does not underflow.
+ */
+ bytes = (s64)virtio_transport_tx_buf_size(vvs) -
+ (vvs->tx_cnt - vvs->peer_fwd_cnt);
if (bytes < 0)
bytes = 0;
@@ -895,7 +907,7 @@ s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
s64 bytes;
spin_lock_bh(&vvs->tx_lock);
- bytes = virtio_transport_has_space(vsk);
+ bytes = virtio_transport_has_space(vvs);
spin_unlock_bh(&vvs->tx_lock);
return bytes;
@@ -1492,7 +1504,7 @@ static bool virtio_transport_space_update(struct sock *sk,
spin_lock_bh(&vvs->tx_lock);
vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
- space_available = virtio_transport_has_space(vsk);
+ space_available = virtio_transport_has_space(vvs);
spin_unlock_bh(&vvs->tx_lock);
return space_available;
}
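The effect of the patch 3 cap can be sketched in the same standalone
style (tx_buf_size() below is a userspace mock of
virtio_transport_tx_buf_size(); the 256 KiB local buffer is an assumed
value, matching the virtio-vsock default VIRTIO_VSOCK_DEFAULT_BUF_SIZE):

#include <stdint.h>
#include <stdio.h>

/* Userspace mock of virtio_transport_tx_buf_size(): the peer's
 * advertised receive buffer is capped to the local configuration.
 */
static uint32_t tx_buf_size(uint32_t peer_buf_alloc, uint32_t buf_alloc)
{
	return peer_buf_alloc < buf_alloc ? peer_buf_alloc : buf_alloc;
}

int main(void)
{
	/* A malicious guest advertises 2 GiB; the host socket keeps an
	 * assumed 256 KiB local buffer.
	 */
	uint32_t peer_buf_alloc = 2U * 1024 * 1024 * 1024;
	uint32_t buf_alloc = 256 * 1024;

	/* Usable credit is now bounded by the smaller of the two, so
	 * the host can never queue 2 GiB of sk_buffs for this socket.
	 */
	printf("credit ceiling: %u bytes\n",
	       tx_buf_size(peer_buf_alloc, buf_alloc));
	return 0;
}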
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 27e39354499a..5bd20ccd9335 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -347,10 +347,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
}
#define SOCK_BUF_SIZE (2 * 1024 * 1024)
+#define SOCK_BUF_SIZE_SMALL (64 * 1024)
#define MAX_MSG_PAGES 4
static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
{
+ unsigned long long sock_buf_size;
unsigned long curr_hash;
size_t max_msg_size;
int page_size;
@@ -363,6 +365,16 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
+ sock_buf_size = SOCK_BUF_SIZE;
+
+ setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+ sock_buf_size,
+ "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+ setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+ sock_buf_size,
+ "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+
/* Wait, until receiver sets buffer size. */
control_expectln("SRVREADY");
@@ -2219,6 +2231,101 @@ static void test_stream_accepted_setsockopt_server(const struct test_opts *opts)
close(fd);
}
+static void test_stream_tx_credit_bounds_client(const struct test_opts *opts)
+{
+ unsigned long long sock_buf_size;
+ size_t total = 0;
+ char buf[4096];
+ int fd;
+
+ memset(buf, 'A', sizeof(buf));
+
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ sock_buf_size = SOCK_BUF_SIZE_SMALL;
+
+ setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+ sock_buf_size,
+ "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+ setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+ sock_buf_size,
+ "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+
+ if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK) < 0) {
+ perror("fcntl(F_SETFL)");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SRVREADY");
+
+ for (;;) {
+ ssize_t sent = send(fd, buf, sizeof(buf), 0);
+
+ if (sent == 0) {
+ fprintf(stderr, "unexpected EOF while sending bytes\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (sent < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ break;
+
+ perror("send");
+ exit(EXIT_FAILURE);
+ }
+
+ total += sent;
+ }
+
+ control_writeln("CLIDONE");
+ close(fd);
+
+ /* We should not be able to send more bytes than the value set as
+ * local buffer size.
+ */
+ if (total > sock_buf_size) {
+ fprintf(stderr,
+ "TX credit too large: queued %zu bytes (expected <= %llu)\n",
+ total, sock_buf_size);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void test_stream_tx_credit_bounds_server(const struct test_opts *opts)
+{
+ unsigned long long sock_buf_size;
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ sock_buf_size = SOCK_BUF_SIZE;
+
+ setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+ sock_buf_size,
+ "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+ setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+ sock_buf_size,
+ "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+
+ control_writeln("SRVREADY");
+ control_expectln("CLIDONE");
+
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -2408,6 +2515,11 @@ static struct test_case test_cases[] = {
.run_client = test_stream_msgzcopy_mangle_client,
.run_server = test_stream_msgzcopy_mangle_server,
},
+ {
+ .name = "SOCK_STREAM TX credit bounds",
+ .run_client = test_stream_tx_credit_bounds_client,
+ .run_server = test_stream_tx_credit_bounds_server,
+ },
{},
};
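The new case runs through the usual vsock_test control-channel pairing;
a typical invocation looks like the following (address, port, and CIDs
are placeholders):

# server side
./vsock_test --mode=server --control-port=1234 --peer-cid=3
# client side
./vsock_test --mode=client --control-host=<server-addr> --control-port=1234 --peer-cid=2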