From 373a83a6997bfdaf767f6a65fc5cc0054b246984 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 17 May 2010 15:12:49 +0200 Subject: vhost: Storage class should be before const qualifier The C99 specification states in section 6.11.5: The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature. Signed-off-by: Tobias Klauser Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index aa88911c9504..cd36f5ff2255 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -626,7 +626,7 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl, } #endif -const static struct file_operations vhost_net_fops = { +static const struct file_operations vhost_net_fops = { .owner = THIS_MODULE, .release = vhost_net_release, .unlocked_ioctl = vhost_net_ioctl, -- cgit v1.2.3 From dd1f4078f0d2de74a308f00a2dffbd550cfba59f Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 4 Mar 2010 16:10:14 -0500 Subject: vhost-net: minor cleanup Delete a label and goto from vhost_net_set_backend Inverting a test allows a label and goto to be eliminated. Signed-off-by: Jeff Dike Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index cd36f5ff2255..0868569d7124 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -519,13 +519,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) /* start polling new socket */ oldsock = vq->private_data; - if (sock == oldsock) - goto done; + if (sock != oldsock){ + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, sock); + vhost_net_enable_vq(n, vq); + } - vhost_net_disable_vq(n, vq); - rcu_assign_pointer(vq->private_data, sock); - vhost_net_enable_vq(n, vq); -done: if (oldsock) { vhost_net_flush_vq(n, index); fput(oldsock->file); -- cgit v1.2.3 From 4c3e5edf2f33546392fbd9beaae04e9bed7304ff Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 21 Jul 2010 21:09:23 -0700 Subject: vhost net: Fix warning. Reported by Stephen Rothwell. Signed-off-by: David S. Miller --- drivers/vhost/net.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 7a104e2de3fa..f11e6bb5b036 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -533,7 +533,6 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_enable_vq(n, vq); } -done: mutex_unlock(&vq->mutex); if (oldsock) { -- cgit v1.2.3 From c23f3445e68e1db0e74099f264bc5ff5d55ebdeb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Jun 2010 20:40:00 +0200 Subject: vhost: replace vhost_workqueue with per-vhost kthread Replace vhost_workqueue with per-vhost kthread. Other than callback argument change from struct work_struct * to struct vhost_work *, there's no visible change to vhost_poll_*() interface. This conversion is to make each vhost use a dedicated kthread so that resource control via cgroup can be applied. Partially based on Sridhar Samudrala's patch. * Updated to use sub structure vhost_work instead of directly using vhost_poll at Michael's suggestion. * Added flusher wake_up() optimization at Michael's suggestion. Changes by MST: * Converted atomics/barrier use to a spinlock. * Create thread on SET_OWNER * Fix flushing Signed-off-by: Tejun Heo Signed-off-by: Michael S. Tsirkin Cc: Sridhar Samudrala --- drivers/vhost/net.c | 56 +++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 32 deletions(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f11e6bb5b036..d395b59289ae 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -302,54 +302,58 @@ static void handle_rx(struct vhost_net *net) unuse_mm(net->dev.mm); } -static void handle_tx_kick(struct work_struct *work) +static void handle_tx_kick(struct vhost_work *work) { - struct vhost_virtqueue *vq; - struct vhost_net *net; - vq = container_of(work, struct vhost_virtqueue, poll.work); - net = container_of(vq->dev, struct vhost_net, dev); + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); + handle_tx(net); } -static void handle_rx_kick(struct work_struct *work) +static void handle_rx_kick(struct vhost_work *work) { - struct vhost_virtqueue *vq; - struct vhost_net *net; - vq = container_of(work, struct vhost_virtqueue, poll.work); - net = container_of(vq->dev, struct vhost_net, dev); + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); + handle_rx(net); } -static void handle_tx_net(struct work_struct *work) +static void handle_tx_net(struct vhost_work *work) { - struct vhost_net *net; - net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work); + struct vhost_net *net = container_of(work, struct vhost_net, + poll[VHOST_NET_VQ_TX].work); handle_tx(net); } -static void handle_rx_net(struct work_struct *work) +static void handle_rx_net(struct vhost_work *work) { - struct vhost_net *net; - net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work); + struct vhost_net *net = container_of(work, struct vhost_net, + poll[VHOST_NET_VQ_RX].work); handle_rx(net); } static int vhost_net_open(struct inode *inode, struct file *f) { struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); + struct vhost_dev *dev; int r; + if (!n) return -ENOMEM; + + dev = &n->dev; n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick; n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick; - r = vhost_dev_init(&n->dev, n->vqs, VHOST_NET_VQ_MAX); + r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX); if (r < 0) { kfree(n); return r; } - vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT); - vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN); + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); n->tx_poll_state = VHOST_NET_POLL_DISABLED; f->private_data = n; @@ -656,25 +660,13 @@ static struct miscdevice vhost_net_misc = { static int vhost_net_init(void) { - int r = vhost_init(); - if (r) - goto err_init; - r = misc_register(&vhost_net_misc); - if (r) - goto err_reg; - return 0; -err_reg: - vhost_cleanup(); -err_init: - return r; - + return misc_register(&vhost_net_misc); } module_init(vhost_net_init); static void vhost_net_exit(void) { misc_deregister(&vhost_net_misc); - vhost_cleanup(); } module_exit(vhost_net_exit); -- cgit v1.2.3 From 8dd014adfea6f173c1ef6378f7e5e7924866c923 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 27 Jul 2010 18:52:21 +0300 Subject: vhost-net: mergeable buffers support This adds support for mergeable buffers in vhost-net: this is needed for older guests without indirect buffer support, as well as for zero copy with some devices. Includes changes by Michael S. Tsirkin to make the patch as low risk as possible (i.e., close to no changes when feature is disabled). Signed-off-by: David Stevens Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 237 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 228 insertions(+), 9 deletions(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d395b59289ae..f13e56babe4b 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -74,6 +74,22 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to, } return seg; } +/* Copy iovec entries for len bytes from iovec. */ +static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, + size_t len, int iovcount) +{ + int seg = 0; + size_t size; + while (len && seg < iovcount) { + size = min(from->iov_len, len); + to->iov_base = from->iov_base; + to->iov_len = size; + len -= size; + ++from; + ++to; + ++seg; + } +} /* Caller must have TX VQ lock */ static void tx_poll_stop(struct vhost_net *net) @@ -129,7 +145,7 @@ static void handle_tx(struct vhost_net *net) if (wmem < sock->sk->sk_sndbuf / 2) tx_poll_stop(net); - hdr_size = vq->hdr_size; + hdr_size = vq->vhost_hlen; for (;;) { head = vhost_get_vq_desc(&net->dev, vq, vq->iov, @@ -172,7 +188,7 @@ static void handle_tx(struct vhost_net *net) /* TODO: Check specific error and bomb out unless ENOBUFS? */ err = sock->ops->sendmsg(NULL, sock, &msg, len); if (unlikely(err < 0)) { - vhost_discard_vq_desc(vq); + vhost_discard_vq_desc(vq, 1); tx_poll_start(net, sock); break; } @@ -191,9 +207,82 @@ static void handle_tx(struct vhost_net *net) unuse_mm(net->dev.mm); } +static int peek_head_len(struct sock *sk) +{ + struct sk_buff *head; + int len = 0; + + lock_sock(sk); + head = skb_peek(&sk->sk_receive_queue); + if (head) + len = head->len; + release_sock(sk); + return len; +} + +/* This is a multi-buffer version of vhost_get_desc, that works if + * vq has read descriptors only. + * @vq - the relevant virtqueue + * @datalen - data length we'll be reading + * @iovcount - returned count of io vectors we fill + * @log - vhost log + * @log_num - log offset + * returns number of buffer heads allocated, negative on error + */ +static int get_rx_bufs(struct vhost_virtqueue *vq, + struct vring_used_elem *heads, + int datalen, + unsigned *iovcount, + struct vhost_log *log, + unsigned *log_num) +{ + unsigned int out, in; + int seg = 0; + int headcount = 0; + unsigned d; + int r, nlogs = 0; + + while (datalen > 0) { + if (unlikely(headcount >= VHOST_NET_MAX_SG)) { + r = -ENOBUFS; + goto err; + } + d = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg, + ARRAY_SIZE(vq->iov) - seg, &out, + &in, log, log_num); + if (d == vq->num) { + r = 0; + goto err; + } + if (unlikely(out || in <= 0)) { + vq_err(vq, "unexpected descriptor format for RX: " + "out %d, in %d\n", out, in); + r = -EINVAL; + goto err; + } + if (unlikely(log)) { + nlogs += *log_num; + log += *log_num; + } + heads[headcount].id = d; + heads[headcount].len = iov_length(vq->iov + seg, in); + datalen -= heads[headcount].len; + ++headcount; + seg += in; + } + heads[headcount - 1].len += datalen; + *iovcount = seg; + if (unlikely(log)) + *log_num = nlogs; + return headcount; +err: + vhost_discard_vq_desc(vq, headcount); + return r; +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ -static void handle_rx(struct vhost_net *net) +static void handle_rx_big(struct vhost_net *net) { struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; unsigned out, in, log, s; @@ -223,7 +312,7 @@ static void handle_rx(struct vhost_net *net) use_mm(net->dev.mm); mutex_lock(&vq->mutex); vhost_disable_notify(vq); - hdr_size = vq->hdr_size; + hdr_size = vq->vhost_hlen; vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? vq->log : NULL; @@ -270,14 +359,14 @@ static void handle_rx(struct vhost_net *net) len, MSG_DONTWAIT | MSG_TRUNC); /* TODO: Check specific error and bomb out unless EAGAIN? */ if (err < 0) { - vhost_discard_vq_desc(vq); + vhost_discard_vq_desc(vq, 1); break; } /* TODO: Should check and handle checksum. */ if (err > len) { pr_debug("Discarded truncated rx packet: " " len %d > %zd\n", err, len); - vhost_discard_vq_desc(vq); + vhost_discard_vq_desc(vq, 1); continue; } len = err; @@ -302,6 +391,123 @@ static void handle_rx(struct vhost_net *net) unuse_mm(net->dev.mm); } +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_rx_mergeable(struct vhost_net *net) +{ + struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; + unsigned uninitialized_var(in), log; + struct vhost_log *vq_log; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_control = NULL, /* FIXME: get and handle RX aux data. */ + .msg_controllen = 0, + .msg_iov = vq->iov, + .msg_flags = MSG_DONTWAIT, + }; + + struct virtio_net_hdr_mrg_rxbuf hdr = { + .hdr.flags = 0, + .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE + }; + + size_t total_len = 0; + int err, headcount; + size_t vhost_hlen, sock_hlen; + size_t vhost_len, sock_len; + struct socket *sock = rcu_dereference(vq->private_data); + if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) + return; + + use_mm(net->dev.mm); + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + vhost_hlen = vq->vhost_hlen; + sock_hlen = vq->sock_hlen; + + vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? + vq->log : NULL; + + while ((sock_len = peek_head_len(sock->sk))) { + sock_len += sock_hlen; + vhost_len = sock_len + vhost_hlen; + headcount = get_rx_bufs(vq, vq->heads, vhost_len, + &in, vq_log, &log); + /* On error, stop handling until the next kick. */ + if (unlikely(headcount < 0)) + break; + /* OK, now we need to know about added descriptors. */ + if (!headcount) { + if (unlikely(vhost_enable_notify(vq))) { + /* They have slipped one in as we were + * doing that: check again. */ + vhost_disable_notify(vq); + continue; + } + /* Nothing new? Wait for eventfd to tell us + * they refilled. */ + break; + } + /* We don't need to be notified again. */ + if (unlikely((vhost_hlen))) + /* Skip header. TODO: support TSO. */ + move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); + else + /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: + * needed because sendmsg can modify msg_iov. */ + copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); + msg.msg_iovlen = in; + err = sock->ops->recvmsg(NULL, sock, &msg, + sock_len, MSG_DONTWAIT | MSG_TRUNC); + /* Userspace might have consumed the packet meanwhile: + * it's not supposed to do this usually, but might be hard + * to prevent. Discard data we got (if any) and keep going. */ + if (unlikely(err != sock_len)) { + pr_debug("Discarded rx packet: " + " len %d, expected %zd\n", err, sock_len); + vhost_discard_vq_desc(vq, headcount); + continue; + } + if (unlikely(vhost_hlen) && + memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0, + vhost_hlen)) { + vq_err(vq, "Unable to write vnet_hdr at addr %p\n", + vq->iov->iov_base); + break; + } + /* TODO: Should check and handle checksum. */ + if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) && + memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, + offsetof(typeof(hdr), num_buffers), + sizeof hdr.num_buffers)) { + vq_err(vq, "Failed num_buffers write"); + vhost_discard_vq_desc(vq, headcount); + break; + } + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, + headcount); + if (unlikely(vq_log)) + vhost_log_write(vq, vq_log, log, vhost_len); + total_len += vhost_len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); + unuse_mm(net->dev.mm); +} + +static void handle_rx(struct vhost_net *net) +{ + if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF)) + handle_rx_mergeable(net); + else + handle_rx_big(net); +} + static void handle_tx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, @@ -577,9 +783,21 @@ done: static int vhost_net_set_features(struct vhost_net *n, u64 features) { - size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ? - sizeof(struct virtio_net_hdr) : 0; + size_t vhost_hlen, sock_hlen, hdr_len; int i; + + hdr_len = (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? + sizeof(struct virtio_net_hdr_mrg_rxbuf) : + sizeof(struct virtio_net_hdr); + if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) { + /* vhost provides vnet_hdr */ + vhost_hlen = hdr_len; + sock_hlen = 0; + } else { + /* socket provides vnet_hdr */ + vhost_hlen = 0; + sock_hlen = hdr_len; + } mutex_lock(&n->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && !vhost_log_access_ok(&n->dev)) { @@ -590,7 +808,8 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) smp_wmb(); for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].mutex); - n->vqs[i].hdr_size = hdr_size; + n->vqs[i].vhost_hlen = vhost_hlen; + n->vqs[i].sock_hlen = sock_hlen; mutex_unlock(&n->vqs[i].mutex); } vhost_net_flush(n); -- cgit v1.2.3