diff options
author | David S. Miller <davem@davemloft.net> | 2012-05-07 23:05:13 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-05-07 23:05:26 -0400 |
commit | 6e06c0e2347ec79d0bd5702b2438fe883f784545 (patch) | |
tree | 3f55976275c94822cd406e7edc549f668901affa /drivers | |
parent | 3a084ddb4bf299a6e898a9a07c89f3917f0713f7 (diff) | |
parent | c70aa540c7a9f67add11ad3161096fb95233aa2e (diff) |
Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Michael S. Tsirkin says:
--------------------
There are mostly bugfixes here.
I hope to merge some more patches by 3.5, in particular
vlan support fixes are waiting for Eric's ack,
and a version of tracepoint patch might be
ready in time, but let's merge what's ready so it's testable.
This includes a ton of zerocopy fixes by Jason -
good stuff but too intrusive for 3.4 and zerocopy is experimental
anyway.
virtio supported delayed interrupt for a while now
so adding support to the virtio tool made sense
--------------------
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/macvtap.c | 57 | ||||
-rw-r--r-- | drivers/vhost/net.c | 7 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 1 |
3 files changed, 46 insertions, 19 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0427c6561c84..163559c16988 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -505,10 +505,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, if (copy > size) { ++from; --count; - } + offset = 0; + } else + offset += size; copy -= size; offset1 += size; - offset = 0; } if (len == offset1) @@ -518,24 +519,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, struct page *page[MAX_SKB_FRAGS]; int num_pages; unsigned long base; + unsigned long truesize; - len = from->iov_len - offset1; + len = from->iov_len - offset; if (!len) { - offset1 = 0; + offset = 0; ++from; continue; } - base = (unsigned long)from->iov_base + offset1; + base = (unsigned long)from->iov_base + offset; size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; + if (i + size > MAX_SKB_FRAGS) + return -EMSGSIZE; num_pages = get_user_pages_fast(base, size, 0, &page[i]); - if ((num_pages != size) || - (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) - /* put_page is in skb free */ + if (num_pages != size) { + for (i = 0; i < num_pages; i++) + put_page(page[i]); return -EFAULT; + } + truesize = size * PAGE_SIZE; skb->data_len += len; skb->len += len; - skb->truesize += len; - atomic_add(len, &skb->sk->sk_wmem_alloc); + skb->truesize += truesize; + atomic_add(truesize, &skb->sk->sk_wmem_alloc); while (len) { int off = base & ~PAGE_MASK; int size = min_t(int, len, PAGE_SIZE - off); @@ -546,7 +552,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, len -= size; i++; } - offset1 = 0; + offset = 0; ++from; } return 0; @@ -646,7 +652,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; - int copylen; + int copylen = 0; bool zerocopy = false; if (q->flags & IFF_VNET_HDR) { @@ -675,15 +681,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (unlikely(len < ETH_HLEN)) goto err; + err = -EMSGSIZE; + if (unlikely(count > UIO_MAXIOV)) + goto err; + if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) zerocopy = true; if (zerocopy) { + /* Userspace may produce vectors with count greater than + * MAX_SKB_FRAGS, so we need to linearize parts of the skb + * to let the rest of data to be fit in the frags. + */ + if (count > MAX_SKB_FRAGS) { + copylen = iov_length(iv, count - MAX_SKB_FRAGS); + if (copylen < vnet_hdr_len) + copylen = 0; + else + copylen -= vnet_hdr_len; + } /* There are 256 bytes to be copied in skb, so there is enough * room for skb expand head in case it is used. * The rest buffer is mapped from userspace. */ - copylen = vnet_hdr.hdr_len; + if (copylen < vnet_hdr.hdr_len) + copylen = vnet_hdr.hdr_len; if (!copylen) copylen = GOODCOPY_LEN; } else @@ -694,10 +716,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (!skb) goto err; - if (zerocopy) { + if (zerocopy) err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; - } else + else err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len); if (err) @@ -716,8 +737,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, rcu_read_lock_bh(); vlan = rcu_dereference_bh(q->vlan); /* copy skb_ubuf_info for callback when skb has no error */ - if (zerocopy) + if (zerocopy) { skb_shinfo(skb)->destructor_arg = m->msg_control; + skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + } if (vlan) macvlan_start_xmit(skb, vlan->dev); else diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 1f21d2a1e528..853db7a08a26 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -166,7 +166,7 @@ static void handle_tx(struct vhost_net *net) if (wmem < sock->sk->sk_sndbuf / 2) tx_poll_stop(net); hdr_size = vq->vhost_hlen; - zcopy = vhost_sock_zcopy(sock); + zcopy = vq->ubufs; for (;;) { /* Release DMAs done buffers first */ @@ -257,7 +257,8 @@ static void handle_tx(struct vhost_net *net) UIO_MAXIOV; } vhost_discard_vq_desc(vq, 1); - tx_poll_start(net, sock); + if (err == -EAGAIN || err == -ENOBUFS) + tx_poll_start(net, sock); break; } if (err != len) @@ -265,6 +266,8 @@ static void handle_tx(struct vhost_net *net) " len %d != %zd\n", err, len); if (!zcopy) vhost_add_used_and_signal(&net->dev, vq, head, 0); + else + vhost_zerocopy_signal_used(vq); total_len += len; if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 51e4c1eeec4f..94dbd25caa30 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf_info *ubuf) struct vhost_ubuf_ref *ubufs = ubuf->ctx; struct vhost_virtqueue *vq = ubufs->vq; + vhost_poll_queue(&vq->poll); /* set len = 1 to mark this desc buffers done DMA */ vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; kref_put(&ubufs->kref, vhost_zerocopy_done_signal); |