From 1650d5455bd2dc6b5ee134bd6fc1a3236c266b5b Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Tue, 20 Jan 2015 14:49:52 +0000
Subject: xen-netback: always fully coalesce guest Rx packets

Always fully coalesce guest Rx packets into the minimum number of ring
slots.  Reducing the number of slots per packet has significant
performance benefits when receiving off-host traffic.

Results from XenServer's performance benchmarks:

                         Baseline    Full coalesce
Interhost VM receive      7.2 Gb/s   11 Gb/s
Interhost aggregate      24 Gb/s     24 Gb/s
Intrahost single stream  14 Gb/s     14 Gb/s
Intrahost aggregate      34 Gb/s     34 Gb/s

However, this can increase the number of grant ops per packet which
decreases performance of backend (dom0) to VM traffic (by ~10%)
/unless/ grant copy has been optimized for adjacent ops with the same
source or destination (see "grant-table: defer releasing pages
acquired in a grant copy"[1] expected in Xen 4.6).

[1] http://lists.xen.org/archives/html/xen-devel/2015-01/msg01118.html

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 107 ++------------------------------------
 1 file changed, 3 insertions(+), 104 deletions(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 908e65e9b821..49322b6c32df 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -233,51 +233,6 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
 	}
 }
 
-/*
- * Returns true if we should start a new receive buffer instead of
- * adding 'size' bytes to a buffer which currently contains 'offset'
- * bytes.
- */
-static bool start_new_rx_buffer(int offset, unsigned long size, int head,
-				bool full_coalesce)
-{
-	/* simple case: we have completely filled the current buffer. */
-	if (offset == MAX_BUFFER_OFFSET)
-		return true;
-
-	/*
-	 * complex case: start a fresh buffer if the current frag
-	 * would overflow the current buffer but only if:
-	 *     (i)   this frag would fit completely in the next buffer
-	 * and (ii)  there is already some data in the current buffer
-	 * and (iii) this is not the head buffer.
-	 * and (iv)  there is no need to fully utilize the buffers
-	 *
-	 * Where:
-	 * - (i) stops us splitting a frag into two copies
-	 *   unless the frag is too large for a single buffer.
-	 * - (ii) stops us from leaving a buffer pointlessly empty.
-	 * - (iii) stops us leaving the first buffer
-	 *   empty. Strictly speaking this is already covered
-	 *   by (ii) but is explicitly checked because
-	 *   netfront relies on the first buffer being
-	 *   non-empty and can crash otherwise.
-	 * - (iv) is needed for skbs which can use up more than MAX_SKB_FRAGS
-	 *   slot
-	 *
-	 * This means we will effectively linearise small
-	 * frags but do not needlessly split large buffers
-	 * into multiple copies tend to give large frags their
-	 * own buffers as before.
-	 */
-	BUG_ON(size > MAX_BUFFER_OFFSET);
-	if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head &&
-	    !full_coalesce)
-		return true;
-
-	return false;
-}
-
 struct netrx_pending_operations {
 	unsigned copy_prod, copy_cons;
 	unsigned meta_prod, meta_cons;
@@ -336,24 +291,13 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb
 		BUG_ON(offset >= PAGE_SIZE);
 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
 
-		bytes = PAGE_SIZE - offset;
+		if (npo->copy_off == MAX_BUFFER_OFFSET)
+			meta = get_next_rx_buffer(queue, npo);
 
+		bytes = PAGE_SIZE - offset;
 		if (bytes > size)
 			bytes = size;
 
-		if (start_new_rx_buffer(npo->copy_off,
-					bytes,
-					*head,
-					XENVIF_RX_CB(skb)->full_coalesce)) {
-			/*
-			 * Netfront requires there to be some data in the head
-			 * buffer.
-			 */
-			BUG_ON(*head);
-
-			meta = get_next_rx_buffer(queue, npo);
-		}
-
 		if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
 			bytes = MAX_BUFFER_OFFSET - npo->copy_off;
 
@@ -652,60 +596,15 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
 
 	while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)
 	       && (skb = xenvif_rx_dequeue(queue)) != NULL) {
-		RING_IDX max_slots_needed;
 		RING_IDX old_req_cons;
 		RING_IDX ring_slots_used;
-		int i;
 
 		queue->last_rx_time = jiffies;
 
-		/* We need a cheap worse case estimate for the number of
-		 * slots we'll use.
-		 */
-
-		max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) +
-						skb_headlen(skb),
-						PAGE_SIZE);
-		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-			unsigned int size;
-			unsigned int offset;
-
-			size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
-			offset = skb_shinfo(skb)->frags[i].page_offset;
-
-			/* For a worse-case estimate we need to factor in
-			 * the fragment page offset as this will affect the
-			 * number of times xenvif_gop_frag_copy() will
-			 * call start_new_rx_buffer().
-			 */
-			max_slots_needed += DIV_ROUND_UP(offset + size,
-							 PAGE_SIZE);
-		}
-
-		/* To avoid the estimate becoming too pessimal for some
-		 * frontends that limit posted rx requests, cap the estimate
-		 * at MAX_SKB_FRAGS. In this case netback will fully coalesce
-		 * the skb into the provided slots.
-		 */
-		if (max_slots_needed > MAX_SKB_FRAGS) {
-			max_slots_needed = MAX_SKB_FRAGS;
-			XENVIF_RX_CB(skb)->full_coalesce = true;
-		} else {
-			XENVIF_RX_CB(skb)->full_coalesce = false;
-		}
-
-		/* We may need one more slot for GSO metadata */
-		if (skb_is_gso(skb) &&
-		   (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 ||
-		    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
-			max_slots_needed++;
-
 		old_req_cons = queue->rx.req_cons;
 		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
 		ring_slots_used = queue->rx.req_cons - old_req_cons;
 
-		BUG_ON(ring_slots_used > max_slots_needed);
-
 		__skb_queue_tail(&rxq, skb);
 	}
 
-- 
cgit v1.2.3


From 0ae65f49af64d68f0daca37b83383115cae5e690 Mon Sep 17 00:00:00 2001
From: Jennifer Herbert <jennifer.herbert@citrix.com>
Date: Wed, 24 Dec 2014 14:03:16 +0000
Subject: x86/xen: require ballooned pages for grant maps

Ballooned pages are always used for grant maps which means the
original frame does not need to be saved in page->index nor restored
after the grant unmap.

This allows the workaround in netback for the conflicting use of the
(unionized) page->index and page->pfmemalloc to be removed.

Signed-off-by: Jennifer Herbert <jennifer.herbert@citrix.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/net/xen-netback/netback.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 908e65e9b821..64413189ad06 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1241,12 +1241,6 @@ static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
 		/* Take an extra reference to offset network stack's put_page */
 		get_page(queue->mmap_pages[pending_idx]);
 	}
-	/* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
-	 * overlaps with "index", and "mapping" is not set. I think mapping
-	 * should be set. If delivered to local stack, it would drop this
-	 * skb in sk_filter unless the socket has the right to use it.
-	 */
-	skb->pfmemalloc	= false;
 }
 
 static int xenvif_get_extras(struct xenvif_queue *queue,
-- 
cgit v1.2.3


From c2677a6fc4dee765fff8f7ac3d61f657dc295650 Mon Sep 17 00:00:00 2001
From: Jennifer Herbert <jennifer.herbert@citrix.com>
Date: Mon, 5 Jan 2015 14:45:10 +0000
Subject: xen-netback: use foreign page information from the pages themselves

Use the foreign page flag in netback to get the domid and grant ref
needed for the grant copy.  This signficiantly simplifies the netback
code and makes netback work with foreign pages from other backends
(e.g., blkback).

This allows blkback to use iSCSI disks provided by domUs running on
the same host.

Signed-off-by: Jennifer Herbert <jennifer.herbert@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/net/xen-netback/netback.c | 100 ++++----------------------------------
 1 file changed, 9 insertions(+), 91 deletions(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 64413189ad06..ae3ab3752ea8 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -314,9 +314,7 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
 static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb,
 				 struct netrx_pending_operations *npo,
 				 struct page *page, unsigned long size,
-				 unsigned long offset, int *head,
-				 struct xenvif_queue *foreign_queue,
-				 grant_ref_t foreign_gref)
+				 unsigned long offset, int *head)
 {
 	struct gnttab_copy *copy_gop;
 	struct xenvif_rx_meta *meta;
@@ -333,6 +331,8 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb
 	offset &= ~PAGE_MASK;
 
 	while (size > 0) {
+		struct xen_page_foreign *foreign;
+
 		BUG_ON(offset >= PAGE_SIZE);
 		BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
 
@@ -361,9 +361,10 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb
 		copy_gop->flags = GNTCOPY_dest_gref;
 		copy_gop->len = bytes;
 
-		if (foreign_queue) {
-			copy_gop->source.domid = foreign_queue->vif->domid;
-			copy_gop->source.u.ref = foreign_gref;
+		foreign = xen_page_foreign(page);
+		if (foreign) {
+			copy_gop->source.domid = foreign->domid;
+			copy_gop->source.u.ref = foreign->gref;
 			copy_gop->flags |= GNTCOPY_source_gref;
 		} else {
 			copy_gop->source.domid = DOMID_SELF;
@@ -405,35 +406,6 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb
 	}
 }
 
-/*
- * Find the grant ref for a given frag in a chain of struct ubuf_info's
- * skb: the skb itself
- * i: the frag's number
- * ubuf: a pointer to an element in the chain. It should not be NULL
- *
- * Returns a pointer to the element in the chain where the page were found. If
- * not found, returns NULL.
- * See the definition of callback_struct in common.h for more details about
- * the chain.
- */
-static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb,
-						const int i,
-						const struct ubuf_info *ubuf)
-{
-	struct xenvif_queue *foreign_queue = ubuf_to_queue(ubuf);
-
-	do {
-		u16 pending_idx = ubuf->desc;
-
-		if (skb_shinfo(skb)->frags[i].page.p ==
-		    foreign_queue->mmap_pages[pending_idx])
-			break;
-		ubuf = (struct ubuf_info *) ubuf->ctx;
-	} while (ubuf);
-
-	return ubuf;
-}
-
 /*
  * Prepare an SKB to be transmitted to the frontend.
  *
@@ -459,8 +431,6 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	int head = 1;
 	int old_meta_prod;
 	int gso_type;
-	const struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg;
-	const struct ubuf_info *const head_ubuf = ubuf;
 
 	old_meta_prod = npo->meta_prod;
 
@@ -507,68 +477,16 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 			len = skb_tail_pointer(skb) - data;
 
 		xenvif_gop_frag_copy(queue, skb, npo,
-				     virt_to_page(data), len, offset, &head,
-				     NULL,
-				     0);
+				     virt_to_page(data), len, offset, &head);
 		data += len;
 	}
 
 	for (i = 0; i < nr_frags; i++) {
-		/* This variable also signals whether foreign_gref has a real
-		 * value or not.
-		 */
-		struct xenvif_queue *foreign_queue = NULL;
-		grant_ref_t foreign_gref;
-
-		if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
-			(ubuf->callback == &xenvif_zerocopy_callback)) {
-			const struct ubuf_info *const startpoint = ubuf;
-
-			/* Ideally ubuf points to the chain element which
-			 * belongs to this frag. Or if frags were removed from
-			 * the beginning, then shortly before it.
-			 */
-			ubuf = xenvif_find_gref(skb, i, ubuf);
-
-			/* Try again from the beginning of the list, if we
-			 * haven't tried from there. This only makes sense in
-			 * the unlikely event of reordering the original frags.
-			 * For injected local pages it's an unnecessary second
-			 * run.
-			 */
-			if (unlikely(!ubuf) && startpoint != head_ubuf)
-				ubuf = xenvif_find_gref(skb, i, head_ubuf);
-
-			if (likely(ubuf)) {
-				u16 pending_idx = ubuf->desc;
-
-				foreign_queue = ubuf_to_queue(ubuf);
-				foreign_gref =
-					foreign_queue->pending_tx_info[pending_idx].req.gref;
-				/* Just a safety measure. If this was the last
-				 * element on the list, the for loop will
-				 * iterate again if a local page were added to
-				 * the end. Using head_ubuf here prevents the
-				 * second search on the chain. Or the original
-				 * frags changed order, but that's less likely.
-				 * In any way, ubuf shouldn't be NULL.
-				 */
-				ubuf = ubuf->ctx ?
-					(struct ubuf_info *) ubuf->ctx :
-					head_ubuf;
-			} else
-				/* This frag was a local page, added to the
-				 * array after the skb left netback.
-				 */
-				ubuf = head_ubuf;
-		}
 		xenvif_gop_frag_copy(queue, skb, npo,
 				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
 				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
 				     skb_shinfo(skb)->frags[i].page_offset,
-				     &head,
-				     foreign_queue,
-				     foreign_queue ? foreign_gref : UINT_MAX);
+				     &head);
 	}
 
 	return npo->meta_prod - old_meta_prod;
-- 
cgit v1.2.3


From 42b5212fee4f57907e9415b18fe19c13e65574bc Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Mon, 2 Feb 2015 16:57:51 +0000
Subject: xen-netback: stop the guest rx thread after a fatal error

After commit e9d8b2c2968499c1f96563e6522c56958d5a1d0d (xen-netback:
disable rogue vif in kthread context), a fatal (protocol) error would
leave the guest Rx thread spinning, wasting CPU time.  Commit
ecf08d2dbb96d5a4b4bcc53a39e8d29cc8fef02e (xen-netback: reintroduce
guest Rx stall detection) made this even worse by removing a
cond_resched() from this path.

Since a fatal error is non-recoverable, just allow the guest Rx thread
to exit.  This requires taking additional refs to the task so the
thread exiting early is handled safely.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reported-by: Julien Grall <julien.grall@linaro.org>
Tested-by: Julien Grall <julien.grall@linaro.org>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/netback.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/net/xen-netback/netback.c')

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 908e65e9b821..c8ce701a7efb 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -2109,8 +2109,7 @@ int xenvif_kthread_guest_rx(void *data)
 		 */
 		if (unlikely(vif->disabled && queue->id == 0)) {
 			xenvif_carrier_off(vif);
-			xenvif_rx_queue_purge(queue);
-			continue;
+			break;
 		}
 
 		if (!skb_queue_empty(&queue->rx_queue))
-- 
cgit v1.2.3