diff options
author | Herbert Xu <herbert@gondor.apana.org.au> | 2009-04-16 02:02:07 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-04-16 02:02:07 -0700 |
commit | 76620aafd66f0004829764940c5466144969cffc (patch) | |
tree | 38041e6938121b5611546c582cd23f289db047b0 /drivers/net | |
parent | 861ab44059350e5cab350238606cf8814abab93b (diff) |
gro: New frags interface to avoid copying shinfo
It turns out that copying a 16-byte area at ~800k times a second
can be really expensive :) This patch redesigns the frags GRO
interface to avoid copying that area twice.
The two disciples of the frags interface have been converted.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/cxgb3/adapter.h | 2 | ||||
-rw-r--r-- | drivers/net/cxgb3/sge.c | 53 | ||||
-rw-r--r-- | drivers/net/sfc/rx.c | 26 |
3 files changed, 51 insertions, 30 deletions
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index 714df2b675e6..322434ac42fc 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h @@ -195,7 +195,7 @@ struct sge_qset { /* an SGE queue set */ struct sge_rspq rspq; struct sge_fl fl[SGE_RXQ_PER_SET]; struct sge_txq txq[SGE_TXQ_PER_SET]; - struct napi_gro_fraginfo lro_frag_tbl; + int nomem; int lro_enabled; void *lro_va; struct net_device *netdev; diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 26d3587f3399..73d569e758ec 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -654,7 +654,8 @@ static void t3_reset_qset(struct sge_qset *q) q->txq_stopped = 0; q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ q->rx_reclaim_timer.function = NULL; - q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0; + q->nomem = 0; + napi_free_frags(&q->napi); } @@ -2074,20 +2075,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, struct sge_fl *fl, int len, int complete) { struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; + struct sk_buff *skb = NULL; struct cpl_rx_pkt *cpl; - struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags; - int nr_frags = qs->lro_frag_tbl.nr_frags; - int frag_len = qs->lro_frag_tbl.len; + struct skb_frag_struct *rx_frag; + int nr_frags; int offset = 0; - if (!nr_frags) { - offset = 2 + sizeof(struct cpl_rx_pkt); - qs->lro_va = cpl = sd->pg_chunk.va + 2; + if (!qs->nomem) { + skb = napi_get_frags(&qs->napi); + qs->nomem = !skb; } fl->credits--; - len -= offset; pci_dma_sync_single_for_cpu(adap->pdev, pci_unmap_addr(sd, dma_addr), fl->buf_size - SGE_PG_RSVD, @@ -2100,21 +2100,38 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, fl->alloc_size, PCI_DMA_FROMDEVICE); + if (!skb) { + put_page(sd->pg_chunk.page); + if (complete) + qs->nomem = 0; + return; + } + + rx_frag = skb_shinfo(skb)->frags; + nr_frags = skb_shinfo(skb)->nr_frags; + + if (!nr_frags) { + offset = 2 + sizeof(struct cpl_rx_pkt); + qs->lro_va = sd->pg_chunk.va + 2; + } + len -= offset; + prefetch(qs->lro_va); rx_frag += nr_frags; rx_frag->page = sd->pg_chunk.page; rx_frag->page_offset = sd->pg_chunk.offset + offset; rx_frag->size = len; - frag_len += len; - qs->lro_frag_tbl.nr_frags++; - qs->lro_frag_tbl.len = frag_len; + skb->len += len; + skb->data_len += len; + skb->truesize += len; + skb_shinfo(skb)->nr_frags++; if (!complete) return; - qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY; + skb->ip_summed = CHECKSUM_UNNECESSARY; cpl = qs->lro_va; if (unlikely(cpl->vlan_valid)) { @@ -2123,15 +2140,11 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, struct vlan_group *grp = pi->vlan_grp; if (likely(grp != NULL)) { - vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan), - &qs->lro_frag_tbl); - goto out; + vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan)); + return; } } - napi_gro_frags(&qs->napi, &qs->lro_frag_tbl); - -out: - qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0; + napi_gro_frags(&qs->napi); } /** @@ -2300,8 +2313,6 @@ no_mem: if (fl->use_pages) { void *addr = fl->sdesc[fl->cidx].pg_chunk.va; - prefetch(&qs->lro_frag_tbl); - prefetch(addr); #if L1_CACHE_BYTES < 128 prefetch(addr + L1_CACHE_BYTES); diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c index 66d7fe3db3e6..01f9432c31ef 100644 --- a/drivers/net/sfc/rx.c +++ b/drivers/net/sfc/rx.c @@ -450,17 +450,27 @@ static void efx_rx_packet_lro(struct efx_channel *channel, /* Pass the skb/page into the LRO engine */ if (rx_buf->page) { - struct napi_gro_fraginfo info; + struct sk_buff *skb = napi_get_frags(napi); - info.frags[0].page = rx_buf->page; - info.frags[0].page_offset = efx_rx_buf_offset(rx_buf); - info.frags[0].size = rx_buf->len; - info.nr_frags = 1; - info.ip_summed = CHECKSUM_UNNECESSARY; - info.len = rx_buf->len; + if (!skb) { + put_page(rx_buf->page); + goto out; + } + + skb_shinfo(skb)->frags[0].page = rx_buf->page; + skb_shinfo(skb)->frags[0].page_offset = + efx_rx_buf_offset(rx_buf); + skb_shinfo(skb)->frags[0].size = rx_buf->len; + skb_shinfo(skb)->nr_frags = 1; + + skb->len = rx_buf->len; + skb->data_len = rx_buf->len; + skb->truesize += rx_buf->len; + skb->ip_summed = CHECKSUM_UNNECESSARY; - napi_gro_frags(napi, &info); + napi_gro_frags(napi); +out: EFX_BUG_ON_PARANOID(rx_buf->skb); rx_buf->page = NULL; } else { |