From 76620aafd66f0004829764940c5466144969cffc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 16 Apr 2009 02:02:07 -0700 Subject: gro: New frags interface to avoid copying shinfo It turns out that copying a 16-byte area at ~800k times a second can be really expensive :) This patch redesigns the frags GRO interface to avoid copying that area twice. The two disciples of the frags interface have been converted. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/cxgb3/sge.c | 53 +++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 21 deletions(-) (limited to 'drivers/net/cxgb3/sge.c') diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 26d3587f3399..73d569e758ec 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -654,7 +654,8 @@ static void t3_reset_qset(struct sge_qset *q) q->txq_stopped = 0; q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ q->rx_reclaim_timer.function = NULL; - q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0; + q->nomem = 0; + napi_free_frags(&q->napi); } @@ -2074,20 +2075,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, struct sge_fl *fl, int len, int complete) { struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; + struct sk_buff *skb = NULL; struct cpl_rx_pkt *cpl; - struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags; - int nr_frags = qs->lro_frag_tbl.nr_frags; - int frag_len = qs->lro_frag_tbl.len; + struct skb_frag_struct *rx_frag; + int nr_frags; int offset = 0; - if (!nr_frags) { - offset = 2 + sizeof(struct cpl_rx_pkt); - qs->lro_va = cpl = sd->pg_chunk.va + 2; + if (!qs->nomem) { + skb = napi_get_frags(&qs->napi); + qs->nomem = !skb; } fl->credits--; - len -= offset; pci_dma_sync_single_for_cpu(adap->pdev, pci_unmap_addr(sd, dma_addr), fl->buf_size - SGE_PG_RSVD, @@ -2100,21 +2100,38 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, fl->alloc_size, PCI_DMA_FROMDEVICE); + if (!skb) { + put_page(sd->pg_chunk.page); + if (complete) + qs->nomem = 0; + return; + } + + rx_frag = skb_shinfo(skb)->frags; + nr_frags = skb_shinfo(skb)->nr_frags; + + if (!nr_frags) { + offset = 2 + sizeof(struct cpl_rx_pkt); + qs->lro_va = sd->pg_chunk.va + 2; + } + len -= offset; + prefetch(qs->lro_va); rx_frag += nr_frags; rx_frag->page = sd->pg_chunk.page; rx_frag->page_offset = sd->pg_chunk.offset + offset; rx_frag->size = len; - frag_len += len; - qs->lro_frag_tbl.nr_frags++; - qs->lro_frag_tbl.len = frag_len; + skb->len += len; + skb->data_len += len; + skb->truesize += len; + skb_shinfo(skb)->nr_frags++; if (!complete) return; - qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY; + skb->ip_summed = CHECKSUM_UNNECESSARY; cpl = qs->lro_va; if (unlikely(cpl->vlan_valid)) { @@ -2123,15 +2140,11 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, struct vlan_group *grp = pi->vlan_grp; if (likely(grp != NULL)) { - vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan), - &qs->lro_frag_tbl); - goto out; + vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan)); + return; } } - napi_gro_frags(&qs->napi, &qs->lro_frag_tbl); - -out: - qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0; + napi_gro_frags(&qs->napi); } /** @@ -2300,8 +2313,6 @@ no_mem: if (fl->use_pages) { void *addr = fl->sdesc[fl->cidx].pg_chunk.va; - prefetch(&qs->lro_frag_tbl); - prefetch(addr); #if L1_CACHE_BYTES < 128 prefetch(addr + L1_CACHE_BYTES); -- cgit v1.2.3 From 28679751a924c11f7135641f26e99249385de5b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 May 2009 19:26:37 +0000 Subject: net: dont update dev->trans_start in 10GB drivers Followup of commits 9d21493b4beb8f918ba248032fefa393074a5e2b and 08baf561083bc27a953aa087dd8a664bb2b88e8e (net: tx scalability works : trans_start) (net: txq_trans_update() helper) Now that core network takes care of trans_start updates, dont do it in drivers themselves, if possible. Multi queue drivers can avoid one cache miss (on dev->trans_start) in their start_xmit() handler. Exceptions are NETIF_F_LLTX drivers (vxge & tehuti) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/cxgb3/sge.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/cxgb3/sge.c') diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 73d569e758ec..49e64af7b093 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1286,7 +1286,6 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (vlan_tx_tag_present(skb) && pi->vlan_grp) qs->port_stats[SGE_PSTAT_VLANINS]++; - dev->trans_start = jiffies; spin_unlock(&q->lock); /* -- cgit v1.2.3 From c3a8c5b644118b5e2cfd0690b1dcea904a792c52 Mon Sep 17 00:00:00 2001 From: Divy Le Ray Date: Fri, 29 May 2009 12:52:38 +0000 Subject: cxgb3: move away from LLTX cxgb3 no longer advertizes LLTX. Signed-off-by: Divy Le Ray Signed-off-by: David S. Miller --- drivers/net/cxgb3/sge.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/net/cxgb3/sge.c') diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 49e64af7b093..0b978827874b 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1241,7 +1241,6 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) q = &qs->txq[TXQ_ETH]; txq = netdev_get_tx_queue(dev, qidx); - spin_lock(&q->lock); reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); credits = q->size - q->in_use; @@ -1252,7 +1251,6 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) dev_err(&adap->pdev->dev, "%s: Tx ring %u full while queue awake!\n", dev->name, q->cntxt_id & 7); - spin_unlock(&q->lock); return NETDEV_TX_BUSY; } @@ -1286,8 +1284,6 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (vlan_tx_tag_present(skb) && pi->vlan_grp) qs->port_stats[SGE_PSTAT_VLANINS]++; - spin_unlock(&q->lock); - /* * We do not use Tx completion interrupts to free DMAd Tx packets. * This is good for performamce but means that we rely on new Tx @@ -2857,11 +2853,12 @@ static void sge_timer_tx(unsigned long data) unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0}; unsigned long next_period; - if (spin_trylock(&qs->txq[TXQ_ETH].lock)) { - tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH], - TX_RECLAIM_TIMER_CHUNK); - spin_unlock(&qs->txq[TXQ_ETH].lock); + if (__netif_tx_trylock(qs->tx_q)) { + tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH], + TX_RECLAIM_TIMER_CHUNK); + __netif_tx_unlock(qs->tx_q); } + if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) { tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD], TX_RECLAIM_TIMER_CHUNK); @@ -2869,8 +2866,8 @@ static void sge_timer_tx(unsigned long data) } next_period = TX_RECLAIM_PERIOD >> - (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) / - TX_RECLAIM_TIMER_CHUNK); + (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) / + TX_RECLAIM_TIMER_CHUNK); mod_timer(&qs->tx_reclaim_timer, jiffies + next_period); } -- cgit v1.2.3