From 2f8af120a159a843948749ea88bcacda9779b132 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 15 Aug 2006 01:39:10 -0700 Subject: [BNX2]: Fix tx race condition. Fix a subtle race condition between bnx2_start_xmit() and bnx2_tx_int() similar to the one in tg3 discovered by Herbert Xu: CPU0 CPU1 bnx2_start_xmit() if (tx_ring_full) { tx_lock bnx2_tx() if (!netif_queue_stopped) netif_stop_queue() if (!tx_ring_full) update_tx_ring netif_wake_queue() tx_unlock } Even though tx_ring is updated before the if statement in bnx2_tx_int() in program order, it can be re-ordered by the CPU as shown above. This scenario can cause the tx queue to be stopped forever if bnx2_tx_int() has just freed up the entire tx_ring. The possibility of this happening should be very rare though. The following changes are made, very much identical to the tg3 fix: 1. Add memory barrier to fix the above race condition. 2. Eliminate the private tx_lock altogether and rely solely on netif_tx_lock. This eliminates one spinlock in bnx2_start_xmit() when the ring is full. 3. Because of 2, use netif_tx_lock in bnx2_tx_int() before calling netif_wake_queue(). 4. Add memory barrier to bnx2_tx_avail(). 5. Add bp->tx_wake_thresh which is set to half the tx ring size. 6. Check for the full wake queue condition before getting netif_tx_lock in tg3_tx(). This reduces the number of unnecessary spinlocks when the tx ring is full in a steady-state condition. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'drivers/net/bnx2.c') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index db73de0d2511..2099edbbfdfd 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -209,8 +209,10 @@ MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl); static inline u32 bnx2_tx_avail(struct bnx2 *bp) { - u32 diff = TX_RING_IDX(bp->tx_prod) - TX_RING_IDX(bp->tx_cons); + u32 diff; + smp_mb(); + diff = TX_RING_IDX(bp->tx_prod) - TX_RING_IDX(bp->tx_cons); if (diff > MAX_TX_DESC_CNT) diff = (diff & MAX_TX_DESC_CNT) - 1; return (bp->tx_ring_size - diff); @@ -1686,15 +1688,20 @@ bnx2_tx_int(struct bnx2 *bp) } bp->tx_cons = sw_cons; + /* Need to make the tx_cons update visible to bnx2_start_xmit() + * before checking for netif_queue_stopped(). Without the + * memory barrier, there is a small possibility that bnx2_start_xmit() + * will miss it and cause the queue to be stopped forever. + */ + smp_mb(); - if (unlikely(netif_queue_stopped(bp->dev))) { - spin_lock(&bp->tx_lock); + if (unlikely(netif_queue_stopped(bp->dev)) && + (bnx2_tx_avail(bp) > bp->tx_wake_thresh)) { + netif_tx_lock(bp->dev); if ((netif_queue_stopped(bp->dev)) && - (bnx2_tx_avail(bp) > MAX_SKB_FRAGS)) { - + (bnx2_tx_avail(bp) > bp->tx_wake_thresh)) netif_wake_queue(bp->dev); - } - spin_unlock(&bp->tx_lock); + netif_tx_unlock(bp->dev); } } @@ -3503,6 +3510,8 @@ bnx2_init_tx_ring(struct bnx2 *bp) struct tx_bd *txbd; u32 val; + bp->tx_wake_thresh = bp->tx_ring_size / 2; + txbd = &bp->tx_desc_ring[MAX_TX_DESC_CNT]; txbd->tx_bd_haddr_hi = (u64) bp->tx_desc_mapping >> 32; @@ -4390,10 +4399,8 @@ bnx2_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid) #endif /* Called with netif_tx_lock. - * hard_start_xmit is pseudo-lockless - a lock is only required when - * the tx queue is full. This way, we get the benefit of lockless - * operations most of the time without the complexities to handle - * netif_stop_queue/wake_queue race conditions. + * bnx2_tx_int() runs without netif_tx_lock unless it needs to call + * netif_wake_queue(). */ static int bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -4512,12 +4519,9 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) dev->trans_start = jiffies; if (unlikely(bnx2_tx_avail(bp) <= MAX_SKB_FRAGS)) { - spin_lock(&bp->tx_lock); netif_stop_queue(dev); - - if (bnx2_tx_avail(bp) > MAX_SKB_FRAGS) + if (bnx2_tx_avail(bp) > bp->tx_wake_thresh) netif_wake_queue(dev); - spin_unlock(&bp->tx_lock); } return NETDEV_TX_OK; @@ -5628,7 +5632,6 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bp->pdev = pdev; spin_lock_init(&bp->phy_lock); - spin_lock_init(&bp->tx_lock); INIT_WORK(&bp->reset_task, bnx2_reset_task, bp); dev->base_addr = dev->mem_start = pci_resource_start(pdev, 0); -- cgit v1.2.3 From 932f3772cf76cc1b1fd1538ceee3edba9bf2164f Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 15 Aug 2006 01:39:36 -0700 Subject: [BNX2]: Convert to netdev_alloc_skb() Convert dev_alloc_skb() to netdev_alloc_skb() and increase default rx ring size to 255. The old ring size of 100 was too small. Update version to 1.4.44. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/net/bnx2.c') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 2099edbbfdfd..652eb05a6c2d 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -56,8 +56,8 @@ #define DRV_MODULE_NAME "bnx2" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.4.43" -#define DRV_MODULE_RELDATE "June 28, 2006" +#define DRV_MODULE_VERSION "1.4.44" +#define DRV_MODULE_RELDATE "August 10, 2006" #define RUN_AT(x) (jiffies + (x)) @@ -1571,7 +1571,7 @@ bnx2_alloc_rx_skb(struct bnx2 *bp, u16 index) struct rx_bd *rxbd = &bp->rx_desc_ring[RX_RING(index)][RX_IDX(index)]; unsigned long align; - skb = dev_alloc_skb(bp->rx_buf_size); + skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size); if (skb == NULL) { return -ENOMEM; } @@ -1580,7 +1580,6 @@ bnx2_alloc_rx_skb(struct bnx2 *bp, u16 index) skb_reserve(skb, 8 - align); } - skb->dev = bp->dev; mapping = pci_map_single(bp->pdev, skb->data, bp->rx_buf_use_size, PCI_DMA_FROMDEVICE); @@ -1793,7 +1792,7 @@ bnx2_rx_int(struct bnx2 *bp, int budget) if ((bp->dev->mtu > 1500) && (len <= RX_COPY_THRESH)) { struct sk_buff *new_skb; - new_skb = dev_alloc_skb(len + 2); + new_skb = netdev_alloc_skb(bp->dev, len + 2); if (new_skb == NULL) goto reuse_rx; @@ -1804,7 +1803,6 @@ bnx2_rx_int(struct bnx2 *bp, int budget) skb_reserve(new_skb, 2); skb_put(new_skb, len); - new_skb->dev = bp->dev; bnx2_reuse_rx_skb(bp, skb, sw_ring_cons, sw_ring_prod); @@ -3961,7 +3959,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode) return -EINVAL; pkt_size = 1514; - skb = dev_alloc_skb(pkt_size); + skb = netdev_alloc_skb(bp->dev, pkt_size); if (!skb) return -ENOMEM; packet = skb_put(skb, pkt_size); @@ -5754,7 +5752,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bp->mac_addr[5] = (u8) reg; bp->tx_ring_size = MAX_TX_DESC_CNT; - bnx2_set_rx_ring_size(bp, 100); + bnx2_set_rx_ring_size(bp, 255); bp->rx_csum = 1; -- cgit v1.2.3