diff options
author | David S. Miller <davem@davemloft.net> | 2012-03-17 02:02:26 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-03-17 02:02:26 -0700 |
commit | 81a430ac1b88b0702c57d2513e247317e810e04d (patch) | |
tree | 5d4b825c9b568b569833ec72cc5cbf43e1e6031c | |
parent | c2ec3ff6b8712f5d951927d7774c805fe3270caa (diff) | |
parent | 729739b754affa482e92fa7836e4066096089d11 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next
-rw-r--r-- | drivers/net/ethernet/intel/igb/e1000_defines.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/igb/igb_main.c | 37 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe.h | 60 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 23 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 17 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1261 |
6 files changed, 772 insertions, 628 deletions
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index aed217449f0d..89eb1f85b9fa 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -134,6 +134,8 @@ #define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ #define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ #define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define E1000_RCTL_DPF 0x00400000 /* Discard Pause Frames */ +#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ #define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ /* diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index e96cef89f121..c4902411d749 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1769,10 +1769,21 @@ static int igb_set_features(struct net_device *netdev, netdev_features_t features) { netdev_features_t changed = netdev->features ^ features; + struct igb_adapter *adapter = netdev_priv(netdev); if (changed & NETIF_F_HW_VLAN_RX) igb_vlan_mode(netdev, features); + if (!(changed & NETIF_F_RXALL)) + return 0; + + netdev->features = features; + + if (netif_running(netdev)) + igb_reinit_locked(adapter); + else + igb_reset(adapter); + return 0; } @@ -1954,6 +1965,7 @@ static int __devinit igb_probe(struct pci_dev *pdev, /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; + netdev->hw_features |= NETIF_F_RXALL; /* set this bit last since it cannot be part of hw_features */ netdev->features |= NETIF_F_HW_VLAN_FILTER; @@ -1964,6 +1976,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, NETIF_F_IPV6_CSUM | NETIF_F_SG; + netdev->priv_flags |= IFF_SUPP_NOFCS; + if (pci_using_dac) { netdev->features |= NETIF_F_HIGHDMA; netdev->vlan_features |= NETIF_F_HIGHDMA; @@ -3003,6 +3017,22 @@ void igb_setup_rctl(struct igb_adapter *adapter) wr32(E1000_QDE, ALL_QUEUES); } + /* This is useful for sniffing bad packets. */ + if (adapter->netdev->features & NETIF_F_RXALL) { + /* UPE and MPE will be handled by normal PROMISC logic + * in e1000e_set_rx_mode */ + rctl |= (E1000_RCTL_SBP | /* Receive bad packets */ + E1000_RCTL_BAM | /* RX All Bcast Pkts */ + E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ + + rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ + E1000_RCTL_DPF | /* Allow filtered pause */ + E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ + /* Do not mess with E1000_CTRL_VME, it affects transmit as well, + * and that breaks VLANs. + */ + } + wr32(E1000_RCTL, rctl); } @@ -4293,6 +4323,8 @@ static void igb_tx_map(struct igb_ring *tx_ring, /* write last descriptor with RS and EOP bits */ cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD); + if (unlikely(skb->no_fcs)) + cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS)); tx_desc->read.cmd_type_len = cmd_type; /* set the timestamp */ @@ -6098,8 +6130,9 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) goto next_desc; } - if (igb_test_staterr(rx_desc, - E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { + if (unlikely((igb_test_staterr(rx_desc, + E1000_RXDEXT_ERR_FRAME_ERR_MASK)) + && !(rx_ring->netdev->features & NETIF_F_RXALL))) { dev_kfree_skb_any(skb); goto next_desc; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index f069c1b10753..e0d809d0ed75 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -72,12 +72,6 @@ /* Supported Rx Buffer Sizes */ #define IXGBE_RXBUFFER_512 512 /* Used for packet split */ -#define IXGBE_RXBUFFER_2K 2048 -#define IXGBE_RXBUFFER_3K 3072 -#define IXGBE_RXBUFFER_4K 4096 -#define IXGBE_RXBUFFER_7K 7168 -#define IXGBE_RXBUFFER_8K 8192 -#define IXGBE_RXBUFFER_15K 15360 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ /* @@ -102,7 +96,6 @@ #define IXGBE_TX_FLAGS_FCOE (u32)(1 << 5) #define IXGBE_TX_FLAGS_FSO (u32)(1 << 6) #define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7) -#define IXGBE_TX_FLAGS_MAPPED_AS_PAGE (u32)(1 << 8) #define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29 @@ -156,19 +149,18 @@ struct vf_macvlans { struct ixgbe_tx_buffer { union ixgbe_adv_tx_desc *next_to_watch; unsigned long time_stamp; - dma_addr_t dma; - u32 length; - u32 tx_flags; struct sk_buff *skb; - u32 bytecount; - u16 gso_segs; + unsigned int bytecount; + unsigned short gso_segs; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; }; struct ixgbe_rx_buffer { struct sk_buff *skb; dma_addr_t dma; struct page *page; - dma_addr_t page_dma; unsigned int page_offset; }; @@ -180,7 +172,6 @@ struct ixgbe_queue_stats { struct ixgbe_tx_queue_stats { u64 restart_queue; u64 tx_busy; - u64 completed; u64 tx_done_old; }; @@ -193,21 +184,15 @@ struct ixgbe_rx_queue_stats { u64 csum_err; }; -enum ixbge_ring_state_t { +enum ixgbe_ring_state_t { __IXGBE_TX_FDIR_INIT_DONE, __IXGBE_TX_DETECT_HANG, __IXGBE_HANG_CHECK_ARMED, - __IXGBE_RX_PS_ENABLED, __IXGBE_RX_RSC_ENABLED, __IXGBE_RX_CSUM_UDP_ZERO_ERR, + __IXGBE_RX_FCOE_BUFSZ, }; -#define ring_is_ps_enabled(ring) \ - test_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state) -#define set_ring_ps_enabled(ring) \ - set_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state) -#define clear_ring_ps_enabled(ring) \ - clear_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state) #define check_for_tx_hang(ring) \ test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) #define set_check_for_tx_hang(ring) \ @@ -233,7 +218,6 @@ struct ixgbe_ring { u8 __iomem *tail; u16 count; /* amount of descriptors */ - u16 rx_buf_len; u8 queue_index; /* needed for multiqueue queue management */ u8 reg_idx; /* holds the special value that gets @@ -241,8 +225,13 @@ struct ixgbe_ring { * associated with this ring, which is * different for DCB and RSS modes */ - u8 atr_sample_rate; - u8 atr_count; + union { + struct { + u8 atr_sample_rate; + u8 atr_count; + }; + u16 next_to_alloc; + }; u16 next_to_use; u16 next_to_clean; @@ -287,6 +276,22 @@ struct ixgbe_ring_feature { int mask; } ____cacheline_internodealigned_in_smp; +/* + * FCoE requires that all Rx buffers be over 2200 bytes in length. Since + * this is twice the size of a half page we need to double the page order + * for FCoE enabled Rx queues. + */ +#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) +static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +{ + return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0; +} +#else +#define ixgbe_rx_pg_order(_ring) 0 +#endif +#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) +#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) + struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ unsigned int total_bytes; /* total bytes processed this int */ @@ -554,7 +559,7 @@ struct ixgbe_cb { }; dma_addr_t dma; u16 append_cnt; - bool delay_unmap; + bool page_released; }; #define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb) @@ -625,7 +630,8 @@ extern void ixgbe_tx_ctxtdesc(struct ixgbe_ring *, u32, u32, u32, u32); extern void ixgbe_do_reset(struct net_device *netdev); #ifdef IXGBE_FCOE extern void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter); -extern int ixgbe_fso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, +extern int ixgbe_fso(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, u32 tx_flags, u8 *hdr_len); extern void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter); extern int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 24f7291dbe57..b09e67cc9d6e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -35,6 +35,7 @@ #include <linux/netdevice.h> #include <linux/ethtool.h> #include <linux/vmalloc.h> +#include <linux/highmem.h> #include <linux/uaccess.h> #include "ixgbe.h" @@ -1615,7 +1616,6 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter) rx_ring->dev = &adapter->pdev->dev; rx_ring->netdev = adapter->netdev; rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx; - rx_ring->rx_buf_len = IXGBE_RXBUFFER_2K; err = ixgbe_setup_rx_resources(rx_ring); if (err) { @@ -1718,13 +1718,15 @@ static bool ixgbe_check_lbtest_frame(struct ixgbe_rx_buffer *rx_buffer, frame_size >>= 1; - data = rx_buffer->skb->data; + data = kmap(rx_buffer->page) + rx_buffer->page_offset; if (data[3] != 0xFF || data[frame_size + 10] != 0xBE || data[frame_size + 12] != 0xAF) match = false; + kunmap(rx_buffer->page); + return match; } @@ -1746,17 +1748,22 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, /* check Rx buffer */ rx_buffer = &rx_ring->rx_buffer_info[rx_ntc]; - /* unmap Rx buffer, will be remapped by alloc_rx_buffers */ - dma_unmap_single(rx_ring->dev, - rx_buffer->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - rx_buffer->dma = 0; + /* sync Rx buffer for CPU read */ + dma_sync_single_for_cpu(rx_ring->dev, + rx_buffer->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); /* verify contents of skb */ if (ixgbe_check_lbtest_frame(rx_buffer, size)) count++; + /* sync Rx buffer for device write */ + dma_sync_single_for_device(rx_ring->dev, + rx_buffer->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + /* unmap buffer on Tx side */ tx_buffer = &tx_ring->tx_buffer_info[tx_ntc]; ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index da7da752b6b4..5f943d3f85c4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -447,7 +447,7 @@ ddp_out: /** * ixgbe_fso - ixgbe FCoE Sequence Offload (FSO) * @tx_ring: tx desc ring - * @skb: associated skb + * @first: first tx_buffer structure containing skb, tx_flags, and protocol * @tx_flags: tx flags * @hdr_len: hdr_len to be returned * @@ -455,9 +455,11 @@ ddp_out: * * Returns : 0 indicates no FSO, > 0 for FSO, < 0 for error */ -int ixgbe_fso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, +int ixgbe_fso(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, u32 tx_flags, u8 *hdr_len) { + struct sk_buff *skb = first->skb; struct fc_frame_header *fh; u32 vlan_macip_lens; u32 fcoe_sof_eof = 0; @@ -530,9 +532,14 @@ int ixgbe_fso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, *hdr_len = sizeof(struct fcoe_crc_eof); /* hdr_len includes fc_hdr if FCoE LSO is enabled */ - if (skb_is_gso(skb)) - *hdr_len += (skb_transport_offset(skb) + - sizeof(struct fc_frame_header)); + if (skb_is_gso(skb)) { + *hdr_len += skb_transport_offset(skb) + + sizeof(struct fc_frame_header); + /* update gso_segs and bytecount */ + first->gso_segs = DIV_ROUND_UP(skb->len - *hdr_len, + skb_shinfo(skb)->gso_size); + first->bytecount += (first->gso_segs - 1) * *hdr_len; + } /* mss_l4len_id: use 1 for FSO as TSO, no need for L4LEN */ mss_l4len_idx = skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 167e898fbba6..1d8f9f83f8ed 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -289,7 +289,7 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) struct ixgbe_reg_info *reginfo; int n = 0; struct ixgbe_ring *tx_ring; - struct ixgbe_tx_buffer *tx_buffer_info; + struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; struct my_u0 { u64 a; u64 b; } *u0; struct ixgbe_ring *rx_ring; @@ -329,14 +329,13 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); for (n = 0; n < adapter->num_tx_queues; n++) { tx_ring = adapter->tx_ring[n]; - tx_buffer_info = - &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; + tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", n, tx_ring->next_to_use, tx_ring->next_to_clean, - (u64)tx_buffer_info->dma, - tx_buffer_info->length, - tx_buffer_info->next_to_watch, - (u64)tx_buffer_info->time_stamp); + (u64)dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + tx_buffer->next_to_watch, + (u64)tx_buffer->time_stamp); } /* Print TX Rings */ @@ -367,17 +366,17 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { tx_desc = IXGBE_TX_DESC(tx_ring, i); - tx_buffer_info = &tx_ring->tx_buffer_info[i]; + tx_buffer = &tx_ring->tx_buffer_info[i]; u0 = (struct my_u0 *)tx_desc; pr_info("T [0x%03X] %016llX %016llX %016llX" " %04X %p %016llX %p", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), - (u64)tx_buffer_info->dma, - tx_buffer_info->length, - tx_buffer_info->next_to_watch, - (u64)tx_buffer_info->time_stamp, - tx_buffer_info->skb); + (u64)dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + tx_buffer->next_to_watch, + (u64)tx_buffer->time_stamp, + tx_buffer->skb); if (i == tx_ring->next_to_use && i == tx_ring->next_to_clean) pr_cont(" NTC/U\n"); @@ -389,11 +388,13 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) pr_cont("\n"); if (netif_msg_pktdata(adapter) && - tx_buffer_info->dma != 0) + dma_unmap_len(tx_buffer, len) != 0) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt(tx_buffer_info->dma), - tx_buffer_info->length, true); + phys_to_virt(dma_unmap_addr(tx_buffer, + dma)), + dma_unmap_len(tx_buffer, len), + true); } } @@ -469,17 +470,7 @@ rx_ring_summary: print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, phys_to_virt(rx_buffer_info->dma), - rx_ring->rx_buf_len, true); - - if (rx_ring->rx_buf_len - < IXGBE_RXBUFFER_2K) - print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt( - rx_buffer_info->page_dma + - rx_buffer_info->page_offset - ), - PAGE_SIZE/2, true); + ixgbe_rx_bufsz(rx_ring), true); } } @@ -589,32 +580,26 @@ static inline void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, } } -static inline void ixgbe_unmap_tx_resource(struct ixgbe_ring *ring, - struct ixgbe_tx_buffer *tx_buffer) +void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *ring, + struct ixgbe_tx_buffer *tx_buffer) { - if (tx_buffer->dma) { - if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_MAPPED_AS_PAGE) - dma_unmap_page(ring->dev, - tx_buffer->dma, - tx_buffer->length, - DMA_TO_DEVICE); - else + if (tx_buffer->skb) { + dev_kfree_skb_any(tx_buffer->skb); + if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, - tx_buffer->dma, - tx_buffer->length, - DMA_TO_DEVICE); + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } else if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); } - tx_buffer->dma = 0; -} - -void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *tx_ring, - struct ixgbe_tx_buffer *tx_buffer_info) -{ - ixgbe_unmap_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info->skb) - dev_kfree_skb_any(tx_buffer_info->skb); - tx_buffer_info->skb = NULL; - /* tx_buffer_info must be completely set up in the transmit path */ + tx_buffer->next_to_watch = NULL; + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* tx_buffer must be completely set up in the transmit path */ } static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) @@ -671,7 +656,7 @@ static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring) { - return ring->tx_stats.completed; + return ring->stats.packets; } static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring) @@ -751,12 +736,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, union ixgbe_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = q_vector->tx.work_limit; - u16 i = tx_ring->next_to_clean; + unsigned int i = tx_ring->next_to_clean; + + if (test_bit(__IXGBE_DOWN, &adapter->state)) + return true; tx_buffer = &tx_ring->tx_buffer_info[i]; tx_desc = IXGBE_TX_DESC(tx_ring, i); + i -= tx_ring->count; - for (; budget; budget--) { + do { union ixgbe_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; /* if next_to_watch is not set then there is no work pending */ @@ -770,36 +759,65 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) break; - /* count the packet as being completed */ - tx_ring->tx_stats.completed++; - /* clear next_to_watch to prevent false hangs */ tx_buffer->next_to_watch = NULL; - do { - ixgbe_unmap_tx_resource(tx_ring, tx_buffer); - if (likely(tx_desc == eop_desc)) { - eop_desc = NULL; - dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; + /* update the statistics for this packet */ + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; - total_bytes += tx_buffer->bytecount; - total_packets += tx_buffer->gso_segs; - } + /* free the skb */ + dev_kfree_skb_any(tx_buffer->skb); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + /* clear tx_buffer data */ + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { tx_buffer++; tx_desc++; i++; - if (unlikely(i == tx_ring->count)) { - i = 0; - + if (unlikely(!i)) { + i -= tx_ring->count; tx_buffer = tx_ring->tx_buffer_info; tx_desc = IXGBE_TX_DESC(tx_ring, 0); } - } while (eop_desc); - } + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IXGBE_TX_DESC(tx_ring, 0); + } + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + i += tx_ring->count; tx_ring->next_to_clean = i; u64_stats_update_begin(&tx_ring->syncp); tx_ring->stats.bytes += total_bytes; @@ -811,7 +829,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) { /* schedule immediate reset if we believe we hung */ struct ixgbe_hw *hw = &adapter->hw; - tx_desc = IXGBE_TX_DESC(tx_ring, i); e_err(drv, "Detected Tx Unit Hang\n" " Tx Queue <%d>\n" " TDH, TDT <%x>, <%x>\n" @@ -849,9 +866,11 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, * sees the new next_to_clean. */ smp_mb(); - if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->queue_index) && - !test_bit(__IXGBE_DOWN, &adapter->state)) { - netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); + if (__netif_subqueue_stopped(tx_ring->netdev, + tx_ring->queue_index) + && !test_bit(__IXGBE_DOWN, &adapter->state)) { + netif_wake_subqueue(tx_ring->netdev, + tx_ring->queue_index); ++tx_ring->tx_stats.restart_queue; } } @@ -1006,6 +1025,7 @@ static inline void ixgbe_rx_hash(struct ixgbe_ring *ring, skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); } +#ifdef IXGBE_FCOE /** * ixgbe_rx_is_fcoe - check the rx desc for incoming pkt type * @adapter: address of board private structure @@ -1024,6 +1044,7 @@ static inline bool ixgbe_rx_is_fcoe(struct ixgbe_adapter *adapter, IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT))); } +#endif /* IXGBE_FCOE */ /** * ixgbe_rx_checksum - indicate in skb if hw indicated a good cksum * @ring: structure containing ring specific data @@ -1051,7 +1072,7 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, return; if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { - u16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; + __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; /* * 82599 errata, UDP frames with a 0 checksum can be marked as @@ -1072,6 +1093,9 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, static inline void ixgbe_release_rx_desc(struct ixgbe_ring *rx_ring, u32 val) { rx_ring->next_to_use = val; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = val; /* * Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -1082,67 +1106,46 @@ static inline void ixgbe_release_rx_desc(struct ixgbe_ring *rx_ring, u32 val) writel(val, rx_ring->tail); } -static bool ixgbe_alloc_mapped_skb(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *bi) -{ - struct sk_buff *skb = bi->skb; - dma_addr_t dma = bi->dma; - - if (dma) - return true; - - if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); - bi->skb = skb; - if (!skb) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return false; - } - } - - dma = dma_map_single(rx_ring->dev, skb->data, - rx_ring->rx_buf_len, DMA_FROM_DEVICE); - - if (dma_mapping_error(rx_ring->dev, dma)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return false; - } - - bi->dma = dma; - return true; -} - static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *bi) { struct page *page = bi->page; - dma_addr_t page_dma = bi->page_dma; - unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2); + dma_addr_t dma = bi->dma; - if (page_dma) + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(dma)) return true; - if (!page) { - page = alloc_page(GFP_ATOMIC | __GFP_COLD); - bi->page = page; + /* alloc new page for storage */ + if (likely(!page)) { + page = alloc_pages(GFP_ATOMIC | __GFP_COLD, + ixgbe_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } + bi->page = page; } - page_dma = dma_map_page(rx_ring->dev, page, - page_offset, PAGE_SIZE / 2, - DMA_FROM_DEVICE); + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + + /* + * if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + put_page(page); + bi->page = NULL; - if (dma_mapping_error(rx_ring->dev, page_dma)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } - bi->page_dma = page_dma; - bi->page_offset = page_offset; + bi->dma = dma; + bi->page_offset ^= ixgbe_rx_bufsz(rx_ring); + return true; } @@ -1157,30 +1160,23 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) struct ixgbe_rx_buffer *bi; u16 i = rx_ring->next_to_use; - /* nothing to do or no valid netdev defined */ - if (!cleaned_count || !rx_ring->netdev) + /* nothing to do */ + if (!cleaned_count) return; rx_desc = IXGBE_RX_DESC(rx_ring, i); bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; - while (cleaned_count--) { - if (!ixgbe_alloc_mapped_skb(rx_ring, bi)) + do { + if (!ixgbe_alloc_mapped_page(rx_ring, bi)) break; - /* Refresh the desc even if buffer_addrs didn't change - * because each write-back erases this info. */ - if (ring_is_ps_enabled(rx_ring)) { - rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); - - if (!ixgbe_alloc_mapped_page(rx_ring, bi)) - break; - - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); - } else { - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); - } + /* + * Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); rx_desc++; bi++; @@ -1193,7 +1189,9 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) /* clear the hdr_addr for the next_to_use descriptor */ rx_desc->read.hdr_addr = 0; - } + + cleaned_count--; + } while (cleaned_count); i += rx_ring->count; @@ -1201,90 +1199,6 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) ixgbe_release_rx_desc(rx_ring, i); } -static inline u16 ixgbe_get_hlen(union ixgbe_adv_rx_desc *rx_desc) -{ - /* HW will not DMA in data larger than the given buffer, even if it - * parses the (NFS, of course) header to be larger. In that case, it - * fills the header buffer and spills the rest into the page. - */ - u16 hdr_info = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info); - u16 hlen = (hdr_info & IXGBE_RXDADV_HDRBUFLEN_MASK) >> - IXGBE_RXDADV_HDRBUFLEN_SHIFT; - if (hlen > IXGBE_RX_HDR_SIZE) - hlen = IXGBE_RX_HDR_SIZE; - return hlen; -} - -/** - * ixgbe_merge_active_tail - merge active tail into lro skb - * @tail: pointer to active tail in frag_list - * - * This function merges the length and data of an active tail into the - * skb containing the frag_list. It resets the tail's pointer to the head, - * but it leaves the heads pointer to tail intact. - **/ -static inline struct sk_buff *ixgbe_merge_active_tail(struct sk_buff *tail) -{ - struct sk_buff *head = IXGBE_CB(tail)->head; - - if (!head) - return tail; - - head->len += tail->len; - head->data_len += tail->len; - head->truesize += tail->len; - - IXGBE_CB(tail)->head = NULL; - - return head; -} - -/** - * ixgbe_add_active_tail - adds an active tail into the skb frag_list - * @head: pointer to the start of the skb - * @tail: pointer to active tail to add to frag_list - * - * This function adds an active tail to the end of the frag list. This tail - * will still be receiving data so we cannot yet ad it's stats to the main - * skb. That is done via ixgbe_merge_active_tail. - **/ -static inline void ixgbe_add_active_tail(struct sk_buff *head, - struct sk_buff *tail) -{ - struct sk_buff *old_tail = IXGBE_CB(head)->tail; - - if (old_tail) { - ixgbe_merge_active_tail(old_tail); - old_tail->next = tail; - } else { - skb_shinfo(head)->frag_list = tail; - } - - IXGBE_CB(tail)->head = head; - IXGBE_CB(head)->tail = tail; -} - -/** - * ixgbe_close_active_frag_list - cleanup pointers on a frag_list skb - * @head: pointer to head of an active frag list - * - * This function will clear the frag_tail_tracker pointer on an active - * frag_list and returns true if the pointer was actually set - **/ -static inline bool ixgbe_close_active_frag_list(struct sk_buff *head) -{ - struct sk_buff *tail = IXGBE_CB(head)->tail; - - if (!tail) - return false; - - ixgbe_merge_active_tail(tail); - - IXGBE_CB(head)->tail = NULL; - - return true; -} - /** * ixgbe_get_headlen - determine size of header for RSC/LRO/GRO/FCOE * @data: pointer to the start of the headers @@ -1346,7 +1260,7 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, /* record next protocol */ nexthdr = hdr.ipv4->protocol; hdr.network += hlen; -#ifdef CONFIG_FCOE +#ifdef IXGBE_FCOE } else if (protocol == __constant_htons(ETH_P_FCOE)) { if ((hdr.network - data) > (max_len - FCOE_HEADER_LEN)) return max_len; @@ -1409,7 +1323,7 @@ static void ixgbe_get_rsc_cnt(struct ixgbe_ring *rx_ring, static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring, struct sk_buff *skb) { - u16 hdr_len = ixgbe_get_headlen(skb->data, skb_headlen(skb)); + u16 hdr_len = skb_headlen(skb); /* set gso_size to avoid messing up TCP MSS */ skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len), @@ -1473,149 +1387,346 @@ static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, netif_rx(skb); } +/** + * ixgbe_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: Current socket buffer containing buffer in progress + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(IXGBE_RX_DESC(rx_ring, ntc)); + + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + + /* append_cnt indicates packet is RSC, if so fetch nextp */ + if (IXGBE_CB(skb)->append_cnt) { + ntc = le32_to_cpu(rx_desc->wb.upper.status_error); + ntc &= IXGBE_RXDADV_NEXTP_MASK; + ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + } + + /* place skb in next buffer to be received */ + rx_ring->rx_buffer_info[ntc].skb = skb; + rx_ring->rx_stats.non_eop_descs++; + + return true; +} + +/** + * ixgbe_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Check for corrupted packet headers caused by senders on the local L2 + * embedded NIC switch not setting up their Tx Descriptors right. These + * should be very rare. + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + struct net_device *netdev = rx_ring->netdev; + unsigned char *va; + unsigned int pull_len; + + /* if the page was released unmap it, else just sync our portion */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; + } else { + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + frag->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + IXGBE_CB(skb)->dma = 0; + + /* verify that the packet does not have any known errors */ + if (unlikely(ixgbe_test_staterr(rx_desc, + IXGBE_RXDADV_ERR_FRAME_ERR_MASK) && + !(netdev->features & NETIF_F_RXALL))) { + dev_kfree_skb_any(skb); + return true; + } + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = skb_frag_size(frag); + if (pull_len > 256) + pull_len = ixgbe_get_headlen(va, pull_len); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; + + /* + * if we sucked the frag empty then we should free it, + * if there are other frags here something is screwed up in hardware + */ + if (skb_frag_size(frag) == 0) { + BUG_ON(skb_shinfo(skb)->nr_frags != 1); + skb_shinfo(skb)->nr_frags = 0; + __skb_frag_unref(frag); + skb->truesize -= ixgbe_rx_bufsz(rx_ring); + } + + /* if skb_pad returns an error the skb was freed */ + if (unlikely(skb->len < 60)) { + int pad_len = 60 - skb->len; + + if (skb_pad(skb, pad_len)) + return true; + __skb_put(skb, pad_len); + } + + return false; +} + +/** + * ixgbe_can_reuse_page - determine if we can reuse a page + * @rx_buffer: pointer to rx_buffer containing the page we want to reuse + * + * Returns true if page can be reused in another Rx buffer + **/ +static inline bool ixgbe_can_reuse_page(struct ixgbe_rx_buffer *rx_buffer) +{ + struct page *page = rx_buffer->page; + + /* if we are only owner of page and it is local we can reuse it */ + return likely(page_count(page) == 1) && + likely(page_to_nid(page) == numa_node_id()); +} + +/** + * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Syncronizes page for reuse by the adapter + **/ +static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *old_buff) +{ + struct ixgbe_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + u16 bufsz = ixgbe_rx_bufsz(rx_ring); + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + new_buff->page = old_buff->page; + new_buff->dma = old_buff->dma; + + /* flip page offset to other buffer and store to new_buff */ + new_buff->page_offset = old_buff->page_offset ^ bufsz; + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, + new_buff->page_offset, bufsz, + DMA_FROM_DEVICE); + + /* bump ref count on page before it is given to the stack */ + get_page(new_buff->page); +} + +/** + * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buff to place the data into + * + * This function is based on skb_add_rx_frag. I would have used that + * function however it doesn't handle the truesize case correctly since we + * are allocating more memory than might be used for a single receive. + **/ +static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + struct sk_buff *skb, int size) +{ + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buffer->page, rx_buffer->page_offset, + size); + skb->len += size; + skb->data_len += size; + skb->truesize += ixgbe_rx_bufsz(rx_ring); +} + +/** + * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf + * @q_vector: structure containing interrupt and ring information + * @rx_ring: rx descriptor ring to transact packets on + * @budget: Total limit on number of packets to process + * + * This function provides a "bounce buffer" approach to Rx interrupt + * processing. The advantage to this is that on systems that have + * expensive overhead for IOMMU access this provides a means of avoiding + * it by maintaining the mapping of the page to the syste. + * + * Returns true if all work is completed without reaching budget + **/ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *rx_ring, int budget) { - union ixgbe_adv_rx_desc *rx_desc, *next_rxd; - struct ixgbe_rx_buffer *rx_buffer_info; - struct sk_buff *skb; unsigned int total_rx_bytes = 0, total_rx_packets = 0; - const int current_node = numa_node_id(); #ifdef IXGBE_FCOE struct ixgbe_adapter *adapter = q_vector->adapter; int ddp_bytes = 0; #endif /* IXGBE_FCOE */ - u16 i; - u16 cleaned_count = 0; + u16 cleaned_count = ixgbe_desc_unused(rx_ring); - i = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC(rx_ring, i); + do { + struct ixgbe_rx_buffer *rx_buffer; + union ixgbe_adv_rx_desc *rx_desc; + struct sk_buff *skb; + struct page *page; + u16 ntc; - while (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) { - u32 upper_len = 0; + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { + ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } - rmb(); /* read descriptor and rx_buffer_info after status DD */ + ntc = rx_ring->next_to_clean; + rx_desc = IXGBE_RX_DESC(rx_ring, ntc); + rx_buffer = &rx_ring->rx_buffer_info[ntc]; - rx_buffer_info = &rx_ring->rx_buffer_info[i]; + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) + break; - skb = rx_buffer_info->skb; - rx_buffer_info->skb = NULL; - prefetch(skb->data); + /* + * This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); - /* linear means we are building an skb from multiple pages */ - if (!skb_is_nonlinear(skb)) { - u16 hlen; - if (ring_is_ps_enabled(rx_ring)) { - hlen = ixgbe_get_hlen(rx_desc); - upper_len = le16_to_cpu(rx_desc->wb.upper.length); - } else { - hlen = le16_to_cpu(rx_desc->wb.upper.length); + page = rx_buffer->page; + prefetchw(page); + + skb = rx_buffer->skb; + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + break; } - skb_put(skb, hlen); + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); /* * Delay unmapping of the first packet. It carries the * header information, HW may still access the header - * after writeback. Only unmap it when EOP is reached + * after the writeback. Only unmap it when EOP is + * reached */ - if (!IXGBE_CB(skb)->head) { - IXGBE_CB(skb)->delay_unmap = true; - IXGBE_CB(skb)->dma = rx_buffer_info->dma; - } else { - skb = ixgbe_merge_active_tail(skb); - dma_unmap_single(rx_ring->dev, - rx_buffer_info->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - } - rx_buffer_info->dma = 0; + IXGBE_CB(skb)->dma = rx_buffer->dma; } else { - /* assume packet split since header is unmapped */ - upper_len = le16_to_cpu(rx_desc->wb.upper.length); + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); } - if (upper_len) { - dma_unmap_page(rx_ring->dev, - rx_buffer_info->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_buffer_info->page_dma = 0; - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buffer_info->page, - rx_buffer_info->page_offset, - upper_len); - - if ((page_count(rx_buffer_info->page) == 1) && - (page_to_nid(rx_buffer_info->page) == current_node)) - get_page(rx_buffer_info->page); - else - rx_buffer_info->page = NULL; + /* pull page into skb */ + ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, + le16_to_cpu(rx_desc->wb.upper.length)); - skb->len += upper_len; - skb->data_len += upper_len; - skb->truesize += PAGE_SIZE / 2; + if (ixgbe_can_reuse_page(rx_buffer)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); } - ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb); + /* clear contents of buffer_info */ + rx_buffer->skb = NULL; + rx_buffer->dma = 0; + rx_buffer->page = NULL; - i++; - if (i == rx_ring->count) - i = 0; + ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb); - next_rxd = IXGBE_RX_DESC(rx_ring, i); - prefetch(next_rxd); cleaned_count++; - if ((!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) { - struct ixgbe_rx_buffer *next_buffer; - u32 nextp; - - if (IXGBE_CB(skb)->append_cnt) { - nextp = le32_to_cpu( - rx_desc->wb.upper.status_error); - nextp >>= IXGBE_RXDADV_NEXTP_SHIFT; - } else { - nextp = i; - } - - next_buffer = &rx_ring->rx_buffer_info[nextp]; - - if (ring_is_ps_enabled(rx_ring)) { - rx_buffer_info->skb = next_buffer->skb; - rx_buffer_info->dma = next_buffer->dma; - next_buffer->skb = skb; - next_buffer->dma = 0; - } else { - struct sk_buff *next_skb = next_buffer->skb; - ixgbe_add_active_tail(skb, next_skb); - IXGBE_CB(next_skb)->head = skb; - } - rx_ring->rx_stats.non_eop_descs++; - goto next_desc; - } - - dma_unmap_single(rx_ring->dev, - IXGBE_CB(skb)->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - IXGBE_CB(skb)->dma = 0; - IXGBE_CB(skb)->delay_unmap = false; - - if (ixgbe_close_active_frag_list(skb) && - !IXGBE_CB(skb)->append_cnt) { - /* if we got here without RSC the packet is invalid */ - dev_kfree_skb_any(skb); - goto next_desc; - } + /* place incomplete frames back on ring for completion */ + if (ixgbe_is_non_eop(rx_ring, rx_desc, skb)) + continue; - /* ERR_MASK will only have valid bits if EOP set */ - if (unlikely(ixgbe_test_staterr(rx_desc, - IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { - dev_kfree_skb_any(skb); - goto next_desc; - } + /* verify the packet layout is correct */ + if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb)) + continue; /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1630,32 +1741,16 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb); if (!ddp_bytes) { dev_kfree_skb_any(skb); - goto next_desc; + continue; } } + #endif /* IXGBE_FCOE */ ixgbe_rx_skb(q_vector, skb); + /* update budget accounting */ budget--; -next_desc: - if (!budget) - break; - - /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { - ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); - cleaned_count = 0; - } - - /* use prefetched values */ - rx_desc = next_rxd; - } - - rx_ring->next_to_clean = i; - cleaned_count = ixgbe_desc_unused(rx_ring); - - if (cleaned_count) - ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + } while (likely(budget)); #ifdef IXGBE_FCOE /* include DDPed FCoE data */ @@ -1670,8 +1765,8 @@ next_desc: total_rx_bytes += ddp_bytes; total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss); } -#endif /* IXGBE_FCOE */ +#endif /* IXGBE_FCOE */ u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -1679,6 +1774,9 @@ next_desc: q_vector->rx.total_packets += total_rx_packets; q_vector->rx.total_bytes += total_rx_bytes; + if (cleaned_count) + ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + return !!budget; } @@ -2634,18 +2732,12 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl |= (IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) & IXGBE_SRRCTL_BSIZEHDR_MASK; - if (ring_is_ps_enabled(rx_ring)) { -#if (PAGE_SIZE / 2) > IXGBE_MAX_RXBUFFER - srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; +#if PAGE_SIZE > IXGBE_MAX_RXBUFFER + srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; #else - srrctl |= (PAGE_SIZE / 2) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; #endif - srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; - } else { - srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; - srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - } + srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(&adapter->hw, IXGBE_SRRCTL(reg_idx), srrctl); } @@ -2728,13 +2820,11 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, { struct ixgbe_hw *hw = &adapter->hw; u32 rscctrl; - int rx_buf_len; u8 reg_idx = ring->reg_idx; if (!ring_is_rsc_enabled(ring)) return; - rx_buf_len = ring->rx_buf_len; rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(reg_idx)); rscctrl |= IXGBE_RSCCTL_RSCEN; /* @@ -2742,24 +2832,13 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, * total size of max desc * buf_len is not greater * than 65536 */ - if (ring_is_ps_enabled(ring)) { -#if (PAGE_SIZE < 8192) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; -#elif (PAGE_SIZE < 16384) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; -#elif (PAGE_SIZE < 32768) - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; +#if (PAGE_SIZE <= 8192) + rscctrl |= IXGBE_RSCCTL_MAXDESC_16; +#elif (PAGE_SIZE <= 16384) + rscctrl |= IXGBE_RSCCTL_MAXDESC_8; #else - rscctrl |= IXGBE_RSCCTL_MAXDESC_1; + rscctrl |= IXGBE_RSCCTL_MAXDESC_4; #endif - } else { - if (rx_buf_len <= IXGBE_RXBUFFER_4K) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; - else if (rx_buf_len <= IXGBE_RXBUFFER_8K) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; - else - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; - } IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl); } @@ -2976,23 +3055,10 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - int rx_buf_len; struct ixgbe_ring *rx_ring; int i; u32 mhadd, hlreg0; - /* Decide whether to use packet split mode or not */ - /* On by default */ - adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED; - - /* Do not use packet split if we're in SR-IOV Mode */ - if (adapter->num_vfs) - adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED; - - /* Disable packet split due to 82599 erratum #45 */ - if (hw->mac.type == ixgbe_mac_82599EB) - adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED; - #ifdef IXGBE_FCOE /* adjust max frame to be able to do baby jumbo for FCoE */ if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && @@ -3011,27 +3077,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) /* MHADD will allow an extra 4 bytes past for vlan tagged frames */ max_frame += VLAN_HLEN; - /* Set the RX buffer length according to the mode */ - if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) { - rx_buf_len = IXGBE_RX_HDR_SIZE; - } else { - if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) && - (netdev->mtu <= ETH_DATA_LEN)) - rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; - /* - * Make best use of allocation by using all but 1K of a - * power of 2 allocation that will be used for skb->head. - */ - else if (max_frame <= IXGBE_RXBUFFER_3K) - rx_buf_len = IXGBE_RXBUFFER_3K; - else if (max_frame <= IXGBE_RXBUFFER_7K) - rx_buf_len = IXGBE_RXBUFFER_7K; - else if (max_frame <= IXGBE_RXBUFFER_15K) - rx_buf_len = IXGBE_RXBUFFER_15K; - else - rx_buf_len = IXGBE_MAX_RXBUFFER; - } - hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); /* set jumbo enable since MHADD.MFS is keeping size locked at max_frame */ hlreg0 |= IXGBE_HLREG0_JUMBOEN; @@ -3043,32 +3088,16 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) */ for (i = 0; i < adapter->num_rx_queues; i++) { rx_ring = adapter->rx_ring[i]; - rx_ring->rx_buf_len = rx_buf_len; - - if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) - set_ring_ps_enabled(rx_ring); - else - clear_ring_ps_enabled(rx_ring); - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_ring_rsc_enabled(rx_ring); else clear_ring_rsc_enabled(rx_ring); - #ifdef IXGBE_FCOE if (netdev->features & NETIF_F_FCOE_MTU) { struct ixgbe_ring_feature *f; f = &adapter->ring_feature[RING_F_FCOE]; - if ((i >= f->mask) && (i < f->mask + f->indices)) { - clear_ring_ps_enabled(rx_ring); - if (rx_buf_len < IXGBE_FCOE_JUMBO_FRAME_SIZE) - rx_ring->rx_buf_len = - IXGBE_FCOE_JUMBO_FRAME_SIZE; - } else if (!ring_is_rsc_enabled(rx_ring) && - !ring_is_ps_enabled(rx_ring)) { - rx_ring->rx_buf_len = - IXGBE_FCOE_JUMBO_FRAME_SIZE; - } + if ((i >= f->mask) && (i < f->mask + f->indices)) + set_bit(__IXGBE_RX_FCOE_BUFSZ, &rx_ring->state); } #endif /* IXGBE_FCOE */ } @@ -3342,6 +3371,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); /* set all bits that we expect to always be set */ + fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */ fctrl |= IXGBE_FCTRL_BAM; fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */ fctrl |= IXGBE_FCTRL_PMCF; @@ -3390,6 +3420,18 @@ void ixgbe_set_rx_mode(struct net_device *netdev) IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr); } + /* This is useful for sniffing bad packets. */ + if (adapter->netdev->features & NETIF_F_RXALL) { + /* UPE and MPE will be handled by normal PROMISC logic + * in e1000e_set_rx_mode */ + fctrl |= (IXGBE_FCTRL_SBP | /* Receive bad packets */ + IXGBE_FCTRL_BAM | /* RX All Bcast Pkts */ + IXGBE_FCTRL_PMCF); /* RX All MAC Ctrl Pkts */ + + fctrl &= ~(IXGBE_FCTRL_DPF); + /* NOTE: VLAN filtering is disabled by setting PROMISC */ + } + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); if (netdev->features & NETIF_F_HW_VLAN_RX) @@ -3977,6 +4019,27 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } /** + * ixgbe_init_rx_page_offset - initialize page offset values for Rx buffers + * @rx_ring: ring to setup + * + * On many IA platforms the L1 cache has a critical stride of 4K, this + * results in each receive buffer starting in the same cache set. To help + * reduce the pressure on this cache set we can interleave the offsets so + * that only every other buffer will be in the same cache set. + **/ +static void ixgbe_init_rx_page_offset(struct ixgbe_ring *rx_ring) +{ + struct ixgbe_rx_buffer *rx_buffer = rx_ring->rx_buffer_info; + u16 i; + + for (i = 0; i < rx_ring->count; i += 2) { + rx_buffer[0].page_offset = 0; + rx_buffer[1].page_offset = ixgbe_rx_bufsz(rx_ring); + rx_buffer = &rx_buffer[2]; + } +} + +/** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ @@ -3992,49 +4055,40 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { - struct ixgbe_rx_buffer *rx_buffer_info; - - rx_buffer_info = &rx_ring->rx_buffer_info[i]; - if (rx_buffer_info->dma) { - dma_unmap_single(rx_ring->dev, rx_buffer_info->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - rx_buffer_info->dma = 0; - } - if (rx_buffer_info->skb) { - struct sk_buff *skb = rx_buffer_info->skb; - rx_buffer_info->skb = NULL; - /* We need to clean up RSC frag lists */ - skb = ixgbe_merge_active_tail(skb); - ixgbe_close_active_frag_list(skb); - if (IXGBE_CB(skb)->delay_unmap) { - dma_unmap_single(dev, - IXGBE_CB(skb)->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - IXGBE_CB(skb)->dma = 0; - IXGBE_CB(skb)->delay_unmap = false; + struct ixgbe_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_buffer_info[i]; + if (rx_buffer->skb) { + struct sk_buff *skb = rx_buffer->skb; + if (IXGBE_CB(skb)->page_released) { + dma_unmap_page(dev, + IXGBE_CB(skb)->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; } dev_kfree_skb(skb); } - if (!rx_buffer_info->page) - continue; - if (rx_buffer_info->page_dma) { - dma_unmap_page(dev, rx_buffer_info->page_dma, - PAGE_SIZE / 2, DMA_FROM_DEVICE); - rx_buffer_info->page_dma = 0; - } - put_page(rx_buffer_info->page); - rx_buffer_info->page = NULL; - rx_buffer_info->page_offset = 0; + rx_buffer->skb = NULL; + if (rx_buffer->dma) + dma_unmap_page(dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); + rx_buffer->dma = 0; + if (rx_buffer->page) + put_page(rx_buffer->page); + rx_buffer->page = NULL; } size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; memset(rx_ring->rx_buffer_info, 0, size); + ixgbe_init_rx_page_offset(rx_ring); + /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -5398,6 +5452,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + ixgbe_init_rx_page_offset(rx_ring); + return 0; err: vfree(rx_ring->rx_buffer_info); @@ -5517,20 +5573,24 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; /* MTU < 68 is an error and causes problems on some kernels */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED && - hw->mac.type != ixgbe_mac_X540) { - if ((new_mtu < 68) || (max_frame > MAXIMUM_ETHERNET_VLAN_SIZE)) - return -EINVAL; - } else { - if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) + if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) + return -EINVAL; + + /* + * For 82599EB we cannot allow PF to change MTU greater than 1500 + * in SR-IOV mode as it may cause buffer overruns in guest VFs that + * don't allocate and chain buffers correctly. + */ + if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && + (adapter->hw.mac.type == ixgbe_mac_82599EB) && + (max_frame > MAXIMUM_ETHERNET_VLAN_SIZE)) return -EINVAL; - } e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); + /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; @@ -6523,9 +6583,11 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); } -static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, +static int ixgbe_tso(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, u32 tx_flags, __be16 protocol, u8 *hdr_len) { + struct sk_buff *skb = first->skb; int err; u32 vlan_macip_lens, type_tucmd; u32 mss_l4len_idx, l4len; @@ -6559,9 +6621,14 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, 0, IPPROTO_TCP, 0); } + /* compute header lengths */ l4len = tcp_hdrlen(skb); *hdr_len = skb_transport_offset(skb) + l4len; + /* update gso size and bytecount with header size */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + /* mss_l4len_id: use 1 as index for TSO */ mss_l4len_idx = l4len << IXGBE_ADVTXD_L4LEN_SHIFT; mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; @@ -6579,9 +6646,10 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, } static bool ixgbe_tx_csum(struct ixgbe_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags, - __be16 protocol) + struct ixgbe_tx_buffer *first, + u32 tx_flags, __be16 protocol) { + struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; u32 mss_l4len_idx = 0; u32 type_tucmd = 0; @@ -6658,7 +6726,7 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) /* set segmentation enable bits for TSO/FSO */ #ifdef IXGBE_FCOE - if ((tx_flags & IXGBE_TX_FLAGS_TSO) || (tx_flags & IXGBE_TX_FLAGS_FSO)) + if (tx_flags & (IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_FSO)) #else if (tx_flags & IXGBE_TX_FLAGS_TSO) #endif @@ -6667,201 +6735,193 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) return cmd_type; } -static __le32 ixgbe_tx_olinfo_status(u32 tx_flags, unsigned int paylen) +static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc, + u32 tx_flags, unsigned int paylen) { - __le32 olinfo_status = - cpu_to_le32(paylen << IXGBE_ADVTXD_PAYLEN_SHIFT); - - if (tx_flags & IXGBE_TX_FLAGS_TSO) { - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_TXSM | - (1 << IXGBE_ADVTXD_IDX_SHIFT)); - /* enble IPv4 checksum for TSO */ - if (tx_flags & IXGBE_TX_FLAGS_IPV4) - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM); - } + __le32 olinfo_status = cpu_to_le32(paylen << IXGBE_ADVTXD_PAYLEN_SHIFT); /* enable L4 checksum for TSO and TX checksum offload */ if (tx_flags & IXGBE_TX_FLAGS_CSUM) olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_TXSM); -#ifdef IXGBE_FCOE - /* use index 1 context for FCOE/FSO */ - if (tx_flags & IXGBE_TX_FLAGS_FCOE) - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_CC | - (1 << IXGBE_ADVTXD_IDX_SHIFT)); + /* enble IPv4 checksum for TSO */ + if (tx_flags & IXGBE_TX_FLAGS_IPV4) + olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM); + /* use index 1 context for TSO/FSO/FCOE */ +#ifdef IXGBE_FCOE + if (tx_flags & (IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_FCOE)) +#else + if (tx_flags & IXGBE_TX_FLAGS_TSO) #endif + olinfo_status |= cpu_to_le32(1 << IXGBE_ADVTXD_IDX_SHIFT); + /* * Check Context must be set if Tx switch is enabled, which it * always is for case where virtual functions are running */ +#ifdef IXGBE_FCOE + if (tx_flags & (IXGBE_TX_FLAGS_TXSW | IXGBE_TX_FLAGS_FCOE)) +#else if (tx_flags & IXGBE_TX_FLAGS_TXSW) +#endif olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_CC); - return olinfo_status; + tx_desc->read.olinfo_status = olinfo_status; } #define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ IXGBE_TXD_CMD_RS) static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, - struct sk_buff *skb, struct ixgbe_tx_buffer *first, u32 tx_flags, const u8 hdr_len) { - struct device *dev = tx_ring->dev; - struct ixgbe_tx_buffer *tx_buffer_info; - union ixgbe_adv_tx_desc *tx_desc; dma_addr_t dma; - __le32 cmd_type, olinfo_status; - struct skb_frag_struct *frag; - unsigned int f = 0; + struct sk_buff *skb = first->skb; + struct ixgbe_tx_buffer *tx_buffer; + union ixgbe_adv_tx_desc *tx_desc; + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; unsigned int data_len = skb->data_len; unsigned int size = skb_headlen(skb); - u32 offset = 0; - u32 paylen = skb->len - hdr_len; + unsigned int paylen = skb->len - hdr_len; + __le32 cmd_type; u16 i = tx_ring->next_to_use; - u16 gso_segs; + + tx_desc = IXGBE_TX_DESC(tx_ring, i); + + ixgbe_tx_olinfo_status(tx_desc, tx_flags, paylen); + cmd_type = ixgbe_tx_cmd_type(tx_flags); #ifdef IXGBE_FCOE if (tx_flags & IXGBE_TX_FLAGS_FCOE) { - if (data_len >= sizeof(struct fcoe_crc_eof)) { - data_len -= sizeof(struct fcoe_crc_eof); - } else { + if (data_len < sizeof(struct fcoe_crc_eof)) { size -= sizeof(struct fcoe_crc_eof) - data_len; data_len = 0; + } else { + data_len -= sizeof(struct fcoe_crc_eof); } } #endif - dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) goto dma_error; - cmd_type = ixgbe_tx_cmd_type(tx_flags); - olinfo_status = ixgbe_tx_olinfo_status(tx_flags, paylen); + /* record length, and DMA address */ + dma_unmap_len_set(first, len, size); + dma_unmap_addr_set(first, dma, dma); + first->tx_flags = tx_flags; - tx_desc = IXGBE_TX_DESC(tx_ring, i); + tx_desc->read.buffer_addr = cpu_to_le64(dma); for (;;) { - while (size > IXGBE_MAX_DATA_PER_TXD) { - tx_desc->read.buffer_addr = cpu_to_le64(dma + offset); + while (unlikely(size > IXGBE_MAX_DATA_PER_TXD)) { tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD); - tx_desc->read.olinfo_status = olinfo_status; - - offset += IXGBE_MAX_DATA_PER_TXD; - size -= IXGBE_MAX_DATA_PER_TXD; - tx_desc++; i++; + tx_desc++; if (i == tx_ring->count) { tx_desc = IXGBE_TX_DESC(tx_ring, 0); i = 0; } + + dma += IXGBE_MAX_DATA_PER_TXD; + size -= IXGBE_MAX_DATA_PER_TXD; + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + tx_desc->read.olinfo_status = 0; } - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - tx_buffer_info->length = offset + size; - tx_buffer_info->tx_flags = tx_flags; - tx_buffer_info->dma = dma; + if (likely(!data_len)) + break; - tx_desc->read.buffer_addr = cpu_to_le64(dma + offset); + if (unlikely(skb->no_fcs)) + cmd_type &= ~(cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS)); tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); - tx_desc->read.olinfo_status = olinfo_status; - if (!data_len) - break; + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IXGBE_TX_DESC(tx_ring, 0); + i = 0; + } - frag = &skb_shinfo(skb)->frags[f]; #ifdef IXGBE_FCOE size = min_t(unsigned int, data_len, skb_frag_size(frag)); #else size = skb_frag_size(frag); #endif data_len -= size; - f++; - offset = 0; - tx_flags |= IXGBE_TX_FLAGS_MAPPED_AS_PAGE; - - dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, + DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) goto dma_error; - tx_desc++; - i++; - if (i == tx_ring->count) { - tx_desc = IXGBE_TX_DESC(tx_ring, 0); - i = 0; - } - } - - tx_desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); - - i++; - if (i == tx_ring->count) - i = 0; + tx_buffer = &tx_ring->tx_buffer_info[i]; + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); - tx_ring->next_to_use = i; + tx_desc->read.buffer_addr = cpu_to_le64(dma); + tx_desc->read.olinfo_status = 0; - if (tx_flags & IXGBE_TX_FLAGS_TSO) - gso_segs = skb_shinfo(skb)->gso_segs; -#ifdef IXGBE_FCOE - /* adjust for FCoE Sequence Offload */ - else if (tx_flags & IXGBE_TX_FLAGS_FSO) - gso_segs = DIV_ROUND_UP(skb->len - hdr_len, - skb_shinfo(skb)->gso_size); -#endif /* IXGBE_FCOE */ - else - gso_segs = 1; + frag++; + } - /* multiply data chunks by size of headers */ - tx_buffer_info->bytecount = paylen + (gso_segs * hdr_len); - tx_buffer_info->gso_segs = gso_segs; - tx_buffer_info->skb = skb; + /* write last descriptor with RS and EOP bits */ + cmd_type |= cpu_to_le32(size) | cpu_to_le32(IXGBE_TXD_CMD); + tx_desc->read.cmd_type_len = cmd_type; - netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer_info->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); /* set the timestamp */ first->time_stamp = jiffies; /* - * Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). + * Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. (Only applicable for weak-ordered + * memory model archs, such as IA-64). + * + * We also need this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. */ wmb(); /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + /* notify HW of packet */ writel(i, tx_ring->tail); return; dma_error: - dev_err(dev, "TX DMA map failed\n"); + dev_err(tx_ring->dev, "TX DMA map failed\n"); /* clear dma mappings for failed tx_buffer_info map */ for (;;) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - ixgbe_unmap_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info == first) + tx_buffer = &tx_ring->tx_buffer_info[i]; + ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); + if (tx_buffer == first) break; if (i == 0) i = tx_ring->count; i--; } - dev_kfree_skb_any(skb); - tx_ring->next_to_use = i; } -static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, +static void ixgbe_atr(struct ixgbe_ring *ring, + struct ixgbe_tx_buffer *first, u32 tx_flags, __be16 protocol) { struct ixgbe_q_vector *q_vector = ring->q_vector; @@ -6886,7 +6946,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, ring->atr_count++; /* snag network header to get L4 type and address */ - hdr.network = skb_network_header(skb); + hdr.network = skb_network_header(first->skb); /* Currently only IPv4/IPv6 with TCP is supported */ if ((protocol != __constant_htons(ETH_P_IPV6) || @@ -6895,7 +6955,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, hdr.ipv4->protocol != IPPROTO_TCP)) return; - th = tcp_hdr(skb); + th = tcp_hdr(first->skb); /* skip this packet since it is invalid or the socket is closing */ if (!th || th->fin) @@ -7033,6 +7093,12 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_BUSY; } + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; + /* if we have a HW VLAN tag being added default to the HW one */ if (vlan_tx_tag_present(skb)) { tx_flags |= vlan_tx_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT; @@ -7079,14 +7145,11 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, } } - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; - #ifdef IXGBE_FCOE /* setup tx offload for FCoE */ if ((protocol == __constant_htons(ETH_P_FCOE)) && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) { - tso = ixgbe_fso(tx_ring, skb, tx_flags, &hdr_len); + tso = ixgbe_fso(tx_ring, first, tx_flags, &hdr_len); if (tso < 0) goto out_drop; else if (tso) @@ -7103,37 +7166,55 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (protocol == __constant_htons(ETH_P_IP)) tx_flags |= IXGBE_TX_FLAGS_IPV4; - tso = ixgbe_tso(tx_ring, skb, tx_flags, protocol, &hdr_len); + tso = ixgbe_tso(tx_ring, first, tx_flags, protocol, &hdr_len); if (tso < 0) goto out_drop; else if (tso) - tx_flags |= IXGBE_TX_FLAGS_TSO; - else if (ixgbe_tx_csum(tx_ring, skb, tx_flags, protocol)) + tx_flags |= IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_CSUM; + else if (ixgbe_tx_csum(tx_ring, first, tx_flags, protocol)) tx_flags |= IXGBE_TX_FLAGS_CSUM; /* add the ATR filter if ATR is on */ if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state)) - ixgbe_atr(tx_ring, skb, tx_flags, protocol); + ixgbe_atr(tx_ring, first, tx_flags, protocol); #ifdef IXGBE_FCOE xmit_fcoe: #endif /* IXGBE_FCOE */ - ixgbe_tx_map(tx_ring, skb, first, tx_flags, hdr_len); + ixgbe_tx_map(tx_ring, first, tx_flags, hdr_len); ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED); return NETDEV_TX_OK; out_drop: - dev_kfree_skb_any(skb); + dev_kfree_skb_any(first->skb); + first->skb = NULL; + return NETDEV_TX_OK; } -static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_ring *tx_ring; + if (skb->len <= 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* + * The minimum packet size for olinfo paylen is 17 so pad the skb + * in order to meet this minimum size requirement. + */ + if (skb->len < 17) { + if (skb_padto(skb, 17)) + return NETDEV_TX_OK; + skb->len = 17; + } + tx_ring = adapter->tx_ring[skb->queue_mapping]; return ixgbe_xmit_frame_ring(skb, adapter, tx_ring); } @@ -7457,6 +7538,7 @@ static int ixgbe_set_features(struct net_device *netdev, netdev_features_t data) { struct ixgbe_adapter *adapter = netdev_priv(netdev); + netdev_features_t changed = netdev->features ^ data; bool need_reset = false; /* Make sure RSC matches LRO, reset if change */ @@ -7493,6 +7575,10 @@ static int ixgbe_set_features(struct net_device *netdev, need_reset = true; } + if (changed & NETIF_F_RXALL) + need_reset = true; + + netdev->features = data; if (need_reset) ixgbe_do_reset(netdev); @@ -7771,6 +7857,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, break; } + netdev->hw_features |= NETIF_F_RXALL; + netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_IP_CSUM; @@ -7778,6 +7866,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, netdev->vlan_features |= NETIF_F_SG; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->priv_flags |= IFF_SUPP_NOFCS; if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) adapter->flags &= ~(IXGBE_FLAG_RSS_ENABLED | |