diff options
| -rw-r--r-- | drivers/net/ethernet/freescale/fec.h | 14 | ||||
| -rw-r--r-- | drivers/net/ethernet/freescale/fec_main.c | 1599 |
2 files changed, 1234 insertions, 379 deletions
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index fd9a93d02f8e..7176803146f3 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -340,6 +340,7 @@ struct bufdesc_ex { #define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE) #define TX_RING_SIZE 1024 /* Must be power of two */ #define TX_RING_MOD_MASK 511 /* for this to work */ +#define FEC_XSK_TX_BUDGET_MAX 256 #define BD_ENET_RX_INT 0x00800000 #define BD_ENET_RX_PTP ((ushort)0x0400) @@ -528,6 +529,8 @@ enum fec_txbuf_type { FEC_TXBUF_T_SKB, FEC_TXBUF_T_XDP_NDO, FEC_TXBUF_T_XDP_TX, + FEC_TXBUF_T_XSK_XMIT, + FEC_TXBUF_T_XSK_TX, }; struct fec_tx_buffer { @@ -539,6 +542,7 @@ struct fec_enet_priv_tx_q { struct bufdesc_prop bd; unsigned char *tx_bounce[TX_RING_SIZE]; struct fec_tx_buffer tx_buf[TX_RING_SIZE]; + struct xsk_buff_pool *xsk_pool; unsigned short tx_stop_threshold; unsigned short tx_wake_threshold; @@ -548,9 +552,16 @@ struct fec_enet_priv_tx_q { dma_addr_t tso_hdrs_dma; }; +union fec_rx_buffer { + void *buf_p; + struct page *page; + struct xdp_buff *xdp; +}; + struct fec_enet_priv_rx_q { struct bufdesc_prop bd; - struct page *rx_buf[RX_RING_SIZE]; + union fec_rx_buffer rx_buf[RX_RING_SIZE]; + struct xsk_buff_pool *xsk_pool; /* page_pool */ struct page_pool *page_pool; @@ -643,6 +654,7 @@ struct fec_enet_private { struct pm_qos_request pm_qos_req; unsigned int tx_align; + unsigned int rx_shift; /* hw interrupt coalesce */ unsigned int rx_pkts_itr; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a1405c928525..0d926bf18195 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -71,6 +71,7 @@ #include <net/page_pool/helpers.h> #include <net/selftests.h> #include <net/tso.h> +#include <net/xdp_sock_drv.h> #include <soc/imx/cpuidle.h> #include "fec.h" @@ -79,7 +80,7 @@ static void set_multicast_list(struct net_device *ndev); static void fec_enet_itr_coal_set(struct net_device *ndev); static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep, int cpu, struct xdp_buff *xdp, - u32 dma_sync_len); + u32 dma_sync_len, int queue); #define DRIVER_NAME "fec" @@ -467,13 +468,13 @@ fec_enet_clear_csum(struct sk_buff *skb, struct net_device *ndev) static int fec_enet_create_page_pool(struct fec_enet_private *fep, - struct fec_enet_priv_rx_q *rxq, int size) + struct fec_enet_priv_rx_q *rxq) { struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog); struct page_pool_params pp_params = { .order = fep->pagepool_order, .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, - .pool_size = size, + .pool_size = rxq->bd.ring_size, .nid = dev_to_node(&fep->pdev->dev), .dev = &fep->pdev->dev, .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, @@ -489,23 +490,18 @@ fec_enet_create_page_pool(struct fec_enet_private *fep, return err; } - err = xdp_rxq_info_reg(&rxq->xdp_rxq, fep->netdev, rxq->id, 0); - if (err < 0) - goto err_free_pp; - - err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL, - rxq->page_pool); - if (err) - goto err_unregister_rxq; - return 0; +} -err_unregister_rxq: - xdp_rxq_info_unreg(&rxq->xdp_rxq); -err_free_pp: - page_pool_destroy(rxq->page_pool); - rxq->page_pool = NULL; - return err; +static void fec_txq_trigger_xmit(struct fec_enet_private *fep, + struct fec_enet_priv_tx_q *txq) +{ + if (!(fep->quirks & FEC_QUIRK_ERR007885) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active)) + writel(0, txq->bd.reg_desc_active); } static struct bufdesc * @@ -717,12 +713,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, txq->bd.cur = bdp; /* Trigger transmission start */ - if (!(fep->quirks & FEC_QUIRK_ERR007885) || - !readl(txq->bd.reg_desc_active) || - !readl(txq->bd.reg_desc_active) || - !readl(txq->bd.reg_desc_active) || - !readl(txq->bd.reg_desc_active)) - writel(0, txq->bd.reg_desc_active); + fec_txq_trigger_xmit(fep, txq); return 0; } @@ -913,12 +904,7 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, txq->bd.cur = bdp; /* Trigger transmission start */ - if (!(fep->quirks & FEC_QUIRK_ERR007885) || - !readl(txq->bd.reg_desc_active) || - !readl(txq->bd.reg_desc_active) || - !readl(txq->bd.reg_desc_active) || - !readl(txq->bd.reg_desc_active)) - writel(0, txq->bd.reg_desc_active); + fec_txq_trigger_xmit(fep, txq); return 0; @@ -1005,6 +991,13 @@ static void fec_enet_bd_init(struct net_device *dev) bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY); else bdp->cbd_sc = cpu_to_fec16(0); + + if (fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + + ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); + } + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); } @@ -1022,33 +1015,38 @@ static void fec_enet_bd_init(struct net_device *dev) txq->bd.cur = bdp; for (i = 0; i < txq->bd.ring_size; i++) { + struct page *page; + /* Initialize the BD for every fragment in the page. */ bdp->cbd_sc = cpu_to_fec16(0); - if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { + + switch (txq->tx_buf[i].type) { + case FEC_TXBUF_T_SKB: if (bdp->cbd_bufaddr && !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) dma_unmap_single(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr), fec16_to_cpu(bdp->cbd_datlen), DMA_TO_DEVICE); - if (txq->tx_buf[i].buf_p) - dev_kfree_skb_any(txq->tx_buf[i].buf_p); - } else if (txq->tx_buf[i].type == FEC_TXBUF_T_XDP_NDO) { - if (bdp->cbd_bufaddr) - dma_unmap_single(&fep->pdev->dev, - fec32_to_cpu(bdp->cbd_bufaddr), - fec16_to_cpu(bdp->cbd_datlen), - DMA_TO_DEVICE); - - if (txq->tx_buf[i].buf_p) - xdp_return_frame(txq->tx_buf[i].buf_p); - } else { - struct page *page = txq->tx_buf[i].buf_p; - - if (page) - page_pool_put_page(pp_page_to_nmdesc(page)->pp, - page, 0, - false); + dev_kfree_skb_any(txq->tx_buf[i].buf_p); + break; + case FEC_TXBUF_T_XDP_NDO: + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); + xdp_return_frame(txq->tx_buf[i].buf_p); + break; + case FEC_TXBUF_T_XDP_TX: + page = txq->tx_buf[i].buf_p; + page_pool_put_page(pp_page_to_nmdesc(page)->pp, + page, 0, false); + break; + case FEC_TXBUF_T_XSK_TX: + xsk_buff_free(txq->tx_buf[i].buf_p); + break; + default: + break; } txq->tx_buf[i].buf_p = NULL; @@ -1481,27 +1479,102 @@ fec_enet_hwtstamp(struct fec_enet_private *fep, unsigned ts, hwtstamps->hwtstamp = ns_to_ktime(ns); } -static void -fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget) +static bool fec_enet_xsk_xmit(struct fec_enet_private *fep, + struct xsk_buff_pool *pool, + u32 queue) { - struct fec_enet_private *fep; - struct xdp_frame *xdpf; - struct bufdesc *bdp; - unsigned short status; - struct sk_buff *skb; - struct fec_enet_priv_tx_q *txq; + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; + struct xdp_desc *xsk_desc = pool->tx_descs; + int cpu = smp_processor_id(); + int free_bds, budget, batch; struct netdev_queue *nq; - int index = 0; - int entries_free; - struct page *page; - int frame_len; + struct bufdesc *bdp; + dma_addr_t dma; + u32 estatus; + u16 status; + int i, j; - fep = netdev_priv(ndev); + nq = netdev_get_tx_queue(fep->netdev, queue); + __netif_tx_lock(nq, cpu); - txq = fep->tx_queue[queue_id]; - /* get next bdp of dirty_tx */ - nq = netdev_get_tx_queue(ndev, queue_id); - bdp = txq->dirty_tx; + txq_trans_cond_update(nq); + free_bds = fec_enet_get_free_txdesc_num(txq); + if (!free_bds) + goto tx_unlock; + + budget = min(free_bds, FEC_XSK_TX_BUDGET_MAX); + batch = xsk_tx_peek_release_desc_batch(pool, budget); + if (!batch) + goto tx_unlock; + + bdp = txq->bd.cur; + for (i = 0; i < batch; i++) { + dma = xsk_buff_raw_get_dma(pool, xsk_desc[i].addr); + xsk_buff_raw_dma_sync_for_device(pool, dma, xsk_desc[i].len); + + j = fec_enet_get_bd_index(bdp, &txq->bd); + txq->tx_buf[j].type = FEC_TXBUF_T_XSK_XMIT; + txq->tx_buf[j].buf_p = NULL; + + status = fec16_to_cpu(bdp->cbd_sc); + status &= ~BD_ENET_TX_STATS; + status |= BD_ENET_TX_INTR | BD_ENET_TX_LAST; + bdp->cbd_datlen = cpu_to_fec16(xsk_desc[i].len); + bdp->cbd_bufaddr = cpu_to_fec32(dma); + + if (fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + + estatus = BD_ENET_TX_INT; + if (fep->quirks & FEC_QUIRK_HAS_AVB) + estatus |= FEC_TX_BD_FTYPE(txq->bd.qid); + + ebdp->cbd_bdu = 0; + ebdp->cbd_esc = cpu_to_fec32(estatus); + } + + /* Make sure the updates to rest of the descriptor are performed + * before transferring ownership. + */ + dma_wmb(); + + /* Send it on its way. Tell FEC it's ready, interrupt when done, + * it's the last BD of the frame, and to put the CRC on the end. + */ + status |= BD_ENET_TX_READY | BD_ENET_TX_TC; + bdp->cbd_sc = cpu_to_fec16(status); + dma_wmb(); + + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); + txq->bd.cur = bdp; + } + + /* Trigger transmission start */ + fec_txq_trigger_xmit(fep, txq); + + __netif_tx_unlock(nq); + + return batch < budget; + +tx_unlock: + __netif_tx_unlock(nq); + + return true; +} + +static int fec_enet_tx_queue(struct fec_enet_private *fep, + u16 queue, int budget) +{ + struct netdev_queue *nq = netdev_get_tx_queue(fep->netdev, queue); + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; + struct net_device *ndev = fep->netdev; + struct bufdesc *bdp = txq->dirty_tx; + int index, frame_len, entries_free; + struct fec_tx_buffer *tx_buf; + unsigned short status; + struct sk_buff *skb; + struct page *page; + int xsk_cnt = 0; /* get next bdp of dirty_tx */ bdp = fec_enet_get_nextdesc(bdp, &txq->bd); @@ -1514,45 +1587,77 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget) break; index = fec_enet_get_bd_index(bdp, &txq->bd); + tx_buf = &txq->tx_buf[index]; + frame_len = fec16_to_cpu(bdp->cbd_datlen); - if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { - skb = txq->tx_buf[index].buf_p; + switch (tx_buf->type) { + case FEC_TXBUF_T_SKB: if (bdp->cbd_bufaddr && !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) dma_unmap_single(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr), - fec16_to_cpu(bdp->cbd_datlen), - DMA_TO_DEVICE); + frame_len, DMA_TO_DEVICE); + bdp->cbd_bufaddr = cpu_to_fec32(0); + skb = tx_buf->buf_p; if (!skb) goto tx_buf_done; - } else { + + frame_len = skb->len; + + /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who + * are to time stamp the packet, so we still need to check time + * stamping enabled flag. + */ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && + fep->hwts_tx_en) && fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + struct skb_shared_hwtstamps shhwtstamps; + + fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); + skb_tstamp_tx(skb, &shhwtstamps); + } + + /* Free the sk buffer associated with this last transmit */ + napi_consume_skb(skb, budget); + break; + case FEC_TXBUF_T_XDP_NDO: /* Tx processing cannot call any XDP (or page pool) APIs if * the "budget" is 0. Because NAPI is called with budget of * 0 (such as netpoll) indicates we may be in an IRQ context, * however, we can't use the page pool from IRQ context. */ if (unlikely(!budget)) - break; - - if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO) { - xdpf = txq->tx_buf[index].buf_p; - if (bdp->cbd_bufaddr) - dma_unmap_single(&fep->pdev->dev, - fec32_to_cpu(bdp->cbd_bufaddr), - fec16_to_cpu(bdp->cbd_datlen), - DMA_TO_DEVICE); - } else { - page = txq->tx_buf[index].buf_p; - } + goto out; + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + frame_len, DMA_TO_DEVICE); bdp->cbd_bufaddr = cpu_to_fec32(0); - if (unlikely(!txq->tx_buf[index].buf_p)) { - txq->tx_buf[index].type = FEC_TXBUF_T_SKB; - goto tx_buf_done; - } + xdp_return_frame_rx_napi(tx_buf->buf_p); + break; + case FEC_TXBUF_T_XDP_TX: + if (unlikely(!budget)) + goto out; - frame_len = fec16_to_cpu(bdp->cbd_datlen); + bdp->cbd_bufaddr = cpu_to_fec32(0); + page = tx_buf->buf_p; + /* The dma_sync_size = 0 as XDP_TX has already synced + * DMA for_device + */ + page_pool_put_page(pp_page_to_nmdesc(page)->pp, page, + 0, true); + break; + case FEC_TXBUF_T_XSK_XMIT: + bdp->cbd_bufaddr = cpu_to_fec32(0); + xsk_cnt++; + break; + case FEC_TXBUF_T_XSK_TX: + bdp->cbd_bufaddr = cpu_to_fec32(0); + xsk_buff_free(tx_buf->buf_p); + break; + default: + break; } /* Check for errors. */ @@ -1572,11 +1677,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget) ndev->stats.tx_carrier_errors++; } else { ndev->stats.tx_packets++; - - if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) - ndev->stats.tx_bytes += skb->len; - else - ndev->stats.tx_bytes += frame_len; + ndev->stats.tx_bytes += frame_len; } /* Deferred means some collisions occurred during transmit, @@ -1585,33 +1686,9 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget) if (status & BD_ENET_TX_DEF) ndev->stats.collisions++; - if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { - /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who - * are to time stamp the packet, so we still need to check time - * stamping enabled flag. - */ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && - fep->hwts_tx_en) && fep->bufdesc_ex) { - struct skb_shared_hwtstamps shhwtstamps; - struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; - - fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); - skb_tstamp_tx(skb, &shhwtstamps); - } - - /* Free the sk buffer associated with this last transmit */ - napi_consume_skb(skb, budget); - } else if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO) { - xdp_return_frame_rx_napi(xdpf); - } else { /* recycle pages of XDP_TX frames */ - /* The dma_sync_size = 0 as XDP_TX has already synced DMA for_device */ - page_pool_put_page(pp_page_to_nmdesc(page)->pp, page, - 0, true); - } - - txq->tx_buf[index].buf_p = NULL; + tx_buf->buf_p = NULL; /* restore default tx buffer type: FEC_TXBUF_T_SKB */ - txq->tx_buf[index].type = FEC_TXBUF_T_SKB; + tx_buf->type = FEC_TXBUF_T_SKB; tx_buf_done: /* Make sure the update to bdp and tx_buf are performed @@ -1632,20 +1709,43 @@ tx_buf_done: } } +out: + /* ERR006358: Keep the transmitter going */ if (bdp != txq->bd.cur && readl(txq->bd.reg_desc_active) == 0) writel(0, txq->bd.reg_desc_active); + + if (txq->xsk_pool) { + struct xsk_buff_pool *pool = txq->xsk_pool; + + if (xsk_cnt) + xsk_tx_completed(pool, xsk_cnt); + + if (xsk_uses_need_wakeup(pool)) + xsk_set_tx_need_wakeup(pool); + + /* If the condition is true, it indicates that there are still + * packets to be transmitted, so return "budget" to make the + * NAPI continue polling. + */ + if (!fec_enet_xsk_xmit(fep, pool, queue)) + return budget; + } + + return 0; } -static void fec_enet_tx(struct net_device *ndev, int budget) +static int fec_enet_tx(struct net_device *ndev, int budget) { struct fec_enet_private *fep = netdev_priv(ndev); - int i; + int i, count = 0; /* Make sure that AVB queues are processed first. */ for (i = fep->num_tx_queues - 1; i >= 0; i--) - fec_enet_tx_queue(ndev, i, budget); + count += fec_enet_tx_queue(fep, i, budget); + + return count; } static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq, @@ -1658,76 +1758,28 @@ static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq, if (unlikely(!new_page)) return -ENOMEM; - rxq->rx_buf[index] = new_page; + rxq->rx_buf[index].page = new_page; phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM; bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); return 0; } -static u32 -fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog, - struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int cpu) +static int fec_enet_update_cbd_zc(struct fec_enet_priv_rx_q *rxq, + struct bufdesc *bdp, int index) { - unsigned int sync, len = xdp->data_end - xdp->data; - u32 ret = FEC_ENET_XDP_PASS; - struct page *page; - int err; - u32 act; - - act = bpf_prog_run_xdp(prog, xdp); - - /* Due xdp_adjust_tail and xdp_adjust_head: DMA sync for_device cover - * max len CPU touch - */ - sync = xdp->data_end - xdp->data; - sync = max(sync, len); - - switch (act) { - case XDP_PASS: - rxq->stats[RX_XDP_PASS]++; - ret = FEC_ENET_XDP_PASS; - break; - - case XDP_REDIRECT: - rxq->stats[RX_XDP_REDIRECT]++; - err = xdp_do_redirect(fep->netdev, xdp, prog); - if (unlikely(err)) - goto xdp_err; - - ret = FEC_ENET_XDP_REDIR; - break; - - case XDP_TX: - rxq->stats[RX_XDP_TX]++; - err = fec_enet_xdp_tx_xmit(fep, cpu, xdp, sync); - if (unlikely(err)) { - rxq->stats[RX_XDP_TX_ERRORS]++; - goto xdp_err; - } + struct xdp_buff *new_xdp; + dma_addr_t phys_addr; - ret = FEC_ENET_XDP_TX; - break; + new_xdp = xsk_buff_alloc(rxq->xsk_pool); + if (unlikely(!new_xdp)) + return -ENOMEM; - default: - bpf_warn_invalid_xdp_action(fep->netdev, prog, act); - fallthrough; - - case XDP_ABORTED: - fallthrough; /* handle aborts by dropping packet */ - - case XDP_DROP: - rxq->stats[RX_XDP_DROP]++; -xdp_err: - ret = FEC_ENET_XDP_CONSUMED; - page = virt_to_head_page(xdp->data); - page_pool_put_page(rxq->page_pool, page, sync, true); - if (act != XDP_DROP) - trace_xdp_exception(fep->netdev, prog, act); - break; - } + rxq->rx_buf[index].xdp = new_xdp; + phys_addr = xsk_buff_xdp_get_dma(new_xdp); + bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); - return ret; + return 0; } static void fec_enet_rx_vlan(const struct net_device *ndev, struct sk_buff *skb) @@ -1746,40 +1798,113 @@ static void fec_enet_rx_vlan(const struct net_device *ndev, struct sk_buff *skb) } } +static int fec_rx_error_check(struct net_device *ndev, u16 status) +{ + if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | + BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_LAST | + BD_ENET_RX_CL)) { + ndev->stats.rx_errors++; + + if (status & BD_ENET_RX_OV) { + /* FIFO overrun */ + ndev->stats.rx_fifo_errors++; + return -EIO; + } + + if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | + BD_ENET_RX_LAST)) { + /* Frame too long or too short. */ + ndev->stats.rx_length_errors++; + if ((status & BD_ENET_RX_LAST) && net_ratelimit()) + netdev_err(ndev, "rcv is not +last\n"); + } + + /* CRC Error */ + if (status & BD_ENET_RX_CR) + ndev->stats.rx_crc_errors++; + + /* Report late collisions as a frame error. */ + if (status & (BD_ENET_RX_NO | BD_ENET_RX_CL)) + ndev->stats.rx_frame_errors++; + + return -EIO; + } + + return 0; +} + +static struct sk_buff *fec_build_skb(struct fec_enet_private *fep, + struct fec_enet_priv_rx_q *rxq, + struct bufdesc *bdp, + struct page *page, u32 len) +{ + struct net_device *ndev = fep->netdev; + struct bufdesc_ex *ebdp; + struct sk_buff *skb; + + skb = build_skb(page_address(page), + PAGE_SIZE << fep->pagepool_order); + if (unlikely(!skb)) { + page_pool_recycle_direct(rxq->page_pool, page); + ndev->stats.rx_dropped++; + if (net_ratelimit()) + netdev_err(ndev, "build_skb failed\n"); + + return NULL; + } + + skb_reserve(skb, FEC_ENET_XDP_HEADROOM + fep->rx_shift); + skb_put(skb, len); + skb_mark_for_recycle(skb); + + /* Get offloads from the enhanced buffer descriptor */ + if (fep->bufdesc_ex) { + ebdp = (struct bufdesc_ex *)bdp; + + /* If this is a VLAN packet remove the VLAN Tag */ + if (ebdp->cbd_esc & cpu_to_fec32(BD_ENET_RX_VLAN)) + fec_enet_rx_vlan(ndev, skb); + + /* Get receive timestamp from the skb */ + if (fep->hwts_rx_en) + fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), + skb_hwtstamps(skb)); + + if (fep->csum_flags & FLAG_RX_CSUM_ENABLED) { + if (!(ebdp->cbd_esc & + cpu_to_fec32(FLAG_RX_CSUM_ERROR))) + /* don't check it */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb_checksum_none_assert(skb); + } + } + + skb->protocol = eth_type_trans(skb, ndev); + skb_record_rx_queue(skb, rxq->bd.qid); + + return skb; +} + /* During a receive, the bd_rx.cur points to the current incoming buffer. * When we update through the ring, if the next incoming buffer has * not been given to the system, we just set the empty indicator, * effectively tossing the packet. */ -static int -fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget) +static int fec_enet_rx_queue(struct fec_enet_private *fep, + u16 queue, int budget) { - struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_enet_priv_rx_q *rxq; - struct bufdesc *bdp; - unsigned short status; - struct sk_buff *skb; - ushort pkt_len; - int pkt_received = 0; - struct bufdesc_ex *ebdp = NULL; - int index = 0; - bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME; - struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog); - u32 ret, xdp_result = FEC_ENET_XDP_PASS; - u32 data_start = FEC_ENET_XDP_HEADROOM; - int cpu = smp_processor_id(); - struct xdp_buff xdp; + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; + bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME; + struct net_device *ndev = fep->netdev; + struct bufdesc *bdp = rxq->bd.cur; + u32 sub_len = 4 + fep->rx_shift; + int pkt_received = 0; + u16 status, pkt_len; + struct sk_buff *skb; struct page *page; - __fec32 cbd_bufaddr; - u32 sub_len = 4; - - /*If it has the FEC_QUIRK_HAS_RACC quirk property, the bit of - * FEC_RACC_SHIFT16 is set by default in the probe function. - */ - if (fep->quirks & FEC_QUIRK_HAS_RACC) { - data_start += 2; - sub_len += 2; - } + dma_addr_t dma; + int index; #if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA) /* @@ -1788,139 +1913,497 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget) */ flush_cache_all(); #endif - rxq = fep->rx_queue[queue_id]; /* First, grab all of the stats for the incoming packet. * These get messed up if we get called due to a busy condition. */ - bdp = rxq->bd.cur; - xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq); - while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) { if (pkt_received >= budget) break; pkt_received++; - writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT); + writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT); /* Check for errors. */ status ^= BD_ENET_RX_LAST; - if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | - BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_LAST | - BD_ENET_RX_CL)) { - ndev->stats.rx_errors++; - if (status & BD_ENET_RX_OV) { - /* FIFO overrun */ - ndev->stats.rx_fifo_errors++; - goto rx_processing_done; - } - if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH - | BD_ENET_RX_LAST)) { - /* Frame too long or too short. */ - ndev->stats.rx_length_errors++; - if (status & BD_ENET_RX_LAST) - netdev_err(ndev, "rcv is not +last\n"); - } - if (status & BD_ENET_RX_CR) /* CRC Error */ - ndev->stats.rx_crc_errors++; - /* Report late collisions as a frame error. */ - if (status & (BD_ENET_RX_NO | BD_ENET_RX_CL)) - ndev->stats.rx_frame_errors++; + if (unlikely(fec_rx_error_check(ndev, status))) goto rx_processing_done; - } /* Process the incoming frame. */ ndev->stats.rx_packets++; pkt_len = fec16_to_cpu(bdp->cbd_datlen); - ndev->stats.rx_bytes += pkt_len; - if (fep->quirks & FEC_QUIRK_HAS_RACC) - ndev->stats.rx_bytes -= 2; + ndev->stats.rx_bytes += pkt_len - fep->rx_shift; index = fec_enet_get_bd_index(bdp, &rxq->bd); - page = rxq->rx_buf[index]; - cbd_bufaddr = bdp->cbd_bufaddr; + page = rxq->rx_buf[index].page; + dma = fec32_to_cpu(bdp->cbd_bufaddr); if (fec_enet_update_cbd(rxq, bdp, index)) { ndev->stats.rx_dropped++; goto rx_processing_done; } - dma_sync_single_for_cpu(&fep->pdev->dev, - fec32_to_cpu(cbd_bufaddr), - pkt_len, + dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len, DMA_FROM_DEVICE); prefetch(page_address(page)); - if (xdp_prog) { - xdp_buff_clear_frags_flag(&xdp); - /* subtract 16bit shift and FCS */ - xdp_prepare_buff(&xdp, page_address(page), - data_start, pkt_len - sub_len, false); - ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, cpu); - xdp_result |= ret; - if (ret != FEC_ENET_XDP_PASS) - goto rx_processing_done; + if (unlikely(need_swap)) { + u8 *data; + + data = page_address(page) + FEC_ENET_XDP_HEADROOM; + swap_buffer(data, pkt_len); } /* The packet length includes FCS, but we don't want to * include that when passing upstream as it messes up * bridging applications. */ - skb = build_skb(page_address(page), - PAGE_SIZE << fep->pagepool_order); - if (unlikely(!skb)) { - page_pool_recycle_direct(rxq->page_pool, page); - ndev->stats.rx_dropped++; + skb = fec_build_skb(fep, rxq, bdp, page, pkt_len - sub_len); + if (!skb) + goto rx_processing_done; + + napi_gro_receive(&fep->napi, skb); - netdev_err_once(ndev, "build_skb failed!\n"); +rx_processing_done: + /* Clear the status flags for this buffer */ + status &= ~BD_ENET_RX_STATS; + + /* Mark the buffer empty */ + status |= BD_ENET_RX_EMPTY; + + if (fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + + ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); + ebdp->cbd_prot = 0; + ebdp->cbd_bdu = 0; + } + /* Make sure the updates to rest of the descriptor are + * performed before transferring ownership. + */ + wmb(); + bdp->cbd_sc = cpu_to_fec16(status); + + /* Update BD pointer to next entry */ + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); + + /* Doing this here will keep the FEC running while we process + * incoming frames. On a heavily loaded network, we should be + * able to keep up at the expense of system resources. + */ + writel(0, rxq->bd.reg_desc_active); + } + rxq->bd.cur = bdp; + + return pkt_received; +} + +static void fec_xdp_drop(struct fec_enet_priv_rx_q *rxq, + struct xdp_buff *xdp, u32 sync) +{ + struct page *page = virt_to_head_page(xdp->data); + + page_pool_put_page(rxq->page_pool, page, sync, true); +} + +static int +fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int index) +{ + if (unlikely(index < 0)) + return 0; + + return (index % fep->num_tx_queues); +} + +static int fec_enet_rx_queue_xdp(struct fec_enet_private *fep, int queue, + int budget, struct bpf_prog *prog) +{ + u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift; + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; + struct net_device *ndev = fep->netdev; + struct bufdesc *bdp = rxq->bd.cur; + u32 sub_len = 4 + fep->rx_shift; + int cpu = smp_processor_id(); + int pkt_received = 0; + struct sk_buff *skb; + u16 status, pkt_len; + struct xdp_buff xdp; + int tx_qid = queue; + struct page *page; + u32 xdp_res = 0; + dma_addr_t dma; + int index, err; + u32 act, sync; + +#if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA) + /* + * Hacky flush of all caches instead of using the DMA API for the TSO + * headers. + */ + flush_cache_all(); +#endif + + if (unlikely(tx_qid >= fep->num_tx_queues)) + tx_qid = fec_enet_xdp_get_tx_queue(fep, cpu); + + xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq); + + while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) { + if (pkt_received >= budget) + break; + pkt_received++; + + writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT); + + /* Check for errors. */ + status ^= BD_ENET_RX_LAST; + if (unlikely(fec_rx_error_check(ndev, status))) + goto rx_processing_done; + + /* Process the incoming frame. */ + ndev->stats.rx_packets++; + pkt_len = fec16_to_cpu(bdp->cbd_datlen); + ndev->stats.rx_bytes += pkt_len - fep->rx_shift; + + index = fec_enet_get_bd_index(bdp, &rxq->bd); + page = rxq->rx_buf[index].page; + dma = fec32_to_cpu(bdp->cbd_bufaddr); + + if (fec_enet_update_cbd(rxq, bdp, index)) { + ndev->stats.rx_dropped++; goto rx_processing_done; } - skb_reserve(skb, data_start); - skb_put(skb, pkt_len - sub_len); - skb_mark_for_recycle(skb); + dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len, + DMA_FROM_DEVICE); + prefetch(page_address(page)); - if (unlikely(need_swap)) { - u8 *data; + xdp_buff_clear_frags_flag(&xdp); + /* subtract 16bit shift and FCS */ + pkt_len -= sub_len; + xdp_prepare_buff(&xdp, page_address(page), data_start, + pkt_len, false); - data = page_address(page) + FEC_ENET_XDP_HEADROOM; - swap_buffer(data, pkt_len); + act = bpf_prog_run_xdp(prog, &xdp); + /* Due xdp_adjust_tail and xdp_adjust_head: DMA sync + * for_device cover max len CPU touch. + */ + sync = xdp.data_end - xdp.data; + sync = max(sync, pkt_len); + + switch (act) { + case XDP_PASS: + rxq->stats[RX_XDP_PASS]++; + /* The packet length includes FCS, but we don't want to + * include that when passing upstream as it messes up + * bridging applications. + */ + skb = fec_build_skb(fep, rxq, bdp, page, pkt_len); + if (!skb) + trace_xdp_exception(ndev, prog, XDP_PASS); + else + napi_gro_receive(&fep->napi, skb); + + break; + case XDP_REDIRECT: + rxq->stats[RX_XDP_REDIRECT]++; + err = xdp_do_redirect(ndev, &xdp, prog); + if (unlikely(err)) { + fec_xdp_drop(rxq, &xdp, sync); + trace_xdp_exception(ndev, prog, XDP_REDIRECT); + } else { + xdp_res |= FEC_ENET_XDP_REDIR; + } + break; + case XDP_TX: + rxq->stats[RX_XDP_TX]++; + err = fec_enet_xdp_tx_xmit(fep, cpu, &xdp, sync, tx_qid); + if (unlikely(err)) { + rxq->stats[RX_XDP_TX_ERRORS]++; + fec_xdp_drop(rxq, &xdp, sync); + trace_xdp_exception(ndev, prog, XDP_TX); + } else { + xdp_res |= FEC_ENET_XDP_TX; + } + break; + default: + bpf_warn_invalid_xdp_action(ndev, prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(ndev, prog, act); + /* handle aborts by dropping packet */ + fallthrough; + case XDP_DROP: + rxq->stats[RX_XDP_DROP]++; + fec_xdp_drop(rxq, &xdp, sync); + break; } - /* Extract the enhanced buffer descriptor */ - ebdp = NULL; - if (fep->bufdesc_ex) - ebdp = (struct bufdesc_ex *)bdp; +rx_processing_done: + /* Clear the status flags for this buffer */ + status &= ~BD_ENET_RX_STATS; + /* Mark the buffer empty */ + status |= BD_ENET_RX_EMPTY; - /* If this is a VLAN packet remove the VLAN Tag */ - if (fep->bufdesc_ex && - (ebdp->cbd_esc & cpu_to_fec32(BD_ENET_RX_VLAN))) - fec_enet_rx_vlan(ndev, skb); + if (fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; - skb->protocol = eth_type_trans(skb, ndev); + ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); + ebdp->cbd_prot = 0; + ebdp->cbd_bdu = 0; + } - /* Get receive timestamp from the skb */ - if (fep->hwts_rx_en && fep->bufdesc_ex) - fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), - skb_hwtstamps(skb)); + /* Make sure the updates to rest of the descriptor are + * performed before transferring ownership. + */ + dma_wmb(); + bdp->cbd_sc = cpu_to_fec16(status); - if (fep->bufdesc_ex && - (fep->csum_flags & FLAG_RX_CSUM_ENABLED)) { - if (!(ebdp->cbd_esc & cpu_to_fec32(FLAG_RX_CSUM_ERROR))) { - /* don't check it */ - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else { - skb_checksum_none_assert(skb); + /* Update BD pointer to next entry */ + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); + + /* Doing this here will keep the FEC running while we process + * incoming frames. On a heavily loaded network, we should be + * able to keep up at the expense of system resources. + */ + writel(0, rxq->bd.reg_desc_active); + } + + rxq->bd.cur = bdp; + + if (xdp_res & FEC_ENET_XDP_REDIR) + xdp_do_flush(); + + if (xdp_res & FEC_ENET_XDP_TX) + /* Trigger transmission start */ + fec_txq_trigger_xmit(fep, fep->tx_queue[tx_qid]); + + return pkt_received; +} + +static struct sk_buff *fec_build_skb_zc(struct xdp_buff *xsk, + struct napi_struct *napi) +{ + size_t len = xdp_get_buff_len(xsk); + struct sk_buff *skb; + + skb = napi_alloc_skb(napi, len); + if (unlikely(!skb)) { + xsk_buff_free(xsk); + return NULL; + } + + skb_put_data(skb, xsk->data, len); + xsk_buff_free(xsk); + + return skb; +} + +static int fec_enet_xsk_tx_xmit(struct fec_enet_private *fep, + struct xdp_buff *xsk, int cpu, + int queue) +{ + struct netdev_queue *nq = netdev_get_tx_queue(fep->netdev, queue); + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; + u32 offset = xsk->data - xsk->data_hard_start; + u32 headroom = txq->xsk_pool->headroom; + u32 len = xsk->data_end - xsk->data; + u32 index, status, estatus; + struct bufdesc *bdp; + dma_addr_t dma; + + __netif_tx_lock(nq, cpu); + + /* Avoid tx timeout as XDP shares the queue with kernel stack */ + txq_trans_cond_update(nq); + + if (!fec_enet_get_free_txdesc_num(txq)) { + __netif_tx_unlock(nq); + + return -EBUSY; + } + + /* Fill in a Tx ring entry */ + bdp = txq->bd.cur; + status = fec16_to_cpu(bdp->cbd_sc); + status &= ~BD_ENET_TX_STATS; + + index = fec_enet_get_bd_index(bdp, &txq->bd); + dma = xsk_buff_xdp_get_frame_dma(xsk) + headroom + offset; + + xsk_buff_raw_dma_sync_for_device(txq->xsk_pool, dma, len); + + txq->tx_buf[index].buf_p = xsk; + txq->tx_buf[index].type = FEC_TXBUF_T_XSK_TX; + + status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST); + if (fep->bufdesc_ex) + estatus = BD_ENET_TX_INT; + + bdp->cbd_bufaddr = cpu_to_fec32(dma); + bdp->cbd_datlen = cpu_to_fec16(len); + + if (fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + + if (fep->quirks & FEC_QUIRK_HAS_AVB) + estatus |= FEC_TX_BD_FTYPE(txq->bd.qid); + + ebdp->cbd_bdu = 0; + ebdp->cbd_esc = cpu_to_fec32(estatus); + } + + dma_wmb(); + status |= BD_ENET_TX_READY | BD_ENET_TX_TC; + bdp->cbd_sc = cpu_to_fec16(status); + dma_wmb(); + + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); + txq->bd.cur = bdp; + + __netif_tx_unlock(nq); + + return 0; +} + +static int fec_enet_rx_queue_xsk(struct fec_enet_private *fep, int queue, + int budget, struct bpf_prog *prog) +{ + u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift; + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; + struct net_device *ndev = fep->netdev; + struct bufdesc *bdp = rxq->bd.cur; + u32 sub_len = 4 + fep->rx_shift; + int cpu = smp_processor_id(); + bool wakeup_xsk = false; + struct xdp_buff *xsk; + int pkt_received = 0; + struct sk_buff *skb; + u16 status, pkt_len; + u32 xdp_res = 0; + int index, err; + u32 act; + +#if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA) + /* + * Hacky flush of all caches instead of using the DMA API for the TSO + * headers. + */ + flush_cache_all(); +#endif + + while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) { + if (unlikely(pkt_received >= budget)) + break; + + writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT); + + index = fec_enet_get_bd_index(bdp, &rxq->bd); + xsk = rxq->rx_buf[index].xdp; + if (unlikely(!xsk)) { + if (fec_enet_update_cbd_zc(rxq, bdp, index)) + break; + + if (fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + + ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); + ebdp->cbd_prot = 0; + ebdp->cbd_bdu = 0; } + + dma_wmb(); + status &= ~BD_ENET_RX_STATS; + status |= BD_ENET_RX_EMPTY; + bdp->cbd_sc = cpu_to_fec16(status); + break; } - skb_record_rx_queue(skb, queue_id); - napi_gro_receive(&fep->napi, skb); + pkt_received++; + /* Check for errors. */ + status ^= BD_ENET_RX_LAST; + if (unlikely(fec_rx_error_check(ndev, status))) + goto rx_processing_done; + + /* Process the incoming frame. */ + ndev->stats.rx_packets++; + pkt_len = fec16_to_cpu(bdp->cbd_datlen); + ndev->stats.rx_bytes += pkt_len - fep->rx_shift; + + if (fec_enet_update_cbd_zc(rxq, bdp, index)) { + ndev->stats.rx_dropped++; + goto rx_processing_done; + } + + pkt_len -= sub_len; + xsk->data = xsk->data_hard_start + data_start; + /* Subtract FCS and 16bit shift */ + xsk->data_end = xsk->data + pkt_len; + xsk->data_meta = xsk->data; + xsk_buff_dma_sync_for_cpu(xsk); + + /* If the XSK pool is enabled before the bpf program is + * installed, or the bpf program is uninstalled before + * the XSK pool is disabled. prog will be NULL and we + * need to set a default XDP_PASS action. + */ + if (unlikely(!prog)) + act = XDP_PASS; + else + act = bpf_prog_run_xdp(prog, xsk); + + switch (act) { + case XDP_PASS: + rxq->stats[RX_XDP_PASS]++; + skb = fec_build_skb_zc(xsk, &fep->napi); + if (unlikely(!skb)) { + ndev->stats.rx_dropped++; + trace_xdp_exception(ndev, prog, XDP_PASS); + } else { + napi_gro_receive(&fep->napi, skb); + } + + break; + case XDP_TX: + rxq->stats[RX_XDP_TX]++; + err = fec_enet_xsk_tx_xmit(fep, xsk, cpu, queue); + if (unlikely(err)) { + rxq->stats[RX_XDP_TX_ERRORS]++; + xsk_buff_free(xsk); + trace_xdp_exception(ndev, prog, XDP_TX); + } else { + xdp_res |= FEC_ENET_XDP_TX; + } + break; + case XDP_REDIRECT: + rxq->stats[RX_XDP_REDIRECT]++; + err = xdp_do_redirect(ndev, xsk, prog); + if (unlikely(err)) { + if (err == -ENOBUFS) + wakeup_xsk = true; + + rxq->stats[RX_XDP_DROP]++; + xsk_buff_free(xsk); + trace_xdp_exception(ndev, prog, XDP_REDIRECT); + } else { + xdp_res |= FEC_ENET_XDP_REDIR; + } + break; + default: + bpf_warn_invalid_xdp_action(ndev, prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(ndev, prog, act); + fallthrough; + case XDP_DROP: + rxq->stats[RX_XDP_DROP]++; + xsk_buff_free(xsk); + break; + } rx_processing_done: /* Clear the status flags for this buffer */ status &= ~BD_ENET_RX_STATS; - /* Mark the buffer empty */ status |= BD_ENET_RX_EMPTY; @@ -1931,37 +2414,59 @@ rx_processing_done: ebdp->cbd_prot = 0; ebdp->cbd_bdu = 0; } + /* Make sure the updates to rest of the descriptor are * performed before transferring ownership. */ - wmb(); + dma_wmb(); bdp->cbd_sc = cpu_to_fec16(status); /* Update BD pointer to next entry */ bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); /* Doing this here will keep the FEC running while we process - * incoming frames. On a heavily loaded network, we should be + * incoming frames. On a heavily loaded network, we should be * able to keep up at the expense of system resources. */ writel(0, rxq->bd.reg_desc_active); } + rxq->bd.cur = bdp; - if (xdp_result & FEC_ENET_XDP_REDIR) + if (xdp_res & FEC_ENET_XDP_REDIR) xdp_do_flush(); + if (xdp_res & FEC_ENET_XDP_TX) + fec_txq_trigger_xmit(fep, fep->tx_queue[queue]); + + if (rxq->xsk_pool && xsk_uses_need_wakeup(rxq->xsk_pool)) { + if (wakeup_xsk) + xsk_set_rx_need_wakeup(rxq->xsk_pool); + else + xsk_clear_rx_need_wakeup(rxq->xsk_pool); + } + return pkt_received; } static int fec_enet_rx(struct net_device *ndev, int budget) { struct fec_enet_private *fep = netdev_priv(ndev); + struct bpf_prog *prog = READ_ONCE(fep->xdp_prog); int i, done = 0; /* Make sure that AVB queues are processed first. */ - for (i = fep->num_rx_queues - 1; i >= 0; i--) - done += fec_enet_rx_queue(ndev, i, budget - done); + for (i = fep->num_rx_queues - 1; i >= 0; i--) { + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[i]; + int batch = budget - done; + + if (rxq->xsk_pool) + done += fec_enet_rx_queue_xsk(fep, i, batch, prog); + else if (prog) + done += fec_enet_rx_queue_xdp(fep, i, batch, prog); + else + done += fec_enet_rx_queue(fep, i, batch); + } return done; } @@ -2004,19 +2509,22 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget) { struct net_device *ndev = napi->dev; struct fec_enet_private *fep = netdev_priv(ndev); - int done = 0; + int rx_done = 0, tx_done = 0; + int max_done; do { - done += fec_enet_rx(ndev, budget - done); - fec_enet_tx(ndev, budget); - } while ((done < budget) && fec_enet_collect_events(fep)); + rx_done += fec_enet_rx(ndev, budget - rx_done); + tx_done += fec_enet_tx(ndev, budget); + max_done = max(rx_done, tx_done); + } while ((max_done < budget) && fec_enet_collect_events(fep)); - if (done < budget) { - napi_complete_done(napi, done); + if (max_done < budget) { + napi_complete_done(napi, max_done); writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); + return max_done; } - return done; + return budget; } /* ------------------------------------------------------------------------- */ @@ -3303,27 +3811,86 @@ static const struct ethtool_ops fec_enet_ethtool_ops = { .self_test = net_selftest, }; +static int fec_xdp_rxq_info_reg(struct fec_enet_private *fep, + struct fec_enet_priv_rx_q *rxq) +{ + struct net_device *ndev = fep->netdev; + void *allocator; + int type, err; + + err = xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq->id, 0); + if (err) { + netdev_err(ndev, "Failed to register xdp rxq info\n"); + return err; + } + + allocator = rxq->xsk_pool ? NULL : rxq->page_pool; + type = rxq->xsk_pool ? MEM_TYPE_XSK_BUFF_POOL : MEM_TYPE_PAGE_POOL; + err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, type, allocator); + if (err) { + netdev_err(ndev, "Failed to register XDP mem model\n"); + xdp_rxq_info_unreg(&rxq->xdp_rxq); + + return err; + } + + if (rxq->xsk_pool) + xsk_pool_set_rxq_info(rxq->xsk_pool, &rxq->xdp_rxq); + + return 0; +} + +static void fec_xdp_rxq_info_unreg(struct fec_enet_priv_rx_q *rxq) +{ + if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) { + xdp_rxq_info_unreg_mem_model(&rxq->xdp_rxq); + xdp_rxq_info_unreg(&rxq->xdp_rxq); + } +} + +static void fec_free_rxq_buffers(struct fec_enet_priv_rx_q *rxq) +{ + bool xsk = !!rxq->xsk_pool; + int i; + + for (i = 0; i < rxq->bd.ring_size; i++) { + union fec_rx_buffer *buf = &rxq->rx_buf[i]; + + if (!buf->buf_p) + continue; + + if (xsk) + xsk_buff_free(buf->xdp); + else + page_pool_put_full_page(rxq->page_pool, + buf->page, false); + + rxq->rx_buf[i].buf_p = NULL; + } + + if (!xsk) { + page_pool_destroy(rxq->page_pool); + rxq->page_pool = NULL; + } +} + static void fec_enet_free_buffers(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); unsigned int i; struct fec_enet_priv_tx_q *txq; struct fec_enet_priv_rx_q *rxq; + struct page *page; unsigned int q; for (q = 0; q < fep->num_rx_queues; q++) { rxq = fep->rx_queue[q]; - for (i = 0; i < rxq->bd.ring_size; i++) - page_pool_put_full_page(rxq->page_pool, rxq->rx_buf[i], - false); + + fec_xdp_rxq_info_unreg(rxq); + fec_free_rxq_buffers(rxq); for (i = 0; i < XDP_STATS_TOTAL; i++) rxq->stats[i] = 0; - - if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) - xdp_rxq_info_unreg(&rxq->xdp_rxq); - page_pool_destroy(rxq->page_pool); - rxq->page_pool = NULL; } for (q = 0; q < fep->num_tx_queues; q++) { @@ -3332,20 +3899,23 @@ static void fec_enet_free_buffers(struct net_device *ndev) kfree(txq->tx_bounce[i]); txq->tx_bounce[i] = NULL; - if (!txq->tx_buf[i].buf_p) { - txq->tx_buf[i].type = FEC_TXBUF_T_SKB; - continue; - } - - if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { + switch (txq->tx_buf[i].type) { + case FEC_TXBUF_T_SKB: dev_kfree_skb(txq->tx_buf[i].buf_p); - } else if (txq->tx_buf[i].type == FEC_TXBUF_T_XDP_NDO) { + break; + case FEC_TXBUF_T_XDP_NDO: xdp_return_frame(txq->tx_buf[i].buf_p); - } else { - struct page *page = txq->tx_buf[i].buf_p; - + break; + case FEC_TXBUF_T_XDP_TX: + page = txq->tx_buf[i].buf_p; page_pool_put_page(pp_page_to_nmdesc(page)->pp, page, 0, false); + break; + case FEC_TXBUF_T_XSK_TX: + xsk_buff_free(txq->tx_buf[i].buf_p); + break; + default: + break; } txq->tx_buf[i].buf_p = NULL; @@ -3422,22 +3992,18 @@ alloc_failed: return ret; } -static int -fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue) +static int fec_alloc_rxq_buffers_pp(struct fec_enet_private *fep, + struct fec_enet_priv_rx_q *rxq) { - struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_enet_priv_rx_q *rxq; + struct bufdesc *bdp = rxq->bd.base; dma_addr_t phys_addr; - struct bufdesc *bdp; struct page *page; int i, err; - rxq = fep->rx_queue[queue]; - bdp = rxq->bd.base; - - err = fec_enet_create_page_pool(fep, rxq, rxq->bd.ring_size); + err = fec_enet_create_page_pool(fep, rxq); if (err < 0) { - netdev_err(ndev, "%s failed queue %d (%d)\n", __func__, queue, err); + netdev_err(fep->netdev, "%s failed queue %d (%d)\n", + __func__, rxq->bd.qid, err); return err; } @@ -3456,31 +4022,81 @@ fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue) for (i = 0; i < rxq->bd.ring_size; i++) { page = page_pool_dev_alloc_pages(rxq->page_pool); - if (!page) - goto err_alloc; + if (!page) { + err = -ENOMEM; + goto free_rx_buffers; + } phys_addr = page_pool_get_dma_addr(page) + FEC_ENET_XDP_HEADROOM; bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); + rxq->rx_buf[i].page = page; + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); + } - rxq->rx_buf[i] = page; - bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY); + return 0; - if (fep->bufdesc_ex) { - struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; - ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT); - } +free_rx_buffers: + fec_free_rxq_buffers(rxq); + + return err; +} +static int fec_alloc_rxq_buffers_zc(struct fec_enet_private *fep, + struct fec_enet_priv_rx_q *rxq) +{ + union fec_rx_buffer *buf = &rxq->rx_buf[0]; + struct bufdesc *bdp = rxq->bd.base; + dma_addr_t phys_addr; + int i; + + for (i = 0; i < rxq->bd.ring_size; i++) { + buf[i].xdp = xsk_buff_alloc(rxq->xsk_pool); + if (!buf[i].xdp) + break; + + phys_addr = xsk_buff_xdp_get_dma(buf[i].xdp); + bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); } - /* Set the last buffer to wrap. */ - bdp = fec_enet_get_prevdesc(bdp, &rxq->bd); - bdp->cbd_sc |= cpu_to_fec16(BD_ENET_RX_WRAP); + for (; i < rxq->bd.ring_size; i++) { + buf[i].xdp = NULL; + bdp->cbd_bufaddr = cpu_to_fec32(0); + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); + } + + return 0; +} + +static int +fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct fec_enet_priv_rx_q *rxq; + int err; + + rxq = fep->rx_queue[queue]; + if (rxq->xsk_pool) { + /* RX XDP ZC buffer pool may not be populated, e.g. + * xdpsock TX-only. + */ + fec_alloc_rxq_buffers_zc(fep, rxq); + } else { + err = fec_alloc_rxq_buffers_pp(fep, rxq); + if (err) + goto free_buffers; + } + + err = fec_xdp_rxq_info_reg(fep, rxq); + if (err) + goto free_buffers; + return 0; - err_alloc: +free_buffers: fec_enet_free_buffers(ndev); - return -ENOMEM; + + return err; } static int @@ -3794,21 +4410,237 @@ static u16 fec_enet_select_queue(struct net_device *ndev, struct sk_buff *skb, return fec_enet_vlan_pri_to_queue[vlan_tag >> 13]; } +static void fec_free_rxq(struct fec_enet_priv_rx_q *rxq) +{ + fec_xdp_rxq_info_unreg(rxq); + fec_free_rxq_buffers(rxq); + kfree(rxq); +} + +static struct fec_enet_priv_rx_q * +fec_alloc_new_rxq_xsk(struct fec_enet_private *fep, int queue, + struct xsk_buff_pool *pool) +{ + struct fec_enet_priv_rx_q *old_rxq = fep->rx_queue[queue]; + struct fec_enet_priv_rx_q *rxq; + union fec_rx_buffer *buf; + int i; + + rxq = kzalloc(sizeof(*rxq), GFP_KERNEL); + if (!rxq) + return NULL; + + /* Copy the BD ring to the new rxq */ + rxq->bd = old_rxq->bd; + rxq->id = queue; + rxq->xsk_pool = pool; + buf = &rxq->rx_buf[0]; + + for (i = 0; i < rxq->bd.ring_size; i++) { + buf[i].xdp = xsk_buff_alloc(pool); + /* RX XDP ZC buffer pool may not be populated, e.g. + * xdpsock TX-only. + */ + if (!buf[i].xdp) + break; + } + + if (fec_xdp_rxq_info_reg(fep, rxq)) + goto free_buffers; + + return rxq; + +free_buffers: + while (--i >= 0) + xsk_buff_free(buf[i].xdp); + + kfree(rxq); + + return NULL; +} + +static struct fec_enet_priv_rx_q * +fec_alloc_new_rxq_pp(struct fec_enet_private *fep, int queue) +{ + struct fec_enet_priv_rx_q *old_rxq = fep->rx_queue[queue]; + struct fec_enet_priv_rx_q *rxq; + union fec_rx_buffer *buf; + int i = 0; + + rxq = kzalloc(sizeof(*rxq), GFP_KERNEL); + if (!rxq) + return NULL; + + rxq->bd = old_rxq->bd; + rxq->id = queue; + + if (fec_enet_create_page_pool(fep, rxq)) + goto free_rxq; + + buf = &rxq->rx_buf[0]; + for (; i < rxq->bd.ring_size; i++) { + buf[i].page = page_pool_dev_alloc_pages(rxq->page_pool); + if (!buf[i].page) + goto free_buffers; + } + + if (fec_xdp_rxq_info_reg(fep, rxq)) + goto free_buffers; + + return rxq; + +free_buffers: + while (--i >= 0) + page_pool_put_full_page(rxq->page_pool, + buf[i].page, false); + + page_pool_destroy(rxq->page_pool); +free_rxq: + kfree(rxq); + + return NULL; +} + +static void fec_init_rxq_bd_buffers(struct fec_enet_priv_rx_q *rxq, bool xsk) +{ + union fec_rx_buffer *buf = &rxq->rx_buf[0]; + struct bufdesc *bdp = rxq->bd.base; + dma_addr_t dma; + + for (int i = 0; i < rxq->bd.ring_size; i++) { + if (xsk) + dma = buf[i].xdp ? + xsk_buff_xdp_get_dma(buf[i].xdp) : 0; + else + dma = page_pool_get_dma_addr(buf[i].page) + + FEC_ENET_XDP_HEADROOM; + + bdp->cbd_bufaddr = cpu_to_fec32(dma); + bdp = fec_enet_get_nextdesc(bdp, &rxq->bd); + } +} + +static int fec_xsk_restart_napi(struct fec_enet_private *fep, + struct xsk_buff_pool *pool, + u16 queue) +{ + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; + struct net_device *ndev = fep->netdev; + struct fec_enet_priv_rx_q *rxq; + int err; + + napi_disable(&fep->napi); + netif_tx_disable(ndev); + synchronize_rcu(); + + rxq = pool ? fec_alloc_new_rxq_xsk(fep, queue, pool) : + fec_alloc_new_rxq_pp(fep, queue); + if (!rxq) { + err = -ENOMEM; + goto err_alloc_new_rxq; + } + + /* Replace the old rxq with the new rxq */ + fec_free_rxq(fep->rx_queue[queue]); + fep->rx_queue[queue] = rxq; + fec_init_rxq_bd_buffers(rxq, !!pool); + txq->xsk_pool = pool; + + fec_restart(ndev); + napi_enable(&fep->napi); + netif_tx_start_all_queues(ndev); + + return 0; + +err_alloc_new_rxq: + napi_enable(&fep->napi); + netif_tx_start_all_queues(ndev); + + return err; +} + +static int fec_enable_xsk_pool(struct fec_enet_private *fep, + struct xsk_buff_pool *pool, + u16 queue) +{ + int err; + + err = xsk_pool_dma_map(pool, &fep->pdev->dev, 0); + if (err) { + netdev_err(fep->netdev, "Failed to map xsk pool\n"); + return err; + } + + if (!netif_running(fep->netdev)) { + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; + + rxq->xsk_pool = pool; + txq->xsk_pool = pool; + + return 0; + } + + err = fec_xsk_restart_napi(fep, pool, queue); + if (err) { + xsk_pool_dma_unmap(pool, 0); + return err; + } + + return 0; +} + +static int fec_disable_xsk_pool(struct fec_enet_private *fep, + u16 queue) +{ + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; + struct xsk_buff_pool *old_pool = txq->xsk_pool; + int err; + + if (!netif_running(fep->netdev)) { + struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue]; + + xsk_pool_dma_unmap(old_pool, 0); + rxq->xsk_pool = NULL; + txq->xsk_pool = NULL; + + return 0; + } + + err = fec_xsk_restart_napi(fep, NULL, queue); + if (err) + return err; + + xsk_pool_dma_unmap(old_pool, 0); + + return 0; +} + +static int fec_setup_xsk_pool(struct fec_enet_private *fep, + struct xsk_buff_pool *pool, + u16 queue) +{ + if (queue >= fep->num_rx_queues || queue >= fep->num_tx_queues) + return -ERANGE; + + return pool ? fec_enable_xsk_pool(fep, pool, queue) : + fec_disable_xsk_pool(fep, queue); +} + static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf) { struct fec_enet_private *fep = netdev_priv(dev); bool is_run = netif_running(dev); struct bpf_prog *old_prog; + /* No need to support the SoCs that require to do the frame swap + * because the performance wouldn't be better than the skb mode. + */ + if (fep->quirks & FEC_QUIRK_SWAP_FRAME) + return -EOPNOTSUPP; + switch (bpf->command) { case XDP_SETUP_PROG: - /* No need to support the SoCs that require to - * do the frame swap because the performance wouldn't be - * better than the skb mode. - */ - if (fep->quirks & FEC_QUIRK_SWAP_FRAME) - return -EOPNOTSUPP; - if (!bpf->prog) xdp_features_clear_redirect_target(dev); @@ -3832,24 +4664,14 @@ static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf) xdp_features_set_redirect_target(dev, false); return 0; - case XDP_SETUP_XSK_POOL: - return -EOPNOTSUPP; - + return fec_setup_xsk_pool(fep, bpf->xsk.pool, + bpf->xsk.queue_id); default: return -EOPNOTSUPP; } } -static int -fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int index) -{ - if (unlikely(index < 0)) - return 0; - - return (index % fep->num_tx_queues); -} - static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, struct fec_enet_priv_tx_q *txq, void *frame, u32 dma_sync_len, @@ -3935,28 +4757,16 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, txq->bd.cur = bdp; - /* Trigger transmission start */ - if (!(fep->quirks & FEC_QUIRK_ERR007885) || - !readl(txq->bd.reg_desc_active) || - !readl(txq->bd.reg_desc_active) || - !readl(txq->bd.reg_desc_active) || - !readl(txq->bd.reg_desc_active)) - writel(0, txq->bd.reg_desc_active); - return 0; } static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep, int cpu, struct xdp_buff *xdp, - u32 dma_sync_len) + u32 dma_sync_len, int queue) { - struct fec_enet_priv_tx_q *txq; - struct netdev_queue *nq; - int queue, ret; - - queue = fec_enet_xdp_get_tx_queue(fep, cpu); - txq = fep->tx_queue[queue]; - nq = netdev_get_tx_queue(fep->netdev, queue); + struct netdev_queue *nq = netdev_get_tx_queue(fep->netdev, queue); + struct fec_enet_priv_tx_q *txq = fep->tx_queue[queue]; + int ret; __netif_tx_lock(nq, cpu); @@ -3996,11 +4806,37 @@ static int fec_enet_xdp_xmit(struct net_device *dev, sent_frames++; } + if (sent_frames) + fec_txq_trigger_xmit(fep, txq); + __netif_tx_unlock(nq); return sent_frames; } +static int fec_enet_xsk_wakeup(struct net_device *ndev, u32 queue, u32 flags) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct fec_enet_priv_rx_q *rxq; + + if (!netif_running(ndev) || !netif_carrier_ok(ndev)) + return -ENETDOWN; + + if (queue >= fep->num_rx_queues || queue >= fep->num_tx_queues) + return -ERANGE; + + rxq = fep->rx_queue[queue]; + if (!rxq->xsk_pool) + return -EINVAL; + + if (!napi_if_scheduled_mark_missed(&fep->napi)) { + if (likely(napi_schedule_prep(&fep->napi))) + __napi_schedule(&fep->napi); + } + + return 0; +} + static int fec_hwtstamp_get(struct net_device *ndev, struct kernel_hwtstamp_config *config) { @@ -4063,6 +4899,7 @@ static const struct net_device_ops fec_netdev_ops = { .ndo_set_features = fec_set_features, .ndo_bpf = fec_enet_bpf, .ndo_xdp_xmit = fec_enet_xdp_xmit, + .ndo_xsk_wakeup = fec_enet_xsk_wakeup, .ndo_hwtstamp_get = fec_hwtstamp_get, .ndo_hwtstamp_set = fec_hwtstamp_set, }; @@ -4190,7 +5027,8 @@ static int fec_enet_init(struct net_device *ndev) if (!(fep->quirks & FEC_QUIRK_SWAP_FRAME)) ndev->xdp_features = NETDEV_XDP_ACT_BASIC | - NETDEV_XDP_ACT_REDIRECT; + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; fec_restart(ndev); @@ -4593,6 +5431,11 @@ fec_probe(struct platform_device *pdev) ndev->max_mtu = fep->max_buf_size - VLAN_ETH_HLEN - ETH_FCS_LEN; + if (fep->quirks & FEC_QUIRK_HAS_RACC) + fep->rx_shift = 2; + else + fep->rx_shift = 0; + ret = register_netdev(ndev); if (ret) goto failed_register; |
