From 923096b5cec3a68cc4b3816b1b9a50139df62ac7 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 10 Feb 2023 18:06:15 +0100 Subject: ice: Remove two impossible branches on XDP Tx cleaning The tagged commit started sending %XDP_TX frames from XSk Rx ring directly without converting it to an &xdp_frame. However, when XSk is enabled on a queue pair, it has its separate Tx cleaning functions, so neither ice_clean_xdp_irq() nor ice_unmap_and_free_tx_buf() ever happens there. Remove impossible branches in order to reduce the diffstat of the upcoming change. Fixes: a24b4c6e9aab ("ice: xsk: Do not convert to buff to frame for XDP_TX") Suggested-by: Maciej Fijalkowski Signed-off-by: Alexander Lobakin Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20230210170618.1973430-4-alexandr.lobakin@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 466113c86e6f..6b99adb695e7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -116,10 +116,7 @@ ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) { devm_kfree(ring->dev, tx_buf->raw_buf); } else if (ice_ring_is_xdp(ring)) { - if (ring->xsk_pool) - xsk_buff_free(tx_buf->xdp); - else - page_frag_free(tx_buf->raw_buf); + page_frag_free(tx_buf->raw_buf); } else { dev_kfree_skb_any(tx_buf->skb); } -- cgit v1.2.3 From aa1d3faf71a6a46f9b859daa8ffa5b86fa07217c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 10 Feb 2023 18:06:16 +0100 Subject: ice: Robustify cleaning/completing XDP Tx buffers When queueing frames from a Page Pool for redirecting to a device backed by the ice driver, `perf top` shows heavy load on page_alloc() and page_frag_free(), despite that on a properly working system it must be fully or at least almost zero-alloc. The problem is in fact a bit deeper and raises from how ice cleans up completed Tx buffers. The story so far: when cleaning/freeing the resources related to a particular completed Tx frame (skbs, DMA mappings etc.), ice uses some heuristics only without setting any type explicitly (except for dummy Flow Director packets, which are marked via ice_tx_buf::tx_flags). This kinda works, but only up to some point. For example, currently ice assumes that each frame coming to __ice_xmit_xdp_ring(), is backed by either plain order-0 page or plain page frag, while it may also be backed by Page Pool or any other possible memory models introduced in future. This means any &xdp_frame must be freed properly via xdp_return_frame() family with no assumptions. In order to do that, the whole heuristics must be replaced with setting the Tx buffer/frame type explicitly, just how it's always been done via an enum. Let us reuse 16 bits from ::tx_flags -- 1 bit-and instr won't hurt much -- especially given that sometimes there was a check for %ICE_TX_FLAGS_DUMMY_PKT, which is now turned from a flag to an enum member. The rest of the changes is straightforward and most of it is just a conversion to rely now on the type set in &ice_tx_buf rather than to some secondary properties. For now, no functional changes intended, the change only prepares the ground for starting freeing XDP frames properly next step. And it must be done atomically/synchronously to not break stuff. Signed-off-by: Alexander Lobakin Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20230210170618.1973430-5-alexandr.lobakin@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 38 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 6b99adb695e7..d7e8a3f81e20 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -85,7 +85,7 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, td_cmd = ICE_TXD_LAST_DESC_CMD | ICE_TX_DESC_CMD_DUMMY | ICE_TX_DESC_CMD_RE; - tx_buf->tx_flags = ICE_TX_FLAGS_DUMMY_PKT; + tx_buf->type = ICE_TX_BUF_DUMMY; tx_buf->raw_buf = raw_packet; tx_desc->cmd_type_offset_bsz = @@ -112,28 +112,26 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, static void ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) { - if (tx_buf->skb) { - if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) { - devm_kfree(ring->dev, tx_buf->raw_buf); - } else if (ice_ring_is_xdp(ring)) { - page_frag_free(tx_buf->raw_buf); - } else { - dev_kfree_skb_any(tx_buf->skb); - } - if (dma_unmap_len(tx_buf, len)) - dma_unmap_single(ring->dev, - dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - } else if (dma_unmap_len(tx_buf, len)) { + if (dma_unmap_len(tx_buf, len)) dma_unmap_page(ring->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + + switch (tx_buf->type) { + case ICE_TX_BUF_DUMMY: + devm_kfree(ring->dev, tx_buf->raw_buf); + break; + case ICE_TX_BUF_SKB: + dev_kfree_skb_any(tx_buf->skb); + break; + case ICE_TX_BUF_XDP_TX: + page_frag_free(tx_buf->raw_buf); + break; } tx_buf->next_to_watch = NULL; - tx_buf->skb = NULL; + tx_buf->type = ICE_TX_BUF_EMPTY; dma_unmap_len_set(tx_buf, len, 0); /* tx_buf must be completely set up in the transmit path */ } @@ -266,7 +264,7 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) DMA_TO_DEVICE); /* clear tx_buf data */ - tx_buf->skb = NULL; + tx_buf->type = ICE_TX_BUF_EMPTY; dma_unmap_len_set(tx_buf, len, 0); /* unmap remaining buffers */ @@ -1709,6 +1707,7 @@ ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first, DMA_TO_DEVICE); tx_buf = &tx_ring->tx_buf[i]; + tx_buf->type = ICE_TX_BUF_FRAG; } /* record SW timestamp if HW timestamp is not available */ @@ -2352,6 +2351,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buf[tx_ring->next_to_use]; first->skb = skb; + first->type = ICE_TX_BUF_SKB; first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); first->gso_segs = 1; first->tx_flags = 0; @@ -2524,11 +2524,11 @@ void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring) dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) + if (tx_buf->type == ICE_TX_BUF_DUMMY) devm_kfree(tx_ring->dev, tx_buf->raw_buf); /* clear next_to_watch to prevent false hangs */ - tx_buf->raw_buf = NULL; + tx_buf->type = ICE_TX_BUF_EMPTY; tx_buf->tx_flags = 0; tx_buf->next_to_watch = NULL; dma_unmap_len_set(tx_buf, len, 0); -- cgit v1.2.3 From 055d0920685e53ed8c2ad914888724d69dca17c6 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 10 Feb 2023 18:06:17 +0100 Subject: ice: Fix freeing XDP frames backed by Page Pool As already mentioned, freeing any &xdp_frame via page_frag_free() is wrong, as it assumes the frame is backed by either an order-0 page or a page with no "patrons" behind them, while in fact frames backed by Page Pool can be redirected to a device, which's driver doesn't use it. Keep storing a pointer to the raw buffer and then freeing it unconditionally via page_frag_free() for %XDP_TX frames, but introduce a separate type in the enum for frames coming through .ndo_xdp_xmit(), and free them via xdp_return_frame_bulk(). Note that saving xdpf as xdp_buff->data_hard_start is intentional and is always true when everything is configured properly. After this change, %XDP_REDIRECT from a Page Pool based driver to ice becomes zero-alloc as it should be and horrendous 3.3 Mpps / queue turn into 6.6, hehe. Let it go with no "Fixes:" tag as it spans across good 5+ commits and can't be trivially backported. Signed-off-by: Alexander Lobakin Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20230210170618.1973430-6-alexandr.lobakin@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index d7e8a3f81e20..e451276a37b6 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -128,6 +128,9 @@ ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) case ICE_TX_BUF_XDP_TX: page_frag_free(tx_buf->raw_buf); break; + case ICE_TX_BUF_XDP_XMIT: + xdp_return_frame(tx_buf->xdpf); + break; } tx_buf->next_to_watch = NULL; @@ -575,7 +578,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_TX: if (static_branch_unlikely(&ice_xdp_locking_key)) spin_lock(&xdp_ring->tx_lock); - ret = __ice_xmit_xdp_ring(xdp, xdp_ring); + ret = __ice_xmit_xdp_ring(xdp, xdp_ring, false); if (static_branch_unlikely(&ice_xdp_locking_key)) spin_unlock(&xdp_ring->tx_lock); if (ret == ICE_XDP_CONSUMED) -- cgit v1.2.3 From ad07f29b9c9a29eba04b19116c6db51387a638d7 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 10 Feb 2023 18:06:18 +0100 Subject: ice: Micro-optimize .ndo_xdp_xmit() path After the recent mbuf changes, ice_xmit_xdp_ring() became a 3-liner. It makes no sense to keep it global in a different file than its caller. Move it just next to the sole call site and mark static. Also, it doesn't need a full xdp_convert_frame_to_buff(). Save several cycles and fill only the fields used by __ice_xmit_xdp_ring() later on. Finally, since it doesn't modify @xdpf anyhow, mark the argument const to save some more (whole -11 bytes of .text! :D). Thanks to 1 jump less and less calcs as well, this yields as many as 6.7 Mpps per queue. `xdp.data_hard_start = xdpf` is fully intentional again (see xdp_convert_buff_to_frame()) and just works when there are no source device's driver issues. Signed-off-by: Alexander Lobakin Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20230210170618.1973430-7-alexandr.lobakin@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index e451276a37b6..aaf313a95368 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -605,6 +605,25 @@ exit: ice_set_rx_bufs_act(xdp, rx_ring, ret); } +/** + * ice_xmit_xdp_ring - submit frame to XDP ring for transmission + * @xdpf: XDP frame that will be converted to XDP buff + * @xdp_ring: XDP ring for transmission + */ +static int ice_xmit_xdp_ring(const struct xdp_frame *xdpf, + struct ice_tx_ring *xdp_ring) +{ + struct xdp_buff xdp; + + xdp.data_hard_start = (void *)xdpf; + xdp.data = xdpf->data; + xdp.data_end = xdp.data + xdpf->len; + xdp.frame_sz = xdpf->frame_sz; + xdp.flags = xdpf->flags; + + return __ice_xmit_xdp_ring(&xdp, xdp_ring, true); +} + /** * ice_xdp_xmit - submit packets to XDP ring for transmission * @dev: netdev @@ -650,7 +669,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; for (i = 0; i < n; i++) { - struct xdp_frame *xdpf = frames[i]; + const struct xdp_frame *xdpf = frames[i]; int err; err = ice_xmit_xdp_ring(xdpf, xdp_ring); -- cgit v1.2.3