From 9e314a3c525c91c1e918546f472585cdbd44fbd1 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Thu, 25 Sep 2025 11:22:51 +0200 Subject: ice: remove legacy Rx and construct SKB The commit 53844673d555 ("iavf: kill 'legacy-rx' for good") removed the legacy Rx path in the iavf driver. This change applies the same rationale to the ice driver. The legacy Rx path relied on manual skb allocation and header copying, which has become increasingly inefficient and difficult to maintain. With the stabilization of build_skb() and the growing adoption of features like XDP, page_pool, and multi-buffer support, the legacy approach is no longer viable. Key drawbacks of the legacy path included: - Higher memory pressure due to direct page allocations and splitting; - Redundant memcpy() operations for packet headers; - CPU overhead from eth_get_headlen() and Flow Dissector usage; - Compatibility issues with XDP, which imposes strict headroom and tailroom requirements. The ice driver, like iavf, does not benefit from the minimal headroom savings that legacy Rx once offered, as it already splits pages into fixed halves. Removing this path simplifies the Rx logic, eliminates unnecessary branches in the hotpath, and prepares the driver for upcoming enhancements. In addition to removing the legacy Rx path, this change also eliminates the custom construct_skb() functions from both the standard and zero-copy (ZC) Rx paths. These are replaced with the build_skb() and standardized xdp_build_skb_from_zc() helpers, aligning the driver with the modern XDP infrastructure and reducing code duplication. This cleanup also reduces code complexity and improves maintainability as we move toward a more unified and modern Rx model across drivers. Co-developed-by: Alexander Lobakin Signed-off-by: Alexander Lobakin Reviewed-by: Alexander Lobakin Reviewed-by: Jacob Keller Signed-off-by: Michal Kubiak Tested-by: Alexander Nowlin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_ethtool.c') diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index cb34d4675a78..240e3f35c10a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -340,7 +340,6 @@ static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_FLAG_VF_TRUE_PROMISC_ENA), ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF), ICE_PRIV_FLAG("vf-vlan-pruning", ICE_FLAG_VF_VLAN_PRUNING), - ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX), }; #define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) @@ -1856,10 +1855,6 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) ice_nway_reset(netdev); } } - if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) { - /* down and up VSI so that changes of Rx cfg are reflected. */ - ice_down_up(vsi); - } /* don't allow modification of this flag when a single VF is in * promiscuous mode because it's not supported */ -- cgit v1.2.3 From 93f53db9f9dc4a16b40ecd18e6d338ad57e4b670 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Thu, 25 Sep 2025 11:22:53 +0200 Subject: ice: switch to Page Pool This patch completes the transition of the ice driver to use the Page Pool and libeth APIs, following the same direction as commit 5fa4caff59f2 ("iavf: switch to Page Pool"). With the legacy page splitting and recycling logic already removed, the driver is now in a clean state to adopt the modern memory model. The Page Pool integration simplifies buffer management by offloading DMA mapping and recycling to the core infrastructure. This eliminates the need for driver-specific handling of headroom, buffer sizing, and page order. The libeth helper is used for CPU-side processing, while DMA-for-device is handled by the Page Pool core. Additionally, this patch extends the conversion to cover XDP support. The driver now uses libeth_xdp helpers for Rx buffer processing, and optimizes XDP_TX by skipping per-frame DMA mapping. Instead, all buffers are mapped as bi-directional up front, leveraging Page Pool's lifecycle management. This significantly reduces overhead in virtualized environments. Performance observations: - In typical scenarios (netperf, XDP_PASS, XDP_DROP), performance remains on par with the previous implementation. - In XDP_TX mode: * With IOMMU enabled, performance improves dramatically - over 5x increase - due to reduced DMA mapping overhead and better memory reuse. * With IOMMU disabled, performance remains comparable to the previous implementation, with no significant changes observed. - In XDP_DROP mode: * For small MTUs, (where multiple buffers can be allocated on a single memory page), a performance drop of approximately 20% is observed. According to 'perf top' analysis, the bottleneck is caused by atomic reference counter increments in the Page Pool. * For normal MTUs, (where only one buffer can be allocated within a single memory page), performance remains comparable to baseline levels. This change is also a step toward a more modular and unified XDP implementation across Intel Ethernet drivers, aligning with ongoing efforts to consolidate and streamline feature support. Suggested-by: Maciej Fijalkowski Suggested-by: Alexander Lobakin Reviewed-by: Alexander Lobakin Reviewed-by: Jacob Keller Signed-off-by: Michal Kubiak Tested-by: Alexander Nowlin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_ethtool.c') diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 240e3f35c10a..36fdac4fddc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -10,6 +10,7 @@ #include "ice_lib.h" #include "ice_dcb_lib.h" #include +#include struct ice_stats { char stat_string[ETH_GSTRING_LEN]; @@ -1230,8 +1231,9 @@ static int ice_diag_send(struct ice_tx_ring *tx_ring, u8 *data, u16 size) */ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) { - struct ice_rx_buf *rx_buf; + struct libeth_fqe *rx_buf; int valid_frames, i; + struct page *page; u8 *received_buf; valid_frames = 0; @@ -1246,8 +1248,10 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))) continue; - rx_buf = &rx_ring->rx_buf[i]; - received_buf = page_address(rx_buf->page) + rx_buf->page_offset; + rx_buf = &rx_ring->rx_fqes[i]; + page = __netmem_to_page(rx_buf->netmem); + received_buf = page_address(page) + rx_buf->offset + + page->pp->p.offset; if (ice_lbtest_check_frame(received_buf)) valid_frames++; @@ -3303,7 +3307,8 @@ process_rx: rx_rings[i].count = new_rx_cnt; rx_rings[i].cached_phctime = pf->ptp.cached_phc_time; rx_rings[i].desc = NULL; - rx_rings[i].rx_buf = NULL; + rx_rings[i].xdp_buf = NULL; + /* this is to allow wr32 to have something to write to * during early allocation of Rx buffers */ @@ -3312,10 +3317,6 @@ process_rx: err = ice_setup_rx_ring(&rx_rings[i]); if (err) goto rx_unwind; - - /* allocate Rx buffers */ - err = ice_alloc_rx_bufs(&rx_rings[i], - ICE_RX_DESC_UNUSED(&rx_rings[i])); rx_unwind: if (err) { while (i) { -- cgit v1.2.3 From 8adfcfd6a2eedbe4007ad6732bed829f41ec720f Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 6 Oct 2025 18:20:53 +0200 Subject: ice: implement configurable header split for regular Rx Add second page_pool for header buffers to each Rx queue and ability to toggle the header split on/off using Ethtool (default to off to match the current behaviour). Unlike idpf, all HW backed up by ice doesn't require any W/As and correctly splits all types of packets as configured: after L4 headers for TCP/UDP/SCTP, after L3 headers for other IPv4/IPv6 frames, after the Ethernet header otherwise (in case of tunneling, same as above, but after innermost headers). This doesn't affect the XSk path as there are no benefits of having it there. Signed-off-by: Alexander Lobakin Tested-by: Alexander Nowlin Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/ice/ice_ethtool.c') diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 36fdac4fddc3..a1d9abee97e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3151,6 +3151,10 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, ring->rx_jumbo_max_pending = 0; ring->rx_mini_pending = 0; ring->rx_jumbo_pending = 0; + + kernel_ring->tcp_data_split = vsi->hsplit ? + ETHTOOL_TCP_DATA_SPLIT_ENABLED : + ETHTOOL_TCP_DATA_SPLIT_DISABLED; } static int @@ -3167,6 +3171,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, int i, timeout = 50, err = 0; struct ice_hw *hw = &pf->hw; u16 new_rx_cnt, new_tx_cnt; + bool hsplit; if (ring->tx_pending > ICE_MAX_NUM_DESC_BY_MAC(hw) || ring->tx_pending < ICE_MIN_NUM_DESC || @@ -3192,9 +3197,12 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", new_rx_cnt); + hsplit = kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED; + /* if nothing to do return success */ if (new_tx_cnt == vsi->tx_rings[0]->count && - new_rx_cnt == vsi->rx_rings[0]->count) { + new_rx_cnt == vsi->rx_rings[0]->count && + hsplit == vsi->hsplit) { netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); return 0; } @@ -3224,6 +3232,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, vsi->xdp_rings[i]->count = new_tx_cnt; vsi->num_tx_desc = (u16)new_tx_cnt; vsi->num_rx_desc = (u16)new_rx_cnt; + vsi->hsplit = hsplit; + netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n"); goto done; } @@ -3330,6 +3340,8 @@ rx_unwind: } process_link: + vsi->hsplit = hsplit; + /* Bring interface down, copy in the new ring info, then restore the * interface. if VSI is up, bring it down and then back up */ @@ -4811,6 +4823,7 @@ static const struct ethtool_ops ice_ethtool_ops = { ETHTOOL_COALESCE_USE_ADAPTIVE | ETHTOOL_COALESCE_RX_USECS_HIGH, .supported_input_xfrm = RXH_XFRM_SYM_XOR, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, .get_fec_stats = ice_get_fec_stats, -- cgit v1.2.3