ice: Add support for XDP multi-buffer on Rx side

Ice driver needs to be a bit reworked on Rx data path in order to support multi-buffer XDP. For skb path, it currently works in a way that Rx ring carries pointer to skb so if driver didn't manage to combine fragmented frame at current NAPI instance, it can restore the state on next instance and keep looking for last fragment (so descriptor with EOP bit set). What needs to be achieved is that xdp_buff needs to be combined in such way (linear + frags part) in the first place. Then skb will be ready to go in case of XDP_PASS or BPF program being not present on interface. If BPF program is there, it would work on multi-buffer XDP. At this point xdp_buff resides directly on Rx ring, so given the fact that skb will be built straight from xdp_buff, there will be no further need to carry skb on Rx ring. Besides removing skb pointer from Rx ring, lots of members have been moved around within ice_rx_ring. First and foremost reason was to place rx_buf with xdp_buff on the same cacheline. This means that once we touch rx_buf (which is a preceding step before touching xdp_buff), xdp_buff will already be hot in cache. Second thing was that xdp_rxq is used rather rarely and it occupies a separate cacheline, so maybe it is better to have it at the end of ice_rx_ring. Other change that affects ice_rx_ring is the introduction of ice_rx_ring::first_desc. Its purpose is twofold - first is to propagate rx_buf->act to all the parts of current xdp_buff after running XDP program, so that ice_put_rx_buf() that got moved out of the main Rx processing loop will be able to take an appropriate action on each buffer. Second is for ice_construct_skb(). ice_construct_skb() has a copybreak mechanism which had an explicit impact on xdp_buff->skb conversion in the new approach when legacy Rx flag is toggled. It works in a way that linear part is 256 bytes long, if frame is bigger than that, remaining bytes are going as a frag to skb_shared_info. 
This means while memcpying frags from xdp_buff to newly allocated skb, care needs to be taken when picking the destination frag array entry. By the time ice_construct_skb() is called, when dealing with fragmented frame, current rx_buf points to the *last* fragment, but copybreak needs to be done against the first one. That's where ice_rx_ring::first_desc helps. When frame building spans across NAPI polls (DD bit is not set on current descriptor and xdp->data is not NULL) with current Rx buffer handling state there might be some problems. Since calls to ice_put_rx_buf() were pulled out of the main Rx processing loop and were scoped from cached_ntc to current ntc, remember that now mentioned function relies on rx_buf->act, which is set within ice_run_xdp(). ice_run_xdp() is called when EOP bit was found, so currently we could put Rx buffer with rx_buf->act being *uninitialized*. To address this, change scoping to rely on first_desc on both boundaries instead. This also implies that cleaned_count which is used as an input to ice_alloc_rx_bufs() and tells how many new buffers should be refilled has to be adjusted. If it stayed as is, what could happen is a case where ntc would go over ntu. Therefore, remove cleaned_count altogether and pass to the allocation routine the newly introduced ICE_RX_DESC_UNUSED() macro, which is an equivalent of ICE_DESC_UNUSED() dedicated for Rx side and based on struct ice_rx_ring::first_desc instead of next_to_clean. Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com> Link: https://lore.kernel.org/bpf/20230131204506.219292-11-maciej.fijalkowski@intel.com
author: Maciej Fijalkowski <maciej.fijalkowski@intel.com> 2023-01-31 21:45:03 +0100
committer: Daniel Borkmann <daniel@iogearbox.net> 2023-02-01 23:30:27 +0100
commit: 2fba7dc5157b6f85dbf1b8e26e63a724db1f3d79 (patch)
tree: 14dd6b2cc055edd2424c701900eeb71c2e3a9620 /drivers/net/ethernet/intel/ice/ice_txrx.h
parent: 8a11b334ec9b8088b54764424a49adb8ef1c002a (diff)
1 files changed, 20 insertions, 14 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 9d67d6f1b1f5..26624723352b 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -112,6 +112,10 @@ static inline int ice_skb_pad(void)
 	(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
 	      (R)->next_to_clean - (R)->next_to_use - 1)
 
+#define ICE_RX_DESC_UNUSED(R)	\
+	((((R)->first_desc > (R)->next_to_use) ? 0 : (R)->count) + \
+	      (R)->first_desc - (R)->next_to_use - 1)
+
 #define ICE_RING_QUARTER(R) ((R)->count >> 2)
 
 #define ICE_TX_FLAGS_TSO	BIT(0)
@@ -136,6 +140,7 @@ static inline int ice_skb_pad(void)
 #define ICE_XDP_TX		BIT(1)
 #define ICE_XDP_REDIR		BIT(2)
 #define ICE_XDP_EXIT		BIT(3)
+#define ICE_SKB_CONSUMED	ICE_XDP_CONSUMED
 
 #define ICE_RX_DMA_ATTR \
 	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
@@ -277,43 +282,44 @@ struct ice_rx_ring {
 	struct ice_vsi *vsi;		/* Backreference to associated VSI */
 	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
 	u8 __iomem *tail;
+	u16 q_index;			/* Queue number of ring */
+
+	u16 count;			/* Number of descriptors */
+	u16 reg_idx;			/* HW register index of the ring */
+	u16 next_to_alloc;
+	/* CL2 - 2nd cacheline starts here */
 	union {
 		struct ice_rx_buf *rx_buf;
 		struct xdp_buff **xdp_buf;
 	};
-	/* CL2 - 2nd cacheline starts here */
-	struct xdp_rxq_info xdp_rxq;
+	struct xdp_buff xdp;
 	/* CL3 - 3rd cacheline starts here */
-	u16 q_index;			/* Queue number of ring */
-
-	u16 count;			/* Number of descriptors */
-	u16 reg_idx;			/* HW register index of the ring */
+	struct bpf_prog *xdp_prog;
+	u16 rx_offset;
 
 	/* used in interrupt processing */
 	u16 next_to_use;
 	u16 next_to_clean;
-	u16 next_to_alloc;
-	u16 rx_offset;
-	u16 rx_buf_len;
+	u16 first_desc;
 
 	/* stats structs */
 	struct ice_ring_stats *ring_stats;
 
 	struct rcu_head rcu;		/* to avoid race on free */
-	/* CL4 - 3rd cacheline starts here */
+	/* CL4 - 4th cacheline starts here */
 	struct ice_channel *ch;
-	struct bpf_prog *xdp_prog;
 	struct ice_tx_ring *xdp_ring;
 	struct xsk_buff_pool *xsk_pool;
-	struct xdp_buff xdp;
-	struct sk_buff *skb;
 	dma_addr_t dma;			/* physical address of ring */
 	u64 cached_phctime;
+	u16 rx_buf_len;
 	u8 dcb_tc;			/* Traffic class of ring */
 	u8 ptp_rx;
 #define ICE_RX_FLAGS_RING_BUILD_SKB	BIT(1)
 #define ICE_RX_FLAGS_CRC_STRIP_DIS	BIT(2)
 	u8 flags;
+	/* CL5 - 5th cacheline starts here */
+	struct xdp_rxq_info xdp_rxq;
 } ____cacheline_internodealigned_in_smp;
 
 struct ice_tx_ring {
@@ -436,7 +442,7 @@ static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring)
 
 union ice_32b_rx_flex_desc;
 
-bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, u16 cleaned_count);
+bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count);
 netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
 u16
 ice_select_queue(struct net_device *dev, struct sk_buff *skb,
author	Maciej Fijalkowski <maciej.fijalkowski@intel.com>	2023-01-31 21:45:03 +0100
committer	Daniel Borkmann <daniel@iogearbox.net>	2023-02-01 23:30:27 +0100
commit	2fba7dc5157b6f85dbf1b8e26e63a724db1f3d79 (patch)
tree	14dd6b2cc055edd2424c701900eeb71c2e3a9620 /drivers/net/ethernet/intel/ice/ice_txrx.h
parent	8a11b334ec9b8088b54764424a49adb8ef1c002a (diff)