From 229671ac60e298b85c2644f52d7e487e9f487d06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Apr 2025 20:27:42 +0000 Subject: net: remove cpu stall in txq_trans_update() txq_trans_update() currently uses txq->xmit_lock_owner to conditionally update txq->trans_start. For regular devices, txq->xmit_lock_owner is updated from HARD_TX_LOCK() and HARD_TX_UNLOCK(), and this apparently causes cpu stalls. Using dev->lltx, which sits in a read-mostly cache-line, and already used in HARD_TX_LOCK() and HARD_TX_UNLOCK() helps cpu prediction. On an AMD EPYC 7B12 dual socket server, tcp_rr with 128 threads and 30,000 flows gets a 5 % increase in throughput. As explained in commit 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()") I am planning to no longer update txq->trans_start in the fast path in a followup patch. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250408202742.2145516-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dece2ae396a1..a28a08046615 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4693,9 +4693,10 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) /* * txq->trans_start can be read locklessly from dev_watchdog() */ -static inline void txq_trans_update(struct netdev_queue *txq) +static inline void txq_trans_update(const struct net_device *dev, + struct netdev_queue *txq) { - if (txq->xmit_lock_owner != -1) + if (!dev->lltx) WRITE_ONCE(txq->trans_start, jiffies); } @@ -5214,7 +5215,7 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) - txq_trans_update(txq); + txq_trans_update(dev, txq); return rc; } -- cgit v1.2.3