summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarolina Jubran <cjubran@nvidia.com>2025-11-17 23:42:06 +0200
committerJakub Kicinski <kuba@kernel.org>2025-11-18 18:53:33 -0800
commit391dad2e686f214932f769847cc8603a7df389eb (patch)
treee4900635c299d595141d34b1462b6168852ec967
parent2e4c44b12f4da60d3e8dcbc1ccf38bb28a878050 (diff)
net/mlx5e: Recover SQ on excessive PTP TX timestamp delta
Extend the TX timestamp handler to recover the SQ when the difference between the port and CQE TX timestamps is abnormally large. The current logic aborts timestamp delivery if the delta exceeds 1/128 seconds, which matches the maximum expected packet interval in ptp4l. A larger delta makes the timestamps unreliable. This change adds recovery if the delta exceeds 0.5 seconds. Such a large gap should not occur in normal operation and indicates that firmware is stuck or metadata tracking is out of sync, leading to stale or mismatched timestamps. Recovering the SQ ensures forward progress and avoids silently dropping invalid timestamps. The timestamp handler now takes mlx5e_ptpsq directly to access both CQ stats and the recovery state. Signed-off-by: Carolina Jubran <cjubran@nvidia.com> Reviewed-by: Shahar Shitrit <shshitrit@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Link: https://patch.msgid.link/1763415729-1238421-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c2
3 files changed, 17 insertions, 8 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 12e10feb30f0..424f8a2728a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -82,7 +82,7 @@ static struct mlx5e_skb_cb_hwtstamp *mlx5e_skb_cb_get_hwts(struct sk_buff *skb)
}
static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
- struct mlx5e_ptp_cq_stats *cq_stats)
+ struct mlx5e_ptpsq *ptpsq)
{
struct skb_shared_hwtstamps hwts = {};
ktime_t diff;
@@ -92,8 +92,17 @@ static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
/* Maximal allowed diff is 1 / 128 second */
if (diff > (NSEC_PER_SEC >> 7)) {
- cq_stats->abort++;
- cq_stats->abort_abs_diff_ns += diff;
+ struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+
+ ptpsq->cq_stats->abort++;
+ ptpsq->cq_stats->abort_abs_diff_ns += diff;
+ if (diff > (NSEC_PER_SEC >> 1) &&
+ !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+ netdev_warn(sq->channel->netdev,
+ "PTP TX timestamp difference between CQE and port exceeds threshold: %lld ns, recovering SQ %u\n",
+ (s64)diff, sq->sqn);
+ queue_work(sq->priv->wq, &ptpsq->report_unhealthy_work);
+ }
return;
}
@@ -103,7 +112,7 @@ static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
ktime_t hwtstamp,
- struct mlx5e_ptp_cq_stats *cq_stats)
+ struct mlx5e_ptpsq *ptpsq)
{
switch (hwtstamp_type) {
case (MLX5E_SKB_CB_CQE_HWTSTAMP):
@@ -121,7 +130,7 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
!mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp)
return;
- mlx5e_skb_cb_hwtstamp_tx(skb, cq_stats);
+ mlx5e_skb_cb_hwtstamp_tx(skb, ptpsq);
memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
}
@@ -209,7 +218,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
- hwtstamp, ptpsq->cq_stats);
+ hwtstamp, ptpsq);
ptpsq->cq_stats->cqe++;
mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 1c0e0a86a9ac..2a457a2ed707 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -147,7 +147,7 @@ enum {
void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
ktime_t hwtstamp,
- struct mlx5e_ptp_cq_stats *cq_stats);
+ struct mlx5e_ptpsq *ptpsq);
void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb);
#endif /* __MLX5_EN_PTP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 2702b3885f06..14884b9ea7f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -755,7 +755,7 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts);
if (sq->ptpsq) {
mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP,
- hwts.hwtstamp, sq->ptpsq->cq_stats);
+ hwts.hwtstamp, sq->ptpsq);
} else {
skb_tstamp_tx(skb, &hwts);
sq->stats->timestamps++;