summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShay Drory <shayd@nvidia.com>2026-05-04 21:02:03 +0300
committerJakub Kicinski <kuba@kernel.org>2026-05-05 19:13:09 -0700
commit3abcedfdfd3125431ed404fa75724118beac630b (patch)
treec540b2ab57d386cf0da45428dadc5f44a5230c18
parentaf0e9b26b9667d765d71a7f53b7ed242eb1ba671 (diff)
net/mlx5: SD: Serialize init/cleanup
mlx5_sd_init() / mlx5_sd_cleanup() may run from multiple PFs in the same Socket-Direct group. This can cause the SD bring-up/tear-down sequence to be executed more than once or interleaved across PFs. Protect SD init/cleanup with mlx5_devcom_comp_lock() and track the SD group state on the primary device. Skip init if the primary is already UP, and skip cleanup unless the primary is UP. The state check on cleanup is needed because sd_register() drops the devcom comp lock between marking the comp ready and assigning primary_dev on each peer. A concurrent cleanup that acquires the lock in this window would observe devcom_is_ready==true while primary_dev is still NULL (causing mlx5_sd_get_primary() to return NULL) or while the FW alias setup performed by mlx5_sd_init()'s body has not yet run (causing sd_cmd_unset_primary() to dereference a NULL tx_ft). Gate the cleanup body on primary_sd->state == MLX5_SD_STATE_UP, which is set only at the very end of mlx5_sd_init() under the same comp lock - so observing UP guarantees primary_dev, secondaries[], tx_ft, and dfs are all populated. Also bail explicitly if mlx5_sd_get_primary() returns NULL, in case state is checked on a peer whose primary_dev hasn't been assigned yet. In addition, move mlx5_devcom_comp_set_ready(false) from sd_unregister() into the cleanup's locked section, including the !primary and state != UP early-exit paths, so the device cannot unregister and free its struct mlx5_sd while devcom is still marked ready. A concurrent init acquiring the devcom lock will now observe devcom is no longer ready and bail out immediately. Fixes: 381978d28317 ("net/mlx5e: Create single netdev per SD group") Signed-off-by: Shay Drory <shayd@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Link: https://patch.msgid.link/20260504180206.268568-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c56
1 files changed, 50 insertions, 6 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 762c783156b4..ec42685bdece 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -18,6 +18,7 @@ struct mlx5_sd {
u8 host_buses;
struct mlx5_devcom_comp_dev *devcom;
struct dentry *dfs;
+ u8 state;
bool primary;
union {
struct { /* primary */
@@ -31,6 +32,11 @@ struct mlx5_sd {
};
};
+enum mlx5_sd_state {
+ MLX5_SD_STATE_DOWN = 0,
+ MLX5_SD_STATE_UP,
+};
+
static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
@@ -270,9 +276,6 @@ static void sd_unregister(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
- mlx5_devcom_comp_lock(sd->devcom);
- mlx5_devcom_comp_set_ready(sd->devcom, false);
- mlx5_devcom_comp_unlock(sd->devcom);
mlx5_devcom_unregister_component(sd->devcom);
}
@@ -426,6 +429,7 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
struct mlx5_core_dev *primary, *pos, *to;
struct mlx5_sd *sd = mlx5_get_sd(dev);
u8 alias_key[ACCESS_KEY_LEN];
+ struct mlx5_sd *primary_sd;
int err, i;
err = sd_init(dev);
@@ -440,10 +444,17 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
if (err)
goto err_sd_cleanup;
+ mlx5_devcom_comp_lock(sd->devcom);
if (!mlx5_devcom_comp_is_ready(sd->devcom))
- return 0;
+ goto out;
primary = mlx5_sd_get_primary(dev);
+ if (!primary)
+ goto out;
+
+ primary_sd = mlx5_get_sd(primary);
+ if (primary_sd->state != MLX5_SD_STATE_DOWN)
+ goto out;
for (i = 0; i < ACCESS_KEY_LEN; i++)
alias_key[i] = get_random_u8();
@@ -472,6 +483,9 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
sd->group_id, mlx5_devcom_comp_get_size(sd->devcom));
sd_print_group(primary);
+ primary_sd->state = MLX5_SD_STATE_UP;
+out:
+ mlx5_devcom_comp_unlock(sd->devcom);
return 0;
err_unset_secondaries:
@@ -481,6 +495,15 @@ err_unset_secondaries:
sd_cmd_unset_primary(primary);
debugfs_remove_recursive(sd->dfs);
err_sd_unregister:
+ mlx5_sd_for_each_secondary(i, primary, pos) {
+ struct mlx5_sd *peer_sd = mlx5_get_sd(pos);
+
+ primary_sd->secondaries[i - 1] = NULL;
+ peer_sd->primary_dev = NULL;
+ }
+ primary_sd->primary = false;
+ mlx5_devcom_comp_set_ready(sd->devcom, false);
+ mlx5_devcom_comp_unlock(sd->devcom);
sd_unregister(dev);
err_sd_cleanup:
sd_cleanup(dev);
@@ -491,22 +514,43 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
struct mlx5_core_dev *primary, *pos;
+ struct mlx5_sd *primary_sd;
int i;
if (!sd)
return;
+ mlx5_devcom_comp_lock(sd->devcom);
if (!mlx5_devcom_comp_is_ready(sd->devcom))
- goto out;
+ goto out_unlock;
primary = mlx5_sd_get_primary(dev);
+ if (!primary)
+ goto out_ready_false;
+
+ primary_sd = mlx5_get_sd(primary);
+ if (primary_sd->state != MLX5_SD_STATE_UP)
+ goto out_clear_peers;
+
mlx5_sd_for_each_secondary(i, primary, pos)
sd_cmd_unset_secondary(pos);
sd_cmd_unset_primary(primary);
debugfs_remove_recursive(sd->dfs);
sd_info(primary, "group id %#x, uncombined\n", sd->group_id);
-out:
+ primary_sd->state = MLX5_SD_STATE_DOWN;
+out_clear_peers:
+ mlx5_sd_for_each_secondary(i, primary, pos) {
+ struct mlx5_sd *peer_sd = mlx5_get_sd(pos);
+
+ primary_sd->secondaries[i - 1] = NULL;
+ peer_sd->primary_dev = NULL;
+ }
+ primary_sd->primary = false;
+out_ready_false:
+ mlx5_devcom_comp_set_ready(sd->devcom, false);
+out_unlock:
+ mlx5_devcom_comp_unlock(sd->devcom);
sd_unregister(dev);
sd_cleanup(dev);
}